/*******************************************************************************
;*******************************************************************************
;**                                                                           **
;**                    COPYRIGHT 2004-2012 NUANCE COMMUNICATIONS              **
;**                                                                           **
;**               NUANCE COMMUNICATIONS PROPRIETARY INFORMATION               **
;**                                                                           **
;**     This software is supplied under the terms of a license agreement      **
;**     or non-disclosure agreement with Nuance Communications and may not    **
;**     be copied or disclosed except in accordance with the terms of that    **
;**     agreement.                                                            **
;**                                                                           **
;**                                                                           **
;*******************************************************************************
;**                                                                           **
;**     FileName: et9cpphse.c                                                 **
;**                                                                           **
;**  Description: Chinese Phrase Text Input character and phrase module.      **
;**               Conforming to the development version of Chinese XT9.       **
;**                                                                           **
;*******************************************************************************
;******************************************************************************/

#include "et9api.h"
#include "et9cpcntx.h"
#include "et9cppbuf.h"
#include "et9cpkey.h"
#include "et9cpsys.h"
#include "et9cpinit.h"
#include "et9cprdb.h"
#include "et9cpspel.h"
#include "et9cpldb.h"
#include "et9cpmisc.h"
#include "et9cpname.h"
#include "et9cpwdls.h"
#include "et9cptone.h"
#include "et9cstrie.h"

/*---------------------------------------------------------------------------
 *
 *   Function: ET9_CP_ContextFillPhraseBuffer
 *
 *   Synopsis: With no active input, fill the main phrase buffer with phrases
 *             that continue the current context (UDB, MDB, then LDB), then
 *             smart punctuation, and finally common characters when the
 *             buffer is empty or the current page is not yet full.
 *
 *     Input:  pET9CPLingInfo    = Pointer to Chinese XT9 LingInfo structure.
 *
 *     Return: ET9STATUS_NONE (the only value this implementation returns).
 *
 *---------------------------------------------------------------------------*/

ET9STATUS ET9FARCALL ET9_CP_ContextFillPhraseBuffer(ET9CPLingInfo *pET9CPLingInfo)
{
    ET9_CP_PhraseBuf *pBuf;
    ET9U16 *pwCntx;
    ET9U8 *pbSegLens;
    ET9U8 bRemaining;
    ET9U8 bSeg;
    ET9BOOL bIsSID;

    /* anything that is not a phonetic mode works on SIDs */
    if (ET9CPIsModePhonetic(pET9CPLingInfo)) {
        bIsSID = (ET9BOOL)0;
    }
    else {
        bIsSID = (ET9BOOL)1;
    }

    pBuf = ET9_CP_GetMainPhraseBuf(pET9CPLingInfo);
    ET9Assert(pBuf->wLastTotal == pBuf->wTotal);
    ET9Assert(!ET9_CP_HasInput(pET9CPLingInfo) ); /* no input: find context suffix and common char */
    ET9Assert(0 == pET9CPLingInfo->CommonInfo.bSylCount);

    pwCntx = pET9CPLingInfo->CommonInfo.pwContextBuf;
    pbSegLens = pET9CPLingInfo->CommonInfo.pbContextLen;

    /* the whole context length is the sum of the zero-terminated list of segment lengths */
    bRemaining = 0;
    bSeg = 0;
    while (pbSegLens[bSeg]) {
        bRemaining = (ET9U8)(bRemaining + pbSegLens[bSeg]);
        bSeg++;
    }

    /* search with the full context first, then drop one leading segment per pass */
    bSeg = 0;
    while (pbSegLens[bSeg]) {
        ET9Assert(bRemaining);
        /* no input: no need for partial syl, but need partial phrase */
        ET9_CP_GetUdbPhrases(pET9CPLingInfo, &pET9CPLingInfo->Udb, /*bNeedPartialSyl*/0, /*bNeedPartialPhrase*/1,
                             pwCntx, bRemaining, /*pMatchType*/NULL, /*pSpellData*/NULL, bIsSID, /*pbTones*/NULL,
                             /*bValidateCntxPrefix*/0, pBuf);
        ET9_CP_GetUdbPhrases(pET9CPLingInfo, &pET9CPLingInfo->Mdb, /*bNeedPartialSyl*/0, /*bNeedPartialPhrase*/1,
                             pwCntx, bRemaining, /*pMatchType*/NULL, /*pSpellData*/NULL, bIsSID, /*pbTones*/NULL,
                             /*bValidateCntxPrefix*/0, pBuf);
        ET9_CP_GetLdbPhrases(pET9CPLingInfo, bIsSID, /*bNeedPartialSyl*/0, /*bNeedPartialPhrase*/1,
                             /*pMatchType*/NULL, /*pSpellData*/NULL, /*pbTones*/NULL, pwCntx, bRemaining,
                             /*bValidateCntxPrefix*/0, /*pNameTableBookmarks*/NULL, pBuf);

        bRemaining = (ET9U8)(bRemaining - pbSegLens[bSeg]);
        pwCntx += pbSegLens[bSeg];
        bSeg++;
    }

    ET9_CP_GetSmartPuncts(pET9CPLingInfo);

    /* top up with common characters while the current page is not full */
    if (ET9_CP_IsPhraseBufEmpty(pBuf) || !ET9_CP_PhraseBufPageFillDone(pBuf) ) {
        if (ET9CPIsNameInputActive(pET9CPLingInfo) ) {
            ET9_CP_GetCommonNameChar(pET9CPLingInfo, bIsSID);
        }
        else {
            ET9_CP_GetCommonChar(pET9CPLingInfo);
        }
    }

    pBuf->wLastTotal = pBuf->wTotal;
    return ET9STATUS_NONE;
}

/* ( psPhrase is in PID|SID )
   Return 1 if every ID in the phrase is below the "normal" Chinese ID limit
   for the current input mode, 0 otherwise.
   We need this function because UDB functions do not accept a non-Chinese PID.
*/
ET9UINT ET9FARCALL ET9_CP_PhraseIsAllChn(ET9CPLingInfo *pET9CPLingInfo, const ET9SYMB *psPhrase, ET9U8 bLen)
{
    const ET9SYMB *psCur;
    const ET9SYMB *psEnd;
    ET9U16 wLimit;

    ET9Assert(psPhrase);
    ET9Assert(bLen);

    /* stroke / CangJie / Quick CangJie work on SIDs, everything else on PIDs */
    if (ET9CPIsModeStroke(pET9CPLingInfo) || ET9CPIsModeCangJie(pET9CPLingInfo) || ET9CPIsModeQuickCangJie(pET9CPLingInfo) ) {
        wLimit = pET9CPLingInfo->Private.SPrivate.wCurTotalMatch[0];  /* wCurTotalMatch[0] is the total number of normal SID */
    }
    else {
        wLimit = ET9_CP_NORMAL_PID_COUNT(&pET9CPLingInfo->CommonInfo);
    }

    psEnd = psPhrase + bLen;
    for (psCur = psPhrase; psCur < psEnd; psCur++) {
        if (*psCur >= wLimit) {
            return 0; /* first ID at or above the limit settles it */
        }
    }
    return 1;
}

/* Sanity-check CommonInfo.pbRangeEnd for the current syllables.
 * Returns 0 if the first range end is 0, or if any of the first bSylCount
 * entries equals its predecessor (presumably a syllable that contributed no
 * range); returns 1 otherwise.
 */
static ET9UINT ET9LOCALCALL AreRangesValid(ET9CPLingInfo *pLing)
{
    const ET9U8 *pbEnds = pLing->CommonInfo.pbRangeEnd;
    const ET9U8 bCount = pLing->CommonInfo.bSylCount;
    ET9U8 bPrevEnd;
    ET9U8 bIdx;

    bPrevEnd = pbEnds[0];
    if (0 == bPrevEnd) {
        return 0;
    }

    for (bIdx = 1; bIdx < bCount; bIdx++) {
        const ET9U8 bThisEnd = pbEnds[bIdx];

        if (bThisEnd == bPrevEnd) {
            return 0;
        }
        bPrevEnd = bThisEnd;
    }
    return 1;
}

/* Search for phrases matching given spell in UDB/MDB/LDB, either finding best match or adding to phrase buffer
 *  If pMatchType is provided, *pMatchType will be set to the "best" match type among {eNoMatch, eExactMatch, ePartialMatch},
 *  where eExactMatch is better than ePartialMatch, and ePartialMatch is better than eNoMatch.
 *  In that mode pPhraseBuf must be NULL and the search stops as soon as an exact match is found.
 *  Otherwise, pPhraseBuf should be provided. All matching phrases will be added to the phrase buffer.
 *
 *  pLing               = Chinese XT9 LingInfo (must be in a phonetic mode).
 *  pSpellData          = spell data forwarded unchanged to the database searches (may be NULL).
 *  pSpell              = spelling to search for; an empty spelling matches nothing.
 *  bNeedPartialSyl     = forwarded to the database searches.
 *  bNeedPartialPhrase  = forwarded to the database searches.
 *
 *  Returns early (leaving *pMatchType at eNoMatch when provided) if the spelling cannot be
 *  converted to PID ranges or the resulting ranges are not valid.
 */
static void ET9LOCALCALL PhraseSearchSpell(
    ET9CPLingInfo *pLing,
    ET9_CP_SpellData *pSpellData,
    ET9_CP_Spell *pSpell,
    ET9_CP_PhraseBuf *pPhraseBuf,
    ET9BOOL bNeedPartialSyl,
    ET9BOOL bNeedPartialPhrase,
    ET9_CP_SpellMatch *pMatchType)
{
    /* Start each per-database result at eNoMatch so that the __ET9Max() merges below
     * never read an indeterminate value if a search returns without writing its result. */
    ET9_CP_SpellMatch eLdbMatchResult = eNoMatch;
    ET9_CP_SpellMatch eUdbMatchResult = eNoMatch;
    ET9_CP_SpellMatch eMdbMatchResult = eNoMatch;
    ET9U8 abToneMask[ET9CPMAXUDBPHRASESIZE], bToneCount, *pbToneMask;

    ET9Assert(ET9CPIsModePhonetic(pLing));

    if (pMatchType) {
        *pMatchType = eNoMatch; /* assume no match */
        ET9Assert(NULL == pPhraseBuf);
    }

    if (0 == pSpell->bLen) {
        return; /* empty spell, no phrase match */
    }

    /* set up ranges/tones for spell */
#ifdef ET9_DEBUG
    pSpell->pbChars[pSpell->bLen] = 0; /* for easier debug */
#endif
    if (!ET9_CP_SpellingToPidRanges(pLing, pSpell->pbChars, pSpell->bLen) ) {
        return;
    }

    /* guard against invalid syllables in AW's output */
    if (!AreRangesValid(pLing)) {
        return;
    }

    /* only pass a tone mask down when the spelling actually carries tones */
    bToneCount = ET9_CP_GetSpellTones(pSpell->pbChars, pSpell->bLen, abToneMask);
    pbToneMask = bToneCount ? abToneMask : NULL;

    /* search UDB */
    ET9_CP_GetUdbPhrases(pLing, &pLing->Udb, bNeedPartialSyl, bNeedPartialPhrase, NULL, 0, (pMatchType ? &eUdbMatchResult : NULL), pSpellData, 0, pbToneMask, 0, pPhraseBuf);
    if (pMatchType) {
        *pMatchType = (ET9_CP_SpellMatch)__ET9Max(*pMatchType, eUdbMatchResult);
        if (eExactMatch == *pMatchType) {
            return; /* found exact match, no need to continue */
        }
    }
    /* search MDB */
    ET9_CP_GetUdbPhrases(pLing, &pLing->Mdb, bNeedPartialSyl, bNeedPartialPhrase, NULL, 0, (pMatchType ? &eMdbMatchResult : NULL), pSpellData, 0, pbToneMask, 0, pPhraseBuf);
    if (pMatchType) {
        *pMatchType = (ET9_CP_SpellMatch)__ET9Max(*pMatchType, eMdbMatchResult);
        if (eExactMatch == *pMatchType) {
            return; /* found exact match, no need to continue */
        }
    }
    /* search LDB (last source, so nothing left to short-circuit afterwards) */
    ET9_CP_GetLdbPhrases(pLing, 0, bNeedPartialSyl, bNeedPartialPhrase, (pMatchType ? &eLdbMatchResult : NULL), pSpellData, pbToneMask, /*pwContext*/NULL, /*bContextLen*/0, /*bValidateContext*/0, /*pNameTableBookmarks*/NULL, pPhraseBuf);
    if (pMatchType) {
        *pMatchType = (ET9_CP_SpellMatch)__ET9Max(*pMatchType, eLdbMatchResult);
    }
}

/* Add the phrases matching pSpell to pPhraseBuf.
 * Thin wrapper over PhraseSearchSpell() that passes pMatchType == NULL, which
 * selects its "add matches to the phrase buffer" mode instead of its
 * "report best match type" mode. All other arguments are forwarded unchanged.
 */
void ET9FARCALL ET9_CP_GetPhraseFromSpell(
    ET9CPLingInfo *pLing,
    ET9_CP_SpellData *pSpellData,
    ET9_CP_Spell *pSpell,
    ET9_CP_PhraseBuf *pPhraseBuf,
    ET9BOOL bNeedPartialSyl,
    ET9BOOL bNeedPartialPhrase)
{
    PhraseSearchSpell(pLing, pSpellData, pSpell, pPhraseBuf, bNeedPartialSyl, bNeedPartialPhrase, /*pMatchType*/NULL);
}

/* Report the best match type (eNoMatch / ePartialMatch / eExactMatch) that the
 * databases offer for pSpell. No phrase buffer is filled and partial
 * syllables are not requested; bNeedPartialPhrase is forwarded as given.
 */
ET9_CP_SpellMatch ET9FARCALL ET9_CP_ValidateToneSpell(
    ET9CPLingInfo *pLing,
    ET9_CP_Spell *pSpell,
    ET9BOOL bNeedPartialPhrase)
{
    ET9_CP_SpellMatch eBestMatch;

    /* PhraseSearchSpell() sets eBestMatch to eNoMatch before doing anything else */
    PhraseSearchSpell(pLing,
                      /*pSpellData*/NULL,
                      pSpell,
                      /*pPhraseBuf*/NULL,
                      /*bNeedPartialSyl*/0,
                      bNeedPartialPhrase,
                      &eBestMatch);

    return eBestMatch;
}

/* Number of characters before the first ET9CP_SEGMENT_DELIMITER in the spell,
 * or the whole spell length when it contains no segment delimiter. */
static ET9U8 ET9LOCALCALL ET9_CP_FirstSegmentLength(const ET9_CP_Spell * pSpell) {
    ET9U8 bPos = 0;

    while (bPos < pSpell->bLen && ET9CP_SEGMENT_DELIMITER != pSpell->pbChars[bPos]) {
        bPos++;
    }
    return bPos;
}

/* Do the syllable boundaries of this prefix match syllable starts of the given segmentation?
 * Both strings are walked in parallel. Characters for which ET9_CP_IsDelim() is true are
 * skipped on either side; every other pair must agree on being lower case or not.
 * Returns 1 when the prefix is consumed and the spell does not continue with a
 * lower-case letter or a tone (i.e. the prefix ends on a syllable boundary), 0 otherwise.
 */
static ET9BOOL ET9LOCALCALL SyllablesAligned(ET9U8 *pbPrefix, ET9UINT nPrefixLen, ET9U8 *pbSpell, ET9UINT nSpellLen) {
    ET9UINT iPfx = 0;
    ET9UINT iSpl = 0;

    ET9Assert(nPrefixLen && nSpellLen);

    while (iPfx < nPrefixLen && iSpl < nSpellLen) {
        ET9UINT nSkipped = 0;

        /* tone/delimiter/segment delimiter on either side does not affect syllable alignment */
        if (ET9_CP_IsDelim(pbPrefix[iPfx]) ) {
            iPfx++;
            nSkipped++;
        }
        if (ET9_CP_IsDelim(pbSpell[iSpl]) ) {
            iSpl++;
            nSkipped++;
        }
        if (nSkipped) {
            continue;
        }

        /* two letters: a syllable start on one side must be a syllable start on the other */
        if (ET9_CP_IsLowerCase(pbPrefix[iPfx]) != ET9_CP_IsLowerCase(pbSpell[iSpl])) {
            return 0;
        }
        iPfx++;
        iSpl++;
    }

    if (iPfx < nPrefixLen) {
        return 0; /* prefix remained but spelling finished */
    }
    if (iSpl < nSpellLen) {
        if (ET9_CP_IsLowerCase(pbSpell[iSpl]) || ET9_CP_LOCKED_IS_TONE(pbSpell[iSpl])) {
            /* spell has more lower case letters or tones beyond prefix, not syllable aligned. */
            return 0;
        }
    }

    return 1;
}

/* Is the last syllable eligible for partial pinyin?
 *
 * Looks at the last character of the spell, or at the one before it when the
 * spell ends with ET9CPSYLLABLEDELIMITER.
 *
 *   pbSpell   = spelling characters.
 *   nSpellLen = number of characters in pbSpell.
 *
 * Returns 1 when that character is 'h' or upper case, 0 otherwise. An empty
 * spell, or a spell consisting of a single syllable delimiter, yields 0.
 */
ET9UINT ET9FARCALL ET9_CP_EndsWithInitial(ET9U8 *pbSpell, ET9UINT nSpellLen) {
    ET9U8 bLastChar;

    if (0 == nSpellLen) {
        return 0;
    }

    bLastChar = pbSpell[nSpellLen-1];
    if (bLastChar == ET9CPSYLLABLEDELIMITER) {
        ET9Assert(nSpellLen >= 2);
        if (nSpellLen < 2) {
            /* lone delimiter: nothing precedes it, so do not read pbSpell[-1] in non-assert builds */
            return 0;
        }
        bLastChar = pbSpell[nSpellLen-2];
    }

    if ('h' == bLastChar || ET9_CP_IsUpperCase(bLastChar)) {
        return 1;
    }

    return 0;
}

/* Count the syllables in a spell, including partial pinyin syllables.
 * Every upper-case character counts as one syllable. */
static ET9UINT ET9LOCALCALL CountSyllables(ET9U8 *pbSpell, ET9UINT nSpellLen) {
    const ET9U8 *pbCur = pbSpell;
    ET9UINT nLeft = nSpellLen;
    ET9UINT nUpper = 0;

    while (nLeft) {
        const ET9U8 bChar = *pbCur;

        if (ET9_CP_IsUpperCase(bChar)) {
            nUpper++;
        }
        pbCur++;
        nLeft--;
    }
    return nUpper;
}

/* Length of the first syllable of a spell: the number of characters before the
 * first syllable delimiter, segment delimiter or upper-case character found
 * after position 0. Returns the whole length when none is found, and 0 for an
 * empty spell. */
static ET9UINT ET9LOCALCALL FirstSylLen(ET9U8 *pbSpell, ET9UINT nSpellLen) {
    ET9UINT nLen = 1; /* the first character always belongs to the first syllable */

    if (0 == nSpellLen) {
        return 0;
    }

    while (nLen < nSpellLen) {
        const ET9U8 bChar = pbSpell[nLen];

        if (ET9CPSYLLABLEDELIMITER == (bChar)) {
            break;
        }
        if (ET9CP_SEGMENT_DELIMITER == (bChar)) {
            break;
        }
        if (ET9_CP_IsUpperCase(bChar)) {
            break;
        }
        nLen++;
    }
    return nLen;
}

/* Return the first possible spot partial pinyin may have been applied:
 * the index of the first upper-case character (after position 0) whose
 * preceding syllable contained no rhyme character, or nSpellLen if there is
 * none. A rhyme character here is anything that is not upper case, not a
 * syllable/segment delimiter and not 'h'.
 * (erroneously) assume that AEONM are always partial when used in the middle of a spell */
static ET9UINT ET9LOCALCALL FirstPartialPinyin(ET9U8 *pbSpell, ET9UINT nSpellLen) {
    ET9UINT nPos;
    ET9UINT fSawRhyme = 0;

    if (0 == nSpellLen) {
        return 0;
    }

    nPos = 1;
    while (nPos < nSpellLen) {
        const ET9U8 bChar = pbSpell[nPos];

        if (ET9_CP_IsUpperCase(bChar)) {
            /* a new syllable starts here: the previous one was partial if it had no rhyme */
            if (!fSawRhyme) {
                break;
            }
            fSawRhyme = 0;
        }
        else if (!(ET9CPSYLLABLEDELIMITER == bChar ||
                   ET9CP_SEGMENT_DELIMITER == bChar ||
                   'h' == bChar))
        {
            fSawRhyme = 1;
        }
        nPos++;
    }
    return nPos;
}

/* Distance between two spellings, ignoring tones and delimiters while both
 * strings still have characters.
 *   +1  for every pair of letters that differ,
 *   +16 for every character left over in either string once the other is
 *       exhausted (a single leading tone/delimiter of the leftover is skipped).
 * Returns 0 when the spellings are considered equal.
 */
static ET9INT ET9LOCALCALL ET9_CP_SpellDiff(const ET9U8 *pStr1,
                                            ET9U8 bLen1,
                                            const ET9U8 *pStr2,
                                            ET9U8 bLen2)
{
    ET9INT iScore = 0;
    ET9U8 bPos1 = 0;
    ET9U8 bPos2 = 0;

    while (bPos1 < bLen1 && bPos2 < bLen2) {
        if ( ET9_CP_LOCKED_IS_TONE(pStr1[bPos1]) || ET9_CP_IsDelim(pStr1[bPos1]) ) {
            bPos1++; /* skip tones/delimiters */
            continue;
        }
        if ( ET9_CP_LOCKED_IS_TONE(pStr2[bPos2]) || ET9_CP_IsDelim(pStr2[bPos2]) ) {
            bPos2++; /* skip tones/delimiters */
            continue;
        }
        if (pStr1[bPos1] != pStr2[bPos2]) {
            iScore++;
        }
        bPos1++;
        bPos2++;
    }

    /* leftover of str1 */
    if (bPos1 < bLen1) {
        if ( ET9_CP_LOCKED_IS_TONE(pStr1[bPos1]) || ET9_CP_IsDelim(pStr1[bPos1]) ) {
            bPos1++; /* skip tones/delimiters */
        }
        if (bPos1 < bLen1) { /* str1 has more letters */
            iScore += (bLen1 - bPos1) * 16; /* length diff has more weight than letter diff */
        }
    }

    /* leftover of str2 */
    if (bPos2 < bLen2) {
        if ( ET9_CP_LOCKED_IS_TONE(pStr2[bPos2]) || ET9_CP_IsDelim(pStr2[bPos2]) ) {
            bPos2++; /* skip tones/delimiters */
        }
        if (bPos2 < bLen2) { /* str2 has more letters */
            iScore += (bLen2 - bPos2) * 16; /* length diff has more weight than letter diff */
        }
    }

    return iScore;
}

/* Look through prefixes [iStart, iEnd) of the SBI trie for one whose spelling
 * has zero ET9_CP_SpellDiff() distance to pSpell.
 * Returns 1 with *pMatchedPrefix holding the matching prefix, or 0 if none
 * matches (in which case *pMatchedPrefix holds the last prefix fetched, if any).
 */
static ET9BOOL ET9LOCALCALL ET9_CP_FindPrefix(const ET9_CP_SSBITrie *pSBI,
                                              ET9INT iStart,
                                              ET9INT iEnd,
                                              ET9_CP_Spell *pSpell,
                                              ET9_CS_Prefix *pMatchedPrefix)
{
    ET9INT iPfx = iStart;

    while (iPfx < iEnd) {
        ET9_CS_GetPrefix(pSBI, iPfx, pMatchedPrefix);
        if (0 == ET9_CP_SpellDiff(pSpell->pbChars, pSpell->bLen, pMatchedPrefix->m_pcPfx, pMatchedPrefix->m_bPfxLen) ) {
            return 1;
        }
        iPfx++;
    }
    return 0;
}

/* Populate *psSpellData as an SBI-sourced spell record.
 * Every argument is stored in the like-named field of psSpellData->u.sSBI, with one
 * exception: fSegmentFull is stored as 0 whenever fSearchingSegment is set.
 * The spelling in pSpell is copied into psSpellData->sSpell; a NULL pSpell
 * leaves the stored spelling empty (bLen == 0).
 */
static void ET9LOCALCALL SBISpellDataInit(ET9_CP_SpellData * psSpellData,
                                          ET9BOOL fSearchingSegment,
                                          ET9BOOL fSearchingSegmentLen,
                                          ET9BOOL fSearchingSegment1stSylLen,
                                          ET9BOOL fSearchingLastSegment,
                                          ET9BOOL fSearchingSetPrefix,
                                          ET9BOOL fEndsWithInitial,
                                          ET9BOOL fPrefixSyllablesAligned,
                                          ET9INT nFirstPartialPinyin,
                                          ET9INT nSyllableCount,
                                          ET9_CP_Spell * pSpell,
                                          ET9S32 iSpellWeight,
                                          ET9U16 wSegmentFreq, /* the frequency of the segmentation's best phrase */
                                          ET9BOOL fSegmentFull)
{
    ET9_CP_SBISpellData * const pInfo = &psSpellData->u.sSBI;
    ET9_CP_Spell * const pDst = &psSpellData->sSpell;
    ET9U8 bCopied = 0;

    psSpellData->eSpellSource = ET9_CP_SpellSource_SBI;

    /* what kind of spelling is being searched */
    pInfo->fSearchingSegment = fSearchingSegment;
    pInfo->fSearchingSegmentLen = fSearchingSegmentLen;
    pInfo->fSearchingSegment1stSylLen = fSearchingSegment1stSylLen;
    pInfo->fSearchingLastSegment = fSearchingLastSegment;
    pInfo->fSearchingSetPrefix = fSearchingSetPrefix;

    /* shape of the spelling */
    pInfo->fEndsWithInitial = fEndsWithInitial;
    pInfo->fPrefixSyllablesAligned = fPrefixSyllablesAligned;
    pInfo->nFirstPartialPinyin = nFirstPartialPinyin;
    pInfo->nSyllableCount = nSyllableCount;

    /* ranking inputs */
    pInfo->iSpellWeight = iSpellWeight;
    pInfo->wSegPhraseFreq = wSegmentFreq;

    /* the "segment full" flag is never carried in while the segment itself is being searched */
    pInfo->fSegmentFull = (ET9BOOL)(fSearchingSegment ? 0 : fSegmentFull);

    if (pSpell) {
        while (bCopied < pSpell->bLen) {
            pDst->pbChars[bCopied] = pSpell->pbChars[bCopied];
            bCopied++;
        }
    }
    pDst->bLen = bCopied;
}

/* Fill the main phrase buffer by treating the input as Jianpin.
 *
 * Returns ET9STATUS_INVALID_INPUT when there is selection history, when the
 * input cannot be turned into Jianpin PID ranges, or when no phrase is found.
 * On success the active spell is set to the Jianpin spell of the first phrase
 * in the buffer and ET9STATUS_NONE is returned.
 */
ET9STATUS ET9FARCALL ET9_CP_JianpinFillPhraseBuffer(ET9CPLingInfo *pET9CPLingInfo)
{
    ET9_CP_PhraseBuf *pBuf;
    ET9CPPhrase sTopPhrase;
    ET9_CP_SpellData sSpellData;
    ET9U8 bSyls;

    if (ET9_CP_SelectionHistUnselectedStart(&pET9CPLingInfo->SelHistory) ) {
        return ET9STATUS_INVALID_INPUT; /* cannot do Jianpin when there is selection history, which is used for SBI */
    }
    if (!ET9_CP_WSIToJianpinPidRanges(pET9CPLingInfo) ) {
        return ET9STATUS_INVALID_INPUT;
    }

    pBuf = ET9_CP_GetMainPhraseBuf(pET9CPLingInfo);
    bSyls = pET9CPLingInfo->CommonInfo.bSylCount;

    SBISpellDataInit(&sSpellData, 1, 1, 1, 1, 1, 1, 1, bSyls - 1, bSyls, NULL, 0, 0, 0);

    /* non-context search: UDB, MDB, then LDB, all with partial syllable and partial phrase allowed */
    ET9_CP_GetUdbPhrases(pET9CPLingInfo, &pET9CPLingInfo->Udb, /*bNeedPartialSyl*/1, /*bNeedPartialPhrase*/1,
                         /*pwPrefix*/NULL, /*bPrefixLen*/0, /*pMatchType*/NULL, &sSpellData, /*bIsSID*/0,
                         /*pbTones*/NULL, /*bValidateCntxPrefix*/0, pBuf);
    ET9_CP_GetUdbPhrases(pET9CPLingInfo, &pET9CPLingInfo->Mdb, /*bNeedPartialSyl*/1, /*bNeedPartialPhrase*/1,
                         /*pwPrefix*/NULL, /*bPrefixLen*/0, /*pMatchType*/NULL, &sSpellData, /*bIsSID*/0,
                         /*pbTones*/NULL, /*bValidateCntxPrefix*/0, pBuf);
    ET9_CP_GetLdbPhrases(pET9CPLingInfo, /*bIsSID*/0, /*bNeedPartialSyl*/1, /*bNeedPartialPhrase*/1,
                         /*pMatchType*/NULL, &sSpellData, /*pbTones*/NULL, /*pwCntxPrefix*/NULL,
                         /*bCntxPrefixLen*/0, /*bValidateCntxPrefix*/0, /*pNameTableBookmarks*/NULL, pBuf);

    pBuf->wLastTotal = pBuf->wTotal;

    if (ET9_CP_IsPhraseBufEmpty(pBuf) ) {
        return ET9STATUS_INVALID_INPUT; /* nothing found for this input */
    }

    /* found phrases, set the active spell to be the Jianpin spell of 1st phrase */
    ET9_CP_GetPhraseFromBuf(pBuf, 1, &sTopPhrase, NULL, NULL);
    ET9_CP_MakeInternalJianpinSpell(pET9CPLingInfo, &sTopPhrase, &pET9CPLingInfo->CommonInfo.sActiveSpell);
    return ET9STATUS_NONE;
}

/* Derived struct from PhraseBuf */
/* Small phrase buffer used for local, throw-away lookups (see AddWholePhraseOfSBI,
 * which declares one on the stack and limits it to a page size of 1). */
#define ET9_CP_MINI_PHRASE_BUF_SIZE   100   /* size in bytes of the data area below */
typedef struct ET9_CP_MiniPhraseBuf_s {
    ET9_CP_PhraseBuf sPhraseBuf;    /* base phrase buffer; NOTE(review): presumably must stay the first member - confirm against ET9_CP_INIT_PHRASE_BUF */
    ET9U8           pbDataBuf[ET9_CP_MINI_PHRASE_BUF_SIZE];  /* phrase data storage; presumably used by the base buffer - confirm */
} ET9_CP_MiniPhraseBuf;

/* Build one "whole sentence" phrase for a segmented spelling and add it to the main phrase buffer.
 *
 * psSpell is split at every ET9CP_SEGMENT_DELIMITER. For each segment the first phrase
 * found by ET9_CP_GetPhraseFromSpell() (via a local one-entry mini phrase buffer) is appended
 * to the sentence, which is truncated so that it never exceeds ET9CPMAXPHRASESIZE symbols.
 * If the sentence is non-empty it is added to the main phrase buffer with source
 * ET9CPPhraseSource_Sentence and frequency 0xFFFF.
 *
 *   pET9CPLingInfo = Chinese XT9 LingInfo.
 *   psSpell        = segmented spelling to convert.
 *   fAllowFailure  = when non-zero, a segment whose search leaves the mini buffer empty
 *                    contributes nothing; when zero, the first phrase is fetched from the
 *                    mini buffer unconditionally.
 *
 * When the input contains tones, the spelling stored with the sentence is rebuilt from the
 * spellings returned with each segment's phrase instead of using psSpell as given.
 */
void ET9FARCALL AddWholePhraseOfSBI(ET9CPLingInfo *pET9CPLingInfo, const ET9_CP_Spell * psSpell, ET9BOOL fAllowFailure)
{
    ET9_CP_MiniPhraseBuf sMiniPhraseBuf;
    ET9CPPhrase sPhrase, sFullSentence;
    ET9U8   b;
    ET9_CP_Spell sSegSpell, sToneSpell;
    ET9BOOL bNeedPartialSyl = (ET9BOOL)ET9CPIsPartialSpellActive(pET9CPLingInfo);
    ET9BOOL bHasTone = (ET9_CP_InputToneCount(pET9CPLingInfo) > 0);
    ET9_CP_SpellData sSpellData;

    ET9_CP_INIT_PHRASE_BUF(sMiniPhraseBuf);
    sToneSpell.bLen = 0;
    sSegSpell.bLen = 0;
    sFullSentence.bLen = 0;
    /* b deliberately runs one past the last character so the final segment is flushed too */
    for (b = 0; b <= psSpell->bLen; b++ ) {
        if ( b == psSpell->bLen || psSpell->pbChars[b] == ET9CP_SEGMENT_DELIMITER ) { /* Prevent reading out of bound */
            /* only the last segment may be matched as the beginning of a longer phrase */
            ET9BOOL bNeedPartialPhrase = (ET9BOOL)(b == psSpell->bLen);

            /* start from an empty one-entry buffer: only the first phrase of the segment is wanted */
            ET9_CP_ZeroPhraseBuffer(&sMiniPhraseBuf.sPhraseBuf);
            ET9_CP_SetPageSize(&sMiniPhraseBuf.sPhraseBuf, 1);
            SBISpellDataInit(&sSpellData,
                1,
                1,
                1,
                (ET9BOOL)(b == psSpell->bLen),
                (ET9BOOL)0,
                (ET9BOOL)ET9_CP_EndsWithInitial(sSegSpell.pbChars, sSegSpell.bLen),
                1,
                sSegSpell.bLen - FirstPartialPinyin(sSegSpell.pbChars, sSegSpell.bLen),
                CountSyllables(sSegSpell.pbChars, sSegSpell.bLen),
                &sSegSpell,
                0,
                0,
                0);
            ET9_CP_GetPhraseFromSpell(pET9CPLingInfo, &sSpellData, &sSegSpell, &sMiniPhraseBuf.sPhraseBuf, bNeedPartialSyl, bNeedPartialPhrase);

            /* NOTE(review): with fAllowFailure == 0 the phrase is fetched even if the search found
             * nothing - presumably callers guarantee a match in that case; confirm. */
            if ( !fAllowFailure || !ET9_CP_IsPhraseBufEmpty(&sMiniPhraseBuf.sPhraseBuf) )
            {
                /* sSegSpell is passed as an output here and is overwritten with the spelling stored with the phrase */
                ET9_CP_GetPhraseFromBuf(&sMiniPhraseBuf.sPhraseBuf, 1, &sPhrase, &sSegSpell, NULL); /* ET9_CP_GetPhraseFromBuf needs 1-based index */
                if (sFullSentence.bLen + sPhrase.bLen > ET9CPMAXPHRASESIZE) {
                    /* Trim the new phrase segment to prevent overflow after appending */
                    ET9Assert(sFullSentence.bLen < ET9CPMAXPHRASESIZE);
                    sPhrase.bLen = ET9CPMAXPHRASESIZE - sFullSentence.bLen;
                }
                _ET9SymCopy(sFullSentence.pSymbs + sFullSentence.bLen, sPhrase.pSymbs, sPhrase.bLen);
                sFullSentence.bLen = (ET9U8)(sFullSentence.bLen + sPhrase.bLen);
                if (bHasTone) {
                    /* accumulate the per-segment spellings, re-inserting the segment delimiter between them */
                    /* NOTE(review): no bound check on sToneSpell.pbChars here - presumably the combined
                     * length cannot exceed the spell buffer; confirm. */
                    _ET9ByteCopy(sToneSpell.pbChars + sToneSpell.bLen, sSegSpell.pbChars, sSegSpell.bLen);
                    sToneSpell.bLen = (ET9U8)(sToneSpell.bLen + sSegSpell.bLen);
                    if (b < psSpell->bLen && psSpell->pbChars[b] == ET9CP_SEGMENT_DELIMITER) {
                        sToneSpell.pbChars[sToneSpell.bLen++] = ET9CP_SEGMENT_DELIMITER;
                    }
                }
                sSegSpell.bLen = 0;
            }
            /* NOTE(review): when a segment fails with fAllowFailure set, sSegSpell is not reset, so the
             * next segment's characters are appended to it without a delimiter - confirm this is intended. */
        }
        else {
            /* ordinary character: grow the current segment's spelling */
            sSegSpell.pbChars[sSegSpell.bLen++] = psSpell->pbChars[b];
        }
    }
    if ( sFullSentence.bLen > 0 )
    {
        ET9_CP_PhraseBuf *pMainPhraseBuf;
        pMainPhraseBuf = ET9_CP_GetMainPhraseBuf(pET9CPLingInfo);
        if (bHasTone) {
            /* store the rebuilt spelling with the sentence instead of the caller's spelling */
            psSpell = &sToneSpell;
        }
        ET9_CP_AddPhraseToBuf(pET9CPLingInfo, pMainPhraseBuf, sFullSentence.pSymbs, sFullSentence.bLen, psSpell->pbChars, psSpell->bLen, ET9_CP_IDEncode_PID, ET9CPPhraseSource_Sentence, (ET9U16)0xFFFF);
    }
}

/* Do two spellings end with the same last syllable?
 * One trailing tone/delimiter is dropped from each spelling, then the two are
 * compared backwards character by character until a mismatch (returns 0) or a
 * matching tone/delimiter (returns 1) is reached. If one spelling runs out
 * first, the result is 1 only when the other one is sitting on a
 * tone/delimiter; if both run out together the result is 1.
 */
static ET9BOOL ET9LOCALCALL SuffixIsSame(ET9_CP_Spell * p1, ET9_CP_Spell * p2)
{
    ET9U8 bRemain1 = p1->bLen;
    ET9U8 bRemain2 = p2->bLen;
#define IS_GENERALIZED_TONE_DELIMITER(c)  ( ET9_CP_LetterIsToneOrDelim(c) || ET9_CP_LOCKED_IS_TONE(c) )

    /* ignore a single trailing tone/delimiter on either side */
    if ( bRemain1 > 0 && IS_GENERALIZED_TONE_DELIMITER(p1->pbChars[bRemain1 - 1]) ) {
        bRemain1--;
    }
    if ( bRemain2 > 0 && IS_GENERALIZED_TONE_DELIMITER(p2->pbChars[bRemain2 - 1]) ) {
        bRemain2--;
    }

    /* walk both spellings from the back */
    for ( ; bRemain1 > 0 && bRemain2 > 0; bRemain1--, bRemain2-- ) {
        if ( p1->pbChars[bRemain1 - 1] != p2->pbChars[bRemain2 - 1] ) {
            return 0;
        }
        if ( IS_GENERALIZED_TONE_DELIMITER(p1->pbChars[bRemain1 - 1]) ) {
            return 1; /* reached the start of the last syllable on both sides */
        }
    }

    /* Now, at least one of bRemain1 and bRemain2 is 0 */
    if ( bRemain1 > 0 ) {
        if ( IS_GENERALIZED_TONE_DELIMITER(p1->pbChars[bRemain1 - 1]) ) {
            return 1;
        }
        return 0;
    }
    if ( bRemain2 > 0 ) {
        if ( IS_GENERALIZED_TONE_DELIMITER(p2->pbChars[bRemain2 - 1]) ) {
            return 1;
        }
        return 0;
    }

    /* both spellings were consumed completely */
    return 1;
}

/* Convert a tap probability into a spell weight:
 *     0x200 + bSpellLen * 0x20 + iTapProb / 32
 * Longer spellings get lower iTapProb, so a spell length term is added to
 * compensate and keep single chars from dominating. */
static ET9S32 ET9LOCALCALL ET9_CP_TapProbToSpellWeight(ET9S32 iTapProb,
                                                       ET9U8 bSpellLen)
{
    const ET9S32 iBase = 0x200;
    const ET9S32 iLengthBonus = bSpellLen * 0x20;
    const ET9S32 iScaledProb = iTapProb / 32;

    return (ET9S32)(iBase + iLengthBonus + iScaledProb);
}

/* Fill the main phrase buffer from the SBI prefixes for the not-yet-selected part of the input.
 *
 * Steps:
 *   1. Pick the prefix index range: all prefixes, or only the active prefix group.
 *   2. Take the first segment of the active spell; when full-sentence mode is active, the
 *      spell has more than one segment and the buffer is still empty, add a whole-sentence
 *      phrase first.
 *   3. Add the phrases of the first segment, and pin the first phrase to the top of the
 *      first page by giving it frequency 0xFFFF (its real frequency is remembered in
 *      pSBI->wSegPhraseFreq).
 *   4. Walk the prefixes in decreasing index order and add phrases for every prefix that
 *      was not already covered by step 3 and passes the filters below.
 *
 * Returns ET9STATUS_NONE (the only value this implementation returns).
 */
ET9STATUS ET9FARCALL ET9_CP_PrefixFillPhraseBuffer(ET9CPLingInfo *pCLing)
{
    ET9_CP_PhraseBuf * pPhraseBuf = ET9_CP_GetMainPhraseBuf(pCLing);
    ET9_CP_SSBITrie * pSBI = &pCLing->SBI;
    ET9_CP_SpellData sSpellData; /* local spell data structure */
    ET9_CP_Spell sFullSpell, s1stSegment;
    ET9_CS_Prefix prefix;
    ET9S32 iSpellWeight;
    ET9INT i, iStart, iEnd;
    ET9U8 bInputStart, bInputLen, bSegment1stSylLen;
    ET9BOOL bNeedPartialSyl, bNeedPartialPhrase;
    ET9BOOL fSegmentationFull; /* 1: initial expansion not used in the segmentation */
    const ET9WordSymbInfo *pWSI;
    const ET9SymbInfo *pSymbInfo;

    pWSI = pCLing->Base.pWordSymbInfo;

    /* reset top segmentation freq when getting phrases for the first time */
    if (ET9_CP_IsPhraseBufEmpty(pPhraseBuf)) {
        pSBI->wSegPhraseFreq = 0;
    }
    /* only the symbols from bInputStart onwards are considered below */
    bInputStart = ET9_CP_SelectionHistUnselectedStart(&pCLing->SelHistory);
    bInputLen = (ET9U8)(pWSI->bNumSymbs - bInputStart);
    pSymbInfo = pWSI->SymbsInfo + bInputStart;

    /* allow partial syllable when partial spell is active */
    bNeedPartialSyl = (ET9BOOL)ET9CPIsPartialSpellActive(pCLing);

    if ( pCLing->CommonInfo.bActivePrefix == 0xFF )  /* no active prefix */
    {
        iStart = 0;
        iEnd = (ET9INT)ET9_CS_GetPrefixCount(pSBI);
    }
    else /* has active prefix, use only this group */
    {
        iStart = (ET9INT)pCLing->CommonInfo.pPrefixGroup[pCLing->CommonInfo.bActivePrefix].wStartIndex;
        iEnd = (iStart + pCLing->CommonInfo.pPrefixGroup[pCLing->CommonInfo.bActivePrefix].wPfxCount);
    }

    /* get 1st segment's phrases */
    {
        /* work on a local copy so the active spell itself is left untouched */
        sFullSpell = pCLing->CommonInfo.sActiveSpell;
        /* Convert real tones in active spell to tone masks */
        for ( i = 0; i < sFullSpell.bLen; i++ )
        {
            if ( ET9CPSymToCPTone(sFullSpell.pbChars[i]) )
                sFullSpell.pbChars[i] = ( 1 << (sFullSpell.pbChars[i] - ET9CPTONE1) );
        }
        ET9Assert(0 < sFullSpell.bLen);

        /* get 1st segment from the full segmentation, overwriting the tones with ambig-tones from WSI */
        /* NOTE(review): this call passes sizeof(pbChars) while the prefix loop below passes
         * sizeof(pbChars) - 1 to the same helper - confirm which bound is intended. */
        s1stSegment.bLen = ET9_CP_CopyAddDelimiter(s1stSegment.pbChars, sizeof(s1stSegment.pbChars), sFullSpell.pbChars, ET9_CP_FirstSegmentLength(&sFullSpell), pSymbInfo, bInputLen);

        /* more than one segment and nothing in the buffer yet: offer the whole sentence first */
        if ( ET9CPIsFullSentenceActive(pCLing) && s1stSegment.bLen < sFullSpell.bLen && ET9_CP_IsPhraseBufEmpty(pPhraseBuf) )
        {
             AddWholePhraseOfSBI( pCLing, &sFullSpell, /*fAllowFailure*/0 );
        }

        /* get 1st syl from 1st segment */
        bSegment1stSylLen = (ET9U8)FirstSylLen(s1stSegment.pbChars, s1stSegment.bLen);

        /* allow partial phrase match on the last segment */
        bNeedPartialPhrase = (ET9BOOL)(sFullSpell.bLen == s1stSegment.bLen);

        /* a spell weight is only used when a matching prefix with a non-zero tap probability exists */
        if (ET9_CP_FindPrefix(pSBI, iStart, iEnd, &s1stSegment, &prefix) && prefix.m_iTapProb) {
            /* Apply tap weight on phrase ranking ONLY when input is regional */
            iSpellWeight = ET9_CP_TapProbToSpellWeight(prefix.m_iTapProb, prefix.m_bPfxLen);
        }
        else {
            iSpellWeight = 0;
        }

        SBISpellDataInit(&sSpellData,
                         1,
                         1,
                         (ET9BOOL)(s1stSegment.bLen == bSegment1stSylLen),
                         (ET9BOOL)(s1stSegment.bLen == bInputLen),
                         (ET9BOOL)(0xFF != pCLing->CommonInfo.bActivePrefix),
                         (ET9BOOL)ET9_CP_EndsWithInitial(s1stSegment.pbChars, s1stSegment.bLen),
                         1,
                         bInputLen - FirstPartialPinyin(s1stSegment.pbChars, s1stSegment.bLen),
                         CountSyllables(s1stSegment.pbChars, s1stSegment.bLen),
                         &s1stSegment,
                         iSpellWeight,
                         0,
                         0);
        ET9_CP_GetPhraseFromSpell(pCLing, &sSpellData, &s1stSegment, pPhraseBuf, bNeedPartialSyl, bNeedPartialPhrase);
        /* read back the flag from the spell data after the search; presumably updated by the search - confirm */
        fSegmentationFull = sSpellData.u.sSBI.fSegmentFull;

        /* should be able to find some phrase for the default segmentation */
        ET9Assert(pPhraseBuf->wTotal);

        /* 1st phrase of 1st segment locked in 1st place, regardless of freq */
        /* NOTE(review): the assert above is the only guard that wTotal != 0; in non-assert builds
         * pwFreq[0] is read and overwritten even when nothing was found - confirm this cannot happen. */
        if (pPhraseBuf->wTotal <= pPhraseBuf->bPageSize) {
            pSBI->wSegPhraseFreq = pPhraseBuf->pwFreq[0];
            pPhraseBuf->pwFreq[0] = 0xFFFF;
        }
    }

    /* get prefixes' phrases */
    {
        ET9_CP_Spell spell;

        for ( i = iEnd - 1; i >= iStart; i-- )  /* must go in decreasing order */
        {
            ET9_CP_SBISpellData * psSBISpellData;

            ET9_CS_GetPrefix(pSBI, i, &prefix);

            /* copy tone/delimiter into internal prefix to allow tone filtering in phrase search */
            spell.bLen = (ET9U8)ET9_CP_CopyAddDelimiter(spell.pbChars, sizeof(spell.pbChars) - 1, prefix.m_pcPfx, prefix.m_bPfxLen, pSymbInfo, bInputLen);

            /* skip if we've already handled this with the s1stSegment search */
            if (0 == ET9_CP_SpellDiff(spell.pbChars, spell.bLen, s1stSegment.pbChars, s1stSegment.bLen) ) {
                continue;
            }
            /* with tones in the input and an active suffix, a prefix as long as the input
             * must end with the same last syllable as the active spell */
            if ( ET9_CP_InputToneCount(pCLing) && 0xFF != pCLing->CommonInfo.bActiveSuffix && bInputLen == spell.bLen )
            {
                if ( !SuffixIsSame(&sFullSpell, &spell) )
                    continue;
            }

            if (prefix.m_iTapProb) {
                /* Apply tap weight on phrase ranking ONLY when input is regional */
                iSpellWeight = ET9_CP_TapProbToSpellWeight(prefix.m_iTapProb, prefix.m_bPfxLen);
            }
            else {
                iSpellWeight = 0;
            }

            psSBISpellData = &sSpellData.u.sSBI;
            SBISpellDataInit(&sSpellData,
                             0,
                             (ET9BOOL)(spell.bLen == s1stSegment.bLen),
                             (ET9BOOL)(spell.bLen == bSegment1stSylLen),
                             (ET9BOOL)(spell.bLen == bInputLen),
                             (ET9BOOL)(0xFF != pCLing->CommonInfo.bActivePrefix),
                             (ET9BOOL)ET9_CP_EndsWithInitial(spell.pbChars, spell.bLen),
                             (ET9BOOL)SyllablesAligned(spell.pbChars, spell.bLen, sFullSpell.pbChars, sFullSpell.bLen),
                             bInputLen - FirstPartialPinyin(spell.pbChars, spell.bLen),
                             CountSyllables(spell.pbChars, spell.bLen),
                             &spell,
                             iSpellWeight,
                             pSBI->wSegPhraseFreq,
                             fSegmentationFull);
            /* only search prefixes that are single-syllable or related to the segmentation
             * through at least one of the flags just computed */
            if (1 == psSBISpellData->nSyllableCount ||
                psSBISpellData->fSearchingSetPrefix ||
                psSBISpellData->fSearchingLastSegment ||
                psSBISpellData->fSearchingSegmentLen ||
                psSBISpellData->fSearchingSegment1stSylLen ||
                psSBISpellData->fPrefixSyllablesAligned)
            {
                ET9_CP_GetPhraseFromSpell(pCLing, &sSpellData, &spell, pPhraseBuf, bNeedPartialSyl, /*bNeedPartialPhrase*/0);
            }
        }
    }

    pPhraseBuf->wLastTotal = pPhraseBuf->wTotal;
    return ET9STATUS_NONE;
}

/*
 * Determine the types of completion used for this phrase.
 *
 * The first pCommon->bSylCount IDs of psPhrase are checked, one per syllable,
 * against the ID ranges held in pCommon->pwRange.  Ranges are stored as groups
 * of ET9_CP_ID_RANGE_SIZE values, read here as { start, partial start, end };
 * pCommon->pbRangeEnd[i] is the offset into pwRange just past the last group
 * that belongs to syllable i (several groups per syllable = mohu ranges).
 * For each syllable only the first group containing the ID is considered.
 * NOTE(review): the code reads three values per group, so it assumes
 * ET9_CP_ID_RANGE_SIZE >= 3 -- confirm against the definition.
 *
 * pCommon              - supplies pwRange, pbRangeEnd and bSylCount
 * psPhrase             - phrase IDs; when null, all three outputs are cleared
 *                        and nothing else is done
 * bPhraseLen           - number of IDs in psPhrase (asserted >= bSylCount)
 * fEndsWithInitial     - presumably non-zero when the spelling ends with a
 *                        bare initial -- confirm against callers
 * pfInitialExpansion   - out: 1 if an ID matched in the partial part of a
 *                        range for a syllable that is not the last one, or
 *                        for the last one while fEndsWithInitial is set
 * pfSyllableCompletion - out: 1 if the last syllable's ID matched in the
 *                        partial part of a range and fEndsWithInitial is clear
 * pfPhraseCompletion   - out: 1 if bPhraseLen is greater than bSylCount
 *
 * The three output pointers are written unconditionally and must be valid.
 */
void ET9FARCALL ET9_CP_MatchType(const ET9_CP_CommonInfo * pCommon,
                                 const ET9SYMB * psPhrase,
                                 ET9U8 bPhraseLen,
                                 ET9BOOL fEndsWithInitial,
                                 ET9BOOL * pfInitialExpansion,
                                 ET9BOOL * pfSyllableCompletion,
                                 ET9BOOL * pfPhraseCompletion)
{
    const ET9U16 *pwRange, *pwStart;
    const ET9U8 *pbRangeEnd;
    ET9UINT nSylCount, i, j;

    /* assume no completion of any kind by default */
    *pfInitialExpansion = 0;
    *pfSyllableCompletion = 0;
    *pfPhraseCompletion = 0;

    if (!psPhrase) {
        return;
    }

    /* check the inputs BEFORE pCommon is dereferenced, so the assert can
       actually catch a null pCommon (psPhrase is known non-null here) */
    ET9Assert(pCommon && bPhraseLen);

    pwRange = pCommon->pwRange;
    pbRangeEnd = pCommon->pbRangeEnd;
    nSylCount = pCommon->bSylCount;

    ET9Assert(bPhraseLen >= nSylCount);

    /* phrase completion? */
    *pfPhraseCompletion = (ET9BOOL)(nSylCount < bPhraseLen ? 1 : 0);

    pwStart = pwRange;

    for (i = 0, j = 0; i < nSylCount; i++) {
        const ET9U16 wID = *psPhrase++;

        /* each mohu range of this syllable; the step is skipped on a match */
        for (; j < pbRangeEnd[i]; j += ET9_CP_ID_RANGE_SIZE, pwStart += ET9_CP_ID_RANGE_SIZE) {
            const ET9U16 *pwStartPartial = pwStart + 1;
            const ET9U16 *pwEnd = pwStartPartial + 1;

            if ((wID >= *pwStart) && (wID < *pwEnd)) { /* match ID range */

                /* check for partial match */
                if (wID >= *pwStartPartial) {
                    if (i == nSylCount - 1 && !fEndsWithInitial) {
                        *pfSyllableCompletion = 1;
                    }
                    else {
                        /* partial range in the middle of a phrase (or a last
                           syllable ending with an initial) -- initial expansion */
                        *pfInitialExpansion = 1;
                    }
                }
                break; /* match, no need to try other mohu ranges */
            }
        } /* END loop mohu ranges for this syllable */

        /* the walk must not have stepped past this syllable's range end */
        ET9Assert(j <= pbRangeEnd[i]);

        /* continue with the first range of the next syllable */
        pwStart = pwRange + pbRangeEnd[i];
        j = pbRangeEnd[i];
    } /* END loop syllables */
}

/*
 * Fill pPhraseBuf with the phrases found for the ID ranges derived from
 * pBasePhrase.
 *
 * ET9_CP_UniPhraseToPidRanges() is asked to set up the search ranges for
 * pBasePhrase.  If it reports success, the UDB, MDB and LDB searches are run
 * in that order, and the whole pass is repeated until a pass finishes with
 * pPhraseBuf->wTotal equal to the value it had when the pass started.
 *
 * The search range kept in pLing->CommonInfo (bSylCount, and -- when
 * bSylCount is non-zero -- pwRange and pbRangeEnd) is saved on entry and put
 * back before returning, because the search overwrites it.
 *
 * pLing       - linguistic info; its CommonInfo search range is preserved
 * pBasePhrase - phrase handed to ET9_CP_UniPhraseToPidRanges()
 * pPhraseBuf  - buffer handed to the UDB/MDB/LDB searches
 *
 * Returns 1 if ET9_CP_UniPhraseToPidRanges() succeeded (whether or not any
 * phrase was added to the buffer), 0 otherwise.
 */
ET9BOOL ET9FARCALL ET9_CP_GetHomophone(ET9CPLingInfo *pLing,
                                       const ET9CPPhrase *pBasePhrase,
                                       ET9_CP_PhraseBuf *pPhraseBuf)
{
    ET9U16 awSavedRange[ET9_CP_MAX_ID_RANGE_SIZE];
    ET9U8 abSavedRangeEnd[ET9CPMAXPHRASESIZE];
    const ET9U8 bSavedSylCount = pLing->CommonInfo.bSylCount;
    ET9BOOL fSearched;

    /* save the current phrase search range; the arrays only matter when
       there is at least one syllable */
    if (bSavedSylCount > 0) {
        _ET9SymCopy(awSavedRange, pLing->CommonInfo.pwRange, ET9_CP_MAX_ID_RANGE_SIZE);
        _ET9ByteCopy(abSavedRangeEnd, pLing->CommonInfo.pbRangeEnd, ET9CPMAXPHRASESIZE);
    }

    if (!ET9_CP_UniPhraseToPidRanges(pLing, pBasePhrase)) {
        fSearched = 0;
    }
    else {
        /* keep running search passes until one leaves the total unchanged */
        for (;;) {
            pPhraseBuf->wLastTotal = pPhraseBuf->wTotal;

            /* UDB */
            ET9_CP_GetUdbPhrases(pLing, &pLing->Udb, /*bNeedPartialSyl*/0, /*bNeedPartialPhrase*/0, NULL, 0, /*pMatchType*/NULL, /*pSpellData*/NULL, 0, /*pbTones*/NULL, 0, pPhraseBuf);

            /* MDB */
            ET9_CP_GetUdbPhrases(pLing, &pLing->Mdb, /*bNeedPartialSyl*/0, /*bNeedPartialPhrase*/0, NULL, 0, /*pMatchType*/NULL, /*pSpellData*/NULL, 0, /*pbTones*/NULL, 0, pPhraseBuf);

            /* LDB */
            ET9_CP_GetLdbPhrases(pLing, /*bIsSID*/0, /*bNeedPartialSyl*/0, /*bNeedPartialPhrase*/0, /*pMatchType*/NULL, /*pSpellData*/NULL, /*pbTones*/NULL, /*pwContext*/NULL, /*bContextLen*/0, /*bValidateContext*/0, /*pNameTableBookmarks*/NULL, pPhraseBuf);

            if (pPhraseBuf->wTotal == pPhraseBuf->wLastTotal) {
                break;
            }
        }
        fSearched = 1;
    }

    /* put the caller's phrase search range back */
    pLing->CommonInfo.bSylCount = bSavedSylCount;
    if (bSavedSylCount > 0) {
        _ET9SymCopy(pLing->CommonInfo.pwRange, awSavedRange, ET9_CP_MAX_ID_RANGE_SIZE);
        _ET9ByteCopy(pLing->CommonInfo.pbRangeEnd, abSavedRangeEnd, ET9CPMAXPHRASESIZE);
    }

    return fSearched;
}

/* ----------------------------------< eof >--------------------------------- */
