diff options
author | 2016-09-23 09:21:58 +0000 | |
---|---|---|
committer | 2016-09-23 09:21:58 +0000 | |
commit | 25e4f8ab5acd0ef40feec6767a572bebbbe294b3 (patch) | |
tree | 20197c0e46bb6d260f4a310b6d5dd73b8d826f01 /lib/libsqlite3/ext/fts3/fts3_snippet.c | |
parent | remove usr.bin/sqlite3, it has moved back to ports (diff) | |
download | wireguard-openbsd-25e4f8ab5acd0ef40feec6767a572bebbbe294b3.tar.xz wireguard-openbsd-25e4f8ab5acd0ef40feec6767a572bebbbe294b3.zip |
remove lib/libsqlite3, it has moved back to ports
Diffstat (limited to 'lib/libsqlite3/ext/fts3/fts3_snippet.c')
-rw-r--r-- | lib/libsqlite3/ext/fts3/fts3_snippet.c | 1706 |
1 files changed, 0 insertions, 1706 deletions
diff --git a/lib/libsqlite3/ext/fts3/fts3_snippet.c b/lib/libsqlite3/ext/fts3/fts3_snippet.c deleted file mode 100644 index a0771c0b305..00000000000 --- a/lib/libsqlite3/ext/fts3/fts3_snippet.c +++ /dev/null @@ -1,1706 +0,0 @@ -/* -** 2009 Oct 23 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -*/ - -#include "fts3Int.h" -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) - -#include <string.h> -#include <assert.h> - -/* -** Characters that may appear in the second argument to matchinfo(). -*/ -#define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ -#define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ -#define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ -#define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ -#define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ -#define FTS3_MATCHINFO_LCS 's' /* nCol values */ -#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ -#define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */ -#define FTS3_MATCHINFO_LHITS_BM 'b' /* nCol*nPhrase values */ - -/* -** The default value for the second argument to matchinfo(). -*/ -#define FTS3_MATCHINFO_DEFAULT "pcx" - - -/* -** Used as an fts3ExprIterate() context when loading phrase doclists to -** Fts3Expr.aDoclist[]/nDoclist. -*/ -typedef struct LoadDoclistCtx LoadDoclistCtx; -struct LoadDoclistCtx { - Fts3Cursor *pCsr; /* FTS3 Cursor */ - int nPhrase; /* Number of phrases seen so far */ - int nToken; /* Number of tokens seen so far */ -}; - -/* -** The following types are used as part of the implementation of the -** fts3BestSnippet() routine. -*/ -typedef struct SnippetIter SnippetIter; -typedef struct SnippetPhrase SnippetPhrase; -typedef struct SnippetFragment SnippetFragment; - -struct SnippetIter { - Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ - int iCol; /* Extract snippet from this column */ - int nSnippet; /* Requested snippet length (in tokens) */ - int nPhrase; /* Number of phrases in query */ - SnippetPhrase *aPhrase; /* Array of size nPhrase */ - int iCurrent; /* First token of current snippet */ -}; - -struct SnippetPhrase { - int nToken; /* Number of tokens in phrase */ - char *pList; /* Pointer to start of phrase position list */ - int iHead; /* Next value in position list */ - char *pHead; /* Position list data following iHead */ - int iTail; /* Next value in trailing position list */ - char *pTail; /* Position list data following iTail */ -}; - -struct SnippetFragment { - int iCol; /* Column snippet is extracted from */ - int iPos; /* Index of first token in snippet */ - u64 covered; /* Mask of query phrases covered */ - u64 hlmask; /* Mask of snippet terms to highlight */ -}; - -/* -** This type is used as an fts3ExprIterate() context object while -** accumulating the data returned by the matchinfo() function. -*/ -typedef struct MatchInfo MatchInfo; -struct MatchInfo { - Fts3Cursor *pCursor; /* FTS3 Cursor */ - int nCol; /* Number of columns in table */ - int nPhrase; /* Number of matchable phrases in query */ - sqlite3_int64 nDoc; /* Number of docs in database */ - char flag; - u32 *aMatchinfo; /* Pre-allocated buffer */ -}; - -/* -** An instance of this structure is used to manage a pair of buffers, each -** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below -** for details. -*/ -struct MatchinfoBuffer { - u8 aRef[3]; - int nElem; - int bGlobal; /* Set if global data is loaded */ - char *zMatchinfo; - u32 aMatchinfo[1]; -}; - - -/* -** The snippet() and offsets() functions both return text values. An instance -** of the following structure is used to accumulate those values while the -** functions are running. See fts3StringAppend() for details. -*/ -typedef struct StrBuffer StrBuffer; -struct StrBuffer { - char *z; /* Pointer to buffer containing string */ - int n; /* Length of z in bytes (excl. nul-term) */ - int nAlloc; /* Allocated size of buffer z in bytes */ -}; - - -/************************************************************************* -** Start of MatchinfoBuffer code. -*/ - -/* -** Allocate a two-slot MatchinfoBuffer object. -*/ -static MatchinfoBuffer *fts3MIBufferNew(int nElem, const char *zMatchinfo){ - MatchinfoBuffer *pRet; - int nByte = sizeof(u32) * (2*nElem + 1) + sizeof(MatchinfoBuffer); - int nStr = (int)strlen(zMatchinfo); - - pRet = sqlite3_malloc(nByte + nStr+1); - if( pRet ){ - memset(pRet, 0, nByte); - pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet; - pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + sizeof(u32)*(nElem+1); - pRet->nElem = nElem; - pRet->zMatchinfo = ((char*)pRet) + nByte; - memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1); - pRet->aRef[0] = 1; - } - - return pRet; -} - -static void fts3MIBufferFree(void *p){ - MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]); - - assert( (u32*)p==&pBuf->aMatchinfo[1] - || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2] - ); - if( (u32*)p==&pBuf->aMatchinfo[1] ){ - pBuf->aRef[1] = 0; - }else{ - pBuf->aRef[2] = 0; - } - - if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){ - sqlite3_free(pBuf); - } -} - -static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){ - void (*xRet)(void*) = 0; - u32 *aOut = 0; - - if( p->aRef[1]==0 ){ - p->aRef[1] = 1; - aOut = &p->aMatchinfo[1]; - xRet = fts3MIBufferFree; - } - else if( p->aRef[2]==0 ){ - p->aRef[2] = 1; - aOut = &p->aMatchinfo[p->nElem+2]; - xRet = fts3MIBufferFree; - }else{ - aOut = (u32*)sqlite3_malloc(p->nElem * sizeof(u32)); - if( aOut ){ - xRet = sqlite3_free; - if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32)); - } - } - - *paOut = aOut; - return xRet; -} - -static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){ - p->bGlobal = 1; - memcpy(&p->aMatchinfo[2+p->nElem], &p->aMatchinfo[1], p->nElem*sizeof(u32)); -} - -/* -** Free a MatchinfoBuffer object allocated using fts3MIBufferNew() -*/ -void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){ - if( p ){ - assert( p->aRef[0]==1 ); - p->aRef[0] = 0; - if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){ - sqlite3_free(p); - } - } -} - -/* -** End of MatchinfoBuffer code. -*************************************************************************/ - - -/* -** This function is used to help iterate through a position-list. A position -** list is a list of unique integers, sorted from smallest to largest. Each -** element of the list is represented by an FTS3 varint that takes the value -** of the difference between the current element and the previous one plus -** two. For example, to store the position-list: -** -** 4 9 113 -** -** the three varints: -** -** 6 7 106 -** -** are encoded. -** -** When this function is called, *pp points to the start of an element of -** the list. *piPos contains the value of the previous entry in the list. -** After it returns, *piPos contains the value of the next element of the -** list and *pp is advanced to the following varint. -*/ -static void fts3GetDeltaPosition(char **pp, int *piPos){ - int iVal; - *pp += fts3GetVarint32(*pp, &iVal); - *piPos += (iVal-2); -} - -/* -** Helper function for fts3ExprIterate() (see below). -*/ -static int fts3ExprIterate2( - Fts3Expr *pExpr, /* Expression to iterate phrases of */ - int *piPhrase, /* Pointer to phrase counter */ - int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ - void *pCtx /* Second argument to pass to callback */ -){ - int rc; /* Return code */ - int eType = pExpr->eType; /* Type of expression node pExpr */ - - if( eType!=FTSQUERY_PHRASE ){ - assert( pExpr->pLeft && pExpr->pRight ); - rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); - if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ - rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); - } - }else{ - rc = x(pExpr, *piPhrase, pCtx); - (*piPhrase)++; - } - return rc; -} - -/* -** Iterate through all phrase nodes in an FTS3 query, except those that -** are part of a sub-tree that is the right-hand-side of a NOT operator. -** For each phrase node found, the supplied callback function is invoked. -** -** If the callback function returns anything other than SQLITE_OK, -** the iteration is abandoned and the error code returned immediately. -** Otherwise, SQLITE_OK is returned after a callback has been made for -** all eligible phrase nodes. -*/ -static int fts3ExprIterate( - Fts3Expr *pExpr, /* Expression to iterate phrases of */ - int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ - void *pCtx /* Second argument to pass to callback */ -){ - int iPhrase = 0; /* Variable used as the phrase counter */ - return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); -} - - -/* -** This is an fts3ExprIterate() callback used while loading the doclists -** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also -** fts3ExprLoadDoclists(). -*/ -static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ - int rc = SQLITE_OK; - Fts3Phrase *pPhrase = pExpr->pPhrase; - LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; - - UNUSED_PARAMETER(iPhrase); - - p->nPhrase++; - p->nToken += pPhrase->nToken; - - return rc; -} - -/* -** Load the doclists for each phrase in the query associated with FTS3 cursor -** pCsr. -** -** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable -** phrases in the expression (all phrases except those directly or -** indirectly descended from the right-hand-side of a NOT operator). If -** pnToken is not NULL, then it is set to the number of tokens in all -** matchable phrases of the expression. -*/ -static int fts3ExprLoadDoclists( - Fts3Cursor *pCsr, /* Fts3 cursor for current query */ - int *pnPhrase, /* OUT: Number of phrases in query */ - int *pnToken /* OUT: Number of tokens in query */ -){ - int rc; /* Return Code */ - LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ - sCtx.pCsr = pCsr; - rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); - if( pnPhrase ) *pnPhrase = sCtx.nPhrase; - if( pnToken ) *pnToken = sCtx.nToken; - return rc; -} - -static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ - (*(int *)ctx)++; - pExpr->iPhrase = iPhrase; - return SQLITE_OK; -} -static int fts3ExprPhraseCount(Fts3Expr *pExpr){ - int nPhrase = 0; - (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); - return nPhrase; -} - -/* -** Advance the position list iterator specified by the first two -** arguments so that it points to the first element with a value greater -** than or equal to parameter iNext. -*/ -static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){ - char *pIter = *ppIter; - if( pIter ){ - int iIter = *piIter; - - while( iIter<iNext ){ - if( 0==(*pIter & 0xFE) ){ - iIter = -1; - pIter = 0; - break; - } - fts3GetDeltaPosition(&pIter, &iIter); - } - - *piIter = iIter; - *ppIter = pIter; - } -} - -/* -** Advance the snippet iterator to the next candidate snippet. -*/ -static int fts3SnippetNextCandidate(SnippetIter *pIter){ - int i; /* Loop counter */ - - if( pIter->iCurrent<0 ){ - /* The SnippetIter object has just been initialized. The first snippet - ** candidate always starts at offset 0 (even if this candidate has a - ** score of 0.0). - */ - pIter->iCurrent = 0; - - /* Advance the 'head' iterator of each phrase to the first offset that - ** is greater than or equal to (iNext+nSnippet). - */ - for(i=0; i<pIter->nPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); - } - }else{ - int iStart; - int iEnd = 0x7FFFFFFF; - - for(i=0; i<pIter->nPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - if( pPhrase->pHead && pPhrase->iHead<iEnd ){ - iEnd = pPhrase->iHead; - } - } - if( iEnd==0x7FFFFFFF ){ - return 1; - } - - pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; - for(i=0; i<pIter->nPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); - fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); - } - } - - return 0; -} - -/* -** Retrieve information about the current candidate snippet of snippet -** iterator pIter. -*/ -static void fts3SnippetDetails( - SnippetIter *pIter, /* Snippet iterator */ - u64 mCovered, /* Bitmask of phrases already covered */ - int *piToken, /* OUT: First token of proposed snippet */ - int *piScore, /* OUT: "Score" for this snippet */ - u64 *pmCover, /* OUT: Bitmask of phrases covered */ - u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ -){ - int iStart = pIter->iCurrent; /* First token of snippet */ - int iScore = 0; /* Score of this snippet */ - int i; /* Loop counter */ - u64 mCover = 0; /* Mask of phrases covered by this snippet */ - u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ - - for(i=0; i<pIter->nPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - if( pPhrase->pTail ){ - char *pCsr = pPhrase->pTail; - int iCsr = pPhrase->iTail; - - while( iCsr<(iStart+pIter->nSnippet) ){ - int j; - u64 mPhrase = (u64)1 << i; - u64 mPos = (u64)1 << (iCsr - iStart); - assert( iCsr>=iStart ); - if( (mCover|mCovered)&mPhrase ){ - iScore++; - }else{ - iScore += 1000; - } - mCover |= mPhrase; - - for(j=0; j<pPhrase->nToken; j++){ - mHighlight |= (mPos>>j); - } - - if( 0==(*pCsr & 0x0FE) ) break; - fts3GetDeltaPosition(&pCsr, &iCsr); - } - } - } - - /* Set the output variables before returning. */ - *piToken = iStart; - *piScore = iScore; - *pmCover = mCover; - *pmHighlight = mHighlight; -} - -/* -** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). -** Each invocation populates an element of the SnippetIter.aPhrase[] array. -*/ -static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ - SnippetIter *p = (SnippetIter *)ctx; - SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; - char *pCsr; - int rc; - - pPhrase->nToken = pExpr->pPhrase->nToken; - rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); - assert( rc==SQLITE_OK || pCsr==0 ); - if( pCsr ){ - int iFirst = 0; - pPhrase->pList = pCsr; - fts3GetDeltaPosition(&pCsr, &iFirst); - assert( iFirst>=0 ); - pPhrase->pHead = pCsr; - pPhrase->pTail = pCsr; - pPhrase->iHead = iFirst; - pPhrase->iTail = iFirst; - }else{ - assert( rc!=SQLITE_OK || ( - pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 - )); - } - - return rc; -} - -/* -** Select the fragment of text consisting of nFragment contiguous tokens -** from column iCol that represent the "best" snippet. The best snippet -** is the snippet with the highest score, where scores are calculated -** by adding: -** -** (a) +1 point for each occurrence of a matchable phrase in the snippet. -** -** (b) +1000 points for the first occurrence of each matchable phrase in -** the snippet for which the corresponding mCovered bit is not set. -** -** The selected snippet parameters are stored in structure *pFragment before -** returning. The score of the selected snippet is stored in *piScore -** before returning. -*/ -static int fts3BestSnippet( - int nSnippet, /* Desired snippet length */ - Fts3Cursor *pCsr, /* Cursor to create snippet for */ - int iCol, /* Index of column to create snippet from */ - u64 mCovered, /* Mask of phrases already covered */ - u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ - SnippetFragment *pFragment, /* OUT: Best snippet found */ - int *piScore /* OUT: Score of snippet pFragment */ -){ - int rc; /* Return Code */ - int nList; /* Number of phrases in expression */ - SnippetIter sIter; /* Iterates through snippet candidates */ - int nByte; /* Number of bytes of space to allocate */ - int iBestScore = -1; /* Best snippet score found so far */ - int i; /* Loop counter */ - - memset(&sIter, 0, sizeof(sIter)); - - /* Iterate through the phrases in the expression to count them. The same - ** callback makes sure the doclists are loaded for each phrase. - */ - rc = fts3ExprLoadDoclists(pCsr, &nList, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - /* Now that it is known how many phrases there are, allocate and zero - ** the required space using malloc(). - */ - nByte = sizeof(SnippetPhrase) * nList; - sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); - if( !sIter.aPhrase ){ - return SQLITE_NOMEM; - } - memset(sIter.aPhrase, 0, nByte); - - /* Initialize the contents of the SnippetIter object. Then iterate through - ** the set of phrases in the expression to populate the aPhrase[] array. - */ - sIter.pCsr = pCsr; - sIter.iCol = iCol; - sIter.nSnippet = nSnippet; - sIter.nPhrase = nList; - sIter.iCurrent = -1; - rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter); - if( rc==SQLITE_OK ){ - - /* Set the *pmSeen output variable. */ - for(i=0; i<nList; i++){ - if( sIter.aPhrase[i].pHead ){ - *pmSeen |= (u64)1 << i; - } - } - - /* Loop through all candidate snippets. Store the best snippet in - ** *pFragment. Store its associated 'score' in iBestScore. - */ - pFragment->iCol = iCol; - while( !fts3SnippetNextCandidate(&sIter) ){ - int iPos; - int iScore; - u64 mCover; - u64 mHighlite; - fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite); - assert( iScore>=0 ); - if( iScore>iBestScore ){ - pFragment->iPos = iPos; - pFragment->hlmask = mHighlite; - pFragment->covered = mCover; - iBestScore = iScore; - } - } - - *piScore = iBestScore; - } - sqlite3_free(sIter.aPhrase); - return rc; -} - - -/* -** Append a string to the string-buffer passed as the first argument. -** -** If nAppend is negative, then the length of the string zAppend is -** determined using strlen(). -*/ -static int fts3StringAppend( - StrBuffer *pStr, /* Buffer to append to */ - const char *zAppend, /* Pointer to data to append to buffer */ - int nAppend /* Size of zAppend in bytes (or -1) */ -){ - if( nAppend<0 ){ - nAppend = (int)strlen(zAppend); - } - - /* If there is insufficient space allocated at StrBuffer.z, use realloc() - ** to grow the buffer until so that it is big enough to accomadate the - ** appended data. - */ - if( pStr->n+nAppend+1>=pStr->nAlloc ){ - int nAlloc = pStr->nAlloc+nAppend+100; - char *zNew = sqlite3_realloc(pStr->z, nAlloc); - if( !zNew ){ - return SQLITE_NOMEM; - } - pStr->z = zNew; - pStr->nAlloc = nAlloc; - } - assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); - - /* Append the data to the string buffer. */ - memcpy(&pStr->z[pStr->n], zAppend, nAppend); - pStr->n += nAppend; - pStr->z[pStr->n] = '\0'; - - return SQLITE_OK; -} - -/* -** The fts3BestSnippet() function often selects snippets that end with a -** query term. That is, the final term of the snippet is always a term -** that requires highlighting. For example, if 'X' is a highlighted term -** and '.' is a non-highlighted term, BestSnippet() may select: -** -** ........X.....X -** -** This function "shifts" the beginning of the snippet forward in the -** document so that there are approximately the same number of -** non-highlighted terms to the right of the final highlighted term as there -** are to the left of the first highlighted term. For example, to this: -** -** ....X.....X.... -** -** This is done as part of extracting the snippet text, not when selecting -** the snippet. Snippet selection is done based on doclists only, so there -** is no way for fts3BestSnippet() to know whether or not the document -** actually contains terms that follow the final highlighted term. -*/ -static int fts3SnippetShift( - Fts3Table *pTab, /* FTS3 table snippet comes from */ - int iLangid, /* Language id to use in tokenizing */ - int nSnippet, /* Number of tokens desired for snippet */ - const char *zDoc, /* Document text to extract snippet from */ - int nDoc, /* Size of buffer zDoc in bytes */ - int *piPos, /* IN/OUT: First token of snippet */ - u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ -){ - u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ - - if( hlmask ){ - int nLeft; /* Tokens to the left of first highlight */ - int nRight; /* Tokens to the right of last highlight */ - int nDesired; /* Ideal number of tokens to shift forward */ - - for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); - for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); - nDesired = (nLeft-nRight)/2; - - /* Ideally, the start of the snippet should be pushed forward in the - ** document nDesired tokens. This block checks if there are actually - ** nDesired tokens to the right of the snippet. If so, *piPos and - ** *pHlMask are updated to shift the snippet nDesired tokens to the - ** right. Otherwise, the snippet is shifted by the number of tokens - ** available. - */ - if( nDesired>0 ){ - int nShift; /* Number of tokens to shift snippet by */ - int iCurrent = 0; /* Token counter */ - int rc; /* Return Code */ - sqlite3_tokenizer_module *pMod; - sqlite3_tokenizer_cursor *pC; - pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; - - /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) - ** or more tokens in zDoc/nDoc. - */ - rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); - if( rc!=SQLITE_OK ){ - return rc; - } - while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ - const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; - rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); - } - pMod->xClose(pC); - if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } - - nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; - assert( nShift<=nDesired ); - if( nShift>0 ){ - *piPos += nShift; - *pHlmask = hlmask >> nShift; - } - } - } - return SQLITE_OK; -} - -/* -** Extract the snippet text for fragment pFragment from cursor pCsr and -** append it to string buffer pOut. -*/ -static int fts3SnippetText( - Fts3Cursor *pCsr, /* FTS3 Cursor */ - SnippetFragment *pFragment, /* Snippet to extract */ - int iFragment, /* Fragment number */ - int isLast, /* True for final fragment in snippet */ - int nSnippet, /* Number of tokens in extracted snippet */ - const char *zOpen, /* String inserted before highlighted term */ - const char *zClose, /* String inserted after highlighted term */ - const char *zEllipsis, /* String inserted between snippets */ - StrBuffer *pOut /* Write output here */ -){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc; /* Return code */ - const char *zDoc; /* Document text to extract snippet from */ - int nDoc; /* Size of zDoc in bytes */ - int iCurrent = 0; /* Current token number of document */ - int iEnd = 0; /* Byte offset of end of current token */ - int isShiftDone = 0; /* True after snippet is shifted */ - int iPos = pFragment->iPos; /* First token of snippet */ - u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ - int iCol = pFragment->iCol+1; /* Query column to extract text from */ - sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ - sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ - - zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); - if( zDoc==0 ){ - if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ - return SQLITE_NOMEM; - } - return SQLITE_OK; - } - nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); - - /* Open a token cursor on the document. */ - pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; - rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); - if( rc!=SQLITE_OK ){ - return rc; - } - - while( rc==SQLITE_OK ){ - const char *ZDUMMY; /* Dummy argument used with tokenizer */ - int DUMMY1 = -1; /* Dummy argument used with tokenizer */ - int iBegin = 0; /* Offset in zDoc of start of token */ - int iFin = 0; /* Offset in zDoc of end of token */ - int isHighlight = 0; /* True for highlighted terms */ - - /* Variable DUMMY1 is initialized to a negative value above. Elsewhere - ** in the FTS code the variable that the third argument to xNext points to - ** is initialized to zero before the first (*but not necessarily - ** subsequent*) call to xNext(). This is done for a particular application - ** that needs to know whether or not the tokenizer is being used for - ** snippet generation or for some other purpose. - ** - ** Extreme care is required when writing code to depend on this - ** initialization. It is not a documented part of the tokenizer interface. - ** If a tokenizer is used directly by any code outside of FTS, this - ** convention might not be respected. */ - rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); - if( rc!=SQLITE_OK ){ - if( rc==SQLITE_DONE ){ - /* Special case - the last token of the snippet is also the last token - ** of the column. Append any punctuation that occurred between the end - ** of the previous token and the end of the document to the output. - ** Then break out of the loop. */ - rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); - } - break; - } - if( iCurrent<iPos ){ continue; } - - if( !isShiftDone ){ - int n = nDoc - iBegin; - rc = fts3SnippetShift( - pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask - ); - isShiftDone = 1; - - /* Now that the shift has been done, check if the initial "..." are - ** required. They are required if (a) this is not the first fragment, - ** or (b) this fragment does not begin at position 0 of its column. - */ - if( rc==SQLITE_OK ){ - if( iPos>0 || iFragment>0 ){ - rc = fts3StringAppend(pOut, zEllipsis, -1); - }else if( iBegin ){ - rc = fts3StringAppend(pOut, zDoc, iBegin); - } - } - if( rc!=SQLITE_OK || iCurrent<iPos ) continue; - } - - if( iCurrent>=(iPos+nSnippet) ){ - if( isLast ){ - rc = fts3StringAppend(pOut, zEllipsis, -1); - } - break; - } - - /* Set isHighlight to true if this term should be highlighted. */ - isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; - - if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); - if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); - if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); - if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); - - iEnd = iFin; - } - - pMod->xClose(pC); - return rc; -} - - -/* -** This function is used to count the entries in a column-list (a -** delta-encoded list of term offsets within a single column of a single -** row). When this function is called, *ppCollist should point to the -** beginning of the first varint in the column-list (the varint that -** contains the position of the first matching term in the column data). -** Before returning, *ppCollist is set to point to the first byte after -** the last varint in the column-list (either the 0x00 signifying the end -** of the position-list, or the 0x01 that precedes the column number of -** the next column in the position-list). -** -** The number of elements in the column-list is returned. -*/ -static int fts3ColumnlistCount(char **ppCollist){ - char *pEnd = *ppCollist; - char c = 0; - int nEntry = 0; - - /* A column-list is terminated by either a 0x01 or 0x00. */ - while( 0xFE & (*pEnd | c) ){ - c = *pEnd++ & 0x80; - if( !c ) nEntry++; - } - - *ppCollist = pEnd; - return nEntry; -} - -/* -** This function gathers 'y' or 'b' data for a single phrase. -*/ -static void fts3ExprLHits( - Fts3Expr *pExpr, /* Phrase expression node */ - MatchInfo *p /* Matchinfo context */ -){ - Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab; - int iStart; - Fts3Phrase *pPhrase = pExpr->pPhrase; - char *pIter = pPhrase->doclist.pList; - int iCol = 0; - - assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS ); - if( p->flag==FTS3_MATCHINFO_LHITS ){ - iStart = pExpr->iPhrase * p->nCol; - }else{ - iStart = pExpr->iPhrase * ((p->nCol + 31) / 32); - } - - while( 1 ){ - int nHit = fts3ColumnlistCount(&pIter); - if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){ - if( p->flag==FTS3_MATCHINFO_LHITS ){ - p->aMatchinfo[iStart + iCol] = (u32)nHit; - }else if( nHit ){ - p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F)); - } - } - assert( *pIter==0x00 || *pIter==0x01 ); - if( *pIter!=0x01 ) break; - pIter++; - pIter += fts3GetVarint32(pIter, &iCol); - } -} - -/* -** Gather the results for matchinfo directives 'y' and 'b'. -*/ -static void fts3ExprLHitGather( - Fts3Expr *pExpr, - MatchInfo *p -){ - assert( (pExpr->pLeft==0)==(pExpr->pRight==0) ); - if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){ - if( pExpr->pLeft ){ - fts3ExprLHitGather(pExpr->pLeft, p); - fts3ExprLHitGather(pExpr->pRight, p); - }else{ - fts3ExprLHits(pExpr, p); - } - } -} - -/* -** fts3ExprIterate() callback used to collect the "global" matchinfo stats -** for a single query. -** -** fts3ExprIterate() callback to load the 'global' elements of a -** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements -** of the matchinfo array that are constant for all rows returned by the -** current query. -** -** Argument pCtx is actually a pointer to a struct of type MatchInfo. This -** function populates Matchinfo.aMatchinfo[] as follows: -** -** for(iCol=0; iCol<nCol; iCol++){ -** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X; -** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y; -** } -** -** where X is the number of matches for phrase iPhrase is column iCol of all -** rows of the table. Y is the number of rows for which column iCol contains -** at least one instance of phrase iPhrase. -** -** If the phrase pExpr consists entirely of deferred tokens, then all X and -** Y values are set to nDoc, where nDoc is the number of documents in the -** file system. This is done because the full-text index doclist is required -** to calculate these values properly, and the full-text index doclist is -** not available for deferred tokens. -*/ -static int fts3ExprGlobalHitsCb( - Fts3Expr *pExpr, /* Phrase expression node */ - int iPhrase, /* Phrase number (numbered from zero) */ - void *pCtx /* Pointer to MatchInfo structure */ -){ - MatchInfo *p = (MatchInfo *)pCtx; - return sqlite3Fts3EvalPhraseStats( - p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] - ); -} - -/* -** fts3ExprIterate() callback used to collect the "local" part of the -** FTS3_MATCHINFO_HITS array. The local stats are those elements of the -** array that are different for each row returned by the query. -*/ -static int fts3ExprLocalHitsCb( - Fts3Expr *pExpr, /* Phrase expression node */ - int iPhrase, /* Phrase number */ - void *pCtx /* Pointer to MatchInfo structure */ -){ - int rc = SQLITE_OK; - MatchInfo *p = (MatchInfo *)pCtx; - int iStart = iPhrase * p->nCol * 3; - int i; - - for(i=0; i<p->nCol && rc==SQLITE_OK; i++){ - char *pCsr; - rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); - if( pCsr ){ - p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); - }else{ - p->aMatchinfo[iStart+i*3] = 0; - } - } - - return rc; -} - -static int fts3MatchinfoCheck( - Fts3Table *pTab, - char cArg, - char **pzErr -){ - if( (cArg==FTS3_MATCHINFO_NPHRASE) - || (cArg==FTS3_MATCHINFO_NCOL) - || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) - || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) - || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) - || (cArg==FTS3_MATCHINFO_LCS) - || (cArg==FTS3_MATCHINFO_HITS) - || (cArg==FTS3_MATCHINFO_LHITS) - || (cArg==FTS3_MATCHINFO_LHITS_BM) - ){ - return SQLITE_OK; - } - sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg); - return SQLITE_ERROR; -} - -static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ - int nVal; /* Number of integers output by cArg */ - - switch( cArg ){ - case FTS3_MATCHINFO_NDOC: - case FTS3_MATCHINFO_NPHRASE: - case FTS3_MATCHINFO_NCOL: - nVal = 1; - break; - - case FTS3_MATCHINFO_AVGLENGTH: - case FTS3_MATCHINFO_LENGTH: - case FTS3_MATCHINFO_LCS: - nVal = pInfo->nCol; - break; - - case FTS3_MATCHINFO_LHITS: - nVal = pInfo->nCol * pInfo->nPhrase; - break; - - case FTS3_MATCHINFO_LHITS_BM: - nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32); - break; - - default: - assert( cArg==FTS3_MATCHINFO_HITS ); - nVal = pInfo->nCol * pInfo->nPhrase * 3; - break; - } - - return nVal; -} - -static int fts3MatchinfoSelectDoctotal( - Fts3Table *pTab, - sqlite3_stmt **ppStmt, - sqlite3_int64 *pnDoc, - const char **paLen -){ - sqlite3_stmt *pStmt; - const char *a; - sqlite3_int64 nDoc; - - if( !*ppStmt ){ - int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); - if( rc!=SQLITE_OK ) return rc; - } - pStmt = *ppStmt; - assert( sqlite3_data_count(pStmt)==1 ); - - a = sqlite3_column_blob(pStmt, 0); - a += sqlite3Fts3GetVarint(a, &nDoc); - if( nDoc==0 ) return FTS_CORRUPT_VTAB; - *pnDoc = (u32)nDoc; - - if( paLen ) *paLen = a; - return SQLITE_OK; -} - -/* -** An instance of the following structure is used to store state while -** iterating through a multi-column position-list corresponding to the -** hits for a single phrase on a single row in order to calculate the -** values for a matchinfo() FTS3_MATCHINFO_LCS request. -*/ -typedef struct LcsIterator LcsIterator; -struct LcsIterator { - Fts3Expr *pExpr; /* Pointer to phrase expression */ - int iPosOffset; /* Tokens count up to end of this phrase */ - char *pRead; /* Cursor used to iterate through aDoclist */ - int iPos; /* Current position */ -}; - -/* -** If LcsIterator.iCol is set to the following value, the iterator has -** finished iterating through all offsets for all columns. -*/ -#define LCS_ITERATOR_FINISHED 0x7FFFFFFF; - -static int fts3MatchinfoLcsCb( - Fts3Expr *pExpr, /* Phrase expression node */ - int iPhrase, /* Phrase number (numbered from zero) */ - void *pCtx /* Pointer to MatchInfo structure */ -){ - LcsIterator *aIter = (LcsIterator *)pCtx; - aIter[iPhrase].pExpr = pExpr; - return SQLITE_OK; -} - -/* -** Advance the iterator passed as an argument to the next position. Return -** 1 if the iterator is at EOF or if it now points to the start of the -** position list for the next column. -*/ -static int fts3LcsIteratorAdvance(LcsIterator *pIter){ - char *pRead = pIter->pRead; - sqlite3_int64 iRead; - int rc = 0; - - pRead += sqlite3Fts3GetVarint(pRead, &iRead); - if( iRead==0 || iRead==1 ){ - pRead = 0; - rc = 1; - }else{ - pIter->iPos += (int)(iRead-2); - } - - pIter->pRead = pRead; - return rc; -} - -/* -** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. -** -** If the call is successful, the longest-common-substring lengths for each -** column are written into the first nCol elements of the pInfo->aMatchinfo[] -** array before returning. SQLITE_OK is returned in this case. -** -** Otherwise, if an error occurs, an SQLite error code is returned and the -** data written to the first nCol elements of pInfo->aMatchinfo[] is -** undefined. -*/ -static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ - LcsIterator *aIter; - int i; - int iCol; - int nToken = 0; - - /* Allocate and populate the array of LcsIterator objects. The array - ** contains one element for each matchable phrase in the query. - **/ - aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); - if( !aIter ) return SQLITE_NOMEM; - memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); - (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); - - for(i=0; i<pInfo->nPhrase; i++){ - LcsIterator *pIter = &aIter[i]; - nToken -= pIter->pExpr->pPhrase->nToken; - pIter->iPosOffset = nToken; - } - - for(iCol=0; iCol<pInfo->nCol; iCol++){ - int nLcs = 0; /* LCS value for this column */ - int nLive = 0; /* Number of iterators in aIter not at EOF */ - - for(i=0; i<pInfo->nPhrase; i++){ - int rc; - LcsIterator *pIt = &aIter[i]; - rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); - if( rc!=SQLITE_OK ) return rc; - if( pIt->pRead ){ - pIt->iPos = pIt->iPosOffset; - fts3LcsIteratorAdvance(&aIter[i]); - nLive++; - } - } - - while( nLive>0 ){ - LcsIterator *pAdv = 0; /* The iterator to advance by one position */ - int nThisLcs = 0; /* LCS for the current iterator positions */ - - for(i=0; i<pInfo->nPhrase; i++){ - LcsIterator *pIter = &aIter[i]; - if( pIter->pRead==0 ){ - /* This iterator is already at EOF for this column. */ - nThisLcs = 0; - }else{ - if( pAdv==0 || pIter->iPos<pAdv->iPos ){ - pAdv = pIter; - } - if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ - nThisLcs++; - }else{ - nThisLcs = 1; - } - if( nThisLcs>nLcs ) nLcs = nThisLcs; - } - } - if( fts3LcsIteratorAdvance(pAdv) ) nLive--; - } - - pInfo->aMatchinfo[iCol] = nLcs; - } - - sqlite3_free(aIter); - return SQLITE_OK; -} - -/* -** Populate the buffer pInfo->aMatchinfo[] with an array of integers to -** be returned by the matchinfo() function. Argument zArg contains the -** format string passed as the second argument to matchinfo (or the -** default value "pcx" if no second argument was specified). The format -** string has already been validated and the pInfo->aMatchinfo[] array -** is guaranteed to be large enough for the output. -** -** If bGlobal is true, then populate all fields of the matchinfo() output. -** If it is false, then assume that those fields that do not change between -** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) -** have already been populated. -** -** Return SQLITE_OK if successful, or an SQLite error code if an error -** occurs. If a value other than SQLITE_OK is returned, the state the -** pInfo->aMatchinfo[] buffer is left in is undefined. -*/ -static int fts3MatchinfoValues( - Fts3Cursor *pCsr, /* FTS3 cursor object */ - int bGlobal, /* True to grab the global stats */ - MatchInfo *pInfo, /* Matchinfo context object */ - const char *zArg /* Matchinfo format string */ -){ - int rc = SQLITE_OK; - int i; - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - sqlite3_stmt *pSelect = 0; - - for(i=0; rc==SQLITE_OK && zArg[i]; i++){ - pInfo->flag = zArg[i]; - switch( zArg[i] ){ - case FTS3_MATCHINFO_NPHRASE: - if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; - break; - - case FTS3_MATCHINFO_NCOL: - if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; - break; - - case FTS3_MATCHINFO_NDOC: - if( bGlobal ){ - sqlite3_int64 nDoc = 0; - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); - pInfo->aMatchinfo[0] = (u32)nDoc; - } - break; - - case FTS3_MATCHINFO_AVGLENGTH: - if( bGlobal ){ - sqlite3_int64 nDoc; /* Number of rows in table */ - const char *a; /* Aggregate column length array */ - - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); - if( rc==SQLITE_OK ){ - int iCol; - for(iCol=0; iCol<pInfo->nCol; iCol++){ - u32 iVal; - sqlite3_int64 nToken; - a += sqlite3Fts3GetVarint(a, &nToken); - iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); - pInfo->aMatchinfo[iCol] = iVal; - } - } - } - break; - - case FTS3_MATCHINFO_LENGTH: { - sqlite3_stmt *pSelectDocsize = 0; - rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); - if( rc==SQLITE_OK ){ - int iCol; - const char *a = sqlite3_column_blob(pSelectDocsize, 0); - for(iCol=0; iCol<pInfo->nCol; iCol++){ - sqlite3_int64 nToken; - a += sqlite3Fts3GetVarint(a, &nToken); - pInfo->aMatchinfo[iCol] = (u32)nToken; - } - } - sqlite3_reset(pSelectDocsize); - break; - } - - case FTS3_MATCHINFO_LCS: - rc = fts3ExprLoadDoclists(pCsr, 0, 0); - if( rc==SQLITE_OK ){ - rc = fts3MatchinfoLcs(pCsr, pInfo); - } - break; - - case FTS3_MATCHINFO_LHITS_BM: - case FTS3_MATCHINFO_LHITS: { - int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32); - memset(pInfo->aMatchinfo, 0, nZero); - fts3ExprLHitGather(pCsr->pExpr, pInfo); - break; - } - - default: { - Fts3Expr *pExpr; - assert( zArg[i]==FTS3_MATCHINFO_HITS ); - pExpr = pCsr->pExpr; - rc = fts3ExprLoadDoclists(pCsr, 0, 0); - if( rc!=SQLITE_OK ) break; - if( bGlobal ){ - if( pCsr->pDeferred ){ - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); - if( rc!=SQLITE_OK ) break; - } - rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); - sqlite3Fts3EvalTestDeferred(pCsr, &rc); - if( rc!=SQLITE_OK ) break; - } - (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); - break; - } - } - - pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); - } - - sqlite3_reset(pSelect); - return rc; -} - - -/* -** Populate pCsr->aMatchinfo[] with data for the current row. The -** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). -*/ -static void fts3GetMatchinfo( - sqlite3_context *pCtx, /* Return results here */ - Fts3Cursor *pCsr, /* FTS3 Cursor object */ - const char *zArg /* Second argument to matchinfo() function */ -){ - MatchInfo sInfo; - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc = SQLITE_OK; - int bGlobal = 0; /* Collect 'global' stats as well as local */ - - u32 *aOut = 0; - void (*xDestroyOut)(void*) = 0; - - memset(&sInfo, 0, sizeof(MatchInfo)); - sInfo.pCursor = pCsr; - sInfo.nCol = pTab->nColumn; - - /* If there is cached matchinfo() data, but the format string for the - ** cache does not match the format string for this request, discard - ** the cached data. */ - if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){ - sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); - pCsr->pMIBuffer = 0; - } - - /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the - ** matchinfo function has been called for this query. In this case - ** allocate the array used to accumulate the matchinfo data and - ** initialize those elements that are constant for every row. - */ - if( pCsr->pMIBuffer==0 ){ - int nMatchinfo = 0; /* Number of u32 elements in match-info */ - int i; /* Used to iterate through zArg */ - - /* Determine the number of phrases in the query */ - pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); - sInfo.nPhrase = pCsr->nPhrase; - - /* Determine the number of integers in the buffer returned by this call. */ - for(i=0; zArg[i]; i++){ - char *zErr = 0; - if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ - sqlite3_result_error(pCtx, zErr, -1); - sqlite3_free(zErr); - return; - } - nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); - } - - /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ - pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg); - if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM; - - pCsr->isMatchinfoNeeded = 1; - bGlobal = 1; - } - - if( rc==SQLITE_OK ){ - xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut); - if( xDestroyOut==0 ){ - rc = SQLITE_NOMEM; - } - } - - if( rc==SQLITE_OK ){ - sInfo.aMatchinfo = aOut; - sInfo.nPhrase = pCsr->nPhrase; - rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); - if( bGlobal ){ - fts3MIBufferSetGlobal(pCsr->pMIBuffer); - } - } - - if( rc!=SQLITE_OK ){ - sqlite3_result_error_code(pCtx, rc); - if( xDestroyOut ) xDestroyOut(aOut); - }else{ - int n = pCsr->pMIBuffer->nElem * sizeof(u32); - sqlite3_result_blob(pCtx, aOut, n, xDestroyOut); - } -} - -/* -** Implementation of snippet() function. -*/ -void sqlite3Fts3Snippet( - sqlite3_context *pCtx, /* SQLite function call context */ - Fts3Cursor *pCsr, /* Cursor object */ - const char *zStart, /* Snippet start text - "<b>" */ - const char *zEnd, /* Snippet end text - "</b>" */ - const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */ - int iCol, /* Extract snippet from this column */ - int nToken /* Approximate number of tokens in snippet */ -){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc = SQLITE_OK; - int i; - StrBuffer res = {0, 0, 0}; - - /* The returned text includes up to four fragments of text extracted from - ** the data in the current row. The first iteration of the for(...) loop - ** below attempts to locate a single fragment of text nToken tokens in - ** size that contains at least one instance of all phrases in the query - ** expression that appear in the current row. If such a fragment of text - ** cannot be found, the second iteration of the loop attempts to locate - ** a pair of fragments, and so on. - */ - int nSnippet = 0; /* Number of fragments in this snippet */ - SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ - int nFToken = -1; /* Number of tokens in each fragment */ - - if( !pCsr->pExpr ){ - sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); - return; - } - - for(nSnippet=1; 1; nSnippet++){ - - int iSnip; /* Loop counter 0..nSnippet-1 */ - u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ - u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ - - if( nToken>=0 ){ - nFToken = (nToken+nSnippet-1) / nSnippet; - }else{ - nFToken = -1 * nToken; - } - - for(iSnip=0; iSnip<nSnippet; iSnip++){ - int iBestScore = -1; /* Best score of columns checked so far */ - int iRead; /* Used to iterate through columns */ - SnippetFragment *pFragment = &aSnippet[iSnip]; - - memset(pFragment, 0, sizeof(*pFragment)); - - /* Loop through all columns of the table being considered for snippets. - ** If the iCol argument to this function was negative, this means all - ** columns of the FTS3 table. Otherwise, only column iCol is considered. - */ - for(iRead=0; iRead<pTab->nColumn; iRead++){ - SnippetFragment sF = {0, 0, 0, 0}; - int iS = 0; - if( iCol>=0 && iRead!=iCol ) continue; - - /* Find the best snippet of nFToken tokens in column iRead. */ - rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); - if( rc!=SQLITE_OK ){ - goto snippet_out; - } - if( iS>iBestScore ){ - *pFragment = sF; - iBestScore = iS; - } - } - - mCovered |= pFragment->covered; - } - - /* If all query phrases seen by fts3BestSnippet() are present in at least - ** one of the nSnippet snippet fragments, break out of the loop. - */ - assert( (mCovered&mSeen)==mCovered ); - if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; - } - - assert( nFToken>0 ); - - for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ - rc = fts3SnippetText(pCsr, &aSnippet[i], - i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res - ); - } - - snippet_out: - sqlite3Fts3SegmentsClose(pTab); - if( rc!=SQLITE_OK ){ - sqlite3_result_error_code(pCtx, rc); - sqlite3_free(res.z); - }else{ - sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); - } -} - - -typedef struct TermOffset TermOffset; -typedef struct TermOffsetCtx TermOffsetCtx; - -struct TermOffset { - char *pList; /* Position-list */ - int iPos; /* Position just read from pList */ - int iOff; /* Offset of this term from read positions */ -}; - -struct TermOffsetCtx { - Fts3Cursor *pCsr; - int iCol; /* Column of table to populate aTerm for */ - int iTerm; - sqlite3_int64 iDocid; - TermOffset *aTerm; -}; - -/* -** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). -*/ -static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ - TermOffsetCtx *p = (TermOffsetCtx *)ctx; - int nTerm; /* Number of tokens in phrase */ - int iTerm; /* For looping through nTerm phrase terms */ - char *pList; /* Pointer to position list for phrase */ - int iPos = 0; /* First position in position-list */ - int rc; - - UNUSED_PARAMETER(iPhrase); - rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); - nTerm = pExpr->pPhrase->nToken; - if( pList ){ - fts3GetDeltaPosition(&pList, &iPos); - assert( iPos>=0 ); - } - - for(iTerm=0; iTerm<nTerm; iTerm++){ - TermOffset *pT = &p->aTerm[p->iTerm++]; - pT->iOff = nTerm-iTerm-1; - pT->pList = pList; - pT->iPos = iPos; - } - - return rc; -} - -/* -** Implementation of offsets() function. -*/ -void sqlite3Fts3Offsets( - sqlite3_context *pCtx, /* SQLite function call context */ - Fts3Cursor *pCsr /* Cursor object */ -){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; - int rc; /* Return Code */ - int nToken; /* Number of tokens in query */ - int iCol; /* Column currently being processed */ - StrBuffer res = {0, 0, 0}; /* Result string */ - TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ - - if( !pCsr->pExpr ){ - sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); - return; - } - - memset(&sCtx, 0, sizeof(sCtx)); - assert( pCsr->isRequireSeek==0 ); - - /* Count the number of terms in the query */ - rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); - if( rc!=SQLITE_OK ) goto offsets_out; - - /* Allocate the array of TermOffset iterators. */ - sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); - if( 0==sCtx.aTerm ){ - rc = SQLITE_NOMEM; - goto offsets_out; - } - sCtx.iDocid = pCsr->iPrevId; - sCtx.pCsr = pCsr; - - /* Loop through the table columns, appending offset information to - ** string-buffer res for each column. - */ - for(iCol=0; iCol<pTab->nColumn; iCol++){ - sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ - const char *ZDUMMY; /* Dummy argument used with xNext() */ - int NDUMMY = 0; /* Dummy argument used with xNext() */ - int iStart = 0; - int iEnd = 0; - int iCurrent = 0; - const char *zDoc; - int nDoc; - - /* Initialize the contents of sCtx.aTerm[] for column iCol. There is - ** no way that this operation can fail, so the return code from - ** fts3ExprIterate() can be discarded. - */ - sCtx.iCol = iCol; - sCtx.iTerm = 0; - (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx); - - /* Retreive the text stored in column iCol. If an SQL NULL is stored - ** in column iCol, jump immediately to the next iteration of the loop. - ** If an OOM occurs while retrieving the data (this can happen if SQLite - ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM - ** to the caller. - */ - zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); - nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); - if( zDoc==0 ){ - if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ - continue; - } - rc = SQLITE_NOMEM; - goto offsets_out; - } - - /* Initialize a tokenizer iterator to iterate through column iCol. */ - rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, - zDoc, nDoc, &pC - ); - if( rc!=SQLITE_OK ) goto offsets_out; - - rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); - while( rc==SQLITE_OK ){ - int i; /* Used to loop through terms */ - int iMinPos = 0x7FFFFFFF; /* Position of next token */ - TermOffset *pTerm = 0; /* TermOffset associated with next token */ - - for(i=0; i<nToken; i++){ - TermOffset *pT = &sCtx.aTerm[i]; - if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ - iMinPos = pT->iPos-pT->iOff; - pTerm = pT; - } - } - - if( !pTerm ){ - /* All offsets for this column have been gathered. */ - rc = SQLITE_DONE; - }else{ - assert( iCurrent<=iMinPos ); - if( 0==(0xFE&*pTerm->pList) ){ - pTerm->pList = 0; - }else{ - fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); - } - while( rc==SQLITE_OK && iCurrent<iMinPos ){ - rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); - } - if( rc==SQLITE_OK ){ - char aBuffer[64]; - sqlite3_snprintf(sizeof(aBuffer), aBuffer, - "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart - ); - rc = fts3StringAppend(&res, aBuffer, -1); - }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ - rc = FTS_CORRUPT_VTAB; - } - } - } - if( rc==SQLITE_DONE ){ - rc = SQLITE_OK; - } - - pMod->xClose(pC); - if( rc!=SQLITE_OK ) goto offsets_out; - } - - offsets_out: - sqlite3_free(sCtx.aTerm); - assert( rc!=SQLITE_DONE ); - sqlite3Fts3SegmentsClose(pTab); - if( rc!=SQLITE_OK ){ - sqlite3_result_error_code(pCtx, rc); - sqlite3_free(res.z); - }else{ - sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); - } - return; -} - -/* -** Implementation of matchinfo() function. -*/ -void sqlite3Fts3Matchinfo( - sqlite3_context *pContext, /* Function call context */ - Fts3Cursor *pCsr, /* FTS3 table cursor */ - const char *zArg /* Second arg to matchinfo() function */ -){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - const char *zFormat; - - if( zArg ){ - zFormat = zArg; - }else{ - zFormat = FTS3_MATCHINFO_DEFAULT; - } - - if( !pCsr->pExpr ){ - sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); - return; - }else{ - /* Retrieve matchinfo() data. */ - fts3GetMatchinfo(pContext, pCsr, zFormat); - sqlite3Fts3SegmentsClose(pTab); - } -} - -#endif |