summaryrefslogtreecommitdiffstats
path: root/lib/libsqlite3/ext/fts3/fts3_snippet.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libsqlite3/ext/fts3/fts3_snippet.c')
-rw-r--r--lib/libsqlite3/ext/fts3/fts3_snippet.c1706
1 files changed, 0 insertions, 1706 deletions
diff --git a/lib/libsqlite3/ext/fts3/fts3_snippet.c b/lib/libsqlite3/ext/fts3/fts3_snippet.c
deleted file mode 100644
index a0771c0b305..00000000000
--- a/lib/libsqlite3/ext/fts3/fts3_snippet.c
+++ /dev/null
@@ -1,1706 +0,0 @@
-/*
-** 2009 Oct 23
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-******************************************************************************
-*/
-
-#include "fts3Int.h"
-#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
-
-#include <string.h>
-#include <assert.h>
-
-/*
-** Characters that may appear in the second argument to matchinfo().
-*/
-#define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */
-#define FTS3_MATCHINFO_NCOL 'c' /* 1 value */
-#define FTS3_MATCHINFO_NDOC 'n' /* 1 value */
-#define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */
-#define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */
-#define FTS3_MATCHINFO_LCS 's' /* nCol values */
-#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */
-#define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */
-#define FTS3_MATCHINFO_LHITS_BM 'b' /* nCol*nPhrase values */
-
-/*
-** The default value for the second argument to matchinfo().
-*/
-#define FTS3_MATCHINFO_DEFAULT "pcx"
-
-
-/*
-** Used as an fts3ExprIterate() context when loading phrase doclists to
-** Fts3Expr.aDoclist[]/nDoclist.
-*/
-typedef struct LoadDoclistCtx LoadDoclistCtx;
-struct LoadDoclistCtx {
- Fts3Cursor *pCsr; /* FTS3 Cursor */
- int nPhrase; /* Number of phrases seen so far */
- int nToken; /* Number of tokens seen so far */
-};
-
-/*
-** The following types are used as part of the implementation of the
-** fts3BestSnippet() routine.
-*/
-typedef struct SnippetIter SnippetIter;
-typedef struct SnippetPhrase SnippetPhrase;
-typedef struct SnippetFragment SnippetFragment;
-
-struct SnippetIter {
- Fts3Cursor *pCsr; /* Cursor snippet is being generated from */
- int iCol; /* Extract snippet from this column */
- int nSnippet; /* Requested snippet length (in tokens) */
- int nPhrase; /* Number of phrases in query */
- SnippetPhrase *aPhrase; /* Array of size nPhrase */
- int iCurrent; /* First token of current snippet */
-};
-
-struct SnippetPhrase {
- int nToken; /* Number of tokens in phrase */
- char *pList; /* Pointer to start of phrase position list */
- int iHead; /* Next value in position list */
- char *pHead; /* Position list data following iHead */
- int iTail; /* Next value in trailing position list */
- char *pTail; /* Position list data following iTail */
-};
-
-struct SnippetFragment {
- int iCol; /* Column snippet is extracted from */
- int iPos; /* Index of first token in snippet */
- u64 covered; /* Mask of query phrases covered */
- u64 hlmask; /* Mask of snippet terms to highlight */
-};
-
-/*
-** This type is used as an fts3ExprIterate() context object while
-** accumulating the data returned by the matchinfo() function.
-*/
-typedef struct MatchInfo MatchInfo;
-struct MatchInfo {
- Fts3Cursor *pCursor; /* FTS3 Cursor */
- int nCol; /* Number of columns in table */
- int nPhrase; /* Number of matchable phrases in query */
- sqlite3_int64 nDoc; /* Number of docs in database */
- char flag;
- u32 *aMatchinfo; /* Pre-allocated buffer */
-};
-
-/*
-** An instance of this structure is used to manage a pair of buffers, each
-** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below
-** for details.
-*/
-struct MatchinfoBuffer {
- u8 aRef[3];
- int nElem;
- int bGlobal; /* Set if global data is loaded */
- char *zMatchinfo;
- u32 aMatchinfo[1];
-};
-
-
-/*
-** The snippet() and offsets() functions both return text values. An instance
-** of the following structure is used to accumulate those values while the
-** functions are running. See fts3StringAppend() for details.
-*/
-typedef struct StrBuffer StrBuffer;
-struct StrBuffer {
- char *z; /* Pointer to buffer containing string */
- int n; /* Length of z in bytes (excl. nul-term) */
- int nAlloc; /* Allocated size of buffer z in bytes */
-};
-
-
-/*************************************************************************
-** Start of MatchinfoBuffer code.
-*/
-
-/*
-** Allocate a two-slot MatchinfoBuffer object.
-*/
-static MatchinfoBuffer *fts3MIBufferNew(int nElem, const char *zMatchinfo){
- MatchinfoBuffer *pRet;
- int nByte = sizeof(u32) * (2*nElem + 1) + sizeof(MatchinfoBuffer);
- int nStr = (int)strlen(zMatchinfo);
-
- pRet = sqlite3_malloc(nByte + nStr+1);
- if( pRet ){
- memset(pRet, 0, nByte);
- pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet;
- pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + sizeof(u32)*(nElem+1);
- pRet->nElem = nElem;
- pRet->zMatchinfo = ((char*)pRet) + nByte;
- memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1);
- pRet->aRef[0] = 1;
- }
-
- return pRet;
-}
-
-static void fts3MIBufferFree(void *p){
- MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]);
-
- assert( (u32*)p==&pBuf->aMatchinfo[1]
- || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2]
- );
- if( (u32*)p==&pBuf->aMatchinfo[1] ){
- pBuf->aRef[1] = 0;
- }else{
- pBuf->aRef[2] = 0;
- }
-
- if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){
- sqlite3_free(pBuf);
- }
-}
-
-static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){
- void (*xRet)(void*) = 0;
- u32 *aOut = 0;
-
- if( p->aRef[1]==0 ){
- p->aRef[1] = 1;
- aOut = &p->aMatchinfo[1];
- xRet = fts3MIBufferFree;
- }
- else if( p->aRef[2]==0 ){
- p->aRef[2] = 1;
- aOut = &p->aMatchinfo[p->nElem+2];
- xRet = fts3MIBufferFree;
- }else{
- aOut = (u32*)sqlite3_malloc(p->nElem * sizeof(u32));
- if( aOut ){
- xRet = sqlite3_free;
- if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32));
- }
- }
-
- *paOut = aOut;
- return xRet;
-}
-
-static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){
- p->bGlobal = 1;
- memcpy(&p->aMatchinfo[2+p->nElem], &p->aMatchinfo[1], p->nElem*sizeof(u32));
-}
-
-/*
-** Free a MatchinfoBuffer object allocated using fts3MIBufferNew()
-*/
-void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){
- if( p ){
- assert( p->aRef[0]==1 );
- p->aRef[0] = 0;
- if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){
- sqlite3_free(p);
- }
- }
-}
-
-/*
-** End of MatchinfoBuffer code.
-*************************************************************************/
-
-
-/*
-** This function is used to help iterate through a position-list. A position
-** list is a list of unique integers, sorted from smallest to largest. Each
-** element of the list is represented by an FTS3 varint that takes the value
-** of the difference between the current element and the previous one plus
-** two. For example, to store the position-list:
-**
-** 4 9 113
-**
-** the three varints:
-**
-** 6 7 106
-**
-** are encoded.
-**
-** When this function is called, *pp points to the start of an element of
-** the list. *piPos contains the value of the previous entry in the list.
-** After it returns, *piPos contains the value of the next element of the
-** list and *pp is advanced to the following varint.
-*/
-static void fts3GetDeltaPosition(char **pp, int *piPos){
- int iVal;
- *pp += fts3GetVarint32(*pp, &iVal);
- *piPos += (iVal-2);
-}
-
-/*
-** Helper function for fts3ExprIterate() (see below).
-*/
-static int fts3ExprIterate2(
- Fts3Expr *pExpr, /* Expression to iterate phrases of */
- int *piPhrase, /* Pointer to phrase counter */
- int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
- void *pCtx /* Second argument to pass to callback */
-){
- int rc; /* Return code */
- int eType = pExpr->eType; /* Type of expression node pExpr */
-
- if( eType!=FTSQUERY_PHRASE ){
- assert( pExpr->pLeft && pExpr->pRight );
- rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
- if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){
- rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx);
- }
- }else{
- rc = x(pExpr, *piPhrase, pCtx);
- (*piPhrase)++;
- }
- return rc;
-}
-
-/*
-** Iterate through all phrase nodes in an FTS3 query, except those that
-** are part of a sub-tree that is the right-hand-side of a NOT operator.
-** For each phrase node found, the supplied callback function is invoked.
-**
-** If the callback function returns anything other than SQLITE_OK,
-** the iteration is abandoned and the error code returned immediately.
-** Otherwise, SQLITE_OK is returned after a callback has been made for
-** all eligible phrase nodes.
-*/
-static int fts3ExprIterate(
- Fts3Expr *pExpr, /* Expression to iterate phrases of */
- int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
- void *pCtx /* Second argument to pass to callback */
-){
- int iPhrase = 0; /* Variable used as the phrase counter */
- return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
-}
-
-
-/*
-** This is an fts3ExprIterate() callback used while loading the doclists
-** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
-** fts3ExprLoadDoclists().
-*/
-static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
- int rc = SQLITE_OK;
- Fts3Phrase *pPhrase = pExpr->pPhrase;
- LoadDoclistCtx *p = (LoadDoclistCtx *)ctx;
-
- UNUSED_PARAMETER(iPhrase);
-
- p->nPhrase++;
- p->nToken += pPhrase->nToken;
-
- return rc;
-}
-
-/*
-** Load the doclists for each phrase in the query associated with FTS3 cursor
-** pCsr.
-**
-** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
-** phrases in the expression (all phrases except those directly or
-** indirectly descended from the right-hand-side of a NOT operator). If
-** pnToken is not NULL, then it is set to the number of tokens in all
-** matchable phrases of the expression.
-*/
-static int fts3ExprLoadDoclists(
- Fts3Cursor *pCsr, /* Fts3 cursor for current query */
- int *pnPhrase, /* OUT: Number of phrases in query */
- int *pnToken /* OUT: Number of tokens in query */
-){
- int rc; /* Return Code */
- LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */
- sCtx.pCsr = pCsr;
- rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx);
- if( pnPhrase ) *pnPhrase = sCtx.nPhrase;
- if( pnToken ) *pnToken = sCtx.nToken;
- return rc;
-}
-
-static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){
- (*(int *)ctx)++;
- pExpr->iPhrase = iPhrase;
- return SQLITE_OK;
-}
-static int fts3ExprPhraseCount(Fts3Expr *pExpr){
- int nPhrase = 0;
- (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase);
- return nPhrase;
-}
-
-/*
-** Advance the position list iterator specified by the first two
-** arguments so that it points to the first element with a value greater
-** than or equal to parameter iNext.
-*/
-static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){
- char *pIter = *ppIter;
- if( pIter ){
- int iIter = *piIter;
-
- while( iIter<iNext ){
- if( 0==(*pIter & 0xFE) ){
- iIter = -1;
- pIter = 0;
- break;
- }
- fts3GetDeltaPosition(&pIter, &iIter);
- }
-
- *piIter = iIter;
- *ppIter = pIter;
- }
-}
-
-/*
-** Advance the snippet iterator to the next candidate snippet.
-*/
-static int fts3SnippetNextCandidate(SnippetIter *pIter){
- int i; /* Loop counter */
-
- if( pIter->iCurrent<0 ){
- /* The SnippetIter object has just been initialized. The first snippet
- ** candidate always starts at offset 0 (even if this candidate has a
- ** score of 0.0).
- */
- pIter->iCurrent = 0;
-
- /* Advance the 'head' iterator of each phrase to the first offset that
- ** is greater than or equal to (iNext+nSnippet).
- */
- for(i=0; i<pIter->nPhrase; i++){
- SnippetPhrase *pPhrase = &pIter->aPhrase[i];
- fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet);
- }
- }else{
- int iStart;
- int iEnd = 0x7FFFFFFF;
-
- for(i=0; i<pIter->nPhrase; i++){
- SnippetPhrase *pPhrase = &pIter->aPhrase[i];
- if( pPhrase->pHead && pPhrase->iHead<iEnd ){
- iEnd = pPhrase->iHead;
- }
- }
- if( iEnd==0x7FFFFFFF ){
- return 1;
- }
-
- pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1;
- for(i=0; i<pIter->nPhrase; i++){
- SnippetPhrase *pPhrase = &pIter->aPhrase[i];
- fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1);
- fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart);
- }
- }
-
- return 0;
-}
-
-/*
-** Retrieve information about the current candidate snippet of snippet
-** iterator pIter.
-*/
-static void fts3SnippetDetails(
- SnippetIter *pIter, /* Snippet iterator */
- u64 mCovered, /* Bitmask of phrases already covered */
- int *piToken, /* OUT: First token of proposed snippet */
- int *piScore, /* OUT: "Score" for this snippet */
- u64 *pmCover, /* OUT: Bitmask of phrases covered */
- u64 *pmHighlight /* OUT: Bitmask of terms to highlight */
-){
- int iStart = pIter->iCurrent; /* First token of snippet */
- int iScore = 0; /* Score of this snippet */
- int i; /* Loop counter */
- u64 mCover = 0; /* Mask of phrases covered by this snippet */
- u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */
-
- for(i=0; i<pIter->nPhrase; i++){
- SnippetPhrase *pPhrase = &pIter->aPhrase[i];
- if( pPhrase->pTail ){
- char *pCsr = pPhrase->pTail;
- int iCsr = pPhrase->iTail;
-
- while( iCsr<(iStart+pIter->nSnippet) ){
- int j;
- u64 mPhrase = (u64)1 << i;
- u64 mPos = (u64)1 << (iCsr - iStart);
- assert( iCsr>=iStart );
- if( (mCover|mCovered)&mPhrase ){
- iScore++;
- }else{
- iScore += 1000;
- }
- mCover |= mPhrase;
-
- for(j=0; j<pPhrase->nToken; j++){
- mHighlight |= (mPos>>j);
- }
-
- if( 0==(*pCsr & 0x0FE) ) break;
- fts3GetDeltaPosition(&pCsr, &iCsr);
- }
- }
- }
-
- /* Set the output variables before returning. */
- *piToken = iStart;
- *piScore = iScore;
- *pmCover = mCover;
- *pmHighlight = mHighlight;
-}
-
-/*
-** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
-** Each invocation populates an element of the SnippetIter.aPhrase[] array.
-*/
-static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
- SnippetIter *p = (SnippetIter *)ctx;
- SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
- char *pCsr;
- int rc;
-
- pPhrase->nToken = pExpr->pPhrase->nToken;
- rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr);
- assert( rc==SQLITE_OK || pCsr==0 );
- if( pCsr ){
- int iFirst = 0;
- pPhrase->pList = pCsr;
- fts3GetDeltaPosition(&pCsr, &iFirst);
- assert( iFirst>=0 );
- pPhrase->pHead = pCsr;
- pPhrase->pTail = pCsr;
- pPhrase->iHead = iFirst;
- pPhrase->iTail = iFirst;
- }else{
- assert( rc!=SQLITE_OK || (
- pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0
- ));
- }
-
- return rc;
-}
-
-/*
-** Select the fragment of text consisting of nFragment contiguous tokens
-** from column iCol that represent the "best" snippet. The best snippet
-** is the snippet with the highest score, where scores are calculated
-** by adding:
-**
-** (a) +1 point for each occurrence of a matchable phrase in the snippet.
-**
-** (b) +1000 points for the first occurrence of each matchable phrase in
-** the snippet for which the corresponding mCovered bit is not set.
-**
-** The selected snippet parameters are stored in structure *pFragment before
-** returning. The score of the selected snippet is stored in *piScore
-** before returning.
-*/
-static int fts3BestSnippet(
- int nSnippet, /* Desired snippet length */
- Fts3Cursor *pCsr, /* Cursor to create snippet for */
- int iCol, /* Index of column to create snippet from */
- u64 mCovered, /* Mask of phrases already covered */
- u64 *pmSeen, /* IN/OUT: Mask of phrases seen */
- SnippetFragment *pFragment, /* OUT: Best snippet found */
- int *piScore /* OUT: Score of snippet pFragment */
-){
- int rc; /* Return Code */
- int nList; /* Number of phrases in expression */
- SnippetIter sIter; /* Iterates through snippet candidates */
- int nByte; /* Number of bytes of space to allocate */
- int iBestScore = -1; /* Best snippet score found so far */
- int i; /* Loop counter */
-
- memset(&sIter, 0, sizeof(sIter));
-
- /* Iterate through the phrases in the expression to count them. The same
- ** callback makes sure the doclists are loaded for each phrase.
- */
- rc = fts3ExprLoadDoclists(pCsr, &nList, 0);
- if( rc!=SQLITE_OK ){
- return rc;
- }
-
- /* Now that it is known how many phrases there are, allocate and zero
- ** the required space using malloc().
- */
- nByte = sizeof(SnippetPhrase) * nList;
- sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
- if( !sIter.aPhrase ){
- return SQLITE_NOMEM;
- }
- memset(sIter.aPhrase, 0, nByte);
-
- /* Initialize the contents of the SnippetIter object. Then iterate through
- ** the set of phrases in the expression to populate the aPhrase[] array.
- */
- sIter.pCsr = pCsr;
- sIter.iCol = iCol;
- sIter.nSnippet = nSnippet;
- sIter.nPhrase = nList;
- sIter.iCurrent = -1;
- rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter);
- if( rc==SQLITE_OK ){
-
- /* Set the *pmSeen output variable. */
- for(i=0; i<nList; i++){
- if( sIter.aPhrase[i].pHead ){
- *pmSeen |= (u64)1 << i;
- }
- }
-
- /* Loop through all candidate snippets. Store the best snippet in
- ** *pFragment. Store its associated 'score' in iBestScore.
- */
- pFragment->iCol = iCol;
- while( !fts3SnippetNextCandidate(&sIter) ){
- int iPos;
- int iScore;
- u64 mCover;
- u64 mHighlite;
- fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite);
- assert( iScore>=0 );
- if( iScore>iBestScore ){
- pFragment->iPos = iPos;
- pFragment->hlmask = mHighlite;
- pFragment->covered = mCover;
- iBestScore = iScore;
- }
- }
-
- *piScore = iBestScore;
- }
- sqlite3_free(sIter.aPhrase);
- return rc;
-}
-
-
-/*
-** Append a string to the string-buffer passed as the first argument.
-**
-** If nAppend is negative, then the length of the string zAppend is
-** determined using strlen().
-*/
-static int fts3StringAppend(
- StrBuffer *pStr, /* Buffer to append to */
- const char *zAppend, /* Pointer to data to append to buffer */
- int nAppend /* Size of zAppend in bytes (or -1) */
-){
- if( nAppend<0 ){
- nAppend = (int)strlen(zAppend);
- }
-
- /* If there is insufficient space allocated at StrBuffer.z, use realloc()
- ** to grow the buffer until so that it is big enough to accomadate the
- ** appended data.
- */
- if( pStr->n+nAppend+1>=pStr->nAlloc ){
- int nAlloc = pStr->nAlloc+nAppend+100;
- char *zNew = sqlite3_realloc(pStr->z, nAlloc);
- if( !zNew ){
- return SQLITE_NOMEM;
- }
- pStr->z = zNew;
- pStr->nAlloc = nAlloc;
- }
- assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) );
-
- /* Append the data to the string buffer. */
- memcpy(&pStr->z[pStr->n], zAppend, nAppend);
- pStr->n += nAppend;
- pStr->z[pStr->n] = '\0';
-
- return SQLITE_OK;
-}
-
-/*
-** The fts3BestSnippet() function often selects snippets that end with a
-** query term. That is, the final term of the snippet is always a term
-** that requires highlighting. For example, if 'X' is a highlighted term
-** and '.' is a non-highlighted term, BestSnippet() may select:
-**
-** ........X.....X
-**
-** This function "shifts" the beginning of the snippet forward in the
-** document so that there are approximately the same number of
-** non-highlighted terms to the right of the final highlighted term as there
-** are to the left of the first highlighted term. For example, to this:
-**
-** ....X.....X....
-**
-** This is done as part of extracting the snippet text, not when selecting
-** the snippet. Snippet selection is done based on doclists only, so there
-** is no way for fts3BestSnippet() to know whether or not the document
-** actually contains terms that follow the final highlighted term.
-*/
-static int fts3SnippetShift(
- Fts3Table *pTab, /* FTS3 table snippet comes from */
- int iLangid, /* Language id to use in tokenizing */
- int nSnippet, /* Number of tokens desired for snippet */
- const char *zDoc, /* Document text to extract snippet from */
- int nDoc, /* Size of buffer zDoc in bytes */
- int *piPos, /* IN/OUT: First token of snippet */
- u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */
-){
- u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */
-
- if( hlmask ){
- int nLeft; /* Tokens to the left of first highlight */
- int nRight; /* Tokens to the right of last highlight */
- int nDesired; /* Ideal number of tokens to shift forward */
-
- for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
- for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
- nDesired = (nLeft-nRight)/2;
-
- /* Ideally, the start of the snippet should be pushed forward in the
- ** document nDesired tokens. This block checks if there are actually
- ** nDesired tokens to the right of the snippet. If so, *piPos and
- ** *pHlMask are updated to shift the snippet nDesired tokens to the
- ** right. Otherwise, the snippet is shifted by the number of tokens
- ** available.
- */
- if( nDesired>0 ){
- int nShift; /* Number of tokens to shift snippet by */
- int iCurrent = 0; /* Token counter */
- int rc; /* Return Code */
- sqlite3_tokenizer_module *pMod;
- sqlite3_tokenizer_cursor *pC;
- pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
-
- /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
- ** or more tokens in zDoc/nDoc.
- */
- rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
- if( rc!=SQLITE_OK ){
- return rc;
- }
- while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
- const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
- rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
- }
- pMod->xClose(pC);
- if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; }
-
- nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet;
- assert( nShift<=nDesired );
- if( nShift>0 ){
- *piPos += nShift;
- *pHlmask = hlmask >> nShift;
- }
- }
- }
- return SQLITE_OK;
-}
-
-/*
-** Extract the snippet text for fragment pFragment from cursor pCsr and
-** append it to string buffer pOut.
-*/
-static int fts3SnippetText(
- Fts3Cursor *pCsr, /* FTS3 Cursor */
- SnippetFragment *pFragment, /* Snippet to extract */
- int iFragment, /* Fragment number */
- int isLast, /* True for final fragment in snippet */
- int nSnippet, /* Number of tokens in extracted snippet */
- const char *zOpen, /* String inserted before highlighted term */
- const char *zClose, /* String inserted after highlighted term */
- const char *zEllipsis, /* String inserted between snippets */
- StrBuffer *pOut /* Write output here */
-){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int rc; /* Return code */
- const char *zDoc; /* Document text to extract snippet from */
- int nDoc; /* Size of zDoc in bytes */
- int iCurrent = 0; /* Current token number of document */
- int iEnd = 0; /* Byte offset of end of current token */
- int isShiftDone = 0; /* True after snippet is shifted */
- int iPos = pFragment->iPos; /* First token of snippet */
- u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
- int iCol = pFragment->iCol+1; /* Query column to extract text from */
- sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
- sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
-
- zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
- if( zDoc==0 ){
- if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
- return SQLITE_NOMEM;
- }
- return SQLITE_OK;
- }
- nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
-
- /* Open a token cursor on the document. */
- pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
- rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
- if( rc!=SQLITE_OK ){
- return rc;
- }
-
- while( rc==SQLITE_OK ){
- const char *ZDUMMY; /* Dummy argument used with tokenizer */
- int DUMMY1 = -1; /* Dummy argument used with tokenizer */
- int iBegin = 0; /* Offset in zDoc of start of token */
- int iFin = 0; /* Offset in zDoc of end of token */
- int isHighlight = 0; /* True for highlighted terms */
-
- /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
- ** in the FTS code the variable that the third argument to xNext points to
- ** is initialized to zero before the first (*but not necessarily
- ** subsequent*) call to xNext(). This is done for a particular application
- ** that needs to know whether or not the tokenizer is being used for
- ** snippet generation or for some other purpose.
- **
- ** Extreme care is required when writing code to depend on this
- ** initialization. It is not a documented part of the tokenizer interface.
- ** If a tokenizer is used directly by any code outside of FTS, this
- ** convention might not be respected. */
- rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
- if( rc!=SQLITE_OK ){
- if( rc==SQLITE_DONE ){
- /* Special case - the last token of the snippet is also the last token
- ** of the column. Append any punctuation that occurred between the end
- ** of the previous token and the end of the document to the output.
- ** Then break out of the loop. */
- rc = fts3StringAppend(pOut, &zDoc[iEnd], -1);
- }
- break;
- }
- if( iCurrent<iPos ){ continue; }
-
- if( !isShiftDone ){
- int n = nDoc - iBegin;
- rc = fts3SnippetShift(
- pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
- );
- isShiftDone = 1;
-
- /* Now that the shift has been done, check if the initial "..." are
- ** required. They are required if (a) this is not the first fragment,
- ** or (b) this fragment does not begin at position 0 of its column.
- */
- if( rc==SQLITE_OK ){
- if( iPos>0 || iFragment>0 ){
- rc = fts3StringAppend(pOut, zEllipsis, -1);
- }else if( iBegin ){
- rc = fts3StringAppend(pOut, zDoc, iBegin);
- }
- }
- if( rc!=SQLITE_OK || iCurrent<iPos ) continue;
- }
-
- if( iCurrent>=(iPos+nSnippet) ){
- if( isLast ){
- rc = fts3StringAppend(pOut, zEllipsis, -1);
- }
- break;
- }
-
- /* Set isHighlight to true if this term should be highlighted. */
- isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0;
-
- if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd);
- if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1);
- if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin);
- if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1);
-
- iEnd = iFin;
- }
-
- pMod->xClose(pC);
- return rc;
-}
-
-
-/*
-** This function is used to count the entries in a column-list (a
-** delta-encoded list of term offsets within a single column of a single
-** row). When this function is called, *ppCollist should point to the
-** beginning of the first varint in the column-list (the varint that
-** contains the position of the first matching term in the column data).
-** Before returning, *ppCollist is set to point to the first byte after
-** the last varint in the column-list (either the 0x00 signifying the end
-** of the position-list, or the 0x01 that precedes the column number of
-** the next column in the position-list).
-**
-** The number of elements in the column-list is returned.
-*/
-static int fts3ColumnlistCount(char **ppCollist){
- char *pEnd = *ppCollist;
- char c = 0;
- int nEntry = 0;
-
- /* A column-list is terminated by either a 0x01 or 0x00. */
- while( 0xFE & (*pEnd | c) ){
- c = *pEnd++ & 0x80;
- if( !c ) nEntry++;
- }
-
- *ppCollist = pEnd;
- return nEntry;
-}
-
-/*
-** This function gathers 'y' or 'b' data for a single phrase.
-*/
-static void fts3ExprLHits(
- Fts3Expr *pExpr, /* Phrase expression node */
- MatchInfo *p /* Matchinfo context */
-){
- Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab;
- int iStart;
- Fts3Phrase *pPhrase = pExpr->pPhrase;
- char *pIter = pPhrase->doclist.pList;
- int iCol = 0;
-
- assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS );
- if( p->flag==FTS3_MATCHINFO_LHITS ){
- iStart = pExpr->iPhrase * p->nCol;
- }else{
- iStart = pExpr->iPhrase * ((p->nCol + 31) / 32);
- }
-
- while( 1 ){
- int nHit = fts3ColumnlistCount(&pIter);
- if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){
- if( p->flag==FTS3_MATCHINFO_LHITS ){
- p->aMatchinfo[iStart + iCol] = (u32)nHit;
- }else if( nHit ){
- p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F));
- }
- }
- assert( *pIter==0x00 || *pIter==0x01 );
- if( *pIter!=0x01 ) break;
- pIter++;
- pIter += fts3GetVarint32(pIter, &iCol);
- }
-}
-
-/*
-** Gather the results for matchinfo directives 'y' and 'b'.
-*/
-static void fts3ExprLHitGather(
- Fts3Expr *pExpr,
- MatchInfo *p
-){
- assert( (pExpr->pLeft==0)==(pExpr->pRight==0) );
- if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){
- if( pExpr->pLeft ){
- fts3ExprLHitGather(pExpr->pLeft, p);
- fts3ExprLHitGather(pExpr->pRight, p);
- }else{
- fts3ExprLHits(pExpr, p);
- }
- }
-}
-
-/*
-** fts3ExprIterate() callback used to collect the "global" matchinfo stats
-** for a single query.
-**
-** fts3ExprIterate() callback to load the 'global' elements of a
-** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements
-** of the matchinfo array that are constant for all rows returned by the
-** current query.
-**
-** Argument pCtx is actually a pointer to a struct of type MatchInfo. This
-** function populates Matchinfo.aMatchinfo[] as follows:
-**
-** for(iCol=0; iCol<nCol; iCol++){
-** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X;
-** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y;
-** }
-**
-** where X is the number of matches for phrase iPhrase is column iCol of all
-** rows of the table. Y is the number of rows for which column iCol contains
-** at least one instance of phrase iPhrase.
-**
-** If the phrase pExpr consists entirely of deferred tokens, then all X and
-** Y values are set to nDoc, where nDoc is the number of documents in the
-** file system. This is done because the full-text index doclist is required
-** to calculate these values properly, and the full-text index doclist is
-** not available for deferred tokens.
-*/
-static int fts3ExprGlobalHitsCb(
- Fts3Expr *pExpr, /* Phrase expression node */
- int iPhrase, /* Phrase number (numbered from zero) */
- void *pCtx /* Pointer to MatchInfo structure */
-){
- MatchInfo *p = (MatchInfo *)pCtx;
- return sqlite3Fts3EvalPhraseStats(
- p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol]
- );
-}
-
-/*
-** fts3ExprIterate() callback used to collect the "local" part of the
-** FTS3_MATCHINFO_HITS array. The local stats are those elements of the
-** array that are different for each row returned by the query.
-*/
-static int fts3ExprLocalHitsCb(
- Fts3Expr *pExpr, /* Phrase expression node */
- int iPhrase, /* Phrase number */
- void *pCtx /* Pointer to MatchInfo structure */
-){
- int rc = SQLITE_OK;
- MatchInfo *p = (MatchInfo *)pCtx;
- int iStart = iPhrase * p->nCol * 3;
- int i;
-
- for(i=0; i<p->nCol && rc==SQLITE_OK; i++){
- char *pCsr;
- rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr);
- if( pCsr ){
- p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
- }else{
- p->aMatchinfo[iStart+i*3] = 0;
- }
- }
-
- return rc;
-}
-
-static int fts3MatchinfoCheck(
- Fts3Table *pTab,
- char cArg,
- char **pzErr
-){
- if( (cArg==FTS3_MATCHINFO_NPHRASE)
- || (cArg==FTS3_MATCHINFO_NCOL)
- || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4)
- || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4)
- || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
- || (cArg==FTS3_MATCHINFO_LCS)
- || (cArg==FTS3_MATCHINFO_HITS)
- || (cArg==FTS3_MATCHINFO_LHITS)
- || (cArg==FTS3_MATCHINFO_LHITS_BM)
- ){
- return SQLITE_OK;
- }
- sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg);
- return SQLITE_ERROR;
-}
-
-static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){
- int nVal; /* Number of integers output by cArg */
-
- switch( cArg ){
- case FTS3_MATCHINFO_NDOC:
- case FTS3_MATCHINFO_NPHRASE:
- case FTS3_MATCHINFO_NCOL:
- nVal = 1;
- break;
-
- case FTS3_MATCHINFO_AVGLENGTH:
- case FTS3_MATCHINFO_LENGTH:
- case FTS3_MATCHINFO_LCS:
- nVal = pInfo->nCol;
- break;
-
- case FTS3_MATCHINFO_LHITS:
- nVal = pInfo->nCol * pInfo->nPhrase;
- break;
-
- case FTS3_MATCHINFO_LHITS_BM:
- nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32);
- break;
-
- default:
- assert( cArg==FTS3_MATCHINFO_HITS );
- nVal = pInfo->nCol * pInfo->nPhrase * 3;
- break;
- }
-
- return nVal;
-}
-
-static int fts3MatchinfoSelectDoctotal(
- Fts3Table *pTab,
- sqlite3_stmt **ppStmt,
- sqlite3_int64 *pnDoc,
- const char **paLen
-){
- sqlite3_stmt *pStmt;
- const char *a;
- sqlite3_int64 nDoc;
-
- if( !*ppStmt ){
- int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt);
- if( rc!=SQLITE_OK ) return rc;
- }
- pStmt = *ppStmt;
- assert( sqlite3_data_count(pStmt)==1 );
-
- a = sqlite3_column_blob(pStmt, 0);
- a += sqlite3Fts3GetVarint(a, &nDoc);
- if( nDoc==0 ) return FTS_CORRUPT_VTAB;
- *pnDoc = (u32)nDoc;
-
- if( paLen ) *paLen = a;
- return SQLITE_OK;
-}
-
-/*
-** An instance of the following structure is used to store state while
-** iterating through a multi-column position-list corresponding to the
-** hits for a single phrase on a single row in order to calculate the
-** values for a matchinfo() FTS3_MATCHINFO_LCS request.
-*/
-typedef struct LcsIterator LcsIterator;
-struct LcsIterator {
- Fts3Expr *pExpr; /* Pointer to phrase expression */
- int iPosOffset; /* Tokens count up to end of this phrase */
- char *pRead; /* Cursor used to iterate through aDoclist */
- int iPos; /* Current position */
-};
-
-/*
-** If LcsIterator.iCol is set to the following value, the iterator has
-** finished iterating through all offsets for all columns.
-*/
-#define LCS_ITERATOR_FINISHED 0x7FFFFFFF;
-
-static int fts3MatchinfoLcsCb(
- Fts3Expr *pExpr, /* Phrase expression node */
- int iPhrase, /* Phrase number (numbered from zero) */
- void *pCtx /* Pointer to MatchInfo structure */
-){
- LcsIterator *aIter = (LcsIterator *)pCtx;
- aIter[iPhrase].pExpr = pExpr;
- return SQLITE_OK;
-}
-
-/*
-** Advance the iterator passed as an argument to the next position. Return
-** 1 if the iterator is at EOF or if it now points to the start of the
-** position list for the next column.
-*/
-static int fts3LcsIteratorAdvance(LcsIterator *pIter){
- char *pRead = pIter->pRead;
- sqlite3_int64 iRead;
- int rc = 0;
-
- pRead += sqlite3Fts3GetVarint(pRead, &iRead);
- if( iRead==0 || iRead==1 ){
- pRead = 0;
- rc = 1;
- }else{
- pIter->iPos += (int)(iRead-2);
- }
-
- pIter->pRead = pRead;
- return rc;
-}
-
-/*
-** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag.
-**
-** If the call is successful, the longest-common-substring lengths for each
-** column are written into the first nCol elements of the pInfo->aMatchinfo[]
-** array before returning. SQLITE_OK is returned in this case.
-**
-** Otherwise, if an error occurs, an SQLite error code is returned and the
-** data written to the first nCol elements of pInfo->aMatchinfo[] is
-** undefined.
-*/
-static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
- LcsIterator *aIter;
- int i;
- int iCol;
- int nToken = 0;
-
- /* Allocate and populate the array of LcsIterator objects. The array
- ** contains one element for each matchable phrase in the query.
- **/
- aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase);
- if( !aIter ) return SQLITE_NOMEM;
- memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase);
- (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter);
-
- for(i=0; i<pInfo->nPhrase; i++){
- LcsIterator *pIter = &aIter[i];
- nToken -= pIter->pExpr->pPhrase->nToken;
- pIter->iPosOffset = nToken;
- }
-
- for(iCol=0; iCol<pInfo->nCol; iCol++){
- int nLcs = 0; /* LCS value for this column */
- int nLive = 0; /* Number of iterators in aIter not at EOF */
-
- for(i=0; i<pInfo->nPhrase; i++){
- int rc;
- LcsIterator *pIt = &aIter[i];
- rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead);
- if( rc!=SQLITE_OK ) return rc;
- if( pIt->pRead ){
- pIt->iPos = pIt->iPosOffset;
- fts3LcsIteratorAdvance(&aIter[i]);
- nLive++;
- }
- }
-
- while( nLive>0 ){
- LcsIterator *pAdv = 0; /* The iterator to advance by one position */
- int nThisLcs = 0; /* LCS for the current iterator positions */
-
- for(i=0; i<pInfo->nPhrase; i++){
- LcsIterator *pIter = &aIter[i];
- if( pIter->pRead==0 ){
- /* This iterator is already at EOF for this column. */
- nThisLcs = 0;
- }else{
- if( pAdv==0 || pIter->iPos<pAdv->iPos ){
- pAdv = pIter;
- }
- if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){
- nThisLcs++;
- }else{
- nThisLcs = 1;
- }
- if( nThisLcs>nLcs ) nLcs = nThisLcs;
- }
- }
- if( fts3LcsIteratorAdvance(pAdv) ) nLive--;
- }
-
- pInfo->aMatchinfo[iCol] = nLcs;
- }
-
- sqlite3_free(aIter);
- return SQLITE_OK;
-}
-
-/*
-** Populate the buffer pInfo->aMatchinfo[] with an array of integers to
-** be returned by the matchinfo() function. Argument zArg contains the
-** format string passed as the second argument to matchinfo (or the
-** default value "pcx" if no second argument was specified). The format
-** string has already been validated and the pInfo->aMatchinfo[] array
-** is guaranteed to be large enough for the output.
-**
-** If bGlobal is true, then populate all fields of the matchinfo() output.
-** If it is false, then assume that those fields that do not change between
-** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS)
-** have already been populated.
-**
-** Return SQLITE_OK if successful, or an SQLite error code if an error
-** occurs. If a value other than SQLITE_OK is returned, the state the
-** pInfo->aMatchinfo[] buffer is left in is undefined.
-*/
-static int fts3MatchinfoValues(
- Fts3Cursor *pCsr, /* FTS3 cursor object */
- int bGlobal, /* True to grab the global stats */
- MatchInfo *pInfo, /* Matchinfo context object */
- const char *zArg /* Matchinfo format string */
-){
- int rc = SQLITE_OK;
- int i;
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- sqlite3_stmt *pSelect = 0;
-
- for(i=0; rc==SQLITE_OK && zArg[i]; i++){
- pInfo->flag = zArg[i];
- switch( zArg[i] ){
- case FTS3_MATCHINFO_NPHRASE:
- if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase;
- break;
-
- case FTS3_MATCHINFO_NCOL:
- if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol;
- break;
-
- case FTS3_MATCHINFO_NDOC:
- if( bGlobal ){
- sqlite3_int64 nDoc = 0;
- rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0);
- pInfo->aMatchinfo[0] = (u32)nDoc;
- }
- break;
-
- case FTS3_MATCHINFO_AVGLENGTH:
- if( bGlobal ){
- sqlite3_int64 nDoc; /* Number of rows in table */
- const char *a; /* Aggregate column length array */
-
- rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a);
- if( rc==SQLITE_OK ){
- int iCol;
- for(iCol=0; iCol<pInfo->nCol; iCol++){
- u32 iVal;
- sqlite3_int64 nToken;
- a += sqlite3Fts3GetVarint(a, &nToken);
- iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc);
- pInfo->aMatchinfo[iCol] = iVal;
- }
- }
- }
- break;
-
- case FTS3_MATCHINFO_LENGTH: {
- sqlite3_stmt *pSelectDocsize = 0;
- rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize);
- if( rc==SQLITE_OK ){
- int iCol;
- const char *a = sqlite3_column_blob(pSelectDocsize, 0);
- for(iCol=0; iCol<pInfo->nCol; iCol++){
- sqlite3_int64 nToken;
- a += sqlite3Fts3GetVarint(a, &nToken);
- pInfo->aMatchinfo[iCol] = (u32)nToken;
- }
- }
- sqlite3_reset(pSelectDocsize);
- break;
- }
-
- case FTS3_MATCHINFO_LCS:
- rc = fts3ExprLoadDoclists(pCsr, 0, 0);
- if( rc==SQLITE_OK ){
- rc = fts3MatchinfoLcs(pCsr, pInfo);
- }
- break;
-
- case FTS3_MATCHINFO_LHITS_BM:
- case FTS3_MATCHINFO_LHITS: {
- int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32);
- memset(pInfo->aMatchinfo, 0, nZero);
- fts3ExprLHitGather(pCsr->pExpr, pInfo);
- break;
- }
-
- default: {
- Fts3Expr *pExpr;
- assert( zArg[i]==FTS3_MATCHINFO_HITS );
- pExpr = pCsr->pExpr;
- rc = fts3ExprLoadDoclists(pCsr, 0, 0);
- if( rc!=SQLITE_OK ) break;
- if( bGlobal ){
- if( pCsr->pDeferred ){
- rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0);
- if( rc!=SQLITE_OK ) break;
- }
- rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo);
- sqlite3Fts3EvalTestDeferred(pCsr, &rc);
- if( rc!=SQLITE_OK ) break;
- }
- (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo);
- break;
- }
- }
-
- pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]);
- }
-
- sqlite3_reset(pSelect);
- return rc;
-}
-
-
-/*
-** Populate pCsr->aMatchinfo[] with data for the current row. The
-** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
-*/
-static void fts3GetMatchinfo(
- sqlite3_context *pCtx, /* Return results here */
- Fts3Cursor *pCsr, /* FTS3 Cursor object */
- const char *zArg /* Second argument to matchinfo() function */
-){
- MatchInfo sInfo;
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int rc = SQLITE_OK;
- int bGlobal = 0; /* Collect 'global' stats as well as local */
-
- u32 *aOut = 0;
- void (*xDestroyOut)(void*) = 0;
-
- memset(&sInfo, 0, sizeof(MatchInfo));
- sInfo.pCursor = pCsr;
- sInfo.nCol = pTab->nColumn;
-
- /* If there is cached matchinfo() data, but the format string for the
- ** cache does not match the format string for this request, discard
- ** the cached data. */
- if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){
- sqlite3Fts3MIBufferFree(pCsr->pMIBuffer);
- pCsr->pMIBuffer = 0;
- }
-
- /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the
- ** matchinfo function has been called for this query. In this case
- ** allocate the array used to accumulate the matchinfo data and
- ** initialize those elements that are constant for every row.
- */
- if( pCsr->pMIBuffer==0 ){
- int nMatchinfo = 0; /* Number of u32 elements in match-info */
- int i; /* Used to iterate through zArg */
-
- /* Determine the number of phrases in the query */
- pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr);
- sInfo.nPhrase = pCsr->nPhrase;
-
- /* Determine the number of integers in the buffer returned by this call. */
- for(i=0; zArg[i]; i++){
- char *zErr = 0;
- if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){
- sqlite3_result_error(pCtx, zErr, -1);
- sqlite3_free(zErr);
- return;
- }
- nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]);
- }
-
- /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */
- pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg);
- if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM;
-
- pCsr->isMatchinfoNeeded = 1;
- bGlobal = 1;
- }
-
- if( rc==SQLITE_OK ){
- xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut);
- if( xDestroyOut==0 ){
- rc = SQLITE_NOMEM;
- }
- }
-
- if( rc==SQLITE_OK ){
- sInfo.aMatchinfo = aOut;
- sInfo.nPhrase = pCsr->nPhrase;
- rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg);
- if( bGlobal ){
- fts3MIBufferSetGlobal(pCsr->pMIBuffer);
- }
- }
-
- if( rc!=SQLITE_OK ){
- sqlite3_result_error_code(pCtx, rc);
- if( xDestroyOut ) xDestroyOut(aOut);
- }else{
- int n = pCsr->pMIBuffer->nElem * sizeof(u32);
- sqlite3_result_blob(pCtx, aOut, n, xDestroyOut);
- }
-}
-
-/*
-** Implementation of snippet() function.
-*/
-void sqlite3Fts3Snippet(
- sqlite3_context *pCtx, /* SQLite function call context */
- Fts3Cursor *pCsr, /* Cursor object */
- const char *zStart, /* Snippet start text - "<b>" */
- const char *zEnd, /* Snippet end text - "</b>" */
- const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */
- int iCol, /* Extract snippet from this column */
- int nToken /* Approximate number of tokens in snippet */
-){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- int rc = SQLITE_OK;
- int i;
- StrBuffer res = {0, 0, 0};
-
- /* The returned text includes up to four fragments of text extracted from
- ** the data in the current row. The first iteration of the for(...) loop
- ** below attempts to locate a single fragment of text nToken tokens in
- ** size that contains at least one instance of all phrases in the query
- ** expression that appear in the current row. If such a fragment of text
- ** cannot be found, the second iteration of the loop attempts to locate
- ** a pair of fragments, and so on.
- */
- int nSnippet = 0; /* Number of fragments in this snippet */
- SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */
- int nFToken = -1; /* Number of tokens in each fragment */
-
- if( !pCsr->pExpr ){
- sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
- return;
- }
-
- for(nSnippet=1; 1; nSnippet++){
-
- int iSnip; /* Loop counter 0..nSnippet-1 */
- u64 mCovered = 0; /* Bitmask of phrases covered by snippet */
- u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */
-
- if( nToken>=0 ){
- nFToken = (nToken+nSnippet-1) / nSnippet;
- }else{
- nFToken = -1 * nToken;
- }
-
- for(iSnip=0; iSnip<nSnippet; iSnip++){
- int iBestScore = -1; /* Best score of columns checked so far */
- int iRead; /* Used to iterate through columns */
- SnippetFragment *pFragment = &aSnippet[iSnip];
-
- memset(pFragment, 0, sizeof(*pFragment));
-
- /* Loop through all columns of the table being considered for snippets.
- ** If the iCol argument to this function was negative, this means all
- ** columns of the FTS3 table. Otherwise, only column iCol is considered.
- */
- for(iRead=0; iRead<pTab->nColumn; iRead++){
- SnippetFragment sF = {0, 0, 0, 0};
- int iS = 0;
- if( iCol>=0 && iRead!=iCol ) continue;
-
- /* Find the best snippet of nFToken tokens in column iRead. */
- rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS);
- if( rc!=SQLITE_OK ){
- goto snippet_out;
- }
- if( iS>iBestScore ){
- *pFragment = sF;
- iBestScore = iS;
- }
- }
-
- mCovered |= pFragment->covered;
- }
-
- /* If all query phrases seen by fts3BestSnippet() are present in at least
- ** one of the nSnippet snippet fragments, break out of the loop.
- */
- assert( (mCovered&mSeen)==mCovered );
- if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break;
- }
-
- assert( nFToken>0 );
-
- for(i=0; i<nSnippet && rc==SQLITE_OK; i++){
- rc = fts3SnippetText(pCsr, &aSnippet[i],
- i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res
- );
- }
-
- snippet_out:
- sqlite3Fts3SegmentsClose(pTab);
- if( rc!=SQLITE_OK ){
- sqlite3_result_error_code(pCtx, rc);
- sqlite3_free(res.z);
- }else{
- sqlite3_result_text(pCtx, res.z, -1, sqlite3_free);
- }
-}
-
-
-typedef struct TermOffset TermOffset;
-typedef struct TermOffsetCtx TermOffsetCtx;
-
-struct TermOffset {
- char *pList; /* Position-list */
- int iPos; /* Position just read from pList */
- int iOff; /* Offset of this term from read positions */
-};
-
-struct TermOffsetCtx {
- Fts3Cursor *pCsr;
- int iCol; /* Column of table to populate aTerm for */
- int iTerm;
- sqlite3_int64 iDocid;
- TermOffset *aTerm;
-};
-
-/*
-** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets().
-*/
-static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
- TermOffsetCtx *p = (TermOffsetCtx *)ctx;
- int nTerm; /* Number of tokens in phrase */
- int iTerm; /* For looping through nTerm phrase terms */
- char *pList; /* Pointer to position list for phrase */
- int iPos = 0; /* First position in position-list */
- int rc;
-
- UNUSED_PARAMETER(iPhrase);
- rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList);
- nTerm = pExpr->pPhrase->nToken;
- if( pList ){
- fts3GetDeltaPosition(&pList, &iPos);
- assert( iPos>=0 );
- }
-
- for(iTerm=0; iTerm<nTerm; iTerm++){
- TermOffset *pT = &p->aTerm[p->iTerm++];
- pT->iOff = nTerm-iTerm-1;
- pT->pList = pList;
- pT->iPos = iPos;
- }
-
- return rc;
-}
-
-/*
-** Implementation of offsets() function.
-*/
-void sqlite3Fts3Offsets(
- sqlite3_context *pCtx, /* SQLite function call context */
- Fts3Cursor *pCsr /* Cursor object */
-){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
- int rc; /* Return Code */
- int nToken; /* Number of tokens in query */
- int iCol; /* Column currently being processed */
- StrBuffer res = {0, 0, 0}; /* Result string */
- TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */
-
- if( !pCsr->pExpr ){
- sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
- return;
- }
-
- memset(&sCtx, 0, sizeof(sCtx));
- assert( pCsr->isRequireSeek==0 );
-
- /* Count the number of terms in the query */
- rc = fts3ExprLoadDoclists(pCsr, 0, &nToken);
- if( rc!=SQLITE_OK ) goto offsets_out;
-
- /* Allocate the array of TermOffset iterators. */
- sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken);
- if( 0==sCtx.aTerm ){
- rc = SQLITE_NOMEM;
- goto offsets_out;
- }
- sCtx.iDocid = pCsr->iPrevId;
- sCtx.pCsr = pCsr;
-
- /* Loop through the table columns, appending offset information to
- ** string-buffer res for each column.
- */
- for(iCol=0; iCol<pTab->nColumn; iCol++){
- sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
- const char *ZDUMMY; /* Dummy argument used with xNext() */
- int NDUMMY = 0; /* Dummy argument used with xNext() */
- int iStart = 0;
- int iEnd = 0;
- int iCurrent = 0;
- const char *zDoc;
- int nDoc;
-
- /* Initialize the contents of sCtx.aTerm[] for column iCol. There is
- ** no way that this operation can fail, so the return code from
- ** fts3ExprIterate() can be discarded.
- */
- sCtx.iCol = iCol;
- sCtx.iTerm = 0;
- (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx);
-
- /* Retreive the text stored in column iCol. If an SQL NULL is stored
- ** in column iCol, jump immediately to the next iteration of the loop.
- ** If an OOM occurs while retrieving the data (this can happen if SQLite
- ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM
- ** to the caller.
- */
- zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1);
- nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1);
- if( zDoc==0 ){
- if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){
- continue;
- }
- rc = SQLITE_NOMEM;
- goto offsets_out;
- }
-
- /* Initialize a tokenizer iterator to iterate through column iCol. */
- rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
- zDoc, nDoc, &pC
- );
- if( rc!=SQLITE_OK ) goto offsets_out;
-
- rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
- while( rc==SQLITE_OK ){
- int i; /* Used to loop through terms */
- int iMinPos = 0x7FFFFFFF; /* Position of next token */
- TermOffset *pTerm = 0; /* TermOffset associated with next token */
-
- for(i=0; i<nToken; i++){
- TermOffset *pT = &sCtx.aTerm[i];
- if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){
- iMinPos = pT->iPos-pT->iOff;
- pTerm = pT;
- }
- }
-
- if( !pTerm ){
- /* All offsets for this column have been gathered. */
- rc = SQLITE_DONE;
- }else{
- assert( iCurrent<=iMinPos );
- if( 0==(0xFE&*pTerm->pList) ){
- pTerm->pList = 0;
- }else{
- fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos);
- }
- while( rc==SQLITE_OK && iCurrent<iMinPos ){
- rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
- }
- if( rc==SQLITE_OK ){
- char aBuffer[64];
- sqlite3_snprintf(sizeof(aBuffer), aBuffer,
- "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
- );
- rc = fts3StringAppend(&res, aBuffer, -1);
- }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
- rc = FTS_CORRUPT_VTAB;
- }
- }
- }
- if( rc==SQLITE_DONE ){
- rc = SQLITE_OK;
- }
-
- pMod->xClose(pC);
- if( rc!=SQLITE_OK ) goto offsets_out;
- }
-
- offsets_out:
- sqlite3_free(sCtx.aTerm);
- assert( rc!=SQLITE_DONE );
- sqlite3Fts3SegmentsClose(pTab);
- if( rc!=SQLITE_OK ){
- sqlite3_result_error_code(pCtx, rc);
- sqlite3_free(res.z);
- }else{
- sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free);
- }
- return;
-}
-
-/*
-** Implementation of matchinfo() function.
-*/
-void sqlite3Fts3Matchinfo(
- sqlite3_context *pContext, /* Function call context */
- Fts3Cursor *pCsr, /* FTS3 table cursor */
- const char *zArg /* Second arg to matchinfo() function */
-){
- Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- const char *zFormat;
-
- if( zArg ){
- zFormat = zArg;
- }else{
- zFormat = FTS3_MATCHINFO_DEFAULT;
- }
-
- if( !pCsr->pExpr ){
- sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC);
- return;
- }else{
- /* Retrieve matchinfo() data. */
- fts3GetMatchinfo(pContext, pCsr, zFormat);
- sqlite3Fts3SegmentsClose(pTab);
- }
-}
-
-#endif