summaryrefslogtreecommitdiffstats
path: root/lib/libsqlite3/ext/fts3/fts3.c
diff options
context:
space:
mode:
authorespie <espie@openbsd.org>2012-05-22 09:02:29 +0000
committerespie <espie@openbsd.org>2012-05-22 09:02:29 +0000
commitf962b6f71abebd72afa58d78eab055d750d49d0c (patch)
tree490fe4a0a2c05408cd845d4a35f25c6c8f35c9a3 /lib/libsqlite3/ext/fts3/fts3.c
parentFix ftell() to return EOVERFLOW if the file offset is greater than (diff)
downloadwireguard-openbsd-f962b6f71abebd72afa58d78eab055d750d49d0c.tar.xz
wireguard-openbsd-f962b6f71abebd72afa58d78eab055d750d49d0c.zip
import sqlite 3.7.12 (tested by landry@)
Diffstat (limited to 'lib/libsqlite3/ext/fts3/fts3.c')
-rw-r--r--lib/libsqlite3/ext/fts3/fts3.c216
1 files changed, 185 insertions, 31 deletions
diff --git a/lib/libsqlite3/ext/fts3/fts3.c b/lib/libsqlite3/ext/fts3/fts3.c
index 421052b9377..f80e303b56e 100644
--- a/lib/libsqlite3/ext/fts3/fts3.c
+++ b/lib/libsqlite3/ext/fts3/fts3.c
@@ -70,7 +70,7 @@
** A doclist is stored like this:
**
** array {
-** varint docid;
+** varint docid; (delta from previous docid)
** array { (position list for column 0)
** varint position; (2 more than the delta from previous position)
** }
@@ -101,8 +101,8 @@
** at D signals the start of a new column; the 1 at E indicates that the
** new column is column number 1. There are two positions at 12 and 45
** (14-2 and 35-2+12). The 0 at H indicate the end-of-document. The
-** 234 at I is the next docid. It has one position 72 (72-2) and then
-** terminates with the 0 at K.
+** 234 at I is the delta to next docid (357). It has one position 70
+** (72-2) and then terminates with the 0 at K.
**
** A "position-list" is the list of positions for multiple columns for
** a single docid. A "column-list" is the set of positions for a single
@@ -571,6 +571,18 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){
}
/*
+** Create the %_stat table if it does not already exist.
+*/
+void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){
+ fts3DbExec(pRc, p->db,
+ "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'"
+ "(id INTEGER PRIMARY KEY, value BLOB);",
+ p->zDb, p->zName
+ );
+ if( (*pRc)==SQLITE_OK ) p->bHasStat = 1;
+}
+
+/*
** Create the backing store tables (%_content, %_segments and %_segdir)
** required by the FTS3 table passed as the only argument. This is done
** as part of the vtab xCreate() method.
@@ -630,11 +642,9 @@ static int fts3CreateTables(Fts3Table *p){
p->zDb, p->zName
);
}
+ assert( p->bHasStat==p->bFts4 );
if( p->bHasStat ){
- fts3DbExec(&rc, db,
- "CREATE TABLE %Q.'%q_stat'(id INTEGER PRIMARY KEY, value BLOB);",
- p->zDb, p->zName
- );
+ sqlite3Fts3CreateStatTable(&rc, p);
}
return rc;
}
@@ -1275,7 +1285,9 @@ static int fts3InitVtab(
p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
p->bHasDocsize = (isFts4 && bNoDocsize==0);
p->bHasStat = isFts4;
+ p->bFts4 = isFts4;
p->bDescIdx = bDescIdx;
+ p->bAutoincrmerge = 0xff; /* 0xff means setting unknown */
p->zContentTbl = zContent;
p->zLanguageid = zLanguageid;
zContent = 0;
@@ -1328,6 +1340,16 @@ static int fts3InitVtab(
rc = fts3CreateTables(p);
}
+ /* Check to see if a legacy fts3 table has been "upgraded" by the
+ ** addition of a %_stat table so that it can use incremental merge.
+ */
+ if( !isFts4 && !isCreate ){
+ int rc2 = SQLITE_OK;
+ fts3DbExec(&rc2, db, "SELECT 1 FROM %Q.'%q_stat' WHERE id=2",
+ p->zDb, p->zName);
+ if( rc2==SQLITE_OK ) p->bHasStat = 1;
+ }
+
/* Figure out the page-size for the database. This is required in order to
** estimate the cost of loading large doclists from the database. */
fts3DatabasePageSize(&rc, p);
@@ -2671,7 +2693,7 @@ static int fts3SegReaderCursor(
*/
int sqlite3Fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
- int iLangid,
+ int iLangid, /* Language-id to search */
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int iLevel, /* Level of segments to scan */
const char *zTerm, /* Term to query for */
@@ -2689,12 +2711,7 @@ int sqlite3Fts3SegReaderCursor(
assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 );
assert( isPrefix==0 || isScan==0 );
- /* "isScan" is only set to true by the ft4aux module, an ordinary
- ** full-text tables. */
- assert( isScan==0 || p->aIndex==0 );
-
memset(pCsr, 0, sizeof(Fts3MultiSegReader));
-
return fts3SegReaderCursor(
p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
);
@@ -2959,7 +2976,7 @@ static int fts3FilterMethod(
if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);
rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
- p->azColumn, p->bHasStat, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
+ p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_ERROR ){
@@ -3102,8 +3119,42 @@ static int fts3UpdateMethod(
** hash-table to the database.
*/
static int fts3SyncMethod(sqlite3_vtab *pVtab){
- int rc = sqlite3Fts3PendingTermsFlush((Fts3Table *)pVtab);
- sqlite3Fts3SegmentsClose((Fts3Table *)pVtab);
+
+ /* Following an incremental-merge operation, assuming that the input
+ ** segments are not completely consumed (the usual case), they are updated
+ ** in place to remove the entries that have already been merged. This
+ ** involves updating the leaf block that contains the smallest unmerged
+ ** entry and each block (if any) between the leaf and the root node. So
+ ** if the height of the input segment b-trees is N, and input segments
+ ** are merged eight at a time, updating the input segments at the end
+ ** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually
+ ** small - often between 0 and 2. So the overhead of the incremental
+ ** merge is somewhere between 8 and 24 blocks. To avoid this overhead
+ ** dwarfing the actual productive work accomplished, the incremental merge
+ ** is only attempted if it will write at least 64 leaf blocks. Hence
+ ** nMinMerge.
+ **
+ ** Of course, updating the input segments also involves deleting a bunch
+ ** of blocks from the segments table. But this is not considered overhead
+ ** as it would also be required by a crisis-merge that used the same input
+ ** segments.
+ */
+ const u32 nMinMerge = 64; /* Minimum amount of incr-merge work to do */
+
+ Fts3Table *p = (Fts3Table*)pVtab;
+ int rc = sqlite3Fts3PendingTermsFlush(p);
+
+ if( rc==SQLITE_OK && p->bAutoincrmerge==1 && p->nLeafAdd>(nMinMerge/16) ){
+ int mxLevel = 0; /* Maximum relative level value in db */
+ int A; /* Incr-merge parameter A */
+
+ rc = sqlite3Fts3MaxLevel(p, &mxLevel);
+ assert( rc==SQLITE_OK || mxLevel==0 );
+ A = p->nLeafAdd * mxLevel;
+ A += (A/2);
+ if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, 8);
+ }
+ sqlite3Fts3SegmentsClose(p);
return rc;
}
@@ -3111,13 +3162,14 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){
** Implementation of xBegin() method. This is a no-op.
*/
static int fts3BeginMethod(sqlite3_vtab *pVtab){
- TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
+ Fts3Table *p = (Fts3Table*)pVtab;
UNUSED_PARAMETER(pVtab);
assert( p->pSegments==0 );
assert( p->nPendingData==0 );
assert( p->inTransaction!=1 );
TESTONLY( p->inTransaction = 1 );
TESTONLY( p->mxSavepoint = -1; );
+ p->nLeafAdd = 0;
return SQLITE_OK;
}
@@ -3412,11 +3464,15 @@ static int fts3RenameMethod(
** Flush the contents of the pending-terms table to disk.
*/
static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
+ int rc = SQLITE_OK;
UNUSED_PARAMETER(iSavepoint);
assert( ((Fts3Table *)pVtab)->inTransaction );
assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint );
TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint );
- return fts3SyncMethod(pVtab);
+ if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){
+ rc = fts3SyncMethod(pVtab);
+ }
+ return rc;
}
/*
@@ -3889,7 +3945,7 @@ void sqlite3Fts3DoclistPrev(
int nDoclist, /* Length of aDoclist in bytes */
char **ppIter, /* IN/OUT: Iterator pointer */
sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */
- int *pnList, /* IN/OUT: List length pointer */
+ int *pnList, /* OUT: List length pointer */
u8 *pbEof /* OUT: End-of-file flag */
){
char *p = *ppIter;
@@ -3937,6 +3993,41 @@ void sqlite3Fts3DoclistPrev(
}
/*
+** Iterate forwards through a doclist.
+*/
+void sqlite3Fts3DoclistNext(
+ int bDescIdx, /* True if the doclist is desc */
+ char *aDoclist, /* Pointer to entire doclist */
+ int nDoclist, /* Length of aDoclist in bytes */
+ char **ppIter, /* IN/OUT: Iterator pointer */
+ sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */
+ u8 *pbEof /* OUT: End-of-file flag */
+){
+ char *p = *ppIter;
+
+ assert( nDoclist>0 );
+ assert( *pbEof==0 );
+ assert( p || *piDocid==0 );
+ assert( !p || (p>=aDoclist && p<=&aDoclist[nDoclist]) );
+
+ if( p==0 ){
+ p = aDoclist;
+ p += sqlite3Fts3GetVarint(p, piDocid);
+ }else{
+ fts3PoslistCopy(0, &p);
+ if( p>=&aDoclist[nDoclist] ){
+ *pbEof = 1;
+ }else{
+ sqlite3_int64 iVar;
+ p += sqlite3Fts3GetVarint(p, &iVar);
+ *piDocid += ((bDescIdx ? -1 : 1) * iVar);
+ }
+ }
+
+ *ppIter = p;
+}
+
+/*
** Attempt to move the phrase iterator to point to the next matching docid.
** If an error occurs, return an SQLite error code. Otherwise, return
** SQLITE_OK.
@@ -4331,7 +4422,7 @@ static int fts3EvalStart(Fts3Cursor *pCsr){
fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);
/* Determine which, if any, tokens in the expression should be deferred. */
- if( rc==SQLITE_OK && nToken>1 && pTab->bHasStat ){
+ if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){
Fts3TokenAndCost *aTC;
Fts3Expr **apOr;
aTC = (Fts3TokenAndCost *)sqlite3_malloc(
@@ -5091,26 +5182,87 @@ int sqlite3Fts3EvalPhraseStats(
** This function works regardless of whether or not the phrase is deferred,
** incremental, or neither.
*/
-char *sqlite3Fts3EvalPhrasePoslist(
+int sqlite3Fts3EvalPhrasePoslist(
Fts3Cursor *pCsr, /* FTS3 cursor object */
Fts3Expr *pExpr, /* Phrase to return doclist for */
- int iCol /* Column to return position list for */
+ int iCol, /* Column to return position list for */
+ char **ppOut /* OUT: Pointer to position list */
){
Fts3Phrase *pPhrase = pExpr->pPhrase;
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- char *pIter = pPhrase->doclist.pList;
+ char *pIter;
int iThis;
+ sqlite3_int64 iDocid;
+ /* If this phrase applies specifically to some column other than
+ ** column iCol, set *ppOut to NULL and return SQLITE_OK. */
+ *ppOut = 0;
assert( iCol>=0 && iCol<pTab->nColumn );
- if( !pIter
- || pExpr->bEof
- || pExpr->iDocid!=pCsr->iPrevId
- || (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol)
- ){
- return 0;
+ if( (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){
+ return SQLITE_OK;
+ }
+
+ iDocid = pExpr->iDocid;
+ pIter = pPhrase->doclist.pList;
+ if( iDocid!=pCsr->iPrevId || pExpr->bEof ){
+ int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */
+ int bOr = 0;
+ u8 bEof = 0;
+ Fts3Expr *p;
+
+ /* Check if this phrase descends from an OR expression node. If not,
+ ** return SQLITE_OK with *ppOut left NULL. Otherwise, the entry for docid
+ ** pCsr->iPrevId may lie earlier in the doclist buffer. */
+ for(p=pExpr->pParent; p; p=p->pParent){
+ if( p->eType==FTSQUERY_OR ) bOr = 1;
+ }
+ if( bOr==0 ) return SQLITE_OK;
+
+ /* This is the descendent of an OR node. In this case we cannot use
+ ** an incremental phrase. Load the entire doclist for the phrase
+ ** into memory in this case. */
+ if( pPhrase->bIncr ){
+ int rc = SQLITE_OK;
+ int bEofSave = pExpr->bEof;
+ fts3EvalRestart(pCsr, pExpr, &rc);
+ while( rc==SQLITE_OK && !pExpr->bEof ){
+ fts3EvalNextRow(pCsr, pExpr, &rc);
+ if( bEofSave==0 && pExpr->iDocid==iDocid ) break;
+ }
+ pIter = pPhrase->doclist.pList;
+ assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
+ if( rc!=SQLITE_OK ) return rc;
+ }
+
+ if( pExpr->bEof ){
+ pIter = 0;
+ iDocid = 0;
+ }
+ bEof = (pPhrase->doclist.nAll==0);
+ assert( bDescDoclist==0 || bDescDoclist==1 );
+ assert( pCsr->bDesc==0 || pCsr->bDesc==1 );
+
+ if( pCsr->bDesc==bDescDoclist ){
+ int dummy;
+ while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
+ sqlite3Fts3DoclistPrev(
+ bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
+ &pIter, &iDocid, &dummy, &bEof
+ );
+ }
+ }else{
+ while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
+ sqlite3Fts3DoclistNext(
+ bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
+ &pIter, &iDocid, &bEof
+ );
+ }
+ }
+
+ if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0;
}
+ if( pIter==0 ) return SQLITE_OK;
- assert( pPhrase->doclist.nList>0 );
if( *pIter==0x01 ){
pIter++;
pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
@@ -5124,7 +5276,8 @@ char *sqlite3Fts3EvalPhrasePoslist(
pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
}
- return ((iCol==iThis)?pIter:0);
+ *ppOut = ((iCol==iThis)?pIter:0);
+ return SQLITE_OK;
}
/*
@@ -5147,6 +5300,7 @@ void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
}
}
+
/*
** Return SQLITE_CORRUPT_VTAB.
*/