diff options
Diffstat (limited to 'lib/libsqlite3/src/where.c')
-rw-r--r-- | lib/libsqlite3/src/where.c | 383 |
1 files changed, 247 insertions, 136 deletions
diff --git a/lib/libsqlite3/src/where.c b/lib/libsqlite3/src/where.c index bc0110779ea..f45da74794a 100644 --- a/lib/libsqlite3/src/where.c +++ b/lib/libsqlite3/src/where.c @@ -222,10 +222,11 @@ static int whereClauseInsert(WhereClause *pWC, Expr *p, u8 wtFlags){ sqlite3DbFree(db, pOld); } pWC->nSlot = sqlite3DbMallocSize(db, pWC->a)/sizeof(pWC->a[0]); + memset(&pWC->a[pWC->nTerm], 0, sizeof(pWC->a[0])*(pWC->nSlot-pWC->nTerm)); } pTerm = &pWC->a[idx = pWC->nTerm++]; if( p && ExprHasProperty(p, EP_Unlikely) ){ - pTerm->truthProb = sqlite3LogEst(p->iTable) - 99; + pTerm->truthProb = sqlite3LogEst(p->iTable) - 270; }else{ pTerm->truthProb = 1; } @@ -756,6 +757,15 @@ static void transferJoinMarkings(Expr *pDerived, Expr *pBase){ } } +/* +** Mark term iChild as being a child of term iParent +*/ +static void markTermAsChild(WhereClause *pWC, int iChild, int iParent){ + pWC->a[iChild].iParent = iParent; + pWC->a[iChild].truthProb = pWC->a[iParent].truthProb; + pWC->a[iParent].nChild++; +} + #if !defined(SQLITE_OMIT_OR_OPTIMIZATION) && !defined(SQLITE_OMIT_SUBQUERY) /* ** Analyze a term that consists of two or more OR-connected @@ -1053,8 +1063,7 @@ static void exprAnalyzeOrTerm( testcase( idxNew==0 ); exprAnalyze(pSrc, pWC, idxNew); pTerm = &pWC->a[idxTerm]; - pWC->a[idxNew].iParent = idxTerm; - pTerm->nChild = 1; + markTermAsChild(pWC, idxNew, idxTerm); }else{ sqlite3ExprListDelete(db, pList); } @@ -1156,9 +1165,8 @@ static void exprAnalyze( idxNew = whereClauseInsert(pWC, pDup, TERM_VIRTUAL|TERM_DYNAMIC); if( idxNew==0 ) return; pNew = &pWC->a[idxNew]; - pNew->iParent = idxTerm; + markTermAsChild(pWC, idxNew, idxTerm); pTerm = &pWC->a[idxTerm]; - pTerm->nChild = 1; pTerm->wtFlags |= TERM_COPIED; if( pExpr->op==TK_EQ && !ExprHasProperty(pExpr, EP_FromJoin) @@ -1215,9 +1223,8 @@ static void exprAnalyze( testcase( idxNew==0 ); exprAnalyze(pSrc, pWC, idxNew); pTerm = &pWC->a[idxTerm]; - pWC->a[idxNew].iParent = idxTerm; + markTermAsChild(pWC, idxNew, idxTerm); } - pTerm->nChild = 2; } #endif /* SQLITE_OMIT_BETWEEN_OPTIMIZATION */ @@ -1292,9 +1299,8 @@ static void exprAnalyze( exprAnalyze(pSrc, pWC, idxNew2); pTerm = &pWC->a[idxTerm]; if( isComplete ){ - pWC->a[idxNew1].iParent = idxTerm; - pWC->a[idxNew2].iParent = idxTerm; - pTerm->nChild = 2; + markTermAsChild(pWC, idxNew1, idxTerm); + markTermAsChild(pWC, idxNew2, idxTerm); } } #endif /* SQLITE_OMIT_LIKE_OPTIMIZATION */ @@ -1327,9 +1333,8 @@ static void exprAnalyze( pNewTerm->leftCursor = pLeft->iTable; pNewTerm->u.leftColumn = pLeft->iColumn; pNewTerm->eOperator = WO_MATCH; - pNewTerm->iParent = idxTerm; + markTermAsChild(pWC, idxNew, idxTerm); pTerm = &pWC->a[idxTerm]; - pTerm->nChild = 1; pTerm->wtFlags |= TERM_COPIED; pNewTerm->prereqAll = pTerm->prereqAll; } @@ -1350,7 +1355,7 @@ static void exprAnalyze( if( pExpr->op==TK_NOTNULL && pExpr->pLeft->op==TK_COLUMN && pExpr->pLeft->iColumn>=0 - && OptimizationEnabled(db, SQLITE_Stat3) + && OptimizationEnabled(db, SQLITE_Stat34) ){ Expr *pNewExpr; Expr *pLeft = pExpr->pLeft; @@ -1369,9 +1374,8 @@ static void exprAnalyze( pNewTerm->leftCursor = pLeft->iTable; pNewTerm->u.leftColumn = pLeft->iColumn; pNewTerm->eOperator = WO_GT; - pNewTerm->iParent = idxTerm; + markTermAsChild(pWC, idxNew, idxTerm); pTerm = &pWC->a[idxTerm]; - pTerm->nChild = 1; pTerm->wtFlags |= TERM_COPIED; pNewTerm->prereqAll = pTerm->prereqAll; } @@ -1591,6 +1595,8 @@ static void constructAutomaticIndex( Bitmask idxCols; /* Bitmap of columns used for indexing */ Bitmask extraCols; /* Bitmap of additional columns */ u8 sentWarning = 0; /* True if a warnning has been issued */ + Expr *pPartial = 0; /* Partial Index Expression */ + int iContinue = 0; /* Jump here to skip excluded rows */ /* Generate code to skip over the creation and initialization of the ** transient index on 2nd and subsequent iterations of the loop. */ @@ -1606,6 +1612,13 @@ static void constructAutomaticIndex( pLoop = pLevel->pWLoop; idxCols = 0; for(pTerm=pWC->a; pTerm<pWCEnd; pTerm++){ + if( pLoop->prereq==0 + && (pTerm->wtFlags & TERM_VIRTUAL)==0 + && !ExprHasProperty(pTerm->pExpr, EP_FromJoin) + && sqlite3ExprIsTableConstant(pTerm->pExpr, pSrc->iCursor) ){ + pPartial = sqlite3ExprAnd(pParse->db, pPartial, + sqlite3ExprDup(pParse->db, pTerm->pExpr, 0)); + } if( termCanDriveIndex(pTerm, pSrc, notReady) ){ int iCol = pTerm->u.leftColumn; Bitmask cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol); @@ -1618,7 +1631,9 @@ static void constructAutomaticIndex( sentWarning = 1; } if( (idxCols & cMask)==0 ){ - if( whereLoopResize(pParse->db, pLoop, nKeyCol+1) ) return; + if( whereLoopResize(pParse->db, pLoop, nKeyCol+1) ){ + goto end_auto_index_create; + } pLoop->aLTerm[nKeyCol++] = pTerm; idxCols |= cMask; } @@ -1638,7 +1653,7 @@ static void constructAutomaticIndex( ** if they go out of sync. */ extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS-1)); - mxBitCol = (pTable->nCol >= BMS-1) ? BMS-1 : pTable->nCol; + mxBitCol = MIN(BMS-1,pTable->nCol); testcase( pTable->nCol==BMS-1 ); testcase( pTable->nCol==BMS-2 ); for(i=0; i<mxBitCol; i++){ @@ -1647,11 +1662,10 @@ static void constructAutomaticIndex( if( pSrc->colUsed & MASKBIT(BMS-1) ){ nKeyCol += pTable->nCol - BMS + 1; } - pLoop->wsFlags |= WHERE_COLUMN_EQ | WHERE_IDX_ONLY; /* Construct the Index object to describe this index */ pIdx = sqlite3AllocateIndexObject(pParse->db, nKeyCol+1, 0, &zNotUsed); - if( pIdx==0 ) return; + if( pIdx==0 ) goto end_auto_index_create; pLoop->u.btree.pIndex = pIdx; pIdx->zName = "auto-index"; pIdx->pTable = pTable; @@ -1703,18 +1717,29 @@ static void constructAutomaticIndex( VdbeComment((v, "for %s", pTable->zName)); /* Fill the automatic index with content */ + sqlite3ExprCachePush(pParse); addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, pLevel->iTabCur); VdbeCoverage(v); + if( pPartial ){ + iContinue = sqlite3VdbeMakeLabel(v); + sqlite3ExprIfFalse(pParse, pPartial, iContinue, SQLITE_JUMPIFNULL); + pLoop->wsFlags |= WHERE_PARTIALIDX; + } regRecord = sqlite3GetTempReg(pParse); sqlite3GenerateIndexKey(pParse, pIdx, pLevel->iTabCur, regRecord, 0, 0, 0, 0); sqlite3VdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord); sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); + if( pPartial ) sqlite3VdbeResolveLabel(v, iContinue); sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+1); VdbeCoverage(v); sqlite3VdbeChangeP5(v, SQLITE_STMTSTATUS_AUTOINDEX); sqlite3VdbeJumpHere(v, addrTop); sqlite3ReleaseTempReg(pParse, regRecord); + sqlite3ExprCachePop(pParse); /* Jump here when skipping the initialization */ sqlite3VdbeJumpHere(v, addrInit); + +end_auto_index_create: + sqlite3ExprDelete(pParse->db, pPartial); } #endif /* SQLITE_OMIT_AUTOMATIC_INDEX */ @@ -1874,7 +1899,6 @@ static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){ } #endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ - #ifdef SQLITE_ENABLE_STAT3_OR_STAT4 /* ** Estimate the location of a particular key among all keys in an @@ -1883,9 +1907,10 @@ static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){ ** aStat[0] Est. number of rows less than pVal ** aStat[1] Est. number of rows equal to pVal ** -** Return SQLITE_OK on success. +** Return the index of the sample that is the smallest sample that +** is greater than or equal to pRec. */ -static void whereKeyStats( +static int whereKeyStats( Parse *pParse, /* Database connection */ Index *pIdx, /* Index to consider domain of */ UnpackedRecord *pRec, /* Vector of values to consider */ @@ -1967,6 +1992,7 @@ static void whereKeyStats( } aStat[0] = iLower + iGap; } + return i; } #endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ @@ -2117,7 +2143,7 @@ static int whereRangeSkipScanEst( ** If either of the upper or lower bound is not present, then NULL is passed in ** place of the corresponding WhereTerm. ** -** The value in (pBuilder->pNew->u.btree.nEq) is the index of the index +** The value in (pBuilder->pNew->u.btree.nEq) is the number of the index ** column subject to the range constraint. Or, equivalently, the number of ** equality constraints optimized by the proposed index scan. For example, ** assuming index p is on t1(a, b), and the SQL query is: @@ -2133,7 +2159,7 @@ static int whereRangeSkipScanEst( ** ** When this function is called, *pnOut is set to the sqlite3LogEst() of the ** number of rows that the index scan is expected to visit without -** considering the range constraints. If nEq is 0, this is the number of +** considering the range constraints. If nEq is 0, then *pnOut is the number of ** rows in the index. Assuming no error occurs, *pnOut is adjusted (reduced) ** to account for the range constraints pLower and pUpper. ** @@ -2157,10 +2183,7 @@ static int whereRangeScanEst( Index *p = pLoop->u.btree.pIndex; int nEq = pLoop->u.btree.nEq; - if( p->nSample>0 - && nEq<p->nSampleCol - && OptimizationEnabled(pParse->db, SQLITE_Stat3) - ){ + if( p->nSample>0 && nEq<p->nSampleCol ){ if( nEq==pBuilder->nRecValid ){ UnpackedRecord *pRec = pBuilder->pRec; tRowcnt a[2]; @@ -2176,15 +2199,19 @@ static int whereRangeScanEst( ** is not a simple variable or literal value), the lower bound of the ** range is $P. Due to a quirk in the way whereKeyStats() works, even ** if $L is available, whereKeyStats() is called for both ($P) and - ** ($P:$L) and the larger of the two returned values used. + ** ($P:$L) and the larger of the two returned values is used. ** ** Similarly, iUpper is to be set to the estimate of the number of rows ** less than the upper bound of the range query. Where the upper bound ** is either ($P) or ($P:$U). Again, even if $U is available, both values ** of iUpper are requested of whereKeyStats() and the smaller used. + ** + ** The number of rows between the two bounds is then just iUpper-iLower. */ - tRowcnt iLower; - tRowcnt iUpper; + tRowcnt iLower; /* Rows less than the lower bound */ + tRowcnt iUpper; /* Rows less than the upper bound */ + int iLwrIdx = -2; /* aSample[] for the lower bound */ + int iUprIdx = -1; /* aSample[] for the upper bound */ if( pRec ){ testcase( pRec->nField!=pBuilder->nRecValid ); @@ -2198,7 +2225,7 @@ static int whereRangeScanEst( /* Determine iLower and iUpper using ($P) only. */ if( nEq==0 ){ iLower = 0; - iUpper = sqlite3LogEstToInt(p->aiRowLogEst[0]); + iUpper = p->nRowEst0; }else{ /* Note: this call could be optimized away - since the same values must ** have been requested when testing key $P in whereEqualScanEst(). */ @@ -2222,7 +2249,7 @@ static int whereRangeScanEst( rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); if( rc==SQLITE_OK && bOk ){ tRowcnt iNew; - whereKeyStats(pParse, p, pRec, 0, a); + iLwrIdx = whereKeyStats(pParse, p, pRec, 0, a); iNew = a[0] + ((pLower->eOperator & (WO_GT|WO_LE)) ? a[1] : 0); if( iNew>iLower ) iLower = iNew; nOut--; @@ -2237,7 +2264,7 @@ static int whereRangeScanEst( rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); if( rc==SQLITE_OK && bOk ){ tRowcnt iNew; - whereKeyStats(pParse, p, pRec, 1, a); + iUprIdx = whereKeyStats(pParse, p, pRec, 1, a); iNew = a[0] + ((pUpper->eOperator & (WO_GT|WO_LE)) ? a[1] : 0); if( iNew<iUpper ) iUpper = iNew; nOut--; @@ -2249,6 +2276,11 @@ static int whereRangeScanEst( if( rc==SQLITE_OK ){ if( iUpper>iLower ){ nNew = sqlite3LogEst(iUpper - iLower); + /* TUNING: If both iUpper and iLower are derived from the same + ** sample, then assume they are 4x more selective. This brings + ** the estimated selectivity more in line with what it would be + ** if estimated without the use of STAT3/4 tables. */ + if( iLwrIdx==iUprIdx ) nNew -= 20; assert( 20==sqlite3LogEst(4) ); }else{ nNew = 10; assert( 10==sqlite3LogEst(2) ); } @@ -2273,12 +2305,15 @@ static int whereRangeScanEst( nNew = whereRangeAdjust(pLower, nOut); nNew = whereRangeAdjust(pUpper, nNew); - /* TUNING: If there is both an upper and lower limit, assume the range is + /* TUNING: If there is both an upper and lower limit and neither limit + ** has an application-defined likelihood(), assume the range is ** reduced by an additional 75%. This means that, by default, an open-ended ** range query (e.g. col > ?) is assumed to match 1/4 of the rows in the ** index. While a closed range (e.g. col BETWEEN ? AND ?) is estimated to ** match 1/64 of the index. */ - if( pLower && pUpper ) nNew -= 20; + if( pLower && pLower->truthProb>0 && pUpper && pUpper->truthProb>0 ){ + nNew -= 20; + } nOut -= (pLower!=0) + (pUpper!=0); if( nNew<10 ) nNew = 10; @@ -2638,7 +2673,7 @@ static int codeAllEqualityTerms( pLoop = pLevel->pWLoop; assert( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 ); nEq = pLoop->u.btree.nEq; - nSkip = pLoop->u.btree.nSkip; + nSkip = pLoop->nSkip; pIdx = pLoop->u.btree.pIndex; assert( pIdx!=0 ); @@ -2752,7 +2787,7 @@ static void explainAppendTerm( static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop, Table *pTab){ Index *pIndex = pLoop->u.btree.pIndex; u16 nEq = pLoop->u.btree.nEq; - u16 nSkip = pLoop->u.btree.nSkip; + u16 nSkip = pLoop->nSkip; int i, j; Column *aCol = pTab->aCol; i16 *aiColumn = pIndex->aiColumn; @@ -2783,11 +2818,14 @@ static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop, Table *pTab){ /* ** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN -** command. If the query being compiled is an EXPLAIN QUERY PLAN, a single -** record is added to the output to describe the table scan strategy in -** pLevel. +** command, or if either SQLITE_DEBUG or SQLITE_ENABLE_STMT_SCANSTATUS was +** defined at compile-time. If it is not a no-op, a single OP_Explain opcode +** is added to the output to describe the table scan strategy in pLevel. +** +** If an OP_Explain opcode is added to the VM, its address is returned. +** Otherwise, if no OP_Explain is coded, zero is returned. */ -static void explainOneScan( +static int explainOneScan( Parse *pParse, /* Parse context */ SrcList *pTabList, /* Table list this loop refers to */ WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ @@ -2795,7 +2833,8 @@ static void explainOneScan( int iFrom, /* Value for "from" column of output */ u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ ){ -#ifndef SQLITE_DEBUG + int ret = 0; +#if !defined(SQLITE_DEBUG) && !defined(SQLITE_ENABLE_STMT_SCANSTATUS) if( pParse->explain==2 ) #endif { @@ -2812,7 +2851,7 @@ static void explainOneScan( pLoop = pLevel->pWLoop; flags = pLoop->wsFlags; - if( (flags&WHERE_MULTI_OR) || (wctrlFlags&WHERE_ONETABLE_ONLY) ) return; + if( (flags&WHERE_MULTI_OR) || (wctrlFlags&WHERE_ONETABLE_ONLY) ) return 0; isSearch = (flags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))!=0 || ((flags&WHERE_VIRTUALTABLE)==0 && (pLoop->u.btree.nEq>0)) @@ -2841,6 +2880,8 @@ static void explainOneScan( if( isSearch ){ zFmt = "PRIMARY KEY"; } + }else if( flags & WHERE_PARTIALIDX ){ + zFmt = "AUTOMATIC PARTIAL COVERING INDEX"; }else if( flags & WHERE_AUTO_INDEX ){ zFmt = "AUTOMATIC COVERING INDEX"; }else if( flags & WHERE_IDX_ONLY ){ @@ -2882,13 +2923,46 @@ static void explainOneScan( } #endif zMsg = sqlite3StrAccumFinish(&str); - sqlite3VdbeAddOp4(v, OP_Explain, iId, iLevel, iFrom, zMsg, P4_DYNAMIC); + ret = sqlite3VdbeAddOp4(v, OP_Explain, iId, iLevel, iFrom, zMsg,P4_DYNAMIC); } + return ret; } #else -# define explainOneScan(u,v,w,x,y,z) +# define explainOneScan(u,v,w,x,y,z) 0 #endif /* SQLITE_OMIT_EXPLAIN */ +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS +/* +** Configure the VM passed as the first argument with an +** sqlite3_stmt_scanstatus() entry corresponding to the scan used to +** implement level pLvl. Argument pSrclist is a pointer to the FROM +** clause that the scan reads data from. +** +** If argument addrExplain is not 0, it must be the address of an +** OP_Explain instruction that describes the same loop. +*/ +static void addScanStatus( + Vdbe *v, /* Vdbe to add scanstatus entry to */ + SrcList *pSrclist, /* FROM clause pLvl reads data from */ + WhereLevel *pLvl, /* Level to add scanstatus() entry for */ + int addrExplain /* Address of OP_Explain (or 0) */ +){ + const char *zObj = 0; + WhereLoop *pLoop = pLvl->pWLoop; + if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 && pLoop->u.btree.pIndex!=0 ){ + zObj = pLoop->u.btree.pIndex->zName; + }else{ + zObj = pSrclist->a[pLvl->iFrom].zName; + } + sqlite3VdbeScanStatus( + v, addrExplain, pLvl->addrBody, pLvl->addrVisit, pLoop->nOut, zObj + ); +} +#else +# define addScanStatus(a, b, c, d) ((void)d) +#endif + + /* ** Generate code for the start of the iLevel-th loop in the WHERE clause @@ -3189,7 +3263,7 @@ static Bitmask codeOneLoopStart( pIdx = pLoop->u.btree.pIndex; iIdxCur = pLevel->iIdxCur; - assert( nEq>=pLoop->u.btree.nSkip ); + assert( nEq>=pLoop->nSkip ); /* If this loop satisfies a sort order (pOrderBy) request that ** was passed to this function to implement a "SELECT min(x) ..." @@ -3206,7 +3280,7 @@ static Bitmask codeOneLoopStart( && pWInfo->nOBSat>0 && (pIdx->nKeyCol>nEq) ){ - assert( pLoop->u.btree.nSkip==0 ); + assert( pLoop->nSkip==0 ); bSeekPastNull = 1; nExtraReg = 1; } @@ -3519,10 +3593,9 @@ static Bitmask codeOneLoopStart( Expr *pExpr = pWC->a[iTerm].pExpr; if( &pWC->a[iTerm] == pTerm ) continue; if( ExprHasProperty(pExpr, EP_FromJoin) ) continue; - testcase( pWC->a[iTerm].wtFlags & TERM_ORINFO ); - testcase( pWC->a[iTerm].wtFlags & TERM_VIRTUAL ); - if( pWC->a[iTerm].wtFlags & (TERM_ORINFO|TERM_VIRTUAL) ) continue; + if( (pWC->a[iTerm].wtFlags & TERM_VIRTUAL)!=0 ) continue; if( (pWC->a[iTerm].eOperator & WO_ALL)==0 ) continue; + testcase( pWC->a[iTerm].wtFlags & TERM_ORINFO ); pExpr = sqlite3ExprDup(db, pExpr, 0); pAndExpr = sqlite3ExprAnd(db, pAndExpr, pExpr); } @@ -3555,9 +3628,11 @@ static Bitmask codeOneLoopStart( assert( pSubWInfo || pParse->nErr || db->mallocFailed ); if( pSubWInfo ){ WhereLoop *pSubLoop; - explainOneScan( + int addrExplain = explainOneScan( pParse, pOrTab, &pSubWInfo->a[0], iLevel, pLevel->iFrom, 0 ); + addScanStatus(v, pOrTab, &pSubWInfo->a[0], addrExplain); + /* This is the sub-WHERE clause body. First skip over ** duplicate rows from prior sub-WHERE clauses, and record the ** rowid (or PRIMARY KEY) for the current row so that the same @@ -3688,6 +3763,10 @@ static Bitmask codeOneLoopStart( } } +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + pLevel->addrVisit = sqlite3VdbeCurrentAddr(v); +#endif + /* Insert code to test every subexpression that can be completely ** computed using the current set of tables. */ @@ -3827,7 +3906,7 @@ static void whereLoopPrint(WhereLoop *p, WhereClause *pWC){ sqlite3_free(z); } if( p->wsFlags & WHERE_SKIPSCAN ){ - sqlite3DebugPrintf(" f %05x %d-%d", p->wsFlags, p->nLTerm,p->u.btree.nSkip); + sqlite3DebugPrintf(" f %05x %d-%d", p->wsFlags, p->nLTerm,p->nSkip); }else{ sqlite3DebugPrintf(" f %05x N %d", p->wsFlags, p->nLTerm); } @@ -3863,7 +3942,6 @@ static void whereLoopClearUnion(sqlite3 *db, WhereLoop *p){ p->u.vtab.idxStr = 0; }else if( (p->wsFlags & WHERE_AUTO_INDEX)!=0 && p->u.btree.pIndex!=0 ){ sqlite3DbFree(db, p->u.btree.pIndex->zColAff); - sqlite3KeyInfoUnref(p->u.btree.pIndex->pKeyInfo); sqlite3DbFree(db, p->u.btree.pIndex); p->u.btree.pIndex = 0; } @@ -3938,10 +4016,11 @@ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){ } /* -** Return TRUE if both of the following are true: +** Return TRUE if all of the following are true: ** ** (1) X has the same or lower cost that Y ** (2) X is a proper subset of Y +** (3) X skips at least as many columns as Y ** ** By "proper subset" we mean that X uses fewer WHERE clause terms ** than Y and that every WHERE clause term used by X is also used @@ -3949,19 +4028,25 @@ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){ ** ** If X is a proper subset of Y then Y is a better choice and ought ** to have a lower cost. This routine returns TRUE when that cost -** relationship is inverted and needs to be adjusted. +** relationship is inverted and needs to be adjusted. The third rule +** was added because if X uses skip-scan less than Y it still might +** deserve a lower cost even if it is a proper subset of Y. */ static int whereLoopCheaperProperSubset( const WhereLoop *pX, /* First WhereLoop to compare */ const WhereLoop *pY /* Compare against this WhereLoop */ ){ int i, j; - if( pX->nLTerm >= pY->nLTerm ) return 0; /* X is not a subset of Y */ + if( pX->nLTerm-pX->nSkip >= pY->nLTerm-pY->nSkip ){ + return 0; /* X is not a subset of Y */ + } + if( pY->nSkip > pX->nSkip ) return 0; if( pX->rRun >= pY->rRun ){ if( pX->rRun > pY->rRun ) return 0; /* X costs more than Y */ if( pX->nOut > pY->nOut ) return 0; /* X costs more than Y */ } for(i=pX->nLTerm-1; i>=0; i--){ + if( pX->aLTerm[i]==0 ) continue; for(j=pY->nLTerm-1; j>=0; j--){ if( pY->aLTerm[j]==pX->aLTerm[i] ) break; } @@ -3983,33 +4068,24 @@ static int whereLoopCheaperProperSubset( ** To say "WhereLoop X is a proper subset of Y" means that X uses fewer ** WHERE clause terms than Y and that every WHERE clause term used by X is ** also used by Y. -** -** This adjustment is omitted for SKIPSCAN loops. In a SKIPSCAN loop, the -** WhereLoop.nLTerm field is not an accurate measure of the number of WHERE -** clause terms covered, since some of the first nLTerm entries in aLTerm[] -** will be NULL (because they are skipped). That makes it more difficult -** to compare the loops. We could add extra code to do the comparison, and -** perhaps we will someday. But SKIPSCAN is sufficiently uncommon, and this -** adjustment is sufficient minor, that it is very difficult to construct -** a test case where the extra code would improve the query plan. Better -** to avoid the added complexity and just omit cost adjustments to SKIPSCAN -** loops. */ static void whereLoopAdjustCost(const WhereLoop *p, WhereLoop *pTemplate){ if( (pTemplate->wsFlags & WHERE_INDEXED)==0 ) return; - if( (pTemplate->wsFlags & WHERE_SKIPSCAN)!=0 ) return; for(; p; p=p->pNextLoop){ if( p->iTab!=pTemplate->iTab ) continue; if( (p->wsFlags & WHERE_INDEXED)==0 ) continue; - if( (p->wsFlags & WHERE_SKIPSCAN)!=0 ) continue; if( whereLoopCheaperProperSubset(p, pTemplate) ){ /* Adjust pTemplate cost downward so that it is cheaper than its - ** subset p */ + ** subset p. */ + WHERETRACE(0x80,("subset cost adjustment %d,%d to %d,%d\n", + pTemplate->rRun, pTemplate->nOut, p->rRun, p->nOut-1)); pTemplate->rRun = p->rRun; pTemplate->nOut = p->nOut - 1; }else if( whereLoopCheaperProperSubset(pTemplate, p) ){ /* Adjust pTemplate cost upward so that it is costlier than p since ** pTemplate is a proper subset of p */ + WHERETRACE(0x80,("subset cost adjustment %d,%d to %d,%d\n", + pTemplate->rRun, pTemplate->nOut, p->rRun, p->nOut+1)); pTemplate->rRun = p->rRun; pTemplate->nOut = p->nOut + 1; } @@ -4054,8 +4130,9 @@ static WhereLoop **whereLoopFindLesser( /* Any loop using an appliation-defined index (or PRIMARY KEY or ** UNIQUE constraint) with one or more == constraints is better - ** than an automatic index. */ + ** than an automatic index. Unless it is a skip-scan. */ if( (p->wsFlags & WHERE_AUTO_INDEX)!=0 + && (pTemplate->nSkip)==0 && (pTemplate->wsFlags & WHERE_INDEXED)!=0 && (pTemplate->wsFlags & WHERE_COLUMN_EQ)!=0 && (p->prereq & pTemplate->prereq)==pTemplate->prereq @@ -4214,10 +4291,30 @@ static int whereLoopInsert(WhereLoopBuilder *pBuilder, WhereLoop *pTemplate){ ** Adjust the WhereLoop.nOut value downward to account for terms of the ** WHERE clause that reference the loop but which are not used by an ** index. -** -** In the current implementation, the first extra WHERE clause term reduces -** the number of output rows by a factor of 10 and each additional term -** reduces the number of output rows by sqrt(2). +* +** For every WHERE clause term that is not used by the index +** and which has a truth probability assigned by one of the likelihood(), +** likely(), or unlikely() SQL functions, reduce the estimated number +** of output rows by the probability specified. +** +** TUNING: For every WHERE clause term that is not used by the index +** and which does not have an assigned truth probability, heuristics +** described below are used to try to estimate the truth probability. +** TODO --> Perhaps this is something that could be improved by better +** table statistics. +** +** Heuristic 1: Estimate the truth probability as 93.75%. The 93.75% +** value corresponds to -1 in LogEst notation, so this means decrement +** the WhereLoop.nOut field for every such WHERE clause term. +** +** Heuristic 2: If there exists one or more WHERE clause terms of the +** form "x==EXPR" and EXPR is not a constant 0 or 1, then make sure the +** final output row estimate is no greater than 1/4 of the total number +** of rows in the table. In other words, assume that x==EXPR will filter +** out at least 3 out of 4 rows. If EXPR is -1 or 0 or 1, then maybe the +** "x" column is boolean or else -1 or 0 or 1 is a common default value +** on the "x" column and so in that case only cap the output row estimate +** at 1/2 instead of 1/4. */ static void whereLoopOutputAdjust( WhereClause *pWC, /* The WHERE clause */ @@ -4226,9 +4323,10 @@ static void whereLoopOutputAdjust( ){ WhereTerm *pTerm, *pX; Bitmask notAllowed = ~(pLoop->prereq|pLoop->maskSelf); - int i, j; - int nEq = 0; /* Number of = constraints not within likely()/unlikely() */ + int i, j, k; + LogEst iReduce = 0; /* pLoop->nOut should not exceed nRow-iReduce */ + assert( (pLoop->wsFlags & WHERE_AUTO_INDEX)==0 ); for(i=pWC->nTerm, pTerm=pWC->a; i>0; i--, pTerm++){ if( (pTerm->wtFlags & TERM_VIRTUAL)!=0 ) break; if( (pTerm->prereqAll & pLoop->maskSelf)==0 ) continue; @@ -4241,20 +4339,26 @@ static void whereLoopOutputAdjust( } if( j<0 ){ if( pTerm->truthProb<=0 ){ + /* If a truth probability is specified using the likelihood() hints, + ** then use the probability provided by the application. */ pLoop->nOut += pTerm->truthProb; }else{ + /* In the absence of explicit truth probabilities, use heuristics to + ** guess a reasonable truth probability. */ pLoop->nOut--; - if( pTerm->eOperator&WO_EQ ) nEq++; + if( pTerm->eOperator&WO_EQ ){ + Expr *pRight = pTerm->pExpr->pRight; + if( sqlite3ExprIsInteger(pRight, &k) && k>=(-1) && k<=1 ){ + k = 10; + }else{ + k = 20; + } + if( iReduce<k ) iReduce = k; + } } } } - /* TUNING: If there is at least one equality constraint in the WHERE - ** clause that does not have a likelihood() explicitly assigned to it - ** then do not let the estimated number of output rows exceed half - ** the number of rows in the table. */ - if( nEq && pLoop->nOut>nRow-10 ){ - pLoop->nOut = nRow - 10; - } + if( pLoop->nOut > nRow-iReduce ) pLoop->nOut = nRow - iReduce; } /* @@ -4295,7 +4399,7 @@ static int whereLoopAddBtreeIndex( Bitmask saved_prereq; /* Original value of pNew->prereq */ u16 saved_nLTerm; /* Original value of pNew->nLTerm */ u16 saved_nEq; /* Original value of pNew->u.btree.nEq */ - u16 saved_nSkip; /* Original value of pNew->u.btree.nSkip */ + u16 saved_nSkip; /* Original value of pNew->nSkip */ u32 saved_wsFlags; /* Original value of pNew->wsFlags */ LogEst saved_nOut; /* Original value of pNew->nOut */ int iCol; /* Index of the column in the table */ @@ -4324,7 +4428,7 @@ static int whereLoopAddBtreeIndex( pTerm = whereScanInit(&scan, pBuilder->pWC, pSrc->iCursor, iCol, opMask, pProbe); saved_nEq = pNew->u.btree.nEq; - saved_nSkip = pNew->u.btree.nSkip; + saved_nSkip = pNew->nSkip; saved_nLTerm = pNew->nLTerm; saved_wsFlags = pNew->wsFlags; saved_prereq = pNew->prereq; @@ -4332,44 +4436,6 @@ static int whereLoopAddBtreeIndex( pNew->rSetup = 0; rSize = pProbe->aiRowLogEst[0]; rLogSize = estLog(rSize); - - /* Consider using a skip-scan if there are no WHERE clause constraints - ** available for the left-most terms of the index, and if the average - ** number of repeats in the left-most terms is at least 18. - ** - ** The magic number 18 is selected on the basis that scanning 17 rows - ** is almost always quicker than an index seek (even though if the index - ** contains fewer than 2^17 rows we assume otherwise in other parts of - ** the code). And, even if it is not, it should not be too much slower. - ** On the other hand, the extra seeks could end up being significantly - ** more expensive. */ - assert( 42==sqlite3LogEst(18) ); - if( saved_nEq==saved_nSkip - && saved_nEq+1<pProbe->nKeyCol - && pProbe->aiRowLogEst[saved_nEq+1]>=42 /* TUNING: Minimum for skip-scan */ - && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK - ){ - LogEst nIter; - pNew->u.btree.nEq++; - pNew->u.btree.nSkip++; - pNew->aLTerm[pNew->nLTerm++] = 0; - pNew->wsFlags |= WHERE_SKIPSCAN; - nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1]; - if( pTerm ){ - /* TUNING: When estimating skip-scan for a term that is also indexable, - ** multiply the cost of the skip-scan by 2.0, to make it a little less - ** desirable than the regular index lookup. */ - nIter += 10; assert( 10==sqlite3LogEst(2) ); - } - pNew->nOut -= nIter; - /* TUNING: Because uncertainties in the estimates for skip-scan queries, - ** add a 1.375 fudge factor to make skip-scan slightly less likely. */ - nIter += 5; - whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul); - pNew->nOut = saved_nOut; - pNew->u.btree.nEq = saved_nEq; - pNew->u.btree.nSkip = saved_nSkip; - } for(; rc==SQLITE_OK && pTerm!=0; pTerm = whereScanNext(&scan)){ u16 eOp = pTerm->eOperator; /* Shorthand for pTerm->eOperator */ LogEst rCostIdx; @@ -4464,7 +4530,6 @@ static int whereLoopAddBtreeIndex( if( nInMul==0 && pProbe->nSample && pNew->u.btree.nEq<=pProbe->nSampleCol - && OptimizationEnabled(db, SQLITE_Stat3) && ((eOp & WO_IN)==0 || !ExprHasProperty(pTerm->pExpr, EP_xIsSelect)) ){ Expr *pExpr = pTerm->pExpr; @@ -4532,10 +4597,45 @@ static int whereLoopAddBtreeIndex( } pNew->prereq = saved_prereq; pNew->u.btree.nEq = saved_nEq; - pNew->u.btree.nSkip = saved_nSkip; + pNew->nSkip = saved_nSkip; pNew->wsFlags = saved_wsFlags; pNew->nOut = saved_nOut; pNew->nLTerm = saved_nLTerm; + + /* Consider using a skip-scan if there are no WHERE clause constraints + ** available for the left-most terms of the index, and if the average + ** number of repeats in the left-most terms is at least 18. + ** + ** The magic number 18 is selected on the basis that scanning 17 rows + ** is almost always quicker than an index seek (even though if the index + ** contains fewer than 2^17 rows we assume otherwise in other parts of + ** the code). And, even if it is not, it should not be too much slower. + ** On the other hand, the extra seeks could end up being significantly + ** more expensive. */ + assert( 42==sqlite3LogEst(18) ); + if( saved_nEq==saved_nSkip + && saved_nEq+1<pProbe->nKeyCol + && pProbe->noSkipScan==0 + && pProbe->aiRowLogEst[saved_nEq+1]>=42 /* TUNING: Minimum for skip-scan */ + && (rc = whereLoopResize(db, pNew, pNew->nLTerm+1))==SQLITE_OK + ){ + LogEst nIter; + pNew->u.btree.nEq++; + pNew->nSkip++; + pNew->aLTerm[pNew->nLTerm++] = 0; + pNew->wsFlags |= WHERE_SKIPSCAN; + nIter = pProbe->aiRowLogEst[saved_nEq] - pProbe->aiRowLogEst[saved_nEq+1]; + pNew->nOut -= nIter; + /* TUNING: Because uncertainties in the estimates for skip-scan queries, + ** add a 1.375 fudge factor to make skip-scan slightly less likely. */ + nIter += 5; + whereLoopAddBtreeIndex(pBuilder, pSrc, pProbe, nIter + nInMul); + pNew->nOut = saved_nOut; + pNew->u.btree.nEq = saved_nEq; + pNew->nSkip = saved_nSkip; + pNew->wsFlags = saved_wsFlags; + } + return rc; } @@ -4595,7 +4695,11 @@ static int whereUsablePartialIndex(int iTab, WhereClause *pWC, Expr *pWhere){ int i; WhereTerm *pTerm; for(i=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){ - if( sqlite3ExprImpliesExpr(pTerm->pExpr, pWhere, iTab) ) return 1; + if( sqlite3ExprImpliesExpr(pTerm->pExpr, pWhere, iTab) + && !ExprHasProperty(pTerm->pExpr, EP_FromJoin) + ){ + return 1; + } } return 0; } @@ -4714,7 +4818,7 @@ static int whereLoopAddBtree( if( pTerm->prereqRight & pNew->maskSelf ) continue; if( termCanDriveIndex(pTerm, pSrc, 0) ){ pNew->u.btree.nEq = 1; - pNew->u.btree.nSkip = 0; + pNew->nSkip = 0; pNew->u.btree.pIndex = 0; pNew->nLTerm = 1; pNew->aLTerm[0] = pTerm; @@ -4755,7 +4859,7 @@ static int whereLoopAddBtree( } rSize = pProbe->aiRowLogEst[0]; pNew->u.btree.nEq = 0; - pNew->u.btree.nSkip = 0; + pNew->nSkip = 0; pNew->nLTerm = 0; pNew->iSortIdx = 0; pNew->rSetup = 0; @@ -5305,7 +5409,7 @@ static i8 wherePathSatisfiesOrderBy( /* Skip over == and IS NULL terms */ if( j<pLoop->u.btree.nEq - && pLoop->u.btree.nSkip==0 + && pLoop->nSkip==0 && ((i = pLoop->aLTerm[j]->eOperator) & (WO_EQ|WO_ISNULL))!=0 ){ if( i & WO_ISNULL ){ @@ -5759,7 +5863,7 @@ static int wherePathSolver(WhereInfo *pWInfo, LogEst nRowEst){ } #ifdef WHERETRACE_ENABLED /* >=2 */ - if( sqlite3WhereTrace>=2 ){ + if( sqlite3WhereTrace & 0x02 ){ sqlite3DebugPrintf("---- after round %d ----\n", iLoop); for(ii=0, pTo=aTo; ii<nTo; ii++, pTo++){ sqlite3DebugPrintf(" %s cost=%-3d nrow=%-3d order=%c", @@ -5878,7 +5982,7 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ pWC = &pWInfo->sWC; pLoop = pBuilder->pNew; pLoop->wsFlags = 0; - pLoop->u.btree.nSkip = 0; + pLoop->nSkip = 0; pTerm = findTerm(pWC, iCur, -1, 0, WO_EQ, 0); if( pTerm ){ pLoop->wsFlags = WHERE_COLUMN_EQ|WHERE_IPK|WHERE_ONEROW; @@ -5890,7 +5994,6 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ }else{ for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ assert( pLoop->aLTermSpace==pLoop->aLTerm ); - assert( ArraySize(pLoop->aLTermSpace)==4 ); if( !IsUniqueIndex(pIdx) || pIdx->pPartIdxWhere!=0 || pIdx->nKeyCol>ArraySize(pLoop->aLTermSpace) @@ -6399,7 +6502,10 @@ WhereInfo *sqlite3WhereBegin( */ notReady = ~(Bitmask)0; for(ii=0; ii<nTabList; ii++){ + int addrExplain; + int wsFlags; pLevel = &pWInfo->a[ii]; + wsFlags = pLevel->pWLoop->wsFlags; #ifndef SQLITE_OMIT_AUTOMATIC_INDEX if( (pLevel->pWLoop->wsFlags & WHERE_AUTO_INDEX)!=0 ){ constructAutomaticIndex(pParse, &pWInfo->sWC, @@ -6407,10 +6513,15 @@ WhereInfo *sqlite3WhereBegin( if( db->mallocFailed ) goto whereBeginError; } #endif - explainOneScan(pParse, pTabList, pLevel, ii, pLevel->iFrom, wctrlFlags); + addrExplain = explainOneScan( + pParse, pTabList, pLevel, ii, pLevel->iFrom, wctrlFlags + ); pLevel->addrBody = sqlite3VdbeCurrentAddr(v); notReady = codeOneLoopStart(pWInfo, ii, notReady); pWInfo->iContinue = pLevel->addrCont; + if( (wsFlags&WHERE_MULTI_OR)==0 && (wctrlFlags&WHERE_ONETABLE_ONLY)==0 ){ + addScanStatus(v, pTabList, pLevel, addrExplain); + } } /* Done. */ |