diff options
Diffstat (limited to 'lib/libsqlite3/src/btree.c')
-rw-r--r-- | lib/libsqlite3/src/btree.c | 983 |
1 files changed, 582 insertions, 401 deletions
diff --git a/lib/libsqlite3/src/btree.c b/lib/libsqlite3/src/btree.c index 60bc7de41eb..7ea66e0d3be 100644 --- a/lib/libsqlite3/src/btree.c +++ b/lib/libsqlite3/src/btree.c @@ -9,7 +9,7 @@ ** May you share freely, never taking more than you give. ** ************************************************************************* -** This file implements a external (disk-based) database using BTrees. +** This file implements an external (disk-based) database using BTrees. ** See the header comment on "btreeInt.h" for additional information. ** Including a description of file format and an overview of operation. */ @@ -487,7 +487,9 @@ static void invalidateIncrblobCursors( BtShared *pBt = pBtree->pBt; assert( sqlite3BtreeHoldsMutex(pBtree) ); for(p=pBt->pCursor; p; p=p->pNext){ - if( (p->curFlags & BTCF_Incrblob)!=0 && (isClearTable || p->info.nKey==iRow) ){ + if( (p->curFlags & BTCF_Incrblob)!=0 + && (isClearTable || p->info.nKey==iRow) + ){ p->eState = CURSOR_INVALID; } } @@ -606,7 +608,7 @@ static int saveCursorPosition(BtCursor *pCur){ ** data. */ if( 0==pCur->apPage[0]->intKey ){ - void *pKey = sqlite3Malloc( (int)pCur->nKey ); + void *pKey = sqlite3Malloc( pCur->nKey ); if( pKey ){ rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey); if( rc==SQLITE_OK ){ @@ -629,16 +631,42 @@ static int saveCursorPosition(BtCursor *pCur){ return rc; } +/* Forward reference */ +static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*); + /* ** Save the positions of all cursors (except pExcept) that are open on -** the table with root-page iRoot. Usually, this is called just before cursor -** pExcept is used to modify the table (BtreeDelete() or BtreeInsert()). +** the table with root-page iRoot. "Saving the cursor position" means that +** the location in the btree is remembered in such a way that it can be +** moved back to the same spot after the btree has been modified. This +** routine is called just before cursor pExcept is used to modify the +** table, for example in BtreeDelete() or BtreeInsert(). +** +** Implementation note: This routine merely checks to see if any cursors +** need to be saved. It calls out to saveCursorsOnList() in the (unusual) +** event that cursors are in need to being saved. */ static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ BtCursor *p; assert( sqlite3_mutex_held(pBt->mutex) ); assert( pExcept==0 || pExcept->pBt==pBt ); for(p=pBt->pCursor; p; p=p->pNext){ + if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break; + } + return p ? saveCursorsOnList(p, iRoot, pExcept) : SQLITE_OK; +} + +/* This helper routine to saveAllCursors does the actual work of saving +** the cursors if and when a cursor is found that actually requires saving. +** The common case is that no cursors need to be saved, so this routine is +** broken out from its caller to avoid unnecessary stack pointer movement. +*/ +static int SQLITE_NOINLINE saveCursorsOnList( + BtCursor *p, /* The first cursor that needs saving */ + Pgno iRoot, /* Only save cursor with this iRoot. Save all if zero */ + BtCursor *pExcept /* Do not save this cursor */ +){ + do{ if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ){ if( p->eState==CURSOR_VALID ){ int rc = saveCursorPosition(p); @@ -650,7 +678,8 @@ static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ btreeReleaseAllCursorPages(p); } } - } + p = p->pNext; + }while( p ); return SQLITE_OK; } @@ -735,37 +764,48 @@ static int btreeRestoreCursorPosition(BtCursor *pCur){ SQLITE_OK) /* -** Determine whether or not a cursor has moved from the position it -** was last placed at. Cursors can move when the row they are pointing -** at is deleted out from under them. +** Determine whether or not a cursor has moved from the position where +** it was last placed, or has been invalidated for any other reason. +** Cursors can move when the row they are pointing at is deleted out +** from under them, for example. Cursor might also move if a btree +** is rebalanced. ** -** This routine returns an error code if something goes wrong. The -** integer *pHasMoved is set as follows: +** Calling this routine with a NULL cursor pointer returns false. ** -** 0: The cursor is unchanged -** 1: The cursor is still pointing at the same row, but the pointers -** returned by sqlite3BtreeKeyFetch() or sqlite3BtreeDataFetch() -** might now be invalid because of a balance() or other change to the -** b-tree. -** 2: The cursor is no longer pointing to the row. The row might have -** been deleted out from under the cursor. +** Use the separate sqlite3BtreeCursorRestore() routine to restore a cursor +** back to where it ought to be if this routine returns true. */ -int sqlite3BtreeCursorHasMoved(BtCursor *pCur, int *pHasMoved){ +int sqlite3BtreeCursorHasMoved(BtCursor *pCur){ + return pCur->eState!=CURSOR_VALID; +} + +/* +** This routine restores a cursor back to its original position after it +** has been moved by some outside activity (such as a btree rebalance or +** a row having been deleted out from under the cursor). +** +** On success, the *pDifferentRow parameter is false if the cursor is left +** pointing at exactly the same row. *pDifferntRow is the row the cursor +** was pointing to has been deleted, forcing the cursor to point to some +** nearby row. +** +** This routine should only be called for a cursor that just returned +** TRUE from sqlite3BtreeCursorHasMoved(). +*/ +int sqlite3BtreeCursorRestore(BtCursor *pCur, int *pDifferentRow){ int rc; - if( pCur->eState==CURSOR_VALID ){ - *pHasMoved = 0; - return SQLITE_OK; - } + assert( pCur!=0 ); + assert( pCur->eState!=CURSOR_VALID ); rc = restoreCursorPosition(pCur); if( rc ){ - *pHasMoved = 2; + *pDifferentRow = 1; return rc; } if( pCur->eState!=CURSOR_VALID || NEVER(pCur->skipNext!=0) ){ - *pHasMoved = 2; + *pDifferentRow = 1; }else{ - *pHasMoved = 1; + *pDifferentRow = 0; } return SQLITE_OK; } @@ -930,47 +970,44 @@ static u8 *findOverflowCell(MemPage *pPage, int iCell){ ** are two versions of this function. btreeParseCell() takes a ** cell index as the second argument and btreeParseCellPtr() ** takes a pointer to the body of the cell as its second argument. -** -** Within this file, the parseCell() macro can be called instead of -** btreeParseCellPtr(). Using some compilers, this will be faster. */ static void btreeParseCellPtr( MemPage *pPage, /* Page containing the cell */ u8 *pCell, /* Pointer to the cell text. */ CellInfo *pInfo /* Fill in this structure */ ){ - u16 n; /* Number bytes in cell content header */ + u8 *pIter; /* For scanning through pCell */ u32 nPayload; /* Number of bytes of cell payload */ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - - pInfo->pCell = pCell; assert( pPage->leaf==0 || pPage->leaf==1 ); - n = pPage->childPtrSize; - assert( n==4-4*pPage->leaf ); - if( pPage->intKey ){ - if( pPage->hasData ){ - assert( n==0 ); - n = getVarint32(pCell, nPayload); - }else{ - nPayload = 0; - } - n += getVarint(&pCell[n], (u64*)&pInfo->nKey); - pInfo->nData = nPayload; + if( pPage->intKeyLeaf ){ + assert( pPage->childPtrSize==0 ); + pIter = pCell + getVarint32(pCell, nPayload); + pIter += getVarint(pIter, (u64*)&pInfo->nKey); + }else if( pPage->noPayload ){ + assert( pPage->childPtrSize==4 ); + pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); + pInfo->nPayload = 0; + pInfo->nLocal = 0; + pInfo->iOverflow = 0; + pInfo->pPayload = 0; + return; }else{ - pInfo->nData = 0; - n += getVarint32(&pCell[n], nPayload); + pIter = pCell + pPage->childPtrSize; + pIter += getVarint32(pIter, nPayload); pInfo->nKey = nPayload; } pInfo->nPayload = nPayload; - pInfo->nHeader = n; + pInfo->pPayload = pIter; testcase( nPayload==pPage->maxLocal ); testcase( nPayload==pPage->maxLocal+1 ); - if( likely(nPayload<=pPage->maxLocal) ){ + if( nPayload<=pPage->maxLocal ){ /* This is the (easy) common case where the entire payload fits ** on the local page. No overflow is required. */ - if( (pInfo->nSize = (u16)(n+nPayload))<4 ) pInfo->nSize = 4; + pInfo->nSize = nPayload + (u16)(pIter - pCell); + if( pInfo->nSize<4 ) pInfo->nSize = 4; pInfo->nLocal = (u16)nPayload; pInfo->iOverflow = 0; }else{ @@ -997,18 +1034,16 @@ static void btreeParseCellPtr( }else{ pInfo->nLocal = (u16)minLocal; } - pInfo->iOverflow = (u16)(pInfo->nLocal + n); + pInfo->iOverflow = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell); pInfo->nSize = pInfo->iOverflow + 4; } } -#define parseCell(pPage, iCell, pInfo) \ - btreeParseCellPtr((pPage), findCell((pPage), (iCell)), (pInfo)) static void btreeParseCell( MemPage *pPage, /* Page containing the cell */ int iCell, /* The cell index. First cell is 0 */ CellInfo *pInfo /* Fill in this structure */ ){ - parseCell(pPage, iCell, pInfo); + btreeParseCellPtr(pPage, findCell(pPage, iCell), pInfo); } /* @@ -1018,8 +1053,9 @@ static void btreeParseCell( ** the space used by the cell pointer. */ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ - u8 *pIter = &pCell[pPage->childPtrSize]; - u32 nSize; + u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */ + u8 *pEnd; /* End mark for a varint */ + u32 nSize; /* Size value to return */ #ifdef SQLITE_DEBUG /* The value returned by this function should always be the same as @@ -1030,26 +1066,34 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ btreeParseCellPtr(pPage, pCell, &debuginfo); #endif + if( pPage->noPayload ){ + pEnd = &pIter[9]; + while( (*pIter++)&0x80 && pIter<pEnd ); + assert( pPage->childPtrSize==4 ); + return (u16)(pIter - pCell); + } + nSize = *pIter; + if( nSize>=0x80 ){ + pEnd = &pIter[9]; + nSize &= 0x7f; + do{ + nSize = (nSize<<7) | (*++pIter & 0x7f); + }while( *(pIter)>=0x80 && pIter<pEnd ); + } + pIter++; if( pPage->intKey ){ - u8 *pEnd; - if( pPage->hasData ){ - pIter += getVarint32(pIter, nSize); - }else{ - nSize = 0; - } - /* pIter now points at the 64-bit integer key value, a variable length ** integer. The following block moves pIter to point at the first byte ** past the end of the key value. */ pEnd = &pIter[9]; while( (*pIter++)&0x80 && pIter<pEnd ); - }else{ - pIter += getVarint32(pIter, nSize); } - testcase( nSize==pPage->maxLocal ); testcase( nSize==pPage->maxLocal+1 ); - if( nSize>pPage->maxLocal ){ + if( nSize<=pPage->maxLocal ){ + nSize += (u32)(pIter - pCell); + if( nSize<4 ) nSize = 4; + }else{ int minLocal = pPage->minLocal; nSize = minLocal + (nSize - minLocal) % (pPage->pBt->usableSize - 4); testcase( nSize==pPage->maxLocal ); @@ -1057,16 +1101,9 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ if( nSize>pPage->maxLocal ){ nSize = minLocal; } - nSize += 4; - } - nSize += (u32)(pIter - pCell); - - /* The minimum size of any cell is 4 bytes. */ - if( nSize<4 ){ - nSize = 4; + nSize += 4 + (u16)(pIter - pCell); } - - assert( nSize==debuginfo.nSize ); + assert( nSize==debuginfo.nSize || CORRUPT_DB ); return (u16)nSize; } @@ -1089,7 +1126,6 @@ static void ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell, int *pRC){ if( *pRC ) return; assert( pCell!=0 ); btreeParseCellPtr(pPage, pCell, &info); - assert( (info.nData+(pPage->intKey?0:info.nKey))==info.nPayload ); if( info.iOverflow ){ Pgno ovfl = get4byte(&pCell[info.iOverflow]); ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC); @@ -1106,7 +1142,7 @@ static void ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell, int *pRC){ */ static int defragmentPage(MemPage *pPage){ int i; /* Loop counter */ - int pc; /* Address of a i-th cell */ + int pc; /* Address of the i-th cell */ int hdr; /* Offset to the page header */ int size; /* Size of a cell */ int usableSize; /* Number of usable bytes on a page */ @@ -1197,7 +1233,6 @@ static int defragmentPage(MemPage *pPage){ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ const int hdr = pPage->hdrOffset; /* Local cache of pPage->hdrOffset */ u8 * const data = pPage->aData; /* Local cache of pPage->aData */ - int nFrag; /* Number of fragmented bytes on pPage */ int top; /* First byte of cell content area */ int gap; /* First byte of gap between cell pointers and cell content */ int rc; /* Integer return code */ @@ -1212,25 +1247,26 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ usableSize = pPage->pBt->usableSize; assert( nByte < usableSize-8 ); - nFrag = data[hdr+7]; assert( pPage->cellOffset == hdr + 12 - 4*pPage->leaf ); gap = pPage->cellOffset + 2*pPage->nCell; - top = get2byteNotZero(&data[hdr+5]); - if( gap>top ) return SQLITE_CORRUPT_BKPT; + assert( gap<=65536 ); + top = get2byte(&data[hdr+5]); + if( gap>top ){ + if( top==0 ){ + top = 65536; + }else{ + return SQLITE_CORRUPT_BKPT; + } + } + + /* If there is enough space between gap and top for one more cell pointer + ** array entry offset, and if the freelist is not empty, then search the + ** freelist looking for a free slot big enough to satisfy the request. + */ testcase( gap+2==top ); testcase( gap+1==top ); testcase( gap==top ); - - if( nFrag>=60 ){ - /* Always defragment highly fragmented pages */ - rc = defragmentPage(pPage); - if( rc ) return rc; - top = get2byteNotZero(&data[hdr+5]); - }else if( gap+2<=top ){ - /* Search the freelist looking for a free slot big enough to satisfy - ** the request. The allocation is made from the first free slot in - ** the list that is large enough to accommodate it. - */ + if( gap+2<=top && (data[hdr+1] || data[hdr+2]) ){ int pc, addr; for(addr=hdr+1; (pc = get2byte(&data[addr]))>0; addr=pc){ int size; /* Size of the free slot */ @@ -1243,10 +1279,11 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ testcase( x==4 ); testcase( x==3 ); if( x<4 ){ + if( data[hdr+7]>=60 ) goto defragment_page; /* Remove the slot from the free-list. Update the number of ** fragmented bytes within the page. */ memcpy(&data[addr], &data[pc], 2); - data[hdr+7] = (u8)(nFrag + x); + data[hdr+7] += (u8)x; }else if( size+pc > usableSize ){ return SQLITE_CORRUPT_BKPT; }else{ @@ -1260,11 +1297,13 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ } } - /* Check to make sure there is enough space in the gap to satisfy - ** the allocation. If not, defragment. + /* The request could not be fulfilled using a freelist slot. Check + ** to see if defragmentation is necessary. */ testcase( gap+2+nByte==top ); if( gap+2+nByte>top ){ +defragment_page: + testcase( pPage->nCell==0 ); rc = defragmentPage(pPage); if( rc ) return rc; top = get2byteNotZero(&data[hdr+5]); @@ -1287,90 +1326,100 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ /* ** Return a section of the pPage->aData to the freelist. -** The first byte of the new free block is pPage->aDisk[start] -** and the size of the block is "size" bytes. -** -** Most of the effort here is involved in coalesing adjacent -** free blocks into a single big free block. -*/ -static int freeSpace(MemPage *pPage, int start, int size){ - int addr, pbegin, hdr; - int iLast; /* Largest possible freeblock offset */ - unsigned char *data = pPage->aData; +** The first byte of the new free block is pPage->aData[iStart] +** and the size of the block is iSize bytes. +** +** Adjacent freeblocks are coalesced. +** +** Note that even though the freeblock list was checked by btreeInitPage(), +** that routine will not detect overlap between cells or freeblocks. Nor +** does it detect cells or freeblocks that encrouch into the reserved bytes +** at the end of the page. So do additional corruption checks inside this +** routine and return SQLITE_CORRUPT if any problems are found. +*/ +static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ + u16 iPtr; /* Address of ptr to next freeblock */ + u16 iFreeBlk; /* Address of the next freeblock */ + u8 hdr; /* Page header size. 0 or 100 */ + u8 nFrag = 0; /* Reduction in fragmentation */ + u16 iOrigSize = iSize; /* Original value of iSize */ + u32 iLast = pPage->pBt->usableSize-4; /* Largest possible freeblock offset */ + u32 iEnd = iStart + iSize; /* First byte past the iStart buffer */ + unsigned char *data = pPage->aData; /* Page content */ assert( pPage->pBt!=0 ); assert( sqlite3PagerIswriteable(pPage->pDbPage) ); - assert( start>=pPage->hdrOffset+6+pPage->childPtrSize ); - assert( (start + size) <= (int)pPage->pBt->usableSize ); + assert( iStart>=pPage->hdrOffset+6+pPage->childPtrSize ); + assert( iEnd <= pPage->pBt->usableSize ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - assert( size>=0 ); /* Minimum cell size is 4 */ + assert( iSize>=4 ); /* Minimum cell size is 4 */ + assert( iStart<=iLast ); + /* Overwrite deleted information with zeros when the secure_delete + ** option is enabled */ if( pPage->pBt->btsFlags & BTS_SECURE_DELETE ){ - /* Overwrite deleted information with zeros when the secure_delete - ** option is enabled */ - memset(&data[start], 0, size); - } - - /* Add the space back into the linked list of freeblocks. Note that - ** even though the freeblock list was checked by btreeInitPage(), - ** btreeInitPage() did not detect overlapping cells or - ** freeblocks that overlapped cells. Nor does it detect when the - ** cell content area exceeds the value in the page header. If these - ** situations arise, then subsequent insert operations might corrupt - ** the freelist. So we do need to check for corruption while scanning - ** the freelist. + memset(&data[iStart], 0, iSize); + } + + /* The list of freeblocks must be in ascending order. Find the + ** spot on the list where iStart should be inserted. */ hdr = pPage->hdrOffset; - addr = hdr + 1; - iLast = pPage->pBt->usableSize - 4; - assert( start<=iLast ); - while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){ - if( pbegin<addr+4 ){ - return SQLITE_CORRUPT_BKPT; + iPtr = hdr + 1; + if( data[iPtr+1]==0 && data[iPtr]==0 ){ + iFreeBlk = 0; /* Shortcut for the case when the freelist is empty */ + }else{ + while( (iFreeBlk = get2byte(&data[iPtr]))>0 && iFreeBlk<iStart ){ + if( iFreeBlk<iPtr+4 ) return SQLITE_CORRUPT_BKPT; + iPtr = iFreeBlk; } - addr = pbegin; - } - if( pbegin>iLast ){ - return SQLITE_CORRUPT_BKPT; - } - assert( pbegin>addr || pbegin==0 ); - put2byte(&data[addr], start); - put2byte(&data[start], pbegin); - put2byte(&data[start+2], size); - pPage->nFree = pPage->nFree + (u16)size; - - /* Coalesce adjacent free blocks */ - addr = hdr + 1; - while( (pbegin = get2byte(&data[addr]))>0 ){ - int pnext, psize, x; - assert( pbegin>addr ); - assert( pbegin <= (int)pPage->pBt->usableSize-4 ); - pnext = get2byte(&data[pbegin]); - psize = get2byte(&data[pbegin+2]); - if( pbegin + psize + 3 >= pnext && pnext>0 ){ - int frag = pnext - (pbegin+psize); - if( (frag<0) || (frag>(int)data[hdr+7]) ){ - return SQLITE_CORRUPT_BKPT; + if( iFreeBlk>iLast ) return SQLITE_CORRUPT_BKPT; + assert( iFreeBlk>iPtr || iFreeBlk==0 ); + + /* At this point: + ** iFreeBlk: First freeblock after iStart, or zero if none + ** iPtr: The address of a pointer iFreeBlk + ** + ** Check to see if iFreeBlk should be coalesced onto the end of iStart. + */ + if( iFreeBlk && iEnd+3>=iFreeBlk ){ + nFrag = iFreeBlk - iEnd; + if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_BKPT; + iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]); + iSize = iEnd - iStart; + iFreeBlk = get2byte(&data[iFreeBlk]); + } + + /* If iPtr is another freeblock (that is, if iPtr is not the freelist + ** pointer in the page header) then check to see if iStart should be + ** coalesced onto the end of iPtr. + */ + if( iPtr>hdr+1 ){ + int iPtrEnd = iPtr + get2byte(&data[iPtr+2]); + if( iPtrEnd+3>=iStart ){ + if( iPtrEnd>iStart ) return SQLITE_CORRUPT_BKPT; + nFrag += iStart - iPtrEnd; + iSize = iEnd - iPtr; + iStart = iPtr; } - data[hdr+7] -= (u8)frag; - x = get2byte(&data[pnext]); - put2byte(&data[pbegin], x); - x = pnext + get2byte(&data[pnext+2]) - pbegin; - put2byte(&data[pbegin+2], x); - }else{ - addr = pbegin; } + if( nFrag>data[hdr+7] ) return SQLITE_CORRUPT_BKPT; + data[hdr+7] -= nFrag; } - - /* If the cell content area begins with a freeblock, remove it. */ - if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){ - int top; - pbegin = get2byte(&data[hdr+1]); - memcpy(&data[hdr+1], &data[pbegin], 2); - top = get2byte(&data[hdr+5]) + get2byte(&data[pbegin+2]); - put2byte(&data[hdr+5], top); + if( iStart==get2byte(&data[hdr+5]) ){ + /* The new freeblock is at the beginning of the cell content area, + ** so just extend the cell content area rather than create another + ** freelist entry */ + if( iPtr!=hdr+1 ) return SQLITE_CORRUPT_BKPT; + put2byte(&data[hdr+1], iFreeBlk); + put2byte(&data[hdr+5], iEnd); + }else{ + /* Insert the new freeblock into the freelist */ + put2byte(&data[iPtr], iStart); + put2byte(&data[iStart], iFreeBlk); + put2byte(&data[iStart+2], iSize); } - assert( sqlite3PagerIswriteable(pPage->pDbPage) ); + pPage->nFree += iOrigSize; return SQLITE_OK; } @@ -1397,12 +1446,14 @@ static int decodeFlags(MemPage *pPage, int flagByte){ pBt = pPage->pBt; if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ pPage->intKey = 1; - pPage->hasData = pPage->leaf; + pPage->intKeyLeaf = pPage->leaf; + pPage->noPayload = !pPage->leaf; pPage->maxLocal = pBt->maxLeaf; pPage->minLocal = pBt->minLeaf; }else if( flagByte==PTF_ZERODATA ){ pPage->intKey = 0; - pPage->hasData = 0; + pPage->intKeyLeaf = 0; + pPage->noPayload = 0; pPage->maxLocal = pBt->maxLocal; pPage->minLocal = pBt->minLocal; }else{ @@ -2057,7 +2108,8 @@ static int removeFromSharingList(BtShared *pBt){ /* ** Make sure pBt->pTmpSpace points to an allocation of -** MX_CELL_SIZE(pBt) bytes. +** MX_CELL_SIZE(pBt) bytes with a 4-byte prefix for a left-child +** pointer. */ static void allocateTempSpace(BtShared *pBt){ if( !pBt->pTmpSpace ){ @@ -2072,8 +2124,16 @@ static void allocateTempSpace(BtShared *pBt){ ** it into a database page. This is not actually a problem, but it ** does cause a valgrind error when the 1 or 2 bytes of unitialized ** data is passed to system call write(). So to avoid this error, - ** zero the first 4 bytes of temp space here. */ - if( pBt->pTmpSpace ) memset(pBt->pTmpSpace, 0, 4); + ** zero the first 4 bytes of temp space here. + ** + ** Also: Provide four bytes of initialized space before the + ** beginning of pTmpSpace as an area available to prepend the + ** left-child pointer to the beginning of a cell. + */ + if( pBt->pTmpSpace ){ + memset(pBt->pTmpSpace, 0, 8); + pBt->pTmpSpace += 4; + } } } @@ -2081,8 +2141,11 @@ static void allocateTempSpace(BtShared *pBt){ ** Free the pBt->pTmpSpace allocation */ static void freeTempSpace(BtShared *pBt){ - sqlite3PageFree( pBt->pTmpSpace); - pBt->pTmpSpace = 0; + if( pBt->pTmpSpace ){ + pBt->pTmpSpace -= 4; + sqlite3PageFree(pBt->pTmpSpace); + pBt->pTmpSpace = 0; + } } /* @@ -2108,7 +2171,7 @@ int sqlite3BtreeClose(Btree *p){ ** The call to sqlite3BtreeRollback() drops any table-locks held by ** this handle. */ - sqlite3BtreeRollback(p, SQLITE_OK); + sqlite3BtreeRollback(p, SQLITE_OK, 0); sqlite3BtreeLeave(p); /* If there are still other outstanding references to the shared-btree @@ -2550,7 +2613,7 @@ page1_init_failed: ** false then all cursors are counted. ** ** For the purposes of this routine, a cursor is any cursor that -** is capable of reading or writing to the databse. Cursors that +** is capable of reading or writing to the database. Cursors that ** have been tripped into the CURSOR_FAULT state are not counted. */ static int countValidCursors(BtShared *pBt, int wrOnly){ @@ -2576,11 +2639,11 @@ static void unlockBtreeIfUnused(BtShared *pBt){ assert( sqlite3_mutex_held(pBt->mutex) ); assert( countValidCursors(pBt,0)==0 || pBt->inTransaction>TRANS_NONE ); if( pBt->inTransaction==TRANS_NONE && pBt->pPage1!=0 ){ - assert( pBt->pPage1->aData ); + MemPage *pPage1 = pBt->pPage1; + assert( pPage1->aData ); assert( sqlite3PagerRefcount(pBt->pPager)==1 ); - assert( pBt->pPage1->aData ); - releasePage(pBt->pPage1); pBt->pPage1 = 0; + releasePage(pPage1); } } @@ -3014,7 +3077,7 @@ static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8); ** calling this function again), return SQLITE_DONE. Or, if an error ** occurs, return some other error code. ** -** More specificly, this function attempts to re-organize the database so +** More specifically, this function attempts to re-organize the database so ** that the last page of the file currently in use is no longer in use. ** ** Parameter nFin is the number of pages that this database would contain @@ -3022,7 +3085,7 @@ static int allocateBtreePage(BtShared *, MemPage **, Pgno *, Pgno, u8); ** ** If the bCommit parameter is non-zero, this function assumes that the ** caller will keep calling incrVacuumStep() until it returns SQLITE_DONE -** or an error. bCommit is passed true for an auto-vacuum-on-commmit +** or an error. bCommit is passed true for an auto-vacuum-on-commit ** operation, or false for an incremental vacuum. */ static int incrVacuumStep(BtShared *pBt, Pgno nFin, Pgno iLastPg, int bCommit){ @@ -3401,60 +3464,91 @@ int sqlite3BtreeCommit(Btree *p){ /* ** This routine sets the state to CURSOR_FAULT and the error -** code to errCode for every cursor on BtShared that pBtree -** references. -** -** Every cursor is tripped, including cursors that belong -** to other database connections that happen to be sharing -** the cache with pBtree. -** -** This routine gets called when a rollback occurs. -** All cursors using the same cache must be tripped -** to prevent them from trying to use the btree after -** the rollback. The rollback may have deleted tables -** or moved root pages, so it is not sufficient to -** save the state of the cursor. The cursor must be -** invalidated. -*/ -void sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode){ +** code to errCode for every cursor on any BtShared that pBtree +** references. Or if the writeOnly flag is set to 1, then only +** trip write cursors and leave read cursors unchanged. +** +** Every cursor is a candidate to be tripped, including cursors +** that belong to other database connections that happen to be +** sharing the cache with pBtree. +** +** This routine gets called when a rollback occurs. If the writeOnly +** flag is true, then only write-cursors need be tripped - read-only +** cursors save their current positions so that they may continue +** following the rollback. Or, if writeOnly is false, all cursors are +** tripped. In general, writeOnly is false if the transaction being +** rolled back modified the database schema. In this case b-tree root +** pages may be moved or deleted from the database altogether, making +** it unsafe for read cursors to continue. +** +** If the writeOnly flag is true and an error is encountered while +** saving the current position of a read-only cursor, all cursors, +** including all read-cursors are tripped. +** +** SQLITE_OK is returned if successful, or if an error occurs while +** saving a cursor position, an SQLite error code. +*/ +int sqlite3BtreeTripAllCursors(Btree *pBtree, int errCode, int writeOnly){ BtCursor *p; - if( pBtree==0 ) return; - sqlite3BtreeEnter(pBtree); - for(p=pBtree->pBt->pCursor; p; p=p->pNext){ - int i; - sqlite3BtreeClearCursor(p); - p->eState = CURSOR_FAULT; - p->skipNext = errCode; - for(i=0; i<=p->iPage; i++){ - releasePage(p->apPage[i]); - p->apPage[i] = 0; + int rc = SQLITE_OK; + + assert( (writeOnly==0 || writeOnly==1) && BTCF_WriteFlag==1 ); + if( pBtree ){ + sqlite3BtreeEnter(pBtree); + for(p=pBtree->pBt->pCursor; p; p=p->pNext){ + int i; + if( writeOnly && (p->curFlags & BTCF_WriteFlag)==0 ){ + if( p->eState==CURSOR_VALID ){ + rc = saveCursorPosition(p); + if( rc!=SQLITE_OK ){ + (void)sqlite3BtreeTripAllCursors(pBtree, rc, 0); + break; + } + } + }else{ + sqlite3BtreeClearCursor(p); + p->eState = CURSOR_FAULT; + p->skipNext = errCode; + } + for(i=0; i<=p->iPage; i++){ + releasePage(p->apPage[i]); + p->apPage[i] = 0; + } } + sqlite3BtreeLeave(pBtree); } - sqlite3BtreeLeave(pBtree); + return rc; } /* -** Rollback the transaction in progress. All cursors will be -** invalided by this operation. Any attempt to use a cursor -** that was open at the beginning of this operation will result -** in an error. +** Rollback the transaction in progress. +** +** If tripCode is not SQLITE_OK then cursors will be invalidated (tripped). +** Only write cursors are tripped if writeOnly is true but all cursors are +** tripped if writeOnly is false. Any attempt to use +** a tripped cursor will result in an error. ** ** This will release the write lock on the database file. If there ** are no active cursors, it also releases the read lock. */ -int sqlite3BtreeRollback(Btree *p, int tripCode){ +int sqlite3BtreeRollback(Btree *p, int tripCode, int writeOnly){ int rc; BtShared *pBt = p->pBt; MemPage *pPage1; + assert( writeOnly==1 || writeOnly==0 ); + assert( tripCode==SQLITE_ABORT_ROLLBACK || tripCode==SQLITE_OK ); sqlite3BtreeEnter(p); if( tripCode==SQLITE_OK ){ rc = tripCode = saveAllCursors(pBt, 0, 0); + if( rc ) writeOnly = 0; }else{ rc = SQLITE_OK; } if( tripCode ){ - sqlite3BtreeTripAllCursors(p, tripCode); + int rc2 = sqlite3BtreeTripAllCursors(p, tripCode, writeOnly); + assert( rc==SQLITE_OK || (writeOnly==0 && rc2==SQLITE_OK) ); + if( rc2!=SQLITE_OK ) rc = rc2; } btreeIntegrity(p); @@ -3489,7 +3583,7 @@ int sqlite3BtreeRollback(Btree *p, int tripCode){ } /* -** Start a statement subtransaction. The subtransaction can can be rolled +** Start a statement subtransaction. The subtransaction can be rolled ** back independently of the main transaction. You must start a transaction ** before starting a subtransaction. The subtransaction is ended automatically ** if the main transaction commits or rolls back. @@ -3621,6 +3715,10 @@ static int btreeCursor( if( NEVER(wrFlag && (pBt->btsFlags & BTS_READ_ONLY)!=0) ){ return SQLITE_READONLY; } + if( wrFlag ){ + allocateTempSpace(pBt); + if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM; + } if( iTable==1 && btreePagecount(pBt)==0 ){ assert( wrFlag==0 ); iTable = 0; @@ -3723,7 +3821,7 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){ ** compiler to crash when getCellInfo() is implemented as a macro. ** But there is a measureable speed advantage to using the macro on gcc ** (when less compiler optimizations like -Os or -O0 are used and the -** compiler is not doing agressive inlining.) So we use a real function +** compiler is not doing aggressive inlining.) So we use a real function ** for MSVC and a macro for everything else. Ticket #2457. */ #ifndef NDEBUG @@ -3785,13 +3883,9 @@ int sqlite3BtreeCursorIsValid(BtCursor *pCur){ */ int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){ assert( cursorHoldsMutex(pCur) ); - assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID ); - if( pCur->eState!=CURSOR_VALID ){ - *pSize = 0; - }else{ - getCellInfo(pCur); - *pSize = pCur->info.nKey; - } + assert( pCur->eState==CURSOR_VALID ); + getCellInfo(pCur); + *pSize = pCur->info.nKey; return SQLITE_OK; } @@ -3810,8 +3904,9 @@ int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){ int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){ assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); + assert( pCur->apPage[pCur->iPage]->intKeyLeaf==1 ); getCellInfo(pCur); - *pSize = pCur->info.nData; + *pSize = pCur->info.nPayload; return SQLITE_OK; } @@ -3940,7 +4035,7 @@ static int copyPayload( ** ** If the current cursor entry uses one or more overflow pages and the ** eOp argument is not 2, this function may allocate space for and lazily -** popluates the overflow page-list cache array (BtCursor.aOverflow). +** populates the overflow page-list cache array (BtCursor.aOverflow). ** Subsequent calls use this cache to make seeking to the supplied offset ** more efficient. ** @@ -3962,30 +4057,28 @@ static int accessPayload( ){ unsigned char *aPayload; int rc = SQLITE_OK; - u32 nKey; int iIdx = 0; MemPage *pPage = pCur->apPage[pCur->iPage]; /* Btree page of current entry */ BtShared *pBt = pCur->pBt; /* Btree this cursor belongs to */ #ifdef SQLITE_DIRECT_OVERFLOW_READ - int bEnd; /* True if reading to end of data */ + unsigned char * const pBufStart = pBuf; + int bEnd; /* True if reading to end of data */ #endif assert( pPage ); assert( pCur->eState==CURSOR_VALID ); assert( pCur->aiIdx[pCur->iPage]<pPage->nCell ); assert( cursorHoldsMutex(pCur) ); - assert( eOp!=2 || offset==0 ); /* Always start from beginning for eOp==2 */ + assert( eOp!=2 || offset==0 ); /* Always start from beginning for eOp==2 */ getCellInfo(pCur); - aPayload = pCur->info.pCell + pCur->info.nHeader; - nKey = (pPage->intKey ? 0 : (int)pCur->info.nKey); + aPayload = pCur->info.pPayload; #ifdef SQLITE_DIRECT_OVERFLOW_READ - bEnd = (offset+amt==nKey+pCur->info.nData); + bEnd = offset+amt==pCur->info.nPayload; #endif + assert( offset+amt <= pCur->info.nPayload ); - if( NEVER(offset+amt > nKey+pCur->info.nData) - || &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize] - ){ + if( &aPayload[pCur->info.nLocal] > &pPage->aData[pBt->usableSize] ){ /* Trying to read or write past the end of the data is an error */ return SQLITE_CORRUPT_BKPT; } @@ -4041,7 +4134,9 @@ static int accessPayload( ** entry for the first required overflow page is valid, skip ** directly to it. */ - if( (pCur->curFlags & BTCF_ValidOvfl)!=0 && pCur->aOverflow[offset/ovflSize] ){ + if( (pCur->curFlags & BTCF_ValidOvfl)!=0 + && pCur->aOverflow[offset/ovflSize] + ){ iIdx = (offset/ovflSize); nextPage = pCur->aOverflow[iIdx]; offset = (offset%ovflSize); @@ -4094,6 +4189,7 @@ static int accessPayload( ** 4) there is no open write-transaction, and ** 5) the database is not a WAL database, ** 6) all data from the page is being read. + ** 7) at least 4 bytes have already been read into the output buffer ** ** then data can be read directly from the database file into the ** output buffer, bypassing the page-cache altogether. This speeds @@ -4105,9 +4201,11 @@ static int accessPayload( && pBt->inTransaction==TRANS_READ /* (4) */ && (fd = sqlite3PagerFile(pBt->pPager))->pMethods /* (3) */ && pBt->pPage1->aData[19]==0x01 /* (5) */ + && &pBuf[-4]>=pBufStart /* (7) */ ){ u8 aSave[4]; u8 *aWrite = &pBuf[-4]; + assert( aWrite>=pBufStart ); /* hence (7) */ memcpy(aSave, aWrite, 4); rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1)); nextPage = get4byte(aWrite); @@ -4142,7 +4240,7 @@ static int accessPayload( /* ** Read part of the key associated with cursor pCur. Exactly -** "amt" bytes will be transfered into pBuf[]. The transfer +** "amt" bytes will be transferred into pBuf[]. The transfer ** begins at "offset". ** ** The caller must ensure that pCur is pointing to a valid row @@ -4219,7 +4317,7 @@ static const void *fetchPayload( assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); assert( pCur->info.nSize>0 ); *pAmt = pCur->info.nLocal; - return (void*)(pCur->info.pCell + pCur->info.nHeader); + return (void*)pCur->info.pPayload; } @@ -4462,17 +4560,16 @@ static int moveToRightmost(BtCursor *pCur){ assert( cursorHoldsMutex(pCur) ); assert( pCur->eState==CURSOR_VALID ); - while( rc==SQLITE_OK && !(pPage = pCur->apPage[pCur->iPage])->leaf ){ + while( !(pPage = pCur->apPage[pCur->iPage])->leaf ){ pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); pCur->aiIdx[pCur->iPage] = pPage->nCell; rc = moveToChild(pCur, pgno); + if( rc ) return rc; } - if( rc==SQLITE_OK ){ - pCur->aiIdx[pCur->iPage] = pPage->nCell-1; - pCur->info.nSize = 0; - pCur->curFlags &= ~BTCF_ValidNKey; - } - return rc; + pCur->aiIdx[pCur->iPage] = pPage->nCell-1; + assert( pCur->info.nSize==0 ); + assert( (pCur->curFlags & BTCF_ValidNKey)==0 ); + return SQLITE_OK; } /* Move the cursor to the first entry in the table. Return SQLITE_OK @@ -4603,7 +4700,7 @@ int sqlite3BtreeMovetoUnpacked( if( pIdxKey ){ xRecordCompare = sqlite3VdbeFindCompare(pIdxKey); - pIdxKey->isCorrupt = 0; + pIdxKey->errCode = 0; assert( pIdxKey->default_rc==1 || pIdxKey->default_rc==0 || pIdxKey->default_rc==-1 @@ -4648,7 +4745,7 @@ int sqlite3BtreeMovetoUnpacked( for(;;){ i64 nCellKey; pCell = findCell(pPage, idx) + pPage->childPtrSize; - if( pPage->hasData ){ + if( pPage->intKeyLeaf ){ while( 0x80 <= *(pCell++) ){ if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT; } @@ -4696,14 +4793,14 @@ int sqlite3BtreeMovetoUnpacked( ** single byte varint and the record fits entirely on the main ** b-tree page. */ testcase( pCell+nCell+1==pPage->aDataEnd ); - c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey, 0); + c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey); }else if( !(pCell[1] & 0x80) && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal ){ /* The record-size field is a 2 byte varint and the record ** fits entirely on the main b-tree page. */ testcase( pCell+nCell+2==pPage->aDataEnd ); - c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey, 0); + c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey); }else{ /* The record flows over onto one or more overflow pages. In ** this case the whole cell needs to be parsed, a buffer allocated @@ -4724,10 +4821,13 @@ int sqlite3BtreeMovetoUnpacked( sqlite3_free(pCellKey); goto moveto_finish; } - c = xRecordCompare(nCell, pCellKey, pIdxKey, 0); + c = xRecordCompare(nCell, pCellKey, pIdxKey); sqlite3_free(pCellKey); } - assert( pIdxKey->isCorrupt==0 || c==0 ); + assert( + (pIdxKey->errCode!=SQLITE_CORRUPT || c==0) + && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed) + ); if( c<0 ){ lwr = idx+1; }else if( c>0 ){ @@ -4737,7 +4837,7 @@ int sqlite3BtreeMovetoUnpacked( *pRes = 0; rc = SQLITE_OK; pCur->aiIdx[pCur->iPage] = (u16)idx; - if( pIdxKey->isCorrupt ) rc = SQLITE_CORRUPT; + if( pIdxKey->errCode ) rc = SQLITE_CORRUPT; goto moveto_finish; } if( lwr>upr ) break; @@ -4792,6 +4892,12 @@ int sqlite3BtreeEof(BtCursor *pCur){ ** was already pointing to the last entry in the database before ** this routine was called, then set *pRes=1. ** +** The main entry point is sqlite3BtreeNext(). That routine is optimized +** for the common case of merely incrementing the cell counter BtCursor.aiIdx +** to the next cell on the current page. The (slower) btreeNext() helper +** routine is called when it is necessary to move to a different page or +** to restore the cursor. +** ** The calling function will set *pRes to 0 or 1. The initial *pRes value ** will be 1 if the cursor being stepped corresponds to an SQL index and ** if this routine could have been skipped if that SQL index had been @@ -4801,20 +4907,18 @@ int sqlite3BtreeEof(BtCursor *pCur){ ** SQLite btree implementation does not. (Note that the comdb2 btree ** implementation does use this hint, however.) */ -int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ +static SQLITE_NOINLINE int btreeNext(BtCursor *pCur, int *pRes){ int rc; int idx; MemPage *pPage; assert( cursorHoldsMutex(pCur) ); - assert( pRes!=0 ); - assert( *pRes==0 || *pRes==1 ); assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); + assert( *pRes==0 ); if( pCur->eState!=CURSOR_VALID ){ - invalidateOverflowCache(pCur); + assert( (pCur->curFlags & BTCF_ValidOvfl)==0 ); rc = restoreCursorPosition(pCur); if( rc!=SQLITE_OK ){ - *pRes = 0; return rc; } if( CURSOR_INVALID==pCur->eState ){ @@ -4826,7 +4930,6 @@ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ pCur->eState = CURSOR_VALID; if( pCur->skipNext>0 ){ pCur->skipNext = 0; - *pRes = 0; return SQLITE_OK; } pCur->skipNext = 0; @@ -4844,18 +4947,11 @@ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ ** page into more than one b-tree structure. */ testcase( idx>pPage->nCell ); - pCur->info.nSize = 0; - pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); if( idx>=pPage->nCell ){ if( !pPage->leaf ){ rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8])); - if( rc ){ - *pRes = 0; - return rc; - } - rc = moveToLeftmost(pCur); - *pRes = 0; - return rc; + if( rc ) return rc; + return moveToLeftmost(pCur); } do{ if( pCur->iPage==0 ){ @@ -4866,29 +4962,52 @@ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ moveToParent(pCur); pPage = pCur->apPage[pCur->iPage]; }while( pCur->aiIdx[pCur->iPage]>=pPage->nCell ); - *pRes = 0; if( pPage->intKey ){ - rc = sqlite3BtreeNext(pCur, pRes); + return sqlite3BtreeNext(pCur, pRes); }else{ - rc = SQLITE_OK; + return SQLITE_OK; } - return rc; } + if( pPage->leaf ){ + return SQLITE_OK; + }else{ + return moveToLeftmost(pCur); + } +} +int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ + MemPage *pPage; + assert( cursorHoldsMutex(pCur) ); + assert( pRes!=0 ); + assert( *pRes==0 || *pRes==1 ); + assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); + pCur->info.nSize = 0; + pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); *pRes = 0; + if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur, pRes); + pPage = pCur->apPage[pCur->iPage]; + if( (++pCur->aiIdx[pCur->iPage])>=pPage->nCell ){ + pCur->aiIdx[pCur->iPage]--; + return btreeNext(pCur, pRes); + } if( pPage->leaf ){ return SQLITE_OK; + }else{ + return moveToLeftmost(pCur); } - rc = moveToLeftmost(pCur); - return rc; } - /* ** Step the cursor to the back to the previous entry in the database. If ** successful then set *pRes=0. If the cursor ** was already pointing to the first entry in the database before ** this routine was called, then set *pRes=1. ** +** The main entry point is sqlite3BtreePrevious(). That routine is optimized +** for the common case of merely decrementing the cell counter BtCursor.aiIdx +** to the previous cell on the current page. The (slower) btreePrevious() +** helper routine is called when it is necessary to move to a different page +** or to restore the cursor. +** ** The calling function will set *pRes to 0 or 1. The initial *pRes value ** will be 1 if the cursor being stepped corresponds to an SQL index and ** if this routine could have been skipped if that SQL index had been @@ -4898,22 +5017,20 @@ int sqlite3BtreeNext(BtCursor *pCur, int *pRes){ ** SQLite btree implementation does not. (Note that the comdb2 btree ** implementation does use this hint, however.) */ -int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ +static SQLITE_NOINLINE int btreePrevious(BtCursor *pCur, int *pRes){ int rc; MemPage *pPage; assert( cursorHoldsMutex(pCur) ); assert( pRes!=0 ); - assert( *pRes==0 || *pRes==1 ); + assert( *pRes==0 ); assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); - pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl); + assert( (pCur->curFlags & (BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey))==0 ); + assert( pCur->info.nSize==0 ); if( pCur->eState!=CURSOR_VALID ){ - if( ALWAYS(pCur->eState>=CURSOR_REQUIRESEEK) ){ - rc = btreeRestoreCursorPosition(pCur); - if( rc!=SQLITE_OK ){ - *pRes = 0; - return rc; - } + rc = restoreCursorPosition(pCur); + if( rc!=SQLITE_OK ){ + return rc; } if( CURSOR_INVALID==pCur->eState ){ *pRes = 1; @@ -4924,7 +5041,6 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ pCur->eState = CURSOR_VALID; if( pCur->skipNext<0 ){ pCur->skipNext = 0; - *pRes = 0; return SQLITE_OK; } pCur->skipNext = 0; @@ -4936,10 +5052,7 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ if( !pPage->leaf ){ int idx = pCur->aiIdx[pCur->iPage]; rc = moveToChild(pCur, get4byte(findCell(pPage, idx))); - if( rc ){ - *pRes = 0; - return rc; - } + if( rc ) return rc; rc = moveToRightmost(pCur); }else{ while( pCur->aiIdx[pCur->iPage]==0 ){ @@ -4950,8 +5063,8 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ } moveToParent(pCur); } - pCur->info.nSize = 0; - pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + assert( pCur->info.nSize==0 ); + assert( (pCur->curFlags & (BTCF_ValidNKey|BTCF_ValidOvfl))==0 ); pCur->aiIdx[pCur->iPage]--; pPage = pCur->apPage[pCur->iPage]; @@ -4961,9 +5074,25 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ rc = SQLITE_OK; } } - *pRes = 0; return rc; } +int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ + assert( cursorHoldsMutex(pCur) ); + assert( pRes!=0 ); + assert( *pRes==0 || *pRes==1 ); + assert( pCur->skipNext==0 || pCur->eState!=CURSOR_VALID ); + *pRes = 0; + pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey); + pCur->info.nSize = 0; + if( pCur->eState!=CURSOR_VALID + || pCur->aiIdx[pCur->iPage]==0 + || pCur->apPage[pCur->iPage]->leaf==0 + ){ + return btreePrevious(pCur, pRes); + } + pCur->aiIdx[pCur->iPage]--; + return SQLITE_OK; +} /* ** Allocate a new page from the database file. @@ -5204,7 +5333,7 @@ static int allocateBtreePage( memcpy(&aData[8+closest*4], &aData[4+k*4], 4); } put4byte(&aData[4], k-1); - noContent = !btreeGetHasContent(pBt, *pPgno) ? PAGER_GET_NOCONTENT : 0; + noContent = !btreeGetHasContent(pBt, *pPgno)? PAGER_GET_NOCONTENT : 0; rc = btreeGetPage(pBt, *pPgno, ppPage, noContent); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite((*ppPage)->pDbPage); @@ -5237,7 +5366,7 @@ static int allocateBtreePage( ** here are confined to those pages that lie between the end of the ** database image and the end of the database file. */ - int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate)) ? PAGER_GET_NOCONTENT : 0; + int bNoContent = (0==IfNotOmitAV(pBt->bDoTruncate))? PAGER_GET_NOCONTENT:0; rc = sqlite3PagerWrite(pBt->pPage1->pDbPage); if( rc ) return rc; @@ -5436,9 +5565,15 @@ static void freePage(MemPage *pPage, int *pRC){ } /* -** Free any overflow pages associated with the given Cell. +** Free any overflow pages associated with the given Cell. Write the +** local Cell size (the number of bytes on the original page, omitting +** overflow) into *pnSize. */ -static int clearCell(MemPage *pPage, unsigned char *pCell){ +static int clearCell( + MemPage *pPage, /* The page that contains the Cell */ + unsigned char *pCell, /* First byte of the Cell */ + u16 *pnSize /* Write the size of the Cell here */ +){ BtShared *pBt = pPage->pBt; CellInfo info; Pgno ovflPgno; @@ -5448,6 +5583,7 @@ static int clearCell(MemPage *pPage, unsigned char *pCell){ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); btreeParseCellPtr(pPage, pCell, &info); + *pnSize = info.nSize; if( info.iOverflow==0 ){ return SQLITE_OK; /* No overflow pages. Return without doing anything */ } @@ -5531,7 +5667,6 @@ static int fillInCell( BtShared *pBt = pPage->pBt; Pgno pgnoOvfl = 0; int nHeader; - CellInfo info; assert( sqlite3_mutex_held(pPage->pBt->mutex) ); @@ -5541,23 +5676,17 @@ static int fillInCell( || sqlite3PagerIswriteable(pPage->pDbPage) ); /* Fill in the header. */ - nHeader = 0; - if( !pPage->leaf ){ - nHeader += 4; - } - if( pPage->hasData ){ - nHeader += putVarint32(&pCell[nHeader], nData+nZero); + nHeader = pPage->childPtrSize; + nPayload = nData + nZero; + if( pPage->intKeyLeaf ){ + nHeader += putVarint32(&pCell[nHeader], nPayload); }else{ - nData = nZero = 0; + assert( nData==0 ); + assert( nZero==0 ); } nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey); - btreeParseCellPtr(pPage, pCell, &info); - assert( info.nHeader==nHeader ); - assert( info.nKey==nKey ); - assert( info.nData==(u32)(nData+nZero) ); - /* Fill in the payload */ - nPayload = nData + nZero; + /* Fill in the payload size */ if( pPage->intKey ){ pSrc = pData; nSrc = nData; @@ -5566,15 +5695,55 @@ static int fillInCell( if( NEVER(nKey>0x7fffffff || pKey==0) ){ return SQLITE_CORRUPT_BKPT; } - nPayload += (int)nKey; + nPayload = (int)nKey; pSrc = pKey; nSrc = (int)nKey; } - *pnSize = info.nSize; - spaceLeft = info.nLocal; + if( nPayload<=pPage->maxLocal ){ + n = nHeader + nPayload; + testcase( n==3 ); + testcase( n==4 ); + if( n<4 ) n = 4; + *pnSize = n; + spaceLeft = nPayload; + pPrior = pCell; + }else{ + int mn = pPage->minLocal; + n = mn + (nPayload - mn) % (pPage->pBt->usableSize - 4); + testcase( n==pPage->maxLocal ); + testcase( n==pPage->maxLocal+1 ); + if( n > pPage->maxLocal ) n = mn; + spaceLeft = n; + *pnSize = n + nHeader + 4; + pPrior = &pCell[nHeader+n]; + } pPayload = &pCell[nHeader]; - pPrior = &pCell[info.iOverflow]; + /* At this point variables should be set as follows: + ** + ** nPayload Total payload size in bytes + ** pPayload Begin writing payload here + ** spaceLeft Space available at pPayload. If nPayload>spaceLeft, + ** that means content must spill into overflow pages. + ** *pnSize Size of the local cell (not counting overflow pages) + ** pPrior Where to write the pgno of the first overflow page + ** + ** Use a call to btreeParseCellPtr() to verify that the values above + ** were computed correctly. + */ +#if SQLITE_DEBUG + { + CellInfo info; + btreeParseCellPtr(pPage, pCell, &info); + assert( nHeader=(int)(info.pPayload - pCell) ); + assert( info.nKey==nKey ); + assert( *pnSize == info.nSize ); + assert( spaceLeft == info.nLocal ); + assert( pPrior == &pCell[info.iOverflow] ); + } +#endif + + /* Write the payload into the local Cell and any extra into overflow pages */ while( nPayload>0 ){ if( spaceLeft==0 ){ #ifndef SQLITE_OMIT_AUTOVACUUM @@ -5715,11 +5884,6 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ ** in pTemp or the original pCell) and also record its index. ** Allocating a new entry in pPage->aCell[] implies that ** pPage->nOverflow is incremented. -** -** If nSkip is non-zero, then do not copy the first nSkip bytes of the -** cell. The caller will overwrite them after this function returns. If -** nSkip is non-zero, then pCell may not point to an invalid memory location -** (but pCell+nSkip is always valid). */ static void insertCell( MemPage *pPage, /* Page into which we are copying */ @@ -5736,7 +5900,6 @@ static void insertCell( int ins; /* Index in data[] where new cell pointer is inserted */ int cellOffset; /* Address of first cell pointer in data[] */ u8 *data; /* The content of the whole page */ - int nSkip = (iChild ? 4 : 0); if( *pRC ) return; @@ -5754,7 +5917,7 @@ static void insertCell( assert( sz==cellSizePtr(pPage, pCell) || (sz==8 && iChild>0) ); if( pPage->nOverflow || sz+2>pPage->nFree ){ if( pTemp ){ - memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip); + memcpy(pTemp, pCell, sz); pCell = pTemp; } if( iChild ){ @@ -5783,7 +5946,7 @@ static void insertCell( assert( idx+sz <= (int)pPage->pBt->usableSize ); pPage->nCell++; pPage->nFree -= (u16)(2 + sz); - memcpy(&data[idx+nSkip], pCell+nSkip, sz-nSkip); + memcpy(&data[idx], pCell, sz); if( iChild ){ put4byte(&data[idx], iChild); } @@ -5806,7 +5969,7 @@ static void insertCell( ** The cells are guaranteed to fit on the page. */ static void assemblePage( - MemPage *pPage, /* The page to be assemblied */ + MemPage *pPage, /* The page to be assembled */ int nCell, /* The number of cells to add to this page */ u8 **apCell, /* Pointers to cell bodies */ u16 *aSize /* Sizes of the cells */ @@ -6282,7 +6445,7 @@ static int balance_nonroot( ** leafData: 1 if pPage holds key+data and pParent holds only keys. */ leafCorrection = apOld[0]->leaf*4; - leafData = apOld[0]->hasData; + leafData = apOld[0]->intKeyLeaf; for(i=0; i<nOld; i++){ int limit; @@ -6472,7 +6635,7 @@ static int balance_nonroot( } /* - ** Put the new pages in accending order. This helps to + ** Put the new pages in ascending order. This helps to ** keep entries in the disk file in order so that a scan ** of the table is a linear scan through the file. That ** in turn helps the operating system to deliver pages @@ -6858,7 +7021,7 @@ static int balance(BtCursor *pCur){ rc = sqlite3PagerWrite(pParent->pDbPage); if( rc==SQLITE_OK ){ #ifndef SQLITE_OMIT_QUICKBALANCE - if( pPage->hasData + if( pPage->intKeyLeaf && pPage->nOverflow==1 && pPage->aiOvfl[0]==pPage->nCell && pParent->pgno!=1 @@ -6867,7 +7030,7 @@ static int balance(BtCursor *pCur){ /* Call balance_quick() to create a new sibling of pPage on which ** to store the overflow cell. balance_quick() inserts a new cell ** into pParent, which may cause pParent overflow. If this - ** happens, the next interation of the do-loop will balance pParent + ** happens, the next iteration of the do-loop will balance pParent ** use either balance_nonroot() or balance_deeper(). Until this ** happens, the overflow cell is stored in the aBalanceQuickSpace[] ** buffer. @@ -6944,7 +7107,7 @@ static int balance(BtCursor *pCur){ ** MovetoUnpacked() to seek cursor pCur to (pKey, nKey) has already ** been performed. seekResult is the search result returned (a negative ** number if pCur points at an entry that is smaller than (pKey, nKey), or -** a positive value if pCur points at an etry that is larger than +** a positive value if pCur points at an entry that is larger than ** (pKey, nKey)). ** ** If the seekResult parameter is non-zero, then the caller guarantees that @@ -6977,7 +7140,8 @@ int sqlite3BtreeInsert( } assert( cursorHoldsMutex(pCur) ); - assert( (pCur->curFlags & BTCF_WriteFlag)!=0 && pBt->inTransaction==TRANS_WRITE + assert( (pCur->curFlags & BTCF_WriteFlag)!=0 + && pBt->inTransaction==TRANS_WRITE && (pBt->btsFlags & BTS_READ_ONLY)==0 ); assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); @@ -7010,7 +7174,8 @@ int sqlite3BtreeInsert( /* If the cursor is currently on the last row and we are appending a ** new row onto the end, set the "loc" to avoid an unnecessary btreeMoveto() ** call */ - if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0 && pCur->info.nKey==nKey-1 ){ + if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0 + && pCur->info.nKey==nKey-1 ){ loc = -1; } } @@ -7029,9 +7194,8 @@ int sqlite3BtreeInsert( pCur->pgnoRoot, nKey, nData, pPage->pgno, loc==0 ? "overwrite" : "new entry")); assert( pPage->isInit ); - allocateTempSpace(pBt); newCell = pBt->pTmpSpace; - if( newCell==0 ) return SQLITE_NOMEM; + assert( newCell!=0 ); rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew); if( rc ) goto end_insert; assert( szNew==cellSizePtr(pPage, newCell) ); @@ -7048,8 +7212,7 @@ int sqlite3BtreeInsert( if( !pPage->leaf ){ memcpy(newCell, oldCell, 4); } - szOld = cellSizePtr(pPage, oldCell); - rc = clearCell(pPage, oldCell); + rc = clearCell(pPage, oldCell, &szOld); dropCell(pPage, idx, szOld, &rc); if( rc ) goto end_insert; }else if( loc<0 && pPage->nCell>0 ){ @@ -7101,7 +7264,7 @@ end_insert: /* ** Delete the entry that the cursor is pointing to. The cursor -** is left pointing at a arbitrary location. +** is left pointing at an arbitrary location. */ int sqlite3BtreeDelete(BtCursor *pCur){ Btree *p = pCur->pBtree; @@ -7111,6 +7274,7 @@ int sqlite3BtreeDelete(BtCursor *pCur){ unsigned char *pCell; /* Pointer to cell to delete */ int iCellIdx; /* Index of cell to delete */ int iCellDepth; /* Depth of node containing pCell */ + u16 szCell; /* Size of the cell being deleted */ assert( cursorHoldsMutex(pCur) ); assert( pBt->inTransaction==TRANS_WRITE ); @@ -7159,8 +7323,8 @@ int sqlite3BtreeDelete(BtCursor *pCur){ rc = sqlite3PagerWrite(pPage->pDbPage); if( rc ) return rc; - rc = clearCell(pPage, pCell); - dropCell(pPage, iCellIdx, cellSizePtr(pPage, pCell), &rc); + rc = clearCell(pPage, pCell, &szCell); + dropCell(pPage, iCellIdx, szCell, &rc); if( rc ) return rc; /* If the cell deleted was not located on a leaf page, then the cursor @@ -7177,10 +7341,8 @@ int sqlite3BtreeDelete(BtCursor *pCur){ pCell = findCell(pLeaf, pLeaf->nCell-1); nCell = cellSizePtr(pLeaf, pCell); assert( MX_CELL_SIZE(pBt) >= nCell ); - - allocateTempSpace(pBt); pTmp = pBt->pTmpSpace; - + assert( pTmp!=0 ); rc = sqlite3PagerWrite(pLeaf->pDbPage); insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc); dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc); @@ -7392,6 +7554,7 @@ static int clearDatabasePage( unsigned char *pCell; int i; int hdr; + u16 szCell; assert( sqlite3_mutex_held(pBt->mutex) ); if( pgno>btreePagecount(pBt) ){ @@ -7407,7 +7570,7 @@ static int clearDatabasePage( rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange); if( rc ) goto cleardatabasepage_out; } - rc = clearCell(pPage, pCell); + rc = clearCell(pPage, pCell, &szCell); if( rc ) goto cleardatabasepage_out; } if( !pPage->leaf ){ @@ -7753,11 +7916,11 @@ Pager *sqlite3BtreePager(Btree *p){ */ static void checkAppendMsg( IntegrityCk *pCheck, - char *zMsg1, const char *zFormat, ... ){ va_list ap; + char zBuf[200]; if( !pCheck->mxErr ) return; pCheck->mxErr--; pCheck->nErr++; @@ -7765,8 +7928,9 @@ static void checkAppendMsg( if( pCheck->errMsg.nChar ){ sqlite3StrAccumAppend(&pCheck->errMsg, "\n", 1); } - if( zMsg1 ){ - sqlite3StrAccumAppendAll(&pCheck->errMsg, zMsg1); + if( pCheck->zPfx ){ + sqlite3_snprintf(sizeof(zBuf), zBuf, pCheck->zPfx, pCheck->v1, pCheck->v2); + sqlite3StrAccumAppendAll(&pCheck->errMsg, zBuf); } sqlite3VXPrintf(&pCheck->errMsg, 1, zFormat, ap); va_end(ap); @@ -7799,19 +7963,19 @@ static void setPageReferenced(IntegrityCk *pCheck, Pgno iPg){ /* ** Add 1 to the reference count for page iPage. If this is the second ** reference to the page, add an error message to pCheck->zErrMsg. -** Return 1 if there are 2 ore more references to the page and 0 if +** Return 1 if there are 2 or more references to the page and 0 if ** if this is the first reference to the page. ** ** Also check that the page number is in bounds. */ -static int checkRef(IntegrityCk *pCheck, Pgno iPage, char *zContext){ +static int checkRef(IntegrityCk *pCheck, Pgno iPage){ if( iPage==0 ) return 1; if( iPage>pCheck->nPage ){ - checkAppendMsg(pCheck, zContext, "invalid page number %d", iPage); + checkAppendMsg(pCheck, "invalid page number %d", iPage); return 1; } if( getPageReferenced(pCheck, iPage) ){ - checkAppendMsg(pCheck, zContext, "2nd reference to page %d", iPage); + checkAppendMsg(pCheck, "2nd reference to page %d", iPage); return 1; } setPageReferenced(pCheck, iPage); @@ -7828,8 +7992,7 @@ static void checkPtrmap( IntegrityCk *pCheck, /* Integrity check context */ Pgno iChild, /* Child page number */ u8 eType, /* Expected pointer map type */ - Pgno iParent, /* Expected pointer map parent page number */ - char *zContext /* Context description (used for error msg) */ + Pgno iParent /* Expected pointer map parent page number */ ){ int rc; u8 ePtrmapType; @@ -7838,12 +8001,12 @@ static void checkPtrmap( rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent); if( rc!=SQLITE_OK ){ if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ) pCheck->mallocFailed = 1; - checkAppendMsg(pCheck, zContext, "Failed to read ptrmap key=%d", iChild); + checkAppendMsg(pCheck, "Failed to read ptrmap key=%d", iChild); return; } if( ePtrmapType!=eType || iPtrmapParent!=iParent ){ - checkAppendMsg(pCheck, zContext, + checkAppendMsg(pCheck, "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", iChild, eType, iParent, ePtrmapType, iPtrmapParent); } @@ -7858,8 +8021,7 @@ static void checkList( IntegrityCk *pCheck, /* Integrity checking context */ int isFreeList, /* True for a freelist. False for overflow page list */ int iPage, /* Page number for first page in the list */ - int N, /* Expected number of pages in the list */ - char *zContext /* Context for error messages */ + int N /* Expected number of pages in the list */ ){ int i; int expected = N; @@ -7868,14 +8030,14 @@ static void checkList( DbPage *pOvflPage; unsigned char *pOvflData; if( iPage<1 ){ - checkAppendMsg(pCheck, zContext, + checkAppendMsg(pCheck, "%d of %d pages missing from overflow list starting at %d", N+1, expected, iFirst); break; } - if( checkRef(pCheck, iPage, zContext) ) break; + if( checkRef(pCheck, iPage) ) break; if( sqlite3PagerGet(pCheck->pPager, (Pgno)iPage, &pOvflPage) ){ - checkAppendMsg(pCheck, zContext, "failed to get page %d", iPage); + checkAppendMsg(pCheck, "failed to get page %d", iPage); break; } pOvflData = (unsigned char *)sqlite3PagerGetData(pOvflPage); @@ -7883,11 +8045,11 @@ static void checkList( int n = get4byte(&pOvflData[4]); #ifndef SQLITE_OMIT_AUTOVACUUM if( pCheck->pBt->autoVacuum ){ - checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0, zContext); + checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0); } #endif if( n>(int)pCheck->pBt->usableSize/4-2 ){ - checkAppendMsg(pCheck, zContext, + checkAppendMsg(pCheck, "freelist leaf count too big on page %d", iPage); N--; }else{ @@ -7895,10 +8057,10 @@ static void checkList( Pgno iFreePage = get4byte(&pOvflData[8+i*4]); #ifndef SQLITE_OMIT_AUTOVACUUM if( pCheck->pBt->autoVacuum ){ - checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0, zContext); + checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0); } #endif - checkRef(pCheck, iFreePage, zContext); + checkRef(pCheck, iFreePage); } N -= n; } @@ -7911,7 +8073,7 @@ static void checkList( */ if( pCheck->pBt->autoVacuum && N>0 ){ i = get4byte(pOvflData); - checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage, zContext); + checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage); } } #endif @@ -7943,7 +8105,6 @@ static void checkList( static int checkTreePage( IntegrityCk *pCheck, /* Context for the sanity check */ int iPage, /* Page number of the page to check */ - char *zParentContext, /* Parent context */ i64 *pnParentMinKey, i64 *pnParentMaxKey ){ @@ -7954,23 +8115,26 @@ static int checkTreePage( u8 *data; BtShared *pBt; int usableSize; - char zContext[100]; char *hit = 0; i64 nMinKey = 0; i64 nMaxKey = 0; - - sqlite3_snprintf(sizeof(zContext), zContext, "Page %d: ", iPage); + const char *saved_zPfx = pCheck->zPfx; + int saved_v1 = pCheck->v1; + int saved_v2 = pCheck->v2; /* Check that the page exists */ pBt = pCheck->pBt; usableSize = pBt->usableSize; if( iPage==0 ) return 0; - if( checkRef(pCheck, iPage, zParentContext) ) return 0; + if( checkRef(pCheck, iPage) ) return 0; + pCheck->zPfx = "Page %d: "; + pCheck->v1 = iPage; if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ - checkAppendMsg(pCheck, zContext, + checkAppendMsg(pCheck, "unable to get the page. error code=%d", rc); - return 0; + depth = -1; + goto end_of_check; } /* Clear MemPage.isInit to make sure the corruption detection code in @@ -7978,10 +8142,11 @@ static int checkTreePage( pPage->isInit = 0; if( (rc = btreeInitPage(pPage))!=0 ){ assert( rc==SQLITE_CORRUPT ); /* The only possible error from InitPage */ - checkAppendMsg(pCheck, zContext, + checkAppendMsg(pCheck, "btreeInitPage() returns error code %d", rc); releasePage(pPage); - return 0; + depth = -1; + goto end_of_check; } /* Check out all the cells. @@ -7994,23 +8159,23 @@ static int checkTreePage( /* Check payload overflow pages */ - sqlite3_snprintf(sizeof(zContext), zContext, - "On tree page %d cell %d: ", iPage, i); + pCheck->zPfx = "On tree page %d cell %d: "; + pCheck->v1 = iPage; + pCheck->v2 = i; pCell = findCell(pPage,i); btreeParseCellPtr(pPage, pCell, &info); - sz = info.nData; - if( !pPage->intKey ) sz += (int)info.nKey; + sz = info.nPayload; /* For intKey pages, check that the keys are in order. */ - else if( i==0 ) nMinKey = nMaxKey = info.nKey; - else{ - if( info.nKey <= nMaxKey ){ - checkAppendMsg(pCheck, zContext, - "Rowid %lld out of order (previous was %lld)", info.nKey, nMaxKey); + if( pPage->intKey ){ + if( i==0 ){ + nMinKey = nMaxKey = info.nKey; + }else if( info.nKey <= nMaxKey ){ + checkAppendMsg(pCheck, + "Rowid %lld out of order (previous was %lld)", info.nKey, nMaxKey); } nMaxKey = info.nKey; } - assert( sz==info.nPayload ); if( (sz>info.nLocal) && (&pCell[info.iOverflow]<=&pPage->aData[pBt->usableSize]) ){ @@ -8018,10 +8183,10 @@ static int checkTreePage( Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ - checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage, zContext); + checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage); } #endif - checkList(pCheck, 0, pgnoOvfl, nPage, zContext); + checkList(pCheck, 0, pgnoOvfl, nPage); } /* Check sanity of left child page. @@ -8030,12 +8195,12 @@ static int checkTreePage( pgno = get4byte(pCell); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ - checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext); + checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); } #endif - d2 = checkTreePage(pCheck, pgno, zContext, &nMinKey, i==0 ? NULL : &nMaxKey); + d2 = checkTreePage(pCheck, pgno, &nMinKey, i==0?NULL:&nMaxKey); if( i>0 && d2!=depth ){ - checkAppendMsg(pCheck, zContext, "Child page depth differs"); + checkAppendMsg(pCheck, "Child page depth differs"); } depth = d2; } @@ -8043,37 +8208,39 @@ static int checkTreePage( if( !pPage->leaf ){ pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); - sqlite3_snprintf(sizeof(zContext), zContext, - "On page %d at right child: ", iPage); + pCheck->zPfx = "On page %d at right child: "; + pCheck->v1 = iPage; #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ - checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext); + checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); } #endif - checkTreePage(pCheck, pgno, zContext, NULL, !pPage->nCell ? NULL : &nMaxKey); + checkTreePage(pCheck, pgno, NULL, !pPage->nCell?NULL:&nMaxKey); } /* For intKey leaf pages, check that the min/max keys are in order ** with any left/parent/right pages. */ + pCheck->zPfx = "Page %d: "; + pCheck->v1 = iPage; if( pPage->leaf && pPage->intKey ){ /* if we are a left child page */ if( pnParentMinKey ){ /* if we are the left most child page */ if( !pnParentMaxKey ){ if( nMaxKey > *pnParentMinKey ){ - checkAppendMsg(pCheck, zContext, + checkAppendMsg(pCheck, "Rowid %lld out of order (max larger than parent min of %lld)", nMaxKey, *pnParentMinKey); } }else{ if( nMinKey <= *pnParentMinKey ){ - checkAppendMsg(pCheck, zContext, + checkAppendMsg(pCheck, "Rowid %lld out of order (min less than parent min of %lld)", nMinKey, *pnParentMinKey); } if( nMaxKey > *pnParentMaxKey ){ - checkAppendMsg(pCheck, zContext, + checkAppendMsg(pCheck, "Rowid %lld out of order (max larger than parent max of %lld)", nMaxKey, *pnParentMaxKey); } @@ -8082,7 +8249,7 @@ static int checkTreePage( /* else if we're a right child page */ } else if( pnParentMaxKey ){ if( nMinKey <= *pnParentMaxKey ){ - checkAppendMsg(pCheck, zContext, + checkAppendMsg(pCheck, "Rowid %lld out of order (min less than parent max of %lld)", nMinKey, *pnParentMaxKey); } @@ -8094,6 +8261,7 @@ static int checkTreePage( data = pPage->aData; hdr = pPage->hdrOffset; hit = sqlite3PageMalloc( pBt->pageSize ); + pCheck->zPfx = 0; if( hit==0 ){ pCheck->mallocFailed = 1; }else{ @@ -8111,7 +8279,8 @@ static int checkTreePage( size = cellSizePtr(pPage, &data[pc]); } if( (int)(pc+size-1)>=usableSize ){ - checkAppendMsg(pCheck, 0, + pCheck->zPfx = 0; + checkAppendMsg(pCheck, "Corruption detected in cell %d on page %d",i,iPage); }else{ for(j=pc+size-1; j>=pc; j--) hit[j]++; @@ -8133,19 +8302,24 @@ static int checkTreePage( if( hit[i]==0 ){ cnt++; }else if( hit[i]>1 ){ - checkAppendMsg(pCheck, 0, + checkAppendMsg(pCheck, "Multiple uses for byte %d of page %d", i, iPage); break; } } if( cnt!=data[hdr+7] ){ - checkAppendMsg(pCheck, 0, + checkAppendMsg(pCheck, "Fragmentation of %d bytes reported as %d on page %d", cnt, data[hdr+7], iPage); } } sqlite3PageFree(hit); releasePage(pPage); + +end_of_check: + pCheck->zPfx = saved_zPfx; + pCheck->v1 = saved_v1; + pCheck->v2 = saved_v2; return depth+1; } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ @@ -8186,6 +8360,9 @@ char *sqlite3BtreeIntegrityCheck( sCheck.mxErr = mxErr; sCheck.nErr = 0; sCheck.mallocFailed = 0; + sCheck.zPfx = 0; + sCheck.v1 = 0; + sCheck.v2 = 0; *pnErr = 0; if( sCheck.nPage==0 ){ sqlite3BtreeLeave(p); @@ -8205,8 +8382,10 @@ char *sqlite3BtreeIntegrityCheck( /* Check the integrity of the freelist */ + sCheck.zPfx = "Main freelist: "; checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), - get4byte(&pBt->pPage1->aData[36]), "Main freelist: "); + get4byte(&pBt->pPage1->aData[36])); + sCheck.zPfx = 0; /* Check all the tables. */ @@ -8214,10 +8393,12 @@ char *sqlite3BtreeIntegrityCheck( if( aRoot[i]==0 ) continue; #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum && aRoot[i]>1 ){ - checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0, 0); + checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0); } #endif - checkTreePage(&sCheck, aRoot[i], "List of tree roots: ", NULL, NULL); + sCheck.zPfx = "List of tree roots: "; + checkTreePage(&sCheck, aRoot[i], NULL, NULL); + sCheck.zPfx = 0; } /* Make sure every page in the file is referenced @@ -8225,7 +8406,7 @@ char *sqlite3BtreeIntegrityCheck( for(i=1; i<=sCheck.nPage && sCheck.mxErr; i++){ #ifdef SQLITE_OMIT_AUTOVACUUM if( getPageReferenced(&sCheck, i)==0 ){ - checkAppendMsg(&sCheck, 0, "Page %d is never used", i); + checkAppendMsg(&sCheck, "Page %d is never used", i); } #else /* If the database supports auto-vacuum, make sure no tables contain @@ -8233,11 +8414,11 @@ char *sqlite3BtreeIntegrityCheck( */ if( getPageReferenced(&sCheck, i)==0 && (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){ - checkAppendMsg(&sCheck, 0, "Page %d is never used", i); + checkAppendMsg(&sCheck, "Page %d is never used", i); } if( getPageReferenced(&sCheck, i)!=0 && (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){ - checkAppendMsg(&sCheck, 0, "Pointer map page %d is referenced", i); + checkAppendMsg(&sCheck, "Pointer map page %d is referenced", i); } #endif } @@ -8247,7 +8428,7 @@ char *sqlite3BtreeIntegrityCheck( ** of the integrity check. */ if( NEVER(nRef != sqlite3PagerRefcount(pBt->pPager)) ){ - checkAppendMsg(&sCheck, 0, + checkAppendMsg(&sCheck, "Outstanding page count goes from %d to %d during this analysis", nRef, sqlite3PagerRefcount(pBt->pPager) ); @@ -8443,7 +8624,7 @@ int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void *z){ ** required in case any of them are holding references to an xFetch ** version of the b-tree page modified by the accessPayload call below. ** - ** Note that pCsr must be open on a BTREE_INTKEY table and saveCursorPosition() + ** Note that pCsr must be open on a INTKEY table and saveCursorPosition() ** and hence saveAllCursors() cannot fail on a BTREE_INTKEY table, hence ** saveAllCursors can only return SQLITE_OK. */ |