diff options
Diffstat (limited to 'lib/libsqlite3/src/wal.c')
-rw-r--r-- | lib/libsqlite3/src/wal.c | 296 |
1 files changed, 176 insertions, 120 deletions
diff --git a/lib/libsqlite3/src/wal.c b/lib/libsqlite3/src/wal.c index d134a8b52a3..71f4a3d452f 100644 --- a/lib/libsqlite3/src/wal.c +++ b/lib/libsqlite3/src/wal.c @@ -1504,7 +1504,7 @@ static void walMergesort( ** Free an iterator allocated by walIteratorInit(). */ static void walIteratorFree(WalIterator *p){ - sqlite3ScratchFree(p); + sqlite3_free(p); } /* @@ -1539,7 +1539,7 @@ static int walIteratorInit(Wal *pWal, WalIterator **pp){ nByte = sizeof(WalIterator) + (nSegment-1)*sizeof(struct WalSegment) + iLast*sizeof(ht_slot); - p = (WalIterator *)sqlite3ScratchMalloc(nByte); + p = (WalIterator *)sqlite3_malloc(nByte); if( !p ){ return SQLITE_NOMEM; } @@ -1549,7 +1549,7 @@ static int walIteratorInit(Wal *pWal, WalIterator **pp){ /* Allocate temporary space used by the merge-sort routine. This block ** of memory will be freed before this function returns. */ - aTmp = (ht_slot *)sqlite3ScratchMalloc( + aTmp = (ht_slot *)sqlite3_malloc( sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast) ); if( !aTmp ){ @@ -1586,7 +1586,7 @@ static int walIteratorInit(Wal *pWal, WalIterator **pp){ p->aSegment[i].aPgno = (u32 *)aPgno; } } - sqlite3ScratchFree(aTmp); + sqlite3_free(aTmp); if( rc!=SQLITE_OK ){ walIteratorFree(p); @@ -1624,6 +1624,38 @@ static int walPagesize(Wal *pWal){ } /* +** The following is guaranteed when this function is called: +** +** a) the WRITER lock is held, +** b) the entire log file has been checkpointed, and +** c) any existing readers are reading exclusively from the database +** file - there are no readers that may attempt to read a frame from +** the log file. +** +** This function updates the shared-memory structures so that the next +** client to write to the database (which may be this one) does so by +** writing frames into the start of the log file. +** +** The value of parameter salt1 is used as the aSalt[1] value in the +** new wal-index header. It should be passed a pseudo-random value (i.e. +** one obtained from sqlite3_randomness()). +*/ +static void walRestartHdr(Wal *pWal, u32 salt1){ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + int i; /* Loop counter */ + u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ + pWal->nCkpt++; + pWal->hdr.mxFrame = 0; + sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); + memcpy(&pWal->hdr.aSalt[1], &salt1, 4); + walIndexWriteHdr(pWal); + pInfo->nBackfill = 0; + pInfo->aReadMark[1] = 0; + for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; + assert( pInfo->aReadMark[0]==0 ); +} + +/* ** Copy as much content as we can from the WAL back into the database file ** in response to an sqlite3_wal_checkpoint() request or the equivalent. ** @@ -1657,12 +1689,12 @@ static int walPagesize(Wal *pWal){ static int walCheckpoint( Wal *pWal, /* Wal connection */ int eMode, /* One of PASSIVE, FULL or RESTART */ - int (*xBusyCall)(void*), /* Function to call when busy */ + int (*xBusy)(void*), /* Function to call when busy */ void *pBusyArg, /* Context argument for xBusyHandler */ int sync_flags, /* Flags for OsSync() (or 0) */ u8 *zBuf /* Temporary buffer to use */ ){ - int rc; /* Return code */ + int rc = SQLITE_OK; /* Return code */ int szPage; /* Database page-size */ WalIterator *pIter = 0; /* Wal iterator context */ u32 iDbpage = 0; /* Next database page to write */ @@ -1671,123 +1703,146 @@ static int walCheckpoint( u32 mxPage; /* Max database page to write */ int i; /* Loop counter */ volatile WalCkptInfo *pInfo; /* The checkpoint status information */ - int (*xBusy)(void*) = 0; /* Function to call when waiting for locks */ szPage = walPagesize(pWal); testcase( szPage<=32768 ); testcase( szPage>=65536 ); pInfo = walCkptInfo(pWal); - if( pInfo->nBackfill>=pWal->hdr.mxFrame ) return SQLITE_OK; + if( pInfo->nBackfill<pWal->hdr.mxFrame ){ - /* Allocate the iterator */ - rc = walIteratorInit(pWal, &pIter); - if( rc!=SQLITE_OK ){ - return rc; - } - assert( pIter ); + /* Allocate the iterator */ + rc = walIteratorInit(pWal, &pIter); + if( rc!=SQLITE_OK ){ + return rc; + } + assert( pIter ); - if( eMode!=SQLITE_CHECKPOINT_PASSIVE ) xBusy = xBusyCall; + /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked + ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ + assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); - /* Compute in mxSafeFrame the index of the last frame of the WAL that is - ** safe to write into the database. Frames beyond mxSafeFrame might - ** overwrite database pages that are in use by active readers and thus - ** cannot be backfilled from the WAL. - */ - mxSafeFrame = pWal->hdr.mxFrame; - mxPage = pWal->hdr.nPage; - for(i=1; i<WAL_NREADER; i++){ - u32 y = pInfo->aReadMark[i]; - if( mxSafeFrame>y ){ - assert( y<=pWal->hdr.mxFrame ); - rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); - if( rc==SQLITE_OK ){ - pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED); - walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); - }else if( rc==SQLITE_BUSY ){ - mxSafeFrame = y; - xBusy = 0; - }else{ - goto walcheckpoint_out; + /* Compute in mxSafeFrame the index of the last frame of the WAL that is + ** safe to write into the database. Frames beyond mxSafeFrame might + ** overwrite database pages that are in use by active readers and thus + ** cannot be backfilled from the WAL. + */ + mxSafeFrame = pWal->hdr.mxFrame; + mxPage = pWal->hdr.nPage; + for(i=1; i<WAL_NREADER; i++){ + u32 y = pInfo->aReadMark[i]; + if( mxSafeFrame>y ){ + assert( y<=pWal->hdr.mxFrame ); + rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); + if( rc==SQLITE_OK ){ + pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED); + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + }else if( rc==SQLITE_BUSY ){ + mxSafeFrame = y; + xBusy = 0; + }else{ + goto walcheckpoint_out; + } } } - } - if( pInfo->nBackfill<mxSafeFrame - && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0), 1))==SQLITE_OK - ){ - i64 nSize; /* Current size of database file */ - u32 nBackfill = pInfo->nBackfill; - - /* Sync the WAL to disk */ - if( sync_flags ){ - rc = sqlite3OsSync(pWal->pWalFd, sync_flags); - } + if( pInfo->nBackfill<mxSafeFrame + && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0),1))==SQLITE_OK + ){ + i64 nSize; /* Current size of database file */ + u32 nBackfill = pInfo->nBackfill; - /* If the database may grow as a result of this checkpoint, hint - ** about the eventual size of the db file to the VFS layer. - */ - if( rc==SQLITE_OK ){ - i64 nReq = ((i64)mxPage * szPage); - rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); - if( rc==SQLITE_OK && nSize<nReq ){ - sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); + /* Sync the WAL to disk */ + if( sync_flags ){ + rc = sqlite3OsSync(pWal->pWalFd, sync_flags); } - } + /* If the database may grow as a result of this checkpoint, hint + ** about the eventual size of the db file to the VFS layer. + */ + if( rc==SQLITE_OK ){ + i64 nReq = ((i64)mxPage * szPage); + rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); + if( rc==SQLITE_OK && nSize<nReq ){ + sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); + } + } - /* Iterate through the contents of the WAL, copying data to the db file. */ - while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ - i64 iOffset; - assert( walFramePgno(pWal, iFrame)==iDbpage ); - if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ) continue; - iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; - /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ - rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); - if( rc!=SQLITE_OK ) break; - iOffset = (iDbpage-1)*(i64)szPage; - testcase( IS_BIG_INT(iOffset) ); - rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset); - if( rc!=SQLITE_OK ) break; - } - /* If work was actually accomplished... */ - if( rc==SQLITE_OK ){ - if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ - i64 szDb = pWal->hdr.nPage*(i64)szPage; - testcase( IS_BIG_INT(szDb) ); - rc = sqlite3OsTruncate(pWal->pDbFd, szDb); - if( rc==SQLITE_OK && sync_flags ){ - rc = sqlite3OsSync(pWal->pDbFd, sync_flags); + /* Iterate through the contents of the WAL, copying data to the db file */ + while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ + i64 iOffset; + assert( walFramePgno(pWal, iFrame)==iDbpage ); + if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ){ + continue; } + iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; + /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ + rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); + if( rc!=SQLITE_OK ) break; + iOffset = (iDbpage-1)*(i64)szPage; + testcase( IS_BIG_INT(iOffset) ); + rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset); + if( rc!=SQLITE_OK ) break; } + + /* If work was actually accomplished... */ if( rc==SQLITE_OK ){ - pInfo->nBackfill = mxSafeFrame; + if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ + i64 szDb = pWal->hdr.nPage*(i64)szPage; + testcase( IS_BIG_INT(szDb) ); + rc = sqlite3OsTruncate(pWal->pDbFd, szDb); + if( rc==SQLITE_OK && sync_flags ){ + rc = sqlite3OsSync(pWal->pDbFd, sync_flags); + } + } + if( rc==SQLITE_OK ){ + pInfo->nBackfill = mxSafeFrame; + } } - } - /* Release the reader lock held while backfilling */ - walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); - } + /* Release the reader lock held while backfilling */ + walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); + } - if( rc==SQLITE_BUSY ){ - /* Reset the return code so as not to report a checkpoint failure - ** just because there are active readers. */ - rc = SQLITE_OK; + if( rc==SQLITE_BUSY ){ + /* Reset the return code so as not to report a checkpoint failure + ** just because there are active readers. */ + rc = SQLITE_OK; + } } - /* If this is an SQLITE_CHECKPOINT_RESTART operation, and the entire wal - ** file has been copied into the database file, then block until all - ** readers have finished using the wal file. This ensures that the next - ** process to write to the database restarts the wal file. + /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the + ** entire wal file has been copied into the database file, then block + ** until all readers have finished using the wal file. This ensures that + ** the next process to write to the database restarts the wal file. */ if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ assert( pWal->writeLock ); if( pInfo->nBackfill<pWal->hdr.mxFrame ){ rc = SQLITE_BUSY; - }else if( eMode==SQLITE_CHECKPOINT_RESTART ){ - assert( mxSafeFrame==pWal->hdr.mxFrame ); + }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){ + u32 salt1; + sqlite3_randomness(4, &salt1); + assert( pInfo->nBackfill==pWal->hdr.mxFrame ); rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ + if( eMode==SQLITE_CHECKPOINT_TRUNCATE ){ + /* IMPLEMENTATION-OF: R-44699-57140 This mode works the same way as + ** SQLITE_CHECKPOINT_RESTART with the addition that it also + ** truncates the log file to zero bytes just prior to a + ** successful return. + ** + ** In theory, it might be safe to do this without updating the + ** wal-index header in shared memory, as all subsequent reader or + ** writer clients should see that the entire log file has been + ** checkpointed and behave accordingly. This seems unsafe though, + ** as it would leave the system in a state where the contents of + ** the wal-index header do not match the contents of the + ** file-system. To avoid this, update the wal-index header to + ** indicate that the log file contains zero valid frames. */ + walRestartHdr(pWal, salt1); + rc = sqlite3OsTruncate(pWal->pWalFd, 0); + } walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); } } @@ -2360,7 +2415,7 @@ int sqlite3WalFindFrame( for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ u32 iFrame = aHash[iKey] + iZero; if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){ - /* assert( iFrame>iRead ); -- not true if there is corruption */ + assert( iFrame>iRead || CORRUPT_DB ); iRead = iFrame; } if( (nCollide--)==0 ){ @@ -2573,7 +2628,6 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ return rc; } - /* ** This function is called just before writing a set of frames to the log ** file (see sqlite3WalFrames()). It checks to see if, instead of appending @@ -2606,20 +2660,8 @@ static int walRestartLog(Wal *pWal){ ** In theory it would be Ok to update the cache of the header only ** at this point. But updating the actual wal-index header is also ** safe and means there is no special case for sqlite3WalUndo() - ** to handle if this transaction is rolled back. - */ - int i; /* Loop counter */ - u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ - - pWal->nCkpt++; - pWal->hdr.mxFrame = 0; - sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); - aSalt[1] = salt1; - walIndexWriteHdr(pWal); - pInfo->nBackfill = 0; - pInfo->aReadMark[1] = 0; - for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED; - assert( pInfo->aReadMark[0]==0 ); + ** to handle if this transaction is rolled back. */ + walRestartHdr(pWal, salt1); walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); }else if( rc!=SQLITE_BUSY ){ return rc; @@ -2907,7 +2949,7 @@ int sqlite3WalFrames( */ int sqlite3WalCheckpoint( Wal *pWal, /* Wal connection */ - int eMode, /* PASSIVE, FULL or RESTART */ + int eMode, /* PASSIVE, FULL, RESTART, or TRUNCATE */ int (*xBusy)(void*), /* Function to call when busy */ void *pBusyArg, /* Context argument for xBusyHandler */ int sync_flags, /* Flags to sync db file with (or 0) */ @@ -2919,29 +2961,42 @@ int sqlite3WalCheckpoint( int rc; /* Return code */ int isChanged = 0; /* True if a new wal-index header is loaded */ int eMode2 = eMode; /* Mode to pass to walCheckpoint() */ + int (*xBusy2)(void*) = xBusy; /* Busy handler for eMode2 */ assert( pWal->ckptLock==0 ); assert( pWal->writeLock==0 ); + /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked + ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ + assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); + if( pWal->readOnly ) return SQLITE_READONLY; WALTRACE(("WAL%p: checkpoint begins\n", pWal)); + + /* IMPLEMENTATION-OF: R-62028-47212 All calls obtain an exclusive + ** "checkpoint" lock on the database file. */ rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); if( rc ){ - /* Usually this is SQLITE_BUSY meaning that another thread or process - ** is already running a checkpoint, or maybe a recovery. But it might - ** also be SQLITE_IOERR. */ + /* EVIDENCE-OF: R-10421-19736 If any other process is running a + ** checkpoint operation at the same time, the lock cannot be obtained and + ** SQLITE_BUSY is returned. + ** EVIDENCE-OF: R-53820-33897 Even if there is a busy-handler configured, + ** it will not be invoked in this case. + */ + testcase( rc==SQLITE_BUSY ); + testcase( xBusy!=0 ); return rc; } pWal->ckptLock = 1; - /* If this is a blocking-checkpoint, then obtain the write-lock as well - ** to prevent any writers from running while the checkpoint is underway. - ** This has to be done before the call to walIndexReadHdr() below. + /* IMPLEMENTATION-OF: R-59782-36818 The SQLITE_CHECKPOINT_FULL, RESTART and + ** TRUNCATE modes also obtain the exclusive "writer" lock on the database + ** file. ** - ** If the writer lock cannot be obtained, then a passive checkpoint is - ** run instead. Since the checkpointer is not holding the writer lock, - ** there is no point in blocking waiting for any readers. Assuming no - ** other error occurs, this function will return SQLITE_BUSY to the caller. + ** EVIDENCE-OF: R-60642-04082 If the writer lock cannot be obtained + ** immediately, and a busy-handler is configured, it is invoked and the + ** writer lock retried until either the busy-handler returns 0 or the + ** lock is successfully obtained. */ if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1); @@ -2949,6 +3004,7 @@ int sqlite3WalCheckpoint( pWal->writeLock = 1; }else if( rc==SQLITE_BUSY ){ eMode2 = SQLITE_CHECKPOINT_PASSIVE; + xBusy2 = 0; rc = SQLITE_OK; } } @@ -2966,7 +3022,7 @@ int sqlite3WalCheckpoint( if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = walCheckpoint(pWal, eMode2, xBusy, pBusyArg, sync_flags, zBuf); + rc = walCheckpoint(pWal, eMode2, xBusy2, pBusyArg, sync_flags, zBuf); } /* If no error occurred, set the output variables. */ |