1Add new virtual table 'recover' to src/ and the amalgamation. 2 3Since recover.c is in somewhat active development, it is possible that 4the patch below will not reliably re-create the file. 5 6shess@chromium.org 7 8Generated with: 9git diff --cached --relative=third_party/sqlite/src --src-prefix='' --dst-prefix='' > third_party/sqlite/recover.patch 10[--cached because otherwise the diff adding recover.c wasn't generated.] 11 12diff --git Makefile.in Makefile.in 13index f3239f3..216742c 100644 14--- Makefile.in 15+++ Makefile.in 16@@ -251,6 +251,7 @@ SRC = \ 17 $(TOP)/src/prepare.c \ 18 $(TOP)/src/printf.c \ 19 $(TOP)/src/random.c \ 20+ $(TOP)/src/recover.c \ 21 $(TOP)/src/resolve.c \ 22 $(TOP)/src/rowset.c \ 23 $(TOP)/src/select.c \ 24diff --git src/sqlite.h.in src/sqlite.h.in 25index 62b9326..fb76659 100644 26--- src/sqlite.h.in 27+++ src/sqlite.h.in 28@@ -6403,6 +6403,17 @@ int sqlite3_wal_checkpoint_v2( 29 #define SQLITE_CHECKPOINT_RESTART 2 30 31 32+/* Begin recover.patch for Chromium */ 33+/* 34+** Call to initialize the recover virtual-table modules (see recover.c). 35+** 36+** This could be loaded by default in main.c, but that would make the 37+** virtual table available to Web SQL. Breaking it out allows only 38+** selected users to enable it (currently sql/recovery.cc). 39+*/ 40+int recoverVtableInit(sqlite3 *db); 41+/* End recover.patch for Chromium */ 42+ 43 /* 44 ** Undo the hack that converts floating point types to integer for 45 ** builds on processors without floating point support. 
46diff --git tool/mksqlite3c.tcl tool/mksqlite3c.tcl 47index fa99f2d..df2df07 100644 48--- tool/mksqlite3c.tcl 49+++ tool/mksqlite3c.tcl 50@@ -293,6 +293,8 @@ foreach file { 51 main.c 52 notify.c 53 54+ recover.c 55+ 56 fts3.c 57 fts3_aux.c 58 fts3_expr.c 59diff --git src/recover.c src/recover.c 60new file mode 100644 61index 0000000..6430c8b 62--- /dev/null 63+++ src/recover.c 64@@ -0,0 +1,2130 @@ 65+/* 66+** 2012 Jan 11 67+** 68+** The author disclaims copyright to this source code. In place of 69+** a legal notice, here is a blessing: 70+** 71+** May you do good and not evil. 72+** May you find forgiveness for yourself and forgive others. 73+** May you share freely, never taking more than you give. 74+*/ 75+/* TODO(shess): THIS MODULE IS STILL EXPERIMENTAL. DO NOT USE IT. */ 76+/* Implements a virtual table "recover" which can be used to recover 77+ * data from a corrupt table. The table is walked manually, with 78+ * corrupt items skipped. Additionally, any errors while reading will 79+ * be skipped. 80+ * 81+ * Given a table with this definition: 82+ * 83+ * CREATE TABLE Stuff ( 84+ * name TEXT PRIMARY KEY, 85+ * value TEXT NOT NULL 86+ * ); 87+ * 88+ * to recover the data from the table, you could do something like: 89+ * 90+ * -- Attach another database, the original is not trustworthy. 91+ * ATTACH DATABASE '/tmp/db.db' AS rdb; 92+ * -- Create a new version of the table. 93+ * CREATE TABLE rdb.Stuff ( 94+ * name TEXT PRIMARY KEY, 95+ * value TEXT NOT NULL 96+ * ); 97+ * -- This will read the original table's data. 98+ * CREATE VIRTUAL TABLE temp.recover_Stuff using recover( 99+ * main.Stuff, 100+ * name TEXT STRICT NOT NULL, -- only real TEXT data allowed 101+ * value TEXT STRICT NOT NULL 102+ * ); 103+ * -- Corruption means the UNIQUE constraint may no longer hold for 104+ * -- Stuff, so either OR REPLACE or OR IGNORE must be used.
105+ * INSERT OR REPLACE INTO rdb.Stuff (rowid, name, value ) 106+ * SELECT rowid, name, value FROM temp.recover_Stuff; 107+ * DROP TABLE temp.recover_Stuff; 108+ * DETACH DATABASE rdb; 109+ * -- Move db.db to replace original db in filesystem. 110+ * 111+ * 112+ * Usage 113+ * 114+ * Given the goal of dealing with corruption, it would not be safe to 115+ * create a recovery table in the database being recovered. So 116+ * recovery tables must be created in the temp database. They are not 117+ * appropriate to persist, in any case. [As a bonus, sqlite_master 118+ * tables can be recovered. Perhaps more cute than useful, though.] 119+ * 120+ * The parameters are a specifier for the table to read, and a column 121+ * definition for each bit of data stored in that table. The named 122+ * table must be convertible to a root page number by reading the 123+ * sqlite_master table. Bare table names are assumed to be in 124+ * database 0 ("main"), other databases can be specified in db.table 125+ * fashion. 126+ * 127+ * Column definitions are similar to BUT NOT THE SAME AS those 128+ * provided to CREATE statements: 129+ * column-def: column-name [type-name [STRICT] [NOT NULL]] 130+ * type-name: (ANY|ROWID|INTEGER|FLOAT|NUMERIC|TEXT|BLOB) 131+ * 132+ * Only those exact type names are accepted, there is no type 133+ * intuition. The only constraints accepted are STRICT (see below) 134+ * and NOT NULL. Anything unexpected will cause the create to fail. 135+ * 136+ * ANY is a convenience to indicate that manifest typing is desired. 137+ * It is equivalent to not specifying a type at all. The results for 138+ * such columns will have the type of the data's storage. The exposed 139+ * schema will contain no type for that column. 140+ * 141+ * ROWID is used for columns representing aliases to the rowid 142+ * (INTEGER PRIMARY KEY, with or without AUTOINCREMENT), to make the 143+ * concept explicit.
Such columns are actually stored as NULL, so 144+ * they cannot be simply ignored. The exposed schema will be INTEGER 145+ * for that column. 146+ * 147+ * NOT NULL causes rows with a NULL in that column to be skipped. It 148+ * also adds NOT NULL to the column in the exposed schema. If the 149+ * table has ever had columns added using ALTER TABLE, then those 150+ * columns implicitly contain NULL for rows which have not been 151+ * updated. [Workaround using COALESCE() in your SELECT statement.] 152+ * 153+ * The created table is read-only, with no indices. Any SELECT will 154+ * be a full-table scan, returning each valid row read from the 155+ * storage of the backing table. The rowid will be the rowid of the 156+ * row from the backing table. "Valid" means: 157+ * - The cell metadata for the row is well-formed. Mainly this means that 158+ * the cell header info describes a payload of the size indicated by 159+ * the cell's payload size. 160+ * - The cell does not run off the page. 161+ * - The cell does not overlap any other cell on the page. 162+ * - The cell doesn't contain too many columns. 163+ * - The types of the serialized data match the indicated types (see below). 164+ * 165+ * 166+ * Type affinity versus type storage. 167+ * 168+ * http://www.sqlite.org/datatype3.html describes SQLite's type 169+ * affinity system. The system provides for automated coercion of 170+ * types in certain cases, transparently enough that many developers 171+ * do not realize that it is happening. Importantly, it implies that 172+ * the raw data stored in the database may not have the obvious type. 173+ * 174+ * Differences between the stored data types and the expected data 175+ * types may be a signal of corruption. This module makes some 176+ * allowances for automatic coercion. It is important to be conscious 177+ * of the difference between the schema exposed by the module, and the 178+ * data types read from storage.
The following table describes how 179+ * the module interprets things: 180+ * 181+ * type schema data STRICT 182+ * ---- ------ ---- ------ 183+ * ANY <none> any any 184+ * ROWID INTEGER n/a n/a 185+ * INTEGER INTEGER integer integer 186+ * FLOAT FLOAT integer or float float 187+ * NUMERIC NUMERIC integer, float, or text integer or float 188+ * TEXT TEXT text or blob text 189+ * BLOB BLOB blob blob 190+ * 191+ * type is the type provided to the recover module, schema is the 192+ * schema exposed by the module, data is the acceptable types of data 193+ * decoded from storage, and STRICT is a modification of that. 194+ * 195+ * A very loose recovery system might use ANY for all columns, then 196+ * use the appropriate sqlite3_column_*() calls to coerce to expected 197+ * types. This doesn't provide much protection if a page from a 198+ * different table with the same column count is linked into an 199+ * inappropriate btree. 200+ * 201+ * A very tight recovery system might use STRICT to enforce typing on 202+ * all columns, preferring to skip rows which are valid at the storage 203+ * level but don't contain the right types. Note that FLOAT STRICT is 204+ * almost certainly not appropriate, since integral values are 205+ * transparently stored as integers, when that is more efficient. 206+ * 207+ * Another option is to use ANY for all columns and inspect each 208+ * result manually (using sqlite3_column_*). This should only be 209+ * necessary in cases where developers have used manifest typing (test 210+ * to make sure before you decide that you aren't using manifest 211+ * typing!). 212+ * 213+ * 214+ * Caveats 215+ * 216+ * Leaf pages not referenced by interior nodes will not be found. 217+ * 218+ * Leaf pages referenced from interior nodes of other tables will not 219+ * be resolved. 220+ * 221+ * Rows referencing invalid overflow pages will be skipped. 222+ * 223+ * SQLite rows have a header which describes how to interpret the rest 224+ * of the payload.
The header can be valid in cases where the rest of 225+ * the record is actually corrupt (in the sense that the data is not 226+ * the intended data). This can especially happen WRT overflow pages, 227+ * as lack of atomic updates between pages is the primary form of 228+ * corruption I have seen in the wild. 229+ */ 230+/* The implementation is via a series of cursors. The cursor 231+ * implementations follow the pattern: 232+ * 233+ * // Creates the cursor using various initialization info. 234+ * int cursorCreate(...); 235+ * 236+ * // Returns 1 if there is no more data, 0 otherwise. 237+ * int cursorEOF(Cursor *pCursor); 238+ * 239+ * // Various accessors can be used if not at EOF. 240+ * 241+ * // Move to the next item. 242+ * int cursorNext(Cursor *pCursor); 243+ * 244+ * // Destroy the memory associated with the cursor. 245+ * void cursorDestroy(Cursor *pCursor); 246+ * 247+ * References in the following are to sections at 248+ * http://www.sqlite.org/fileformat2.html . 249+ * 250+ * RecoverLeafCursor iterates the records in a leaf table node 251+ * described in section 1.5 "B-tree Pages". When the node is 252+ * exhausted, an interior cursor is used to get the next leaf node, 253+ * and iteration continues there. 254+ * 255+ * RecoverInteriorCursor iterates the child pages in an interior table 256+ * node described in section 1.5 "B-tree Pages". When the node is 257+ * exhausted, a parent interior cursor is used to get the next 258+ * interior node at the same level, and iteration continues there. 259+ * 260+ * Together these record the path from the leaf level to the root of 261+ * the tree. Iteration happens from the leaves rather than the root 262+ * both for efficiency and putting the special case at the front of 263+ * the list is easier to implement. 264+ * 265+ * RecoverCursor uses a RecoverLeafCursor to iterate the rows of a 266+ * table, returning results via the SQLite virtual table interface. 
267+ */ 268+/* TODO(shess): It might be useful to allow DEFAULT in types to 269+ * specify what to do for NULL when an ALTER TABLE case comes up. 270+ * Unfortunately, simply adding it to the exposed schema and using 271+ * sqlite3_result_null() does not cause the default to be generated. 272+ * Handling it ourselves seems hard, unfortunately. 273+ */ 274+ 275+#include <assert.h> 276+#include <ctype.h> 277+#include <stdio.h> 278+#include <string.h> 279+ 280+/* Internal SQLite things that are used: 281+ * u32, u64, i64 types. 282+ * Btree, Pager, and DbPage structs. 283+ * DbPage.pData, .pPager, and .pgno 284+ * sqlite3 struct. 285+ * sqlite3BtreePager() and sqlite3BtreeGetPageSize() 286+ * sqlite3PagerAcquire() and sqlite3PagerUnref() 287+ * getVarint(). 288+ */ 289+#include "sqliteInt.h" 290+ 291+/* For debugging. */ 292+#if 0 293+#define FNENTRY() fprintf(stderr, "In %s\n", __FUNCTION__) 294+#else 295+#define FNENTRY() 296+#endif 297+ 298+/* Generic constants and helper functions. */ 299+ 300+static const unsigned char kTableLeafPage = 0x0D; 301+static const unsigned char kTableInteriorPage = 0x05; 302+ 303+/* From section 1.5. */ 304+static const unsigned kiPageTypeOffset = 0; 305+static const unsigned kiPageFreeBlockOffset = 1; 306+static const unsigned kiPageCellCountOffset = 3; 307+static const unsigned kiPageCellContentOffset = 5; 308+static const unsigned kiPageFragmentedBytesOffset = 7; 309+static const unsigned knPageLeafHeaderBytes = 8; 310+/* Interior pages contain an additional field. */ 311+static const unsigned kiPageRightChildOffset = 8; 312+static const unsigned kiPageInteriorHeaderBytes = 12; 313+ 314+/* Accepted types are specified by a mask. */ 315+#define MASK_ROWID (1<<0) 316+#define MASK_INTEGER (1<<1) 317+#define MASK_FLOAT (1<<2) 318+#define MASK_TEXT (1<<3) 319+#define MASK_BLOB (1<<4) 320+#define MASK_NULL (1<<5) 321+ 322+/* Helpers to decode fixed-size fields.
*/ 323+static u32 decodeUnsigned16(const unsigned char *pData){ 324+ return (pData[0]<<8) + pData[1]; 325+} 326+static u32 decodeUnsigned32(const unsigned char *pData){ 327+ return (decodeUnsigned16(pData)<<16) + decodeUnsigned16(pData+2); 328+} 329+static i64 decodeSigned(const unsigned char *pData, unsigned nBytes){ 330+ i64 r = (char)(*pData); 331+ while( --nBytes ){ 332+ r <<= 8; 333+ r += *(++pData); 334+ } 335+ return r; 336+} 337+/* Derived from vdbeaux.c, sqlite3VdbeSerialGet(), case 7. */ 338+/* TODO(shess): Determine if swapMixedEndianFloat() applies. */ 339+static double decodeFloat64(const unsigned char *pData){ 340+#if !defined(NDEBUG) 341+ static const u64 t1 = ((u64)0x3ff00000)<<32; 342+ static const double r1 = 1.0; 343+ u64 t2 = t1; 344+ assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 ); 345+#endif 346+ i64 x = decodeSigned(pData, 8); 347+ double d; 348+ memcpy(&d, &x, sizeof(x)); 349+ return d; 350+} 351+ 352+/* Return true if a varint can safely be read from pData/nData. */ 353+/* TODO(shess): DbPage points into the middle of a buffer which 354+ * contains the page data before DbPage. So code should always be 355+ * able to read a small number of varints safely. Consider whether to 356+ * trust that or not. 357+ */ 358+static int checkVarint(const unsigned char *pData, unsigned nData){ 359+ unsigned i; 360+ 361+ /* In the worst case the decoder takes all 8 bits of the 9th byte. */ 362+ if( nData>=9 ){ 363+ return 1; 364+ } 365+ 366+ /* Look for a high-bit-clear byte in what's left. */ 367+ for( i=0; i<nData; ++i ){ 368+ if( !(pData[i]&0x80) ){ 369+ return 1; 370+ } 371+ } 372+ 373+ /* Cannot decode in the space given. */ 374+ return 0; 375+} 376+ 377+/* Return 1 if n varints can be read from pData/nData. */ 378+static int checkVarints(const unsigned char *pData, unsigned nData, 379+ unsigned n){ 380+ unsigned nCur = 0; /* Byte offset within current varint. */ 381+ unsigned nFound = 0; /* Number of varints found. 
*/ 382+ unsigned i; 383+ 384+ /* In the worst case the decoder takes all 8 bits of the 9th byte. */ 385+ if( nData>=9*n ){ 386+ return 1; 387+ } 388+ 389+ for( i=0; nFound<n && i<nData; ++i ){ 390+ nCur++; 391+ if( nCur==9 || !(pData[i]&0x80) ){ 392+ nFound++; 393+ nCur = 0; 394+ } 395+ } 396+ 397+ return nFound==n; 398+} 399+ 400+/* ctype and str[n]casecmp() can be affected by locale (eg, tr_TR). 401+ * These versions consider only the ASCII space. 402+ */ 403+/* TODO(shess): It may be reasonable to just remove the need for these 404+ * entirely. The module could require "TEXT STRICT NOT NULL", not 405+ * "Text Strict Not Null" or whatever the developer felt like typing 406+ * that day. Handling corrupt data is a PERFECT place to be pedantic. 407+ */ 408+static int ascii_isspace(char c){ 409+ /* From fts3_expr.c */ 410+ return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; 411+} 412+static int ascii_isalnum(int x){ 413+ /* From fts3_tokenizer1.c */ 414+ return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z'); 415+} 416+static int ascii_tolower(int x){ 417+ /* From fts3_tokenizer1.c */ 418+ return (x>='A' && x<='Z') ? x-'A'+'a' : x; 419+} 420+/* TODO(shess): Consider sqlite3_strnicmp() */ 421+static int ascii_strncasecmp(const char *s1, const char *s2, size_t n){ 422+ const unsigned char *us1 = (const unsigned char *)s1; 423+ const unsigned char *us2 = (const unsigned char *)s2; 424+ while( *us1 && *us2 && n && ascii_tolower(*us1)==ascii_tolower(*us2) ){ 425+ us1++, us2++, n--; 426+ } 427+ return n ? ascii_tolower(*us1)-ascii_tolower(*us2) : 0; 428+} 429+static int ascii_strcasecmp(const char *s1, const char *s2){ 430+ /* If s2 is equal through strlen(s1), will exit while() due to s1's 431+ * trailing NUL, and return NUL-s2[strlen(s1)]. 432+ */ 433+ return ascii_strncasecmp(s1, s2, strlen(s1)+1); 434+} 435+ 436+/* For some reason I kept making mistakes with offset calculations. 
*/ 437+static const unsigned char *PageData(DbPage *pPage, unsigned iOffset){ 438+ assert( iOffset<=pPage->nPageSize ); 439+ return (unsigned char *)pPage->pData + iOffset; 440+} 441+ 442+/* The first page in the file contains a file header in the first 100 443+ * bytes. The page's header information comes after that. Note that 444+ * the offsets in the page's header information are relative to the 445+ * beginning of the page, NOT the end of the page header. 446+ */ 447+static const unsigned char *PageHeader(DbPage *pPage){ 448+ if( pPage->pgno==1 ){ 449+ const unsigned nDatabaseHeader = 100; 450+ return PageData(pPage, nDatabaseHeader); 451+ }else{ 452+ return PageData(pPage, 0); 453+ } 454+} 455+ 456+/* Helper to fetch the pager and usable page size (page size less reserved bytes) for the named database. Returns SQLITE_ERROR if no database named zName is found. */ 457+static int GetPager(sqlite3 *db, const char *zName, 458+ Pager **pPager, unsigned *pnPageSize){ 459+ Btree *pBt = NULL; 460+ int i; 461+ for( i=0; i<db->nDb; ++i ){ 462+ if( ascii_strcasecmp(db->aDb[i].zName, zName)==0 ){ 463+ pBt = db->aDb[i].pBt; 464+ break; 465+ } 466+ } 467+ if( !pBt ){ 468+ return SQLITE_ERROR; 469+ } 470+ 471+ *pPager = sqlite3BtreePager(pBt); 472+ *pnPageSize = sqlite3BtreeGetPageSize(pBt) - sqlite3BtreeGetReserve(pBt); 473+ return SQLITE_OK; 474+} 475+ 476+/* iSerialType is a type read from a record header. See "2.1 Record Format". 477+ */ 478+ 479+/* Storage size of iSerialType in bytes. My interpretation of SQLite 480+ * documentation is that text and blob fields can have 32-bit length. 481+ * Values past 2^31-12 will need more than 32 bits to encode, which is 482+ * why iSerialType is u64. 483+ */ 484+static u32 SerialTypeLength(u64 iSerialType){ 485+ switch( iSerialType ){ 486+ case 0 : return 0; /* NULL */ 487+ case 1 : return 1; /* Various integers. */ 488+ case 2 : return 2; 489+ case 3 : return 3; 490+ case 4 : return 4; 491+ case 5 : return 6; 492+ case 6 : return 8; 493+ case 7 : return 8; /* 64-bit float. */ 494+ case 8 : return 0; /* Constant 0.
*/ 495+ case 9 : return 0; /* Constant 1. */ 496+ case 10 : case 11 : assert( !"RESERVED TYPE"); return 0; 497+ } 498+ return (u32)((iSerialType>>1) - 6); 499+} 500+ 501+/* True if iSerialType refers to a blob. Only meaningful for text/blob types (>=12): even values are blobs, odd values are text. */ 502+static int SerialTypeIsBlob(u64 iSerialType){ 503+ assert( iSerialType>=12 ); 504+ return (iSerialType%2)==0; 505+} 506+ 507+/* Returns true if the serialized type represented by iSerialType is 508+ * compatible with the given type mask. 509+ */ 510+static int SerialTypeIsCompatible(u64 iSerialType, unsigned char mask){ 511+ switch( iSerialType ){ 512+ case 0 : return (mask&MASK_NULL)!=0; 513+ case 1 : return (mask&MASK_INTEGER)!=0; 514+ case 2 : return (mask&MASK_INTEGER)!=0; 515+ case 3 : return (mask&MASK_INTEGER)!=0; 516+ case 4 : return (mask&MASK_INTEGER)!=0; 517+ case 5 : return (mask&MASK_INTEGER)!=0; 518+ case 6 : return (mask&MASK_INTEGER)!=0; 519+ case 7 : return (mask&MASK_FLOAT)!=0; 520+ case 8 : return (mask&MASK_INTEGER)!=0; 521+ case 9 : return (mask&MASK_INTEGER)!=0; 522+ case 10 : assert( !"RESERVED TYPE"); return 0; 523+ case 11 : assert( !"RESERVED TYPE"); return 0; 524+ } 525+ return (mask&(SerialTypeIsBlob(iSerialType) ? MASK_BLOB : MASK_TEXT))!=0; /* Normalize to 0/1 like the cases above. */ 526+} 527+ 528+/* Versions of strdup() with return values appropriate for 529+ * sqlite3_free(). malloc.c has sqlite3DbStrDup()/NDup(), but those 530+ * need sqlite3DbFree(), which seems intrusive. 531+ */ 532+static char *sqlite3_strndup(const char *z, unsigned n){ 533+ char *zNew; 534+ 535+ if( z==NULL ){ 536+ return NULL; 537+ } 538+ 539+ zNew = sqlite3_malloc(n+1); 540+ if( zNew!=NULL ){ 541+ memcpy(zNew, z, n); 542+ zNew[n] = '\0'; 543+ } 544+ return zNew; 545+} 546+static char *sqlite3_strdup(const char *z){ 547+ if( z==NULL ){ 548+ return NULL; 549+ } 550+ return sqlite3_strndup(z, strlen(z)); 551+} 552+ 553+/* Fetch the page number of zTable in zDb from sqlite_master in zDb, 554+ * and put it in *piRootPage.
555+ */ 556+static int getRootPage(sqlite3 *db, const char *zDb, const char *zTable, 557+ u32 *piRootPage){ 558+ char *zSql; /* SQL selecting root page of named element. */ 559+ sqlite3_stmt *pStmt; 560+ int rc; 561+ 562+ if( strcmp(zTable, "sqlite_master")==0 ){ 563+ *piRootPage = 1; 564+ return SQLITE_OK; 565+ } 566+ 567+ zSql = sqlite3_mprintf("SELECT rootpage FROM %s.sqlite_master " 568+ "WHERE type = 'table' AND tbl_name = %Q", 569+ zDb, zTable); 570+ if( !zSql ){ 571+ return SQLITE_NOMEM; 572+ } 573+ 574+ rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); 575+ sqlite3_free(zSql); 576+ if( rc!=SQLITE_OK ){ 577+ return rc; 578+ } 579+ 580+ /* Require a result. No matching row is reported as SQLITE_CORRUPT. */ 581+ rc = sqlite3_step(pStmt); 582+ if( rc==SQLITE_DONE ){ 583+ rc = SQLITE_CORRUPT; 584+ }else if( rc==SQLITE_ROW ){ 585+ *piRootPage = sqlite3_column_int(pStmt, 0); 586+ 587+ /* Require only one result. */ 588+ rc = sqlite3_step(pStmt); 589+ if( rc==SQLITE_DONE ){ 590+ rc = SQLITE_OK; 591+ }else if( rc==SQLITE_ROW ){ 592+ rc = SQLITE_CORRUPT; 593+ } 594+ } 595+ sqlite3_finalize(pStmt); 596+ return rc; 597+} 598+/* Store the text encoding of database zDb in *piEncoding, as reported by PRAGMA encoding. Unrecognized values fall back to SQLITE_UTF8. */ 599+static int getEncoding(sqlite3 *db, const char *zDb, int* piEncoding){ 600+ sqlite3_stmt *pStmt; 601+ int rc; 602+ char *zSql = sqlite3_mprintf("PRAGMA %s.encoding", zDb); 603+ if( !zSql ){ 604+ return SQLITE_NOMEM; 605+ } 606+ 607+ rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); 608+ sqlite3_free(zSql); 609+ if( rc!=SQLITE_OK ){ 610+ return rc; 611+ } 612+ 613+ /* Require a result. */ 614+ rc = sqlite3_step(pStmt); 615+ if( rc==SQLITE_DONE ){ 616+ /* This case should not be possible. */ 617+ rc = SQLITE_CORRUPT; 618+ }else if( rc==SQLITE_ROW ){ 619+ if( sqlite3_column_type(pStmt, 0)==SQLITE_TEXT ){ 620+ const char* z = (const char *)sqlite3_column_text(pStmt, 0); 621+ /* These strings match the literals in pragma.c.
*/ 622+ if( !strcmp(z, "UTF-16le") ){ 623+ *piEncoding = SQLITE_UTF16LE; 624+ }else if( !strcmp(z, "UTF-16be") ){ 625+ *piEncoding = SQLITE_UTF16BE; 626+ }else if( !strcmp(z, "UTF-8") ){ 627+ *piEncoding = SQLITE_UTF8; 628+ }else{ 629+ /* This case should not be possible. */ 630+ *piEncoding = SQLITE_UTF8; 631+ } 632+ }else{ 633+ /* This case should not be possible. */ 634+ *piEncoding = SQLITE_UTF8; 635+ } 636+ 637+ /* Require only one result. */ 638+ rc = sqlite3_step(pStmt); 639+ if( rc==SQLITE_DONE ){ 640+ rc = SQLITE_OK; 641+ }else if( rc==SQLITE_ROW ){ 642+ /* This case should not be possible. */ 643+ rc = SQLITE_CORRUPT; 644+ } 645+ } 646+ sqlite3_finalize(pStmt); 647+ return rc; 648+} 649+ 650+/* Cursor for iterating interior nodes. Interior page cells contain a 651+ * child page number and a rowid. The child page contains items left 652+ * of the rowid (less than). The rightmost page of the subtree is 653+ * stored in the page header. 654+ * 655+ * interiorCursorDestroy - release all resources associated with the 656+ * cursor and any parent cursors. 657+ * interiorCursorCreate - create a cursor with the given parent and page. 658+ * interiorCursorEOF - returns true if neither the cursor nor the 659+ * parent cursors can return any more data. 660+ * interiorCursorNextPage - fetch the next child page from the cursor. 661+ * 662+ * Logically, interiorCursorNextPage() returns the next child page 663+ * number from the page the cursor is currently reading, calling the 664+ * parent cursor as necessary to get new pages to read, until done. 665+ * SQLITE_ROW if a page is returned, SQLITE_DONE if out of pages, 666+ * error otherwise. Unfortunately, if the table is corrupted 667+ * unexpected pages can be returned. If any unexpected page is found, 668+ * leaf or otherwise, it is returned to the caller for processing, 669+ * with the interior cursor left empty.
The next call to 670+ * interiorCursorNextPage() will recurse to the parent cursor until an 671+ * interior page to iterate is returned. 672+ * 673+ * Note that while interiorCursorNextPage() will refuse to follow 674+ * loops, it does not keep track of pages returned for purposes of 675+ * preventing duplication. 676+ * 677+ * Note that interiorCursorEOF() could return false (not at EOF), and 678+ * interiorCursorNextPage() could still return SQLITE_DONE. This 679+ * could happen if there are more cells to iterate in an interior 680+ * page, but those cells refer to invalid pages. 681+ */ 682+typedef struct RecoverInteriorCursor RecoverInteriorCursor; 683+struct RecoverInteriorCursor { 684+ RecoverInteriorCursor *pParent; /* Parent node to this node. */ 685+ DbPage *pPage; /* Reference to interior page being iterated. */ 686+ unsigned nPageSize; /* Size of page. */ 687+ unsigned nChildren; /* Number of children on the page. */ 688+ unsigned iChild; /* Index of next child to return. */ 689+}; 690+ 691+static void interiorCursorDestroy(RecoverInteriorCursor *pCursor){ 692+ /* Destroy all the cursors to the root. */ 693+ while( pCursor ){ 694+ RecoverInteriorCursor *p = pCursor; 695+ pCursor = pCursor->pParent; 696+ 697+ if( p->pPage ){ 698+ sqlite3PagerUnref(p->pPage); 699+ p->pPage = NULL; 700+ } 701+ 702+ memset(p, 0xA5, sizeof(*p)); 703+ sqlite3_free(p); 704+ } 705+} 706+ 707+/* Internal helper. Reset storage in preparation for iterating pPage. */ 708+static void interiorCursorSetPage(RecoverInteriorCursor *pCursor, 709+ DbPage *pPage){ 710+ assert( PageHeader(pPage)[kiPageTypeOffset]==kTableInteriorPage ); 711+ 712+ if( pCursor->pPage ){ 713+ sqlite3PagerUnref(pCursor->pPage); 714+ pCursor->pPage = NULL; 715+ } 716+ pCursor->pPage = pPage; 717+ pCursor->iChild = 0; 718+ 719+ /* A child for each cell, plus one in the header. */ 720+ /* TODO(shess): Sanity-check the count?
Page header plus per-cell 721+ * cost of 16-bit offset, 32-bit page number, and one varint 722+ * (minimum 1 byte). 723+ */ 724+ pCursor->nChildren = decodeUnsigned16(PageHeader(pPage) + 725+ kiPageCellCountOffset) + 1; 726+} 727+ 728+static int interiorCursorCreate(RecoverInteriorCursor *pParent, 729+ DbPage *pPage, int nPageSize, 730+ RecoverInteriorCursor **ppCursor){ 731+ RecoverInteriorCursor *pCursor = 732+ sqlite3_malloc(sizeof(RecoverInteriorCursor)); 733+ if( !pCursor ){ 734+ return SQLITE_NOMEM; 735+ } 736+ 737+ memset(pCursor, 0, sizeof(*pCursor)); 738+ pCursor->pParent = pParent; 739+ pCursor->nPageSize = nPageSize; 740+ interiorCursorSetPage(pCursor, pPage); 741+ *ppCursor = pCursor; 742+ return SQLITE_OK; 743+} 744+ 745+/* Internal helper. Return the child page number at iChild. Returns 0, which is never a valid page number, if the cell offset is out of range. */ 746+static unsigned interiorCursorChildPage(RecoverInteriorCursor *pCursor){ 747+ const unsigned char *pPageHeader; /* Header of the current page. */ 748+ const unsigned char *pCellOffsets; /* Offset to page's cell offsets. */ 749+ unsigned iCellOffset; /* Offset of target cell. */ 750+ 751+ assert( pCursor->iChild<pCursor->nChildren ); 752+ 753+ /* Rightmost child is in the header. */ 754+ pPageHeader = PageHeader(pCursor->pPage); 755+ if( pCursor->iChild==pCursor->nChildren-1 ){ 756+ return decodeUnsigned32(pPageHeader + kiPageRightChildOffset); 757+ } 758+ 759+ /* Each cell is a 4-byte integer page number and a varint rowid 760+ * which is greater than the rowid of items in that sub-tree (this 761+ * module ignores ordering). The offset is from the beginning of the 762+ * page, not from the page header. 763+ */ 764+ pCellOffsets = pPageHeader + kiPageInteriorHeaderBytes; 765+ iCellOffset = decodeUnsigned16(pCellOffsets + pCursor->iChild*2); 766+ if( iCellOffset<=pCursor->nPageSize-4 ){ 767+ return decodeUnsigned32(PageData(pCursor->pPage, iCellOffset)); 768+ } 769+ 770+ /* TODO(shess): Check for cell overlaps? Cells require 4 bytes plus 771+ * a varint.
Check could be identical to leaf check (or even a 772+ * shared helper testing for "Cells starting in this range"?). 773+ */ 774+ 775+ /* If the offset is broken, return an invalid page number. */ 776+ return 0; 777+} 778+ 779+static int interiorCursorEOF(RecoverInteriorCursor *pCursor){ 780+ /* Find a parent with remaining children. EOF if none found. */ 781+ while( pCursor && pCursor->iChild>=pCursor->nChildren ){ 782+ pCursor = pCursor->pParent; 783+ } 784+ return pCursor==NULL; 785+} 786+ 787+/* Internal helper. Used to detect if iPage would cause a loop. */ 788+static int interiorCursorPageInUse(RecoverInteriorCursor *pCursor, 789+ unsigned iPage){ 790+ /* Find any parent using the indicated page. */ 791+ while( pCursor && pCursor->pPage->pgno!=iPage ){ 792+ pCursor = pCursor->pParent; 793+ } 794+ return pCursor!=NULL; 795+} 796+ 797+/* Get the next page from the interior cursor at *ppCursor. Returns 798+ * SQLITE_ROW with the page in *ppPage, or SQLITE_DONE if out of 799+ * pages, or the error SQLite returned. 800+ * 801+ * If the tree is uneven, then when the cursor attempts to get a new 802+ * interior page from the parent cursor, it may get a non-interior 803+ * page. In that case, the new page is returned, and *ppCursor is 804+ * updated to point to the parent cursor (this cursor is freed). 805+ */ 806+/* TODO(shess): I've tried to avoid recursion in most of this code, 807+ * but this case is more challenging because the recursive call is in 808+ * the middle of operation. One option for converting it without 809+ * adding memory management would be to retain the head pointer and 810+ * use a helper to "back up" as needed. Another option would be to 811+ * reverse the list during traversal. 812+ */ 813+static int interiorCursorNextPage(RecoverInteriorCursor **ppCursor, 814+ DbPage **ppPage){ 815+ RecoverInteriorCursor *pCursor = *ppCursor; 816+ while( 1 ){ 817+ int rc; 818+ const unsigned char *pPageHeader; /* Header of found page. 
*/ 819+ 820+ /* Find a valid child page which isn't on the stack. */ 821+ while( pCursor->iChild<pCursor->nChildren ){ 822+ const unsigned iPage = interiorCursorChildPage(pCursor); 823+ pCursor->iChild++; 824+ if( interiorCursorPageInUse(pCursor, iPage) ){ 825+ fprintf(stderr, "Loop detected at %d\n", iPage); 826+ }else{ 827+ int rc = sqlite3PagerAcquire(pCursor->pPage->pPager, iPage, ppPage, 0); 828+ if( rc==SQLITE_OK ){ 829+ return SQLITE_ROW; 830+ } 831+ } 832+ } 833+ 834+ /* This page has no more children. Get next page from parent. */ 835+ if( !pCursor->pParent ){ 836+ return SQLITE_DONE; 837+ } 838+ rc = interiorCursorNextPage(&pCursor->pParent, ppPage); 839+ if( rc!=SQLITE_ROW ){ 840+ return rc; 841+ } 842+ 843+ /* If a non-interior page is received, that either means that the 844+ * tree is uneven, or that a child was re-used (say as an overflow 845+ * page). Remove this cursor and let the caller handle the page. 846+ */ 847+ pPageHeader = PageHeader(*ppPage); 848+ if( pPageHeader[kiPageTypeOffset]!=kTableInteriorPage ){ 849+ *ppCursor = pCursor->pParent; 850+ pCursor->pParent = NULL; 851+ interiorCursorDestroy(pCursor); 852+ return SQLITE_ROW; 853+ } 854+ 855+ /* Iterate the new page. */ 856+ interiorCursorSetPage(pCursor, *ppPage); 857+ *ppPage = NULL; 858+ } 859+ 860+ assert(NULL); /* NOTREACHED() */ 861+ return SQLITE_CORRUPT; 862+} 863+ 864+/* Large rows are spilled to overflow pages. The row's main page 865+ * stores the overflow page number after the local payload, with a 866+ * linked list forward from there as necessary. overflowMaybeCreate() 867+ * and overflowGetSegment() provide an abstraction for accessing such 868+ * data while centralizing the code. 869+ * 870+ * overflowDestroy - releases all resources associated with the structure. 871+ * overflowMaybeCreate - create the overflow structure if it is needed 872+ * to represent the given record. See function comment. 
873+ * overflowGetSegment - fetch a segment from the record, accounting 874+ * for overflow pages. Segments which are not 875+ * entirely contained with a page are constructed 876+ * into a buffer which is returned. See function comment. 877+ */ 878+typedef struct RecoverOverflow RecoverOverflow; 879+struct RecoverOverflow { 880+ RecoverOverflow *pNextOverflow; 881+ DbPage *pPage; 882+ unsigned nPageSize; 883+}; 884+ 885+static void overflowDestroy(RecoverOverflow *pOverflow){ 886+ while( pOverflow ){ 887+ RecoverOverflow *p = pOverflow; 888+ pOverflow = p->pNextOverflow; 889+ 890+ if( p->pPage ){ 891+ sqlite3PagerUnref(p->pPage); 892+ p->pPage = NULL; 893+ } 894+ 895+ memset(p, 0xA5, sizeof(*p)); 896+ sqlite3_free(p); 897+ } 898+} 899+ 900+/* Internal helper. Used to detect if iPage would cause a loop. */ 901+static int overflowPageInUse(RecoverOverflow *pOverflow, unsigned iPage){ 902+ while( pOverflow && pOverflow->pPage->pgno!=iPage ){ 903+ pOverflow = pOverflow->pNextOverflow; 904+ } 905+ return pOverflow!=NULL; 906+} 907+ 908+/* Setup to access an nRecordBytes record beginning at iRecordOffset 909+ * in pPage. If nRecordBytes can be satisfied entirely from pPage, 910+ * then no overflow pages are needed an *pnLocalRecordBytes is set to 911+ * nRecordBytes. Otherwise, *ppOverflow is set to the head of a list 912+ * of overflow pages, and *pnLocalRecordBytes is set to the number of 913+ * bytes local to pPage. 914+ * 915+ * overflowGetSegment() will do the right thing regardless of whether 916+ * those values are set to be in-page or not. 917+ */ 918+static int overflowMaybeCreate(DbPage *pPage, unsigned nPageSize, 919+ unsigned iRecordOffset, unsigned nRecordBytes, 920+ unsigned *pnLocalRecordBytes, 921+ RecoverOverflow **ppOverflow){ 922+ unsigned nLocalRecordBytes; /* Record bytes in the leaf page. */ 923+ unsigned iNextPage; /* Next page number for record data. */ 924+ unsigned nBytes; /* Maximum record bytes as of current page. 
*/ 925+ int rc; 926+ RecoverOverflow *pFirstOverflow; /* First in linked list of pages. */ 927+ RecoverOverflow *pLastOverflow; /* End of linked list. */ 928+ 929+ /* Calculations from the "Table B-Tree Leaf Cell" part of section 930+ * 1.5 of http://www.sqlite.org/fileformat2.html . maxLocal and 931+ * minLocal to match naming in btree.c. 932+ */ 933+ const unsigned maxLocal = nPageSize - 35; 934+ const unsigned minLocal = ((nPageSize-12)*32/255)-23; /* m */ 935+ 936+ /* Always fit anything smaller than maxLocal. */ 937+ if( nRecordBytes<=maxLocal ){ 938+ *pnLocalRecordBytes = nRecordBytes; 939+ *ppOverflow = NULL; 940+ return SQLITE_OK; 941+ } 942+ 943+ /* Calculate the remainder after accounting for minLocal on the leaf 944+ * page and what packs evenly into overflow pages. If the remainder 945+ * does not fit into maxLocal, then a partially-full overflow page 946+ * will be required in any case, so store as little as possible locally. 947+ */ 948+ nLocalRecordBytes = minLocal+((nRecordBytes-minLocal)%(nPageSize-4)); 949+ if( maxLocal<nLocalRecordBytes ){ 950+ nLocalRecordBytes = minLocal; 951+ } 952+ 953+ /* Don't read off the end of the page. */ 954+ if( iRecordOffset+nLocalRecordBytes+4>nPageSize ){ 955+ return SQLITE_CORRUPT; 956+ } 957+ 958+ /* First overflow page number is after the local bytes. */ 959+ iNextPage = 960+ decodeUnsigned32(PageData(pPage, iRecordOffset + nLocalRecordBytes)); 961+ nBytes = nLocalRecordBytes; 962+ 963+ /* While there are more pages to read, and more bytes are needed, 964+ * get another page. 965+ */ 966+ pFirstOverflow = pLastOverflow = NULL; 967+ rc = SQLITE_OK; 968+ while( iNextPage && nBytes<nRecordBytes ){ 969+ RecoverOverflow *pOverflow; /* New overflow page for the list. 
*/ 970+ 971+ rc = sqlite3PagerAcquire(pPage->pPager, iNextPage, &pPage, 0); 972+ if( rc!=SQLITE_OK ){ 973+ break; 974+ } 975+ 976+ pOverflow = sqlite3_malloc(sizeof(RecoverOverflow)); 977+ if( !pOverflow ){ 978+ sqlite3PagerUnref(pPage); 979+ rc = SQLITE_NOMEM; 980+ break; 981+ } 982+ memset(pOverflow, 0, sizeof(*pOverflow)); 983+ pOverflow->pPage = pPage; 984+ pOverflow->nPageSize = nPageSize; 985+ 986+ if( !pFirstOverflow ){ 987+ pFirstOverflow = pOverflow; 988+ }else{ 989+ pLastOverflow->pNextOverflow = pOverflow; 990+ } 991+ pLastOverflow = pOverflow; 992+ 993+ iNextPage = decodeUnsigned32(pPage->pData); 994+ nBytes += nPageSize-4; 995+ 996+ /* Avoid loops. */ 997+ if( overflowPageInUse(pFirstOverflow, iNextPage) ){ 998+ fprintf(stderr, "Overflow loop detected at %d\n", iNextPage); 999+ rc = SQLITE_CORRUPT; 1000+ break; 1001+ } 1002+ } 1003+ 1004+ /* If there were not enough pages, or too many, things are corrupt. 1005+ * Not having enough pages is an obvious problem, all the data 1006+ * cannot be read. Too many pages means that the contents of the 1007+ * row between the main page and the overflow page(s) is 1008+ * inconsistent (most likely one or more of the overflow pages does 1009+ * not really belong to this row). 1010+ */ 1011+ if( rc==SQLITE_OK && (nBytes<nRecordBytes || iNextPage) ){ 1012+ rc = SQLITE_CORRUPT; 1013+ } 1014+ 1015+ if( rc==SQLITE_OK ){ 1016+ *ppOverflow = pFirstOverflow; 1017+ *pnLocalRecordBytes = nLocalRecordBytes; 1018+ }else if( pFirstOverflow ){ 1019+ overflowDestroy(pFirstOverflow); 1020+ } 1021+ return rc; 1022+} 1023+ 1024+/* Use in concert with overflowMaybeCreate() to efficiently read parts 1025+ * of a potentially-overflowing record. pPage and iRecordOffset are 1026+ * the values passed into overflowMaybeCreate(), nLocalRecordBytes and 1027+ * pOverflow are the values returned by that call. 1028+ * 1029+ * On SQLITE_OK, *ppBase points to nRequestBytes of data at 1030+ * iRequestOffset within the record. 
If the data exists contiguously
 * in a page, a direct pointer is returned, otherwise a buffer from
 * sqlite3_malloc() is returned with the data.  *pbFree is set true if
 * sqlite3_free() should be called on *ppBase.
 *
 * Returns SQLITE_NOMEM if the buffer cannot be allocated, and
 * SQLITE_ERROR if the requested range runs past the available pages.
 */
/* Operation of this function is subtle.  At any time, pPage is the
 * current page, with iRecordOffset and nLocalRecordBytes being record
 * data within pPage, and pOverflow being the overflow page after
 * pPage.  This allows the code to handle both the initial leaf page
 * and overflow pages consistently by adjusting the values
 * appropriately.
 */
static int overflowGetSegment(DbPage *pPage, unsigned iRecordOffset,
                              unsigned nLocalRecordBytes,
                              RecoverOverflow *pOverflow,
                              unsigned iRequestOffset, unsigned nRequestBytes,
                              unsigned char **ppBase, int *pbFree){
  unsigned nBase;            /* Amount of data currently collected. */
  unsigned char *pBase;      /* Buffer to collect record data into. */

  /* Skip to the page containing the start of the data. */
  while( iRequestOffset>=nLocalRecordBytes && pOverflow ){
    /* Factor out current page's contribution. */
    iRequestOffset -= nLocalRecordBytes;

    /* Move forward to the next page in the list. */
    pPage = pOverflow->pPage;
    iRecordOffset = 4;
    nLocalRecordBytes = pOverflow->nPageSize - iRecordOffset;
    pOverflow = pOverflow->pNextOverflow;
  }

  /* If the requested data is entirely within this page, return a
   * pointer into the page.  The test is phrased as a subtraction so
   * that a huge iRequestOffset+nRequestBytes cannot wrap around and
   * appear to fit.
   */
  if( iRequestOffset<=nLocalRecordBytes
   && nRequestBytes<=nLocalRecordBytes-iRequestOffset ){
    /* TODO(shess): "assignment discards qualifiers from pointer target type"
     * Having ppBase be const makes sense, but sqlite3_free() takes non-const.
     */
    *ppBase = (unsigned char *)PageData(pPage, iRecordOffset + iRequestOffset);
    *pbFree = 0;
    return SQLITE_OK;
  }

  /* The data range would require additional pages. */
  if( !pOverflow ){
    /* Should never happen, the range is outside the nRecordBytes
     * passed to overflowMaybeCreate().
     */
    assert(NULL);  /* NOTREACHED */
    return SQLITE_ERROR;
  }

  /* Get a buffer to construct into.  pOverflow is non-NULL here, so
   * the skip loop above stopped with iRequestOffset<nLocalRecordBytes
   * and the subtractions below cannot underflow.
   */
  nBase = 0;
  pBase = sqlite3_malloc(nRequestBytes);
  if( !pBase ){
    return SQLITE_NOMEM;
  }
  while( nBase<nRequestBytes ){
    /* Copy over data present on this page. */
    unsigned nCopyBytes = nRequestBytes - nBase;
    if( nLocalRecordBytes-iRequestOffset<nCopyBytes ){
      nCopyBytes = nLocalRecordBytes - iRequestOffset;
    }
    memcpy(pBase + nBase, PageData(pPage, iRecordOffset + iRequestOffset),
           nCopyBytes);
    nBase += nCopyBytes;

    if( pOverflow ){
      /* Copy from start of record data in future pages. */
      iRequestOffset = 0;

      /* Move forward to the next page in the list.  Should match
       * first while() loop.
       */
      pPage = pOverflow->pPage;
      iRecordOffset = 4;
      nLocalRecordBytes = pOverflow->nPageSize - iRecordOffset;
      pOverflow = pOverflow->pNextOverflow;
    }else if( nBase<nRequestBytes ){
      /* Ran out of overflow pages with data left to deliver.  Not
       * possible if the requested range fits within nRecordBytes
       * passed to overflowMaybeCreate() when creating pOverflow.
       */
      assert(NULL);  /* NOTREACHED */
      sqlite3_free(pBase);
      return SQLITE_ERROR;
    }
  }
  assert( nBase==nRequestBytes );
  *ppBase = pBase;
  *pbFree = 1;
  return SQLITE_OK;
}

/* Primary structure for iterating the contents of a table.
 *
 * leafCursorDestroy - release all resources associated with the cursor.
1129+ * leafCursorCreate - create a cursor to iterate items from tree at 1130+ * the provided root page. 1131+ * leafCursorNextValidCell - get the cursor ready to access data from 1132+ * the next valid cell in the table. 1133+ * leafCursorCellRowid - get the current cell's rowid. 1134+ * leafCursorCellColumns - get current cell's column count. 1135+ * leafCursorCellColInfo - get type and data for a column in current cell. 1136+ * 1137+ * leafCursorNextValidCell skips cells which fail simple integrity 1138+ * checks, such as overlapping other cells, or being located at 1139+ * impossible offsets, or where header data doesn't correctly describe 1140+ * payload data. Returns SQLITE_ROW if a valid cell is found, 1141+ * SQLITE_DONE if all pages in the tree were exhausted. 1142+ * 1143+ * leafCursorCellColInfo() accounts for overflow pages in the style of 1144+ * overflowGetSegment(). 1145+ */ 1146+typedef struct RecoverLeafCursor RecoverLeafCursor; 1147+struct RecoverLeafCursor { 1148+ RecoverInteriorCursor *pParent; /* Parent node to this node. */ 1149+ DbPage *pPage; /* Reference to leaf page. */ 1150+ unsigned nPageSize; /* Size of pPage. */ 1151+ unsigned nCells; /* Number of cells in pPage. */ 1152+ unsigned iCell; /* Current cell. */ 1153+ 1154+ /* Info parsed from data in iCell. */ 1155+ i64 iRowid; /* rowid parsed. */ 1156+ unsigned nRecordCols; /* how many items in the record. */ 1157+ u64 iRecordOffset; /* offset to record data. */ 1158+ /* TODO(shess): nRecordBytes and nRecordHeaderBytes are used in 1159+ * leafCursorCellColInfo() to prevent buffer overruns. 1160+ * leafCursorCellDecode() already verified that the cell is valid, so 1161+ * those checks should be redundant. 1162+ */ 1163+ u64 nRecordBytes; /* Size of record data. */ 1164+ unsigned nLocalRecordBytes; /* Amount of record data in-page. */ 1165+ unsigned nRecordHeaderBytes; /* Size of record header data. */ 1166+ unsigned char *pRecordHeader; /* Pointer to record header data. 
*/ 1167+ int bFreeRecordHeader; /* True if record header requires free. */ 1168+ RecoverOverflow *pOverflow; /* Cell overflow info, if needed. */ 1169+}; 1170+ 1171+/* Internal helper shared between next-page and create-cursor. If 1172+ * pPage is a leaf page, it will be stored in the cursor and state 1173+ * initialized for reading cells. 1174+ * 1175+ * If pPage is an interior page, a new parent cursor is created and 1176+ * injected on the stack. This is necessary to handle trees with 1177+ * uneven depth, but also is used during initial setup. 1178+ * 1179+ * If pPage is not a table page at all, it is discarded. 1180+ * 1181+ * If SQLITE_OK is returned, the caller no longer owns pPage, 1182+ * otherwise the caller is responsible for discarding it. 1183+ */ 1184+static int leafCursorLoadPage(RecoverLeafCursor *pCursor, DbPage *pPage){ 1185+ const unsigned char *pPageHeader; /* Header of *pPage */ 1186+ 1187+ /* Release the current page. */ 1188+ if( pCursor->pPage ){ 1189+ sqlite3PagerUnref(pCursor->pPage); 1190+ pCursor->pPage = NULL; 1191+ pCursor->iCell = pCursor->nCells = 0; 1192+ } 1193+ 1194+ /* If the page is an unexpected interior node, inject a new stack 1195+ * layer and try again from there. 1196+ */ 1197+ pPageHeader = PageHeader(pPage); 1198+ if( pPageHeader[kiPageTypeOffset]==kTableInteriorPage ){ 1199+ RecoverInteriorCursor *pParent; 1200+ int rc = interiorCursorCreate(pCursor->pParent, pPage, pCursor->nPageSize, 1201+ &pParent); 1202+ if( rc!=SQLITE_OK ){ 1203+ return rc; 1204+ } 1205+ pCursor->pParent = pParent; 1206+ return SQLITE_OK; 1207+ } 1208+ 1209+ /* Not a leaf page, skip it. */ 1210+ if( pPageHeader[kiPageTypeOffset]!=kTableLeafPage ){ 1211+ sqlite3PagerUnref(pPage); 1212+ return SQLITE_OK; 1213+ } 1214+ 1215+ /* Take ownership of the page and start decoding. 
*/ 1216+ pCursor->pPage = pPage; 1217+ pCursor->iCell = 0; 1218+ pCursor->nCells = decodeUnsigned16(pPageHeader + kiPageCellCountOffset); 1219+ return SQLITE_OK; 1220+} 1221+ 1222+/* Get the next leaf-level page in the tree. Returns SQLITE_ROW when 1223+ * a leaf page is found, SQLITE_DONE when no more leaves exist, or any 1224+ * error which occurred. 1225+ */ 1226+static int leafCursorNextPage(RecoverLeafCursor *pCursor){ 1227+ if( !pCursor->pParent ){ 1228+ return SQLITE_DONE; 1229+ } 1230+ 1231+ /* Repeatedly load the parent's next child page until a leaf is found. */ 1232+ do { 1233+ DbPage *pNextPage; 1234+ int rc = interiorCursorNextPage(&pCursor->pParent, &pNextPage); 1235+ if( rc!=SQLITE_ROW ){ 1236+ assert( rc==SQLITE_DONE ); 1237+ return rc; 1238+ } 1239+ 1240+ rc = leafCursorLoadPage(pCursor, pNextPage); 1241+ if( rc!=SQLITE_OK ){ 1242+ sqlite3PagerUnref(pNextPage); 1243+ return rc; 1244+ } 1245+ } while( !pCursor->pPage ); 1246+ 1247+ return SQLITE_ROW; 1248+} 1249+ 1250+static void leafCursorDestroyCellData(RecoverLeafCursor *pCursor){ 1251+ if( pCursor->bFreeRecordHeader ){ 1252+ sqlite3_free(pCursor->pRecordHeader); 1253+ } 1254+ pCursor->bFreeRecordHeader = 0; 1255+ pCursor->pRecordHeader = NULL; 1256+ 1257+ if( pCursor->pOverflow ){ 1258+ overflowDestroy(pCursor->pOverflow); 1259+ pCursor->pOverflow = NULL; 1260+ } 1261+} 1262+ 1263+static void leafCursorDestroy(RecoverLeafCursor *pCursor){ 1264+ leafCursorDestroyCellData(pCursor); 1265+ 1266+ if( pCursor->pParent ){ 1267+ interiorCursorDestroy(pCursor->pParent); 1268+ pCursor->pParent = NULL; 1269+ } 1270+ 1271+ if( pCursor->pPage ){ 1272+ sqlite3PagerUnref(pCursor->pPage); 1273+ pCursor->pPage = NULL; 1274+ } 1275+ 1276+ memset(pCursor, 0xA5, sizeof(*pCursor)); 1277+ sqlite3_free(pCursor); 1278+} 1279+ 1280+/* Create a cursor to iterate the rows from the leaf pages of a table 1281+ * rooted at iRootPage. 
1282+ */ 1283+/* TODO(shess): recoverOpen() calls this to setup the cursor, and I 1284+ * think that recoverFilter() may make a hard assumption that the 1285+ * cursor returned will turn up at least one valid cell. 1286+ * 1287+ * The cases I can think of which break this assumption are: 1288+ * - pPage is a valid leaf page with no valid cells. 1289+ * - pPage is a valid interior page with no valid leaves. 1290+ * - pPage is a valid interior page who's leaves contain no valid cells. 1291+ * - pPage is not a valid leaf or interior page. 1292+ */ 1293+static int leafCursorCreate(Pager *pPager, unsigned nPageSize, 1294+ u32 iRootPage, RecoverLeafCursor **ppCursor){ 1295+ DbPage *pPage; /* Reference to page at iRootPage. */ 1296+ RecoverLeafCursor *pCursor; /* Leaf cursor being constructed. */ 1297+ int rc; 1298+ 1299+ /* Start out with the root page. */ 1300+ rc = sqlite3PagerAcquire(pPager, iRootPage, &pPage, 0); 1301+ if( rc!=SQLITE_OK ){ 1302+ return rc; 1303+ } 1304+ 1305+ pCursor = sqlite3_malloc(sizeof(RecoverLeafCursor)); 1306+ if( !pCursor ){ 1307+ sqlite3PagerUnref(pPage); 1308+ return SQLITE_NOMEM; 1309+ } 1310+ memset(pCursor, 0, sizeof(*pCursor)); 1311+ 1312+ pCursor->nPageSize = nPageSize; 1313+ 1314+ rc = leafCursorLoadPage(pCursor, pPage); 1315+ if( rc!=SQLITE_OK ){ 1316+ sqlite3PagerUnref(pPage); 1317+ leafCursorDestroy(pCursor); 1318+ return rc; 1319+ } 1320+ 1321+ /* pPage wasn't a leaf page, find the next leaf page. */ 1322+ if( !pCursor->pPage ){ 1323+ rc = leafCursorNextPage(pCursor); 1324+ if( rc!=SQLITE_DONE && rc!=SQLITE_ROW ){ 1325+ leafCursorDestroy(pCursor); 1326+ return rc; 1327+ } 1328+ } 1329+ 1330+ *ppCursor = pCursor; 1331+ return SQLITE_OK; 1332+} 1333+ 1334+/* Useful for setting breakpoints. */ 1335+static int ValidateError(){ 1336+ return SQLITE_ERROR; 1337+} 1338+ 1339+/* Setup the cursor for reading the information from cell iCell. 
*/ 1340+static int leafCursorCellDecode(RecoverLeafCursor *pCursor){ 1341+ const unsigned char *pPageHeader; /* Header of current page. */ 1342+ const unsigned char *pCellOffsets; /* Pointer to page's cell offsets. */ 1343+ unsigned iCellOffset; /* Offset of current cell (iCell). */ 1344+ const unsigned char *pCell; /* Pointer to data at iCellOffset. */ 1345+ unsigned nCellMaxBytes; /* Maximum local size of iCell. */ 1346+ unsigned iEndOffset; /* End of iCell's in-page data. */ 1347+ u64 nRecordBytes; /* Expected size of cell, w/overflow. */ 1348+ u64 iRowid; /* iCell's rowid (in table). */ 1349+ unsigned nRead; /* Amount of cell read. */ 1350+ unsigned nRecordHeaderRead; /* Header data read. */ 1351+ u64 nRecordHeaderBytes; /* Header size expected. */ 1352+ unsigned nRecordCols; /* Columns read from header. */ 1353+ u64 nRecordColBytes; /* Bytes in payload for those columns. */ 1354+ unsigned i; 1355+ int rc; 1356+ 1357+ assert( pCursor->iCell<pCursor->nCells ); 1358+ 1359+ leafCursorDestroyCellData(pCursor); 1360+ 1361+ /* Find the offset to the row. */ 1362+ pPageHeader = PageHeader(pCursor->pPage); 1363+ pCellOffsets = pPageHeader + knPageLeafHeaderBytes; 1364+ iCellOffset = decodeUnsigned16(pCellOffsets + pCursor->iCell*2); 1365+ if( iCellOffset>=pCursor->nPageSize ){ 1366+ return ValidateError(); 1367+ } 1368+ 1369+ pCell = PageData(pCursor->pPage, iCellOffset); 1370+ nCellMaxBytes = pCursor->nPageSize - iCellOffset; 1371+ 1372+ /* B-tree leaf cells lead with varint record size, varint rowid and 1373+ * varint header size. 1374+ */ 1375+ /* TODO(shess): The smallest page size is 512 bytes, which has an m 1376+ * of 39. Three varints need at most 27 bytes to encode. I think. 
1377+ */ 1378+ if( !checkVarints(pCell, nCellMaxBytes, 3) ){ 1379+ return ValidateError(); 1380+ } 1381+ 1382+ nRead = getVarint(pCell, &nRecordBytes); 1383+ assert( iCellOffset+nRead<=pCursor->nPageSize ); 1384+ pCursor->nRecordBytes = nRecordBytes; 1385+ 1386+ nRead += getVarint(pCell + nRead, &iRowid); 1387+ assert( iCellOffset+nRead<=pCursor->nPageSize ); 1388+ pCursor->iRowid = (i64)iRowid; 1389+ 1390+ pCursor->iRecordOffset = iCellOffset + nRead; 1391+ 1392+ /* Start overflow setup here because nLocalRecordBytes is needed to 1393+ * check cell overlap. 1394+ */ 1395+ rc = overflowMaybeCreate(pCursor->pPage, pCursor->nPageSize, 1396+ pCursor->iRecordOffset, pCursor->nRecordBytes, 1397+ &pCursor->nLocalRecordBytes, 1398+ &pCursor->pOverflow); 1399+ if( rc!=SQLITE_OK ){ 1400+ return ValidateError(); 1401+ } 1402+ 1403+ /* Check that no other cell starts within this cell. */ 1404+ iEndOffset = pCursor->iRecordOffset + pCursor->nLocalRecordBytes; 1405+ for( i=0; i<pCursor->nCells; ++i ){ 1406+ const unsigned iOtherOffset = decodeUnsigned16(pCellOffsets + i*2); 1407+ if( iOtherOffset>iCellOffset && iOtherOffset<iEndOffset ){ 1408+ return ValidateError(); 1409+ } 1410+ } 1411+ 1412+ nRecordHeaderRead = getVarint(pCell + nRead, &nRecordHeaderBytes); 1413+ assert( nRecordHeaderBytes<=nRecordBytes ); 1414+ pCursor->nRecordHeaderBytes = nRecordHeaderBytes; 1415+ 1416+ /* Large headers could overflow if pages are small. */ 1417+ rc = overflowGetSegment(pCursor->pPage, 1418+ pCursor->iRecordOffset, pCursor->nLocalRecordBytes, 1419+ pCursor->pOverflow, 0, nRecordHeaderBytes, 1420+ &pCursor->pRecordHeader, &pCursor->bFreeRecordHeader); 1421+ if( rc!=SQLITE_OK ){ 1422+ return ValidateError(); 1423+ } 1424+ 1425+ /* Tally up the column count and size of data. */ 1426+ nRecordCols = 0; 1427+ nRecordColBytes = 0; 1428+ while( nRecordHeaderRead<nRecordHeaderBytes ){ 1429+ u64 iSerialType; /* Type descriptor for current column. 
*/ 1430+ if( !checkVarint(pCursor->pRecordHeader + nRecordHeaderRead, 1431+ nRecordHeaderBytes - nRecordHeaderRead) ){ 1432+ return ValidateError(); 1433+ } 1434+ nRecordHeaderRead += getVarint(pCursor->pRecordHeader + nRecordHeaderRead, 1435+ &iSerialType); 1436+ if( iSerialType==10 || iSerialType==11 ){ 1437+ return ValidateError(); 1438+ } 1439+ nRecordColBytes += SerialTypeLength(iSerialType); 1440+ nRecordCols++; 1441+ } 1442+ pCursor->nRecordCols = nRecordCols; 1443+ 1444+ /* Parsing the header used as many bytes as expected. */ 1445+ if( nRecordHeaderRead!=nRecordHeaderBytes ){ 1446+ return ValidateError(); 1447+ } 1448+ 1449+ /* Calculated record is size of expected record. */ 1450+ if( nRecordHeaderBytes+nRecordColBytes!=nRecordBytes ){ 1451+ return ValidateError(); 1452+ } 1453+ 1454+ return SQLITE_OK; 1455+} 1456+ 1457+static i64 leafCursorCellRowid(RecoverLeafCursor *pCursor){ 1458+ return pCursor->iRowid; 1459+} 1460+ 1461+static unsigned leafCursorCellColumns(RecoverLeafCursor *pCursor){ 1462+ return pCursor->nRecordCols; 1463+} 1464+ 1465+/* Get the column info for the cell. Pass NULL for ppBase to prevent 1466+ * retrieving the data segment. If *pbFree is true, *ppBase must be 1467+ * freed by the caller using sqlite3_free(). 1468+ */ 1469+static int leafCursorCellColInfo(RecoverLeafCursor *pCursor, 1470+ unsigned iCol, u64 *piColType, 1471+ unsigned char **ppBase, int *pbFree){ 1472+ const unsigned char *pRecordHeader; /* Current cell's header. */ 1473+ u64 nRecordHeaderBytes; /* Bytes in pRecordHeader. */ 1474+ unsigned nRead; /* Bytes read from header. */ 1475+ u64 iColEndOffset; /* Offset to end of column in cell. */ 1476+ unsigned nColsSkipped; /* Count columns as procesed. */ 1477+ u64 iSerialType; /* Type descriptor for current column. */ 1478+ 1479+ /* Implicit NULL for columns past the end. This case happens when 1480+ * rows have not been updated since an ALTER TABLE added columns. 
1481+ * It is more convenient to address here than in callers. 1482+ */ 1483+ if( iCol>=pCursor->nRecordCols ){ 1484+ *piColType = 0; 1485+ if( ppBase ){ 1486+ *ppBase = 0; 1487+ *pbFree = 0; 1488+ } 1489+ return SQLITE_OK; 1490+ } 1491+ 1492+ /* Must be able to decode header size. */ 1493+ pRecordHeader = pCursor->pRecordHeader; 1494+ if( !checkVarint(pRecordHeader, pCursor->nRecordHeaderBytes) ){ 1495+ return SQLITE_CORRUPT; 1496+ } 1497+ 1498+ /* Rather than caching the header size and how many bytes it took, 1499+ * decode it every time. 1500+ */ 1501+ nRead = getVarint(pRecordHeader, &nRecordHeaderBytes); 1502+ assert( nRecordHeaderBytes==pCursor->nRecordHeaderBytes ); 1503+ 1504+ /* Scan forward to the indicated column. Scans to _after_ column 1505+ * for later range checking. 1506+ */ 1507+ /* TODO(shess): This could get expensive for very wide tables. An 1508+ * array of iSerialType could be built in leafCursorCellDecode(), but 1509+ * the number of columns is dynamic per row, so it would add memory 1510+ * management complexity. Enough info to efficiently forward 1511+ * iterate could be kept, if all clients forward iterate 1512+ * (recoverColumn() may not). 1513+ */ 1514+ iColEndOffset = 0; 1515+ nColsSkipped = 0; 1516+ while( nColsSkipped<=iCol && nRead<nRecordHeaderBytes ){ 1517+ if( !checkVarint(pRecordHeader + nRead, nRecordHeaderBytes - nRead) ){ 1518+ return SQLITE_CORRUPT; 1519+ } 1520+ nRead += getVarint(pRecordHeader + nRead, &iSerialType); 1521+ iColEndOffset += SerialTypeLength(iSerialType); 1522+ nColsSkipped++; 1523+ } 1524+ 1525+ /* Column's data extends past record's end. */ 1526+ if( nRecordHeaderBytes+iColEndOffset>pCursor->nRecordBytes ){ 1527+ return SQLITE_CORRUPT; 1528+ } 1529+ 1530+ *piColType = iSerialType; 1531+ if( ppBase ){ 1532+ const u32 nColBytes = SerialTypeLength(iSerialType); 1533+ 1534+ /* Offset from start of record to beginning of column. 
*/ 1535+ const unsigned iColOffset = nRecordHeaderBytes+iColEndOffset-nColBytes; 1536+ 1537+ return overflowGetSegment(pCursor->pPage, pCursor->iRecordOffset, 1538+ pCursor->nLocalRecordBytes, pCursor->pOverflow, 1539+ iColOffset, nColBytes, ppBase, pbFree); 1540+ } 1541+ return SQLITE_OK; 1542+} 1543+ 1544+static int leafCursorNextValidCell(RecoverLeafCursor *pCursor){ 1545+ while( 1 ){ 1546+ int rc; 1547+ 1548+ /* Move to the next cell. */ 1549+ pCursor->iCell++; 1550+ 1551+ /* No more cells, get the next leaf. */ 1552+ if( pCursor->iCell>=pCursor->nCells ){ 1553+ rc = leafCursorNextPage(pCursor); 1554+ if( rc!=SQLITE_ROW ){ 1555+ return rc; 1556+ } 1557+ assert( pCursor->iCell==0 ); 1558+ } 1559+ 1560+ /* If the cell is valid, indicate that a row is available. */ 1561+ rc = leafCursorCellDecode(pCursor); 1562+ if( rc==SQLITE_OK ){ 1563+ return SQLITE_ROW; 1564+ } 1565+ 1566+ /* Iterate until done or a valid row is found. */ 1567+ /* TODO(shess): Remove debugging output. */ 1568+ fprintf(stderr, "Skipping invalid cell\n"); 1569+ } 1570+ return SQLITE_ERROR; 1571+} 1572+ 1573+typedef struct Recover Recover; 1574+struct Recover { 1575+ sqlite3_vtab base; 1576+ sqlite3 *db; /* Host database connection */ 1577+ char *zDb; /* Database containing target table */ 1578+ char *zTable; /* Target table */ 1579+ unsigned nCols; /* Number of columns in target table */ 1580+ unsigned char *pTypes; /* Types of columns in target table */ 1581+}; 1582+ 1583+/* Internal helper for deleting the module. */ 1584+static void recoverRelease(Recover *pRecover){ 1585+ sqlite3_free(pRecover->zDb); 1586+ sqlite3_free(pRecover->zTable); 1587+ sqlite3_free(pRecover->pTypes); 1588+ memset(pRecover, 0xA5, sizeof(*pRecover)); 1589+ sqlite3_free(pRecover); 1590+} 1591+ 1592+/* Helper function for initializing the module. Forward-declared so 1593+ * recoverCreate() and recoverConnect() can see it. 
1594+ */ 1595+static int recoverInit( 1596+ sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char ** 1597+); 1598+ 1599+static int recoverCreate( 1600+ sqlite3 *db, 1601+ void *pAux, 1602+ int argc, const char *const*argv, 1603+ sqlite3_vtab **ppVtab, 1604+ char **pzErr 1605+){ 1606+ FNENTRY(); 1607+ return recoverInit(db, pAux, argc, argv, ppVtab, pzErr); 1608+} 1609+ 1610+/* This should never be called. */ 1611+static int recoverConnect( 1612+ sqlite3 *db, 1613+ void *pAux, 1614+ int argc, const char *const*argv, 1615+ sqlite3_vtab **ppVtab, 1616+ char **pzErr 1617+){ 1618+ FNENTRY(); 1619+ return recoverInit(db, pAux, argc, argv, ppVtab, pzErr); 1620+} 1621+ 1622+/* No indices supported. */ 1623+static int recoverBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ 1624+ FNENTRY(); 1625+ return SQLITE_OK; 1626+} 1627+ 1628+/* Logically, this should never be called. */ 1629+static int recoverDisconnect(sqlite3_vtab *pVtab){ 1630+ FNENTRY(); 1631+ recoverRelease((Recover*)pVtab); 1632+ return SQLITE_OK; 1633+} 1634+ 1635+static int recoverDestroy(sqlite3_vtab *pVtab){ 1636+ FNENTRY(); 1637+ recoverRelease((Recover*)pVtab); 1638+ return SQLITE_OK; 1639+} 1640+ 1641+typedef struct RecoverCursor RecoverCursor; 1642+struct RecoverCursor { 1643+ sqlite3_vtab_cursor base; 1644+ RecoverLeafCursor *pLeafCursor; 1645+ int iEncoding; 1646+ int bEOF; 1647+}; 1648+ 1649+static int recoverOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ 1650+ Recover *pRecover = (Recover*)pVTab; 1651+ u32 iRootPage; /* Root page of the backing table. */ 1652+ int iEncoding; /* UTF encoding for backing database. */ 1653+ unsigned nPageSize; /* Size of pages in backing database. */ 1654+ Pager *pPager; /* Backing database pager. */ 1655+ RecoverLeafCursor *pLeafCursor; /* Cursor to read table's leaf pages. */ 1656+ RecoverCursor *pCursor; /* Cursor to read rows from leaves. 
*/ 1657+ int rc; 1658+ 1659+ FNENTRY(); 1660+ 1661+ iRootPage = 0; 1662+ rc = getRootPage(pRecover->db, pRecover->zDb, pRecover->zTable, 1663+ &iRootPage); 1664+ if( rc!=SQLITE_OK ){ 1665+ return rc; 1666+ } 1667+ 1668+ iEncoding = 0; 1669+ rc = getEncoding(pRecover->db, pRecover->zDb, &iEncoding); 1670+ if( rc!=SQLITE_OK ){ 1671+ return rc; 1672+ } 1673+ 1674+ rc = GetPager(pRecover->db, pRecover->zDb, &pPager, &nPageSize); 1675+ if( rc!=SQLITE_OK ){ 1676+ return rc; 1677+ } 1678+ 1679+ rc = leafCursorCreate(pPager, nPageSize, iRootPage, &pLeafCursor); 1680+ if( rc!=SQLITE_OK ){ 1681+ return rc; 1682+ } 1683+ 1684+ pCursor = sqlite3_malloc(sizeof(RecoverCursor)); 1685+ if( !pCursor ){ 1686+ leafCursorDestroy(pLeafCursor); 1687+ return SQLITE_NOMEM; 1688+ } 1689+ memset(pCursor, 0, sizeof(*pCursor)); 1690+ pCursor->base.pVtab = pVTab; 1691+ pCursor->pLeafCursor = pLeafCursor; 1692+ pCursor->iEncoding = iEncoding; 1693+ 1694+ *ppCursor = (sqlite3_vtab_cursor*)pCursor; 1695+ return SQLITE_OK; 1696+} 1697+ 1698+static int recoverClose(sqlite3_vtab_cursor *cur){ 1699+ RecoverCursor *pCursor = (RecoverCursor*)cur; 1700+ FNENTRY(); 1701+ if( pCursor->pLeafCursor ){ 1702+ leafCursorDestroy(pCursor->pLeafCursor); 1703+ pCursor->pLeafCursor = NULL; 1704+ } 1705+ memset(pCursor, 0xA5, sizeof(*pCursor)); 1706+ sqlite3_free(cur); 1707+ return SQLITE_OK; 1708+} 1709+ 1710+/* Helpful place to set a breakpoint. */ 1711+static int RecoverInvalidCell(){ 1712+ return SQLITE_ERROR; 1713+} 1714+ 1715+/* Returns SQLITE_OK if the cell has an appropriate number of columns 1716+ * with the appropriate types of data. 1717+ */ 1718+static int recoverValidateLeafCell(Recover *pRecover, RecoverCursor *pCursor){ 1719+ unsigned i; 1720+ 1721+ /* If the row's storage has too many columns, skip it. */ 1722+ if( leafCursorCellColumns(pCursor->pLeafCursor)>pRecover->nCols ){ 1723+ return RecoverInvalidCell(); 1724+ } 1725+ 1726+ /* Skip rows with unexpected types. 
*/ 1727+ for( i=0; i<pRecover->nCols; ++i ){ 1728+ u64 iType; /* Storage type of column i. */ 1729+ int rc; 1730+ 1731+ /* ROWID alias. */ 1732+ if( (pRecover->pTypes[i]&MASK_ROWID) ){ 1733+ continue; 1734+ } 1735+ 1736+ rc = leafCursorCellColInfo(pCursor->pLeafCursor, i, &iType, NULL, NULL); 1737+ assert( rc==SQLITE_OK ); 1738+ if( rc!=SQLITE_OK || !SerialTypeIsCompatible(iType, pRecover->pTypes[i]) ){ 1739+ return RecoverInvalidCell(); 1740+ } 1741+ } 1742+ 1743+ return SQLITE_OK; 1744+} 1745+ 1746+static int recoverNext(sqlite3_vtab_cursor *pVtabCursor){ 1747+ RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; 1748+ Recover *pRecover = (Recover*)pCursor->base.pVtab; 1749+ int rc; 1750+ 1751+ FNENTRY(); 1752+ 1753+ /* Scan forward to the next cell with valid storage, then check that 1754+ * the stored data matches the schema. 1755+ */ 1756+ while( (rc = leafCursorNextValidCell(pCursor->pLeafCursor))==SQLITE_ROW ){ 1757+ if( recoverValidateLeafCell(pRecover, pCursor)==SQLITE_OK ){ 1758+ return SQLITE_OK; 1759+ } 1760+ } 1761+ 1762+ if( rc==SQLITE_DONE ){ 1763+ pCursor->bEOF = 1; 1764+ return SQLITE_OK; 1765+ } 1766+ 1767+ assert( rc!=SQLITE_OK ); 1768+ return rc; 1769+} 1770+ 1771+static int recoverFilter( 1772+ sqlite3_vtab_cursor *pVtabCursor, 1773+ int idxNum, const char *idxStr, 1774+ int argc, sqlite3_value **argv 1775+){ 1776+ RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; 1777+ Recover *pRecover = (Recover*)pCursor->base.pVtab; 1778+ int rc; 1779+ 1780+ FNENTRY(); 1781+ 1782+ /* Load the first cell, and iterate forward if it's not valid. */ 1783+ /* TODO(shess): What happens if no cells at all are valid? 
*/ 1784+ rc = leafCursorCellDecode(pCursor->pLeafCursor); 1785+ if( rc!=SQLITE_OK || recoverValidateLeafCell(pRecover, pCursor)!=SQLITE_OK ){ 1786+ return recoverNext(pVtabCursor); 1787+ } 1788+ 1789+ return SQLITE_OK; 1790+} 1791+/* xEof: returns bEOF, which recoverNext() sets on SQLITE_DONE. */ 1792+static int recoverEof(sqlite3_vtab_cursor *pVtabCursor){ 1793+ RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; 1794+ FNENTRY(); 1795+ return pCursor->bEOF; 1796+} 1797+/* xColumn: return column i of the current cell via ctx. */ 1798+static int recoverColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ 1799+ RecoverCursor *pCursor = (RecoverCursor*)cur; 1800+ Recover *pRecover = (Recover*)pCursor->base.pVtab; 1801+ u64 iColType; /* Storage type of column i. */ 1802+ unsigned char *pColData; /* Column i's data. */ 1803+ int shouldFree; /* Non-zero if pColData should be freed. */ 1804+ int rc; 1805+ 1806+ FNENTRY(); 1807+ 1808+ if( i>=pRecover->nCols ){ 1809+ return SQLITE_ERROR; 1810+ } 1811+ 1812+ /* ROWID alias. */ 1813+ if( (pRecover->pTypes[i]&MASK_ROWID) ){ 1814+ sqlite3_result_int64(ctx, leafCursorCellRowid(pCursor->pLeafCursor)); 1815+ return SQLITE_OK; 1816+ } 1817+ 1818+ pColData = NULL; 1819+ shouldFree = 0; 1820+ rc = leafCursorCellColInfo(pCursor->pLeafCursor, i, &iColType, 1821+ &pColData, &shouldFree); 1822+ if( rc!=SQLITE_OK ){ 1823+ return rc; 1824+ } 1825+ /* recoverValidateLeafCell() should guarantee that this will never 1826+ * occur.
1827+ */ 1828+ if( !SerialTypeIsCompatible(iColType, pRecover->pTypes[i]) ){ 1829+ if( shouldFree ){ 1830+ sqlite3_free(pColData); 1831+ } 1832+ return SQLITE_ERROR; 1833+ } 1834+ /* Types 1-6 are signed integers of 1, 2, 3, 4, 6 and 8 bytes. */ 1835+ switch( iColType ){ 1836+ case 0 : sqlite3_result_null(ctx); break; 1837+ case 1 : sqlite3_result_int64(ctx, decodeSigned(pColData, 1)); break; 1838+ case 2 : sqlite3_result_int64(ctx, decodeSigned(pColData, 2)); break; 1839+ case 3 : sqlite3_result_int64(ctx, decodeSigned(pColData, 3)); break; 1840+ case 4 : sqlite3_result_int64(ctx, decodeSigned(pColData, 4)); break; 1841+ case 5 : sqlite3_result_int64(ctx, decodeSigned(pColData, 6)); break; 1842+ case 6 : sqlite3_result_int64(ctx, decodeSigned(pColData, 8)); break; 1843+ case 7 : sqlite3_result_double(ctx, decodeFloat64(pColData)); break; 1844+ case 8 : sqlite3_result_int(ctx, 0); break; 1845+ case 9 : sqlite3_result_int(ctx, 1); break; 1846+ case 10 : assert( iColType!=10 ); break; 1847+ case 11 : assert( iColType!=11 ); break; 1848+ /* Types 10 and 11 set no result; the asserts above flag them. */ 1849+ default : { 1850+ u32 l = SerialTypeLength(iColType); 1851+ 1852+ /* If pColData was already allocated, arrange to pass ownership.
*/ 1853+ sqlite3_destructor_type pFn = SQLITE_TRANSIENT; 1854+ if( shouldFree ){ 1855+ pFn = sqlite3_free; 1856+ shouldFree = 0; 1857+ } 1858+ /* Text is returned in the encoding recorded in pCursor->iEncoding. */ 1859+ if( SerialTypeIsBlob(iColType) ){ 1860+ sqlite3_result_blob(ctx, pColData, l, pFn); 1861+ }else{ 1862+ if( pCursor->iEncoding==SQLITE_UTF16LE ){ 1863+ sqlite3_result_text16le(ctx, (const void*)pColData, l, pFn); 1864+ }else if( pCursor->iEncoding==SQLITE_UTF16BE ){ 1865+ sqlite3_result_text16be(ctx, (const void*)pColData, l, pFn); 1866+ }else{ 1867+ sqlite3_result_text(ctx, (const char*)pColData, l, pFn); 1868+ } 1869+ } 1870+ } break; 1871+ } 1872+ if( shouldFree ){ 1873+ sqlite3_free(pColData); 1874+ } 1875+ return SQLITE_OK; 1876+} 1877+/* xRowid: report the rowid of the current cell. */ 1878+static int recoverRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid){ 1879+ RecoverCursor *pCursor = (RecoverCursor*)pVtabCursor; 1880+ FNENTRY(); 1881+ *pRowid = leafCursorCellRowid(pCursor->pLeafCursor); 1882+ return SQLITE_OK; 1883+} 1884+/* Module methods; entries left as 0 are not implemented. */ 1885+static sqlite3_module recoverModule = { 1886+ 0, /* iVersion */ 1887+ recoverCreate, /* xCreate - create a table */ 1888+ recoverConnect, /* xConnect - connect to an existing table */ 1889+ recoverBestIndex, /* xBestIndex - Determine search strategy */ 1890+ recoverDisconnect, /* xDisconnect - Disconnect from a table */ 1891+ recoverDestroy, /* xDestroy - Drop a table */ 1892+ recoverOpen, /* xOpen - open a cursor */ 1893+ recoverClose, /* xClose - close a cursor */ 1894+ recoverFilter, /* xFilter - configure scan constraints */ 1895+ recoverNext, /* xNext - advance a cursor */ 1896+ recoverEof, /* xEof - check for end of scan */ 1897+ recoverColumn, /* xColumn - read data */ 1898+ recoverRowid, /* xRowid - read the rowid */ 1899+ 0, /* xUpdate - write data */ 1900+ 0, /* xBegin - begin transaction */ 1901+ 0, /* xSync - sync transaction */ 1902+ 0, /* xCommit - commit transaction */ 1903+ 0, /* xRollback - rollback transaction */ 1904+ 0, /* xFindFunction - function overloading */ 1905+ 0, /* xRename - rename the table */ 1906+}; 1907+ 1908+int
recoverVtableInit(sqlite3 *db){ /* Register the "recover" module on db. */ 1909+ return sqlite3_create_module_v2(db, "recover", &recoverModule, NULL, 0); 1910+} 1911+ 1912+/* This section of code is for parsing the create input and 1913+ * initializing the module. 1914+ */ 1915+ 1916+/* Find the next word in zText and place the endpoints in pzWord*. 1917+ * Returns true if the word is non-empty. "Word" is defined as 1918+ * ASCII alphanumeric plus '_' at this time. 1919+ */ 1920+static int findWord(const char *zText, 1921+ const char **pzWordStart, const char **pzWordEnd){ 1922+ int r; 1923+ while( ascii_isspace(*zText) ){ 1924+ zText++; 1925+ } 1926+ *pzWordStart = zText; 1927+ while( ascii_isalnum(*zText) || *zText=='_' ){ 1928+ zText++; 1929+ } 1930+ r = zText>*pzWordStart; /* In case pzWordStart==pzWordEnd */ 1931+ *pzWordEnd = zText; 1932+ return r; 1933+} 1934+ 1935+/* Return true if the next word in zText is zWord (the whole word, not 1936+ * just a prefix of zWord), also setting *pzContinue to just past it. */ 1937+static int expectWord(const char *zText, const char *zWord, 1938+ const char **pzContinue){ 1939+ const char *zWordStart, *zWordEnd; 1940+ if( findWord(zText, &zWordStart, &zWordEnd) && 1941+ strlen(zWord)==(size_t)(zWordEnd - zWordStart) && 1942+ ascii_strncasecmp(zWord, zWordStart, zWordEnd - zWordStart)==0 ){ 1943+ *pzContinue = zWordEnd; 1944+ return 1; 1945+ } 1946+ return 0; 1947+} 1948+ 1949+/* Parse the name and type information out of parameter. In case of 1950+ * success, *pzNameStart/End contain the name of the column, 1951+ * *pzTypeStart/End contain the top-level type, and *pTypeMask has the 1952+ * type mask to use for the column. Returns SQLITE_OK, or SQLITE_MISUSE 1953+ * if the parameter cannot be parsed. */ 1954+static int findNameAndType(const char *parameter, 1955+ const char **pzNameStart, const char **pzNameEnd, 1956+ const char **pzTypeStart, const char **pzTypeEnd, 1957+ unsigned char *pTypeMask){ 1958+ unsigned nNameLen; /* Length of found type word. */ 1959+ const char *zEnd; /* Current end of parsed column information. */ 1960+ int bNotNull; /* Non-zero if NULL is not allowed for name.
*/ 1961+ int bStrict; /* Non-zero if column requires exact type match. */ 1962+ const char *zDummy; /* Dummy parameter, result unused. */ 1963+ unsigned i; 1964+ 1965+ /* strictMask is used for STRICT, strictMask|otherMask if STRICT is 1966+ * not supplied. zReplace provides an alternate type to expose to 1967+ * the caller. 1968+ */ 1969+ static struct { 1970+ const char *zName; 1971+ unsigned char strictMask; 1972+ unsigned char otherMask; 1973+ const char *zReplace; 1974+ } kTypeInfo[] = { 1975+ { "ANY", 1976+ MASK_INTEGER | MASK_FLOAT | MASK_BLOB | MASK_TEXT | MASK_NULL, 1977+ 0, "", 1978+ }, 1979+ { "ROWID", MASK_INTEGER | MASK_ROWID, 0, "INTEGER", }, 1980+ { "INTEGER", MASK_INTEGER | MASK_NULL, 0, NULL, }, 1981+ { "FLOAT", MASK_FLOAT | MASK_NULL, MASK_INTEGER, NULL, }, 1982+ { "NUMERIC", MASK_INTEGER | MASK_FLOAT | MASK_NULL, MASK_TEXT, NULL, }, 1983+ { "TEXT", MASK_TEXT | MASK_NULL, MASK_BLOB, NULL, }, 1984+ { "BLOB", MASK_BLOB | MASK_NULL, 0, NULL, }, 1985+ }; 1986+ 1987+ if( !findWord(parameter, pzNameStart, pzNameEnd) ){ 1988+ return SQLITE_MISUSE; 1989+ } 1990+ 1991+ /* Manifest typing, accept any storage type. */ 1992+ if( !findWord(*pzNameEnd, pzTypeStart, pzTypeEnd) ){ 1993+ *pzTypeEnd = *pzTypeStart = ""; 1994+ *pTypeMask = MASK_INTEGER | MASK_FLOAT | MASK_BLOB | MASK_TEXT | MASK_NULL; 1995+ return SQLITE_OK; 1996+ } 1997+ nNameLen = *pzTypeEnd - *pzTypeStart; /* Types must match whole-word. */ 1998+ for( i=0; i<ArraySize(kTypeInfo); ++i ){ 1999+ if( strlen(kTypeInfo[i].zName)==nNameLen && 2000+ ascii_strncasecmp(kTypeInfo[i].zName, *pzTypeStart, nNameLen)==0 ){ 2001+ break; 2002+ } 2003+ } 2004+ if( i==ArraySize(kTypeInfo) ){ 2005+ return SQLITE_MISUSE; 2006+ } 2007+ 2008+ zEnd = *pzTypeEnd; 2009+ bStrict = 0; 2010+ if( expectWord(zEnd, "STRICT", &zEnd) ){ 2011+ /* TODO(shess): Ick. But I don't want another single-purpose 2012+ * flag, either.
2013+ */ 2014+ if( kTypeInfo[i].zReplace && !kTypeInfo[i].zReplace[0] ){ 2015+ return SQLITE_MISUSE; 2016+ } 2017+ bStrict = 1; 2018+ } 2019+ 2020+ bNotNull = 0; 2021+ if( expectWord(zEnd, "NOT", &zEnd) ){ 2022+ if( expectWord(zEnd, "NULL", &zEnd) ){ 2023+ bNotNull = 1; 2024+ }else{ 2025+ /* Anything other than NULL after NOT is an error. */ 2026+ return SQLITE_MISUSE; 2027+ } 2028+ } 2029+ 2030+ /* Anything else is an error. */ 2031+ if( findWord(zEnd, &zDummy, &zDummy) ){ 2032+ return SQLITE_MISUSE; 2033+ } 2034+ 2035+ *pTypeMask = kTypeInfo[i].strictMask; 2036+ if( !bStrict ){ 2037+ *pTypeMask |= kTypeInfo[i].otherMask; 2038+ } 2039+ if( bNotNull ){ 2040+ *pTypeMask &= ~MASK_NULL; 2041+ } 2042+ if( kTypeInfo[i].zReplace ){ 2043+ *pzTypeStart = kTypeInfo[i].zReplace; 2044+ *pzTypeEnd = *pzTypeStart + strlen(*pzTypeStart); 2045+ } 2046+ return SQLITE_OK; 2047+} 2048+ 2049+/* Parse the arguments, placing type masks in *pTypes and the exposed 2050+ * schema in *pzCreateSql (for sqlite3_declare_vtab). *pzCreateSql is only 2051+ * written on success. */ 2052+static int ParseColumnsAndGenerateCreate(unsigned nCols, 2053+ const char *const *pCols, 2054+ char **pzCreateSql, 2055+ unsigned char *pTypes, 2056+ char **pzErr){ 2057+ unsigned i; 2058+ char *zCreateSql = sqlite3_mprintf("CREATE TABLE x("); 2059+ if( !zCreateSql ){ 2060+ return SQLITE_NOMEM; 2061+ } 2062+ 2063+ for( i=0; i<nCols; i++ ){ 2064+ const char *zSep = (i < nCols - 1 ? ", " : ")"); 2065+ const char *zNotNull = ""; 2066+ const char *zNameStart, *zNameEnd; 2067+ const char *zTypeStart, *zTypeEnd; 2068+ int rc = findNameAndType(pCols[i], 2069+ &zNameStart, &zNameEnd, 2070+ &zTypeStart, &zTypeEnd, 2071+ &pTypes[i]); 2072+ if( rc!=SQLITE_OK ){ 2073+ *pzErr = sqlite3_mprintf("unable to parse column %d", i); 2074+ sqlite3_free(zCreateSql); 2075+ return rc; 2076+ } 2077+ 2078+ if( !(pTypes[i]&MASK_NULL) ){ 2079+ zNotNull = " NOT NULL"; 2080+ } 2081+ 2082+ /* Add name and type to the create statement.
*/ 2083+ zCreateSql = sqlite3_mprintf("%z%.*s %.*s%s%s", 2084+ zCreateSql, 2085+ zNameEnd - zNameStart, zNameStart, 2086+ zTypeEnd - zTypeStart, zTypeStart, 2087+ zNotNull, zSep); 2088+ if( !zCreateSql ){ 2089+ return SQLITE_NOMEM; 2090+ } 2091+ } 2092+ 2093+ *pzCreateSql = zCreateSql; 2094+ return SQLITE_OK; 2095+} 2096+ 2097+/* Helper function for initializing the module. */ 2098+/* argv[0] module name 2099+ * argv[1] db name for virtual table 2100+ * argv[2] virtual table name 2101+ * argv[3] backing table name 2102+ * argv[4] columns 2103+ */ 2104+/* TODO(shess): Since connect isn't supported, could inline into 2105+ * recoverCreate(). 2106+ */ 2107+/* TODO(shess): Explore cases where it would make sense to set *pzErr. */ 2108+static int recoverInit( 2109+ sqlite3 *db, /* Database connection */ 2110+ void *pAux, /* unused */ 2111+ int argc, const char *const*argv, /* Parameters to CREATE TABLE statement */ 2112+ sqlite3_vtab **ppVtab, /* OUT: New virtual table */ 2113+ char **pzErr /* OUT: Error message, if any */ 2114+){ 2115+ const unsigned kTypeCol = 4; /* First argument with column type info. */ 2116+ Recover *pRecover; /* Virtual table structure being created. */ 2117+ char *zDot; /* Any dot found in "db.table" backing. */ 2118+ u32 iRootPage; /* Root page of backing table. */ 2119+ char *zCreateSql; /* Schema of created virtual table. */ 2120+ int rc; 2121+ 2122+ /* Require to be in the temp database. */ 2123+ if( ascii_strcasecmp(argv[1], "temp")!=0 ){ 2124+ *pzErr = sqlite3_mprintf("recover table must be in temp database"); 2125+ return SQLITE_MISUSE; 2126+ } 2127+ 2128+ /* Need the backing table and at least one column. 
*/ 2129+ if( argc<=kTypeCol ){ 2130+ *pzErr = sqlite3_mprintf("no columns specified"); 2131+ return SQLITE_MISUSE; 2132+ } 2133+ 2134+ pRecover = sqlite3_malloc(sizeof(Recover)); 2135+ if( !pRecover ){ 2136+ return SQLITE_NOMEM; 2137+ } 2138+ memset(pRecover, 0, sizeof(*pRecover)); 2139+ pRecover->base.pModule = &recoverModule; 2140+ pRecover->db = db; 2141+ 2142+ /* Parse out db.table, assuming main if no dot. */ 2143+ zDot = strchr(argv[3], '.'); 2144+ if( !zDot ){ 2145+ pRecover->zDb = sqlite3_strdup(db->aDb[0].zName); 2146+ pRecover->zTable = sqlite3_strdup(argv[3]); 2147+ }else if( zDot>argv[3] && zDot[1]!='\0' ){ 2148+ pRecover->zDb = sqlite3_strndup(argv[3], zDot - argv[3]); 2149+ pRecover->zTable = sqlite3_strdup(zDot + 1); 2150+ }else{ 2151+ /* ".table" or "db." not allowed. */ 2152+ *pzErr = sqlite3_mprintf("ill-formed table specifier"); 2153+ recoverRelease(pRecover); 2154+ return SQLITE_ERROR; 2155+ } 2156+ 2157+ pRecover->nCols = argc - kTypeCol; 2158+ pRecover->pTypes = sqlite3_malloc(pRecover->nCols); 2159+ if( !pRecover->zDb || !pRecover->zTable || !pRecover->pTypes ){ 2160+ recoverRelease(pRecover); 2161+ return SQLITE_NOMEM; 2162+ } 2163+ 2164+ /* Require the backing table to exist. */ 2165+ /* TODO(shess): Be more pedantic about the form of the descriptor 2166+ * string. This already fails for poorly-formed strings, simply 2167+ * because there won't be a root page, but it would make more sense 2168+ * to be explicit. 2169+ */ 2170+ rc = getRootPage(pRecover->db, pRecover->zDb, pRecover->zTable, &iRootPage); 2171+ if( rc!=SQLITE_OK ){ 2172+ *pzErr = sqlite3_mprintf("unable to find backing table"); 2173+ recoverRelease(pRecover); 2174+ return rc; 2175+ } 2176+ 2177+ /* Parse the column definitions. 
*/ 2178+ rc = ParseColumnsAndGenerateCreate(pRecover->nCols, argv + kTypeCol, 2179+ &zCreateSql, pRecover->pTypes, pzErr); 2180+ if( rc!=SQLITE_OK ){ 2181+ recoverRelease(pRecover); 2182+ return rc; 2183+ } 2184+ 2185+ rc = sqlite3_declare_vtab(db, zCreateSql); 2186+ sqlite3_free(zCreateSql); 2187+ if( rc!=SQLITE_OK ){ 2188+ recoverRelease(pRecover); 2189+ return rc; 2190+ } 2191+ 2192+ *ppVtab = (sqlite3_vtab *)pRecover; 2193+ return SQLITE_OK; 2194+} 2195