1 /*
2 ** 2009 Oct 23
3 **
4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
6 **
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file is part of the SQLite FTS3 extension module. Specifically,
14 ** this file contains code to insert, update and delete rows from FTS3
15 ** tables. It also contains code to merge FTS3 b-tree segments. Some
16 ** of the sub-routines used to merge segments are also used by the query
17 ** code in fts3.c.
18 */
19
20 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
21
22 #include "fts3Int.h"
23 #include <string.h>
24 #include <assert.h>
25 #include <stdlib.h>
26
27 /*
28 ** When full-text index nodes are loaded from disk, the buffer that they
29 ** are loaded into has the following number of bytes of padding at the end
30 ** of it. i.e. if a full-text index node is 900 bytes in size, then a buffer
31 ** of 920 bytes is allocated for it.
32 **
33 ** This means that if we have a pointer into a buffer containing node data,
34 ** it is always safe to read up to two varints from it without risking an
35 ** overread, even if the node data is corrupted.
36 */
37 #define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2)
38
39 typedef struct PendingList PendingList;
40 typedef struct SegmentNode SegmentNode;
41 typedef struct SegmentWriter SegmentWriter;
42
43 /*
44 ** Data structure used while accumulating terms in the pending-terms hash
45 ** table. The hash table entry maps from term (a string) to a malloc'd
46 ** instance of this structure.
47 */
48 struct PendingList {
49 int nData;
50 char *aData;
51 int nSpace;
52 sqlite3_int64 iLastDocid;
53 sqlite3_int64 iLastCol;
54 sqlite3_int64 iLastPos;
55 };
56
57
58 /*
59 ** Each cursor has a (possibly empty) linked list of the following objects.
60 */
61 struct Fts3DeferredToken {
62 Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */
63 int iCol; /* Column token must occur in */
64 Fts3DeferredToken *pNext; /* Next in list of deferred tokens */
65 PendingList *pList; /* Doclist is assembled here */
66 };
67
68 /*
69 ** An instance of this structure is used to iterate through the terms on
70 ** a contiguous set of segment b-tree leaf nodes. Although the details of
71 ** this structure are only manipulated by code in this file, opaque handles
72 ** of type Fts3SegReader* are also used by code in fts3.c to iterate through
73 ** terms when querying the full-text index. See functions:
74 **
75 ** sqlite3Fts3SegReaderNew()
76 ** sqlite3Fts3SegReaderFree()
77 ** sqlite3Fts3SegReaderCost()
78 ** sqlite3Fts3SegReaderIterate()
79 **
80 ** Methods used to manipulate Fts3SegReader structures:
81 **
82 ** fts3SegReaderNext()
83 ** fts3SegReaderFirstDocid()
84 ** fts3SegReaderNextDocid()
85 */
86 struct Fts3SegReader {
87 int iIdx; /* Index within level, or 0x7FFFFFFF for PT */
88
89 sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */
90 sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */
91 sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */
92 sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */
93
94 char *aNode; /* Pointer to node data (or NULL) */
95 int nNode; /* Size of buffer at aNode (or 0) */
96 Fts3HashElem **ppNextElem;
97
98 /* Variables set by fts3SegReaderNext(). These may be read directly
99 ** by the caller. They are valid from the time SegmentReaderNew() returns
100 ** until SegmentReaderNext() returns something other than SQLITE_OK
101 ** (i.e. SQLITE_DONE).
102 */
103 int nTerm; /* Number of bytes in current term */
104 char *zTerm; /* Pointer to current term */
105 int nTermAlloc; /* Allocated size of zTerm buffer */
106 char *aDoclist; /* Pointer to doclist of current entry */
107 int nDoclist; /* Size of doclist in current entry */
108
109 /* The following variables are used to iterate through the current doclist */
110 char *pOffsetList;
111 sqlite3_int64 iDocid;
112 };
113
114 #define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0)
115 #define fts3SegReaderIsRootOnly(p) ((p)->aNode==(char *)&(p)[1])
116
117 /*
118 ** An instance of this structure is used to create a segment b-tree in the
119 ** database. The internal details of this type are only accessed by the
120 ** following functions:
121 **
122 ** fts3SegWriterAdd()
123 ** fts3SegWriterFlush()
124 ** fts3SegWriterFree()
125 */
126 struct SegmentWriter {
127 SegmentNode *pTree; /* Pointer to interior tree structure */
128 sqlite3_int64 iFirst; /* First slot in %_segments written */
129 sqlite3_int64 iFree; /* Next free slot in %_segments */
130 char *zTerm; /* Pointer to previous term buffer */
131 int nTerm; /* Number of bytes in zTerm */
132 int nMalloc; /* Size of malloc'd buffer at zMalloc */
133 char *zMalloc; /* Malloc'd space (possibly) used for zTerm */
134 int nSize; /* Size of allocation at aData */
135 int nData; /* Bytes of data in aData */
136 char *aData; /* Pointer to block from malloc() */
137 };
138
139 /*
140 ** Type SegmentNode is used by the following three functions to create
141 ** the interior part of the segment b+-tree structures (everything except
142 ** the leaf nodes). These functions and type are only ever used by code
143 ** within the fts3SegWriterXXX() family of functions described above.
144 **
145 ** fts3NodeAddTerm()
146 ** fts3NodeWrite()
147 ** fts3NodeFree()
148 */
149 struct SegmentNode {
150 SegmentNode *pParent; /* Parent node (or NULL for root node) */
151 SegmentNode *pRight; /* Pointer to right-sibling */
152 SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */
153 int nEntry; /* Number of terms written to node so far */
154 char *zTerm; /* Pointer to previous term buffer */
155 int nTerm; /* Number of bytes in zTerm */
156 int nMalloc; /* Size of malloc'd buffer at zMalloc */
157 char *zMalloc; /* Malloc'd space (possibly) used for zTerm */
158 int nData; /* Bytes of valid data so far */
159 char *aData; /* Node data */
160 };
161
162 /*
163 ** Valid values for the second argument to fts3SqlStmt().
164 */
165 #define SQL_DELETE_CONTENT 0
166 #define SQL_IS_EMPTY 1
167 #define SQL_DELETE_ALL_CONTENT 2
168 #define SQL_DELETE_ALL_SEGMENTS 3
169 #define SQL_DELETE_ALL_SEGDIR 4
170 #define SQL_DELETE_ALL_DOCSIZE 5
171 #define SQL_DELETE_ALL_STAT 6
172 #define SQL_SELECT_CONTENT_BY_ROWID 7
173 #define SQL_NEXT_SEGMENT_INDEX 8
174 #define SQL_INSERT_SEGMENTS 9
175 #define SQL_NEXT_SEGMENTS_ID 10
176 #define SQL_INSERT_SEGDIR 11
177 #define SQL_SELECT_LEVEL 12
178 #define SQL_SELECT_ALL_LEVEL 13
179 #define SQL_SELECT_LEVEL_COUNT 14
180 #define SQL_SELECT_SEGDIR_COUNT_MAX 15
181 #define SQL_DELETE_SEGDIR_BY_LEVEL 16
182 #define SQL_DELETE_SEGMENTS_RANGE 17
183 #define SQL_CONTENT_INSERT 18
184 #define SQL_DELETE_DOCSIZE 19
185 #define SQL_REPLACE_DOCSIZE 20
186 #define SQL_SELECT_DOCSIZE 21
187 #define SQL_SELECT_DOCTOTAL 22
188 #define SQL_REPLACE_DOCTOTAL 23
189
190 /*
191 ** This function is used to obtain an SQLite prepared statement handle
192 ** for the statement identified by the second argument. If successful,
193 ** *pp is set to the requested statement handle and SQLITE_OK returned.
194 ** Otherwise, an SQLite error code is returned and *pp is set to 0.
195 **
196 ** If argument apVal is not NULL, then it must point to an array with
197 ** at least as many entries as the requested statement has bound
198 ** parameters. The values are bound to the statements parameters before
199 ** returning.
200 */
fts3SqlStmt(Fts3Table * p,int eStmt,sqlite3_stmt ** pp,sqlite3_value ** apVal)201 static int fts3SqlStmt(
202 Fts3Table *p, /* Virtual table handle */
203 int eStmt, /* One of the SQL_XXX constants above */
204 sqlite3_stmt **pp, /* OUT: Statement handle */
205 sqlite3_value **apVal /* Values to bind to statement */
206 ){
207 const char *azSql[] = {
208 /* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?",
209 /* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)",
210 /* 2 */ "DELETE FROM %Q.'%q_content'",
211 /* 3 */ "DELETE FROM %Q.'%q_segments'",
212 /* 4 */ "DELETE FROM %Q.'%q_segdir'",
213 /* 5 */ "DELETE FROM %Q.'%q_docsize'",
214 /* 6 */ "DELETE FROM %Q.'%q_stat'",
215 /* 7 */ "SELECT %s FROM %Q.'%q_content' AS x WHERE rowid=?",
216 /* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1",
217 /* 9 */ "INSERT INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)",
218 /* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)",
219 /* 11 */ "INSERT INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)",
220
221 /* Return segments in order from oldest to newest.*/
222 /* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
223 "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC",
224 /* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
225 "FROM %Q.'%q_segdir' ORDER BY level DESC, idx ASC",
226
227 /* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?",
228 /* 15 */ "SELECT count(*), max(level) FROM %Q.'%q_segdir'",
229
230 /* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?",
231 /* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?",
232 /* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)",
233 /* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?",
234 /* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)",
235 /* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
236 /* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=0",
237 /* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(0,?)",
238 };
239 int rc = SQLITE_OK;
240 sqlite3_stmt *pStmt;
241
242 assert( SizeofArray(azSql)==SizeofArray(p->aStmt) );
243 assert( eStmt<SizeofArray(azSql) && eStmt>=0 );
244
245 pStmt = p->aStmt[eStmt];
246 if( !pStmt ){
247 char *zSql;
248 if( eStmt==SQL_CONTENT_INSERT ){
249 zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist);
250 }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){
251 zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist, p->zDb, p->zName);
252 }else{
253 zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName);
254 }
255 if( !zSql ){
256 rc = SQLITE_NOMEM;
257 }else{
258 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL);
259 sqlite3_free(zSql);
260 assert( rc==SQLITE_OK || pStmt==0 );
261 p->aStmt[eStmt] = pStmt;
262 }
263 }
264 if( apVal ){
265 int i;
266 int nParam = sqlite3_bind_parameter_count(pStmt);
267 for(i=0; rc==SQLITE_OK && i<nParam; i++){
268 rc = sqlite3_bind_value(pStmt, i+1, apVal[i]);
269 }
270 }
271 *pp = pStmt;
272 return rc;
273 }
274
fts3SelectDocsize(Fts3Table * pTab,int eStmt,sqlite3_int64 iDocid,sqlite3_stmt ** ppStmt)275 static int fts3SelectDocsize(
276 Fts3Table *pTab, /* FTS3 table handle */
277 int eStmt, /* Either SQL_SELECT_DOCSIZE or DOCTOTAL */
278 sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */
279 sqlite3_stmt **ppStmt /* OUT: Statement handle */
280 ){
281 sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */
282 int rc; /* Return code */
283
284 assert( eStmt==SQL_SELECT_DOCSIZE || eStmt==SQL_SELECT_DOCTOTAL );
285
286 rc = fts3SqlStmt(pTab, eStmt, &pStmt, 0);
287 if( rc==SQLITE_OK ){
288 if( eStmt==SQL_SELECT_DOCSIZE ){
289 sqlite3_bind_int64(pStmt, 1, iDocid);
290 }
291 rc = sqlite3_step(pStmt);
292 if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){
293 rc = sqlite3_reset(pStmt);
294 if( rc==SQLITE_OK ) rc = SQLITE_CORRUPT;
295 pStmt = 0;
296 }else{
297 rc = SQLITE_OK;
298 }
299 }
300
301 *ppStmt = pStmt;
302 return rc;
303 }
304
sqlite3Fts3SelectDoctotal(Fts3Table * pTab,sqlite3_stmt ** ppStmt)305 int sqlite3Fts3SelectDoctotal(
306 Fts3Table *pTab, /* Fts3 table handle */
307 sqlite3_stmt **ppStmt /* OUT: Statement handle */
308 ){
309 return fts3SelectDocsize(pTab, SQL_SELECT_DOCTOTAL, 0, ppStmt);
310 }
311
sqlite3Fts3SelectDocsize(Fts3Table * pTab,sqlite3_int64 iDocid,sqlite3_stmt ** ppStmt)312 int sqlite3Fts3SelectDocsize(
313 Fts3Table *pTab, /* Fts3 table handle */
314 sqlite3_int64 iDocid, /* Docid to read size data for */
315 sqlite3_stmt **ppStmt /* OUT: Statement handle */
316 ){
317 return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt);
318 }
319
320 /*
321 ** Similar to fts3SqlStmt(). Except, after binding the parameters in
322 ** array apVal[] to the SQL statement identified by eStmt, the statement
323 ** is executed.
324 **
325 ** Returns SQLITE_OK if the statement is successfully executed, or an
326 ** SQLite error code otherwise.
327 */
fts3SqlExec(int * pRC,Fts3Table * p,int eStmt,sqlite3_value ** apVal)328 static void fts3SqlExec(
329 int *pRC, /* Result code */
330 Fts3Table *p, /* The FTS3 table */
331 int eStmt, /* Index of statement to evaluate */
332 sqlite3_value **apVal /* Parameters to bind */
333 ){
334 sqlite3_stmt *pStmt;
335 int rc;
336 if( *pRC ) return;
337 rc = fts3SqlStmt(p, eStmt, &pStmt, apVal);
338 if( rc==SQLITE_OK ){
339 sqlite3_step(pStmt);
340 rc = sqlite3_reset(pStmt);
341 }
342 *pRC = rc;
343 }
344
345
346 /*
347 ** This function ensures that the caller has obtained a shared-cache
348 ** table-lock on the %_content table. This is required before reading
349 ** data from the fts3 table. If this lock is not acquired first, then
350 ** the caller may end up holding read-locks on the %_segments and %_segdir
351 ** tables, but no read-lock on the %_content table. If this happens
352 ** a second connection will be able to write to the fts3 table, but
353 ** attempting to commit those writes might return SQLITE_LOCKED or
354 ** SQLITE_LOCKED_SHAREDCACHE (because the commit attempts to obtain
355 ** write-locks on the %_segments and %_segdir ** tables).
356 **
357 ** We try to avoid this because if FTS3 returns any error when committing
358 ** a transaction, the whole transaction will be rolled back. And this is
359 ** not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. It can
360 ** still happen if the user reads data directly from the %_segments or
361 ** %_segdir tables instead of going through FTS3 though.
362 */
sqlite3Fts3ReadLock(Fts3Table * p)363 int sqlite3Fts3ReadLock(Fts3Table *p){
364 int rc; /* Return code */
365 sqlite3_stmt *pStmt; /* Statement used to obtain lock */
366
367 rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pStmt, 0);
368 if( rc==SQLITE_OK ){
369 sqlite3_bind_null(pStmt, 1);
370 sqlite3_step(pStmt);
371 rc = sqlite3_reset(pStmt);
372 }
373 return rc;
374 }
375
376 /*
377 ** Set *ppStmt to a statement handle that may be used to iterate through
378 ** all rows in the %_segdir table, from oldest to newest. If successful,
379 ** return SQLITE_OK. If an error occurs while preparing the statement,
380 ** return an SQLite error code.
381 **
382 ** There is only ever one instance of this SQL statement compiled for
383 ** each FTS3 table.
384 **
385 ** The statement returns the following columns from the %_segdir table:
386 **
387 ** 0: idx
388 ** 1: start_block
389 ** 2: leaves_end_block
390 ** 3: end_block
391 ** 4: root
392 */
sqlite3Fts3AllSegdirs(Fts3Table * p,int iLevel,sqlite3_stmt ** ppStmt)393 int sqlite3Fts3AllSegdirs(Fts3Table *p, int iLevel, sqlite3_stmt **ppStmt){
394 int rc;
395 sqlite3_stmt *pStmt = 0;
396 if( iLevel<0 ){
397 rc = fts3SqlStmt(p, SQL_SELECT_ALL_LEVEL, &pStmt, 0);
398 }else{
399 rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
400 if( rc==SQLITE_OK ) sqlite3_bind_int(pStmt, 1, iLevel);
401 }
402 *ppStmt = pStmt;
403 return rc;
404 }
405
406
407 /*
408 ** Append a single varint to a PendingList buffer. SQLITE_OK is returned
409 ** if successful, or an SQLite error code otherwise.
410 **
411 ** This function also serves to allocate the PendingList structure itself.
412 ** For example, to create a new PendingList structure containing two
413 ** varints:
414 **
415 ** PendingList *p = 0;
416 ** fts3PendingListAppendVarint(&p, 1);
417 ** fts3PendingListAppendVarint(&p, 2);
418 */
fts3PendingListAppendVarint(PendingList ** pp,sqlite3_int64 i)419 static int fts3PendingListAppendVarint(
420 PendingList **pp, /* IN/OUT: Pointer to PendingList struct */
421 sqlite3_int64 i /* Value to append to data */
422 ){
423 PendingList *p = *pp;
424
425 /* Allocate or grow the PendingList as required. */
426 if( !p ){
427 p = sqlite3_malloc(sizeof(*p) + 100);
428 if( !p ){
429 return SQLITE_NOMEM;
430 }
431 p->nSpace = 100;
432 p->aData = (char *)&p[1];
433 p->nData = 0;
434 }
435 else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){
436 int nNew = p->nSpace * 2;
437 p = sqlite3_realloc(p, sizeof(*p) + nNew);
438 if( !p ){
439 sqlite3_free(*pp);
440 *pp = 0;
441 return SQLITE_NOMEM;
442 }
443 p->nSpace = nNew;
444 p->aData = (char *)&p[1];
445 }
446
447 /* Append the new serialized varint to the end of the list. */
448 p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i);
449 p->aData[p->nData] = '\0';
450 *pp = p;
451 return SQLITE_OK;
452 }
453
454 /*
455 ** Add a docid/column/position entry to a PendingList structure. Non-zero
456 ** is returned if the structure is sqlite3_realloced as part of adding
457 ** the entry. Otherwise, zero.
458 **
459 ** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning.
460 ** Zero is always returned in this case. Otherwise, if no OOM error occurs,
461 ** it is set to SQLITE_OK.
462 */
fts3PendingListAppend(PendingList ** pp,sqlite3_int64 iDocid,sqlite3_int64 iCol,sqlite3_int64 iPos,int * pRc)463 static int fts3PendingListAppend(
464 PendingList **pp, /* IN/OUT: PendingList structure */
465 sqlite3_int64 iDocid, /* Docid for entry to add */
466 sqlite3_int64 iCol, /* Column for entry to add */
467 sqlite3_int64 iPos, /* Position of term for entry to add */
468 int *pRc /* OUT: Return code */
469 ){
470 PendingList *p = *pp;
471 int rc = SQLITE_OK;
472
473 assert( !p || p->iLastDocid<=iDocid );
474
475 if( !p || p->iLastDocid!=iDocid ){
476 sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0);
477 if( p ){
478 assert( p->nData<p->nSpace );
479 assert( p->aData[p->nData]==0 );
480 p->nData++;
481 }
482 if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){
483 goto pendinglistappend_out;
484 }
485 p->iLastCol = -1;
486 p->iLastPos = 0;
487 p->iLastDocid = iDocid;
488 }
489 if( iCol>0 && p->iLastCol!=iCol ){
490 if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1))
491 || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol))
492 ){
493 goto pendinglistappend_out;
494 }
495 p->iLastCol = iCol;
496 p->iLastPos = 0;
497 }
498 if( iCol>=0 ){
499 assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) );
500 rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos);
501 if( rc==SQLITE_OK ){
502 p->iLastPos = iPos;
503 }
504 }
505
506 pendinglistappend_out:
507 *pRc = rc;
508 if( p!=*pp ){
509 *pp = p;
510 return 1;
511 }
512 return 0;
513 }
514
515 /*
516 ** Tokenize the nul-terminated string zText and add all tokens to the
517 ** pending-terms hash-table. The docid used is that currently stored in
518 ** p->iPrevDocid, and the column is specified by argument iCol.
519 **
520 ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
521 */
fts3PendingTermsAdd(Fts3Table * p,const char * zText,int iCol,u32 * pnWord)522 static int fts3PendingTermsAdd(
523 Fts3Table *p, /* Table into which text will be inserted */
524 const char *zText, /* Text of document to be inserted */
525 int iCol, /* Column into which text is being inserted */
526 u32 *pnWord /* OUT: Number of tokens inserted */
527 ){
528 int rc;
529 int iStart;
530 int iEnd;
531 int iPos;
532 int nWord = 0;
533
534 char const *zToken;
535 int nToken;
536
537 sqlite3_tokenizer *pTokenizer = p->pTokenizer;
538 sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
539 sqlite3_tokenizer_cursor *pCsr;
540 int (*xNext)(sqlite3_tokenizer_cursor *pCursor,
541 const char**,int*,int*,int*,int*);
542
543 assert( pTokenizer && pModule );
544
545 rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr);
546 if( rc!=SQLITE_OK ){
547 return rc;
548 }
549 pCsr->pTokenizer = pTokenizer;
550
551 xNext = pModule->xNext;
552 while( SQLITE_OK==rc
553 && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos))
554 ){
555 PendingList *pList;
556
557 if( iPos>=nWord ) nWord = iPos+1;
558
559 /* Positions cannot be negative; we use -1 as a terminator internally.
560 ** Tokens must have a non-zero length.
561 */
562 if( iPos<0 || !zToken || nToken<=0 ){
563 rc = SQLITE_ERROR;
564 break;
565 }
566
567 pList = (PendingList *)fts3HashFind(&p->pendingTerms, zToken, nToken);
568 if( pList ){
569 p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem));
570 }
571 if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){
572 if( pList==fts3HashInsert(&p->pendingTerms, zToken, nToken, pList) ){
573 /* Malloc failed while inserting the new entry. This can only
574 ** happen if there was no previous entry for this token.
575 */
576 assert( 0==fts3HashFind(&p->pendingTerms, zToken, nToken) );
577 sqlite3_free(pList);
578 rc = SQLITE_NOMEM;
579 }
580 }
581 if( rc==SQLITE_OK ){
582 p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem));
583 }
584 }
585
586 pModule->xClose(pCsr);
587 *pnWord = nWord;
588 return (rc==SQLITE_DONE ? SQLITE_OK : rc);
589 }
590
591 /*
592 ** Calling this function indicates that subsequent calls to
593 ** fts3PendingTermsAdd() are to add term/position-list pairs for the
594 ** contents of the document with docid iDocid.
595 */
fts3PendingTermsDocid(Fts3Table * p,sqlite_int64 iDocid)596 static int fts3PendingTermsDocid(Fts3Table *p, sqlite_int64 iDocid){
597 /* TODO(shess) Explore whether partially flushing the buffer on
598 ** forced-flush would provide better performance. I suspect that if
599 ** we ordered the doclists by size and flushed the largest until the
600 ** buffer was half empty, that would let the less frequent terms
601 ** generate longer doclists.
602 */
603 if( iDocid<=p->iPrevDocid || p->nPendingData>p->nMaxPendingData ){
604 int rc = sqlite3Fts3PendingTermsFlush(p);
605 if( rc!=SQLITE_OK ) return rc;
606 }
607 p->iPrevDocid = iDocid;
608 return SQLITE_OK;
609 }
610
611 /*
612 ** Discard the contents of the pending-terms hash table.
613 */
sqlite3Fts3PendingTermsClear(Fts3Table * p)614 void sqlite3Fts3PendingTermsClear(Fts3Table *p){
615 Fts3HashElem *pElem;
616 for(pElem=fts3HashFirst(&p->pendingTerms); pElem; pElem=fts3HashNext(pElem)){
617 sqlite3_free(fts3HashData(pElem));
618 }
619 fts3HashClear(&p->pendingTerms);
620 p->nPendingData = 0;
621 }
622
623 /*
624 ** This function is called by the xUpdate() method as part of an INSERT
625 ** operation. It adds entries for each term in the new record to the
626 ** pendingTerms hash table.
627 **
628 ** Argument apVal is the same as the similarly named argument passed to
629 ** fts3InsertData(). Parameter iDocid is the docid of the new row.
630 */
fts3InsertTerms(Fts3Table * p,sqlite3_value ** apVal,u32 * aSz)631 static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){
632 int i; /* Iterator variable */
633 for(i=2; i<p->nColumn+2; i++){
634 const char *zText = (const char *)sqlite3_value_text(apVal[i]);
635 if( zText ){
636 int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
637 if( rc!=SQLITE_OK ){
638 return rc;
639 }
640 }
641 aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]);
642 }
643 return SQLITE_OK;
644 }
645
646 /*
647 ** This function is called by the xUpdate() method for an INSERT operation.
648 ** The apVal parameter is passed a copy of the apVal argument passed by
649 ** SQLite to the xUpdate() method. i.e:
650 **
651 ** apVal[0] Not used for INSERT.
652 ** apVal[1] rowid
653 ** apVal[2] Left-most user-defined column
654 ** ...
655 ** apVal[p->nColumn+1] Right-most user-defined column
656 ** apVal[p->nColumn+2] Hidden column with same name as table
657 ** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid)
658 */
fts3InsertData(Fts3Table * p,sqlite3_value ** apVal,sqlite3_int64 * piDocid)659 static int fts3InsertData(
660 Fts3Table *p, /* Full-text table */
661 sqlite3_value **apVal, /* Array of values to insert */
662 sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */
663 ){
664 int rc; /* Return code */
665 sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */
666
667 /* Locate the statement handle used to insert data into the %_content
668 ** table. The SQL for this statement is:
669 **
670 ** INSERT INTO %_content VALUES(?, ?, ?, ...)
671 **
672 ** The statement features N '?' variables, where N is the number of user
673 ** defined columns in the FTS3 table, plus one for the docid field.
674 */
675 rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
676 if( rc!=SQLITE_OK ){
677 return rc;
678 }
679
680 /* There is a quirk here. The users INSERT statement may have specified
681 ** a value for the "rowid" field, for the "docid" field, or for both.
682 ** Which is a problem, since "rowid" and "docid" are aliases for the
683 ** same value. For example:
684 **
685 ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2);
686 **
687 ** In FTS3, this is an error. It is an error to specify non-NULL values
688 ** for both docid and some other rowid alias.
689 */
690 if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){
691 if( SQLITE_NULL==sqlite3_value_type(apVal[0])
692 && SQLITE_NULL!=sqlite3_value_type(apVal[1])
693 ){
694 /* A rowid/docid conflict. */
695 return SQLITE_ERROR;
696 }
697 rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]);
698 if( rc!=SQLITE_OK ) return rc;
699 }
700
701 /* Execute the statement to insert the record. Set *piDocid to the
702 ** new docid value.
703 */
704 sqlite3_step(pContentInsert);
705 rc = sqlite3_reset(pContentInsert);
706
707 *piDocid = sqlite3_last_insert_rowid(p->db);
708 return rc;
709 }
710
711
712
713 /*
714 ** Remove all data from the FTS3 table. Clear the hash table containing
715 ** pending terms.
716 */
fts3DeleteAll(Fts3Table * p)717 static int fts3DeleteAll(Fts3Table *p){
718 int rc = SQLITE_OK; /* Return code */
719
720 /* Discard the contents of the pending-terms hash table. */
721 sqlite3Fts3PendingTermsClear(p);
722
723 /* Delete everything from the %_content, %_segments and %_segdir tables. */
724 fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0);
725 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0);
726 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
727 if( p->bHasDocsize ){
728 fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0);
729 }
730 if( p->bHasStat ){
731 fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0);
732 }
733 return rc;
734 }
735
736 /*
737 ** The first element in the apVal[] array is assumed to contain the docid
738 ** (an integer) of a row about to be deleted. Remove all terms from the
739 ** full-text index.
740 */
fts3DeleteTerms(int * pRC,Fts3Table * p,sqlite3_value ** apVal,u32 * aSz)741 static void fts3DeleteTerms(
742 int *pRC, /* Result code */
743 Fts3Table *p, /* The FTS table to delete from */
744 sqlite3_value **apVal, /* apVal[] contains the docid to be deleted */
745 u32 *aSz /* Sizes of deleted document written here */
746 ){
747 int rc;
748 sqlite3_stmt *pSelect;
749
750 if( *pRC ) return;
751 rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, apVal);
752 if( rc==SQLITE_OK ){
753 if( SQLITE_ROW==sqlite3_step(pSelect) ){
754 int i;
755 for(i=1; i<=p->nColumn; i++){
756 const char *zText = (const char *)sqlite3_column_text(pSelect, i);
757 rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
758 if( rc!=SQLITE_OK ){
759 sqlite3_reset(pSelect);
760 *pRC = rc;
761 return;
762 }
763 aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
764 }
765 }
766 rc = sqlite3_reset(pSelect);
767 }else{
768 sqlite3_reset(pSelect);
769 }
770 *pRC = rc;
771 }
772
773 /*
774 ** Forward declaration to account for the circular dependency between
775 ** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
776 */
777 static int fts3SegmentMerge(Fts3Table *, int);
778
779 /*
780 ** This function allocates a new level iLevel index in the segdir table.
781 ** Usually, indexes are allocated within a level sequentially starting
782 ** with 0, so the allocated index is one greater than the value returned
783 ** by:
784 **
785 ** SELECT max(idx) FROM %_segdir WHERE level = :iLevel
786 **
787 ** However, if there are already FTS3_MERGE_COUNT indexes at the requested
788 ** level, they are merged into a single level (iLevel+1) segment and the
789 ** allocated index is 0.
790 **
791 ** If successful, *piIdx is set to the allocated index slot and SQLITE_OK
792 ** returned. Otherwise, an SQLite error code is returned.
793 */
fts3AllocateSegdirIdx(Fts3Table * p,int iLevel,int * piIdx)794 static int fts3AllocateSegdirIdx(Fts3Table *p, int iLevel, int *piIdx){
795 int rc; /* Return Code */
796 sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */
797 int iNext = 0; /* Result of query pNextIdx */
798
799 /* Set variable iNext to the next available segdir index at level iLevel. */
800 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0);
801 if( rc==SQLITE_OK ){
802 sqlite3_bind_int(pNextIdx, 1, iLevel);
803 if( SQLITE_ROW==sqlite3_step(pNextIdx) ){
804 iNext = sqlite3_column_int(pNextIdx, 0);
805 }
806 rc = sqlite3_reset(pNextIdx);
807 }
808
809 if( rc==SQLITE_OK ){
810 /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already
811 ** full, merge all segments in level iLevel into a single iLevel+1
812 ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise,
813 ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
814 */
815 if( iNext>=FTS3_MERGE_COUNT ){
816 rc = fts3SegmentMerge(p, iLevel);
817 *piIdx = 0;
818 }else{
819 *piIdx = iNext;
820 }
821 }
822
823 return rc;
824 }
825
826 /*
827 ** The %_segments table is declared as follows:
828 **
829 ** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB)
830 **
831 ** This function reads data from a single row of the %_segments table. The
832 ** specific row is identified by the iBlockid parameter. If paBlob is not
833 ** NULL, then a buffer is allocated using sqlite3_malloc() and populated
834 ** with the contents of the blob stored in the "block" column of the
835 ** identified table row is. Whether or not paBlob is NULL, *pnBlob is set
836 ** to the size of the blob in bytes before returning.
837 **
838 ** If an error occurs, or the table does not contain the specified row,
839 ** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If
840 ** paBlob is non-NULL, then it is the responsibility of the caller to
841 ** eventually free the returned buffer.
842 **
843 ** This function may leave an open sqlite3_blob* handle in the
844 ** Fts3Table.pSegments variable. This handle is reused by subsequent calls
845 ** to this function. The handle may be closed by calling the
846 ** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy
847 ** performance improvement, but the blob handle should always be closed
848 ** before control is returned to the user (to prevent a lock being held
849 ** on the database file for longer than necessary). Thus, any virtual table
850 ** method (xFilter etc.) that may directly or indirectly call this function
851 ** must call sqlite3Fts3SegmentsClose() before returning.
852 */
sqlite3Fts3ReadBlock(Fts3Table * p,sqlite3_int64 iBlockid,char ** paBlob,int * pnBlob)853 int sqlite3Fts3ReadBlock(
854 Fts3Table *p, /* FTS3 table handle */
855 sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */
856 char **paBlob, /* OUT: Blob data in malloc'd buffer */
857 int *pnBlob /* OUT: Size of blob data */
858 ){
859 int rc; /* Return code */
860
861 /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */
862 assert( pnBlob);
863
864 if( p->pSegments ){
865 rc = sqlite3_blob_reopen(p->pSegments, iBlockid);
866 }else{
867 if( 0==p->zSegmentsTbl ){
868 p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName);
869 if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM;
870 }
871 rc = sqlite3_blob_open(
872 p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments
873 );
874 }
875
876 if( rc==SQLITE_OK ){
877 int nByte = sqlite3_blob_bytes(p->pSegments);
878 if( paBlob ){
879 char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING);
880 if( !aByte ){
881 rc = SQLITE_NOMEM;
882 }else{
883 rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0);
884 memset(&aByte[nByte], 0, FTS3_NODE_PADDING);
885 if( rc!=SQLITE_OK ){
886 sqlite3_free(aByte);
887 aByte = 0;
888 }
889 }
890 *paBlob = aByte;
891 }
892 *pnBlob = nByte;
893 }
894
895 return rc;
896 }
897
898 /*
899 ** Close the blob handle at p->pSegments, if it is open. See comments above
900 ** the sqlite3Fts3ReadBlock() function for details.
901 */
sqlite3Fts3SegmentsClose(Fts3Table * p)902 void sqlite3Fts3SegmentsClose(Fts3Table *p){
903 sqlite3_blob_close(p->pSegments);
904 p->pSegments = 0;
905 }
906
907 /*
908 ** Move the iterator passed as the first argument to the next term in the
909 ** segment. If successful, SQLITE_OK is returned. If there is no next term,
910 ** SQLITE_DONE. Otherwise, an SQLite error code.
911 */
fts3SegReaderNext(Fts3Table * p,Fts3SegReader * pReader)912 static int fts3SegReaderNext(Fts3Table *p, Fts3SegReader *pReader){
913 char *pNext; /* Cursor variable */
914 int nPrefix; /* Number of bytes in term prefix */
915 int nSuffix; /* Number of bytes in term suffix */
916
917 if( !pReader->aDoclist ){
918 pNext = pReader->aNode;
919 }else{
920 pNext = &pReader->aDoclist[pReader->nDoclist];
921 }
922
923 if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){
924 int rc; /* Return code from Fts3ReadBlock() */
925
926 if( fts3SegReaderIsPending(pReader) ){
927 Fts3HashElem *pElem = *(pReader->ppNextElem);
928 if( pElem==0 ){
929 pReader->aNode = 0;
930 }else{
931 PendingList *pList = (PendingList *)fts3HashData(pElem);
932 pReader->zTerm = (char *)fts3HashKey(pElem);
933 pReader->nTerm = fts3HashKeysize(pElem);
934 pReader->nNode = pReader->nDoclist = pList->nData + 1;
935 pReader->aNode = pReader->aDoclist = pList->aData;
936 pReader->ppNextElem++;
937 assert( pReader->aNode );
938 }
939 return SQLITE_OK;
940 }
941
942 if( !fts3SegReaderIsRootOnly(pReader) ){
943 sqlite3_free(pReader->aNode);
944 }
945 pReader->aNode = 0;
946
947 /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf
948 ** blocks have already been traversed. */
949 assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock );
950 if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){
951 return SQLITE_OK;
952 }
953
954 rc = sqlite3Fts3ReadBlock(
955 p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode
956 );
957 if( rc!=SQLITE_OK ) return rc;
958 pNext = pReader->aNode;
959 }
960
961 /* Because of the FTS3_NODE_PADDING bytes of padding, the following is
962 ** safe (no risk of overread) even if the node data is corrupted.
963 */
964 pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix);
965 pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix);
966 if( nPrefix<0 || nSuffix<=0
967 || &pNext[nSuffix]>&pReader->aNode[pReader->nNode]
968 ){
969 return SQLITE_CORRUPT;
970 }
971
972 if( nPrefix+nSuffix>pReader->nTermAlloc ){
973 int nNew = (nPrefix+nSuffix)*2;
974 char *zNew = sqlite3_realloc(pReader->zTerm, nNew);
975 if( !zNew ){
976 return SQLITE_NOMEM;
977 }
978 pReader->zTerm = zNew;
979 pReader->nTermAlloc = nNew;
980 }
981 memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix);
982 pReader->nTerm = nPrefix+nSuffix;
983 pNext += nSuffix;
984 pNext += sqlite3Fts3GetVarint32(pNext, &pReader->nDoclist);
985 pReader->aDoclist = pNext;
986 pReader->pOffsetList = 0;
987
988 /* Check that the doclist does not appear to extend past the end of the
989 ** b-tree node. And that the final byte of the doclist is 0x00. If either
990 ** of these statements is untrue, then the data structure is corrupt.
991 */
992 if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode]
993 || pReader->aDoclist[pReader->nDoclist-1]
994 ){
995 return SQLITE_CORRUPT;
996 }
997 return SQLITE_OK;
998 }
999
1000 /*
1001 ** Set the SegReader to point to the first docid in the doclist associated
1002 ** with the current term.
1003 */
fts3SegReaderFirstDocid(Fts3SegReader * pReader)1004 static void fts3SegReaderFirstDocid(Fts3SegReader *pReader){
1005 int n;
1006 assert( pReader->aDoclist );
1007 assert( !pReader->pOffsetList );
1008 n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid);
1009 pReader->pOffsetList = &pReader->aDoclist[n];
1010 }
1011
1012 /*
1013 ** Advance the SegReader to point to the next docid in the doclist
1014 ** associated with the current term.
1015 **
1016 ** If arguments ppOffsetList and pnOffsetList are not NULL, then
1017 ** *ppOffsetList is set to point to the first column-offset list
1018 ** in the doclist entry (i.e. immediately past the docid varint).
1019 ** *pnOffsetList is set to the length of the set of column-offset
1020 ** lists, not including the nul-terminator byte. For example:
1021 */
fts3SegReaderNextDocid(Fts3SegReader * pReader,char ** ppOffsetList,int * pnOffsetList)1022 static void fts3SegReaderNextDocid(
1023 Fts3SegReader *pReader,
1024 char **ppOffsetList,
1025 int *pnOffsetList
1026 ){
1027 char *p = pReader->pOffsetList;
1028 char c = 0;
1029
1030 /* Pointer p currently points at the first byte of an offset list. The
1031 ** following two lines advance it to point one byte past the end of
1032 ** the same offset list.
1033 */
1034 while( *p | c ) c = *p++ & 0x80;
1035 p++;
1036
1037 /* If required, populate the output variables with a pointer to and the
1038 ** size of the previous offset-list.
1039 */
1040 if( ppOffsetList ){
1041 *ppOffsetList = pReader->pOffsetList;
1042 *pnOffsetList = (int)(p - pReader->pOffsetList - 1);
1043 }
1044
1045 /* If there are no more entries in the doclist, set pOffsetList to
1046 ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and
1047 ** Fts3SegReader.pOffsetList to point to the next offset list before
1048 ** returning.
1049 */
1050 if( p>=&pReader->aDoclist[pReader->nDoclist] ){
1051 pReader->pOffsetList = 0;
1052 }else{
1053 sqlite3_int64 iDelta;
1054 pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta);
1055 pReader->iDocid += iDelta;
1056 }
1057 }
1058
1059 /*
1060 ** This function is called to estimate the amount of data that will be
1061 ** loaded from the disk If SegReaderIterate() is called on this seg-reader,
1062 ** in units of average document size.
1063 **
1064 ** This can be used as follows: If the caller has a small doclist that
1065 ** contains references to N documents, and is considering merging it with
1066 ** a large doclist (size X "average documents"), it may opt not to load
1067 ** the large doclist if X>N.
1068 */
sqlite3Fts3SegReaderCost(Fts3Cursor * pCsr,Fts3SegReader * pReader,int * pnCost)1069 int sqlite3Fts3SegReaderCost(
1070 Fts3Cursor *pCsr, /* FTS3 cursor handle */
1071 Fts3SegReader *pReader, /* Segment-reader handle */
1072 int *pnCost /* IN/OUT: Number of bytes read */
1073 ){
1074 Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
1075 int rc = SQLITE_OK; /* Return code */
1076 int nCost = 0; /* Cost in bytes to return */
1077 int pgsz = p->nPgsz; /* Database page size */
1078
1079 /* If this seg-reader is reading the pending-terms table, or if all data
1080 ** for the segment is stored on the root page of the b-tree, then the cost
1081 ** is zero. In this case all required data is already in main memory.
1082 */
1083 if( p->bHasStat
1084 && !fts3SegReaderIsPending(pReader)
1085 && !fts3SegReaderIsRootOnly(pReader)
1086 ){
1087 int nBlob = 0;
1088 sqlite3_int64 iBlock;
1089
1090 if( pCsr->nRowAvg==0 ){
1091 /* The average document size, which is required to calculate the cost
1092 ** of each doclist, has not yet been determined. Read the required
1093 ** data from the %_stat table to calculate it.
1094 **
1095 ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3
1096 ** varints, where nCol is the number of columns in the FTS3 table.
1097 ** The first varint is the number of documents currently stored in
1098 ** the table. The following nCol varints contain the total amount of
1099 ** data stored in all rows of each column of the table, from left
1100 ** to right.
1101 */
1102 sqlite3_stmt *pStmt;
1103 sqlite3_int64 nDoc = 0;
1104 sqlite3_int64 nByte = 0;
1105 const char *pEnd;
1106 const char *a;
1107
1108 rc = sqlite3Fts3SelectDoctotal(p, &pStmt);
1109 if( rc!=SQLITE_OK ) return rc;
1110 a = sqlite3_column_blob(pStmt, 0);
1111 assert( a );
1112
1113 pEnd = &a[sqlite3_column_bytes(pStmt, 0)];
1114 a += sqlite3Fts3GetVarint(a, &nDoc);
1115 while( a<pEnd ){
1116 a += sqlite3Fts3GetVarint(a, &nByte);
1117 }
1118 if( nDoc==0 || nByte==0 ){
1119 sqlite3_reset(pStmt);
1120 return SQLITE_CORRUPT;
1121 }
1122
1123 pCsr->nRowAvg = (int)(((nByte / nDoc) + pgsz) / pgsz);
1124 assert( pCsr->nRowAvg>0 );
1125 rc = sqlite3_reset(pStmt);
1126 if( rc!=SQLITE_OK ) return rc;
1127 }
1128
1129 /* Assume that a blob flows over onto overflow pages if it is larger
1130 ** than (pgsz-35) bytes in size (the file-format documentation
1131 ** confirms this).
1132 */
1133 for(iBlock=pReader->iStartBlock; iBlock<=pReader->iLeafEndBlock; iBlock++){
1134 rc = sqlite3Fts3ReadBlock(p, iBlock, 0, &nBlob);
1135 if( rc!=SQLITE_OK ) break;
1136 if( (nBlob+35)>pgsz ){
1137 int nOvfl = (nBlob + 34)/pgsz;
1138 nCost += ((nOvfl + pCsr->nRowAvg - 1)/pCsr->nRowAvg);
1139 }
1140 }
1141 }
1142
1143 *pnCost += nCost;
1144 return rc;
1145 }
1146
1147 /*
1148 ** Free all allocations associated with the iterator passed as the
1149 ** second argument.
1150 */
sqlite3Fts3SegReaderFree(Fts3SegReader * pReader)1151 void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){
1152 if( pReader && !fts3SegReaderIsPending(pReader) ){
1153 sqlite3_free(pReader->zTerm);
1154 if( !fts3SegReaderIsRootOnly(pReader) ){
1155 sqlite3_free(pReader->aNode);
1156 }
1157 }
1158 sqlite3_free(pReader);
1159 }
1160
1161 /*
1162 ** Allocate a new SegReader object.
1163 */
sqlite3Fts3SegReaderNew(int iAge,sqlite3_int64 iStartLeaf,sqlite3_int64 iEndLeaf,sqlite3_int64 iEndBlock,const char * zRoot,int nRoot,Fts3SegReader ** ppReader)1164 int sqlite3Fts3SegReaderNew(
1165 int iAge, /* Segment "age". */
1166 sqlite3_int64 iStartLeaf, /* First leaf to traverse */
1167 sqlite3_int64 iEndLeaf, /* Final leaf to traverse */
1168 sqlite3_int64 iEndBlock, /* Final block of segment */
1169 const char *zRoot, /* Buffer containing root node */
1170 int nRoot, /* Size of buffer containing root node */
1171 Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */
1172 ){
1173 int rc = SQLITE_OK; /* Return code */
1174 Fts3SegReader *pReader; /* Newly allocated SegReader object */
1175 int nExtra = 0; /* Bytes to allocate segment root node */
1176
1177 assert( iStartLeaf<=iEndLeaf );
1178 if( iStartLeaf==0 ){
1179 nExtra = nRoot + FTS3_NODE_PADDING;
1180 }
1181
1182 pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra);
1183 if( !pReader ){
1184 return SQLITE_NOMEM;
1185 }
1186 memset(pReader, 0, sizeof(Fts3SegReader));
1187 pReader->iIdx = iAge;
1188 pReader->iStartBlock = iStartLeaf;
1189 pReader->iLeafEndBlock = iEndLeaf;
1190 pReader->iEndBlock = iEndBlock;
1191
1192 if( nExtra ){
1193 /* The entire segment is stored in the root node. */
1194 pReader->aNode = (char *)&pReader[1];
1195 pReader->nNode = nRoot;
1196 memcpy(pReader->aNode, zRoot, nRoot);
1197 memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING);
1198 }else{
1199 pReader->iCurrentBlock = iStartLeaf-1;
1200 }
1201
1202 if( rc==SQLITE_OK ){
1203 *ppReader = pReader;
1204 }else{
1205 sqlite3Fts3SegReaderFree(pReader);
1206 }
1207 return rc;
1208 }
1209
1210 /*
1211 ** This is a comparison function used as a qsort() callback when sorting
1212 ** an array of pending terms by term. This occurs as part of flushing
1213 ** the contents of the pending-terms hash table to the database.
1214 */
fts3CompareElemByTerm(const void * lhs,const void * rhs)1215 static int fts3CompareElemByTerm(const void *lhs, const void *rhs){
1216 char *z1 = fts3HashKey(*(Fts3HashElem **)lhs);
1217 char *z2 = fts3HashKey(*(Fts3HashElem **)rhs);
1218 int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs);
1219 int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs);
1220
1221 int n = (n1<n2 ? n1 : n2);
1222 int c = memcmp(z1, z2, n);
1223 if( c==0 ){
1224 c = n1 - n2;
1225 }
1226 return c;
1227 }
1228
1229 /*
1230 ** This function is used to allocate an Fts3SegReader that iterates through
1231 ** a subset of the terms stored in the Fts3Table.pendingTerms array.
1232 */
sqlite3Fts3SegReaderPending(Fts3Table * p,const char * zTerm,int nTerm,int isPrefix,Fts3SegReader ** ppReader)1233 int sqlite3Fts3SegReaderPending(
1234 Fts3Table *p, /* Virtual table handle */
1235 const char *zTerm, /* Term to search for */
1236 int nTerm, /* Size of buffer zTerm */
1237 int isPrefix, /* True for a term-prefix query */
1238 Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */
1239 ){
1240 Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */
1241 Fts3HashElem *pE; /* Iterator variable */
1242 Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */
1243 int nElem = 0; /* Size of array at aElem */
1244 int rc = SQLITE_OK; /* Return Code */
1245
1246 if( isPrefix ){
1247 int nAlloc = 0; /* Size of allocated array at aElem */
1248
1249 for(pE=fts3HashFirst(&p->pendingTerms); pE; pE=fts3HashNext(pE)){
1250 char *zKey = (char *)fts3HashKey(pE);
1251 int nKey = fts3HashKeysize(pE);
1252 if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){
1253 if( nElem==nAlloc ){
1254 Fts3HashElem **aElem2;
1255 nAlloc += 16;
1256 aElem2 = (Fts3HashElem **)sqlite3_realloc(
1257 aElem, nAlloc*sizeof(Fts3HashElem *)
1258 );
1259 if( !aElem2 ){
1260 rc = SQLITE_NOMEM;
1261 nElem = 0;
1262 break;
1263 }
1264 aElem = aElem2;
1265 }
1266 aElem[nElem++] = pE;
1267 }
1268 }
1269
1270 /* If more than one term matches the prefix, sort the Fts3HashElem
1271 ** objects in term order using qsort(). This uses the same comparison
1272 ** callback as is used when flushing terms to disk.
1273 */
1274 if( nElem>1 ){
1275 qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm);
1276 }
1277
1278 }else{
1279 pE = fts3HashFindElem(&p->pendingTerms, zTerm, nTerm);
1280 if( pE ){
1281 aElem = &pE;
1282 nElem = 1;
1283 }
1284 }
1285
1286 if( nElem>0 ){
1287 int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *);
1288 pReader = (Fts3SegReader *)sqlite3_malloc(nByte);
1289 if( !pReader ){
1290 rc = SQLITE_NOMEM;
1291 }else{
1292 memset(pReader, 0, nByte);
1293 pReader->iIdx = 0x7FFFFFFF;
1294 pReader->ppNextElem = (Fts3HashElem **)&pReader[1];
1295 memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *));
1296 }
1297 }
1298
1299 if( isPrefix ){
1300 sqlite3_free(aElem);
1301 }
1302 *ppReader = pReader;
1303 return rc;
1304 }
1305
1306 /*
1307 ** Compare the entries pointed to by two Fts3SegReader structures.
1308 ** Comparison is as follows:
1309 **
1310 ** 1) EOF is greater than not EOF.
1311 **
1312 ** 2) The current terms (if any) are compared using memcmp(). If one
1313 ** term is a prefix of another, the longer term is considered the
1314 ** larger.
1315 **
1316 ** 3) By segment age. An older segment is considered larger.
1317 */
fts3SegReaderCmp(Fts3SegReader * pLhs,Fts3SegReader * pRhs)1318 static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
1319 int rc;
1320 if( pLhs->aNode && pRhs->aNode ){
1321 int rc2 = pLhs->nTerm - pRhs->nTerm;
1322 if( rc2<0 ){
1323 rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm);
1324 }else{
1325 rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm);
1326 }
1327 if( rc==0 ){
1328 rc = rc2;
1329 }
1330 }else{
1331 rc = (pLhs->aNode==0) - (pRhs->aNode==0);
1332 }
1333 if( rc==0 ){
1334 rc = pRhs->iIdx - pLhs->iIdx;
1335 }
1336 assert( rc!=0 );
1337 return rc;
1338 }
1339
1340 /*
1341 ** A different comparison function for SegReader structures. In this
1342 ** version, it is assumed that each SegReader points to an entry in
1343 ** a doclist for identical terms. Comparison is made as follows:
1344 **
1345 ** 1) EOF (end of doclist in this case) is greater than not EOF.
1346 **
1347 ** 2) By current docid.
1348 **
1349 ** 3) By segment age. An older segment is considered larger.
1350 */
fts3SegReaderDoclistCmp(Fts3SegReader * pLhs,Fts3SegReader * pRhs)1351 static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){
1352 int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0);
1353 if( rc==0 ){
1354 if( pLhs->iDocid==pRhs->iDocid ){
1355 rc = pRhs->iIdx - pLhs->iIdx;
1356 }else{
1357 rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1;
1358 }
1359 }
1360 assert( pLhs->aNode && pRhs->aNode );
1361 return rc;
1362 }
1363
1364 /*
1365 ** Compare the term that the Fts3SegReader object passed as the first argument
1366 ** points to with the term specified by arguments zTerm and nTerm.
1367 **
1368 ** If the pSeg iterator is already at EOF, return 0. Otherwise, return
1369 ** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are
1370 ** equal, or +ve if the pSeg term is greater than zTerm/nTerm.
1371 */
fts3SegReaderTermCmp(Fts3SegReader * pSeg,const char * zTerm,int nTerm)1372 static int fts3SegReaderTermCmp(
1373 Fts3SegReader *pSeg, /* Segment reader object */
1374 const char *zTerm, /* Term to compare to */
1375 int nTerm /* Size of term zTerm in bytes */
1376 ){
1377 int res = 0;
1378 if( pSeg->aNode ){
1379 if( pSeg->nTerm>nTerm ){
1380 res = memcmp(pSeg->zTerm, zTerm, nTerm);
1381 }else{
1382 res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm);
1383 }
1384 if( res==0 ){
1385 res = pSeg->nTerm-nTerm;
1386 }
1387 }
1388 return res;
1389 }
1390
1391 /*
1392 ** Argument apSegment is an array of nSegment elements. It is known that
1393 ** the final (nSegment-nSuspect) members are already in sorted order
1394 ** (according to the comparison function provided). This function shuffles
1395 ** the array around until all entries are in sorted order.
1396 */
fts3SegReaderSort(Fts3SegReader ** apSegment,int nSegment,int nSuspect,int (* xCmp)(Fts3SegReader *,Fts3SegReader *))1397 static void fts3SegReaderSort(
1398 Fts3SegReader **apSegment, /* Array to sort entries of */
1399 int nSegment, /* Size of apSegment array */
1400 int nSuspect, /* Unsorted entry count */
1401 int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */
1402 ){
1403 int i; /* Iterator variable */
1404
1405 assert( nSuspect<=nSegment );
1406
1407 if( nSuspect==nSegment ) nSuspect--;
1408 for(i=nSuspect-1; i>=0; i--){
1409 int j;
1410 for(j=i; j<(nSegment-1); j++){
1411 Fts3SegReader *pTmp;
1412 if( xCmp(apSegment[j], apSegment[j+1])<0 ) break;
1413 pTmp = apSegment[j+1];
1414 apSegment[j+1] = apSegment[j];
1415 apSegment[j] = pTmp;
1416 }
1417 }
1418
1419 #ifndef NDEBUG
1420 /* Check that the list really is sorted now. */
1421 for(i=0; i<(nSuspect-1); i++){
1422 assert( xCmp(apSegment[i], apSegment[i+1])<0 );
1423 }
1424 #endif
1425 }
1426
1427 /*
1428 ** Insert a record into the %_segments table.
1429 */
fts3WriteSegment(Fts3Table * p,sqlite3_int64 iBlock,char * z,int n)1430 static int fts3WriteSegment(
1431 Fts3Table *p, /* Virtual table handle */
1432 sqlite3_int64 iBlock, /* Block id for new block */
1433 char *z, /* Pointer to buffer containing block data */
1434 int n /* Size of buffer z in bytes */
1435 ){
1436 sqlite3_stmt *pStmt;
1437 int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0);
1438 if( rc==SQLITE_OK ){
1439 sqlite3_bind_int64(pStmt, 1, iBlock);
1440 sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC);
1441 sqlite3_step(pStmt);
1442 rc = sqlite3_reset(pStmt);
1443 }
1444 return rc;
1445 }
1446
1447 /*
1448 ** Insert a record into the %_segdir table.
1449 */
fts3WriteSegdir(Fts3Table * p,int iLevel,int iIdx,sqlite3_int64 iStartBlock,sqlite3_int64 iLeafEndBlock,sqlite3_int64 iEndBlock,char * zRoot,int nRoot)1450 static int fts3WriteSegdir(
1451 Fts3Table *p, /* Virtual table handle */
1452 int iLevel, /* Value for "level" field */
1453 int iIdx, /* Value for "idx" field */
1454 sqlite3_int64 iStartBlock, /* Value for "start_block" field */
1455 sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */
1456 sqlite3_int64 iEndBlock, /* Value for "end_block" field */
1457 char *zRoot, /* Blob value for "root" field */
1458 int nRoot /* Number of bytes in buffer zRoot */
1459 ){
1460 sqlite3_stmt *pStmt;
1461 int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0);
1462 if( rc==SQLITE_OK ){
1463 sqlite3_bind_int(pStmt, 1, iLevel);
1464 sqlite3_bind_int(pStmt, 2, iIdx);
1465 sqlite3_bind_int64(pStmt, 3, iStartBlock);
1466 sqlite3_bind_int64(pStmt, 4, iLeafEndBlock);
1467 sqlite3_bind_int64(pStmt, 5, iEndBlock);
1468 sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC);
1469 sqlite3_step(pStmt);
1470 rc = sqlite3_reset(pStmt);
1471 }
1472 return rc;
1473 }
1474
1475 /*
1476 ** Return the size of the common prefix (if any) shared by zPrev and
1477 ** zNext, in bytes. For example,
1478 **
1479 ** fts3PrefixCompress("abc", 3, "abcdef", 6) // returns 3
1480 ** fts3PrefixCompress("abX", 3, "abcdef", 6) // returns 2
1481 ** fts3PrefixCompress("abX", 3, "Xbcdef", 6) // returns 0
1482 */
fts3PrefixCompress(const char * zPrev,int nPrev,const char * zNext,int nNext)1483 static int fts3PrefixCompress(
1484 const char *zPrev, /* Buffer containing previous term */
1485 int nPrev, /* Size of buffer zPrev in bytes */
1486 const char *zNext, /* Buffer containing next term */
1487 int nNext /* Size of buffer zNext in bytes */
1488 ){
1489 int n;
1490 UNUSED_PARAMETER(nNext);
1491 for(n=0; n<nPrev && zPrev[n]==zNext[n]; n++);
1492 return n;
1493 }
1494
1495 /*
1496 ** Add term zTerm to the SegmentNode. It is guaranteed that zTerm is larger
1497 ** (according to memcmp) than the previous term.
1498 */
fts3NodeAddTerm(Fts3Table * p,SegmentNode ** ppTree,int isCopyTerm,const char * zTerm,int nTerm)1499 static int fts3NodeAddTerm(
1500 Fts3Table *p, /* Virtual table handle */
1501 SegmentNode **ppTree, /* IN/OUT: SegmentNode handle */
1502 int isCopyTerm, /* True if zTerm/nTerm is transient */
1503 const char *zTerm, /* Pointer to buffer containing term */
1504 int nTerm /* Size of term in bytes */
1505 ){
1506 SegmentNode *pTree = *ppTree;
1507 int rc;
1508 SegmentNode *pNew;
1509
1510 /* First try to append the term to the current node. Return early if
1511 ** this is possible.
1512 */
1513 if( pTree ){
1514 int nData = pTree->nData; /* Current size of node in bytes */
1515 int nReq = nData; /* Required space after adding zTerm */
1516 int nPrefix; /* Number of bytes of prefix compression */
1517 int nSuffix; /* Suffix length */
1518
1519 nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm);
1520 nSuffix = nTerm-nPrefix;
1521
1522 nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix;
1523 if( nReq<=p->nNodeSize || !pTree->zTerm ){
1524
1525 if( nReq>p->nNodeSize ){
1526 /* An unusual case: this is the first term to be added to the node
1527 ** and the static node buffer (p->nNodeSize bytes) is not large
1528 ** enough. Use a separately malloced buffer instead This wastes
1529 ** p->nNodeSize bytes, but since this scenario only comes about when
1530 ** the database contain two terms that share a prefix of almost 2KB,
1531 ** this is not expected to be a serious problem.
1532 */
1533 assert( pTree->aData==(char *)&pTree[1] );
1534 pTree->aData = (char *)sqlite3_malloc(nReq);
1535 if( !pTree->aData ){
1536 return SQLITE_NOMEM;
1537 }
1538 }
1539
1540 if( pTree->zTerm ){
1541 /* There is no prefix-length field for first term in a node */
1542 nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix);
1543 }
1544
1545 nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix);
1546 memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix);
1547 pTree->nData = nData + nSuffix;
1548 pTree->nEntry++;
1549
1550 if( isCopyTerm ){
1551 if( pTree->nMalloc<nTerm ){
1552 char *zNew = sqlite3_realloc(pTree->zMalloc, nTerm*2);
1553 if( !zNew ){
1554 return SQLITE_NOMEM;
1555 }
1556 pTree->nMalloc = nTerm*2;
1557 pTree->zMalloc = zNew;
1558 }
1559 pTree->zTerm = pTree->zMalloc;
1560 memcpy(pTree->zTerm, zTerm, nTerm);
1561 pTree->nTerm = nTerm;
1562 }else{
1563 pTree->zTerm = (char *)zTerm;
1564 pTree->nTerm = nTerm;
1565 }
1566 return SQLITE_OK;
1567 }
1568 }
1569
1570 /* If control flows to here, it was not possible to append zTerm to the
1571 ** current node. Create a new node (a right-sibling of the current node).
1572 ** If this is the first node in the tree, the term is added to it.
1573 **
1574 ** Otherwise, the term is not added to the new node, it is left empty for
1575 ** now. Instead, the term is inserted into the parent of pTree. If pTree
1576 ** has no parent, one is created here.
1577 */
1578 pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize);
1579 if( !pNew ){
1580 return SQLITE_NOMEM;
1581 }
1582 memset(pNew, 0, sizeof(SegmentNode));
1583 pNew->nData = 1 + FTS3_VARINT_MAX;
1584 pNew->aData = (char *)&pNew[1];
1585
1586 if( pTree ){
1587 SegmentNode *pParent = pTree->pParent;
1588 rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm);
1589 if( pTree->pParent==0 ){
1590 pTree->pParent = pParent;
1591 }
1592 pTree->pRight = pNew;
1593 pNew->pLeftmost = pTree->pLeftmost;
1594 pNew->pParent = pParent;
1595 pNew->zMalloc = pTree->zMalloc;
1596 pNew->nMalloc = pTree->nMalloc;
1597 pTree->zMalloc = 0;
1598 }else{
1599 pNew->pLeftmost = pNew;
1600 rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm);
1601 }
1602
1603 *ppTree = pNew;
1604 return rc;
1605 }
1606
1607 /*
1608 ** Helper function for fts3NodeWrite().
1609 */
fts3TreeFinishNode(SegmentNode * pTree,int iHeight,sqlite3_int64 iLeftChild)1610 static int fts3TreeFinishNode(
1611 SegmentNode *pTree,
1612 int iHeight,
1613 sqlite3_int64 iLeftChild
1614 ){
1615 int nStart;
1616 assert( iHeight>=1 && iHeight<128 );
1617 nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild);
1618 pTree->aData[nStart] = (char)iHeight;
1619 sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild);
1620 return nStart;
1621 }
1622
1623 /*
1624 ** Write the buffer for the segment node pTree and all of its peers to the
1625 ** database. Then call this function recursively to write the parent of
1626 ** pTree and its peers to the database.
1627 **
1628 ** Except, if pTree is a root node, do not write it to the database. Instead,
1629 ** set output variables *paRoot and *pnRoot to contain the root node.
1630 **
1631 ** If successful, SQLITE_OK is returned and output variable *piLast is
1632 ** set to the largest blockid written to the database (or zero if no
1633 ** blocks were written to the db). Otherwise, an SQLite error code is
1634 ** returned.
1635 */
fts3NodeWrite(Fts3Table * p,SegmentNode * pTree,int iHeight,sqlite3_int64 iLeaf,sqlite3_int64 iFree,sqlite3_int64 * piLast,char ** paRoot,int * pnRoot)1636 static int fts3NodeWrite(
1637 Fts3Table *p, /* Virtual table handle */
1638 SegmentNode *pTree, /* SegmentNode handle */
1639 int iHeight, /* Height of this node in tree */
1640 sqlite3_int64 iLeaf, /* Block id of first leaf node */
1641 sqlite3_int64 iFree, /* Block id of next free slot in %_segments */
1642 sqlite3_int64 *piLast, /* OUT: Block id of last entry written */
1643 char **paRoot, /* OUT: Data for root node */
1644 int *pnRoot /* OUT: Size of root node in bytes */
1645 ){
1646 int rc = SQLITE_OK;
1647
1648 if( !pTree->pParent ){
1649 /* Root node of the tree. */
1650 int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf);
1651 *piLast = iFree-1;
1652 *pnRoot = pTree->nData - nStart;
1653 *paRoot = &pTree->aData[nStart];
1654 }else{
1655 SegmentNode *pIter;
1656 sqlite3_int64 iNextFree = iFree;
1657 sqlite3_int64 iNextLeaf = iLeaf;
1658 for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){
1659 int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf);
1660 int nWrite = pIter->nData - nStart;
1661
1662 rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite);
1663 iNextFree++;
1664 iNextLeaf += (pIter->nEntry+1);
1665 }
1666 if( rc==SQLITE_OK ){
1667 assert( iNextLeaf==iFree );
1668 rc = fts3NodeWrite(
1669 p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot
1670 );
1671 }
1672 }
1673
1674 return rc;
1675 }
1676
1677 /*
1678 ** Free all memory allocations associated with the tree pTree.
1679 */
fts3NodeFree(SegmentNode * pTree)1680 static void fts3NodeFree(SegmentNode *pTree){
1681 if( pTree ){
1682 SegmentNode *p = pTree->pLeftmost;
1683 fts3NodeFree(p->pParent);
1684 while( p ){
1685 SegmentNode *pRight = p->pRight;
1686 if( p->aData!=(char *)&p[1] ){
1687 sqlite3_free(p->aData);
1688 }
1689 assert( pRight==0 || p->zMalloc==0 );
1690 sqlite3_free(p->zMalloc);
1691 sqlite3_free(p);
1692 p = pRight;
1693 }
1694 }
1695 }
1696
1697 /*
1698 ** Add a term to the segment being constructed by the SegmentWriter object
1699 ** *ppWriter. When adding the first term to a segment, *ppWriter should
1700 ** be passed NULL. This function will allocate a new SegmentWriter object
1701 ** and return it via the input/output variable *ppWriter in this case.
1702 **
1703 ** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code.
1704 */
fts3SegWriterAdd(Fts3Table * p,SegmentWriter ** ppWriter,int isCopyTerm,const char * zTerm,int nTerm,const char * aDoclist,int nDoclist)1705 static int fts3SegWriterAdd(
1706 Fts3Table *p, /* Virtual table handle */
1707 SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */
1708 int isCopyTerm, /* True if buffer zTerm must be copied */
1709 const char *zTerm, /* Pointer to buffer containing term */
1710 int nTerm, /* Size of term in bytes */
1711 const char *aDoclist, /* Pointer to buffer containing doclist */
1712 int nDoclist /* Size of doclist in bytes */
1713 ){
1714 int nPrefix; /* Size of term prefix in bytes */
1715 int nSuffix; /* Size of term suffix in bytes */
1716 int nReq; /* Number of bytes required on leaf page */
1717 int nData;
1718 SegmentWriter *pWriter = *ppWriter;
1719
1720 if( !pWriter ){
1721 int rc;
1722 sqlite3_stmt *pStmt;
1723
1724 /* Allocate the SegmentWriter structure */
1725 pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter));
1726 if( !pWriter ) return SQLITE_NOMEM;
1727 memset(pWriter, 0, sizeof(SegmentWriter));
1728 *ppWriter = pWriter;
1729
1730 /* Allocate a buffer in which to accumulate data */
1731 pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize);
1732 if( !pWriter->aData ) return SQLITE_NOMEM;
1733 pWriter->nSize = p->nNodeSize;
1734
1735 /* Find the next free blockid in the %_segments table */
1736 rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0);
1737 if( rc!=SQLITE_OK ) return rc;
1738 if( SQLITE_ROW==sqlite3_step(pStmt) ){
1739 pWriter->iFree = sqlite3_column_int64(pStmt, 0);
1740 pWriter->iFirst = pWriter->iFree;
1741 }
1742 rc = sqlite3_reset(pStmt);
1743 if( rc!=SQLITE_OK ) return rc;
1744 }
1745 nData = pWriter->nData;
1746
1747 nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm);
1748 nSuffix = nTerm-nPrefix;
1749
1750 /* Figure out how many bytes are required by this new entry */
1751 nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */
1752 sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */
1753 nSuffix + /* Term suffix */
1754 sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */
1755 nDoclist; /* Doclist data */
1756
1757 if( nData>0 && nData+nReq>p->nNodeSize ){
1758 int rc;
1759
1760 /* The current leaf node is full. Write it out to the database. */
1761 rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData);
1762 if( rc!=SQLITE_OK ) return rc;
1763
1764 /* Add the current term to the interior node tree. The term added to
1765 ** the interior tree must:
1766 **
1767 ** a) be greater than the largest term on the leaf node just written
1768 ** to the database (still available in pWriter->zTerm), and
1769 **
1770 ** b) be less than or equal to the term about to be added to the new
1771 ** leaf node (zTerm/nTerm).
1772 **
1773 ** In other words, it must be the prefix of zTerm 1 byte longer than
1774 ** the common prefix (if any) of zTerm and pWriter->zTerm.
1775 */
1776 assert( nPrefix<nTerm );
1777 rc = fts3NodeAddTerm(p, &pWriter->pTree, isCopyTerm, zTerm, nPrefix+1);
1778 if( rc!=SQLITE_OK ) return rc;
1779
1780 nData = 0;
1781 pWriter->nTerm = 0;
1782
1783 nPrefix = 0;
1784 nSuffix = nTerm;
1785 nReq = 1 + /* varint containing prefix size */
1786 sqlite3Fts3VarintLen(nTerm) + /* varint containing suffix size */
1787 nTerm + /* Term suffix */
1788 sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */
1789 nDoclist; /* Doclist data */
1790 }
1791
1792 /* If the buffer currently allocated is too small for this entry, realloc
1793 ** the buffer to make it large enough.
1794 */
1795 if( nReq>pWriter->nSize ){
1796 char *aNew = sqlite3_realloc(pWriter->aData, nReq);
1797 if( !aNew ) return SQLITE_NOMEM;
1798 pWriter->aData = aNew;
1799 pWriter->nSize = nReq;
1800 }
1801 assert( nData+nReq<=pWriter->nSize );
1802
1803 /* Append the prefix-compressed term and doclist to the buffer. */
1804 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix);
1805 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix);
1806 memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix);
1807 nData += nSuffix;
1808 nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist);
1809 memcpy(&pWriter->aData[nData], aDoclist, nDoclist);
1810 pWriter->nData = nData + nDoclist;
1811
1812 /* Save the current term so that it can be used to prefix-compress the next.
1813 ** If the isCopyTerm parameter is true, then the buffer pointed to by
1814 ** zTerm is transient, so take a copy of the term data. Otherwise, just
1815 ** store a copy of the pointer.
1816 */
1817 if( isCopyTerm ){
1818 if( nTerm>pWriter->nMalloc ){
1819 char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2);
1820 if( !zNew ){
1821 return SQLITE_NOMEM;
1822 }
1823 pWriter->nMalloc = nTerm*2;
1824 pWriter->zMalloc = zNew;
1825 pWriter->zTerm = zNew;
1826 }
1827 assert( pWriter->zTerm==pWriter->zMalloc );
1828 memcpy(pWriter->zTerm, zTerm, nTerm);
1829 }else{
1830 pWriter->zTerm = (char *)zTerm;
1831 }
1832 pWriter->nTerm = nTerm;
1833
1834 return SQLITE_OK;
1835 }
1836
1837 /*
1838 ** Flush all data associated with the SegmentWriter object pWriter to the
1839 ** database. This function must be called after all terms have been added
1840 ** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is
1841 ** returned. Otherwise, an SQLite error code.
1842 */
fts3SegWriterFlush(Fts3Table * p,SegmentWriter * pWriter,int iLevel,int iIdx)1843 static int fts3SegWriterFlush(
1844 Fts3Table *p, /* Virtual table handle */
1845 SegmentWriter *pWriter, /* SegmentWriter to flush to the db */
1846 int iLevel, /* Value for 'level' column of %_segdir */
1847 int iIdx /* Value for 'idx' column of %_segdir */
1848 ){
1849 int rc; /* Return code */
1850 if( pWriter->pTree ){
1851 sqlite3_int64 iLast = 0; /* Largest block id written to database */
1852 sqlite3_int64 iLastLeaf; /* Largest leaf block id written to db */
1853 char *zRoot = NULL; /* Pointer to buffer containing root node */
1854 int nRoot = 0; /* Size of buffer zRoot */
1855
1856 iLastLeaf = pWriter->iFree;
1857 rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData);
1858 if( rc==SQLITE_OK ){
1859 rc = fts3NodeWrite(p, pWriter->pTree, 1,
1860 pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot);
1861 }
1862 if( rc==SQLITE_OK ){
1863 rc = fts3WriteSegdir(
1864 p, iLevel, iIdx, pWriter->iFirst, iLastLeaf, iLast, zRoot, nRoot);
1865 }
1866 }else{
1867 /* The entire tree fits on the root node. Write it to the segdir table. */
1868 rc = fts3WriteSegdir(
1869 p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData);
1870 }
1871 return rc;
1872 }
1873
1874 /*
1875 ** Release all memory held by the SegmentWriter object passed as the
1876 ** first argument.
1877 */
fts3SegWriterFree(SegmentWriter * pWriter)1878 static void fts3SegWriterFree(SegmentWriter *pWriter){
1879 if( pWriter ){
1880 sqlite3_free(pWriter->aData);
1881 sqlite3_free(pWriter->zMalloc);
1882 fts3NodeFree(pWriter->pTree);
1883 sqlite3_free(pWriter);
1884 }
1885 }
1886
1887 /*
1888 ** The first value in the apVal[] array is assumed to contain an integer.
1889 ** This function tests if there exist any documents with docid values that
1890 ** are different from that integer. i.e. if deleting the document with docid
1891 ** apVal[0] would mean the FTS3 table were empty.
1892 **
1893 ** If successful, *pisEmpty is set to true if the table is empty except for
1894 ** document apVal[0], or false otherwise, and SQLITE_OK is returned. If an
1895 ** error occurs, an SQLite error code is returned.
1896 */
fts3IsEmpty(Fts3Table * p,sqlite3_value ** apVal,int * pisEmpty)1897 static int fts3IsEmpty(Fts3Table *p, sqlite3_value **apVal, int *pisEmpty){
1898 sqlite3_stmt *pStmt;
1899 int rc;
1900 rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, apVal);
1901 if( rc==SQLITE_OK ){
1902 if( SQLITE_ROW==sqlite3_step(pStmt) ){
1903 *pisEmpty = sqlite3_column_int(pStmt, 0);
1904 }
1905 rc = sqlite3_reset(pStmt);
1906 }
1907 return rc;
1908 }
1909
1910 /*
1911 ** Set *pnSegment to the total number of segments in the database. Set
1912 ** *pnMax to the largest segment level in the database (segment levels
1913 ** are stored in the 'level' column of the %_segdir table).
1914 **
1915 ** Return SQLITE_OK if successful, or an SQLite error code if not.
1916 */
fts3SegmentCountMax(Fts3Table * p,int * pnSegment,int * pnMax)1917 static int fts3SegmentCountMax(Fts3Table *p, int *pnSegment, int *pnMax){
1918 sqlite3_stmt *pStmt;
1919 int rc;
1920
1921 rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_COUNT_MAX, &pStmt, 0);
1922 if( rc!=SQLITE_OK ) return rc;
1923 if( SQLITE_ROW==sqlite3_step(pStmt) ){
1924 *pnSegment = sqlite3_column_int(pStmt, 0);
1925 *pnMax = sqlite3_column_int(pStmt, 1);
1926 }
1927 return sqlite3_reset(pStmt);
1928 }
1929
1930 /*
1931 ** This function is used after merging multiple segments into a single large
1932 ** segment to delete the old, now redundant, segment b-trees. Specifically,
1933 ** it:
1934 **
1935 ** 1) Deletes all %_segments entries for the segments associated with
1936 ** each of the SegReader objects in the array passed as the third
1937 ** argument, and
1938 **
1939 ** 2) deletes all %_segdir entries with level iLevel, or all %_segdir
1940 ** entries regardless of level if (iLevel<0).
1941 **
1942 ** SQLITE_OK is returned if successful, otherwise an SQLite error code.
1943 */
fts3DeleteSegdir(Fts3Table * p,int iLevel,Fts3SegReader ** apSegment,int nReader)1944 static int fts3DeleteSegdir(
1945 Fts3Table *p, /* Virtual table handle */
1946 int iLevel, /* Level of %_segdir entries to delete */
1947 Fts3SegReader **apSegment, /* Array of SegReader objects */
1948 int nReader /* Size of array apSegment */
1949 ){
1950 int rc; /* Return Code */
1951 int i; /* Iterator variable */
1952 sqlite3_stmt *pDelete; /* SQL statement to delete rows */
1953
1954 rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0);
1955 for(i=0; rc==SQLITE_OK && i<nReader; i++){
1956 Fts3SegReader *pSegment = apSegment[i];
1957 if( pSegment->iStartBlock ){
1958 sqlite3_bind_int64(pDelete, 1, pSegment->iStartBlock);
1959 sqlite3_bind_int64(pDelete, 2, pSegment->iEndBlock);
1960 sqlite3_step(pDelete);
1961 rc = sqlite3_reset(pDelete);
1962 }
1963 }
1964 if( rc!=SQLITE_OK ){
1965 return rc;
1966 }
1967
1968 if( iLevel==FTS3_SEGCURSOR_ALL ){
1969 fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
1970 }else if( iLevel==FTS3_SEGCURSOR_PENDING ){
1971 sqlite3Fts3PendingTermsClear(p);
1972 }else{
1973 assert( iLevel>=0 );
1974 rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_BY_LEVEL, &pDelete, 0);
1975 if( rc==SQLITE_OK ){
1976 sqlite3_bind_int(pDelete, 1, iLevel);
1977 sqlite3_step(pDelete);
1978 rc = sqlite3_reset(pDelete);
1979 }
1980 }
1981
1982 return rc;
1983 }
1984
1985 /*
1986 ** When this function is called, buffer *ppList (size *pnList bytes) contains
1987 ** a position list that may (or may not) feature multiple columns. This
1988 ** function adjusts the pointer *ppList and the length *pnList so that they
1989 ** identify the subset of the position list that corresponds to column iCol.
1990 **
1991 ** If there are no entries in the input position list for column iCol, then
1992 ** *pnList is set to zero before returning.
1993 */
fts3ColumnFilter(int iCol,char ** ppList,int * pnList)1994 static void fts3ColumnFilter(
1995 int iCol, /* Column to filter on */
1996 char **ppList, /* IN/OUT: Pointer to position list */
1997 int *pnList /* IN/OUT: Size of buffer *ppList in bytes */
1998 ){
1999 char *pList = *ppList;
2000 int nList = *pnList;
2001 char *pEnd = &pList[nList];
2002 int iCurrent = 0;
2003 char *p = pList;
2004
2005 assert( iCol>=0 );
2006 while( 1 ){
2007 char c = 0;
2008 while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80;
2009
2010 if( iCol==iCurrent ){
2011 nList = (int)(p - pList);
2012 break;
2013 }
2014
2015 nList -= (int)(p - pList);
2016 pList = p;
2017 if( nList==0 ){
2018 break;
2019 }
2020 p = &pList[1];
2021 p += sqlite3Fts3GetVarint32(p, &iCurrent);
2022 }
2023
2024 *ppList = pList;
2025 *pnList = nList;
2026 }
2027
sqlite3Fts3SegReaderStart(Fts3Table * p,Fts3SegReaderCursor * pCsr,Fts3SegFilter * pFilter)2028 int sqlite3Fts3SegReaderStart(
2029 Fts3Table *p, /* Virtual table handle */
2030 Fts3SegReaderCursor *pCsr, /* Cursor object */
2031 Fts3SegFilter *pFilter /* Restrictions on range of iteration */
2032 ){
2033 int i;
2034
2035 /* Initialize the cursor object */
2036 pCsr->pFilter = pFilter;
2037
2038 /* If the Fts3SegFilter defines a specific term (or term prefix) to search
2039 ** for, then advance each segment iterator until it points to a term of
2040 ** equal or greater value than the specified term. This prevents many
2041 ** unnecessary merge/sort operations for the case where single segment
2042 ** b-tree leaf nodes contain more than one term.
2043 */
2044 for(i=0; i<pCsr->nSegment; i++){
2045 int nTerm = pFilter->nTerm;
2046 const char *zTerm = pFilter->zTerm;
2047 Fts3SegReader *pSeg = pCsr->apSegment[i];
2048 do {
2049 int rc = fts3SegReaderNext(p, pSeg);
2050 if( rc!=SQLITE_OK ) return rc;
2051 }while( zTerm && fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 );
2052 }
2053 fts3SegReaderSort(
2054 pCsr->apSegment, pCsr->nSegment, pCsr->nSegment, fts3SegReaderCmp);
2055
2056 return SQLITE_OK;
2057 }
2058
sqlite3Fts3SegReaderStep(Fts3Table * p,Fts3SegReaderCursor * pCsr)2059 int sqlite3Fts3SegReaderStep(
2060 Fts3Table *p, /* Virtual table handle */
2061 Fts3SegReaderCursor *pCsr /* Cursor object */
2062 ){
2063 int rc = SQLITE_OK;
2064
2065 int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY);
2066 int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS);
2067 int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER);
2068 int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX);
2069 int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN);
2070
2071 Fts3SegReader **apSegment = pCsr->apSegment;
2072 int nSegment = pCsr->nSegment;
2073 Fts3SegFilter *pFilter = pCsr->pFilter;
2074
2075 if( pCsr->nSegment==0 ) return SQLITE_OK;
2076
2077 do {
2078 int nMerge;
2079 int i;
2080
2081 /* Advance the first pCsr->nAdvance entries in the apSegment[] array
2082 ** forward. Then sort the list in order of current term again.
2083 */
2084 for(i=0; i<pCsr->nAdvance; i++){
2085 rc = fts3SegReaderNext(p, apSegment[i]);
2086 if( rc!=SQLITE_OK ) return rc;
2087 }
2088 fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp);
2089 pCsr->nAdvance = 0;
2090
2091 /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */
2092 assert( rc==SQLITE_OK );
2093 if( apSegment[0]->aNode==0 ) break;
2094
2095 pCsr->nTerm = apSegment[0]->nTerm;
2096 pCsr->zTerm = apSegment[0]->zTerm;
2097
2098 /* If this is a prefix-search, and if the term that apSegment[0] points
2099 ** to does not share a suffix with pFilter->zTerm/nTerm, then all
2100 ** required callbacks have been made. In this case exit early.
2101 **
2102 ** Similarly, if this is a search for an exact match, and the first term
2103 ** of segment apSegment[0] is not a match, exit early.
2104 */
2105 if( pFilter->zTerm && !isScan ){
2106 if( pCsr->nTerm<pFilter->nTerm
2107 || (!isPrefix && pCsr->nTerm>pFilter->nTerm)
2108 || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm)
2109 ){
2110 break;
2111 }
2112 }
2113
2114 nMerge = 1;
2115 while( nMerge<nSegment
2116 && apSegment[nMerge]->aNode
2117 && apSegment[nMerge]->nTerm==pCsr->nTerm
2118 && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm)
2119 ){
2120 nMerge++;
2121 }
2122
2123 assert( isIgnoreEmpty || (isRequirePos && !isColFilter) );
2124 if( nMerge==1 && !isIgnoreEmpty ){
2125 pCsr->aDoclist = apSegment[0]->aDoclist;
2126 pCsr->nDoclist = apSegment[0]->nDoclist;
2127 rc = SQLITE_ROW;
2128 }else{
2129 int nDoclist = 0; /* Size of doclist */
2130 sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */
2131
2132 /* The current term of the first nMerge entries in the array
2133 ** of Fts3SegReader objects is the same. The doclists must be merged
2134 ** and a single term returned with the merged doclist.
2135 */
2136 for(i=0; i<nMerge; i++){
2137 fts3SegReaderFirstDocid(apSegment[i]);
2138 }
2139 fts3SegReaderSort(apSegment, nMerge, nMerge, fts3SegReaderDoclistCmp);
2140 while( apSegment[0]->pOffsetList ){
2141 int j; /* Number of segments that share a docid */
2142 char *pList;
2143 int nList;
2144 int nByte;
2145 sqlite3_int64 iDocid = apSegment[0]->iDocid;
2146 fts3SegReaderNextDocid(apSegment[0], &pList, &nList);
2147 j = 1;
2148 while( j<nMerge
2149 && apSegment[j]->pOffsetList
2150 && apSegment[j]->iDocid==iDocid
2151 ){
2152 fts3SegReaderNextDocid(apSegment[j], 0, 0);
2153 j++;
2154 }
2155
2156 if( isColFilter ){
2157 fts3ColumnFilter(pFilter->iCol, &pList, &nList);
2158 }
2159
2160 if( !isIgnoreEmpty || nList>0 ){
2161 nByte = sqlite3Fts3VarintLen(iDocid-iPrev) + (isRequirePos?nList+1:0);
2162 if( nDoclist+nByte>pCsr->nBuffer ){
2163 char *aNew;
2164 pCsr->nBuffer = (nDoclist+nByte)*2;
2165 aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer);
2166 if( !aNew ){
2167 return SQLITE_NOMEM;
2168 }
2169 pCsr->aBuffer = aNew;
2170 }
2171 nDoclist += sqlite3Fts3PutVarint(
2172 &pCsr->aBuffer[nDoclist], iDocid-iPrev
2173 );
2174 iPrev = iDocid;
2175 if( isRequirePos ){
2176 memcpy(&pCsr->aBuffer[nDoclist], pList, nList);
2177 nDoclist += nList;
2178 pCsr->aBuffer[nDoclist++] = '\0';
2179 }
2180 }
2181
2182 fts3SegReaderSort(apSegment, nMerge, j, fts3SegReaderDoclistCmp);
2183 }
2184 if( nDoclist>0 ){
2185 pCsr->aDoclist = pCsr->aBuffer;
2186 pCsr->nDoclist = nDoclist;
2187 rc = SQLITE_ROW;
2188 }
2189 }
2190 pCsr->nAdvance = nMerge;
2191 }while( rc==SQLITE_OK );
2192
2193 return rc;
2194 }
2195
sqlite3Fts3SegReaderFinish(Fts3SegReaderCursor * pCsr)2196 void sqlite3Fts3SegReaderFinish(
2197 Fts3SegReaderCursor *pCsr /* Cursor object */
2198 ){
2199 if( pCsr ){
2200 int i;
2201 for(i=0; i<pCsr->nSegment; i++){
2202 sqlite3Fts3SegReaderFree(pCsr->apSegment[i]);
2203 }
2204 sqlite3_free(pCsr->apSegment);
2205 sqlite3_free(pCsr->aBuffer);
2206
2207 pCsr->nSegment = 0;
2208 pCsr->apSegment = 0;
2209 pCsr->aBuffer = 0;
2210 }
2211 }
2212
2213 /*
2214 ** Merge all level iLevel segments in the database into a single
2215 ** iLevel+1 segment. Or, if iLevel<0, merge all segments into a
2216 ** single segment with a level equal to the numerically largest level
2217 ** currently present in the database.
2218 **
2219 ** If this function is called with iLevel<0, but there is only one
2220 ** segment in the database, SQLITE_DONE is returned immediately.
2221 ** Otherwise, if successful, SQLITE_OK is returned. If an error occurs,
2222 ** an SQLite error code is returned.
2223 */
fts3SegmentMerge(Fts3Table * p,int iLevel)2224 static int fts3SegmentMerge(Fts3Table *p, int iLevel){
2225 int rc; /* Return code */
2226 int iIdx = 0; /* Index of new segment */
2227 int iNewLevel = 0; /* Level to create new segment at */
2228 SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */
2229 Fts3SegFilter filter; /* Segment term filter condition */
2230 Fts3SegReaderCursor csr; /* Cursor to iterate through level(s) */
2231
2232 rc = sqlite3Fts3SegReaderCursor(p, iLevel, 0, 0, 1, 0, &csr);
2233 if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
2234
2235 if( iLevel==FTS3_SEGCURSOR_ALL ){
2236 /* This call is to merge all segments in the database to a single
2237 ** segment. The level of the new segment is equal to the the numerically
2238 ** greatest segment level currently present in the database. The index
2239 ** of the new segment is always 0. */
2240 int nDummy; /* TODO: Remove this */
2241 if( csr.nSegment==1 ){
2242 rc = SQLITE_DONE;
2243 goto finished;
2244 }
2245 rc = fts3SegmentCountMax(p, &nDummy, &iNewLevel);
2246 }else{
2247 /* This call is to merge all segments at level iLevel. Find the next
2248 ** available segment index at level iLevel+1. The call to
2249 ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
2250 ** a single iLevel+2 segment if necessary. */
2251 iNewLevel = iLevel+1;
2252 rc = fts3AllocateSegdirIdx(p, iNewLevel, &iIdx);
2253 }
2254 if( rc!=SQLITE_OK ) goto finished;
2255 assert( csr.nSegment>0 );
2256 assert( iNewLevel>=0 );
2257
2258 memset(&filter, 0, sizeof(Fts3SegFilter));
2259 filter.flags = FTS3_SEGMENT_REQUIRE_POS;
2260 filter.flags |= (iLevel==FTS3_SEGCURSOR_ALL ? FTS3_SEGMENT_IGNORE_EMPTY : 0);
2261
2262 rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
2263 while( SQLITE_OK==rc ){
2264 rc = sqlite3Fts3SegReaderStep(p, &csr);
2265 if( rc!=SQLITE_ROW ) break;
2266 rc = fts3SegWriterAdd(p, &pWriter, 1,
2267 csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist);
2268 }
2269 if( rc!=SQLITE_OK ) goto finished;
2270 assert( pWriter );
2271
2272 rc = fts3DeleteSegdir(p, iLevel, csr.apSegment, csr.nSegment);
2273 if( rc!=SQLITE_OK ) goto finished;
2274 rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
2275
2276 finished:
2277 fts3SegWriterFree(pWriter);
2278 sqlite3Fts3SegReaderFinish(&csr);
2279 return rc;
2280 }
2281
2282
2283 /*
2284 ** Flush the contents of pendingTerms to a level 0 segment.
2285 */
sqlite3Fts3PendingTermsFlush(Fts3Table * p)2286 int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
2287 return fts3SegmentMerge(p, FTS3_SEGCURSOR_PENDING);
2288 }
2289
2290 /*
2291 ** Encode N integers as varints into a blob.
2292 */
fts3EncodeIntArray(int N,u32 * a,char * zBuf,int * pNBuf)2293 static void fts3EncodeIntArray(
2294 int N, /* The number of integers to encode */
2295 u32 *a, /* The integer values */
2296 char *zBuf, /* Write the BLOB here */
2297 int *pNBuf /* Write number of bytes if zBuf[] used here */
2298 ){
2299 int i, j;
2300 for(i=j=0; i<N; i++){
2301 j += sqlite3Fts3PutVarint(&zBuf[j], (sqlite3_int64)a[i]);
2302 }
2303 *pNBuf = j;
2304 }
2305
2306 /*
2307 ** Decode a blob of varints into N integers
2308 */
fts3DecodeIntArray(int N,u32 * a,const char * zBuf,int nBuf)2309 static void fts3DecodeIntArray(
2310 int N, /* The number of integers to decode */
2311 u32 *a, /* Write the integer values */
2312 const char *zBuf, /* The BLOB containing the varints */
2313 int nBuf /* size of the BLOB */
2314 ){
2315 int i, j;
2316 UNUSED_PARAMETER(nBuf);
2317 for(i=j=0; i<N; i++){
2318 sqlite3_int64 x;
2319 j += sqlite3Fts3GetVarint(&zBuf[j], &x);
2320 assert(j<=nBuf);
2321 a[i] = (u32)(x & 0xffffffff);
2322 }
2323 }
2324
2325 /*
2326 ** Insert the sizes (in tokens) for each column of the document
2327 ** with docid equal to p->iPrevDocid. The sizes are encoded as
2328 ** a blob of varints.
2329 */
fts3InsertDocsize(int * pRC,Fts3Table * p,u32 * aSz)2330 static void fts3InsertDocsize(
2331 int *pRC, /* Result code */
2332 Fts3Table *p, /* Table into which to insert */
2333 u32 *aSz /* Sizes of each column */
2334 ){
2335 char *pBlob; /* The BLOB encoding of the document size */
2336 int nBlob; /* Number of bytes in the BLOB */
2337 sqlite3_stmt *pStmt; /* Statement used to insert the encoding */
2338 int rc; /* Result code from subfunctions */
2339
2340 if( *pRC ) return;
2341 pBlob = sqlite3_malloc( 10*p->nColumn );
2342 if( pBlob==0 ){
2343 *pRC = SQLITE_NOMEM;
2344 return;
2345 }
2346 fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob);
2347 rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0);
2348 if( rc ){
2349 sqlite3_free(pBlob);
2350 *pRC = rc;
2351 return;
2352 }
2353 sqlite3_bind_int64(pStmt, 1, p->iPrevDocid);
2354 sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free);
2355 sqlite3_step(pStmt);
2356 *pRC = sqlite3_reset(pStmt);
2357 }
2358
2359 /*
2360 ** Record 0 of the %_stat table contains a blob consisting of N varints,
2361 ** where N is the number of user defined columns in the fts3 table plus
2362 ** two. If nCol is the number of user defined columns, then values of the
2363 ** varints are set as follows:
2364 **
2365 ** Varint 0: Total number of rows in the table.
2366 **
2367 ** Varint 1..nCol: For each column, the total number of tokens stored in
2368 ** the column for all rows of the table.
2369 **
2370 ** Varint 1+nCol: The total size, in bytes, of all text values in all
2371 ** columns of all rows of the table.
2372 **
2373 */
fts3UpdateDocTotals(int * pRC,Fts3Table * p,u32 * aSzIns,u32 * aSzDel,int nChng)2374 static void fts3UpdateDocTotals(
2375 int *pRC, /* The result code */
2376 Fts3Table *p, /* Table being updated */
2377 u32 *aSzIns, /* Size increases */
2378 u32 *aSzDel, /* Size decreases */
2379 int nChng /* Change in the number of documents */
2380 ){
2381 char *pBlob; /* Storage for BLOB written into %_stat */
2382 int nBlob; /* Size of BLOB written into %_stat */
2383 u32 *a; /* Array of integers that becomes the BLOB */
2384 sqlite3_stmt *pStmt; /* Statement for reading and writing */
2385 int i; /* Loop counter */
2386 int rc; /* Result code from subfunctions */
2387
2388 const int nStat = p->nColumn+2;
2389
2390 if( *pRC ) return;
2391 a = sqlite3_malloc( (sizeof(u32)+10)*nStat );
2392 if( a==0 ){
2393 *pRC = SQLITE_NOMEM;
2394 return;
2395 }
2396 pBlob = (char*)&a[nStat];
2397 rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0);
2398 if( rc ){
2399 sqlite3_free(a);
2400 *pRC = rc;
2401 return;
2402 }
2403 if( sqlite3_step(pStmt)==SQLITE_ROW ){
2404 fts3DecodeIntArray(nStat, a,
2405 sqlite3_column_blob(pStmt, 0),
2406 sqlite3_column_bytes(pStmt, 0));
2407 }else{
2408 memset(a, 0, sizeof(u32)*(nStat) );
2409 }
2410 sqlite3_reset(pStmt);
2411 if( nChng<0 && a[0]<(u32)(-nChng) ){
2412 a[0] = 0;
2413 }else{
2414 a[0] += nChng;
2415 }
2416 for(i=0; i<p->nColumn+1; i++){
2417 u32 x = a[i+1];
2418 if( x+aSzIns[i] < aSzDel[i] ){
2419 x = 0;
2420 }else{
2421 x = x + aSzIns[i] - aSzDel[i];
2422 }
2423 a[i+1] = x;
2424 }
2425 fts3EncodeIntArray(nStat, a, pBlob, &nBlob);
2426 rc = fts3SqlStmt(p, SQL_REPLACE_DOCTOTAL, &pStmt, 0);
2427 if( rc ){
2428 sqlite3_free(a);
2429 *pRC = rc;
2430 return;
2431 }
2432 sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC);
2433 sqlite3_step(pStmt);
2434 *pRC = sqlite3_reset(pStmt);
2435 sqlite3_free(a);
2436 }
2437
2438 /*
2439 ** Handle a 'special' INSERT of the form:
2440 **
2441 ** "INSERT INTO tbl(tbl) VALUES(<expr>)"
2442 **
2443 ** Argument pVal contains the result of <expr>. Currently the only
2444 ** meaningful value to insert is the text 'optimize'.
2445 */
fts3SpecialInsert(Fts3Table * p,sqlite3_value * pVal)2446 static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
2447 int rc; /* Return Code */
2448 const char *zVal = (const char *)sqlite3_value_text(pVal);
2449 int nVal = sqlite3_value_bytes(pVal);
2450
2451 if( !zVal ){
2452 return SQLITE_NOMEM;
2453 }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){
2454 rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL);
2455 if( rc==SQLITE_DONE ){
2456 rc = SQLITE_OK;
2457 }else{
2458 sqlite3Fts3PendingTermsClear(p);
2459 }
2460 #ifdef SQLITE_TEST
2461 }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){
2462 p->nNodeSize = atoi(&zVal[9]);
2463 rc = SQLITE_OK;
2464 }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){
2465 p->nMaxPendingData = atoi(&zVal[11]);
2466 rc = SQLITE_OK;
2467 #endif
2468 }else{
2469 rc = SQLITE_ERROR;
2470 }
2471
2472 sqlite3Fts3SegmentsClose(p);
2473 return rc;
2474 }
2475
2476 /*
2477 ** Return the deferred doclist associated with deferred token pDeferred.
2478 ** This function assumes that sqlite3Fts3CacheDeferredDoclists() has already
2479 ** been called to allocate and populate the doclist.
2480 */
sqlite3Fts3DeferredDoclist(Fts3DeferredToken * pDeferred,int * pnByte)2481 char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *pDeferred, int *pnByte){
2482 if( pDeferred->pList ){
2483 *pnByte = pDeferred->pList->nData;
2484 return pDeferred->pList->aData;
2485 }
2486 *pnByte = 0;
2487 return 0;
2488 }
2489
2490 /*
2491 ** Helper fucntion for FreeDeferredDoclists(). This function removes all
2492 ** references to deferred doclists from within the tree of Fts3Expr
2493 ** structures headed by
2494 */
fts3DeferredDoclistClear(Fts3Expr * pExpr)2495 static void fts3DeferredDoclistClear(Fts3Expr *pExpr){
2496 if( pExpr ){
2497 fts3DeferredDoclistClear(pExpr->pLeft);
2498 fts3DeferredDoclistClear(pExpr->pRight);
2499 if( pExpr->isLoaded ){
2500 sqlite3_free(pExpr->aDoclist);
2501 pExpr->isLoaded = 0;
2502 pExpr->aDoclist = 0;
2503 pExpr->nDoclist = 0;
2504 pExpr->pCurrent = 0;
2505 pExpr->iCurrent = 0;
2506 }
2507 }
2508 }
2509
2510 /*
2511 ** Delete all cached deferred doclists. Deferred doclists are cached
2512 ** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function.
2513 */
sqlite3Fts3FreeDeferredDoclists(Fts3Cursor * pCsr)2514 void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){
2515 Fts3DeferredToken *pDef;
2516 for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){
2517 sqlite3_free(pDef->pList);
2518 pDef->pList = 0;
2519 }
2520 if( pCsr->pDeferred ){
2521 fts3DeferredDoclistClear(pCsr->pExpr);
2522 }
2523 }
2524
2525 /*
2526 ** Free all entries in the pCsr->pDeffered list. Entries are added to
2527 ** this list using sqlite3Fts3DeferToken().
2528 */
sqlite3Fts3FreeDeferredTokens(Fts3Cursor * pCsr)2529 void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){
2530 Fts3DeferredToken *pDef;
2531 Fts3DeferredToken *pNext;
2532 for(pDef=pCsr->pDeferred; pDef; pDef=pNext){
2533 pNext = pDef->pNext;
2534 sqlite3_free(pDef->pList);
2535 sqlite3_free(pDef);
2536 }
2537 pCsr->pDeferred = 0;
2538 }
2539
2540 /*
2541 ** Generate deferred-doclists for all tokens in the pCsr->pDeferred list
2542 ** based on the row that pCsr currently points to.
2543 **
2544 ** A deferred-doclist is like any other doclist with position information
2545 ** included, except that it only contains entries for a single row of the
2546 ** table, not for all rows.
2547 */
sqlite3Fts3CacheDeferredDoclists(Fts3Cursor * pCsr)2548 int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
2549 int rc = SQLITE_OK; /* Return code */
2550 if( pCsr->pDeferred ){
2551 int i; /* Used to iterate through table columns */
2552 sqlite3_int64 iDocid; /* Docid of the row pCsr points to */
2553 Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */
2554
2555 Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
2556 sqlite3_tokenizer *pT = p->pTokenizer;
2557 sqlite3_tokenizer_module const *pModule = pT->pModule;
2558
2559 assert( pCsr->isRequireSeek==0 );
2560 iDocid = sqlite3_column_int64(pCsr->pStmt, 0);
2561
2562 for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){
2563 const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
2564 sqlite3_tokenizer_cursor *pTC = 0;
2565
2566 rc = pModule->xOpen(pT, zText, -1, &pTC);
2567 while( rc==SQLITE_OK ){
2568 char const *zToken; /* Buffer containing token */
2569 int nToken; /* Number of bytes in token */
2570 int iDum1, iDum2; /* Dummy variables */
2571 int iPos; /* Position of token in zText */
2572
2573 pTC->pTokenizer = pT;
2574 rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
2575 for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
2576 Fts3PhraseToken *pPT = pDef->pToken;
2577 if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
2578 && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
2579 && (0==memcmp(zToken, pPT->z, pPT->n))
2580 ){
2581 fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc);
2582 }
2583 }
2584 }
2585 if( pTC ) pModule->xClose(pTC);
2586 if( rc==SQLITE_DONE ) rc = SQLITE_OK;
2587 }
2588
2589 for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
2590 if( pDef->pList ){
2591 rc = fts3PendingListAppendVarint(&pDef->pList, 0);
2592 }
2593 }
2594 }
2595
2596 return rc;
2597 }
2598
2599 /*
2600 ** Add an entry for token pToken to the pCsr->pDeferred list.
2601 */
sqlite3Fts3DeferToken(Fts3Cursor * pCsr,Fts3PhraseToken * pToken,int iCol)2602 int sqlite3Fts3DeferToken(
2603 Fts3Cursor *pCsr, /* Fts3 table cursor */
2604 Fts3PhraseToken *pToken, /* Token to defer */
2605 int iCol /* Column that token must appear in (or -1) */
2606 ){
2607 Fts3DeferredToken *pDeferred;
2608 pDeferred = sqlite3_malloc(sizeof(*pDeferred));
2609 if( !pDeferred ){
2610 return SQLITE_NOMEM;
2611 }
2612 memset(pDeferred, 0, sizeof(*pDeferred));
2613 pDeferred->pToken = pToken;
2614 pDeferred->pNext = pCsr->pDeferred;
2615 pDeferred->iCol = iCol;
2616 pCsr->pDeferred = pDeferred;
2617
2618 assert( pToken->pDeferred==0 );
2619 pToken->pDeferred = pDeferred;
2620
2621 return SQLITE_OK;
2622 }
2623
2624
2625 /*
2626 ** This function does the work for the xUpdate method of FTS3 virtual
2627 ** tables.
2628 */
sqlite3Fts3UpdateMethod(sqlite3_vtab * pVtab,int nArg,sqlite3_value ** apVal,sqlite_int64 * pRowid)2629 int sqlite3Fts3UpdateMethod(
2630 sqlite3_vtab *pVtab, /* FTS3 vtab object */
2631 int nArg, /* Size of argument array */
2632 sqlite3_value **apVal, /* Array of arguments */
2633 sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */
2634 ){
2635 Fts3Table *p = (Fts3Table *)pVtab;
2636 int rc = SQLITE_OK; /* Return Code */
2637 int isRemove = 0; /* True for an UPDATE or DELETE */
2638 sqlite3_int64 iRemove = 0; /* Rowid removed by UPDATE or DELETE */
2639 u32 *aSzIns; /* Sizes of inserted documents */
2640 u32 *aSzDel; /* Sizes of deleted documents */
2641 int nChng = 0; /* Net change in number of documents */
2642
2643 assert( p->pSegments==0 );
2644
2645 /* Allocate space to hold the change in document sizes */
2646 aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 );
2647 if( aSzIns==0 ) return SQLITE_NOMEM;
2648 aSzDel = &aSzIns[p->nColumn+1];
2649 memset(aSzIns, 0, sizeof(aSzIns[0])*(p->nColumn+1)*2);
2650
2651 /* If this is a DELETE or UPDATE operation, remove the old record. */
2652 if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){
2653 int isEmpty = 0;
2654 rc = fts3IsEmpty(p, apVal, &isEmpty);
2655 if( rc==SQLITE_OK ){
2656 if( isEmpty ){
2657 /* Deleting this row means the whole table is empty. In this case
2658 ** delete the contents of all three tables and throw away any
2659 ** data in the pendingTerms hash table.
2660 */
2661 rc = fts3DeleteAll(p);
2662 }else{
2663 isRemove = 1;
2664 iRemove = sqlite3_value_int64(apVal[0]);
2665 rc = fts3PendingTermsDocid(p, iRemove);
2666 fts3DeleteTerms(&rc, p, apVal, aSzDel);
2667 fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, apVal);
2668 if( p->bHasDocsize ){
2669 fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, apVal);
2670 }
2671 nChng--;
2672 }
2673 }
2674 }else if( sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL ){
2675 sqlite3_free(aSzIns);
2676 return fts3SpecialInsert(p, apVal[p->nColumn+2]);
2677 }
2678
2679 /* If this is an INSERT or UPDATE operation, insert the new record. */
2680 if( nArg>1 && rc==SQLITE_OK ){
2681 rc = fts3InsertData(p, apVal, pRowid);
2682 if( rc==SQLITE_OK && (!isRemove || *pRowid!=iRemove) ){
2683 rc = fts3PendingTermsDocid(p, *pRowid);
2684 }
2685 if( rc==SQLITE_OK ){
2686 rc = fts3InsertTerms(p, apVal, aSzIns);
2687 }
2688 if( p->bHasDocsize ){
2689 fts3InsertDocsize(&rc, p, aSzIns);
2690 }
2691 nChng++;
2692 }
2693
2694 if( p->bHasStat ){
2695 fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng);
2696 }
2697
2698 sqlite3_free(aSzIns);
2699 sqlite3Fts3SegmentsClose(p);
2700 return rc;
2701 }
2702
2703 /*
2704 ** Flush any data in the pending-terms hash table to disk. If successful,
2705 ** merge all segments in the database (including the new segment, if
2706 ** there was any data to flush) into a single segment.
2707 */
sqlite3Fts3Optimize(Fts3Table * p)2708 int sqlite3Fts3Optimize(Fts3Table *p){
2709 int rc;
2710 rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0);
2711 if( rc==SQLITE_OK ){
2712 rc = fts3SegmentMerge(p, FTS3_SEGCURSOR_ALL);
2713 if( rc==SQLITE_OK ){
2714 rc = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
2715 if( rc==SQLITE_OK ){
2716 sqlite3Fts3PendingTermsClear(p);
2717 }
2718 }else{
2719 sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0);
2720 sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0);
2721 }
2722 }
2723 sqlite3Fts3SegmentsClose(p);
2724 return rc;
2725 }
2726
2727 #endif
2728