• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ** 2009 Nov 12
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 */
14 
15 #ifndef _FTSINT_H
16 #define _FTSINT_H
17 
18 #if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
19 # define NDEBUG 1
20 #endif
21 
22 #include "sqlite3.h"
23 #include "fts3_tokenizer.h"
24 #include "fts3_hash.h"
25 
26 /*
27 ** This constant controls how often segments are merged. Once there are
28 ** FTS3_MERGE_COUNT segments of level N, they are merged into a single
29 ** segment of level N+1.
30 */
31 #define FTS3_MERGE_COUNT 16
32 
33 /*
34 ** This is the maximum amount of data (in bytes) to store in the
35 ** Fts3Table.pendingTerms hash table. Normally, the hash table is
36 ** populated as documents are inserted/updated/deleted in a transaction
37 ** and used to create a new segment when the transaction is committed.
38 ** However if this limit is reached midway through a transaction, a new
39 ** segment is created and the hash table cleared immediately.
40 */
41 #define FTS3_MAX_PENDING_DATA (1*1024*1024)
42 
43 /*
44 ** Macro to return the number of elements in an array. SQLite has a
45 ** similar macro called ArraySize(). Use a different name to avoid
46 ** a collision when building an amalgamation with built-in FTS3.
47 */
48 #define SizeofArray(X) ((int)(sizeof(X)/sizeof(X[0])))
49 
50 /*
51 ** Maximum length of a varint encoded integer. The varint format is different
52 ** from that used by SQLite, so the maximum length is 10, not 9.
53 */
54 #define FTS3_VARINT_MAX 10
55 
56 /*
57 ** The testcase() macro is only used by the amalgamation.  If undefined,
58 ** make it a no-op.
59 */
60 #ifndef testcase
61 # define testcase(X)
62 #endif
63 
64 /*
65 ** Terminator values for position-lists and column-lists.
66 */
67 #define POS_COLUMN  (1)     /* Column-list terminator */
68 #define POS_END     (0)     /* Position-list terminator */
69 
70 /*
71 ** This section provides definitions to allow the
72 ** FTS3 extension to be compiled outside of the
73 ** amalgamation.
74 */
75 #ifndef SQLITE_AMALGAMATION
76 /*
77 ** Macros indicating that conditional expressions are always true or
78 ** false.
79 */
80 #ifdef SQLITE_COVERAGE_TEST
81 # define ALWAYS(x) (1)
82 # define NEVER(X)  (0)
83 #else
84 # define ALWAYS(x) (x)
85 # define NEVER(X)  (x)
86 #endif
87 
88 /*
89 ** Internal types used by SQLite.
90 */
91 typedef unsigned char u8;         /* 1-byte (or larger) unsigned integer */
92 typedef short int i16;            /* 2-byte (or larger) signed integer */
93 typedef unsigned int u32;         /* 4-byte unsigned integer */
94 typedef sqlite3_uint64 u64;       /* 8-byte unsigned integer */
95 /*
96 ** Macro used to suppress compiler warnings for unused parameters.
97 */
98 #define UNUSED_PARAMETER(x) (void)(x)
99 #endif
100 
101 typedef struct Fts3Table Fts3Table;
102 typedef struct Fts3Cursor Fts3Cursor;
103 typedef struct Fts3Expr Fts3Expr;
104 typedef struct Fts3Phrase Fts3Phrase;
105 typedef struct Fts3PhraseToken Fts3PhraseToken;
106 
107 typedef struct Fts3SegFilter Fts3SegFilter;
108 typedef struct Fts3DeferredToken Fts3DeferredToken;
109 typedef struct Fts3SegReader Fts3SegReader;
110 typedef struct Fts3SegReaderCursor Fts3SegReaderCursor;
111 
112 /*
113 ** A connection to a fulltext index is an instance of the following
114 ** structure. The xCreate and xConnect methods create an instance
115 ** of this structure and xDestroy and xDisconnect free that instance.
116 ** All other methods receive a pointer to the structure as one of their
117 ** arguments.
118 */
119 struct Fts3Table {
120   sqlite3_vtab base;              /* Base class used by SQLite core */
121   sqlite3 *db;                    /* The database connection */
122   const char *zDb;                /* logical database name */
123   const char *zName;              /* virtual table name */
124   int nColumn;                    /* number of named columns in virtual table */
125   char **azColumn;                /* column names.  malloced */
126   sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
127 
128   /* Precompiled statements used by the implementation. Each of these
129   ** statements is run and reset within a single virtual table API call.
130   */
131   sqlite3_stmt *aStmt[24];
132 
133   char *zReadExprlist;
134   char *zWriteExprlist;
135 
136   int nNodeSize;                  /* Soft limit for node size */
137   u8 bHasStat;                    /* True if %_stat table exists */
138   u8 bHasDocsize;                 /* True if %_docsize table exists */
139   int nPgsz;                      /* Page size for host database */
140   char *zSegmentsTbl;             /* Name of %_segments table */
141   sqlite3_blob *pSegments;        /* Blob handle open on %_segments table */
142 
143   /* The following hash table is used to buffer pending index updates during
144   ** transactions. Variable nPendingData estimates the memory size of the
145   ** pending data, including hash table overhead, but not malloc overhead.
146   ** When nPendingData exceeds nMaxPendingData, the buffer is flushed
147   ** automatically. Variable iPrevDocid is the docid of the most recently
148   ** inserted record.
149   */
150   int nMaxPendingData;
151   int nPendingData;
152   sqlite_int64 iPrevDocid;
153   Fts3Hash pendingTerms;
154 };
155 
156 /*
157 ** When the core wants to read from the virtual table, it creates a
158 ** virtual table cursor (an instance of the following structure) using
159 ** the xOpen method. Cursors are destroyed using the xClose method.
160 */
161 struct Fts3Cursor {
162   sqlite3_vtab_cursor base;       /* Base class used by SQLite core */
163   i16 eSearch;                    /* Search strategy (see below) */
164   u8 isEof;                       /* True if at End Of Results */
165   u8 isRequireSeek;               /* True if must seek pStmt to %_content row */
166   sqlite3_stmt *pStmt;            /* Prepared statement in use by the cursor */
167   Fts3Expr *pExpr;                /* Parsed MATCH query string */
168   int nPhrase;                    /* Number of matchable phrases in query */
169   Fts3DeferredToken *pDeferred;   /* Deferred search tokens, if any */
170   sqlite3_int64 iPrevId;          /* Previous id read from aDoclist */
171   char *pNextId;                  /* Pointer into the body of aDoclist */
172   char *aDoclist;                 /* List of docids for full-text queries */
173   int nDoclist;                   /* Size of buffer at aDoclist */
174   int eEvalmode;                  /* An FTS3_EVAL_XX constant */
175   int nRowAvg;                    /* Average size of database rows, in pages */
176 
177   int isMatchinfoNeeded;          /* True when aMatchinfo[] needs filling in */
178   u32 *aMatchinfo;                /* Information about most recent match */
179   int nMatchinfo;                 /* Number of elements in aMatchinfo[] */
180   char *zMatchinfo;               /* Matchinfo specification */
181 };
182 
183 #define FTS3_EVAL_FILTER    0
184 #define FTS3_EVAL_NEXT      1
185 #define FTS3_EVAL_MATCHINFO 2
186 
187 /*
188 ** The Fts3Cursor.eSearch member is always set to one of the following.
189 ** Actualy, Fts3Cursor.eSearch can be greater than or equal to
190 ** FTS3_FULLTEXT_SEARCH.  If so, then Fts3Cursor.eSearch - 2 is the index
191 ** of the column to be searched.  For example, in
192 **
193 **     CREATE VIRTUAL TABLE ex1 USING fts3(a,b,c,d);
194 **     SELECT docid FROM ex1 WHERE b MATCH 'one two three';
195 **
196 ** Because the LHS of the MATCH operator is 2nd column "b",
197 ** Fts3Cursor.eSearch will be set to FTS3_FULLTEXT_SEARCH+1.  (+0 for a,
198 ** +1 for b, +2 for c, +3 for d.)  If the LHS of MATCH were "ex1"
199 ** indicating that all columns should be searched,
200 ** then eSearch would be set to FTS3_FULLTEXT_SEARCH+4.
201 */
202 #define FTS3_FULLSCAN_SEARCH 0    /* Linear scan of %_content table */
203 #define FTS3_DOCID_SEARCH    1    /* Lookup by rowid on %_content table */
204 #define FTS3_FULLTEXT_SEARCH 2    /* Full-text index search */
205 
206 /*
207 ** A "phrase" is a sequence of one or more tokens that must match in
208 ** sequence.  A single token is the base case and the most common case.
209 ** For a sequence of tokens contained in double-quotes (i.e. "one two three")
210 ** nToken will be the number of tokens in the string.
211 **
212 ** The nDocMatch and nMatch variables contain data that may be used by the
213 ** matchinfo() function. They are populated when the full-text index is
214 ** queried for hits on the phrase. If one or more tokens in the phrase
215 ** are deferred, the nDocMatch and nMatch variables are populated based
216 ** on the assumption that the
217 */
218 struct Fts3PhraseToken {
219   char *z;                        /* Text of the token */
220   int n;                          /* Number of bytes in buffer z */
221   int isPrefix;                   /* True if token ends with a "*" character */
222   int bFulltext;                  /* True if full-text index was used */
223   Fts3SegReaderCursor *pSegcsr;   /* Segment-reader for this token */
224   Fts3DeferredToken *pDeferred;   /* Deferred token object for this token */
225 };
226 
227 struct Fts3Phrase {
228   /* Variables populated by fts3_expr.c when parsing a MATCH expression */
229   int nToken;                /* Number of tokens in the phrase */
230   int iColumn;               /* Index of column this phrase must match */
231   int isNot;                 /* Phrase prefixed by unary not (-) operator */
232   Fts3PhraseToken aToken[1]; /* One entry for each token in the phrase */
233 };
234 
235 /*
236 ** A tree of these objects forms the RHS of a MATCH operator.
237 **
238 ** If Fts3Expr.eType is either FTSQUERY_NEAR or FTSQUERY_PHRASE and isLoaded
239 ** is true, then aDoclist points to a malloced buffer, size nDoclist bytes,
240 ** containing the results of the NEAR or phrase query in FTS3 doclist
241 ** format. As usual, the initial "Length" field found in doclists stored
242 ** on disk is omitted from this buffer.
243 **
244 ** Variable pCurrent always points to the start of a docid field within
245 ** aDoclist. Since the doclist is usually scanned in docid order, this can
246 ** be used to accelerate seeking to the required docid within the doclist.
247 */
248 struct Fts3Expr {
249   int eType;                 /* One of the FTSQUERY_XXX values defined below */
250   int nNear;                 /* Valid if eType==FTSQUERY_NEAR */
251   Fts3Expr *pParent;         /* pParent->pLeft==this or pParent->pRight==this */
252   Fts3Expr *pLeft;           /* Left operand */
253   Fts3Expr *pRight;          /* Right operand */
254   Fts3Phrase *pPhrase;       /* Valid if eType==FTSQUERY_PHRASE */
255 
256   int isLoaded;              /* True if aDoclist/nDoclist are initialized. */
257   char *aDoclist;            /* Buffer containing doclist */
258   int nDoclist;              /* Size of aDoclist in bytes */
259 
260   sqlite3_int64 iCurrent;
261   char *pCurrent;
262 };
263 
264 /*
265 ** Candidate values for Fts3Query.eType. Note that the order of the first
266 ** four values is in order of precedence when parsing expressions. For
267 ** example, the following:
268 **
269 **   "a OR b AND c NOT d NEAR e"
270 **
271 ** is equivalent to:
272 **
273 **   "a OR (b AND (c NOT (d NEAR e)))"
274 */
275 #define FTSQUERY_NEAR   1
276 #define FTSQUERY_NOT    2
277 #define FTSQUERY_AND    3
278 #define FTSQUERY_OR     4
279 #define FTSQUERY_PHRASE 5
280 
281 
282 /* fts3_write.c */
283 int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
284 int sqlite3Fts3PendingTermsFlush(Fts3Table *);
285 void sqlite3Fts3PendingTermsClear(Fts3Table *);
286 int sqlite3Fts3Optimize(Fts3Table *);
287 int sqlite3Fts3SegReaderNew(int, sqlite3_int64,
288   sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
289 int sqlite3Fts3SegReaderPending(Fts3Table*,const char*,int,int,Fts3SegReader**);
290 void sqlite3Fts3SegReaderFree(Fts3SegReader *);
291 int sqlite3Fts3SegReaderCost(Fts3Cursor *, Fts3SegReader *, int *);
292 int sqlite3Fts3AllSegdirs(Fts3Table*, int, sqlite3_stmt **);
293 int sqlite3Fts3ReadLock(Fts3Table *);
294 int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*);
295 
296 int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
297 int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);
298 
299 void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
300 int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
301 int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
302 void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
303 char *sqlite3Fts3DeferredDoclist(Fts3DeferredToken *, int *);
304 void sqlite3Fts3SegmentsClose(Fts3Table *);
305 
306 #define FTS3_SEGCURSOR_PENDING -1
307 #define FTS3_SEGCURSOR_ALL     -2
308 
309 int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3SegReaderCursor*, Fts3SegFilter*);
310 int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3SegReaderCursor *);
311 void sqlite3Fts3SegReaderFinish(Fts3SegReaderCursor *);
312 int sqlite3Fts3SegReaderCursor(
313     Fts3Table *, int, const char *, int, int, int, Fts3SegReaderCursor *);
314 
315 /* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
316 #define FTS3_SEGMENT_REQUIRE_POS   0x00000001
317 #define FTS3_SEGMENT_IGNORE_EMPTY  0x00000002
318 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
319 #define FTS3_SEGMENT_PREFIX        0x00000008
320 #define FTS3_SEGMENT_SCAN          0x00000010
321 
322 /* Type passed as 4th argument to SegmentReaderIterate() */
323 struct Fts3SegFilter {
324   const char *zTerm;
325   int nTerm;
326   int iCol;
327   int flags;
328 };
329 
330 struct Fts3SegReaderCursor {
331   /* Used internally by sqlite3Fts3SegReaderXXX() calls */
332   Fts3SegReader **apSegment;      /* Array of Fts3SegReader objects */
333   int nSegment;                   /* Size of apSegment array */
334   int nAdvance;                   /* How many seg-readers to advance */
335   Fts3SegFilter *pFilter;         /* Pointer to filter object */
336   char *aBuffer;                  /* Buffer to merge doclists in */
337   int nBuffer;                    /* Allocated size of aBuffer[] in bytes */
338 
339   /* Cost of running this iterator. Used by fts3.c only. */
340   int nCost;
341 
342   /* Output values. Valid only after Fts3SegReaderStep() returns SQLITE_ROW. */
343   char *zTerm;                    /* Pointer to term buffer */
344   int nTerm;                      /* Size of zTerm in bytes */
345   char *aDoclist;                 /* Pointer to doclist buffer */
346   int nDoclist;                   /* Size of aDoclist[] in bytes */
347 };
348 
349 /* fts3.c */
350 int sqlite3Fts3PutVarint(char *, sqlite3_int64);
351 int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
352 int sqlite3Fts3GetVarint32(const char *, int *);
353 int sqlite3Fts3VarintLen(sqlite3_uint64);
354 void sqlite3Fts3Dequote(char *);
355 
356 char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int);
357 int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *, Fts3Expr *);
358 int sqlite3Fts3ExprLoadFtDoclist(Fts3Cursor *, Fts3Expr *, char **, int *);
359 int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int);
360 
361 /* fts3_tokenizer.c */
362 const char *sqlite3Fts3NextToken(const char *, int *);
363 int sqlite3Fts3InitHashTable(sqlite3 *, Fts3Hash *, const char *);
364 int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, const char *,
365     sqlite3_tokenizer **, char **
366 );
367 int sqlite3Fts3IsIdChar(char);
368 
369 /* fts3_snippet.c */
370 void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*);
371 void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
372   const char *, const char *, int, int
373 );
374 void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
375 
376 /* fts3_expr.c */
377 int sqlite3Fts3ExprParse(sqlite3_tokenizer *,
378   char **, int, int, const char *, int, Fts3Expr **
379 );
380 void sqlite3Fts3ExprFree(Fts3Expr *);
381 #ifdef SQLITE_TEST
382 int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
383 #endif
384 
385 /* fts3_aux.c */
386 int sqlite3Fts3InitAux(sqlite3 *db);
387 
388 #endif /* _FTSINT_H */
389