• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# 2007 April 26
2#
3# The author disclaims copyright to this source code.
4#
5#*************************************************************************
6# This file implements tests for prefix-searching in the fts2
7# component of the SQLite library.
8#
9# $Id: fts2n.test,v 1.2 2007/12/13 21:54:11 drh Exp $
10#
11
12set testdir [file dirname $argv0]
13source $testdir/tester.tcl
14
15# If SQLITE_ENABLE_FTS2 is not defined (no fts2 capability), omit this file.
16ifcapable !fts2 {
17  finish_test
18  return
19}
20
21# A large block of filler text to prime the pump with; indexed below as row 1.
22set text {
23  Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas
24  iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam
25  sed turpis posuere placerat. Curabitur et lorem in lorem porttitor
26  aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit
27  ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra
28  at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus,
29  ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at
30  luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu
31  lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse
32  potenti. Cum sociis natoque penatibus et magnis dis parturient
33  montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu,
34  suscipit nec, consequat quis, risus.
35}
36
37db eval {
38  CREATE VIRTUAL TABLE t1 USING fts2(c);
39
40  INSERT INTO t1(rowid, c) VALUES(1, $text);
41  INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row');
42}
43
44# Exact match.
45do_test fts2n-1.1 {
46  execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'"
47} {1}
48
49# And a prefix.
50do_test fts2n-1.2 {
51  execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'"
52} {1}
53
54# Prefix includes exact match.
55do_test fts2n-1.3 {
56  execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'"
57} {1}
58
59# Make certain a bare term without a trailing * isn't considered a prefix!
60do_test fts2n-1.4 {
61  execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore'"
62} {}
63
64# Prefix across multiple rows.
65do_test fts2n-1.5 {
66  execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'"
67} {1 2}
68
69# Likewise, with multiple hits in one document.
70do_test fts2n-1.6 {
71  execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'l*'"
72} {1 2}
73
74# Prefix which should only hit one document.
75do_test fts2n-1.7 {
76  execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'"
77} {2}
78
79# A * that is not attached to the end of a term is dropped.
80do_test fts2n-1.8 {
81  execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'"
82} {}
83
84# Stand-alone * is dropped.
85do_test fts2n-1.9 {
86  execsql "SELECT rowid FROM t1 WHERE t1 MATCH '*'"
87} {}
88
89# Phrase-query prefix; without the *, the trailing "r" must match exactly.
90do_test fts2n-1.10 {
91  execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r*\"'"
92} {2}
93do_test fts2n-1.11 {
94  execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r\"'"
95} {}
96
97# Phrase query with multiple prefix matches.
98do_test fts2n-1.12 {
99  execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l*\"'"
100} {1 2}
101
102# Phrase query mixing multiple prefix terms with an exact term.
103do_test fts2n-1.13 {
104  execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l* row\"'"
105} {2}
106
107
108
109
110# Test across updates (and, by implication, deletes).
111
112# Version of text with every "lorem" deleted (replaced by the empty string {}).
113regsub -all {[Ll]orem} $text {} ntext
114
115db eval {
116  CREATE VIRTUAL TABLE t2 USING fts2(c);
117
118  INSERT INTO t2(rowid, c) VALUES(1, $text);
119  INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row');
120  UPDATE t2 SET c = $ntext WHERE rowid = 1;
121}
122
123# Can't see lorem as an exact match.
124do_test fts2n-2.1 {
125  execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'"
126} {}
127
128# Can't see a prefix of lorem, either.
129do_test fts2n-2.2 {
130  execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'"
131} {}
132
133# Can see lovely in the other document.
134do_test fts2n-2.3 {
135  execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'"
136} {2}
137
138# Can still see other hits.
139do_test fts2n-2.4 {
140  execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'l*'"
141} {1 2}
142
143# Prefix which should only hit one document.
144do_test fts2n-2.5 {
145  execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'"
146} {2}
147
148
149
150# Test with a segment which will have multiple levels in the tree.
151
152# Build a big document with lots of unique terms: each pass appends a copy with the loop letter suffixed to every word.
153set bigtext $text
154foreach c {a b c d e} {
155  regsub -all {[A-Za-z]+} $bigtext "&$c" t
156  append bigtext $t
157}
158
159# Populate a table with many copies of the big document, so that we
160# can test the number of hits found.  Populate $ret with the expected
161# hit counts for each row.  offsets() returns 4 elements for every
162# hit.  We'll have 6 hits for row 1, 1 for row 2, and 6*(2^5)==192 for
163# each copy of $bigtext (the 6 hits in $text, doubled by each of the 5 passes).
164set ret {6 1}
165db eval {
166  BEGIN;
167  CREATE VIRTUAL TABLE t3 USING fts2(c);
168
169  INSERT INTO t3(rowid, c) VALUES(1, $text);
170  INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row');
171}
172for {set i 0} {$i<100} {incr i} {
173  db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)}
174  lappend ret 192
175}
176db eval {COMMIT;}
177
178# Test that we get the expected number of hits per row (offsets() length / 4).
179do_test fts2n-3.1 {
180  set t {}
181  db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
182    set l [llength $o]
183    lappend t [expr {$l/4}]
184  }
185  set t
186} $ret
187
188# TODO(shess) It would be useful to test a couple of edge cases, but I
189# don't know if we have the precision to manage it from here at this
190# time.  Prefix hits can cross leaves, which the code above _should_
191# hit by virtue of size.  There are two variations on this.  If the
192# tree is 2 levels high, the code will find the leaf-node extent
193# directly, but if it's higher, the code will have to follow two
194# separate interior branches down the tree.  Both should be tested.
195
196finish_test
197