1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/integer-section-indexing-handler.h"
16
17 #include <limits>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <utility>
22 #include <vector>
23
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "gmock/gmock.h"
26 #include "gtest/gtest.h"
27 #include "icing/document-builder.h"
28 #include "icing/file/filesystem.h"
29 #include "icing/index/hit/doc-hit-info.h"
30 #include "icing/index/iterator/doc-hit-info-iterator.h"
31 #include "icing/index/numeric/integer-index.h"
32 #include "icing/index/numeric/numeric-index.h"
33 #include "icing/portable/platform.h"
34 #include "icing/proto/document.pb.h"
35 #include "icing/proto/schema.pb.h"
36 #include "icing/schema-builder.h"
37 #include "icing/schema/schema-store.h"
38 #include "icing/schema/section.h"
39 #include "icing/store/document-id.h"
40 #include "icing/store/document-store.h"
41 #include "icing/testing/common-matchers.h"
42 #include "icing/testing/fake-clock.h"
43 #include "icing/testing/icu-data-file-helper.h"
44 #include "icing/testing/test-data.h"
45 #include "icing/testing/tmp-directory.h"
46 #include "icing/tokenization/language-segmenter-factory.h"
47 #include "icing/tokenization/language-segmenter.h"
48 #include "icing/util/tokenized-document.h"
49 #include "unicode/uloc.h"
50
51 namespace icing {
52 namespace lib {
53
54 namespace {
55
56 using ::testing::ElementsAre;
57 using ::testing::Eq;
58 using ::testing::IsEmpty;
59 using ::testing::IsTrue;
60
// FakeType: schema type with three indexable properties (sections). Section
// ids follow the lexicographical order of the indexable property paths:
//   section id 0 -> "body"
//   section id 1 -> "timestamp"
//   section id 2 -> "title"
constexpr std::string_view kFakeType{"FakeType"};
constexpr std::string_view kPropertyBody{"body"};
constexpr std::string_view kPropertyTimestamp{"timestamp"};
constexpr std::string_view kPropertyTitle{"title"};
71
72 static constexpr SectionId kSectionIdTimestamp = 1;
73
// NestedType: schema type whose "nested" property holds a FakeType document
// with nested indexing enabled. Its indexable property paths, in section id
// order:
//   section id 0 -> "name"
//   section id 1 -> "nested.body"
//   section id 2 -> "nested.timestamp"
//   section id 3 -> "nested.title"
//   section id 4 -> "price"
constexpr std::string_view kNestedType{"NestedType"};
constexpr std::string_view kPropertyName{"name"};
constexpr std::string_view kPropertyNestedDoc{"nested"};
constexpr std::string_view kPropertyPrice{"price"};
84
85 static constexpr SectionId kSectionIdNestedTimestamp = 2;
86 static constexpr SectionId kSectionIdPrice = 4;
87
88 class IntegerSectionIndexingHandlerTest : public ::testing::Test {
89 protected:
SetUp()90 void SetUp() override {
91 if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
92 ICING_ASSERT_OK(
93 // File generated via icu_data_file rule in //icing/BUILD.
94 icu_data_file_helper::SetUpICUDataFile(
95 GetTestFilePath("icing/icu.dat")));
96 }
97
98 base_dir_ = GetTestTempDir() + "/icing_test";
99 ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
100 IsTrue());
101
102 integer_index_working_path_ = base_dir_ + "/integer_index";
103 schema_store_dir_ = base_dir_ + "/schema_store";
104 document_store_dir_ = base_dir_ + "/document_store";
105
106 ICING_ASSERT_OK_AND_ASSIGN(
107 integer_index_,
108 IntegerIndex::Create(filesystem_, integer_index_working_path_,
109 /*num_data_threshold_for_bucket_split=*/65536,
110 /*pre_mapping_fbv=*/false));
111
112 language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
113 ICING_ASSERT_OK_AND_ASSIGN(
114 lang_segmenter_,
115 language_segmenter_factory::Create(std::move(segmenter_options)));
116
117 ASSERT_THAT(
118 filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
119 IsTrue());
120 ICING_ASSERT_OK_AND_ASSIGN(
121 schema_store_,
122 SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
123 SchemaProto schema =
124 SchemaBuilder()
125 .AddType(
126 SchemaTypeConfigBuilder()
127 .SetType(kFakeType)
128 .AddProperty(PropertyConfigBuilder()
129 .SetName(kPropertyTitle)
130 .SetDataTypeString(TERM_MATCH_EXACT,
131 TOKENIZER_PLAIN)
132 .SetCardinality(CARDINALITY_OPTIONAL))
133 .AddProperty(PropertyConfigBuilder()
134 .SetName(kPropertyBody)
135 .SetDataTypeString(TERM_MATCH_EXACT,
136 TOKENIZER_PLAIN)
137 .SetCardinality(CARDINALITY_OPTIONAL))
138 .AddProperty(PropertyConfigBuilder()
139 .SetName(kPropertyTimestamp)
140 .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
141 .SetCardinality(CARDINALITY_OPTIONAL)))
142 .AddType(
143 SchemaTypeConfigBuilder()
144 .SetType(kNestedType)
145 .AddProperty(
146 PropertyConfigBuilder()
147 .SetName(kPropertyNestedDoc)
148 .SetDataTypeDocument(
149 kFakeType, /*index_nested_properties=*/true)
150 .SetCardinality(CARDINALITY_OPTIONAL))
151 .AddProperty(PropertyConfigBuilder()
152 .SetName(kPropertyPrice)
153 .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
154 .SetCardinality(CARDINALITY_OPTIONAL))
155 .AddProperty(PropertyConfigBuilder()
156 .SetName(kPropertyName)
157 .SetDataTypeString(TERM_MATCH_EXACT,
158 TOKENIZER_PLAIN)
159 .SetCardinality(CARDINALITY_OPTIONAL)))
160 .Build();
161 ICING_ASSERT_OK(schema_store_->SetSchema(
162 schema, /*ignore_errors_and_delete_documents=*/false,
163 /*allow_circular_schema_definitions=*/false));
164
165 ASSERT_TRUE(
166 filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str()));
167 ICING_ASSERT_OK_AND_ASSIGN(
168 DocumentStore::CreateResult doc_store_create_result,
169 DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
170 schema_store_.get(),
171 /*force_recovery_and_revalidate_documents=*/false,
172 /*namespace_id_fingerprint=*/true,
173 /*pre_mapping_fbv=*/false,
174 /*use_persistent_hash_map=*/true,
175 PortableFileBackedProtoLog<
176 DocumentWrapper>::kDeflateCompressionLevel,
177 /*initialize_stats=*/nullptr));
178 document_store_ = std::move(doc_store_create_result.document_store);
179 }
180
TearDown()181 void TearDown() override {
182 document_store_.reset();
183 schema_store_.reset();
184 lang_segmenter_.reset();
185 integer_index_.reset();
186
187 filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
188 }
189
190 Filesystem filesystem_;
191 FakeClock fake_clock_;
192 std::string base_dir_;
193 std::string integer_index_working_path_;
194 std::string schema_store_dir_;
195 std::string document_store_dir_;
196
197 std::unique_ptr<NumericIndex<int64_t>> integer_index_;
198 std::unique_ptr<LanguageSegmenter> lang_segmenter_;
199 std::unique_ptr<SchemaStore> schema_store_;
200 std::unique_ptr<DocumentStore> document_store_;
201 };
202
GetHits(std::unique_ptr<DocHitInfoIterator> iterator)203 std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
204 std::vector<DocHitInfo> infos;
205 while (iterator->Advance().ok()) {
206 infos.push_back(iterator->doc_hit_info());
207 }
208 return infos;
209 }
210
// Create() must fail with FAILED_PRECONDITION when either the clock or the
// integer index is null.
TEST_F(IntegerSectionIndexingHandlerTest, CreationWithNullPointerShouldFail) {
  const libtextclassifier3::StatusCode expected_code =
      libtextclassifier3::StatusCode::FAILED_PRECONDITION;

  // Null clock, valid index.
  EXPECT_THAT(
      IntegerSectionIndexingHandler::Create(/*clock=*/nullptr,
                                            integer_index_.get()),
      StatusIs(expected_code));

  // Valid clock, null index.
  EXPECT_THAT(
      IntegerSectionIndexingHandler::Create(&fake_clock_,
                                            /*integer_index=*/nullptr),
      StatusIs(expected_code));
}
220
// Handling a FakeType document that carries a "timestamp" value should
// advance last_added_document_id and make the document show up when the
// "timestamp" property is queried over the full int64 range.
TEST_F(IntegerSectionIndexingHandlerTest, HandleIntegerSection) {
  const int64_t min_key = std::numeric_limits<int64_t>::min();
  const int64_t max_key = std::numeric_limits<int64_t>::max();

  DocumentProto doc_proto =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyTitle), "title")
          .AddStringProperty(std::string(kPropertyBody), "body")
          .AddInt64Property(std::string(kPropertyTimestamp), 123)
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_doc,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(doc_proto)));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
                             document_store_->Put(tokenized_doc.document()));

  // Precondition: the index has not seen any document yet.
  ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId));

  // Index the document.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<IntegerSectionIndexingHandler> indexing_handler,
      IntegerSectionIndexingHandler::Create(&fake_clock_,
                                            integer_index_.get()));
  EXPECT_THAT(indexing_handler->Handle(tokenized_doc, doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              IsOk());
  EXPECT_THAT(integer_index_->last_added_document_id(), Eq(doc_id));

  // A full-range query on "timestamp" returns exactly this document.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      integer_index_->GetIterator(kPropertyTimestamp, /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  const std::vector<SectionId> expected_sections = {kSectionIdTimestamp};
  EXPECT_THAT(GetHits(std::move(hit_iterator)),
              ElementsAre(EqualsDocHitInfo(doc_id, expected_sections)));
}
261
// Handling a NestedType document should index both the nested integer
// property ("nested.timestamp") and the top-level one ("price"), and must not
// produce hits under the un-nested "timestamp" path.
TEST_F(IntegerSectionIndexingHandlerTest, HandleNestedIntegerSection) {
  const int64_t min_key = std::numeric_limits<int64_t>::min();
  const int64_t max_key = std::numeric_limits<int64_t>::max();

  // Inner FakeType document that is embedded under the "nested" property.
  DocumentProto inner_doc =
      DocumentBuilder()
          .SetKey("icing", "nested_fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyTitle), "nested title")
          .AddStringProperty(std::string(kPropertyBody), "nested body")
          .AddInt64Property(std::string(kPropertyTimestamp), 123)
          .Build();
  DocumentProto outer_doc =
      DocumentBuilder()
          .SetKey("icing", "nested_type/1")
          .SetSchema(std::string(kNestedType))
          .AddDocumentProperty(std::string(kPropertyNestedDoc),
                               std::move(inner_doc))
          .AddInt64Property(std::string(kPropertyPrice), 456)
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_doc,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(outer_doc)));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
                             document_store_->Put(tokenized_doc.document()));

  // Precondition: the index has not seen any document yet.
  ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId));

  // Index the nested document.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<IntegerSectionIndexingHandler> indexing_handler,
      IntegerSectionIndexingHandler::Create(&fake_clock_,
                                            integer_index_.get()));
  EXPECT_THAT(indexing_handler->Handle(tokenized_doc, doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              IsOk());
  EXPECT_THAT(integer_index_->last_added_document_id(), Eq(doc_id));

  // "nested.timestamp" yields the document in its nested timestamp section.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      integer_index_->GetIterator("nested.timestamp", /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  const std::vector<SectionId> nested_timestamp_sections = {
      kSectionIdNestedTimestamp};
  EXPECT_THAT(
      GetHits(std::move(hit_iterator)),
      ElementsAre(EqualsDocHitInfo(doc_id, nested_timestamp_sections)));

  // "price" yields the document in its price section.
  ICING_ASSERT_OK_AND_ASSIGN(
      hit_iterator,
      integer_index_->GetIterator(kPropertyPrice, /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  const std::vector<SectionId> price_sections = {kSectionIdPrice};
  EXPECT_THAT(GetHits(std::move(hit_iterator)),
              ElementsAre(EqualsDocHitInfo(doc_id, price_sections)));

  // Plain "timestamp" yields nothing.
  ICING_ASSERT_OK_AND_ASSIGN(
      hit_iterator,
      integer_index_->GetIterator(kPropertyTimestamp, /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetHits(std::move(hit_iterator)), IsEmpty());
}
331
// A document with no integer values should still be handled successfully:
// last_added_document_id advances, but a "timestamp" query returns no hits.
TEST_F(IntegerSectionIndexingHandlerTest, HandleShouldSkipEmptyIntegerSection) {
  const int64_t min_key = std::numeric_limits<int64_t>::min();
  const int64_t max_key = std::numeric_limits<int64_t>::max();

  // FakeType document that deliberately omits "timestamp".
  DocumentProto doc_proto =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyTitle), "title")
          .AddStringProperty(std::string(kPropertyBody), "body")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_doc,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(doc_proto)));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
                             document_store_->Put(tokenized_doc.document()));

  // Precondition: the index has not seen any document yet.
  ASSERT_THAT(integer_index_->last_added_document_id(), Eq(kInvalidDocumentId));

  // Handle the document. There is no indexable integer, so the index data is
  // expected to stay as it was, while last_added_document_id is updated.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<IntegerSectionIndexingHandler> indexing_handler,
      IntegerSectionIndexingHandler::Create(&fake_clock_,
                                            integer_index_.get()));
  EXPECT_THAT(indexing_handler->Handle(tokenized_doc, doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              IsOk());
  EXPECT_THAT(integer_index_->last_added_document_id(), Eq(doc_id));

  // A full-range "timestamp" query finds nothing.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      integer_index_->GetIterator(kPropertyTimestamp, /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetHits(std::move(hit_iterator)), IsEmpty());
}
371
// Handle() called with kInvalidDocumentId must return INVALID_ARGUMENT in both
// normal and recovery mode, leaving last_added_document_id and the index data
// untouched.
TEST_F(IntegerSectionIndexingHandlerTest,
       HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
  const int64_t min_key = std::numeric_limits<int64_t>::min();
  const int64_t max_key = std::numeric_limits<int64_t>::max();

  DocumentProto doc_proto =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyTitle), "title")
          .AddStringProperty(std::string(kPropertyBody), "body")
          .AddInt64Property(std::string(kPropertyTimestamp), 123)
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_doc,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(doc_proto)));
  ICING_ASSERT_OK(document_store_->Put(tokenized_doc.document()));

  // Pin last_added_document_id to a known value so that any change to it is
  // detectable.
  const DocumentId preset_last_added_id = 3;
  integer_index_->set_last_added_document_id(preset_last_added_id);
  ASSERT_THAT(integer_index_->last_added_document_id(),
              Eq(preset_last_added_id));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<IntegerSectionIndexingHandler> indexing_handler,
      IntegerSectionIndexingHandler::Create(&fake_clock_,
                                            integer_index_.get()));

  // Normal mode: the call fails and nothing changes.
  EXPECT_THAT(indexing_handler->Handle(tokenized_doc, kInvalidDocumentId,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  EXPECT_THAT(integer_index_->last_added_document_id(),
              Eq(preset_last_added_id));

  // A full-range "timestamp" query finds nothing.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      integer_index_->GetIterator(kPropertyTimestamp, /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetHits(std::move(hit_iterator)), IsEmpty());

  // Recovery mode: same outcome.
  EXPECT_THAT(indexing_handler->Handle(tokenized_doc, kInvalidDocumentId,
                                       /*recovery_mode=*/true,
                                       /*put_document_stats=*/nullptr),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  EXPECT_THAT(integer_index_->last_added_document_id(),
              Eq(preset_last_added_id));

  // A full-range "timestamp" query still finds nothing.
  ICING_ASSERT_OK_AND_ASSIGN(
      hit_iterator,
      integer_index_->GetIterator(kPropertyTimestamp, /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetHits(std::move(hit_iterator)), IsEmpty());
}
430
// In normal (non-recovery) mode, Handle() must return INVALID_ARGUMENT for a
// document id that is equal to or smaller than last_added_document_id, and
// must leave last_added_document_id and the index data untouched.
TEST_F(IntegerSectionIndexingHandlerTest,
       HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
  const int64_t min_key = std::numeric_limits<int64_t>::min();
  const int64_t max_key = std::numeric_limits<int64_t>::max();

  DocumentProto doc_proto =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyTitle), "title")
          .AddStringProperty(std::string(kPropertyBody), "body")
          .AddInt64Property(std::string(kPropertyTimestamp), 123)
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_doc,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(doc_proto)));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
                             document_store_->Put(tokenized_doc.document()));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<IntegerSectionIndexingHandler> indexing_handler,
      IntegerSectionIndexingHandler::Create(&fake_clock_,
                                            integer_index_.get()));

  // Case 1: doc_id == last_added_document_id. The call fails and nothing
  // changes.
  integer_index_->set_last_added_document_id(doc_id);
  ASSERT_THAT(integer_index_->last_added_document_id(), Eq(doc_id));
  EXPECT_THAT(indexing_handler->Handle(tokenized_doc, doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  EXPECT_THAT(integer_index_->last_added_document_id(), Eq(doc_id));

  // A full-range "timestamp" query finds nothing.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      integer_index_->GetIterator(kPropertyTimestamp, /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetHits(std::move(hit_iterator)), IsEmpty());

  // Case 2: doc_id < last_added_document_id. The call fails and nothing
  // changes.
  integer_index_->set_last_added_document_id(doc_id + 1);
  ASSERT_THAT(integer_index_->last_added_document_id(), Eq(doc_id + 1));
  EXPECT_THAT(indexing_handler->Handle(tokenized_doc, doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  EXPECT_THAT(integer_index_->last_added_document_id(), Eq(doc_id + 1));

  // A full-range "timestamp" query still finds nothing.
  ICING_ASSERT_OK_AND_ASSIGN(
      hit_iterator,
      integer_index_->GetIterator(kPropertyTimestamp, /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetHits(std::move(hit_iterator)), IsEmpty());
}
494
// In recovery mode, Handle() indexes a document whose id is greater than
// last_added_document_id, and silently ignores (returns OK without changing
// anything) a document whose id is equal to or smaller than it.
TEST_F(IntegerSectionIndexingHandlerTest,
       HandleRecoveryModeShouldIgnoreDocsLELastAddedDocId) {
  const int64_t min_key = std::numeric_limits<int64_t>::min();
  const int64_t max_key = std::numeric_limits<int64_t>::max();

  DocumentProto first_proto =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyTitle), "title one")
          .AddStringProperty(std::string(kPropertyBody), "body one")
          .AddInt64Property(std::string(kPropertyTimestamp), 123)
          .Build();
  DocumentProto second_proto =
      DocumentBuilder()
          .SetKey("icing", "fake_type/2")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyTitle), "title two")
          .AddStringProperty(std::string(kPropertyBody), "body two")
          .AddInt64Property(std::string(kPropertyTimestamp), 456)
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument first_tokenized,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(first_proto)));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument second_tokenized,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(second_proto)));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId first_id,
                             document_store_->Put(first_tokenized.document()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId second_id,
                             document_store_->Put(second_tokenized.document()));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<IntegerSectionIndexingHandler> indexing_handler,
      IntegerSectionIndexingHandler::Create(&fake_clock_,
                                            integer_index_.get()));

  // Hits expected after the first document has been indexed; they must stay
  // the same for the rest of the test.
  const std::vector<SectionId> timestamp_sections = {kSectionIdTimestamp};

  // Case 1: id > last_added_document_id in recovery mode. The document is
  // indexed and last_added_document_id is updated.
  EXPECT_THAT(indexing_handler->Handle(first_tokenized, first_id,
                                       /*recovery_mode=*/true,
                                       /*put_document_stats=*/nullptr),
              IsOk());
  EXPECT_THAT(integer_index_->last_added_document_id(), Eq(first_id));

  // A full-range "timestamp" query returns the first document.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<DocHitInfoIterator> hit_iterator,
      integer_index_->GetIterator(kPropertyTimestamp, /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetHits(std::move(hit_iterator)),
              ElementsAre(EqualsDocHitInfo(first_id, timestamp_sections)));

  // Case 2: id == last_added_document_id in recovery mode. No error is
  // returned, but the document is ignored, so neither the index data nor
  // last_added_document_id changes.
  integer_index_->set_last_added_document_id(second_id);
  ASSERT_THAT(integer_index_->last_added_document_id(), Eq(second_id));
  EXPECT_THAT(indexing_handler->Handle(second_tokenized, second_id,
                                       /*recovery_mode=*/true,
                                       /*put_document_stats=*/nullptr),
              IsOk());
  EXPECT_THAT(integer_index_->last_added_document_id(), Eq(second_id));

  // The query must not contain hits for the second document.
  ICING_ASSERT_OK_AND_ASSIGN(
      hit_iterator,
      integer_index_->GetIterator(kPropertyTimestamp, /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetHits(std::move(hit_iterator)),
              ElementsAre(EqualsDocHitInfo(first_id, timestamp_sections)));

  // Case 3: id < last_added_document_id in recovery mode. Again no error is
  // returned and the document is ignored.
  integer_index_->set_last_added_document_id(second_id + 1);
  ASSERT_THAT(integer_index_->last_added_document_id(), Eq(second_id + 1));
  EXPECT_THAT(indexing_handler->Handle(second_tokenized, second_id,
                                       /*recovery_mode=*/true,
                                       /*put_document_stats=*/nullptr),
              IsOk());
  EXPECT_THAT(integer_index_->last_added_document_id(), Eq(second_id + 1));

  // The query must still not contain hits for the second document.
  ICING_ASSERT_OK_AND_ASSIGN(
      hit_iterator,
      integer_index_->GetIterator(kPropertyTimestamp, /*key_lower=*/min_key,
                                  /*key_upper=*/max_key, *document_store_,
                                  *schema_store_,
                                  fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetHits(std::move(hit_iterator)),
              ElementsAre(EqualsDocHitInfo(first_id, timestamp_sections)));
}
597
598 } // namespace
599
600 } // namespace lib
601 } // namespace icing
602