1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <algorithm>
16 #include <cstdint>
17 #include <limits>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <tuple>
22 #include <unordered_set>
23 #include <utility>
24 #include <vector>
25
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "gmock/gmock.h"
28 #include "gtest/gtest.h"
29 #include "icing/absl_ports/str_cat.h"
30 #include "icing/document-builder.h"
31 #include "icing/file/file-backed-vector.h"
32 #include "icing/file/filesystem.h"
33 #include "icing/file/memory-mapped-file.h"
34 #include "icing/file/mock-filesystem.h"
35 #include "icing/file/portable-file-backed-proto-log.h"
36 #include "icing/file/version-util.h"
37 #include "icing/icing-search-engine.h"
38 #include "icing/index/data-indexing-handler.h"
39 #include "icing/index/index-processor.h"
40 #include "icing/index/index.h"
41 #include "icing/index/integer-section-indexing-handler.h"
42 #include "icing/index/iterator/doc-hit-info-iterator.h"
43 #include "icing/index/numeric/integer-index.h"
44 #include "icing/index/numeric/numeric-index.h"
45 #include "icing/index/term-indexing-handler.h"
46 #include "icing/jni/jni-cache.h"
47 #include "icing/join/join-processor.h"
48 #include "icing/join/qualified-id-join-index-impl-v2.h"
49 #include "icing/join/qualified-id-join-index.h"
50 #include "icing/join/qualified-id-join-indexing-handler.h"
51 #include "icing/legacy/index/icing-filesystem.h"
52 #include "icing/legacy/index/icing-mock-filesystem.h"
53 #include "icing/portable/endian.h"
54 #include "icing/portable/equals-proto.h"
55 #include "icing/portable/platform.h"
56 #include "icing/proto/debug.pb.h"
57 #include "icing/proto/document.pb.h"
58 #include "icing/proto/document_wrapper.pb.h"
59 #include "icing/proto/initialize.pb.h"
60 #include "icing/proto/logging.pb.h"
61 #include "icing/proto/optimize.pb.h"
62 #include "icing/proto/persist.pb.h"
63 #include "icing/proto/reset.pb.h"
64 #include "icing/proto/schema.pb.h"
65 #include "icing/proto/scoring.pb.h"
66 #include "icing/proto/search.pb.h"
67 #include "icing/proto/status.pb.h"
68 #include "icing/proto/storage.pb.h"
69 #include "icing/proto/term.pb.h"
70 #include "icing/proto/usage.pb.h"
71 #include "icing/query/query-features.h"
72 #include "icing/schema-builder.h"
73 #include "icing/schema/schema-store.h"
74 #include "icing/schema/section.h"
75 #include "icing/store/document-associated-score-data.h"
76 #include "icing/store/document-id.h"
77 #include "icing/store/document-log-creator.h"
78 #include "icing/store/document-store.h"
79 #include "icing/store/namespace-fingerprint-identifier.h"
80 #include "icing/testing/common-matchers.h"
81 #include "icing/testing/fake-clock.h"
82 #include "icing/testing/icu-data-file-helper.h"
83 #include "icing/testing/jni-test-helpers.h"
84 #include "icing/testing/test-data.h"
85 #include "icing/testing/tmp-directory.h"
86 #include "icing/tokenization/language-segmenter-factory.h"
87 #include "icing/tokenization/language-segmenter.h"
88 #include "icing/transform/normalizer-factory.h"
89 #include "icing/transform/normalizer.h"
90 #include "icing/util/clock.h"
91 #include "icing/util/tokenized-document.h"
92 #include "unicode/uloc.h"
93
94 namespace icing {
95 namespace lib {
96
97 namespace {
98
99 using ::icing::lib::portable_equals_proto::EqualsProto;
100 using ::testing::_;
101 using ::testing::AtLeast;
102 using ::testing::DoDefault;
103 using ::testing::EndsWith;
104 using ::testing::Eq;
105 using ::testing::HasSubstr;
106 using ::testing::IsEmpty;
107 using ::testing::Matcher;
108 using ::testing::Ne;
109 using ::testing::Return;
110 using ::testing::SizeIs;
111
// Multi-sentence "lorem ipsum" filler text.
// NOTE(review): no use of this constant is visible in this part of the file;
// presumably it serves tests that need a long string property - confirm.
constexpr std::string_view kIpsumText =
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
    "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
    "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
    "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
    "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
    "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
    "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
    "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
    "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
    "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
    "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
    "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
    "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
    "placerat semper.";
127
ReadDocumentLogHeader(Filesystem filesystem,const std::string & file_path)128 PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
129 Filesystem filesystem, const std::string& file_path) {
130 PortableFileBackedProtoLog<DocumentWrapper>::Header header;
131 filesystem.PRead(file_path.c_str(), &header,
132 sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
133 /*offset=*/0);
134 return header;
135 }
136
WriteDocumentLogHeader(Filesystem filesystem,const std::string & file_path,PortableFileBackedProtoLog<DocumentWrapper>::Header & header)137 void WriteDocumentLogHeader(
138 Filesystem filesystem, const std::string& file_path,
139 PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
140 filesystem.Write(file_path.c_str(), &header,
141 sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
142 }
143
144 // For mocking purpose, we allow tests to provide a custom Filesystem.
145 class TestIcingSearchEngine : public IcingSearchEngine {
146 public:
TestIcingSearchEngine(const IcingSearchEngineOptions & options,std::unique_ptr<const Filesystem> filesystem,std::unique_ptr<const IcingFilesystem> icing_filesystem,std::unique_ptr<Clock> clock,std::unique_ptr<JniCache> jni_cache)147 TestIcingSearchEngine(const IcingSearchEngineOptions& options,
148 std::unique_ptr<const Filesystem> filesystem,
149 std::unique_ptr<const IcingFilesystem> icing_filesystem,
150 std::unique_ptr<Clock> clock,
151 std::unique_ptr<JniCache> jni_cache)
152 : IcingSearchEngine(options, std::move(filesystem),
153 std::move(icing_filesystem), std::move(clock),
154 std::move(jni_cache)) {}
155 };
156
GetTestBaseDir()157 std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
158
159 // This test is meant to cover all tests relating to
160 // IcingSearchEngine::Initialize.
161 class IcingSearchEngineInitializationTest : public testing::Test {
162 protected:
SetUp()163 void SetUp() override {
164 if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
165 // If we've specified using the reverse-JNI method for segmentation (i.e.
166 // not ICU), then we won't have the ICU data file included to set up.
167 // Technically, we could choose to use reverse-JNI for segmentation AND
168 // include an ICU data file, but that seems unlikely and our current BUILD
169 // setup doesn't do this.
170 // File generated via icu_data_file rule in //icing/BUILD.
171 std::string icu_data_file_path =
172 GetTestFilePath("icing/icu.dat");
173 ICING_ASSERT_OK(
174 icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
175 }
176 filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
177
178 language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
179 ICING_ASSERT_OK_AND_ASSIGN(
180 lang_segmenter_,
181 language_segmenter_factory::Create(std::move(segmenter_options)));
182
183 ICING_ASSERT_OK_AND_ASSIGN(
184 normalizer_,
185 normalizer_factory::Create(
186 /*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
187 }
188
TearDown()189 void TearDown() override {
190 normalizer_.reset();
191 lang_segmenter_.reset();
192 filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
193 }
194
filesystem() const195 const Filesystem* filesystem() const { return &filesystem_; }
196
icing_filesystem() const197 const IcingFilesystem* icing_filesystem() const { return &icing_filesystem_; }
198
199 Filesystem filesystem_;
200 IcingFilesystem icing_filesystem_;
201 std::unique_ptr<LanguageSegmenter> lang_segmenter_;
202 std::unique_ptr<Normalizer> normalizer_;
203 };
204
// Creation timestamp, in milliseconds, given to documents built by
// CreateMessageDocument(). Non-zero value so we don't override it to be the
// current time.
constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
207
GetVersionFileDir()208 std::string GetVersionFileDir() { return GetTestBaseDir(); }
209
GetDocumentDir()210 std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; }
211
GetIndexDir()212 std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }
213
GetIntegerIndexDir()214 std::string GetIntegerIndexDir() {
215 return GetTestBaseDir() + "/integer_index_dir";
216 }
217
GetQualifiedIdJoinIndexDir()218 std::string GetQualifiedIdJoinIndexDir() {
219 return GetTestBaseDir() + "/qualified_id_join_index_dir";
220 }
221
GetSchemaDir()222 std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
223
GetHeaderFilename()224 std::string GetHeaderFilename() {
225 return GetTestBaseDir() + "/icing_search_engine_header";
226 }
227
GetDefaultIcingOptions()228 IcingSearchEngineOptions GetDefaultIcingOptions() {
229 IcingSearchEngineOptions icing_options;
230 icing_options.set_base_dir(GetTestBaseDir());
231 icing_options.set_document_store_namespace_id_fingerprint(true);
232 icing_options.set_use_new_qualified_id_join_index(true);
233 return icing_options;
234 }
235
CreateMessageDocument(std::string name_space,std::string uri)236 DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
237 return DocumentBuilder()
238 .SetKey(std::move(name_space), std::move(uri))
239 .SetSchema("Message")
240 .AddStringProperty("body", "message body")
241 .AddInt64Property("indexableInteger", 123)
242 .SetCreationTimestampMs(kDefaultCreationTimestampMs)
243 .Build();
244 }
245
CreateEmailDocument(const std::string & name_space,const std::string & uri,int score,const std::string & subject_content,const std::string & body_content)246 DocumentProto CreateEmailDocument(const std::string& name_space,
247 const std::string& uri, int score,
248 const std::string& subject_content,
249 const std::string& body_content) {
250 return DocumentBuilder()
251 .SetKey(name_space, uri)
252 .SetSchema("Email")
253 .SetScore(score)
254 .AddStringProperty("subject", subject_content)
255 .AddStringProperty("body", body_content)
256 .Build();
257 }
258
CreateMessageSchemaTypeConfig()259 SchemaTypeConfigProto CreateMessageSchemaTypeConfig() {
260 return SchemaTypeConfigBuilder()
261 .SetType("Message")
262 .AddProperty(PropertyConfigBuilder()
263 .SetName("body")
264 .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
265 .SetCardinality(CARDINALITY_REQUIRED))
266 .AddProperty(PropertyConfigBuilder()
267 .SetName("indexableInteger")
268 .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
269 .SetCardinality(CARDINALITY_REQUIRED))
270 .Build();
271 }
272
CreateEmailSchemaTypeConfig()273 SchemaTypeConfigProto CreateEmailSchemaTypeConfig() {
274 return SchemaTypeConfigBuilder()
275 .SetType("Email")
276 .AddProperty(PropertyConfigBuilder()
277 .SetName("body")
278 .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
279 .SetCardinality(CARDINALITY_REQUIRED))
280 .AddProperty(PropertyConfigBuilder()
281 .SetName("subject")
282 .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
283 .SetCardinality(CARDINALITY_REQUIRED))
284 .Build();
285 }
286
CreateMessageSchema()287 SchemaProto CreateMessageSchema() {
288 return SchemaBuilder().AddType(CreateMessageSchemaTypeConfig()).Build();
289 }
290
CreateEmailSchema()291 SchemaProto CreateEmailSchema() {
292 return SchemaBuilder().AddType(CreateEmailSchemaTypeConfig()).Build();
293 }
294
GetDefaultScoringSpec()295 ScoringSpecProto GetDefaultScoringSpec() {
296 ScoringSpecProto scoring_spec;
297 scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
298 return scoring_spec;
299 }
300
301 // TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder,
302 // SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all
303 // instances by them.
304
// Every API called on an engine that was never initialized must report
// FAILED_PRECONDITION (and InvalidateNextPageToken must simply not crash).
TEST_F(IcingSearchEngineInitializationTest, UninitializedInstanceFailsSafely) {
  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());

  // Schema APIs.
  SchemaProto message_schema = CreateMessageSchema();
  EXPECT_THAT(icing.SetSchema(message_schema).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(icing.GetSchema().status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(
      icing.GetSchemaType(message_schema.types(0).schema_type()).status(),
      ProtoStatusIs(StatusProto::FAILED_PRECONDITION));

  // Document APIs.
  DocumentProto doc = CreateMessageDocument("namespace", "uri");
  EXPECT_THAT(icing.Put(doc).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(icing
                  .Get(doc.namespace_(), doc.uri(),
                       GetResultSpecProto::default_instance())
                  .status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(
      icing.DeleteBySchemaType(message_schema.types(0).schema_type()).status(),
      ProtoStatusIs(StatusProto::FAILED_PRECONDITION));

  // Search APIs.
  SearchSpecProto search_spec = SearchSpecProto::default_instance();
  ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance();
  ResultSpecProto result_spec = ResultSpecProto::default_instance();
  EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  constexpr int kSomePageToken = 12;
  EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  icing.InvalidateNextPageToken(kSomePageToken);  // Verify this doesn't crash.

  // Maintenance APIs.
  EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(icing.Optimize().status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
}
348
// Smoke test: initialize, set the Message schema, then put the same document
// twice - once from an lvalue and once from a temporary copy.
TEST_F(IcingSearchEngineInitializationTest, SimpleInitialization) {
  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());

  SchemaProto schema = CreateMessageSchema();
  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());

  DocumentProto message = CreateMessageDocument("namespace", "uri");
  ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());
  ASSERT_THAT(icing.Put(DocumentProto(message)).status(), ProtoIsOk());
}
358
// Calling Initialize() a second time on the same instance, without an
// explicit persist in between, must leave the previously put document
// retrievable.
TEST_F(IcingSearchEngineInitializationTest,
       InitializingAgainSavesNonPersistedData) {
  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());

  DocumentProto message = CreateMessageDocument("namespace", "uri");
  ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());

  // Expected Get() result: OK status carrying the document we just put.
  GetResultProto expected_result;
  expected_result.mutable_status()->set_code(StatusProto::OK);
  *expected_result.mutable_document() = message;

  // Before re-initializing.
  GetResultProto result_before =
      icing.Get("namespace", "uri", GetResultSpecProto::default_instance());
  ASSERT_THAT(result_before, EqualsProto(expected_result));

  // After re-initializing.
  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
  GetResultProto result_after =
      icing.Get("namespace", "uri", GetResultSpecProto::default_instance());
  EXPECT_THAT(result_after, EqualsProto(expected_result));
}
381
// An index merge size of INT32_MAX must be rejected by Initialize().
TEST_F(IcingSearchEngineInitializationTest,
       MaxIndexMergeSizeReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_index_merge_size(std::numeric_limits<int32_t>::max());

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
390
// A negative index merge size must be rejected by Initialize().
TEST_F(IcingSearchEngineInitializationTest,
       NegativeMergeSizeReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_index_merge_size(-1);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
399
// An index merge size of zero must be rejected by Initialize().
TEST_F(IcingSearchEngineInitializationTest,
       ZeroMergeSizeReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_index_merge_size(0);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
408
// The smallest positive index merge size is accepted by Initialize().
TEST_F(IcingSearchEngineInitializationTest, GoodIndexMergeSizeReturnsOk) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  // One is fine, if a bit weird. It just means that the lite index will be
  // smaller and will request a merge any time content is added to it.
  icing_options.set_index_merge_size(1);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());
}
417
// A negative max token length must be rejected by Initialize().
TEST_F(IcingSearchEngineInitializationTest,
       NegativeMaxTokenLenReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_max_token_length(-1);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
426
// A max token length of zero must be rejected by Initialize().
TEST_F(IcingSearchEngineInitializationTest,
       ZeroMaxTokenLenReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_max_token_length(0);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
435
// A negative compression level must be rejected by Initialize().
TEST_F(IcingSearchEngineInitializationTest,
       NegativeCompressionLevelReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_compression_level(-1);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
444
// A compression level of 10 (per the test name, above the supported maximum)
// must be rejected by Initialize().
TEST_F(IcingSearchEngineInitializationTest,
       GreaterThanMaxCompressionLevelReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_compression_level(10);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
453
// A compression level of zero is accepted by Initialize().
TEST_F(IcingSearchEngineInitializationTest, GoodCompressionLevelReturnsOk) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_compression_level(0);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());
}
460
// Data written and persisted at compression level 3 must still allow a fresh
// engine to initialize at level 9 and then at level 0.
TEST_F(IcingSearchEngineInitializationTest,
       ReinitializingWithDifferentCompressionLevelReturnsOk) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();

  // Seed the on-disk state at compression level 3.
  icing_options.set_compression_level(3);
  {
    IcingSearchEngine icing(icing_options, GetTestJniCache());
    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());

    DocumentProto message = CreateMessageDocument("namespace", "uri");
    ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());
    ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
  }

  // Re-open with other levels, in this order; each engine is destroyed before
  // the next one is created.
  for (int new_level : {9, 0}) {
    icing_options.set_compression_level(new_level);
    IcingSearchEngine icing(icing_options, GetTestJniCache());
    EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
  }
}
485
// When the filesystem refuses to create directories, Initialize() must
// report INTERNAL with a "Could not create directory" message.
TEST_F(IcingSearchEngineInitializationTest, FailToCreateDocStore) {
  auto failing_filesystem = std::make_unique<MockFilesystem>();
  // This fails DocumentStore::Create()
  ON_CALL(*failing_filesystem, CreateDirectoryRecursively(_))
      .WillByDefault(Return(false));

  TestIcingSearchEngine icing(
      GetDefaultIcingOptions(), std::move(failing_filesystem),
      std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
      GetTestJniCache());

  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
  EXPECT_THAT(init_result.status().message(),
              HasSubstr("Could not create directory"));
}
503
// With an init marker recording 5 earlier failed attempts, Initialize() must
// still succeed, report those 5 failures, keep all documents, and remove the
// marker file.
TEST_F(IcingSearchEngineInitializationTest,
       InitMarkerFilePreviousFailuresAtThreshold) {
  Filesystem filesystem;

  // Fixed creation timestamps so the stored documents compare equal to these
  // protos later on.
  DocumentProto first_email =
      CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
  first_email.set_creation_timestamp_ms(10000);
  DocumentProto second_email =
      CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
  second_email.set_creation_timestamp_ms(10000);

  {
    // Create an index with a few documents.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto first_init = icing.Initialize();
    ASSERT_THAT(first_init.status(), ProtoIsOk());
    ASSERT_THAT(first_init.initialize_stats().num_previous_init_failures(),
                Eq(0));
    ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
    ASSERT_THAT(icing.Put(first_email).status(), ProtoIsOk());
    ASSERT_THAT(icing.Put(second_email).status(), ProtoIsOk());
  }

  // Write an init marker file with 5 previously failed attempts.
  const std::string marker_filepath = GetTestBaseDir() + "/init_marker";
  {
    ScopedFd marker_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
    // The count is stored after conversion with GHostToNetworkL.
    int network_init_attempts = GHostToNetworkL(5);
    // Write the updated number of attempts before we get started.
    ASSERT_TRUE(filesystem.PWrite(marker_fd.get(), 0, &network_init_attempts,
                                  sizeof(network_init_attempts)));
    ASSERT_TRUE(filesystem.DataSync(marker_fd.get()));
  }

  {
    // Create the index again and verify that initialization succeeds and no
    // data is thrown out.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto second_init = icing.Initialize();
    ASSERT_THAT(second_init.status(), ProtoIsOk());
    ASSERT_THAT(second_init.initialize_stats().num_previous_init_failures(),
                Eq(5));

    GetResultProto first_result =
        icing.Get("namespace", "uri1", GetResultSpecProto::default_instance());
    EXPECT_THAT(first_result.document(), EqualsProto(first_email));

    GetResultProto second_result =
        icing.Get("namespace", "uri2", GetResultSpecProto::default_instance());
    EXPECT_THAT(second_result.document(), EqualsProto(second_email));
  }

  // The successful init should have thrown out the marker file.
  ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
}
560
// With an init marker recording 6 earlier failed attempts, Initialize() must
// return WARNING_DATA_LOSS, report those 6 failures, drop all documents, and
// remove the marker file.
TEST_F(IcingSearchEngineInitializationTest,
       InitMarkerFilePreviousFailuresBeyondThreshold) {
  Filesystem filesystem;
  DocumentProto first_email =
      CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
  DocumentProto second_email =
      CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");

  {
    // Create an index with a few documents.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto first_init = icing.Initialize();
    ASSERT_THAT(first_init.status(), ProtoIsOk());
    ASSERT_THAT(first_init.initialize_stats().num_previous_init_failures(),
                Eq(0));
    ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
    ASSERT_THAT(icing.Put(first_email).status(), ProtoIsOk());
    ASSERT_THAT(icing.Put(second_email).status(), ProtoIsOk());
  }

  // Write an init marker file with 6 previously failed attempts.
  const std::string marker_filepath = GetTestBaseDir() + "/init_marker";
  {
    ScopedFd marker_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
    // The count is stored after conversion with GHostToNetworkL.
    int network_init_attempts = GHostToNetworkL(6);
    // Write the updated number of attempts before we get started.
    ASSERT_TRUE(filesystem.PWrite(marker_fd.get(), 0, &network_init_attempts,
                                  sizeof(network_init_attempts)));
    ASSERT_TRUE(filesystem.DataSync(marker_fd.get()));
  }

  {
    // Create the index again and verify that initialization succeeds and all
    // data is thrown out.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto second_init = icing.Initialize();
    ASSERT_THAT(second_init.status(),
                ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
    ASSERT_THAT(second_init.initialize_stats().num_previous_init_failures(),
                Eq(6));

    GetResultProto first_result =
        icing.Get("namespace", "uri1", GetResultSpecProto::default_instance());
    EXPECT_THAT(first_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));

    GetResultProto second_result =
        icing.Get("namespace", "uri2", GetResultSpecProto::default_instance());
    EXPECT_THAT(second_result.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
  }

  // The successful init should have thrown out the marker file.
  ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
}
616
// Each failed Initialize() must bump the recorded number of previous init
// failures by one. After six failures in a row, the next initialization with
// a working filesystem must return WARNING_DATA_LOSS, report six previous
// failures, drop all documents, and remove the marker file.
TEST_F(IcingSearchEngineInitializationTest,
       SuccessiveInitFailuresIncrementsInitMarker) {
  Filesystem filesystem;
  DocumentProto email1 =
      CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
  DocumentProto email2 =
      CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");

  {
    // 1. Create an index with a few documents.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = icing.Initialize();
    ASSERT_THAT(init_result.status(), ProtoIsOk());
    ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
                Eq(0));
    ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
    ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
    ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
  }

  {
    // 2. Create an index that will encounter an IO failure when trying to
    // create the document log.
    IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();

    auto mock_filesystem = std::make_unique<MockFilesystem>();
    std::string document_log_filepath =
        icing_options.base_dir() + "/document_dir/document_log_v1";
    // Reporting a bad size for the document log is what makes every
    // Initialize() call below fail.
    ON_CALL(*mock_filesystem,
            GetFileSize(Matcher<const char*>(Eq(document_log_filepath))))
        .WillByDefault(Return(Filesystem::kBadFileSize));

    TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
                                std::make_unique<IcingFilesystem>(),
                                std::make_unique<FakeClock>(),
                                GetTestJniCache());

    // Fail to initialize six times in a row. On attempt i (0-based) the engine
    // must report exactly i previous failures.
    constexpr int kNumFailedInits = 6;
    for (int attempt = 0; attempt < kNumFailedInits; ++attempt) {
      InitializeResultProto init_result = icing.Initialize();
      ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL))
          << "init attempt #" << attempt;
      ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
                  Eq(attempt))
          << "init attempt #" << attempt;
    }
  }

  {
    // 3. Create the index again and verify that initialization succeeds and all
    // data is thrown out.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = icing.Initialize();
    ASSERT_THAT(init_result.status(),
                ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
    ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
                Eq(6));

    EXPECT_THAT(
        icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
            .status(),
        ProtoStatusIs(StatusProto::NOT_FOUND));
    EXPECT_THAT(
        icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
            .status(),
        ProtoStatusIs(StatusProto::NOT_FOUND));
  }

  // The successful init should have thrown out the marker file.
  std::string marker_filepath = GetTestBaseDir() + "/init_marker";
  ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
}
710
// Verifies that IcingSearchEngine can initialize after its header file has been
// deleted, and that the stored document, the term index, the integer index and
// the schema are all still usable afterwards.
TEST_F(IcingSearchEngineInitializationTest, RecoverFromMissingHeaderFile) {
  SearchSpecProto search_spec;
  search_spec.set_query("message");
  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);

  // Both the term query and the integer query below are expected to return
  // exactly this one document.
  SearchResultProto expected_search_result_proto;
  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
      CreateMessageDocument("namespace", "uri");

  GetResultProto expected_get_result_proto;
  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
  *expected_get_result_proto.mutable_document() =
      CreateMessageDocument("namespace", "uri");

  {
    // Basic initialization/setup
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
    EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
                ProtoIsOk());
    EXPECT_THAT(
        icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
        EqualsProto(expected_get_result_proto));
    SearchResultProto search_result_proto =
        icing.Search(search_spec, GetDefaultScoringSpec(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
                                         expected_search_result_proto));
  }  // This should shut down IcingSearchEngine and persist anything it needs to

  // Remove the header file out from under the (now destroyed) engine.
  EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));

  // We should be able to recover from this and access all our previous data
  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());

  // Checks that DocumentLog is still ok
  EXPECT_THAT(
      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
      EqualsProto(expected_get_result_proto));

  // Checks that the term index is still ok so we can search over it
  SearchResultProto search_result_proto =
      icing.Search(search_spec, GetDefaultScoringSpec(),
                   ResultSpecProto::default_instance());
  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
                                       expected_search_result_proto));

  // Checks that the integer index is still ok so we can search over it
  SearchSpecProto search_spec2;
  search_spec2.set_query("indexableInteger == 123");
  search_spec2.add_enabled_features(std::string(kNumericSearchFeature));

  SearchResultProto search_result_proto2 =
      icing.Search(search_spec2, ScoringSpecProto::default_instance(),
                   ResultSpecProto::default_instance());
  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
                                        expected_search_result_proto));

  // Checks that Schema is still ok since it'll be needed to validate the
  // document
  EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
              ProtoIsOk());
}
776
// Confirms that Initialize() reports INTERNAL once the persisted "schema.pb"
// file has been overwritten with the bytes "1234".
TEST_F(IcingSearchEngineInitializationTest, UnableToRecoverFromCorruptSchema) {
  {
    // Populate a fresh engine with the message schema and a single document,
    // then confirm the document can be read back.
    IcingSearchEngine setup_icing(GetDefaultIcingOptions(), GetTestJniCache());
    EXPECT_THAT(setup_icing.Initialize().status(), ProtoIsOk());
    EXPECT_THAT(setup_icing.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());
    EXPECT_THAT(
        setup_icing.Put(CreateMessageDocument("namespace", "uri")).status(),
        ProtoIsOk());

    GetResultProto expected_get_result;
    expected_get_result.mutable_status()->set_code(StatusProto::OK);
    *expected_get_result.mutable_document() =
        CreateMessageDocument("namespace", "uri");

    EXPECT_THAT(setup_icing.Get("namespace", "uri",
                                GetResultSpecProto::default_instance()),
                EqualsProto(expected_get_result));
  }  // Leaving the scope destroys the engine so its state is flushed.

  // Clobber the on-disk schema.
  const std::string garbage = "1234";
  const std::string schema_path =
      absl_ports::StrCat(GetSchemaDir(), "/schema.pb");
  EXPECT_TRUE(filesystem()->Write(schema_path.c_str(), garbage.data(),
                                  garbage.size()));

  // A new engine over the same directory must fail to initialize.
  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
  EXPECT_THAT(icing.Initialize().status(),
              ProtoStatusIs(StatusProto::INTERNAL));
}
806
// Confirms that Initialize() reports INTERNAL once the document log file has
// been overwritten with the bytes "1234".
TEST_F(IcingSearchEngineInitializationTest,
       UnableToRecoverFromCorruptDocumentLog) {
  {
    // Populate a fresh engine with the message schema and a single document,
    // then confirm the document can be read back.
    IcingSearchEngine setup_icing(GetDefaultIcingOptions(), GetTestJniCache());
    EXPECT_THAT(setup_icing.Initialize().status(), ProtoIsOk());
    EXPECT_THAT(setup_icing.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());
    EXPECT_THAT(
        setup_icing.Put(CreateMessageDocument("namespace", "uri")).status(),
        ProtoIsOk());

    GetResultProto expected_get_result;
    expected_get_result.mutable_status()->set_code(StatusProto::OK);
    *expected_get_result.mutable_document() =
        CreateMessageDocument("namespace", "uri");

    EXPECT_THAT(setup_icing.Get("namespace", "uri",
                                GetResultSpecProto::default_instance()),
                EqualsProto(expected_get_result));
  }  // Leaving the scope destroys the engine so its state is flushed.

  // Clobber the on-disk document log.
  const std::string garbage = "1234";
  const std::string document_log_path = absl_ports::StrCat(
      GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
  EXPECT_TRUE(filesystem()->Write(document_log_path.c_str(), garbage.data(),
                                  garbage.size()));

  // A new engine over the same directory must fail to initialize.
  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
  EXPECT_THAT(icing.Initialize().status(),
              ProtoStatusIs(StatusProto::INTERNAL));
}
837
// Verifies recovery when the schema store was changed behind the engine's back:
// a "set_schema_marker" file is left in the base directory and a new schema is
// written directly through SchemaStore. After re-initialization the test checks
// that the new schema is in effect (an "Email" document can be stored) and that
// term and integer searches return the expected "Message" documents.
TEST_F(IcingSearchEngineInitializationTest,
       RecoverFromInconsistentSchemaStore) {
  DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
  DocumentProto document2_with_additional_property =
      DocumentBuilder()
          .SetKey("namespace", "uri2")
          .SetSchema("Message")
          .AddStringProperty("additional", "content")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 123)
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  IcingSearchEngineOptions options = GetDefaultIcingOptions();
  {
    // Initializes folder and schema
    IcingSearchEngine icing(options, GetTestJniCache());
    EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());

    SchemaProto schema =
        SchemaBuilder()
            .AddType(
                SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
                    // Add non-indexable property "additional"
                    .AddProperty(PropertyConfigBuilder()
                                     .SetName("additional")
                                     .SetDataType(TYPE_STRING)
                                     .SetCardinality(CARDINALITY_OPTIONAL)))
            .Build();

    EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(document2_with_additional_property).status(),
                ProtoIsOk());

    // Won't get us anything because "additional" isn't marked as an indexed
    // property in the schema
    SearchSpecProto search_spec;
    search_spec.set_query("additional:content");
    search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);

    SearchResultProto expected_search_result_proto;
    expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
    SearchResultProto search_result_proto =
        icing.Search(search_spec, GetDefaultScoringSpec(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
                                         expected_search_result_proto));
  }  // This should shut down IcingSearchEngine and persist anything it needs to

  {
    // This schema will change the SchemaTypeIds from the previous schema
    // (since SchemaTypeIds are assigned based on order of the types, and this
    // new schema changes the ordering of previous types)
    SchemaProto new_schema;
    auto type = new_schema.add_types();
    type->set_schema_type("Email");

    // Switching a non-indexable property to indexable changes the SectionIds
    // (since SectionIds are assigned based on alphabetical order of indexed
    // sections, marking "additional" as an indexed property will push the
    // "body" and "indexableInteger" property to different SectionIds)
    *new_schema.add_types() =
        SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
            .AddProperty(
                PropertyConfigBuilder()
                    .SetName("additional")
                    .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                    .SetCardinality(CARDINALITY_OPTIONAL))
            .Build();

    // Write the marker file
    std::string marker_filepath =
        absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
    ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
    ASSERT_TRUE(sfd.is_valid());

    // Write the new schema
    FakeClock fake_clock;
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<SchemaStore> schema_store,
        SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
    ICING_EXPECT_OK(schema_store->SetSchema(
        new_schema, /*ignore_errors_and_delete_documents=*/false,
        /*allow_circular_schema_definitions=*/false));
  }  // Will persist new schema

  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());

  // We can insert an Email document since we kept the new schema
  DocumentProto email_document =
      DocumentBuilder()
          .SetKey("namespace", "email_uri")
          .SetSchema("Email")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());

  GetResultProto expected_get_result_proto;
  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
  *expected_get_result_proto.mutable_document() = email_document;

  EXPECT_THAT(icing.Get("namespace", "email_uri",
                        GetResultSpecProto::default_instance()),
              EqualsProto(expected_get_result_proto));

  // Verify term search
  SearchSpecProto search_spec1;

  // The section restrict will ensure we are using the correct, updated
  // SectionId in the Index
  search_spec1.set_query("additional:content");

  // Schema type filter will ensure we're using the correct, updated
  // SchemaTypeId in the DocumentStore
  search_spec1.add_schema_type_filters("Message");
  search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);

  SearchResultProto expected_search_result_proto1;
  expected_search_result_proto1.mutable_status()->set_code(StatusProto::OK);
  *expected_search_result_proto1.mutable_results()->Add()->mutable_document() =
      document2_with_additional_property;

  SearchResultProto search_result_proto1 =
      icing.Search(search_spec1, GetDefaultScoringSpec(),
                   ResultSpecProto::default_instance());
  EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
                                        expected_search_result_proto1));

  // Verify numeric (integer) search
  SearchSpecProto search_spec2;
  search_spec2.set_query("indexableInteger == 123");
  // The filter belongs on search_spec2: search_spec1 has already been issued,
  // so adding it there would leave the numeric query unfiltered.
  search_spec2.add_schema_type_filters("Message");
  search_spec2.add_enabled_features(std::string(kNumericSearchFeature));

  SearchResultProto expected_search_result_proto2;
  expected_search_result_proto2.mutable_status()->set_code(StatusProto::OK);
  *expected_search_result_proto2.mutable_results()->Add()->mutable_document() =
      document2_with_additional_property;
  *expected_search_result_proto2.mutable_results()->Add()->mutable_document() =
      document1;

  SearchResultProto search_result_proto2 =
      icing.Search(search_spec2, ScoringSpecProto::default_instance(),
                   ResultSpecProto::default_instance());
  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
                                        expected_search_result_proto2));
}
987
TEST_F(IcingSearchEngineInitializationTest,
       RecoverFromInconsistentDocumentStore) {
  // Test the following scenario: document store is ahead of term, integer and
  // qualified id join index. IcingSearchEngine should be able to recover all
  // indices. Several additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Integer index directory should be unaffected.
  //   - Qualified id join index directory should be unaffected.
  // - Truncate indices:
  //   - "TruncateTo()" for term index shouldn't take effect.
  //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
  //     discarded.
  //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") should be discarded.
  // - Still, we need to replay and reindex documents.

  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message1 =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", "message body one")
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message2 =
      DocumentBuilder()
          .SetKey("namespace", "message/2")
          .SetSchema("Message")
          .AddStringProperty("body", "message body two")
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();

  {
    // Initializes folder and schema, then indexes person and message1.
    TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
                                std::make_unique<IcingFilesystem>(),
                                std::make_unique<FakeClock>(),
                                GetTestJniCache());
    EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
    EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(message1).status(), ProtoIsOk());
  }  // This should shut down IcingSearchEngine and persist anything it needs to

  {
    FakeClock fake_clock;
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<SchemaStore> schema_store,
        SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));

    // Puts message2 into DocumentStore but doesn't index it.
    ICING_ASSERT_OK_AND_ASSIGN(
        DocumentStore::CreateResult create_result,
        DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
                              schema_store.get(),
                              /*force_recovery_and_revalidate_documents=*/false,
                              /*namespace_id_fingerprint=*/true,
                              /*pre_mapping_fbv=*/false,
                              /*use_persistent_hash_map=*/true,
                              PortableFileBackedProtoLog<
                                  DocumentWrapper>::kDeflateCompressionLevel,
                              /*initialize_stats=*/nullptr));
    std::unique_ptr<DocumentStore> document_store =
        std::move(create_result.document_store);

    ICING_EXPECT_OK(document_store->Put(message2));
  }

  // Mock filesystem to observe and check the behavior of all indices.
  // The catch-all expectation comes first so that the more specific
  // expectations declared after it take precedence when they match.
  auto mock_filesystem = std::make_unique<MockFilesystem>();
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
      .WillRepeatedly(DoDefault());
  // Ensure term index directory should never be discarded.
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(EndsWith("/index_dir")))
      .Times(0);
  // Ensure integer index directory should never be discarded, and Clear()
  // should never be called (i.e. storage sub directory
  // "*/integer_index_dir/*" should never be discarded).
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
      .Times(0);
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
      .Times(0);
  // Ensure qualified id join index directory should never be discarded, and
  // Clear() should never be called (i.e. storage sub directory
  // "*/qualified_id_join_index_dir/*" should never be discarded).
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                    EndsWith("/qualified_id_join_index_dir")))
      .Times(0);
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                    HasSubstr("/qualified_id_join_index_dir/")))
      .Times(0);

  TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
                              std::make_unique<IcingFilesystem>(),
                              std::make_unique<FakeClock>(), GetTestJniCache());
  InitializeResultProto initialize_result = icing.Initialize();
  EXPECT_THAT(initialize_result.status(), ProtoIsOk());
  // Index Restoration should be triggered here and message2 should be
  // indexed.
  EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
              Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
  EXPECT_THAT(
      initialize_result.initialize_stats().integer_index_restoration_cause(),
      Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
  EXPECT_THAT(initialize_result.initialize_stats()
                  .qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));

  GetResultProto expected_get_result_proto;
  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
  *expected_get_result_proto.mutable_document() = message1;

  // DocumentStore still has the originally indexed document...
  EXPECT_THAT(icing.Get("namespace", "message/1",
                        GetResultSpecProto::default_instance()),
              EqualsProto(expected_get_result_proto));

  // ...and kept the additional document
  *expected_get_result_proto.mutable_document() = message2;
  EXPECT_THAT(icing.Get("namespace", "message/2",
                        GetResultSpecProto::default_instance()),
              EqualsProto(expected_get_result_proto));

  SearchResultProto expected_search_result_proto;
  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
      message2;
  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
      message1;

  // We indexed the additional document in all indices.
  // Verify term search
  SearchSpecProto search_spec1;
  search_spec1.set_query("message");
  search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
  SearchResultProto search_result_proto1 =
      icing.Search(search_spec1, GetDefaultScoringSpec(),
                   ResultSpecProto::default_instance());
  EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
                                        expected_search_result_proto));

  // Verify numeric (integer) search
  SearchSpecProto search_spec2;
  search_spec2.set_query("indexableInteger == 123");
  search_spec2.add_enabled_features(std::string(kNumericSearchFeature));

  SearchResultProto search_result_proto2 =
      icing.Search(search_spec2, ScoringSpecProto::default_instance(),
                   ResultSpecProto::default_instance());
  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
                                        expected_search_result_proto));

  // Verify join search: join a query for `name:person` with a child query for
  // `body:message` based on the child's `senderQualifiedId` field.
  SearchSpecProto search_spec3;
  search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
  search_spec3.set_query("name:person");
  JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
  join_spec->set_parent_property_expression(
      std::string(JoinProcessor::kQualifiedIdExpr));
  join_spec->set_child_property_expression("senderQualifiedId");
  join_spec->set_aggregation_scoring_strategy(
      JoinSpecProto::AggregationScoringStrategy::COUNT);
  JoinSpecProto::NestedSpecProto* nested_spec =
      join_spec->mutable_nested_spec();
  SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
  nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
  nested_search_spec->set_query("body:message");
  *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
  *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();

  ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
  result_spec3.set_max_joined_children_per_parent_to_return(
      std::numeric_limits<int32_t>::max());

  SearchResultProto expected_join_search_result_proto;
  expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
  SearchResultProto::ResultProto* result_proto =
      expected_join_search_result_proto.mutable_results()->Add();
  *result_proto->mutable_document() = person;
  *result_proto->mutable_joined_results()->Add()->mutable_document() = message2;
  *result_proto->mutable_joined_results()->Add()->mutable_document() = message1;

  SearchResultProto search_result_proto3 = icing.Search(
      search_spec3, ScoringSpecProto::default_instance(), result_spec3);
  EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
                                        expected_join_search_result_proto));
}
1218
TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptedDocumentStore) {
  // Scenario under test: one of the document store's derived files is
  // corrupted. IcingSearchEngine is expected to recover the document store,
  // and because NamespaceIds get reassigned, the qualified id join index is
  // expected to be rebuilt too. Additional expectations:
  // - Index directory handling:
  //   - The term index directory is left alone.
  //   - The integer index directory is left alone.
  //   - The whole qualified id join index directory is discarded and the index
  //     restarts from scratch.
  // - Truncating indices:
  //   - "TruncateTo()" for the term index has no effect.
  //   - "Clear()" is not called for the integer index, i.e. no integer index
  //     storage sub directory (path_expr = "*/integer_index_dir/*") is
  //     discarded.
  //   - "Clear()" is not called for the qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") is discarded.
  // - Documents still have to be replayed and reindexed (for the qualified id
  //   join index).

  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(
              SchemaTypeConfigBuilder()
                  .SetType("Message")
                  .AddProperty(
                      PropertyConfigBuilder()
                          .SetName("body")
                          .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                          .SetCardinality(CARDINALITY_REQUIRED))
                  .AddProperty(PropertyConfigBuilder()
                                   .SetName("indexableInteger")
                                   .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                   .SetCardinality(CARDINALITY_REQUIRED))
                  .AddProperty(PropertyConfigBuilder()
                                   .SetName("senderQualifiedId")
                                   .SetDataTypeJoinableString(
                                       JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                   .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  // A throwaway person in "namespace2"; it is inserted first and deleted
  // again during setup.
  DocumentProto person_dummy =
      DocumentBuilder()
          .SetKey("namespace2", "personDummy")
          .SetSchema("Person")
          .AddStringProperty("name", "personDummy")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto person_ns1 =
      DocumentBuilder()
          .SetKey("namespace1", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto person_ns2 =
      DocumentBuilder()
          .SetKey("namespace2", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  // The only message; its sender is the person in "namespace2".
  DocumentProto message_doc =
      DocumentBuilder()
          .SetKey("namespace2", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", "message body one")
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace2#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  IcingSearchEngineOptions engine_options = GetDefaultIcingOptions();

  {
    // Set up the folder and schema, then store all four documents.
    TestIcingSearchEngine setup_icing(
        engine_options, std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    EXPECT_THAT(setup_icing.Initialize().status(), ProtoIsOk());
    EXPECT_THAT(setup_icing.SetSchema(schema).status(), ProtoIsOk());
    // "namespace2" (from person_dummy) gets NamespaceId = 0.
    EXPECT_THAT(setup_icing.Put(person_dummy).status(), ProtoIsOk());
    // "namespace1" (from person_ns1) gets NamespaceId = 1.
    EXPECT_THAT(setup_icing.Put(person_ns1).status(), ProtoIsOk());
    EXPECT_THAT(setup_icing.Put(person_ns2).status(), ProtoIsOk());
    EXPECT_THAT(setup_icing.Put(message_doc).status(), ProtoIsOk());

    // Remove the throwaway person again.
    EXPECT_THAT(
        setup_icing.Delete(person_dummy.namespace_(), person_dummy.uri())
            .status(),
        ProtoIsOk());
  }  // Leaving the scope destroys the engine so its state is flushed.

  {
    FakeClock fake_clock;
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<SchemaStore> schema_store,
        SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));

    // Tamper with one derived file of DocumentStore (the score cache) while
    // leaving the checksum in the DocumentStore header untouched.
    std::string score_cache_path = GetDocumentDir() + "/score_cache";
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<FileBackedVector<DocumentAssociatedScoreData>>
            score_cache,
        FileBackedVector<DocumentAssociatedScoreData>::Create(
            *filesystem(), std::move(score_cache_path),
            MemoryMappedFile::READ_WRITE_AUTO_SYNC));
    ICING_ASSERT_OK_AND_ASSIGN(const DocumentAssociatedScoreData* first_entry,
                               score_cache->Get(/*idx=*/0));
    // Rewrite entry 0 with its document score bumped by one.
    ICING_ASSERT_OK(score_cache->Set(
        /*idx=*/0,
        DocumentAssociatedScoreData(first_entry->corpus_id(),
                                    first_entry->document_score() + 1,
                                    first_entry->creation_timestamp_ms(),
                                    first_entry->length_in_tokens())));
    ICING_ASSERT_OK(score_cache->PersistToDisk());
  }

  // Use a mock filesystem to watch which index directories get deleted. The
  // catch-all expectation is declared first; the specific ones follow it.
  auto watched_filesystem = std::make_unique<MockFilesystem>();
  EXPECT_CALL(*watched_filesystem, DeleteDirectoryRecursively(_))
      .WillRepeatedly(DoDefault());
  // The term index directory must never be discarded.
  EXPECT_CALL(*watched_filesystem,
              DeleteDirectoryRecursively(EndsWith("/index_dir")))
      .Times(0);
  // The integer index directory must never be discarded, and Clear() must
  // never be called (i.e. no storage sub directory "*/integer_index_dir/*" is
  // discarded).
  EXPECT_CALL(*watched_filesystem,
              DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
      .Times(0);
  EXPECT_CALL(*watched_filesystem,
              DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
      .Times(0);
  // The qualified id join index directory must be discarded exactly once,
  // and Clear() must never be called (i.e. no storage sub directory
  // "*/qualified_id_join_index_dir/*" is discarded).
  EXPECT_CALL(*watched_filesystem,
              DeleteDirectoryRecursively(
                  EndsWith("/qualified_id_join_index_dir")))
      .Times(1);
  EXPECT_CALL(*watched_filesystem,
              DeleteDirectoryRecursively(
                  HasSubstr("/qualified_id_join_index_dir/")))
      .Times(0);

  TestIcingSearchEngine icing(engine_options, std::move(watched_filesystem),
                              std::make_unique<IcingFilesystem>(),
                              std::make_unique<FakeClock>(), GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());
  const InitializeStatsProto& init_stats = init_result.initialize_stats();
  // DocumentStore should have been recovered. NamespaceIds are reassigned in
  // document traversal order: [person_ns1, person_ns2, message_doc], so
  // "namespace1" ends up with id = 0 and "namespace2" with id = 1.
  EXPECT_THAT(init_stats.document_store_recovery_cause(),
              Eq(InitializeStatsProto::IO_ERROR));
  // The term and integer indices should not have been restored.
  EXPECT_THAT(init_stats.index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(init_stats.integer_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  // The qualified id join index should have been rebuilt.
  EXPECT_THAT(init_stats.qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));

  // Join search: a parent query for `name:person` joined with a child query
  // for `body:message` via the child's `senderQualifiedId` field. message_doc
  // should be joined to person_ns2.
  SearchSpecProto parent_spec;
  parent_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
  parent_spec.set_query("name:person");
  JoinSpecProto* join = parent_spec.mutable_join_spec();
  join->set_parent_property_expression(
      std::string(JoinProcessor::kQualifiedIdExpr));
  join->set_child_property_expression("senderQualifiedId");
  join->set_aggregation_scoring_strategy(
      JoinSpecProto::AggregationScoringStrategy::COUNT);
  JoinSpecProto::NestedSpecProto* nested = join->mutable_nested_spec();
  SearchSpecProto* child_spec = nested->mutable_search_spec();
  child_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
  child_spec->set_query("body:message");
  *nested->mutable_scoring_spec() = GetDefaultScoringSpec();
  *nested->mutable_result_spec() = ResultSpecProto::default_instance();

  ResultSpecProto join_result_spec = ResultSpecProto::default_instance();
  join_result_spec.set_max_joined_children_per_parent_to_return(
      std::numeric_limits<int32_t>::max());

  // Expected: person_ns2 (with message_doc joined) first, then person_ns1
  // with no joined children.
  SearchResultProto expected_join_result;
  expected_join_result.mutable_status()->set_code(StatusProto::OK);
  SearchResultProto::ResultProto* first_parent =
      expected_join_result.mutable_results()->Add();
  *first_parent->mutable_document() = person_ns2;
  *first_parent->mutable_joined_results()->Add()->mutable_document() =
      message_doc;

  *expected_join_result.mutable_results()->Add()->mutable_document() =
      person_ns1;

  SearchResultProto actual_join_result = icing.Search(
      parent_spec, ScoringSpecProto::default_instance(), join_result_spec);
  EXPECT_THAT(actual_join_result,
              EqualsSearchResultIgnoreStatsAndScores(expected_join_result));
}
1434
TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
  // Test the following scenario: term index is corrupted (e.g. checksum doesn't
  // match). IcingSearchEngine should be able to recover term index. Several
  // additional behaviors are also tested:
  // - Index directory handling:
  //   - Should discard the entire term index directory and start it from
  //     scratch.
  //   - Integer index directory should be unaffected.
  //   - Qualified id join index directory should be unaffected.
  // - Truncate indices:
  //   - "TruncateTo()" for term index shouldn't take effect since we start it
  //     from scratch.
  //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
  //     discarded.
  //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") should be discarded.

  // Schema with a term-indexed property on each type, plus an integer-indexed
  // property and a qualified-id joinable property on "Message", so that all
  // three index types hold data for this test.
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  // Term query used both before the corruption and after the recovery; it is
  // expected to return exactly the message document.
  SearchSpecProto search_spec;
  search_spec.set_query("body:message");
  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);

  SearchResultProto expected_search_result_proto;
  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
      message;

  {
    // Initializes folder and schema, index one document
    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    // Initialization and schema setup are preconditions for everything below,
    // so stop the test immediately if either of them fails.
    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    SearchResultProto search_result_proto =
        icing.Search(search_spec, GetDefaultScoringSpec(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
                                         expected_search_result_proto));
  }  // This should shut down IcingSearchEngine and persist anything it needs to

  // Manually corrupt term index by writing 4 junk bytes into its hit buffer
  // file.
  {
    const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb";
    ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str()));
    ASSERT_TRUE(fd.is_valid());
    ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
  }

  // Mock filesystem to observe and check the behavior of all indices.
  // The catch-all expectation is declared first: gMock gives precedence to
  // expectations declared later, so the path-specific ones below win whenever
  // their matcher applies.
  auto mock_filesystem = std::make_unique<MockFilesystem>();
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
      .WillRepeatedly(DoDefault());
  // Ensure term index directory should be discarded once.
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(EndsWith("/index_dir")))
      .Times(1);
  // Ensure integer index directory should never be discarded, and Clear()
  // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
  // should never be discarded).
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
      .Times(0);
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
      .Times(0);
  // Ensure qualified id join index directory should never be discarded, and
  // Clear() should never be called (i.e. storage sub directory
  // "*/qualified_id_join_index_dir/*" should never be discarded).
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                    EndsWith("/qualified_id_join_index_dir")))
      .Times(0);
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                    HasSubstr("/qualified_id_join_index_dir/")))
      .Times(0);

  TestIcingSearchEngine icing(GetDefaultIcingOptions(),
                              std::move(mock_filesystem),
                              std::make_unique<IcingFilesystem>(),
                              std::make_unique<FakeClock>(), GetTestJniCache());
  InitializeResultProto initialize_result = icing.Initialize();
  // A failed recovery makes the stats and search checks below meaningless, so
  // this is a fatal assertion.
  ASSERT_THAT(initialize_result.status(), ProtoIsOk());
  // Only the term index is expected to report a restoration cause.
  EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
              Eq(InitializeStatsProto::IO_ERROR));
  EXPECT_THAT(
      initialize_result.initialize_stats().integer_index_restoration_cause(),
      Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(initialize_result.initialize_stats()
                  .qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));

  // Check that our index is ok by searching over the restored index
  SearchResultProto search_result_proto =
      icing.Search(search_spec, GetDefaultScoringSpec(),
                   ResultSpecProto::default_instance());
  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
                                       expected_search_result_proto));
}
1579
TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
  // Test the following scenario: integer index is corrupted (e.g. checksum
  // doesn't match). IcingSearchEngine should be able to recover integer index.
  // Several additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Should discard the entire integer index directory and start it from
  //     scratch.
  //   - Qualified id join index directory should be unaffected.
  // - Truncate indices:
  //   - "TruncateTo()" for term index shouldn't take effect.
  //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
  //     discarded, since we start it from scratch.
  //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") should be discarded.

  // Schema with a term-indexed property on each type, plus an integer-indexed
  // property and a qualified-id joinable property on "Message", so that all
  // three index types hold data for this test.
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  // Numeric query used both before the corruption and after the recovery; it
  // is expected to return exactly the message document.
  SearchSpecProto search_spec;
  search_spec.set_query("indexableInteger == 123");
  search_spec.add_enabled_features(std::string(kNumericSearchFeature));

  SearchResultProto expected_search_result_proto;
  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
      message;

  {
    // Initializes folder and schema, index one document
    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    // Initialization and schema setup are preconditions for everything below,
    // so stop the test immediately if either of them fails.
    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    SearchResultProto search_result_proto =
        icing.Search(search_spec, GetDefaultScoringSpec(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
                                         expected_search_result_proto));
  }  // This should shut down IcingSearchEngine and persist anything it needs to

  // Manually corrupt integer index by writing 4 junk bytes into its metadata
  // file.
  {
    const std::string integer_index_metadata_file =
        GetIntegerIndexDir() + "/integer_index.m";
    ScopedFd fd(
        filesystem()->OpenForWrite(integer_index_metadata_file.c_str()));
    ASSERT_TRUE(fd.is_valid());
    ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
  }

  // Mock filesystem to observe and check the behavior of all indices.
  // The catch-all expectation is declared first: gMock gives precedence to
  // expectations declared later, so the path-specific ones below win whenever
  // their matcher applies.
  auto mock_filesystem = std::make_unique<MockFilesystem>();
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
      .WillRepeatedly(DoDefault());
  // Ensure term index directory should never be discarded.
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(EndsWith("/index_dir")))
      .Times(0);
  // Ensure integer index directory should be discarded once, and Clear()
  // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
  // should never be discarded) since we start it from scratch.
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
      .Times(1);
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
      .Times(0);
  // Ensure qualified id join index directory should never be discarded, and
  // Clear() should never be called (i.e. storage sub directory
  // "*/qualified_id_join_index_dir/*" should never be discarded).
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                    EndsWith("/qualified_id_join_index_dir")))
      .Times(0);
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                    HasSubstr("/qualified_id_join_index_dir/")))
      .Times(0);

  TestIcingSearchEngine icing(GetDefaultIcingOptions(),
                              std::move(mock_filesystem),
                              std::make_unique<IcingFilesystem>(),
                              std::make_unique<FakeClock>(), GetTestJniCache());
  InitializeResultProto initialize_result = icing.Initialize();
  // A failed recovery makes the stats and search checks below meaningless, so
  // this is a fatal assertion.
  ASSERT_THAT(initialize_result.status(), ProtoIsOk());
  // Only the integer index is expected to report a restoration cause.
  EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(
      initialize_result.initialize_stats().integer_index_restoration_cause(),
      Eq(InitializeStatsProto::IO_ERROR));
  EXPECT_THAT(initialize_result.initialize_stats()
                  .qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));

  // Check that our index is ok by searching over the restored index
  SearchResultProto search_result_proto =
      icing.Search(search_spec, GetDefaultScoringSpec(),
                   ResultSpecProto::default_instance());
  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
                                       expected_search_result_proto));
}
1725
TEST_F(IcingSearchEngineInitializationTest,
       RecoverFromIntegerIndexBucketSplitThresholdChange) {
  // Test the following scenario: the engine is re-initialized with an
  // integer_index_bucket_split_threshold that differs from the one used to
  // create the existing integer index. IcingSearchEngine should be able to
  // recover integer index. Several additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Should discard the entire integer index directory and start it from
  //     scratch.
  //   - Qualified id join index directory should be unaffected.
  // - Truncate indices:
  //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
  //     discarded, since we start it from scratch.
  //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") should be discarded.

  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
              PropertyConfigBuilder()
                  .SetName("indexableInteger")
                  .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddInt64Property("indexableInteger", 123)
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  // 1. Create an index with a message document.
  {
    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());

    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());

    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
  }

  // 2. Create the index again with different
  //    integer_index_bucket_split_threshold. This should trigger index
  //    restoration.
  {
    // Mock filesystem to observe and check the behavior of all indices.
    // The catch-all expectation is declared first: gMock gives precedence to
    // expectations declared later, so the path-specific ones below win
    // whenever their matcher applies.
    auto mock_filesystem = std::make_unique<MockFilesystem>();
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
        .WillRepeatedly(DoDefault());
    // Ensure term index directory should never be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/index_dir")))
        .Times(0);
    // Ensure integer index directory should be discarded once, and Clear()
    // should never be called (i.e. storage sub directory
    // "*/integer_index_dir/*" should never be discarded) since we start it from
    // scratch.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
        .Times(1);
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
        .Times(0);
    // Ensure qualified id join index directory should never be discarded, and
    // Clear() should never be called (i.e. storage sub directory
    // "*/qualified_id_join_index_dir/*" should never be discarded).
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                      EndsWith("/qualified_id_join_index_dir")))
        .Times(0);
    EXPECT_CALL(
        *mock_filesystem,
        DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
        .Times(0);

    static constexpr int32_t kNewIntegerIndexBucketSplitThreshold = 1000;
    IcingSearchEngineOptions options = GetDefaultIcingOptions();
    // Guard against the default being changed to the same value, which would
    // silently turn this into a test with no option change at all.
    ASSERT_THAT(kNewIntegerIndexBucketSplitThreshold,
                Ne(options.integer_index_bucket_split_threshold()));
    options.set_integer_index_bucket_split_threshold(
        kNewIntegerIndexBucketSplitThreshold);

    TestIcingSearchEngine icing(options, std::move(mock_filesystem),
                                std::make_unique<IcingFilesystem>(),
                                std::make_unique<FakeClock>(),
                                GetTestJniCache());
    InitializeResultProto initialize_result = icing.Initialize();
    ASSERT_THAT(initialize_result.status(), ProtoIsOk());
    // Only the integer index is expected to report a restoration cause.
    EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(
        initialize_result.initialize_stats().integer_index_restoration_cause(),
        Eq(InitializeStatsProto::IO_ERROR));
    EXPECT_THAT(initialize_result.initialize_stats()
                    .qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));

    // Verify integer index works normally
    SearchSpecProto search_spec;
    search_spec.set_query("indexableInteger == 123");
    search_spec.add_enabled_features(std::string(kNumericSearchFeature));

    SearchResultProto results =
        icing.Search(search_spec, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    // Surface a search error explicitly rather than only as a result-count
    // mismatch below.
    EXPECT_THAT(results.status(), ProtoIsOk());
    ASSERT_THAT(results.results(), SizeIs(1));
    EXPECT_THAT(results.results(0).document().uri(), Eq("message/1"));
  }
}
1825
TEST_F(IcingSearchEngineInitializationTest,
       RecoverFromCorruptQualifiedIdJoinIndex) {
  // Test the following scenario: qualified id join index is corrupted (e.g.
  // checksum doesn't match). IcingSearchEngine should be able to recover
  // qualified id join index. Several additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Integer index directory should be unaffected.
  //   - Should discard the entire qualified id join index directory and start
  //     it from scratch.
  // - Truncate indices:
  //   - "TruncateTo()" for term index shouldn't take effect.
  //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
  //     discarded.
  //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") should be discarded, since we start
  //     it from scratch.

  // Schema with a term-indexed property on each type, plus an integer-indexed
  // property and a qualified-id joinable property on "Message", so that all
  // three index types hold data for this test.
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  // Prepare join search spec to join a query for `name:person` with a child
  // query for `body:message` based on the child's `senderQualifiedId` field.
  SearchSpecProto search_spec;
  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
  search_spec.set_query("name:person");
  JoinSpecProto* join_spec = search_spec.mutable_join_spec();
  join_spec->set_parent_property_expression(
      std::string(JoinProcessor::kQualifiedIdExpr));
  join_spec->set_child_property_expression("senderQualifiedId");
  join_spec->set_aggregation_scoring_strategy(
      JoinSpecProto::AggregationScoringStrategy::COUNT);
  JoinSpecProto::NestedSpecProto* nested_spec =
      join_spec->mutable_nested_spec();
  SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
  nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
  nested_search_spec->set_query("body:message");
  *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
  *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();

  // Raise the per-parent joined children limit to the maximum int32 value.
  ResultSpecProto result_spec = ResultSpecProto::default_instance();
  result_spec.set_max_joined_children_per_parent_to_return(
      std::numeric_limits<int32_t>::max());

  // Expected result: the person document with the message document joined to
  // it as its only child.
  SearchResultProto expected_search_result_proto;
  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
  SearchResultProto::ResultProto* result_proto =
      expected_search_result_proto.mutable_results()->Add();
  *result_proto->mutable_document() = person;
  *result_proto->mutable_joined_results()->Add()->mutable_document() = message;

  {
    // Initializes folder and schema, index one document
    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    // Initialization and schema setup are preconditions for everything below,
    // so stop the test immediately if either of them fails.
    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    SearchResultProto search_result_proto =
        icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
    EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
                                         expected_search_result_proto));
  }  // This should shut down IcingSearchEngine and persist anything it needs to

  // Manually corrupt qualified id join index by writing 4 junk bytes into its
  // metadata file.
  {
    const std::string qualified_id_join_index_metadata_file =
        GetQualifiedIdJoinIndexDir() + "/metadata";
    ScopedFd fd(filesystem()->OpenForWrite(
        qualified_id_join_index_metadata_file.c_str()));
    ASSERT_TRUE(fd.is_valid());
    ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
  }

  // Mock filesystem to observe and check the behavior of all indices.
  // The catch-all expectation is declared first: gMock gives precedence to
  // expectations declared later, so the path-specific ones below win whenever
  // their matcher applies.
  auto mock_filesystem = std::make_unique<MockFilesystem>();
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
      .WillRepeatedly(DoDefault());
  // Ensure term index directory should never be discarded.
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(EndsWith("/index_dir")))
      .Times(0);
  // Ensure integer index directory should never be discarded, and Clear()
  // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
  // should never be discarded).
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
      .Times(0);
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
      .Times(0);
  // Ensure qualified id join index directory should be discarded once, and
  // Clear() should never be called (i.e. storage sub directory
  // "*/qualified_id_join_index_dir/*" should never be discarded) since we
  // start it from scratch.
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                    EndsWith("/qualified_id_join_index_dir")))
      .Times(1);
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                    HasSubstr("/qualified_id_join_index_dir/")))
      .Times(0);

  TestIcingSearchEngine icing(GetDefaultIcingOptions(),
                              std::move(mock_filesystem),
                              std::make_unique<IcingFilesystem>(),
                              std::make_unique<FakeClock>(), GetTestJniCache());
  InitializeResultProto initialize_result = icing.Initialize();
  // A failed recovery makes the stats and search checks below meaningless, so
  // this is a fatal assertion.
  ASSERT_THAT(initialize_result.status(), ProtoIsOk());
  // Only the qualified id join index is expected to report a restoration
  // cause.
  EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(
      initialize_result.initialize_stats().integer_index_restoration_cause(),
      Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(initialize_result.initialize_stats()
                  .qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::IO_ERROR));

  // Check that our index is ok by searching over the restored index
  SearchResultProto search_result_proto =
      icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
                                       expected_search_result_proto));
}
1992
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexLoseTermIndex)1993 TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
1994 // Test the following scenario: losing the entire term index. Since we need
1995 // flash index magic to determine the version, in this test we will throw out
1996 // the entire term index and re-initialize an empty one, to bypass
1997 // undetermined version state change and correctly trigger "lose term index"
1998 // scenario.
1999 // IcingSearchEngine should be able to recover term index. Several additional
2000 // behaviors are also tested:
2001 // - Index directory handling:
2002 // - Term index directory should not be discarded (but instead just being
2003 // rebuilt by replaying all docs).
2004 // - Integer index directory should be unaffected.
2005 // - Qualified id join index directory should be unaffected.
2006 // - Truncate indices:
2007 // - "TruncateTo()" for term index shouldn't take effect since it is empty.
2008 // - "Clear()" shouldn't be called for integer index, i.e. no integer index
2009 // storage sub directories (path_expr = "*/integer_index_dir/*") should be
2010 // discarded.
2011 // - "Clear()" shouldn't be called for qualified id join index, i.e. no
2012 // underlying storage sub directory (path_expr =
2013 // "*/qualified_id_join_index_dir/*") should be discarded.
2014
2015 SchemaProto schema =
2016 SchemaBuilder()
2017 .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
2018 PropertyConfigBuilder()
2019 .SetName("name")
2020 .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
2021 .SetCardinality(CARDINALITY_REQUIRED)))
2022 .AddType(SchemaTypeConfigBuilder()
2023 .SetType("Message")
2024 .AddProperty(PropertyConfigBuilder()
2025 .SetName("body")
2026 .SetDataTypeString(TERM_MATCH_PREFIX,
2027 TOKENIZER_PLAIN)
2028 .SetCardinality(CARDINALITY_REQUIRED))
2029 .AddProperty(PropertyConfigBuilder()
2030 .SetName("indexableInteger")
2031 .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
2032 .SetCardinality(CARDINALITY_REQUIRED))
2033 .AddProperty(PropertyConfigBuilder()
2034 .SetName("senderQualifiedId")
2035 .SetDataTypeJoinableString(
2036 JOINABLE_VALUE_TYPE_QUALIFIED_ID)
2037 .SetCardinality(CARDINALITY_REQUIRED)))
2038 .Build();
2039
2040 DocumentProto person =
2041 DocumentBuilder()
2042 .SetKey("namespace", "person")
2043 .SetSchema("Person")
2044 .AddStringProperty("name", "person")
2045 .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2046 .Build();
2047 DocumentProto message =
2048 DocumentBuilder()
2049 .SetKey("namespace", "message/1")
2050 .SetSchema("Message")
2051 .AddStringProperty("body", kIpsumText)
2052 .AddInt64Property("indexableInteger", 123)
2053 .AddStringProperty("senderQualifiedId", "namespace#person")
2054 .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2055 .Build();
2056
2057 // 1. Create an index with 3 message documents.
2058 {
2059 TestIcingSearchEngine icing(
2060 GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
2061 std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
2062 GetTestJniCache());
2063
2064 ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
2065 ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
2066
2067 EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
2068 EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2069 message = DocumentBuilder(message).SetUri("message/2").Build();
2070 EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2071 message = DocumentBuilder(message).SetUri("message/3").Build();
2072 EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2073 }
2074
2075 // 2. Delete and re-initialize an empty term index to trigger
2076 // RestoreIndexIfNeeded.
2077 {
2078 std::string idx_subdir = GetIndexDir() + "/idx";
2079 ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()));
2080 ICING_ASSERT_OK_AND_ASSIGN(
2081 std::unique_ptr<Index> index,
2082 Index::Create(Index::Options(GetIndexDir(),
2083 /*index_merge_size=*/100,
2084 /*lite_index_sort_at_indexing=*/true,
2085 /*lite_index_sort_size=*/50),
2086 filesystem(), icing_filesystem()));
2087 ICING_ASSERT_OK(index->PersistToDisk());
2088 }
2089
2090 // 3. Create the index again. This should trigger index restoration.
2091 {
2092 // Mock filesystem to observe and check the behavior of all indices.
2093 auto mock_filesystem = std::make_unique<MockFilesystem>();
2094 EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
2095 .WillRepeatedly(DoDefault());
2096 // Ensure term index directory should never be discarded since we've already
2097 // lost it.
2098 EXPECT_CALL(*mock_filesystem,
2099 DeleteDirectoryRecursively(EndsWith("/index_dir")))
2100 .Times(0);
2101 // Ensure integer index directory should never be discarded, and Clear()
2102 // should never be called (i.e. storage sub directory
2103 // "*/integer_index_dir/*" should never be discarded).
2104 EXPECT_CALL(*mock_filesystem,
2105 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
2106 .Times(0);
2107 EXPECT_CALL(*mock_filesystem,
2108 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
2109 .Times(0);
2110 // Ensure qualified id join index directory should never be discarded, and
2111 // Clear() should never be called (i.e. storage sub directory
2112 // "*/qualified_id_join_index_dir/*" should never be discarded).
2113 EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
2114 EndsWith("/qualified_id_join_index_dir")))
2115 .Times(0);
2116 EXPECT_CALL(
2117 *mock_filesystem,
2118 DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
2119 .Times(0);
2120
2121 TestIcingSearchEngine icing(
2122 GetDefaultIcingOptions(), std::move(mock_filesystem),
2123 std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
2124 GetTestJniCache());
2125 InitializeResultProto initialize_result = icing.Initialize();
2126 ASSERT_THAT(initialize_result.status(), ProtoIsOk());
2127 EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
2128 Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
2129 EXPECT_THAT(
2130 initialize_result.initialize_stats().integer_index_restoration_cause(),
2131 Eq(InitializeStatsProto::NONE));
2132 EXPECT_THAT(initialize_result.initialize_stats()
2133 .qualified_id_join_index_restoration_cause(),
2134 Eq(InitializeStatsProto::NONE));
2135
2136 // Verify term index works normally
2137 SearchSpecProto search_spec1;
2138 search_spec1.set_query("body:consectetur");
2139 search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
2140 SearchResultProto results1 =
2141 icing.Search(search_spec1, ScoringSpecProto::default_instance(),
2142 ResultSpecProto::default_instance());
2143 EXPECT_THAT(results1.status(), ProtoIsOk());
2144 EXPECT_THAT(results1.next_page_token(), Eq(0));
2145 // All documents should be retrievable.
2146 ASSERT_THAT(results1.results(), SizeIs(3));
2147 EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
2148 EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
2149 EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
2150
2151 // Verify integer index works normally
2152 SearchSpecProto search_spec2;
2153 search_spec2.set_query("indexableInteger == 123");
2154 search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
2155
2156 SearchResultProto results2 =
2157 icing.Search(search_spec2, ScoringSpecProto::default_instance(),
2158 ResultSpecProto::default_instance());
2159 ASSERT_THAT(results2.results(), SizeIs(3));
2160 EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
2161 EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
2162 EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
2163
2164 // Verify qualified id join index works normally: join a query for
2165 // `name:person` with a child query for `body:consectetur` based on the
2166 // child's `senderQualifiedId` field.
2167 SearchSpecProto search_spec3;
2168 search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
2169 search_spec3.set_query("name:person");
2170 JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
2171 join_spec->set_parent_property_expression(
2172 std::string(JoinProcessor::kQualifiedIdExpr));
2173 join_spec->set_child_property_expression("senderQualifiedId");
2174 join_spec->set_aggregation_scoring_strategy(
2175 JoinSpecProto::AggregationScoringStrategy::COUNT);
2176 JoinSpecProto::NestedSpecProto* nested_spec =
2177 join_spec->mutable_nested_spec();
2178 SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
2179 nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
2180 nested_search_spec->set_query("body:consectetur");
2181 *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
2182 *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
2183
2184 ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
2185 result_spec3.set_max_joined_children_per_parent_to_return(
2186 std::numeric_limits<int32_t>::max());
2187
2188 SearchResultProto results3 = icing.Search(
2189 search_spec3, ScoringSpecProto::default_instance(), result_spec3);
2190 ASSERT_THAT(results3.results(), SizeIs(1));
2191 EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
2192 EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
2193 EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
2194 Eq("message/3"));
2195 EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
2196 Eq("message/2"));
2197 EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
2198 Eq("message/1"));
2199 }
2200 }
2201
TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
  // Test the following scenario: losing the entire integer index directory.
  // IcingSearchEngine should be able to recover integer index. Several
  // additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Integer index directory should not be discarded since we've already
  //     lost it. Start it from scratch.
  //   - Qualified id join index directory should be unaffected.
  // - Truncate indices:
  //   - "TruncateTo()" for term index shouldn't take effect.
  //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
  //     discarded, since we start it from scratch.
  //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") should be discarded.

  // Schema: a "Person" type with one indexed string property, and a "Message"
  // type with one property per index under test (term, integer, join).
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", kIpsumText)
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  // 1. Create an index with 3 message documents.
  {
    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());

    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());

    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/2").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/3").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
  }

  // 2. Delete the integer index directory to trigger RestoreIndexIfNeeded.
  // The deletion is a precondition of the scenario, so abort the test if it
  // fails instead of continuing with an intact index.
  std::string integer_index_dir = GetIntegerIndexDir();
  ASSERT_TRUE(
      filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str()));

  // 3. Create the index again. This should trigger index restoration.
  {
    // Mock filesystem to observe and check the behavior of all indices.
    // gMock tries expectations in reverse order of declaration, so this
    // catch-all must be declared before the more specific Times(0)
    // expectations below. DoDefault() runs the mock's default action
    // (presumably the real filesystem implementation; see MockFilesystem).
    auto mock_filesystem = std::make_unique<MockFilesystem>();
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
        .WillRepeatedly(DoDefault());
    // Ensure term index directory should never be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/index_dir")))
        .Times(0);
    // Ensure integer index directory should never be discarded since we've
    // already lost it, and Clear() should never be called (i.e. storage sub
    // directory "*/integer_index_dir/*" should never be discarded) since we
    // start it from scratch.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
        .Times(0);
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
        .Times(0);
    // Ensure qualified id join index directory should never be discarded, and
    // Clear() should never be called (i.e. storage sub directory
    // "*/qualified_id_join_index_dir/*" should never be discarded).
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                      EndsWith("/qualified_id_join_index_dir")))
        .Times(0);
    EXPECT_CALL(
        *mock_filesystem,
        DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
        .Times(0);

    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::move(mock_filesystem),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    InitializeResultProto initialize_result = icing.Initialize();
    ASSERT_THAT(initialize_result.status(), ProtoIsOk());
    // Only the integer index should report a restoration.
    EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(
        initialize_result.initialize_stats().integer_index_restoration_cause(),
        Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
    EXPECT_THAT(initialize_result.initialize_stats()
                    .qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));

    // Verify term index works normally
    SearchSpecProto search_spec1;
    search_spec1.set_query("body:consectetur");
    search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
    SearchResultProto results1 =
        icing.Search(search_spec1, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(results1.status(), ProtoIsOk());
    EXPECT_THAT(results1.next_page_token(), Eq(0));
    // All documents should be retrievable.
    ASSERT_THAT(results1.results(), SizeIs(3));
    EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));

    // Verify integer index works normally
    SearchSpecProto search_spec2;
    search_spec2.set_query("indexableInteger == 123");
    search_spec2.add_enabled_features(std::string(kNumericSearchFeature));

    SearchResultProto results2 =
        icing.Search(search_spec2, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(results2.status(), ProtoIsOk());
    ASSERT_THAT(results2.results(), SizeIs(3));
    EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));

    // Verify qualified id join index works normally: join a query for
    // `name:person` with a child query for `body:consectetur` based on the
    // child's `senderQualifiedId` field.
    SearchSpecProto search_spec3;
    search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
    search_spec3.set_query("name:person");
    JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
    join_spec->set_parent_property_expression(
        std::string(JoinProcessor::kQualifiedIdExpr));
    join_spec->set_child_property_expression("senderQualifiedId");
    join_spec->set_aggregation_scoring_strategy(
        JoinSpecProto::AggregationScoringStrategy::COUNT);
    JoinSpecProto::NestedSpecProto* nested_spec =
        join_spec->mutable_nested_spec();
    SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
    nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
    nested_search_spec->set_query("body:consectetur");
    *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
    *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();

    ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
    result_spec3.set_max_joined_children_per_parent_to_return(
        std::numeric_limits<int32_t>::max());

    SearchResultProto results3 = icing.Search(
        search_spec3, ScoringSpecProto::default_instance(), result_spec3);
    EXPECT_THAT(results3.status(), ProtoIsOk());
    ASSERT_THAT(results3.results(), SizeIs(1));
    EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
    // ASSERT (not EXPECT) so that the indexed accesses below never read past
    // the end of joined_results when the size is wrong.
    ASSERT_THAT(results3.results(0).joined_results(), SizeIs(3));
    EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
                Eq("message/3"));
    EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
                Eq("message/2"));
    EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
                Eq("message/1"));
  }
}
2395
TEST_F(IcingSearchEngineInitializationTest,
       RestoreIndexLoseQualifiedIdJoinIndex) {
  // Test the following scenario: losing the entire qualified id join index
  // directory. IcingSearchEngine should be able to recover qualified id join
  // index. Several additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Integer index directory should be unaffected.
  //   - Qualified id join index directory should not be discarded since we've
  //     already lost it. Start it from scratch.
  // - Truncate indices:
  //   - "TruncateTo()" for term index shouldn't take effect.
  //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
  //     discarded.
  //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") should be discarded, since we start
  //     it from scratch.

  // Schema: a "Person" type with one indexed string property, and a "Message"
  // type with one property per index under test (term, integer, join).
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", kIpsumText)
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  // 1. Create an index with 3 message documents.
  {
    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());

    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());

    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/2").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/3").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
  }

  // 2. Delete the qualified id join index directory to trigger
  // RestoreIndexIfNeeded. The deletion is a precondition of the scenario, so
  // abort the test if it fails instead of continuing with an intact index.
  std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir();
  ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(
      qualified_id_join_index_dir.c_str()));

  // 3. Create the index again. This should trigger index restoration.
  {
    // Mock filesystem to observe and check the behavior of all indices.
    // gMock tries expectations in reverse order of declaration, so this
    // catch-all must be declared before the more specific Times(0)
    // expectations below. DoDefault() runs the mock's default action
    // (presumably the real filesystem implementation; see MockFilesystem).
    auto mock_filesystem = std::make_unique<MockFilesystem>();
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
        .WillRepeatedly(DoDefault());
    // Ensure term index directory should never be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/index_dir")))
        .Times(0);
    // Ensure integer index directory should never be discarded, and Clear()
    // should never be called (i.e. storage sub directory
    // "*/integer_index_dir/*" should never be discarded).
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
        .Times(0);
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
        .Times(0);
    // Ensure qualified id join index directory should never be discarded since
    // we've already lost it, and Clear() should never be called (i.e. storage
    // sub directory "*/qualified_id_join_index_dir/*" should never be
    // discarded) since we start it from scratch.
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                      EndsWith("/qualified_id_join_index_dir")))
        .Times(0);
    EXPECT_CALL(
        *mock_filesystem,
        DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
        .Times(0);

    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::move(mock_filesystem),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    InitializeResultProto initialize_result = icing.Initialize();
    ASSERT_THAT(initialize_result.status(), ProtoIsOk());
    // Only the qualified id join index should report a restoration.
    EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(
        initialize_result.initialize_stats().integer_index_restoration_cause(),
        Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(initialize_result.initialize_stats()
                    .qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));

    // Verify term index works normally
    SearchSpecProto search_spec1;
    search_spec1.set_query("body:consectetur");
    search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
    SearchResultProto results1 =
        icing.Search(search_spec1, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(results1.status(), ProtoIsOk());
    EXPECT_THAT(results1.next_page_token(), Eq(0));
    // All documents should be retrievable.
    ASSERT_THAT(results1.results(), SizeIs(3));
    EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));

    // Verify integer index works normally
    SearchSpecProto search_spec2;
    search_spec2.set_query("indexableInteger == 123");
    search_spec2.add_enabled_features(std::string(kNumericSearchFeature));

    SearchResultProto results2 =
        icing.Search(search_spec2, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(results2.status(), ProtoIsOk());
    ASSERT_THAT(results2.results(), SizeIs(3));
    EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));

    // Verify qualified id join index works normally: join a query for
    // `name:person` with a child query for `body:consectetur` based on the
    // child's `senderQualifiedId` field.
    SearchSpecProto search_spec3;
    search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
    search_spec3.set_query("name:person");
    JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
    join_spec->set_parent_property_expression(
        std::string(JoinProcessor::kQualifiedIdExpr));
    join_spec->set_child_property_expression("senderQualifiedId");
    join_spec->set_aggregation_scoring_strategy(
        JoinSpecProto::AggregationScoringStrategy::COUNT);
    JoinSpecProto::NestedSpecProto* nested_spec =
        join_spec->mutable_nested_spec();
    SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
    nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
    nested_search_spec->set_query("body:consectetur");
    *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
    *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();

    ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
    result_spec3.set_max_joined_children_per_parent_to_return(
        std::numeric_limits<int32_t>::max());

    SearchResultProto results3 = icing.Search(
        search_spec3, ScoringSpecProto::default_instance(), result_spec3);
    EXPECT_THAT(results3.status(), ProtoIsOk());
    ASSERT_THAT(results3.results(), SizeIs(1));
    EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
    // ASSERT (not EXPECT) so that the indexed accesses below never read past
    // the end of joined_results when the size is wrong.
    ASSERT_THAT(results3.results(0).joined_results(), SizeIs(3));
    EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
                Eq("message/3"));
    EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
                Eq("message/2"));
    EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
                Eq("message/1"));
  }
}
2591
TEST_F(IcingSearchEngineInitializationTest,
       RestoreIndexTruncateLiteIndexWithoutReindexing) {
  // Test the following scenario: term lite index is *completely* ahead of
  // document store. IcingSearchEngine should be able to recover term index.
  // Several additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Integer index directory should be unaffected.
  //   - Qualified id join index directory should be unaffected.
  // - Truncate indices:
  //   - "TruncateTo()" for term index should take effect and throw out the
  //     entire lite index. This should be sufficient to make term index
  //     consistent with document store, so reindexing should not take place.
  //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
  //     discarded.
  //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") should be discarded.

  // Schema: a "Person" type with one indexed string property, and a "Message"
  // type with one property per index under test (term, integer, join).
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", kIpsumText)
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  // 1. Create an index with a LiteIndex that will only allow a person and a
  //    message document before needing a merge.
  {
    IcingSearchEngineOptions options = GetDefaultIcingOptions();
    options.set_index_merge_size(person.ByteSizeLong() +
                                 message.ByteSizeLong());
    TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
                                std::make_unique<IcingFilesystem>(),
                                std::make_unique<FakeClock>(),
                                GetTestJniCache());

    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());

    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    // Add two message documents. These should get merged into the main index.
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/2").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
  }

  // 2. Manually add some data into term lite index and increment
  //    last_added_document_id, but don't merge into the main index. This will
  //    cause mismatched last_added_document_id with term index.
  //    - Document store: [0, 1, 2]
  //    - Term index
  //      - Main index: [0, 1, 2]
  //      - Lite index: [3]
  //    - Integer index: [0, 1, 2]
  //    - Qualified id join index: [0, 1, 2]
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<Index> index,
        Index::Create(
            Index::Options(GetIndexDir(),
                           /*index_merge_size=*/message.ByteSizeLong(),
                           /*lite_index_sort_at_indexing=*/true,
                           /*lite_index_sort_size=*/8),
            filesystem(), icing_filesystem()));
    DocumentId original_last_added_doc_id = index->last_added_document_id();
    index->set_last_added_document_id(original_last_added_doc_id + 1);
    // Index the term "foo" for a document id that the document store has
    // never seen.
    Index::Editor editor =
        index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
                    TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
    ICING_ASSERT_OK(editor.BufferTerm("foo"));
    ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
  }

  // 3. Create the index again.
  {
    // Mock filesystem to observe and check the behavior of all indices.
    // gMock tries expectations in reverse order of declaration, so this
    // catch-all must be declared before the more specific Times(0)
    // expectations below. DoDefault() runs the mock's default action
    // (presumably the real filesystem implementation; see MockFilesystem).
    auto mock_filesystem = std::make_unique<MockFilesystem>();
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
        .WillRepeatedly(DoDefault());
    // Ensure term index directory should never be discarded, since we only
    // call TruncateTo for term index.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/index_dir")))
        .Times(0);
    // Ensure integer index directory should never be discarded, and Clear()
    // should never be called (i.e. storage sub directory
    // "*/integer_index_dir/*" should never be discarded).
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
        .Times(0);
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
        .Times(0);
    // Ensure qualified id join index directory should never be discarded, and
    // Clear() should never be called (i.e. storage sub directory
    // "*/qualified_id_join_index_dir/*" should never be discarded).
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                      EndsWith("/qualified_id_join_index_dir")))
        .Times(0);
    EXPECT_CALL(
        *mock_filesystem,
        DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
        .Times(0);

    IcingSearchEngineOptions options = GetDefaultIcingOptions();
    options.set_index_merge_size(message.ByteSizeLong());
    TestIcingSearchEngine icing(options, std::move(mock_filesystem),
                                std::make_unique<IcingFilesystem>(),
                                std::make_unique<FakeClock>(),
                                GetTestJniCache());
    InitializeResultProto initialize_result = icing.Initialize();
    ASSERT_THAT(initialize_result.status(), ProtoIsOk());
    // Since truncating lite index is sufficient to make term index consistent
    // with document store, replaying documents or reindex shouldn't take place.
    EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(
        initialize_result.initialize_stats().integer_index_restoration_cause(),
        Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(initialize_result.initialize_stats()
                    .qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));

    // Verify term index works normally
    SearchSpecProto search_spec1;
    search_spec1.set_query("body:consectetur");
    search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
    SearchResultProto results1 =
        icing.Search(search_spec1, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(results1.status(), ProtoIsOk());
    EXPECT_THAT(results1.next_page_token(), Eq(0));
    // Only the documents that were in the main index should be retrievable.
    ASSERT_THAT(results1.results(), SizeIs(2));
    EXPECT_THAT(results1.results(0).document().uri(), Eq("message/2"));
    EXPECT_THAT(results1.results(1).document().uri(), Eq("message/1"));

    // Verify integer index works normally
    SearchSpecProto search_spec2;
    search_spec2.set_query("indexableInteger == 123");
    search_spec2.add_enabled_features(std::string(kNumericSearchFeature));

    SearchResultProto results2 =
        icing.Search(search_spec2, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(results2.status(), ProtoIsOk());
    ASSERT_THAT(results2.results(), SizeIs(2));
    EXPECT_THAT(results2.results(0).document().uri(), Eq("message/2"));
    EXPECT_THAT(results2.results(1).document().uri(), Eq("message/1"));

    // Verify qualified id join index works normally: join a query for
    // `name:person` with a child query for `body:consectetur` based on the
    // child's `senderQualifiedId` field.
    SearchSpecProto search_spec3;
    search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
    search_spec3.set_query("name:person");
    JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
    join_spec->set_parent_property_expression(
        std::string(JoinProcessor::kQualifiedIdExpr));
    join_spec->set_child_property_expression("senderQualifiedId");
    join_spec->set_aggregation_scoring_strategy(
        JoinSpecProto::AggregationScoringStrategy::COUNT);
    JoinSpecProto::NestedSpecProto* nested_spec =
        join_spec->mutable_nested_spec();
    SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
    nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
    nested_search_spec->set_query("body:consectetur");
    *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
    *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();

    ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
    result_spec3.set_max_joined_children_per_parent_to_return(
        std::numeric_limits<int32_t>::max());

    SearchResultProto results3 = icing.Search(
        search_spec3, ScoringSpecProto::default_instance(), result_spec3);
    EXPECT_THAT(results3.status(), ProtoIsOk());
    ASSERT_THAT(results3.results(), SizeIs(1));
    EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
    // ASSERT (not EXPECT) so that the indexed accesses below never read past
    // the end of joined_results when the size is wrong.
    ASSERT_THAT(results3.results(0).joined_results(), SizeIs(2));
    EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
                Eq("message/2"));
    EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
                Eq("message/1"));
  }

  // 4. Since document 3 doesn't exist, testing query = "foo" is not enough to
  //    verify the correctness of term index restoration. Instead, we have to
  //    check hits for "foo" should not be found in term index.
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<Index> index,
        Index::Create(
            Index::Options(GetIndexDir(),
                           /*index_merge_size=*/message.ByteSizeLong(),
                           /*lite_index_sort_at_indexing=*/true,
                           /*lite_index_sort_size=*/8),
            filesystem(), icing_filesystem()));
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
        index->GetIterator("foo", /*term_start_index=*/0,
                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                           TermMatchType::EXACT_ONLY));
    // The very first Advance() is expected to report that there are no hits.
    EXPECT_THAT(doc_hit_info_iter->Advance(),
                StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
  }
}
2834
TEST_F(IcingSearchEngineInitializationTest,
       RestoreIndexTruncateLiteIndexWithReindexing) {
  // Test the following scenario: term lite index is *partially* ahead of
  // document store. IcingSearchEngine should be able to recover term index.
  // Several additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Integer index directory should be unaffected.
  //   - Qualified id join index directory should be unaffected.
  // - Truncate indices:
  //   - "TruncateTo()" for term index should take effect and throw out the
  //     entire lite index. However, some valid data in term lite index were
  //     discarded together, so reindexing should still take place to recover
  //     them after truncating.
  //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
  //     discarded.
  //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") should be discarded.

  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", kIpsumText)
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  // 1. Create an index with a LiteIndex that will only allow a person and a
  //    message document before needing a merge.
  {
    IcingSearchEngineOptions options = GetDefaultIcingOptions();
    options.set_index_merge_size(message.ByteSizeLong());
    TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
                                std::make_unique<IcingFilesystem>(),
                                std::make_unique<FakeClock>(),
                                GetTestJniCache());

    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());

    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    // Add two message documents. These should get merged into the main index.
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/2").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    // Add one more document. This one should remain in the lite index.
    message = DocumentBuilder(message).SetUri("message/3").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
  }

  // 2. Manually add some data into term lite index and increment
  //    last_added_document_id, but don't merge into the main index. This will
  //    cause mismatched last_added_document_id with term index.
  //    - Document store: [0, 1, 2, 3]
  //    - Term index
  //      - Main index: [0, 1, 2]
  //      - Lite index: [3, 4]
  //    - Integer index: [0, 1, 2, 3]
  //    - Qualified id join index: [0, 1, 2, 3]
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<Index> index,
        Index::Create(
            Index::Options(GetIndexDir(),
                           /*index_merge_size=*/message.ByteSizeLong(),
                           /*lite_index_sort_at_indexing=*/true,
                           /*lite_index_sort_size=*/8),
            filesystem(), icing_filesystem()));
    // Index a hit for a document id one past the last one the term index
    // knows about; the document store has no such document.
    DocumentId original_last_added_doc_id = index->last_added_document_id();
    index->set_last_added_document_id(original_last_added_doc_id + 1);
    Index::Editor editor =
        index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
                    TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
    ICING_ASSERT_OK(editor.BufferTerm("foo"));
    ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
  }

  // 3. Create the index again.
  {
    // Mock filesystem to observe and check the behavior of all indices.
    // gMock tries the most recently declared expectation first, so this
    // catch-all only handles deletions that none of the more specific
    // expectations below match.
    auto mock_filesystem = std::make_unique<MockFilesystem>();
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
        .WillRepeatedly(DoDefault());
    // Ensure term index directory should never be discarded, since we only
    // call TruncateTo for term index.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/index_dir")))
        .Times(0);
    // Ensure integer index directory should never be discarded, and Clear()
    // should never be called (i.e. storage sub directory
    // "*/integer_index_dir/*" should never be discarded).
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
        .Times(0);
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
        .Times(0);
    // Ensure qualified id join index directory should never be discarded, and
    // Clear() should never be called (i.e. storage sub directory
    // "*/qualified_id_join_index_dir/*" should never be discarded).
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                      EndsWith("/qualified_id_join_index_dir")))
        .Times(0);
    EXPECT_CALL(
        *mock_filesystem,
        DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
        .Times(0);

    IcingSearchEngineOptions options = GetDefaultIcingOptions();
    options.set_index_merge_size(message.ByteSizeLong());
    TestIcingSearchEngine icing(options, std::move(mock_filesystem),
                                std::make_unique<IcingFilesystem>(),
                                std::make_unique<FakeClock>(),
                                GetTestJniCache());
    InitializeResultProto initialize_result = icing.Initialize();
    ASSERT_THAT(initialize_result.status(), ProtoIsOk());
    // Truncating lite index not only deletes data ahead of document store, but
    // also deletes valid data. Therefore, we still have to replay documents and
    // reindex.
    EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
                Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
    EXPECT_THAT(
        initialize_result.initialize_stats().integer_index_restoration_cause(),
        Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(initialize_result.initialize_stats()
                    .qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));

    // Verify term index works normally
    SearchSpecProto search_spec1;
    search_spec1.set_query("body:consectetur");
    search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
    SearchResultProto results1 =
        icing.Search(search_spec1, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(results1.status(), ProtoIsOk());
    EXPECT_THAT(results1.next_page_token(), Eq(0));
    // Reindexing should have recovered the hits that were thrown out with the
    // lite index, so all three message documents (including message/3, which
    // was only in the lite index) should be retrievable.
    ASSERT_THAT(results1.results(), SizeIs(3));
    EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));

    // Verify integer index works normally
    SearchSpecProto search_spec2;
    search_spec2.set_query("indexableInteger == 123");
    search_spec2.add_enabled_features(std::string(kNumericSearchFeature));

    SearchResultProto results2 =
        icing.Search(search_spec2, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    // Check the status first so a failed search is reported as such rather
    // than as a result-count mismatch.
    EXPECT_THAT(results2.status(), ProtoIsOk());
    ASSERT_THAT(results2.results(), SizeIs(3));
    EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));

    // Verify qualified id join index works normally: join a query for
    // `name:person` with a child query for `body:consectetur` based on the
    // child's `senderQualifiedId` field.
    SearchSpecProto search_spec3;
    search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
    search_spec3.set_query("name:person");
    JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
    join_spec->set_parent_property_expression(
        std::string(JoinProcessor::kQualifiedIdExpr));
    join_spec->set_child_property_expression("senderQualifiedId");
    join_spec->set_aggregation_scoring_strategy(
        JoinSpecProto::AggregationScoringStrategy::COUNT);
    JoinSpecProto::NestedSpecProto* nested_spec =
        join_spec->mutable_nested_spec();
    SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
    nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
    nested_search_spec->set_query("body:consectetur");
    *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
    *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();

    // Do not cap the number of joined children returned per parent.
    ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
    result_spec3.set_max_joined_children_per_parent_to_return(
        std::numeric_limits<int32_t>::max());

    SearchResultProto results3 = icing.Search(
        search_spec3, ScoringSpecProto::default_instance(), result_spec3);
    EXPECT_THAT(results3.status(), ProtoIsOk());
    ASSERT_THAT(results3.results(), SizeIs(1));
    EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
    EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
    EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
                Eq("message/3"));
    EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
                Eq("message/2"));
    EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
                Eq("message/1"));
  }

  // 4. Since document 4 doesn't exist, testing query = "foo" is not enough to
  //    verify the correctness of term index restoration. Instead, we have to
  //    check hits for "foo" should not be found in term index.
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<Index> index,
        Index::Create(
            Index::Options(GetIndexDir(),
                           /*index_merge_size=*/message.ByteSizeLong(),
                           /*lite_index_sort_at_indexing=*/true,
                           /*lite_index_sort_size=*/8),
            filesystem(), icing_filesystem()));
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
        index->GetIterator("foo", /*term_start_index=*/0,
                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                           TermMatchType::EXACT_ONLY));
    // The very first Advance() must report that there are no hits at all.
    EXPECT_THAT(doc_hit_info_iter->Advance(),
                StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
  }
}
3085
TEST_F(IcingSearchEngineInitializationTest,
       RestoreIndexTruncateMainIndexWithoutReindexing) {
  // Scenario under test: the term main index is *completely* ahead of the
  // document store, and IcingSearchEngine must be able to recover the term
  // index. Side behaviors that are checked as well:
  // - Index directory handling:
  //   - The term index directory is left in place.
  //   - The integer index directory is left in place.
  //   - The qualified id join index directory is left in place.
  // - Truncating indices:
  //   - "TruncateTo()" takes effect for the term index and throws out both
  //     the lite and the main index. That alone makes the term index
  //     consistent with the document store (which is empty here too), so no
  //     reindexing should happen.
  //   - "Clear()" is called for the integer index. This is the special case
  //     of a document store without documents. There are no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*"), so
  //     nothing is discarded.
  //   - "Clear()" is called for the qualified id join index. This is the
  //     special case of a document store without documents.

  // Step 1: bring up an engine that has a schema but no documents.
  {
    TestIcingSearchEngine empty_engine(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());

    ASSERT_THAT(empty_engine.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(empty_engine.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());
  }

  // Step 2: write hits directly into the term index and bump
  // last_added_document_id. One document is merged into the main index, the
  // other stays in the lite index, so the term index disagrees with the
  // document store:
  //   - Document store: []
  //   - Term index
  //     - Main index: [0]
  //     - Lite index: [1]
  //   - Integer index: []
  //   - Qualified id join index: []
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<Index> term_index,
        Index::Create(
            // The merge size does not matter here: Merge() is invoked
            // manually below.
            Index::Options(GetIndexDir(), /*index_merge_size=*/100,
                           /*lite_index_sort_at_indexing=*/true,
                           /*lite_index_sort_size=*/50),
            filesystem(), icing_filesystem()));

    // Document 0: index "foo", then merge it into the main index.
    ASSERT_THAT(term_index->last_added_document_id(), Eq(kInvalidDocumentId));
    term_index->set_last_added_document_id(0);
    Index::Editor term_editor =
        term_index->Edit(/*document_id=*/0, /*section_id=*/0,
                         TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
    ICING_ASSERT_OK(term_editor.BufferTerm("foo"));
    ICING_ASSERT_OK(term_editor.IndexAllBufferedTerms());
    ICING_ASSERT_OK(term_index->Merge());

    // Document 1: index "bar" and leave it unmerged in the lite index.
    term_index->set_last_added_document_id(1);
    term_editor =
        term_index->Edit(/*document_id=*/1, /*section_id=*/0,
                         TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
    ICING_ASSERT_OK(term_editor.BufferTerm("bar"));
    ICING_ASSERT_OK(term_editor.IndexAllBufferedTerms());
  }

  // Step 3: initialize the engine again. Both the lite and the main index are
  // expected to be thrown out.
  {
    // A mock filesystem lets us observe which directories each index deletes.
    auto observing_filesystem = std::make_unique<MockFilesystem>();
    EXPECT_CALL(*observing_filesystem, DeleteDirectoryRecursively(_))
        .WillRepeatedly(DoDefault());
    // Only TruncateTo() is called for the term index, so its directory must
    // never be deleted.
    EXPECT_CALL(*observing_filesystem,
                DeleteDirectoryRecursively(EndsWith("/index_dir")))
        .Times(0);
    // The integer index directory must never be deleted. Clear() is called,
    // but there is no storage sub directory ("*/integer_index_dir/*") for it
    // to discard.
    EXPECT_CALL(*observing_filesystem,
                DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
        .Times(0);
    EXPECT_CALL(*observing_filesystem,
                DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
        .Times(0);
    // The qualified id join index directory must never be deleted either.
    // Clear() is called, though, and it should discard and reinitialize the
    // underlying mapper.
    EXPECT_CALL(*observing_filesystem,
                DeleteDirectoryRecursively(
                    EndsWith("/qualified_id_join_index_dir")))
        .Times(0);
    EXPECT_CALL(
        *observing_filesystem,
        DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
        .Times(AtLeast(1));

    TestIcingSearchEngine restored_engine(
        GetDefaultIcingOptions(), std::move(observing_filesystem),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    InitializeResultProto init_result = restored_engine.Initialize();
    ASSERT_THAT(init_result.status(), ProtoIsOk());

    // Truncating the main index is enough to bring the term index back in
    // line with the document store, so no index should report a restoration
    // (no document replay, no reindexing).
    const InitializeStatsProto& init_stats = init_result.initialize_stats();
    EXPECT_THAT(init_stats.index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(init_stats.integer_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(init_stats.qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
  }

  // Step 4: documents 0 and 1 do not exist, so running the queries "foo" and
  // "bar" through the engine would not prove the term index was restored
  // correctly. Instead, open the term index directly and check that it holds
  // no hits for either term.
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<Index> term_index,
        Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100,
                                     /*lite_index_sort_at_indexing=*/true,
                                     /*lite_index_sort_size=*/50),
                      filesystem(), icing_filesystem()));

    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<DocHitInfoIterator> foo_hits,
        term_index->GetIterator("foo", /*term_start_index=*/0,
                                /*unnormalized_term_length=*/0,
                                kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
    EXPECT_THAT(foo_hits->Advance(),
                StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));

    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<DocHitInfoIterator> bar_hits,
        term_index->GetIterator("bar", /*term_start_index=*/0,
                                /*unnormalized_term_length=*/0,
                                kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
    EXPECT_THAT(bar_hits->Advance(),
                StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
  }
}
3233
TEST_F(IcingSearchEngineInitializationTest,
       RestoreIndexTruncateMainIndexWithReindexing) {
  // Test the following scenario: term main index is *partially* ahead of
  // document store. IcingSearchEngine should be able to recover term index.
  // Several additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Integer index directory should be unaffected.
  //   - Qualified id join index directory should be unaffected.
  // - In RestoreIndexIfNecessary():
  //   - "TruncateTo()" for term index should take effect and throw out the
  //     entire lite and main index. However, some valid data in term main index
  //     were discarded together, so reindexing should still take place to
  //     recover them after truncating.
  //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
  //     discarded.
  //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") should be discarded.

  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", kIpsumText)
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  // 1. Create an index with a person document and 3 message documents.
  {
    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());

    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());

    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/2").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/3").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
  }

  // 2. Manually add some data into term lite index and increment
  //    last_added_document_id. Merge some of them into the main index and keep
  //    others in the lite index. This will cause mismatched document id with
  //    document store.
  //    - Document store: [0, 1, 2, 3]
  //    - Term index
  //      - Main index: [0, 1, 2, 3, 4]
  //      - Lite index: [5]
  //    - Integer index: [0, 1, 2, 3]
  //    - Qualified id join index: [0, 1, 2, 3]
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<Index> index,
        Index::Create(
            Index::Options(GetIndexDir(),
                           /*index_merge_size=*/message.ByteSizeLong(),
                           /*lite_index_sort_at_indexing=*/true,
                           /*lite_index_sort_size=*/8),
            filesystem(), icing_filesystem()));
    // Add hits for document 4 and merge.
    DocumentId original_last_added_doc_id = index->last_added_document_id();
    index->set_last_added_document_id(original_last_added_doc_id + 1);
    Index::Editor editor =
        index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
                    TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
    ICING_ASSERT_OK(editor.BufferTerm("foo"));
    ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
    ICING_ASSERT_OK(index->Merge());

    // Add hits for document 5 and don't merge.
    index->set_last_added_document_id(original_last_added_doc_id + 2);
    editor = index->Edit(original_last_added_doc_id + 2, /*section_id=*/0,
                         TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
    ICING_ASSERT_OK(editor.BufferTerm("bar"));
    ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
  }

  // 3. Create the index again. This should throw out the lite and main index
  //    and trigger index restoration.
  {
    // Mock filesystem to observe and check the behavior of all indices.
    // gMock tries the most recently declared expectation first, so this
    // catch-all only handles deletions that none of the more specific
    // expectations below match.
    auto mock_filesystem = std::make_unique<MockFilesystem>();
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
        .WillRepeatedly(DoDefault());
    // Ensure term index directory should never be discarded, since we only
    // call TruncateTo for term index.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/index_dir")))
        .Times(0);
    // Ensure integer index directory should never be discarded, and Clear()
    // should never be called (i.e. storage sub directory
    // "*/integer_index_dir/*" should never be discarded).
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
        .Times(0);
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
        .Times(0);
    // Ensure qualified id join index directory should never be discarded, and
    // Clear() should never be called (i.e. storage sub directory
    // "*/qualified_id_join_index_dir/*" should never be discarded).
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                      EndsWith("/qualified_id_join_index_dir")))
        .Times(0);
    EXPECT_CALL(
        *mock_filesystem,
        DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
        .Times(0);

    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::move(mock_filesystem),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    InitializeResultProto initialize_result = icing.Initialize();
    ASSERT_THAT(initialize_result.status(), ProtoIsOk());
    // Truncating main index not only deletes data ahead of document store, but
    // also deletes valid data. Therefore, we still have to replay documents and
    // reindex.
    EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
                Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
    EXPECT_THAT(
        initialize_result.initialize_stats().integer_index_restoration_cause(),
        Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(initialize_result.initialize_stats()
                    .qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));

    // Verify term index works normally
    SearchSpecProto search_spec1;
    search_spec1.set_query("body:consectetur");
    search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
    SearchResultProto results1 =
        icing.Search(search_spec1, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(results1.status(), ProtoIsOk());
    EXPECT_THAT(results1.next_page_token(), Eq(0));
    // Reindexing should have recovered the valid hits that were thrown out
    // together with the main index, so all three message documents should be
    // retrievable.
    ASSERT_THAT(results1.results(), SizeIs(3));
    EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));

    // Verify integer index works normally
    SearchSpecProto search_spec2;
    search_spec2.set_query("indexableInteger == 123");
    search_spec2.add_enabled_features(std::string(kNumericSearchFeature));

    SearchResultProto results2 =
        icing.Search(search_spec2, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    // Check the status first so a failed search is reported as such rather
    // than as a result-count mismatch.
    EXPECT_THAT(results2.status(), ProtoIsOk());
    ASSERT_THAT(results2.results(), SizeIs(3));
    EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));

    // Verify qualified id join index works normally: join a query for
    // `name:person` with a child query for `body:consectetur` based on the
    // child's `senderQualifiedId` field.
    SearchSpecProto search_spec3;
    search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
    search_spec3.set_query("name:person");
    JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
    join_spec->set_parent_property_expression(
        std::string(JoinProcessor::kQualifiedIdExpr));
    join_spec->set_child_property_expression("senderQualifiedId");
    join_spec->set_aggregation_scoring_strategy(
        JoinSpecProto::AggregationScoringStrategy::COUNT);
    JoinSpecProto::NestedSpecProto* nested_spec =
        join_spec->mutable_nested_spec();
    SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
    nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
    nested_search_spec->set_query("body:consectetur");
    *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
    *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();

    // Do not cap the number of joined children returned per parent.
    ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
    result_spec3.set_max_joined_children_per_parent_to_return(
        std::numeric_limits<int32_t>::max());

    SearchResultProto results3 = icing.Search(
        search_spec3, ScoringSpecProto::default_instance(), result_spec3);
    EXPECT_THAT(results3.status(), ProtoIsOk());
    ASSERT_THAT(results3.results(), SizeIs(1));
    EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
    EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
    EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
                Eq("message/3"));
    EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
                Eq("message/2"));
    EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
                Eq("message/1"));
  }

  // 4. Since document 4, 5 don't exist, testing queries = "foo", "bar" are not
  //    enough to verify the correctness of term index restoration. Instead, we
  //    have to check hits for "foo", "bar" should not be found in term index.
  {
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<Index> index,
        Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100,
                                     /*lite_index_sort_at_indexing=*/true,
                                     /*lite_index_sort_size=*/50),
                      filesystem(), icing_filesystem()));
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
        index->GetIterator("foo", /*term_start_index=*/0,
                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                           TermMatchType::EXACT_ONLY));
    // The very first Advance() must report that there are no hits at all.
    EXPECT_THAT(doc_hit_info_iter->Advance(),
                StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));

    ICING_ASSERT_OK_AND_ASSIGN(
        doc_hit_info_iter,
        index->GetIterator("bar", /*term_start_index=*/0,
                           /*unnormalized_term_length=*/0, kSectionIdMaskAll,
                           TermMatchType::EXACT_ONLY));
    EXPECT_THAT(doc_hit_info_iter->Advance(),
                StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
  }
}
3494
TEST_F(IcingSearchEngineInitializationTest,
       RestoreIndexTruncateIntegerIndexWithoutReindexing) {
  // Test the following scenario: integer index is *completely* ahead of
  // document store. IcingSearchEngine should be able to recover integer index.
  // Several additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Integer index directory should be unaffected.
  //   - Qualified id join index directory should be unaffected.
  // - Truncate indices:
  //   - "TruncateTo()" for term index shouldn't take effect.
  //   - "Clear()" should be called for integer index and throw out all integer
  //     index storages, i.e. all storage sub directories (path_expr =
  //     "*/integer_index_dir/*") should be discarded. This should be sufficient
  //     to make integer index consistent with document store (in this case,
  //     document store is empty as well), so reindexing should not take place.
  //   - "Clear()" should be called for qualified id join index. It is a special
  //     case when document store has no document.

  // 1. Create an index with no document. The engine is destroyed at the end of
  //    this scope, before the integer index is opened directly in step 2.
  {
    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());

    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
  }

  // 2. Manually add some data into integer index and increment
  //    last_added_document_id. This will cause mismatched document id with
  //    document store.
  //    - Document store: []
  //    - Term index: []
  //    - Integer index: [0]
  //    - Qualified id join index: []
  {
    Filesystem filesystem;
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<IntegerIndex> integer_index,
        IntegerIndex::Create(filesystem, GetIntegerIndexDir(),
                             /*num_data_threshold_for_bucket_split=*/65536,
                             /*pre_mapping_fbv=*/false));
    // Add hits for document 0.
    ASSERT_THAT(integer_index->last_added_document_id(), kInvalidDocumentId);
    integer_index->set_last_added_document_id(0);
    std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit(
        /*property_path=*/"indexableInteger", /*document_id=*/0,
        /*section_id=*/0);
    ICING_ASSERT_OK(editor->BufferKey(123));
    ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
  }

  // 3. Create the index again. This should trigger index restoration.
  {
    // Mock filesystem to observe and check the behavior of all indices.
    // The catch-all expectation is declared first so that the more specific
    // path expectations declared after it take precedence when matching.
    auto mock_filesystem = std::make_unique<MockFilesystem>();
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
        .WillRepeatedly(DoDefault());
    // Ensure term index directory should never be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/index_dir")))
        .Times(0);
    // Ensure integer index directory should never be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
        .Times(0);
    // Clear() should be called to truncate integer index and thus storage sub
    // directory (path_expr = "*/integer_index_dir/*") should be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
        .Times(1);
    // Ensure qualified id join index directory should never be discarded.
    // Clear() was called and should discard and reinitialize the underlying
    // mapper.
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                      EndsWith("/qualified_id_join_index_dir")))
        .Times(0);
    EXPECT_CALL(
        *mock_filesystem,
        DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
        .Times(AtLeast(1));

    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::move(mock_filesystem),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    InitializeResultProto initialize_result = icing.Initialize();
    ASSERT_THAT(initialize_result.status(), ProtoIsOk());
    EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    // Since truncating integer index is sufficient to make it consistent with
    // document store, replaying documents or reindexing shouldn't take place.
    EXPECT_THAT(
        initialize_result.initialize_stats().integer_index_restoration_cause(),
        Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(initialize_result.initialize_stats()
                    .qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));

    // Verify that numeric index safely wiped out the pre-existing hit for
    // 'indexableInteger' == 123. Add a new document without that value for
    // 'indexableInteger' that will take docid=0. If the integer index was not
    // rebuilt correctly, then it will still have the previously added hit for
    // 'indexableInteger' == 123 for docid 0 and incorrectly return this new
    // doc in a query.
    DocumentProto another_message =
        DocumentBuilder()
            .SetKey("namespace", "message/1")
            .SetSchema("Message")
            .AddStringProperty("body", kIpsumText)
            .AddInt64Property("indexableInteger", 456)
            .SetCreationTimestampMs(kDefaultCreationTimestampMs)
            .Build();
    EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
    // Verify integer index works normally
    SearchSpecProto search_spec;
    search_spec.set_query("indexableInteger == 123");
    search_spec.add_enabled_features(std::string(kNumericSearchFeature));

    SearchResultProto results =
        icing.Search(search_spec, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    // Check the status explicitly: an empty result list alone cannot
    // distinguish "stale hit was wiped" from "the search itself failed".
    EXPECT_THAT(results.status(), ProtoIsOk());
    EXPECT_THAT(results.results(), IsEmpty());
  }
}
3622
TEST_F(IcingSearchEngineInitializationTest,
       RestoreIndexTruncateIntegerIndexWithReindexing) {
  // Test the following scenario: integer index is *partially* ahead of document
  // store. IcingSearchEngine should be able to recover integer index. Several
  // additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Integer index directory should be unaffected.
  //   - Qualified id join index directory should be unaffected.
  // - Truncate indices:
  //   - "TruncateTo()" for term index shouldn't take effect.
  //   - "Clear()" should be called for integer index and throw out all integer
  //     index storages, i.e. all storage sub directories (path_expr =
  //     "*/integer_index_dir/*") should be discarded. However, some valid data
  //     in integer index were discarded together, so reindexing should still
  //     take place to recover them after clearing.
  //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
  //     underlying storage sub directory (path_expr =
  //     "*/qualified_id_join_index_dir/*") should be discarded.

  // "indexableInteger" is optional here so that step 3 can add a message
  // without an integer value.
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_OPTIONAL))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", kIpsumText)
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  // 1. Create an index with 1 person document and 3 message documents.
  {
    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());

    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());

    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/2").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/3").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
  }

  // 2. Manually add some data into integer index and increment
  //    last_added_document_id. This will cause mismatched document id with
  //    document store.
  //    - Document store: [0, 1, 2, 3]
  //    - Term index: [0, 1, 2, 3]
  //    - Integer index: [0, 1, 2, 3, 4]
  //    - Qualified id join index: [0, 1, 2, 3]
  {
    Filesystem filesystem;
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<IntegerIndex> integer_index,
        IntegerIndex::Create(filesystem, GetIntegerIndexDir(),
                             /*num_data_threshold_for_bucket_split=*/65536,
                             /*pre_mapping_fbv=*/false));
    // Add hits for document 4.
    DocumentId original_last_added_doc_id =
        integer_index->last_added_document_id();
    integer_index->set_last_added_document_id(original_last_added_doc_id + 1);
    std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit(
        /*property_path=*/"indexableInteger",
        /*document_id=*/original_last_added_doc_id + 1, /*section_id=*/0);
    ICING_ASSERT_OK(editor->BufferKey(456));
    ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
  }

  // 3. Create the index again. This should trigger index restoration.
  {
    // Mock filesystem to observe and check the behavior of all indices.
    // The catch-all expectation is declared first so that the more specific
    // path expectations declared after it take precedence when matching.
    auto mock_filesystem = std::make_unique<MockFilesystem>();
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
        .WillRepeatedly(DoDefault());
    // Ensure term index directory should never be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/index_dir")))
        .Times(0);
    // Ensure integer index directory should never be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
        .Times(0);
    // Clear() should be called to truncate integer index and thus storage sub
    // directory (path_expr = "*/integer_index_dir/*") should be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
        .Times(1);
    // Ensure qualified id join index directory should never be discarded, and
    // Clear() should never be called (i.e. storage sub directory
    // "*/qualified_id_join_index_dir/*" should never be discarded).
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                      EndsWith("/qualified_id_join_index_dir")))
        .Times(0);
    EXPECT_CALL(
        *mock_filesystem,
        DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
        .Times(0);

    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::move(mock_filesystem),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    InitializeResultProto initialize_result = icing.Initialize();
    ASSERT_THAT(initialize_result.status(), ProtoIsOk());
    EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(
        initialize_result.initialize_stats().integer_index_restoration_cause(),
        Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
    EXPECT_THAT(initialize_result.initialize_stats()
                    .qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));

    // Verify term index works normally
    SearchSpecProto search_spec1;
    search_spec1.set_query("body:consectetur");
    search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
    SearchResultProto results1 =
        icing.Search(search_spec1, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(results1.status(), ProtoIsOk());
    EXPECT_THAT(results1.next_page_token(), Eq(0));
    // All documents should be retrievable.
    ASSERT_THAT(results1.results(), SizeIs(3));
    EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));

    // Verify integer index works normally
    SearchSpecProto search_spec2;
    search_spec2.set_query("indexableInteger == 123");
    search_spec2.add_enabled_features(std::string(kNumericSearchFeature));

    SearchResultProto results2 =
        icing.Search(search_spec2, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    ASSERT_THAT(results2.results(), SizeIs(3));
    EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));

    // Verify qualified id join index works normally: join a query for
    // `name:person` with a child query for `body:consectetur` based on the
    // child's `senderQualifiedId` field.
    SearchSpecProto search_spec3;
    search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
    search_spec3.set_query("name:person");
    JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
    join_spec->set_parent_property_expression(
        std::string(JoinProcessor::kQualifiedIdExpr));
    join_spec->set_child_property_expression("senderQualifiedId");
    join_spec->set_aggregation_scoring_strategy(
        JoinSpecProto::AggregationScoringStrategy::COUNT);
    JoinSpecProto::NestedSpecProto* nested_spec =
        join_spec->mutable_nested_spec();
    SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
    nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
    nested_search_spec->set_query("body:consectetur");
    *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
    *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();

    ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
    result_spec3.set_max_joined_children_per_parent_to_return(
        std::numeric_limits<int32_t>::max());

    SearchResultProto results3 = icing.Search(
        search_spec3, ScoringSpecProto::default_instance(), result_spec3);
    ASSERT_THAT(results3.results(), SizeIs(1));
    EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
    // Fatal assertion: the indexed joined_results(i) accesses below must not
    // run if the size is not exactly 3.
    ASSERT_THAT(results3.results(0).joined_results(), SizeIs(3));
    EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
                Eq("message/3"));
    EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
                Eq("message/2"));
    EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
                Eq("message/1"));

    // Verify that numeric index safely wiped out the pre-existing hit for
    // 'indexableInteger' == 456. Add a new document without any value for
    // 'indexableInteger' that will take docid=4 (the document store currently
    // holds docids [0, 3]). If the integer index was not rebuilt correctly,
    // then it will still have the previously added hit for
    // 'indexableInteger' == 456 for docid 4 and incorrectly return this new
    // doc in a query.
    DocumentProto another_message =
        DocumentBuilder()
            .SetKey("namespace", "message/4")
            .SetSchema("Message")
            .AddStringProperty("body", kIpsumText)
            .AddStringProperty("senderQualifiedId", "namespace#person")
            .SetCreationTimestampMs(kDefaultCreationTimestampMs)
            .Build();
    EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
    // Verify integer index works normally
    SearchSpecProto search_spec;
    search_spec.set_query("indexableInteger == 456");
    search_spec.add_enabled_features(std::string(kNumericSearchFeature));

    SearchResultProto results =
        icing.Search(search_spec, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    // Check the status explicitly: an empty result list alone cannot
    // distinguish "stale hit was wiped" from "the search itself failed".
    EXPECT_THAT(results.status(), ProtoIsOk());
    EXPECT_THAT(results.results(), IsEmpty());
  }
}
3863
TEST_F(IcingSearchEngineInitializationTest,
       RestoreIndexTruncateQualifiedIdJoinIndexWithoutReindexing) {
  // Test the following scenario: qualified id join index is *completely* ahead
  // of document store. IcingSearchEngine should be able to recover qualified id
  // join index. Several additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Integer index directory should be unaffected.
  //   - Qualified id join index directory should be unaffected.
  // - Truncate indices:
  //   - "TruncateTo()" for term index shouldn't take effect.
  //   - "Clear()" should be called for integer index. It is a special case when
  //     document store has no document. Since there is no integer index storage
  //     sub directories (path_expr = "*/integer_index_dir/*"), nothing will be
  //     discarded.
  //   - "Clear()" should be called for qualified id join index and throw out
  //     all data, i.e. discarding the underlying mapper (path_expr =
  //     "*/qualified_id_join_index_dir/*") and reinitialize. This should be
  //     sufficient to make qualified id join index consistent with document
  //     store (in this case, document store is empty as well), so reindexing
  //     should not take place.

  // 1. Create an index with no document. The engine is destroyed at the end of
  //    this scope, before the join index is opened directly in step 2.
  {
    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());

    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
  }

  // 2. Manually add some data into qualified id join index and increment
  //    last_added_document_id. This will cause mismatched document id with
  //    document store.
  //    - Document store: []
  //    - Term index: []
  //    - Integer index: []
  //    - Qualified id join index: [0]
  {
    Filesystem filesystem;
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
        QualifiedIdJoinIndexImplV2::Create(filesystem,
                                           GetQualifiedIdJoinIndexDir(),
                                           /*pre_mapping_fbv=*/false));
    // Add data for document 0.
    ASSERT_THAT(qualified_id_join_index->last_added_document_id(),
                kInvalidDocumentId);
    qualified_id_join_index->set_last_added_document_id(0);
    ICING_ASSERT_OK(qualified_id_join_index->Put(
        /*schema_type_id=*/0, /*joinable_property_id=*/0, /*document_id=*/0,
        /*ref_namespace_fingerprint_ids=*/
        {NamespaceFingerprintIdentifier(/*namespace_id=*/0,
                                        /*target_str=*/"uri")}));
  }

  // 3. Create the index again. This should trigger index restoration.
  {
    // Mock filesystem to observe and check the behavior of all indices.
    // The catch-all expectation is declared first so that the more specific
    // path expectations declared after it take precedence when matching.
    auto mock_filesystem = std::make_unique<MockFilesystem>();
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
        .WillRepeatedly(DoDefault());
    // Ensure term index directory should never be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/index_dir")))
        .Times(0);
    // Ensure integer index directory should never be discarded. Even though
    // Clear() was called, it shouldn't take effect since there is no storage
    // sub directory ("*/integer_index_dir/*") and nothing will be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
        .Times(0);
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
        .Times(0);
    // Ensure qualified id join index directory should never be discarded.
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                      EndsWith("/qualified_id_join_index_dir")))
        .Times(0);
    // Clear() should be called to truncate qualified id join index and thus
    // underlying storage sub directory (path_expr =
    // "*/qualified_id_join_index_dir/*") should be discarded.
    EXPECT_CALL(
        *mock_filesystem,
        DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
        .Times(AtLeast(1));

    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::move(mock_filesystem),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    InitializeResultProto initialize_result = icing.Initialize();
    ASSERT_THAT(initialize_result.status(), ProtoIsOk());
    EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(
        initialize_result.initialize_stats().integer_index_restoration_cause(),
        Eq(InitializeStatsProto::NONE));
    // Since truncating qualified id join index is sufficient to make it
    // consistent with document store, replaying documents or reindexing
    // shouldn't take place.
    EXPECT_THAT(initialize_result.initialize_stats()
                    .qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
  }

  // 4. Since document 0 doesn't exist, testing join query is not enough to
  //    verify the correctness of qualified id join index restoration. Instead,
  //    we have to check the previously added data should not be found in
  //    qualified id join index. The index is reopened directly, using the same
  //    (schema_type_id, joinable_property_id) pair that step 2 wrote to.
  {
    Filesystem filesystem;
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
        QualifiedIdJoinIndexImplV2::Create(filesystem,
                                           GetQualifiedIdJoinIndexDir(),
                                           /*pre_mapping_fbv=*/false));
    ICING_ASSERT_OK_AND_ASSIGN(
        auto iterator, qualified_id_join_index->GetIterator(
                           /*schema_type_id=*/0, /*joinable_property_id=*/0));
    // The very first Advance() is expected to report RESOURCE_EXHAUSTED.
    EXPECT_THAT(iterator->Advance(),
                StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
  }
}
3990
TEST_F(IcingSearchEngineInitializationTest,
       RestoreIndexTruncateQualifiedIdJoinIndexWithReindexing) {
  // Test the following scenario: qualified id join index is *partially* ahead
  // of document store. IcingSearchEngine should be able to recover qualified id
  // join index. Several additional behaviors are also tested:
  // - Index directory handling:
  //   - Term index directory should be unaffected.
  //   - Integer index directory should be unaffected.
  //   - Qualified id join index directory should be unaffected.
  // - Truncate indices:
  //   - "TruncateTo()" for term index shouldn't take effect.
  //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
  //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
  //     discarded.
  //   - "Clear()" should be called for qualified id join index and throw out
  //     all data, i.e. discarding the underlying mapper (path_expr =
  //     "*/qualified_id_join_index_dir/*") and reinitialize. However, some
  //     valid data in qualified id join index were discarded together, so
  //     reindexing should still take place to recover them after clearing.

  // "senderQualifiedId" is optional here so that step 3 can add a message
  // without a sender.
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", kIpsumText)
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  // 1. Create an index with 1 person document and 3 message documents.
  {
    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());

    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());

    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/2").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
    message = DocumentBuilder(message).SetUri("message/3").Build();
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
  }

  // 2. Manually add some data into qualified id join index and increment
  //    last_added_document_id. This will cause mismatched document id with
  //    document store.
  //    - Document store: [0, 1, 2, 3]
  //    - Term index: [0, 1, 2, 3]
  //    - Integer index: [0, 1, 2, 3]
  //    - Qualified id join index: [0, 1, 2, 3, 4]
  {
    Filesystem filesystem;
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
        QualifiedIdJoinIndexImplV2::Create(filesystem,
                                           GetQualifiedIdJoinIndexDir(),
                                           /*pre_mapping_fbv=*/false));
    // Add data for document 4.
    DocumentId original_last_added_doc_id =
        qualified_id_join_index->last_added_document_id();
    qualified_id_join_index->set_last_added_document_id(
        original_last_added_doc_id + 1);
    ICING_ASSERT_OK(qualified_id_join_index->Put(
        /*schema_type_id=*/1, /*joinable_property_id=*/0,
        /*document_id=*/original_last_added_doc_id + 1,
        /*ref_namespace_fingerprint_ids=*/
        {NamespaceFingerprintIdentifier(/*namespace_id=*/0,
                                        /*target_str=*/"person")}));
  }

  // 3. Create the index again. This should trigger index restoration.
  {
    // Mock filesystem to observe and check the behavior of all indices.
    // The catch-all expectation is declared first so that the more specific
    // path expectations declared after it take precedence when matching.
    auto mock_filesystem = std::make_unique<MockFilesystem>();
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
        .WillRepeatedly(DoDefault());
    // Ensure term index directory should never be discarded.
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/index_dir")))
        .Times(0);
    // Ensure integer index directory should never be discarded, and Clear()
    // should never be called (i.e. storage sub directory
    // "*/integer_index_dir/*" should never be discarded).
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
        .Times(0);
    EXPECT_CALL(*mock_filesystem,
                DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
        .Times(0);
    // Ensure qualified id join index directory should never be discarded.
    EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
                                      EndsWith("/qualified_id_join_index_dir")))
        .Times(0);
    // Clear() should be called to truncate qualified id join index and thus
    // underlying storage sub directory (path_expr =
    // "*/qualified_id_join_index_dir/*") should be discarded.
    EXPECT_CALL(
        *mock_filesystem,
        DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
        .Times(AtLeast(1));

    TestIcingSearchEngine icing(
        GetDefaultIcingOptions(), std::move(mock_filesystem),
        std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
        GetTestJniCache());
    InitializeResultProto initialize_result = icing.Initialize();
    ASSERT_THAT(initialize_result.status(), ProtoIsOk());
    EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(
        initialize_result.initialize_stats().integer_index_restoration_cause(),
        Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(initialize_result.initialize_stats()
                    .qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));

    // Verify term index works normally
    SearchSpecProto search_spec1;
    search_spec1.set_query("body:consectetur");
    search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
    SearchResultProto results1 =
        icing.Search(search_spec1, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    EXPECT_THAT(results1.status(), ProtoIsOk());
    EXPECT_THAT(results1.next_page_token(), Eq(0));
    // All documents should be retrievable.
    ASSERT_THAT(results1.results(), SizeIs(3));
    EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));

    // Verify integer index works normally
    SearchSpecProto search_spec2;
    search_spec2.set_query("indexableInteger == 123");
    search_spec2.add_enabled_features(std::string(kNumericSearchFeature));

    SearchResultProto results2 =
        icing.Search(search_spec2, ScoringSpecProto::default_instance(),
                     ResultSpecProto::default_instance());
    ASSERT_THAT(results2.results(), SizeIs(3));
    EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
    EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
    EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));

    // Add document 4 without "senderQualifiedId". If join index is not rebuilt
    // correctly, then it will still have the previously added senderQualifiedId
    // for document 4 and include document 4 incorrectly in the right side.
    DocumentProto another_message =
        DocumentBuilder()
            .SetKey("namespace", "message/4")
            .SetSchema("Message")
            .AddStringProperty("body", kIpsumText)
            .AddInt64Property("indexableInteger", 123)
            .SetCreationTimestampMs(kDefaultCreationTimestampMs)
            .Build();
    EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());

    // Verify qualified id join index works normally: join a query for
    // `name:person` with a child query for `body:consectetur` based on the
    // child's `senderQualifiedId` field.
    SearchSpecProto search_spec3;
    search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
    search_spec3.set_query("name:person");
    JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
    join_spec->set_parent_property_expression(
        std::string(JoinProcessor::kQualifiedIdExpr));
    join_spec->set_child_property_expression("senderQualifiedId");
    join_spec->set_aggregation_scoring_strategy(
        JoinSpecProto::AggregationScoringStrategy::COUNT);
    JoinSpecProto::NestedSpecProto* nested_spec =
        join_spec->mutable_nested_spec();
    SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
    nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
    nested_search_spec->set_query("body:consectetur");
    *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
    *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();

    ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
    result_spec3.set_max_joined_children_per_parent_to_return(
        std::numeric_limits<int32_t>::max());

    SearchResultProto results3 = icing.Search(
        search_spec3, ScoringSpecProto::default_instance(), result_spec3);
    ASSERT_THAT(results3.results(), SizeIs(1));
    EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
    // Fatal assertion: the indexed joined_results(i) accesses below must not
    // run if the size is not exactly 3. Only messages 1-3 are expected;
    // message/4 has no sender and must not be joined.
    ASSERT_THAT(results3.results(0).joined_results(), SizeIs(3));
    EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
                Eq("message/3"));
    EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
                Eq("message/2"));
    EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
                Eq("message/1"));
  }
}
4223
// Re-initializing over a corpus whose only document is made up exclusively of
// unindexed properties must not report any recovery or index restoration.
TEST_F(IcingSearchEngineInitializationTest,
       DocumentWithNoIndexedPropertyDoesntCauseRestoreIndex) {
  // Phase 1: build an instance holding one document that contributes nothing
  // to any of the indices.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());

    // The "Message" type declares a string and an int64 property, both
    // configured as unindexed (TERM_MATCH_UNKNOWN / NUMERIC_MATCH_UNKNOWN).
    SchemaProto unindexed_schema =
        SchemaBuilder()
            .AddType(
                SchemaTypeConfigBuilder()
                    .SetType("Message")
                    .AddProperty(PropertyConfigBuilder()
                                     .SetName("unindexedField")
                                     .SetDataTypeString(TERM_MATCH_UNKNOWN,
                                                        TOKENIZER_NONE)
                                     .SetCardinality(CARDINALITY_REQUIRED))
                    .AddProperty(PropertyConfigBuilder()
                                     .SetName("unindexedInteger")
                                     .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
                                     .SetCardinality(CARDINALITY_REQUIRED)))
            .Build();
    ASSERT_THAT(engine.SetSchema(unindexed_schema).status(), ProtoIsOk());

    // Store a document that only populates those unindexed properties.
    DocumentProto unindexed_document =
        DocumentBuilder()
            .SetKey("icing", "fake_type/0")
            .SetSchema("Message")
            .AddStringProperty("unindexedField",
                               "Don't you dare search over this!")
            .AddInt64Property("unindexedInteger", -123)
            .Build();
    EXPECT_THAT(engine.Put(unindexed_document).status(), ProtoIsOk());
  }

  // Phase 2: bring up a second instance over the same data. No component
  // should report a recovery or restoration cause.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());

    const InitializeStatsProto& stats = init_result.initialize_stats();
    EXPECT_THAT(stats.document_store_data_status(),
                Eq(InitializeStatsProto::NO_DATA_LOSS));
    EXPECT_THAT(stats.document_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.schema_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.integer_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
  }
}
4283
// Re-initializing over a corpus whose only document has indexable properties
// but no content that ends up in any index must not report any recovery or
// index restoration.
TEST_F(IcingSearchEngineInitializationTest,
       DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) {
  // Phase 1: build an instance holding one document whose content yields no
  // valid indexed tokens.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());

    // The "Message" type has an indexed string property (required), plus an
    // indexed integer property and a joinable qualified id property (both
    // optional).
    SchemaProto message_schema =
        SchemaBuilder()
            .AddType(
                SchemaTypeConfigBuilder()
                    .SetType("Message")
                    .AddProperty(PropertyConfigBuilder()
                                     .SetName("body")
                                     .SetDataTypeString(TERM_MATCH_PREFIX,
                                                        TOKENIZER_PLAIN)
                                     .SetCardinality(CARDINALITY_REQUIRED))
                    .AddProperty(PropertyConfigBuilder()
                                     .SetName("indexableInteger")
                                     .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                     .SetCardinality(CARDINALITY_OPTIONAL))
                    .AddProperty(PropertyConfigBuilder()
                                     .SetName("senderQualifiedId")
                                     .SetDataTypeJoinableString(
                                         JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                     .SetCardinality(CARDINALITY_OPTIONAL)))
            .Build();
    ASSERT_THAT(engine.SetSchema(message_schema).status(), ProtoIsOk());

    // The document provides:
    //   - "body": punctuation only, i.e. no valid indexed string content.
    //   - "indexableInteger": omitted (the property is optional).
    //   - "senderQualifiedId": omitted (the property is optional).
    DocumentProto punctuation_only_document =
        DocumentBuilder()
            .SetKey("icing", "fake_type/0")
            .SetSchema("Message")
            .AddStringProperty("body", "?...!")
            .Build();
    EXPECT_THAT(engine.Put(punctuation_only_document).status(), ProtoIsOk());
  }

  // Phase 2: bring up a second instance over the same data. No component
  // should report a recovery or restoration cause.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());

    const InitializeStatsProto& stats = init_result.initialize_stats();
    EXPECT_THAT(stats.document_store_data_status(),
                Eq(InitializeStatsProto::NO_DATA_LOSS));
    EXPECT_THAT(stats.document_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.schema_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.integer_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
  }
}
4349
// Initialize() should report its own latency in the initialize stats. The fake
// clock is configured with a timer elapsed time of 10ms, which is the latency
// the test expects to see reported.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogFunctionLatency) {
  std::unique_ptr<FakeClock> clock = std::make_unique<FakeClock>();
  clock->SetTimerElapsedMilliseconds(10);

  TestIcingSearchEngine engine(
      GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
      std::make_unique<IcingFilesystem>(), std::move(clock),
      GetTestJniCache());

  InitializeResultProto init_result = engine.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());

  const InitializeStatsProto& stats = init_result.initialize_stats();
  EXPECT_THAT(stats.latency_ms(), Eq(10));
}
4362
// Initialize() should report, via initialize_stats().num_documents(), how many
// documents were put by earlier instances over the same data: 0, then 1,
// then 2.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogNumberOfDocuments) {
  DocumentProto first_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema("Message")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 123)
          .Build();
  DocumentProto second_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/2")
          .SetSchema("Message")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 456)
          .Build();

  // First instance: nothing stored yet, then add the first document.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    EXPECT_THAT(init_result.initialize_stats().num_documents(), Eq(0));

    ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());
    ASSERT_THAT(engine.Put(first_document).status(), ProtoIsOk());
  }

  // Second instance: sees the first document, then adds the second one.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    EXPECT_THAT(init_result.initialize_stats().num_documents(), Eq(1));

    ASSERT_THAT(engine.Put(second_document).status(), ProtoIsOk());
  }

  // Third instance: sees both documents.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    EXPECT_THAT(init_result.initialize_stats().num_documents(), Eq(2));
  }
}
4409
// A first-time initialization should report no recovery / restoration cause
// and zero recovery / restoration latency for every component.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) {
  // The fake timer is set to 10ms, yet every recovery / restoration latency
  // checked below is expected to be 0 on first-time initialization.
  std::unique_ptr<FakeClock> clock = std::make_unique<FakeClock>();
  clock->SetTimerElapsedMilliseconds(10);

  TestIcingSearchEngine engine(
      GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
      std::make_unique<IcingFilesystem>(), std::move(clock),
      GetTestJniCache());

  InitializeResultProto init_result = engine.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());

  const InitializeStatsProto& stats = init_result.initialize_stats();

  // Document store.
  EXPECT_THAT(stats.document_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
  EXPECT_THAT(stats.document_store_data_status(),
              Eq(InitializeStatsProto::NO_DATA_LOSS));

  // Term, integer and qualified id join indices.
  EXPECT_THAT(stats.index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.integer_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(0));

  // Schema store.
  EXPECT_THAT(stats.schema_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
}
4450
// Appending non-checksummed bytes to the document log should make the next
// Initialize() report a DATA_LOSS recovery with PARTIAL_LOSS data status for
// the document store, plus a DEPENDENCIES_CHANGED restoration of the qualified
// id join index.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogRecoveryCausePartialDataLoss) {
  DocumentProto document = DocumentBuilder()
                               .SetKey("icing", "fake_type/0")
                               .SetSchema("Message")
                               .AddStringProperty("body", "message body")
                               .AddInt64Property("indexableInteger", 123)
                               .Build();

  {
    // Initialize and put a document.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
  }

  {
    // Append a non-checksummed document. This will mess up the checksum of the
    // proto log, forcing it to rewind and later return a DATA_LOSS error.
    const std::string serialized_document = document.SerializeAsString();
    const std::string document_log_file = absl_ports::StrCat(
        GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());

    const int64_t file_size =
        filesystem()->GetFileSize(document_log_file.c_str());
    // Abort right here if the corrupting write fails; otherwise the test
    // would fail later with a misleading recovery-cause mismatch.
    ASSERT_TRUE(filesystem()->PWrite(document_log_file.c_str(), file_size,
                                     serialized_document.data(),
                                     serialized_document.size()));
  }

  {
    // Document store will rewind to previous checkpoint. The cause should be
    // DATA_LOSS and the data status should be PARTIAL_LOSS.
    auto fake_clock = std::make_unique<FakeClock>();
    fake_clock->SetTimerElapsedMilliseconds(10);
    TestIcingSearchEngine icing(GetDefaultIcingOptions(),
                                std::make_unique<Filesystem>(),
                                std::make_unique<IcingFilesystem>(),
                                std::move(fake_clock), GetTestJniCache());
    InitializeResultProto initialize_result_proto = icing.Initialize();
    EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());

    const InitializeStatsProto& stats =
        initialize_result_proto.initialize_stats();
    EXPECT_THAT(stats.document_store_recovery_cause(),
                Eq(InitializeStatsProto::DATA_LOSS));
    EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(10));
    EXPECT_THAT(stats.document_store_data_status(),
                Eq(InitializeStatsProto::PARTIAL_LOSS));
    // Document store rewinds to previous checkpoint and all derived files were
    // regenerated.
    // - Last stored doc id will be consistent with last added document ids in
    //   term/integer indices, so there will be no index restoration.
    // - Qualified id join index depends on document store derived files and
    //   since they were regenerated, we should rebuild qualified id join index.
    EXPECT_THAT(stats.index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.integer_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
    EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(10));
    EXPECT_THAT(stats.schema_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
  }
}
4527
// Overwriting the first stored document in the document log (and marking the
// log header dirty) should make the next Initialize() report a DATA_LOSS
// recovery with COMPLETE_LOSS data status, without any index restoration.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogRecoveryCauseCompleteDataLoss) {
  DocumentProto original_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema("Message")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 123)
          .Build();

  const std::string log_path = absl_ports::StrCat(
      GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
  // Offset of the first document in the log; assigned in the scope below.
  int64_t corruptible_offset;

  // Step 1: initialize and store one document.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());

    // The start of the file (e.g. header, kmagic, etc) is needed to initialize
    // the FileBackedProtoLog and must not be corrupted. Documents are written
    // after that region, so the file size right after initialization is the
    // offset at which the first document will land.
    corruptible_offset = filesystem()->GetFileSize(log_path.c_str());

    ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());
    EXPECT_THAT(engine.Put(original_document).status(), ProtoIsOk());
  }

  // Step 2: "corrupt" the stored content. The replacement document is kept
  // smaller than the original so the write does not run past the file.
  {
    DocumentProto bogus_document =
        DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
    std::string bogus_bytes = bogus_document.SerializeAsString();
    ASSERT_TRUE(filesystem()->PWrite(log_path.c_str(), corruptible_offset,
                                     bogus_bytes.data(), bogus_bytes.size()));

    PortableFileBackedProtoLog<DocumentWrapper>::Header log_header =
        ReadDocumentLogHeader(*filesystem(), log_path);

    // Flag the log as dirty to reflect that its content changed, and refresh
    // the header checksum accordingly.
    log_header.SetDirtyFlag(true);
    log_header.SetHeaderChecksum(log_header.CalculateHeaderChecksum());

    WriteDocumentLogHeader(*filesystem(), log_path, log_header);
  }

  // Step 3: the document store rewinds completely. The cause should be
  // DATA_LOSS and the data status should be COMPLETE_LOSS.
  {
    std::unique_ptr<FakeClock> clock = std::make_unique<FakeClock>();
    clock->SetTimerElapsedMilliseconds(10);
    TestIcingSearchEngine engine(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::move(clock),
        GetTestJniCache());
    InitializeResultProto init_result = engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());

    const InitializeStatsProto& stats = init_result.initialize_stats();
    EXPECT_THAT(stats.document_store_recovery_cause(),
                Eq(InitializeStatsProto::DATA_LOSS));
    EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(10));
    EXPECT_THAT(stats.document_store_data_status(),
                Eq(InitializeStatsProto::COMPLETE_LOSS));
    // The complete rewind of ground truth causes the index to be cleared, but
    // that is not considered a restoration.
    EXPECT_THAT(stats.index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.integer_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(0));
    EXPECT_THAT(stats.schema_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
  }
}
4621
// Replacing the term index with an empty one while the document store still
// holds a document should make the next Initialize() report an
// INCONSISTENT_WITH_GROUND_TRUTH restoration for the term index only.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogRecoveryCauseIndexInconsistentWithGroundTruth) {
  DocumentProto stored_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/0")
          .SetSchema("Message")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 123)
          .Build();

  // Step 1: initialize and store one document.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    ASSERT_THAT(engine.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());
    EXPECT_THAT(engine.Put(stored_document).status(), ProtoIsOk());
  }

  // Step 2: delete the index subdirectory and re-create an empty index in its
  // place to trigger RestoreIndexIfNeeded.
  {
    std::string term_index_subdir = GetIndexDir() + "/idx";
    ASSERT_TRUE(
        filesystem()->DeleteDirectoryRecursively(term_index_subdir.c_str()));
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<Index> empty_index,
        Index::Create(Index::Options(GetIndexDir(),
                                     /*index_merge_size=*/100,
                                     /*lite_index_sort_at_indexing=*/true,
                                     /*lite_index_sort_size=*/50),
                      filesystem(), icing_filesystem()));
    ICING_ASSERT_OK(empty_index->PersistToDisk());
  }

  // Step 3: the index is empty but the ground truth is not, so the index
  // should be restored due to the inconsistency.
  {
    std::unique_ptr<FakeClock> clock = std::make_unique<FakeClock>();
    clock->SetTimerElapsedMilliseconds(10);
    TestIcingSearchEngine engine(
        GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
        std::make_unique<IcingFilesystem>(), std::move(clock),
        GetTestJniCache());
    InitializeResultProto init_result = engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());

    const InitializeStatsProto& stats = init_result.initialize_stats();

    // Only the term index reports a restoration.
    EXPECT_THAT(stats.index_restoration_cause(),
                Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
    EXPECT_THAT(stats.integer_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(10));

    // Document store and schema store are untouched.
    EXPECT_THAT(stats.document_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
    EXPECT_THAT(stats.document_store_data_status(),
                Eq(InitializeStatsProto::NO_DATA_LOSS));
    EXPECT_THAT(stats.schema_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
  }
}
4693
// Deleting the integer index directory while the document store still holds a
// document should make the next Initialize() report an
// INCONSISTENT_WITH_GROUND_TRUTH restoration for the integer index only.
TEST_F(
    IcingSearchEngineInitializationTest,
    InitializeShouldLogRecoveryCauseIntegerIndexInconsistentWithGroundTruth) {
  DocumentProto document = DocumentBuilder()
                               .SetKey("icing", "fake_type/0")
                               .SetSchema("Message")
                               .AddStringProperty("body", "message body")
                               .AddInt64Property("indexableInteger", 123)
                               .Build();
  {
    // Initialize and put a document.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
  }

  {
    // Delete the integer index file to trigger RestoreIndexIfNeeded. Assert
    // that the deletion succeeded: if it did not, the index would still be
    // intact and the expectations below would fail for a confusing reason.
    const std::string integer_index_dir = GetIntegerIndexDir();
    ASSERT_TRUE(
        filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str()));
  }

  {
    // Index is empty but ground truth is not. Index should be restored due to
    // the inconsistency.
    auto fake_clock = std::make_unique<FakeClock>();
    fake_clock->SetTimerElapsedMilliseconds(10);
    TestIcingSearchEngine icing(GetDefaultIcingOptions(),
                                std::make_unique<Filesystem>(),
                                std::make_unique<IcingFilesystem>(),
                                std::move(fake_clock), GetTestJniCache());
    InitializeResultProto initialize_result_proto = icing.Initialize();
    EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());

    const InitializeStatsProto& stats =
        initialize_result_proto.initialize_stats();

    // Only the integer index reports a restoration.
    EXPECT_THAT(stats.index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.integer_index_restoration_cause(),
                Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
    EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(10));

    // Document store and schema store are untouched.
    EXPECT_THAT(stats.document_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
    EXPECT_THAT(stats.document_store_data_status(),
                Eq(InitializeStatsProto::NO_DATA_LOSS));
    EXPECT_THAT(stats.schema_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
  }
}
4757
// Deleting the qualified id join index directory while the document store
// still holds joinable documents should make the next Initialize() report an
// INCONSISTENT_WITH_GROUND_TRUTH restoration for the qualified id join index
// only.
TEST_F(
    IcingSearchEngineInitializationTest,
    InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexInconsistentWithGroundTruth) {
  // "Person" is the parent type; "Message" references it through the joinable
  // "senderQualifiedId" property.
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  {
    // Initialize and put documents.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
  }

  {
    // Delete the qualified id join index file to trigger RestoreIndexIfNeeded.
    // Assert that the deletion succeeded: if it did not, the index would still
    // be intact and the expectations below would fail for a confusing reason.
    const std::string qualified_id_join_index_dir =
        GetQualifiedIdJoinIndexDir();
    ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(
        qualified_id_join_index_dir.c_str()));
  }

  {
    // Index is empty but ground truth is not. Index should be restored due to
    // the inconsistency.
    auto fake_clock = std::make_unique<FakeClock>();
    fake_clock->SetTimerElapsedMilliseconds(10);
    TestIcingSearchEngine icing(GetDefaultIcingOptions(),
                                std::make_unique<Filesystem>(),
                                std::make_unique<IcingFilesystem>(),
                                std::move(fake_clock), GetTestJniCache());
    InitializeResultProto initialize_result_proto = icing.Initialize();
    EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());

    const InitializeStatsProto& stats =
        initialize_result_proto.initialize_stats();

    // Only the qualified id join index reports a restoration.
    EXPECT_THAT(stats.index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.integer_index_restoration_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
                Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
    EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(10));

    // Document store and schema store are untouched.
    EXPECT_THAT(stats.document_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
    EXPECT_THAT(stats.document_store_data_status(),
                Eq(InitializeStatsProto::NO_DATA_LOSS));
    EXPECT_THAT(stats.schema_store_recovery_cause(),
                Eq(InitializeStatsProto::NONE));
    EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
  }
}
4859
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCauseSchemaChangesOutOfSync)4860 TEST_F(IcingSearchEngineInitializationTest,
4861 InitializeShouldLogRecoveryCauseSchemaChangesOutOfSync) {
4862 DocumentProto document = DocumentBuilder()
4863 .SetKey("icing", "fake_type/0")
4864 .SetSchema("Message")
4865 .AddStringProperty("body", "message body")
4866 .AddInt64Property("indexableInteger", 123)
4867 .Build();
4868 IcingSearchEngineOptions options = GetDefaultIcingOptions();
4869 {
4870 // Initialize and put one document.
4871 IcingSearchEngine icing(options, GetTestJniCache());
4872 ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
4873 ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
4874 ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
4875 }
4876
4877 {
4878 // Simulate a schema change where power is lost after the schema is written.
4879 SchemaProto new_schema =
4880 SchemaBuilder()
4881 .AddType(
4882 SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
4883 .AddProperty(PropertyConfigBuilder()
4884 .SetName("subject")
4885 .SetDataTypeString(TERM_MATCH_PREFIX,
4886 TOKENIZER_PLAIN)
4887 .SetCardinality(CARDINALITY_OPTIONAL)))
4888 .Build();
4889 // Write the marker file
4890 std::string marker_filepath =
4891 absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
4892 ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
4893 ASSERT_TRUE(sfd.is_valid());
4894
4895 // Write the new schema
4896 FakeClock fake_clock;
4897 ICING_ASSERT_OK_AND_ASSIGN(
4898 std::unique_ptr<SchemaStore> schema_store,
4899 SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
4900 ICING_EXPECT_OK(schema_store->SetSchema(
4901 new_schema, /*ignore_errors_and_delete_documents=*/false,
4902 /*allow_circular_schema_definitions=*/false));
4903 }
4904
4905 {
4906 // Both document store and index should be recovered from checksum mismatch.
4907 auto fake_clock = std::make_unique<FakeClock>();
4908 fake_clock->SetTimerElapsedMilliseconds(10);
4909 TestIcingSearchEngine icing(GetDefaultIcingOptions(),
4910 std::make_unique<Filesystem>(),
4911 std::make_unique<IcingFilesystem>(),
4912 std::move(fake_clock), GetTestJniCache());
4913 InitializeResultProto initialize_result_proto = icing.Initialize();
4914 EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
4915 EXPECT_THAT(
4916 initialize_result_proto.initialize_stats().index_restoration_cause(),
4917 Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
4918 EXPECT_THAT(initialize_result_proto.initialize_stats()
4919 .integer_index_restoration_cause(),
4920 Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
4921 EXPECT_THAT(initialize_result_proto.initialize_stats()
4922 .qualified_id_join_index_restoration_cause(),
4923 Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
4924 EXPECT_THAT(initialize_result_proto.initialize_stats()
4925 .index_restoration_latency_ms(),
4926 Eq(10));
4927 EXPECT_THAT(initialize_result_proto.initialize_stats()
4928 .document_store_recovery_cause(),
4929 Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
4930 EXPECT_THAT(initialize_result_proto.initialize_stats()
4931 .document_store_recovery_latency_ms(),
4932 Eq(10));
4933 EXPECT_THAT(
4934 initialize_result_proto.initialize_stats().document_store_data_status(),
4935 Eq(InitializeStatsProto::NO_DATA_LOSS));
4936 EXPECT_THAT(initialize_result_proto.initialize_stats()
4937 .schema_store_recovery_cause(),
4938 Eq(InitializeStatsProto::NONE));
4939 EXPECT_THAT(initialize_result_proto.initialize_stats()
4940 .schema_store_recovery_latency_ms(),
4941 Eq(0));
4942 }
4943
4944 {
4945 // No recovery should be needed.
4946 auto fake_clock = std::make_unique<FakeClock>();
4947 fake_clock->SetTimerElapsedMilliseconds(10);
4948 TestIcingSearchEngine icing(GetDefaultIcingOptions(),
4949 std::make_unique<Filesystem>(),
4950 std::make_unique<IcingFilesystem>(),
4951 std::move(fake_clock), GetTestJniCache());
4952 InitializeResultProto initialize_result_proto = icing.Initialize();
4953 EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
4954 EXPECT_THAT(
4955 initialize_result_proto.initialize_stats().index_restoration_cause(),
4956 Eq(InitializeStatsProto::NONE));
4957 EXPECT_THAT(initialize_result_proto.initialize_stats()
4958 .integer_index_restoration_cause(),
4959 Eq(InitializeStatsProto::NONE));
4960 EXPECT_THAT(initialize_result_proto.initialize_stats()
4961 .qualified_id_join_index_restoration_cause(),
4962 Eq(InitializeStatsProto::NONE));
4963 EXPECT_THAT(initialize_result_proto.initialize_stats()
4964 .index_restoration_latency_ms(),
4965 Eq(0));
4966 EXPECT_THAT(initialize_result_proto.initialize_stats()
4967 .document_store_recovery_cause(),
4968 Eq(InitializeStatsProto::NONE));
4969 EXPECT_THAT(initialize_result_proto.initialize_stats()
4970 .document_store_recovery_latency_ms(),
4971 Eq(0));
4972 EXPECT_THAT(
4973 initialize_result_proto.initialize_stats().document_store_data_status(),
4974 Eq(InitializeStatsProto::NO_DATA_LOSS));
4975 EXPECT_THAT(initialize_result_proto.initialize_stats()
4976 .schema_store_recovery_cause(),
4977 Eq(InitializeStatsProto::NONE));
4978 EXPECT_THAT(initialize_result_proto.initialize_stats()
4979 .schema_store_recovery_latency_ms(),
4980 Eq(0));
4981 }
4982 }
4983
// Checks the stats logged by Initialize() when the term index hits an I/O
// error: opening the lite index buffer for write is made to fail once, and the
// term index is the only component expected to report a restoration cause
// (IO_ERROR).
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogRecoveryCauseIndexIOError) {
  {
    // Seed the storage with the message schema and a single document, then
    // let the engine go out of scope.
    DocumentProto seed_document =
        DocumentBuilder()
            .SetKey("icing", "fake_type/0")
            .SetSchema("Message")
            .AddStringProperty("body", "message body")
            .AddInt64Property("indexableInteger", 123)
            .Build();
    IcingSearchEngine seeding_engine(GetDefaultIcingOptions(),
                                     GetTestJniCache());
    ASSERT_THAT(seeding_engine.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(seeding_engine.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());
    ASSERT_THAT(seeding_engine.Put(seed_document).status(), ProtoIsOk());
  }

  // All OpenForWrite() calls take the default action, except the first one
  // that targets the lite index buffer file, which returns -1.
  // This fails Index::Create() once.
  std::string lite_buffer_path =
      absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb");
  auto flaky_icing_fs = std::make_unique<IcingMockFilesystem>();
  EXPECT_CALL(*flaky_icing_fs, OpenForWrite(_)).WillRepeatedly(DoDefault());
  EXPECT_CALL(*flaky_icing_fs, OpenForWrite(Eq(lite_buffer_path)))
      .WillOnce(Return(-1))
      .WillRepeatedly(DoDefault());

  // The timer is set to 10ms; the latency expectations below rely on it.
  auto clock = std::make_unique<FakeClock>();
  clock->SetTimerElapsedMilliseconds(10);
  TestIcingSearchEngine icing(
      GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
      std::move(flaky_icing_fs), std::move(clock), GetTestJniCache());

  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());

  const InitializeStatsProto& stats = init_result.initialize_stats();
  // Only the term index reports a restoration.
  EXPECT_THAT(stats.index_restoration_cause(),
              Eq(InitializeStatsProto::IO_ERROR));
  EXPECT_THAT(stats.integer_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(10));
  // Document store and schema store are untouched.
  EXPECT_THAT(stats.document_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
  EXPECT_THAT(stats.document_store_data_status(),
              Eq(InitializeStatsProto::NO_DATA_LOSS));
  EXPECT_THAT(stats.schema_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
}
5048
// Checks the stats logged by Initialize() when the integer index hits an I/O
// error: opening the integer index metadata file for write is made to fail
// once, and the integer index is the only component expected to report a
// restoration cause (IO_ERROR).
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogRecoveryCauseIntegerIndexIOError) {
  {
    // Seed the storage with the message schema and a single document, then
    // let the engine go out of scope.
    DocumentProto seed_document =
        DocumentBuilder()
            .SetKey("icing", "fake_type/0")
            .SetSchema("Message")
            .AddStringProperty("body", "message body")
            .AddInt64Property("indexableInteger", 123)
            .Build();
    IcingSearchEngine seeding_engine(GetDefaultIcingOptions(),
                                     GetTestJniCache());
    ASSERT_THAT(seeding_engine.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(seeding_engine.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());
    ASSERT_THAT(seeding_engine.Put(seed_document).status(), ProtoIsOk());
  }

  // All OpenForWrite() calls take the default action, except the first one
  // that targets the integer index metadata file, which returns -1.
  // This fails IntegerIndex::Create() once.
  std::string integer_metadata_path =
      absl_ports::StrCat(GetIntegerIndexDir(), "/integer_index.m");
  auto flaky_fs = std::make_unique<MockFilesystem>();
  EXPECT_CALL(*flaky_fs, OpenForWrite(_)).WillRepeatedly(DoDefault());
  EXPECT_CALL(*flaky_fs, OpenForWrite(Eq(integer_metadata_path)))
      .WillOnce(Return(-1))
      .WillRepeatedly(DoDefault());

  // The timer is set to 10ms; the latency expectations below rely on it.
  auto clock = std::make_unique<FakeClock>();
  clock->SetTimerElapsedMilliseconds(10);
  TestIcingSearchEngine icing(
      GetDefaultIcingOptions(), std::move(flaky_fs),
      std::make_unique<IcingFilesystem>(), std::move(clock),
      GetTestJniCache());

  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());

  const InitializeStatsProto& stats = init_result.initialize_stats();
  // Only the integer index reports a restoration.
  EXPECT_THAT(stats.index_restoration_cause(), Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.integer_index_restoration_cause(),
              Eq(InitializeStatsProto::IO_ERROR));
  EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(10));
  // Document store and schema store are untouched.
  EXPECT_THAT(stats.document_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
  EXPECT_THAT(stats.document_store_data_status(),
              Eq(InitializeStatsProto::NO_DATA_LOSS));
  EXPECT_THAT(stats.schema_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
}
5111
// Checks the stats logged by Initialize() when the qualified id join index
// hits an I/O error: a PRead() on its metadata file is made to fail once, and
// the join index is the only component expected to report a restoration cause
// (IO_ERROR).
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexIOError) {
  // "Person" has a single indexed string; "Message" has an indexed string, an
  // indexed integer and a qualified-id joinable string.
  SchemaTypeConfigBuilder person_type =
      SchemaTypeConfigBuilder().SetType("Person").AddProperty(
          PropertyConfigBuilder()
              .SetName("name")
              .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
              .SetCardinality(CARDINALITY_REQUIRED));
  SchemaTypeConfigBuilder message_type =
      SchemaTypeConfigBuilder()
          .SetType("Message")
          .AddProperty(
              PropertyConfigBuilder()
                  .SetName("body")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED))
          .AddProperty(PropertyConfigBuilder()
                           .SetName("indexableInteger")
                           .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                           .SetCardinality(CARDINALITY_REQUIRED))
          .AddProperty(
              PropertyConfigBuilder()
                  .SetName("senderQualifiedId")
                  .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                  .SetCardinality(CARDINALITY_REQUIRED));
  SchemaProto schema =
      SchemaBuilder().AddType(person_type).AddType(message_type).Build();

  DocumentProto person_doc =
      DocumentBuilder()
          .SetKey("namespace", "person")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message_doc =
      DocumentBuilder()
          .SetKey("namespace", "message/1")
          .SetSchema("Message")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  {
    // Seed the storage with the schema and both documents, then let the
    // engine go out of scope.
    IcingSearchEngine seeding_engine(GetDefaultIcingOptions(),
                                     GetTestJniCache());
    ASSERT_THAT(seeding_engine.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(seeding_engine.SetSchema(schema).status(), ProtoIsOk());
    ASSERT_THAT(seeding_engine.Put(person_doc).status(), ProtoIsOk());
    ASSERT_THAT(seeding_engine.Put(message_doc).status(), ProtoIsOk());
  }

  // All path-based PRead() calls take the default action, except the first
  // one that targets the join index metadata file, which returns false.
  // This fails QualifiedIdJoinIndexImplV2::Create() once.
  std::string join_metadata_path =
      absl_ports::StrCat(GetQualifiedIdJoinIndexDir(), "/metadata");
  auto flaky_fs = std::make_unique<MockFilesystem>();
  EXPECT_CALL(*flaky_fs, PRead(A<const char*>(), _, _, _))
      .WillRepeatedly(DoDefault());
  EXPECT_CALL(*flaky_fs,
              PRead(Matcher<const char*>(Eq(join_metadata_path)), _, _, _))
      .WillOnce(Return(false))
      .WillRepeatedly(DoDefault());

  // The timer is set to 10ms; the latency expectations below rely on it.
  auto clock = std::make_unique<FakeClock>();
  clock->SetTimerElapsedMilliseconds(10);
  TestIcingSearchEngine icing(
      GetDefaultIcingOptions(), std::move(flaky_fs),
      std::make_unique<IcingFilesystem>(), std::move(clock),
      GetTestJniCache());

  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());

  const InitializeStatsProto& stats = init_result.initialize_stats();
  // Only the qualified id join index reports a restoration.
  EXPECT_THAT(stats.index_restoration_cause(), Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.integer_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::IO_ERROR));
  EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(10));
  // Document store and schema store are untouched.
  EXPECT_THAT(stats.document_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
  EXPECT_THAT(stats.document_store_data_status(),
              Eq(InitializeStatsProto::NO_DATA_LOSS));
  EXPECT_THAT(stats.schema_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
}
5215
// Checks the stats logged by Initialize() when the document store hits an I/O
// error: a Read() of the document store header file is made to fail once. The
// document store is expected to report IO_ERROR, and the qualified id join
// index is expected to report DEPENDENCIES_CHANGED.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogRecoveryCauseDocStoreIOError) {
  {
    // Seed the storage with the message schema and a single document, then
    // let the engine go out of scope.
    DocumentProto seed_document =
        DocumentBuilder()
            .SetKey("icing", "fake_type/0")
            .SetSchema("Message")
            .AddStringProperty("body", "message body")
            .AddInt64Property("indexableInteger", 123)
            .Build();
    IcingSearchEngine seeding_engine(GetDefaultIcingOptions(),
                                     GetTestJniCache());
    ASSERT_THAT(seeding_engine.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(seeding_engine.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());
    ASSERT_THAT(seeding_engine.Put(seed_document).status(), ProtoIsOk());
  }

  // All path-based Read() calls take the default action, except the first one
  // that targets the document store header, which returns false.
  // This fails DocumentStore::InitializeDerivedFiles() once.
  std::string doc_store_header_path =
      absl_ports::StrCat(GetDocumentDir(), "/document_store_header");
  auto flaky_fs = std::make_unique<MockFilesystem>();
  EXPECT_CALL(*flaky_fs, Read(A<const char*>(), _, _))
      .WillRepeatedly(DoDefault());
  EXPECT_CALL(*flaky_fs,
              Read(Matcher<const char*>(Eq(doc_store_header_path)), _, _))
      .WillOnce(Return(false))
      .WillRepeatedly(DoDefault());

  // The timer is set to 10ms; the latency expectations below rely on it.
  auto clock = std::make_unique<FakeClock>();
  clock->SetTimerElapsedMilliseconds(10);
  TestIcingSearchEngine icing(
      GetDefaultIcingOptions(), std::move(flaky_fs),
      std::make_unique<IcingFilesystem>(), std::move(clock),
      GetTestJniCache());

  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());

  const InitializeStatsProto& stats = init_result.initialize_stats();
  // The document store recovers from the I/O error without losing data.
  EXPECT_THAT(stats.document_store_recovery_cause(),
              Eq(InitializeStatsProto::IO_ERROR));
  EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(10));
  EXPECT_THAT(stats.document_store_data_status(),
              Eq(InitializeStatsProto::NO_DATA_LOSS));
  // Of the indices, only the qualified id join index reports a cause.
  EXPECT_THAT(stats.index_restoration_cause(), Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.integer_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
  EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(10));
  // The schema store is untouched.
  EXPECT_THAT(stats.schema_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
}
5281
// Checks the stats logged by Initialize() when the schema store hits an I/O
// error: the schema type mapper is removed from disk between two engine
// lifetimes, and the schema store is the only component expected to report a
// recovery cause (IO_ERROR).
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogRecoveryCauseSchemaStoreIOError) {
  {
    // Seed the storage with the message schema, then let the engine go out of
    // scope.
    IcingSearchEngine seeding_engine(GetDefaultIcingOptions(),
                                     GetTestJniCache());
    ASSERT_THAT(seeding_engine.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(seeding_engine.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());
  }

  // Delete the schema store type mapper to trigger an I/O error.
  std::string schema_type_mapper_dir = GetSchemaDir() + "/schema_type_mapper";
  ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(
      schema_type_mapper_dir.c_str()));

  // The timer is set to 10ms; the latency expectations below rely on it.
  auto clock = std::make_unique<FakeClock>();
  clock->SetTimerElapsedMilliseconds(10);
  TestIcingSearchEngine icing(
      GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
      std::make_unique<IcingFilesystem>(), std::move(clock),
      GetTestJniCache());

  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());

  const InitializeStatsProto& stats = init_result.initialize_stats();
  // Only the schema store reports a recovery.
  EXPECT_THAT(stats.schema_store_recovery_cause(),
              Eq(InitializeStatsProto::IO_ERROR));
  EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(10));
  // Document store and all indices are untouched.
  EXPECT_THAT(stats.document_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
  EXPECT_THAT(stats.document_store_data_status(),
              Eq(InitializeStatsProto::NO_DATA_LOSS));
  EXPECT_THAT(stats.index_restoration_cause(), Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.integer_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(0));
}
5336
// Checks that Initialize() logs how many schema types are stored: 0 on empty
// storage, then 1 and 2 after schemas with one and two type configs have been
// set by the preceding engine instance.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogNumberOfSchemaTypes) {
  {
    // First run: initialize an empty storage.
    IcingSearchEngine first_engine(GetDefaultIcingOptions(),
                                   GetTestJniCache());
    InitializeResultProto init_result = first_engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    // There should be 0 schema types.
    const InitializeStatsProto& stats = init_result.initialize_stats();
    EXPECT_THAT(stats.num_schema_types(), Eq(0));

    // Set a schema with one type config for the next run to pick up.
    ASSERT_THAT(first_engine.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());
  }

  {
    // Second run: the single type set above should be counted.
    IcingSearchEngine second_engine(GetDefaultIcingOptions(),
                                    GetTestJniCache());
    InitializeResultProto init_result = second_engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    // There should be 1 schema type.
    const InitializeStatsProto& stats = init_result.initialize_stats();
    EXPECT_THAT(stats.num_schema_types(), Eq(1));

    // Create and set a schema with two type configs: Email and Message.
    SchemaProto two_type_schema = CreateEmailSchema();
    *two_type_schema.add_types() = CreateMessageSchemaTypeConfig();
    ASSERT_THAT(second_engine.SetSchema(two_type_schema).status(),
                ProtoIsOk());
  }

  {
    // Third run: both types set above should be counted.
    IcingSearchEngine third_engine(GetDefaultIcingOptions(),
                                   GetTestJniCache());
    InitializeResultProto init_result = third_engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    const InitializeStatsProto& stats = init_result.initialize_stats();
    EXPECT_THAT(stats.num_schema_types(), Eq(2));
  }
}
5375
5376 struct IcingSearchEngineInitializationVersionChangeTestParam {
5377 version_util::VersionInfo existing_version_info;
5378 std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
5379 existing_enabled_features;
5380
IcingSearchEngineInitializationVersionChangeTestParamicing::lib::__anon2df096810111::IcingSearchEngineInitializationVersionChangeTestParam5381 explicit IcingSearchEngineInitializationVersionChangeTestParam(
5382 version_util::VersionInfo version_info_in,
5383 std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
5384 existing_enabled_features_in)
5385 : existing_version_info(std::move(version_info_in)),
5386 existing_enabled_features(std::move(existing_enabled_features_in)) {}
5387 };
5388
// Value-parameterized fixture: inherits the setup of
// IcingSearchEngineInitializationTest and exposes an
// IcingSearchEngineInitializationVersionChangeTestParam through GetParam().
// It adds no members of its own.
class IcingSearchEngineInitializationVersionChangeTest
    : public IcingSearchEngineInitializationTest,
      public ::testing::WithParamInterface<
          IcingSearchEngineInitializationVersionChangeTestParam> {};
5393
// Verifies that all derived data is rebuilt when the version files on disk
// describe a different version (or a different set of enabled features) than
// the current binary.
//
// Outline:
//   1. Create storage with a schema and two Person documents.
//   2. Behind the engine's back, put a Message document into the document
//      store, index *different* content for the same document id into the
//      term, integer and join indices, and rewrite the version files
//      according to GetParam().
//   3. Re-initialize and check that every recovery cause matches the expected
//      one, that the version file is updated, and that term, numeric and join
//      searches only see the content stored in the document store.
TEST_P(IcingSearchEngineInitializationVersionChangeTest,
       RecoverFromVersionChangeOrUnknownFlagChange) {
  // TODO(b/280697513): test backup schema migration
  // Test the following scenario: version change. All derived data should be
  // rebuilt. We test this by manually adding some invalid derived data and
  // verifying they're removed due to rebuild.
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
              PropertyConfigBuilder()
                  .SetName("name")
                  .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_REQUIRED)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_PREFIX,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("indexableInteger")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_REQUIRED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("senderQualifiedId")
                                        .SetDataTypeJoinableString(
                                            JOINABLE_VALUE_TYPE_QUALIFIED_ID)
                                        .SetCardinality(CARDINALITY_REQUIRED)))
          .Build();

  DocumentProto person1 =
      DocumentBuilder()
          .SetKey("namespace", "person/1")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto person2 =
      DocumentBuilder()
          .SetKey("namespace", "person/2")
          .SetSchema("Person")
          .AddStringProperty("name", "person")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();
  DocumentProto message =
      DocumentBuilder()
          .SetKey("namespace", "message")
          .SetSchema("Message")
          .AddStringProperty("body", "correct message")
          .AddInt64Property("indexableInteger", 123)
          .AddStringProperty("senderQualifiedId", "namespace#person/1")
          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
          .Build();

  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();

  {
    // Initializes folder and schema, index person1 and person2
    TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
                                std::make_unique<IcingFilesystem>(),
                                std::make_unique<FakeClock>(),
                                GetTestJniCache());
    EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
    EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
    EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
  }  // This should shut down IcingSearchEngine and persist anything it needs to

  {
    // Manually:
    // - Put message into DocumentStore
    // - But add some incorrect data for message into 3 indices
    // - Change version file
    //
    // These will make sure last_added_document_id is consistent with
    // last_stored_document_id, so if Icing didn't handle version change
    // correctly, then the index won't be rebuilt.
    FakeClock fake_clock;
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<SchemaStore> schema_store,
        SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));

    // Put message into DocumentStore
    ICING_ASSERT_OK_AND_ASSIGN(
        DocumentStore::CreateResult create_result,
        DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
                              schema_store.get(),
                              /*force_recovery_and_revalidate_documents=*/false,
                              /*namespace_id_fingerprint=*/true,
                              /*pre_mapping_fbv=*/false,
                              /*use_persistent_hash_map=*/true,
                              PortableFileBackedProtoLog<
                                  DocumentWrapper>::kDeflateCompressionLevel,
                              /*initialize_stats=*/nullptr));
    std::unique_ptr<DocumentStore> document_store =
        std::move(create_result.document_store);
    ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, document_store->Put(message));

    // Index doc_id with incorrect data
    Index::Options options(GetIndexDir(), /*index_merge_size=*/1024 * 1024,
                           /*lite_index_sort_at_indexing=*/true,
                           /*lite_index_sort_size=*/1024 * 8);
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<Index> index,
        Index::Create(options, filesystem(), icing_filesystem()));

    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<IntegerIndex> integer_index,
        IntegerIndex::Create(*filesystem(), GetIntegerIndexDir(),
                             /*num_data_threshold_for_bucket_split=*/65536,
                             /*pre_mapping_fbv=*/false));

    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
        QualifiedIdJoinIndexImplV2::Create(*filesystem(),
                                           GetQualifiedIdJoinIndexDir(),
                                           /*pre_mapping_fbv=*/false));

    // One indexing handler per index, all driven by a single IndexProcessor.
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<TermIndexingHandler> term_indexing_handler,
        TermIndexingHandler::Create(
            &fake_clock, normalizer_.get(), index.get(),
            /*build_property_existence_metadata_hits=*/true));
    ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
                                   integer_section_indexing_handler,
                               IntegerSectionIndexingHandler::Create(
                                   &fake_clock, integer_index.get()));
    ICING_ASSERT_OK_AND_ASSIGN(
        std::unique_ptr<QualifiedIdJoinIndexingHandler>
            qualified_id_join_indexing_handler,
        QualifiedIdJoinIndexingHandler::Create(
            &fake_clock, document_store.get(), qualified_id_join_index.get()));
    std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
    handlers.push_back(std::move(term_indexing_handler));
    handlers.push_back(std::move(integer_section_indexing_handler));
    handlers.push_back(std::move(qualified_id_join_indexing_handler));
    IndexProcessor index_processor(std::move(handlers), &fake_clock);

    // Same key as `message`, but every indexed value differs from what was
    // stored in the document store.
    DocumentProto incorrect_message =
        DocumentBuilder()
            .SetKey("namespace", "message")
            .SetSchema("Message")
            .AddStringProperty("body", "wrong message")
            .AddInt64Property("indexableInteger", 456)
            .AddStringProperty("senderQualifiedId", "namespace#person/2")
            .SetCreationTimestampMs(kDefaultCreationTimestampMs)
            .Build();
    ICING_ASSERT_OK_AND_ASSIGN(
        TokenizedDocument tokenized_document,
        TokenizedDocument::Create(schema_store.get(), lang_segmenter_.get(),
                                  std::move(incorrect_message)));
    ICING_ASSERT_OK(index_processor.IndexDocument(tokenized_document, doc_id));

    // Rewrite existing data's version files
    ICING_ASSERT_OK(
        version_util::DiscardVersionFiles(*filesystem(), GetVersionFileDir()));
    const version_util::VersionInfo& existing_version_info =
        GetParam().existing_version_info;
    ICING_ASSERT_OK(version_util::WriteV1Version(
        *filesystem(), GetVersionFileDir(), existing_version_info));

    if (existing_version_info.version >= version_util::kFirstV2Version) {
      IcingSearchEngineVersionProto version_proto;
      version_proto.set_version(existing_version_info.version);
      version_proto.set_max_version(existing_version_info.max_version);
      auto* enabled_features = version_proto.mutable_enabled_features();
      for (const auto& feature : GetParam().existing_enabled_features) {
        enabled_features->Add(version_util::GetFeatureInfoProto(feature));
      }
      // Fail the test right away if the v2 version file cannot be written,
      // consistent with the v1 write above; otherwise the scenario under test
      // would not be set up.
      ICING_ASSERT_OK(version_util::WriteV2Version(
          *filesystem(), GetVersionFileDir(),
          std::make_unique<IcingSearchEngineVersionProto>(
              std::move(version_proto))));
    }
  }

  // Re-initialize with real filesystems. The rebuild is observed through the
  // initialize stats and the search results checked below.
  TestIcingSearchEngine icing(GetDefaultIcingOptions(),
                              std::make_unique<Filesystem>(),
                              std::make_unique<IcingFilesystem>(),
                              std::make_unique<FakeClock>(), GetTestJniCache());
  InitializeResultProto initialize_result = icing.Initialize();
  EXPECT_THAT(initialize_result.status(), ProtoIsOk());

  // Derived files restoration should be triggered here. Incorrect data should
  // be deleted and correct data of message should be indexed.
  // Here we're recovering from a version change or a flag change that requires
  // rebuilding all derived files.
  //
  // TODO(b/314816301): test individual derived files rebuilds due to change
  // in trunk stable feature flags.
  // i.e. Test individual rebuilding for each of:
  //  - document store
  //  - schema store
  //  - term index
  //  - numeric index
  //  - qualified id join index
  InitializeStatsProto::RecoveryCause expected_recovery_cause =
      GetParam().existing_version_info.version != version_util::kVersion
          ? InitializeStatsProto::VERSION_CHANGED
          : InitializeStatsProto::FEATURE_FLAG_CHANGED;
  EXPECT_THAT(
      initialize_result.initialize_stats().document_store_recovery_cause(),
      Eq(expected_recovery_cause));
  EXPECT_THAT(
      initialize_result.initialize_stats().schema_store_recovery_cause(),
      Eq(expected_recovery_cause));
  EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
              Eq(expected_recovery_cause));
  EXPECT_THAT(
      initialize_result.initialize_stats().integer_index_restoration_cause(),
      Eq(expected_recovery_cause));
  EXPECT_THAT(initialize_result.initialize_stats()
                  .qualified_id_join_index_restoration_cause(),
              Eq(expected_recovery_cause));

  // Manually check version file
  ICING_ASSERT_OK_AND_ASSIGN(
      IcingSearchEngineVersionProto version_proto_after_init,
      version_util::ReadVersion(*filesystem(), GetVersionFileDir(),
                                GetIndexDir()));
  EXPECT_THAT(version_proto_after_init.version(), Eq(version_util::kVersion));
  EXPECT_THAT(version_proto_after_init.max_version(),
              Eq(std::max(version_util::kVersion,
                          GetParam().existing_version_info.max_version)));

  // Term and numeric searches should both return exactly `message`.
  SearchResultProto expected_search_result_proto;
  expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
  *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
      message;

  // Verify term search
  SearchSpecProto search_spec1;
  search_spec1.set_query("body:correct");
  search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
  SearchResultProto search_result_proto1 =
      icing.Search(search_spec1, GetDefaultScoringSpec(),
                   ResultSpecProto::default_instance());
  EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
                                        expected_search_result_proto));

  // Verify numeric (integer) search
  SearchSpecProto search_spec2;
  search_spec2.set_query("indexableInteger == 123");
  search_spec2.add_enabled_features(std::string(kNumericSearchFeature));

  SearchResultProto search_result_proto2 =
      icing.Search(search_spec2, ScoringSpecProto::default_instance(),
                   ResultSpecProto::default_instance());
  EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores(
                                        expected_search_result_proto));

  // Verify join search: join a query for `name:person` with a child query for
  // `body:message` based on the child's `senderQualifiedId` field.
  SearchSpecProto search_spec3;
  search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
  search_spec3.set_query("name:person");
  JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
  join_spec->set_parent_property_expression(
      std::string(JoinProcessor::kQualifiedIdExpr));
  join_spec->set_child_property_expression("senderQualifiedId");
  join_spec->set_aggregation_scoring_strategy(
      JoinSpecProto::AggregationScoringStrategy::COUNT);
  JoinSpecProto::NestedSpecProto* nested_spec =
      join_spec->mutable_nested_spec();
  SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
  nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
  nested_search_spec->set_query("body:message");
  *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
  *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();

  ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
  result_spec3.set_max_joined_children_per_parent_to_return(
      std::numeric_limits<int32_t>::max());

  SearchResultProto expected_join_search_result_proto;
  expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
  // Person 1 with message
  SearchResultProto::ResultProto* result_proto =
      expected_join_search_result_proto.mutable_results()->Add();
  *result_proto->mutable_document() = person1;
  *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
  // Person 2 without children
  *expected_join_search_result_proto.mutable_results()
       ->Add()
       ->mutable_document() = person2;

  SearchResultProto search_result_proto3 = icing.Search(
      search_spec3, ScoringSpecProto::default_instance(), result_spec3);
  EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
                                        expected_join_search_result_proto));
}
5687
5688 INSTANTIATE_TEST_SUITE_P(
5689 IcingSearchEngineInitializationVersionChangeTest,
5690 IcingSearchEngineInitializationVersionChangeTest,
5691 testing::Values(
5692 // Manually change existing data set's version to kVersion + 1. When
5693 // initializing, it will detect "rollback".
5694 IcingSearchEngineInitializationVersionChangeTestParam(
5695 version_util::VersionInfo(
5696 /*version_in=*/version_util::kVersion + 1,
5697 /*max_version_in=*/version_util::kVersion + 1),
5698 /*existing_enabled_features_in=*/{}),
5699
5700 // Currently we don't have any "upgrade" that requires rebuild derived
5701 // files, so skip this case until we have a case for it.
5702
5703 // Manually change existing data set's version to kVersion - 1 and
5704 // max_version to kVersion. When initializing, it will detect "roll
5705 // forward".
5706 IcingSearchEngineInitializationVersionChangeTestParam(
5707 version_util::VersionInfo(
5708 /*version_in=*/version_util::kVersion - 1,
5709 /*max_version_in=*/version_util::kVersion),
5710 /*existing_enabled_features_in=*/{}),
5711
5712 // Manually change existing data set's version to 0 and max_version to
5713 // 0. When initializing, it will detect "version 0 upgrade".
5714 //
5715 // Note: in reality, version 0 won't be written into version file, but
5716 // it is ok here since it is hack to simulate version 0 situation.
5717 IcingSearchEngineInitializationVersionChangeTestParam(
5718 version_util::VersionInfo(
5719 /*version_in=*/0,
5720 /*max_version_in=*/0),
5721 /*existing_enabled_features_in=*/{}),
5722
5723 // Manually change existing data set's version to 0 and max_version to
5724 // kVersion. When initializing, it will detect "version 0 roll forward".
5725 //
5726 // Note: in reality, version 0 won't be written into version file, but
5727 // it is ok here since it is hack to simulate version 0 situation.
5728 IcingSearchEngineInitializationVersionChangeTestParam(
5729 version_util::VersionInfo(
5730 /*version_in=*/0,
5731 /*max_version_in=*/version_util::kVersion),
5732 /*existing_enabled_features_in=*/{}),
5733
5734 // Manually write an unknown feature in the version proto while keeping
5735 // version the same as kVersion.
5736 //
5737 // Result: this will rebuild all derived files with restoration cause
5738 // FEATURE_FLAG_CHANGED
5739 IcingSearchEngineInitializationVersionChangeTestParam(
5740 version_util::VersionInfo(
5741 /*version_in=*/version_util::kVersion,
5742 /*max_version_in=*/version_util::kVersion),
5743 /*existing_enabled_features_in=*/{
5744 IcingSearchEngineFeatureInfoProto::UNKNOWN})));
5745
5746 class IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest
5747 : public IcingSearchEngineInitializationTest,
5748 public ::testing::WithParamInterface<std::tuple<bool, bool>> {};
// Verifies initialization behavior when the
// build_property_existence_metadata_hits option is toggled between two runs
// over the same on-disk data.
//
// Expectations encoded below:
//   - The term index directory is deleted (and the index restoration cause is
//     FEATURE_FLAG_CHANGED) exactly when the option value differs between the
//     two runs; otherwise the cause is NONE.
//   - The integer index and the qualified id join index always report
//     restoration cause NONE.
//   - hasProperty() queries return the matching documents when the option is
//     enabled in the second run, and no documents when it is disabled.
TEST_P(IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
       ChangePropertyExistenceHitsFlagTest) {
  // Parameter layout: <0> is the option value for the first run, <1> is the
  // option value for the second run.
  const bool before_build_property_existence_metadata_hits =
      std::get<0>(GetParam());
  const bool after_build_property_existence_metadata_hits =
      std::get<1>(GetParam());
  const bool flag_changed = before_build_property_existence_metadata_hits !=
                            after_build_property_existence_metadata_hits;

  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Value")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("body")
                                        .SetDataTypeString(TERM_MATCH_EXACT,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_REPEATED))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("timestamp")
                                        .SetDataType(TYPE_INT64)
                                        .SetCardinality(CARDINALITY_OPTIONAL))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("score")
                                        .SetDataType(TYPE_DOUBLE)
                                        .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();

  // Create a document with every property.
  DocumentProto document0 = DocumentBuilder()
                                .SetKey("icing", "uri0")
                                .SetSchema("Value")
                                .SetCreationTimestampMs(1)
                                .AddStringProperty("body", "foo")
                                .AddInt64Property("timestamp", 123)
                                .AddDoubleProperty("score", 456.789)
                                .Build();
  // Create a document with missing body.
  DocumentProto document1 = DocumentBuilder()
                                .SetKey("icing", "uri1")
                                .SetSchema("Value")
                                .SetCreationTimestampMs(1)
                                .AddInt64Property("timestamp", 123)
                                .AddDoubleProperty("score", 456.789)
                                .Build();
  // Create a document with missing timestamp.
  DocumentProto document2 = DocumentBuilder()
                                .SetKey("icing", "uri2")
                                .SetSchema("Value")
                                .SetCreationTimestampMs(1)
                                .AddStringProperty("body", "foo")
                                .AddDoubleProperty("score", 456.789)
                                .Build();

  // 1. Create an index with the 3 documents, using the "before" option value.
  //    The engine is scoped so that it is destroyed before step 2 starts.
  {
    IcingSearchEngineOptions options = GetDefaultIcingOptions();
    options.set_build_property_existence_metadata_hits(
        before_build_property_existence_metadata_hits);
    TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
                                std::make_unique<IcingFilesystem>(),
                                std::make_unique<FakeClock>(),
                                GetTestJniCache());

    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
    ASSERT_THAT(icing.Put(document0).status(), ProtoIsOk());
    ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
    ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
  }

  // 2. Create the index again with
  //    after_build_property_existence_metadata_hits.
  //
  // Mock filesystem to observe and check the behavior of all indices.
  auto mock_filesystem = std::make_unique<MockFilesystem>();
  // Catch-all: allow any other directory deletion and forward it to the
  // default action. gMock consults later expectations first, so the more
  // specific expectation below takes precedence for the term index directory.
  EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
      .WillRepeatedly(DoDefault());
  // Ensure that the term index is rebuilt if the flag is changed.
  EXPECT_CALL(*mock_filesystem,
              DeleteDirectoryRecursively(EndsWith("/index_dir")))
      .Times(flag_changed ? 1 : 0);

  IcingSearchEngineOptions options = GetDefaultIcingOptions();
  options.set_build_property_existence_metadata_hits(
      after_build_property_existence_metadata_hits);
  TestIcingSearchEngine icing(options, std::move(mock_filesystem),
                              std::make_unique<IcingFilesystem>(),
                              std::make_unique<FakeClock>(), GetTestJniCache());
  InitializeResultProto initialize_result = icing.Initialize();
  ASSERT_THAT(initialize_result.status(), ProtoIsOk());
  // Ensure that the term index is rebuilt if the flag is changed.
  EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
              Eq(flag_changed ? InitializeStatsProto::FEATURE_FLAG_CHANGED
                              : InitializeStatsProto::NONE));
  // The other indices are expected to be left alone regardless of the flag.
  EXPECT_THAT(
      initialize_result.initialize_stats().integer_index_restoration_cause(),
      Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(initialize_result.initialize_stats()
                  .qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));

  // The same search spec is reused for all three hasProperty() queries below;
  // only the query string changes. The assertions expect hits in reverse
  // insertion order.
  //
  // Result sizes are checked with ASSERT_THAT (fatal) rather than EXPECT_THAT
  // so that a size mismatch cannot lead to out-of-range results(i) accesses.

  // Get all documents that have "body".
  SearchSpecProto search_spec;
  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
  search_spec.add_enabled_features(std::string(kHasPropertyFunctionFeature));
  search_spec.add_enabled_features(
      std::string(kListFilterQueryLanguageFeature));
  search_spec.set_query("hasProperty(\"body\")");
  SearchResultProto results = icing.Search(search_spec, GetDefaultScoringSpec(),
                                           ResultSpecProto::default_instance());
  EXPECT_THAT(results.status(), ProtoIsOk());
  if (after_build_property_existence_metadata_hits) {
    ASSERT_THAT(results.results(), SizeIs(2));
    EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
    EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
  } else {
    EXPECT_THAT(results.results(), IsEmpty());
  }

  // Get all documents that have "timestamp".
  search_spec.set_query("hasProperty(\"timestamp\")");
  results = icing.Search(search_spec, GetDefaultScoringSpec(),
                         ResultSpecProto::default_instance());
  EXPECT_THAT(results.status(), ProtoIsOk());
  if (after_build_property_existence_metadata_hits) {
    ASSERT_THAT(results.results(), SizeIs(2));
    EXPECT_THAT(results.results(0).document(), EqualsProto(document1));
    EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
  } else {
    EXPECT_THAT(results.results(), IsEmpty());
  }

  // Get all documents that have "score".
  search_spec.set_query("hasProperty(\"score\")");
  results = icing.Search(search_spec, GetDefaultScoringSpec(),
                         ResultSpecProto::default_instance());
  EXPECT_THAT(results.status(), ProtoIsOk());
  if (after_build_property_existence_metadata_hits) {
    ASSERT_THAT(results.results(), SizeIs(3));
    EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
    EXPECT_THAT(results.results(1).document(), EqualsProto(document1));
    EXPECT_THAT(results.results(2).document(), EqualsProto(document0));
  } else {
    EXPECT_THAT(results.results(), IsEmpty());
  }
}
5894
5895 INSTANTIATE_TEST_SUITE_P(
5896 IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
5897 IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
5898 testing::Values(std::make_tuple(false, false), std::make_tuple(false, true),
5899 std::make_tuple(true, false), std::make_tuple(true, true)));
5900
5901 } // namespace
5902 } // namespace lib
5903 } // namespace icing
5904