• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <algorithm>
16 #include <cstdint>
17 #include <limits>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <tuple>
22 #include <unordered_set>
23 #include <utility>
24 #include <vector>
25 
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "gmock/gmock.h"
28 #include "gtest/gtest.h"
29 #include "icing/absl_ports/str_cat.h"
30 #include "icing/document-builder.h"
31 #include "icing/file/file-backed-vector.h"
32 #include "icing/file/filesystem.h"
33 #include "icing/file/memory-mapped-file.h"
34 #include "icing/file/mock-filesystem.h"
35 #include "icing/file/portable-file-backed-proto-log.h"
36 #include "icing/file/version-util.h"
37 #include "icing/icing-search-engine.h"
38 #include "icing/index/data-indexing-handler.h"
39 #include "icing/index/index-processor.h"
40 #include "icing/index/index.h"
41 #include "icing/index/integer-section-indexing-handler.h"
42 #include "icing/index/iterator/doc-hit-info-iterator.h"
43 #include "icing/index/numeric/integer-index.h"
44 #include "icing/index/numeric/numeric-index.h"
45 #include "icing/index/term-indexing-handler.h"
46 #include "icing/jni/jni-cache.h"
47 #include "icing/join/join-processor.h"
48 #include "icing/join/qualified-id-join-index-impl-v2.h"
49 #include "icing/join/qualified-id-join-index.h"
50 #include "icing/join/qualified-id-join-indexing-handler.h"
51 #include "icing/legacy/index/icing-filesystem.h"
52 #include "icing/legacy/index/icing-mock-filesystem.h"
53 #include "icing/portable/endian.h"
54 #include "icing/portable/equals-proto.h"
55 #include "icing/portable/platform.h"
56 #include "icing/proto/debug.pb.h"
57 #include "icing/proto/document.pb.h"
58 #include "icing/proto/document_wrapper.pb.h"
59 #include "icing/proto/initialize.pb.h"
60 #include "icing/proto/logging.pb.h"
61 #include "icing/proto/optimize.pb.h"
62 #include "icing/proto/persist.pb.h"
63 #include "icing/proto/reset.pb.h"
64 #include "icing/proto/schema.pb.h"
65 #include "icing/proto/scoring.pb.h"
66 #include "icing/proto/search.pb.h"
67 #include "icing/proto/status.pb.h"
68 #include "icing/proto/storage.pb.h"
69 #include "icing/proto/term.pb.h"
70 #include "icing/proto/usage.pb.h"
71 #include "icing/query/query-features.h"
72 #include "icing/schema-builder.h"
73 #include "icing/schema/schema-store.h"
74 #include "icing/schema/section.h"
75 #include "icing/store/document-associated-score-data.h"
76 #include "icing/store/document-id.h"
77 #include "icing/store/document-log-creator.h"
78 #include "icing/store/document-store.h"
79 #include "icing/store/namespace-fingerprint-identifier.h"
80 #include "icing/testing/common-matchers.h"
81 #include "icing/testing/fake-clock.h"
82 #include "icing/testing/icu-data-file-helper.h"
83 #include "icing/testing/jni-test-helpers.h"
84 #include "icing/testing/test-data.h"
85 #include "icing/testing/tmp-directory.h"
86 #include "icing/tokenization/language-segmenter-factory.h"
87 #include "icing/tokenization/language-segmenter.h"
88 #include "icing/transform/normalizer-factory.h"
89 #include "icing/transform/normalizer.h"
90 #include "icing/util/clock.h"
91 #include "icing/util/tokenized-document.h"
92 #include "unicode/uloc.h"
93 
94 namespace icing {
95 namespace lib {
96 
97 namespace {
98 
99 using ::icing::lib::portable_equals_proto::EqualsProto;
100 using ::testing::_;
101 using ::testing::AtLeast;
102 using ::testing::DoDefault;
103 using ::testing::EndsWith;
104 using ::testing::Eq;
105 using ::testing::HasSubstr;
106 using ::testing::IsEmpty;
107 using ::testing::Matcher;
108 using ::testing::Ne;
109 using ::testing::Return;
110 using ::testing::SizeIs;
111 
112 constexpr std::string_view kIpsumText =
113     "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
114     "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
115     "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
116     "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
117     "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
118     "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
119     "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
120     "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
121     "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
122     "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
123     "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
124     "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
125     "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
126     "placerat semper.";
127 
ReadDocumentLogHeader(Filesystem filesystem,const std::string & file_path)128 PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader(
129     Filesystem filesystem, const std::string& file_path) {
130   PortableFileBackedProtoLog<DocumentWrapper>::Header header;
131   filesystem.PRead(file_path.c_str(), &header,
132                    sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header),
133                    /*offset=*/0);
134   return header;
135 }
136 
WriteDocumentLogHeader(Filesystem filesystem,const std::string & file_path,PortableFileBackedProtoLog<DocumentWrapper>::Header & header)137 void WriteDocumentLogHeader(
138     Filesystem filesystem, const std::string& file_path,
139     PortableFileBackedProtoLog<DocumentWrapper>::Header& header) {
140   filesystem.Write(file_path.c_str(), &header,
141                    sizeof(PortableFileBackedProtoLog<DocumentWrapper>::Header));
142 }
143 
144 // For mocking purpose, we allow tests to provide a custom Filesystem.
145 class TestIcingSearchEngine : public IcingSearchEngine {
146  public:
TestIcingSearchEngine(const IcingSearchEngineOptions & options,std::unique_ptr<const Filesystem> filesystem,std::unique_ptr<const IcingFilesystem> icing_filesystem,std::unique_ptr<Clock> clock,std::unique_ptr<JniCache> jni_cache)147   TestIcingSearchEngine(const IcingSearchEngineOptions& options,
148                         std::unique_ptr<const Filesystem> filesystem,
149                         std::unique_ptr<const IcingFilesystem> icing_filesystem,
150                         std::unique_ptr<Clock> clock,
151                         std::unique_ptr<JniCache> jni_cache)
152       : IcingSearchEngine(options, std::move(filesystem),
153                           std::move(icing_filesystem), std::move(clock),
154                           std::move(jni_cache)) {}
155 };
156 
GetTestBaseDir()157 std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
158 
159 // This test is meant to cover all tests relating to
160 // IcingSearchEngine::Initialize.
161 class IcingSearchEngineInitializationTest : public testing::Test {
162  protected:
SetUp()163   void SetUp() override {
164     if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
165       // If we've specified using the reverse-JNI method for segmentation (i.e.
166       // not ICU), then we won't have the ICU data file included to set up.
167       // Technically, we could choose to use reverse-JNI for segmentation AND
168       // include an ICU data file, but that seems unlikely and our current BUILD
169       // setup doesn't do this.
170       // File generated via icu_data_file rule in //icing/BUILD.
171       std::string icu_data_file_path =
172           GetTestFilePath("icing/icu.dat");
173       ICING_ASSERT_OK(
174           icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
175     }
176     filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
177 
178     language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
179     ICING_ASSERT_OK_AND_ASSIGN(
180         lang_segmenter_,
181         language_segmenter_factory::Create(std::move(segmenter_options)));
182 
183     ICING_ASSERT_OK_AND_ASSIGN(
184         normalizer_,
185         normalizer_factory::Create(
186             /*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
187   }
188 
TearDown()189   void TearDown() override {
190     normalizer_.reset();
191     lang_segmenter_.reset();
192     filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
193   }
194 
filesystem() const195   const Filesystem* filesystem() const { return &filesystem_; }
196 
icing_filesystem() const197   const IcingFilesystem* icing_filesystem() const { return &icing_filesystem_; }
198 
199   Filesystem filesystem_;
200   IcingFilesystem icing_filesystem_;
201   std::unique_ptr<LanguageSegmenter> lang_segmenter_;
202   std::unique_ptr<Normalizer> normalizer_;
203 };
204 
205 // Non-zero value so we don't override it to be the current time
206 constexpr int64_t kDefaultCreationTimestampMs = 1575492852000;
207 
GetVersionFileDir()208 std::string GetVersionFileDir() { return GetTestBaseDir(); }
209 
GetDocumentDir()210 std::string GetDocumentDir() { return GetTestBaseDir() + "/document_dir"; }
211 
GetIndexDir()212 std::string GetIndexDir() { return GetTestBaseDir() + "/index_dir"; }
213 
GetIntegerIndexDir()214 std::string GetIntegerIndexDir() {
215   return GetTestBaseDir() + "/integer_index_dir";
216 }
217 
GetQualifiedIdJoinIndexDir()218 std::string GetQualifiedIdJoinIndexDir() {
219   return GetTestBaseDir() + "/qualified_id_join_index_dir";
220 }
221 
GetSchemaDir()222 std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
223 
GetHeaderFilename()224 std::string GetHeaderFilename() {
225   return GetTestBaseDir() + "/icing_search_engine_header";
226 }
227 
GetDefaultIcingOptions()228 IcingSearchEngineOptions GetDefaultIcingOptions() {
229   IcingSearchEngineOptions icing_options;
230   icing_options.set_base_dir(GetTestBaseDir());
231   icing_options.set_document_store_namespace_id_fingerprint(true);
232   icing_options.set_use_new_qualified_id_join_index(true);
233   return icing_options;
234 }
235 
CreateMessageDocument(std::string name_space,std::string uri)236 DocumentProto CreateMessageDocument(std::string name_space, std::string uri) {
237   return DocumentBuilder()
238       .SetKey(std::move(name_space), std::move(uri))
239       .SetSchema("Message")
240       .AddStringProperty("body", "message body")
241       .AddInt64Property("indexableInteger", 123)
242       .SetCreationTimestampMs(kDefaultCreationTimestampMs)
243       .Build();
244 }
245 
CreateEmailDocument(const std::string & name_space,const std::string & uri,int score,const std::string & subject_content,const std::string & body_content)246 DocumentProto CreateEmailDocument(const std::string& name_space,
247                                   const std::string& uri, int score,
248                                   const std::string& subject_content,
249                                   const std::string& body_content) {
250   return DocumentBuilder()
251       .SetKey(name_space, uri)
252       .SetSchema("Email")
253       .SetScore(score)
254       .AddStringProperty("subject", subject_content)
255       .AddStringProperty("body", body_content)
256       .Build();
257 }
258 
CreateMessageSchemaTypeConfig()259 SchemaTypeConfigProto CreateMessageSchemaTypeConfig() {
260   return SchemaTypeConfigBuilder()
261       .SetType("Message")
262       .AddProperty(PropertyConfigBuilder()
263                        .SetName("body")
264                        .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
265                        .SetCardinality(CARDINALITY_REQUIRED))
266       .AddProperty(PropertyConfigBuilder()
267                        .SetName("indexableInteger")
268                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
269                        .SetCardinality(CARDINALITY_REQUIRED))
270       .Build();
271 }
272 
CreateEmailSchemaTypeConfig()273 SchemaTypeConfigProto CreateEmailSchemaTypeConfig() {
274   return SchemaTypeConfigBuilder()
275       .SetType("Email")
276       .AddProperty(PropertyConfigBuilder()
277                        .SetName("body")
278                        .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
279                        .SetCardinality(CARDINALITY_REQUIRED))
280       .AddProperty(PropertyConfigBuilder()
281                        .SetName("subject")
282                        .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
283                        .SetCardinality(CARDINALITY_REQUIRED))
284       .Build();
285 }
286 
CreateMessageSchema()287 SchemaProto CreateMessageSchema() {
288   return SchemaBuilder().AddType(CreateMessageSchemaTypeConfig()).Build();
289 }
290 
CreateEmailSchema()291 SchemaProto CreateEmailSchema() {
292   return SchemaBuilder().AddType(CreateEmailSchemaTypeConfig()).Build();
293 }
294 
GetDefaultScoringSpec()295 ScoringSpecProto GetDefaultScoringSpec() {
296   ScoringSpecProto scoring_spec;
297   scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
298   return scoring_spec;
299 }
300 
301 // TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder,
302 // SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all
303 // instances by them.
304 
// Every API invoked on an engine that was never Initialize()d must return
// FAILED_PRECONDITION (or, for InvalidateNextPageToken, simply not crash).
TEST_F(IcingSearchEngineInitializationTest, UninitializedInstanceFailsSafely) {
  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());

  // Schema APIs. The schema used here is the Message schema, matching the
  // Message document created further down.
  SchemaProto message_schema = CreateMessageSchema();
  EXPECT_THAT(icing.SetSchema(message_schema).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(icing.GetSchema().status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(
      icing.GetSchemaType(message_schema.types(0).schema_type()).status(),
      ProtoStatusIs(StatusProto::FAILED_PRECONDITION));

  // Document APIs.
  DocumentProto doc = CreateMessageDocument("namespace", "uri");
  EXPECT_THAT(icing.Put(doc).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(icing
                  .Get(doc.namespace_(), doc.uri(),
                       GetResultSpecProto::default_instance())
                  .status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(
      icing.DeleteBySchemaType(message_schema.types(0).schema_type()).status(),
      ProtoStatusIs(StatusProto::FAILED_PRECONDITION));

  // Search and paging APIs.
  SearchSpecProto search_spec = SearchSpecProto::default_instance();
  ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance();
  ResultSpecProto result_spec = ResultSpecProto::default_instance();
  EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  constexpr int kSomePageToken = 12;
  EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  icing.InvalidateNextPageToken(kSomePageToken);  // Verify this doesn't crash.

  // Maintenance APIs.
  EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
  EXPECT_THAT(icing.Optimize().status(),
              ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
}
348 
// Initializes a fresh engine, sets the Message schema and puts the same
// document twice: once as an lvalue and once as an rvalue copy.
TEST_F(IcingSearchEngineInitializationTest, SimpleInitialization) {
  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());

  InitializeResultProto init_result = icing.Initialize();
  ASSERT_THAT(init_result.status(), ProtoIsOk());

  SchemaProto message_schema = CreateMessageSchema();
  ASSERT_THAT(icing.SetSchema(message_schema).status(), ProtoIsOk());

  DocumentProto message = CreateMessageDocument("namespace", "uri");
  ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());

  DocumentProto message_copy(message);
  ASSERT_THAT(icing.Put(std::move(message_copy)).status(), ProtoIsOk());
}
358 
// A document that was Put but never explicitly persisted must still be
// retrievable after Initialize() is called a second time on the same engine.
TEST_F(IcingSearchEngineInitializationTest,
       InitializingAgainSavesNonPersistedData) {
  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());

  DocumentProto message = CreateMessageDocument("namespace", "uri");
  ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());

  // The Get result expected both before and after re-initialization.
  GetResultProto expected_result;
  *expected_result.mutable_document() = message;
  expected_result.mutable_status()->set_code(StatusProto::OK);

  GetResultProto result_before_reinit =
      icing.Get("namespace", "uri", GetResultSpecProto::default_instance());
  ASSERT_THAT(result_before_reinit, EqualsProto(expected_result));

  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());

  GetResultProto result_after_reinit =
      icing.Get("namespace", "uri", GetResultSpecProto::default_instance());
  EXPECT_THAT(result_after_reinit, EqualsProto(expected_result));
}
381 
// An index merge size equal to the int32_t maximum must be rejected by
// Initialize() with INVALID_ARGUMENT.
TEST_F(IcingSearchEngineInitializationTest,
       MaxIndexMergeSizeReturnsInvalidArgument) {
  constexpr int32_t kMaxInt32 = std::numeric_limits<int32_t>::max();
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_index_merge_size(kMaxInt32);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
390 
// A negative index merge size must be rejected by Initialize() with
// INVALID_ARGUMENT.
TEST_F(IcingSearchEngineInitializationTest,
       NegativeMergeSizeReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_index_merge_size(-1);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
399 
// An index merge size of zero must be rejected by Initialize() with
// INVALID_ARGUMENT.
TEST_F(IcingSearchEngineInitializationTest,
       ZeroMergeSizeReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_index_merge_size(0);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
408 
// The smallest positive index merge size is accepted by Initialize().
TEST_F(IcingSearchEngineInitializationTest, GoodIndexMergeSizeReturnsOk) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  // One is fine, if a bit weird. It just means that the lite index will be
  // smaller and will request a merge any time content is added to it.
  icing_options.set_index_merge_size(1);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());
}
417 
// A negative max token length must be rejected by Initialize() with
// INVALID_ARGUMENT.
TEST_F(IcingSearchEngineInitializationTest,
       NegativeMaxTokenLenReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_max_token_length(-1);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
426 
// A max token length of zero must be rejected by Initialize() with
// INVALID_ARGUMENT.
TEST_F(IcingSearchEngineInitializationTest,
       ZeroMaxTokenLenReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_max_token_length(0);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
435 
// A negative compression level must be rejected by Initialize() with
// INVALID_ARGUMENT.
TEST_F(IcingSearchEngineInitializationTest,
       NegativeCompressionLevelReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_compression_level(-1);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
444 
// A compression level of 10 must be rejected by Initialize() with
// INVALID_ARGUMENT.
TEST_F(IcingSearchEngineInitializationTest,
       GreaterThanMaxCompressionLevelReturnsInvalidArgument) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_compression_level(10);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(),
              ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
453 
// A compression level of 0 is accepted by Initialize().
TEST_F(IcingSearchEngineInitializationTest, GoodCompressionLevelReturnsOk) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
  icing_options.set_compression_level(0);

  IcingSearchEngine icing(icing_options, GetTestJniCache());
  InitializeResultProto init_result = icing.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());
}
460 
// Data written with compression level 3 must still allow a successful
// Initialize() when the engine is later recreated with level 9 and then with
// level 0.
TEST_F(IcingSearchEngineInitializationTest,
       ReinitializingWithDifferentCompressionLevelReturnsOk) {
  IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();

  // First instance: write and fully persist one document at level 3. The
  // engine is destroyed at the end of this scope before the next one is made.
  icing_options.set_compression_level(3);
  {
    IcingSearchEngine icing(icing_options, GetTestJniCache());
    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());

    DocumentProto message = CreateMessageDocument("namespace", "uri");
    ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());
    ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
  }

  // Subsequent instances: one engine per level, each destroyed at the end of
  // its loop iteration.
  for (int compression_level : {9, 0}) {
    icing_options.set_compression_level(compression_level);
    IcingSearchEngine icing(icing_options, GetTestJniCache());
    EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
  }
}
485 
// When the filesystem refuses to create any directory, Initialize() must
// return INTERNAL with a message mentioning the directory creation failure.
TEST_F(IcingSearchEngineInitializationTest, FailToCreateDocStore) {
  auto failing_filesystem = std::make_unique<MockFilesystem>();
  // This fails DocumentStore::Create()
  ON_CALL(*failing_filesystem, CreateDirectoryRecursively(_))
      .WillByDefault(Return(false));

  TestIcingSearchEngine icing(
      GetDefaultIcingOptions(), std::move(failing_filesystem),
      std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
      GetTestJniCache());

  InitializeResultProto init_result = icing.Initialize();
  const StatusProto& init_status = init_result.status();
  EXPECT_THAT(init_status, ProtoStatusIs(StatusProto::INTERNAL));
  EXPECT_THAT(init_status.message(), HasSubstr("Could not create directory"));
}
503 
TEST_F(IcingSearchEngineInitializationTest,InitMarkerFilePreviousFailuresAtThreshold)504 TEST_F(IcingSearchEngineInitializationTest,
505        InitMarkerFilePreviousFailuresAtThreshold) {
506   Filesystem filesystem;
507   DocumentProto email1 =
508       CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
509   email1.set_creation_timestamp_ms(10000);
510   DocumentProto email2 =
511       CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
512   email2.set_creation_timestamp_ms(10000);
513 
514   {
515     // Create an index with a few documents.
516     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
517     InitializeResultProto init_result = icing.Initialize();
518     ASSERT_THAT(init_result.status(), ProtoIsOk());
519     ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
520                 Eq(0));
521     ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
522     ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
523     ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
524   }
525 
526   // Write an init marker file with 5 previously failed attempts.
527   std::string marker_filepath = GetTestBaseDir() + "/init_marker";
528 
529   {
530     ScopedFd marker_file_fd(filesystem.OpenForWrite(marker_filepath.c_str()));
531     int network_init_attempts = GHostToNetworkL(5);
532     // Write the updated number of attempts before we get started.
533     ASSERT_TRUE(filesystem.PWrite(marker_file_fd.get(), 0,
534                                   &network_init_attempts,
535                                   sizeof(network_init_attempts)));
536     ASSERT_TRUE(filesystem.DataSync(marker_file_fd.get()));
537   }
538 
539   {
540     // Create the index again and verify that initialization succeeds and no
541     // data is thrown out.
542     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
543     InitializeResultProto init_result = icing.Initialize();
544     ASSERT_THAT(init_result.status(), ProtoIsOk());
545     ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
546                 Eq(5));
547     EXPECT_THAT(
548         icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
549             .document(),
550         EqualsProto(email1));
551     EXPECT_THAT(
552         icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
553             .document(),
554         EqualsProto(email2));
555   }
556 
557   // The successful init should have thrown out the marker file.
558   ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
559 }
560 
// With an init marker file recording 6 previous failed attempts,
// Initialize() must return WARNING_DATA_LOSS, report 6 previous failures,
// drop all previously stored documents, and remove the marker file.
TEST_F(IcingSearchEngineInitializationTest,
       InitMarkerFilePreviousFailuresBeyondThreshold) {
  Filesystem fs;
  std::string init_marker_path = GetTestBaseDir() + "/init_marker";

  DocumentProto first_email =
      CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
  DocumentProto second_email =
      CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");

  {
    // Create an index with a few documents.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto first_init = icing.Initialize();
    ASSERT_THAT(first_init.status(), ProtoIsOk());
    ASSERT_THAT(first_init.initialize_stats().num_previous_init_failures(),
                Eq(0));
    ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
    ASSERT_THAT(icing.Put(first_email).status(), ProtoIsOk());
    ASSERT_THAT(icing.Put(second_email).status(), ProtoIsOk());
  }

  {
    // Write an init marker file with 6 previously failed attempts. The count
    // is converted with GHostToNetworkL before being written at offset 0.
    ScopedFd marker_fd(fs.OpenForWrite(init_marker_path.c_str()));
    int attempts_network_order = GHostToNetworkL(6);
    ASSERT_TRUE(fs.PWrite(marker_fd.get(), 0, &attempts_network_order,
                          sizeof(attempts_network_order)));
    ASSERT_TRUE(fs.DataSync(marker_fd.get()));
  }

  {
    // Create the index again and verify that initialization succeeds and all
    // data is thrown out.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto second_init = icing.Initialize();
    ASSERT_THAT(second_init.status(),
                ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
    ASSERT_THAT(second_init.initialize_stats().num_previous_init_failures(),
                Eq(6));

    GetResultProto first_get =
        icing.Get("namespace", "uri1", GetResultSpecProto::default_instance());
    EXPECT_THAT(first_get.status(), ProtoStatusIs(StatusProto::NOT_FOUND));

    GetResultProto second_get =
        icing.Get("namespace", "uri2", GetResultSpecProto::default_instance());
    EXPECT_THAT(second_get.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
  }

  // The successful init should have thrown out the marker file.
  ASSERT_FALSE(fs.FileExists(init_marker_path.c_str()));
}
616 
TEST_F(IcingSearchEngineInitializationTest,SuccessiveInitFailuresIncrementsInitMarker)617 TEST_F(IcingSearchEngineInitializationTest,
618        SuccessiveInitFailuresIncrementsInitMarker) {
619   Filesystem filesystem;
620   DocumentProto email1 =
621       CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1");
622   DocumentProto email2 =
623       CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2");
624 
625   {
626     // 1. Create an index with a few documents.
627     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
628     InitializeResultProto init_result = icing.Initialize();
629     ASSERT_THAT(init_result.status(), ProtoIsOk());
630     ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
631                 Eq(0));
632     ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
633     ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk());
634     ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk());
635   }
636 
637   {
638     // 2. Create an index that will encounter an IO failure when trying to
639     // create the document log.
640     IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
641 
642     auto mock_filesystem = std::make_unique<MockFilesystem>();
643     std::string document_log_filepath =
644         icing_options.base_dir() + "/document_dir/document_log_v1";
645     ON_CALL(*mock_filesystem,
646             GetFileSize(Matcher<const char*>(Eq(document_log_filepath))))
647         .WillByDefault(Return(Filesystem::kBadFileSize));
648 
649     TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
650                                 std::make_unique<IcingFilesystem>(),
651                                 std::make_unique<FakeClock>(),
652                                 GetTestJniCache());
653 
654     // Fail to initialize six times in a row.
655     InitializeResultProto init_result = icing.Initialize();
656     ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
657     ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
658                 Eq(0));
659 
660     init_result = icing.Initialize();
661     ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
662     ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
663                 Eq(1));
664 
665     init_result = icing.Initialize();
666     ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
667     ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
668                 Eq(2));
669 
670     init_result = icing.Initialize();
671     ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
672     ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
673                 Eq(3));
674 
675     init_result = icing.Initialize();
676     ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
677     ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
678                 Eq(4));
679 
680     init_result = icing.Initialize();
681     ASSERT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
682     ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
683                 Eq(5));
684   }
685 
686   {
687     // 3. Create the index again and verify that initialization succeeds and all
688     // data is thrown out.
689     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
690     InitializeResultProto init_result = icing.Initialize();
691     ASSERT_THAT(init_result.status(),
692                 ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
693     ASSERT_THAT(init_result.initialize_stats().num_previous_init_failures(),
694                 Eq(6));
695 
696     EXPECT_THAT(
697         icing.Get("namespace", "uri1", GetResultSpecProto::default_instance())
698             .status(),
699         ProtoStatusIs(StatusProto::NOT_FOUND));
700     EXPECT_THAT(
701         icing.Get("namespace", "uri2", GetResultSpecProto::default_instance())
702             .status(),
703         ProtoStatusIs(StatusProto::NOT_FOUND));
704   }
705 
706   // The successful init should have thrown out the marker file.
707   std::string marker_filepath = GetTestBaseDir() + "/init_marker";
708   ASSERT_FALSE(filesystem.FileExists(marker_filepath.c_str()));
709 }
710 
TEST_F(IcingSearchEngineInitializationTest,RecoverFromMissingHeaderFile)711 TEST_F(IcingSearchEngineInitializationTest, RecoverFromMissingHeaderFile) {
712   SearchSpecProto search_spec;
713   search_spec.set_query("message");
714   search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
715 
716   SearchResultProto expected_search_result_proto;
717   expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
718   *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
719       CreateMessageDocument("namespace", "uri");
720 
721   GetResultProto expected_get_result_proto;
722   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
723   *expected_get_result_proto.mutable_document() =
724       CreateMessageDocument("namespace", "uri");
725 
726   {
727     // Basic initialization/setup
728     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
729     EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
730     EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
731     EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
732                 ProtoIsOk());
733     EXPECT_THAT(
734         icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
735         EqualsProto(expected_get_result_proto));
736     SearchResultProto search_result_proto =
737         icing.Search(search_spec, GetDefaultScoringSpec(),
738                      ResultSpecProto::default_instance());
739     EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
740                                          expected_search_result_proto));
741   }  // This should shut down IcingSearchEngine and persist anything it needs to
742 
743   EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
744 
745   // We should be able to recover from this and access all our previous data
746   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
747   EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
748 
749   // Checks that DocumentLog is still ok
750   EXPECT_THAT(
751       icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
752       EqualsProto(expected_get_result_proto));
753 
754   // Checks that the term index is still ok so we can search over it
755   SearchResultProto search_result_proto =
756       icing.Search(search_spec, GetDefaultScoringSpec(),
757                    ResultSpecProto::default_instance());
758   EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
759                                        expected_search_result_proto));
760 
761   // Checks that the integer index is still ok so we can search over it
762   SearchSpecProto search_spec2;
763   search_spec2.set_query("indexableInteger == 123");
764   search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
765 
766   SearchResultProto search_result_google::protobuf =
767       icing.Search(search_spec2, ScoringSpecProto::default_instance(),
768                    ResultSpecProto::default_instance());
769   EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
770                                         expected_search_result_proto));
771 
772   // Checks that Schema is still since it'll be needed to validate the document
773   EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
774               ProtoIsOk());
775 }
776 
// Verifies that Initialize() reports INTERNAL when the persisted schema file
// has been overwritten with bytes that are not a valid schema.
TEST_F(IcingSearchEngineInitializationTest, UnableToRecoverFromCorruptSchema) {
  {
    // Bring up a healthy engine holding the message schema and one document.
    IcingSearchEngine setup_icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto setup_init_result = setup_icing.Initialize();
    EXPECT_THAT(setup_init_result.status(), ProtoIsOk());
    EXPECT_THAT(setup_icing.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());

    DocumentProto message_document = CreateMessageDocument("namespace", "uri");
    EXPECT_THAT(setup_icing.Put(message_document).status(), ProtoIsOk());

    // Sanity check: the document just written can be read back verbatim.
    GetResultProto expected_get_result;
    expected_get_result.mutable_status()->set_code(StatusProto::OK);
    *expected_get_result.mutable_document() = message_document;

    GetResultProto actual_get_result = setup_icing.Get(
        "namespace", "uri", GetResultSpecProto::default_instance());
    EXPECT_THAT(actual_get_result, EqualsProto(expected_get_result));
  }  // Leaving the scope destroys the engine, which persists its state.

  // Clobber the schema file with junk.
  const std::string junk_bytes = "1234";
  const std::string schema_path =
      absl_ports::StrCat(GetSchemaDir(), "/schema.pb");
  EXPECT_TRUE(filesystem()->Write(schema_path.c_str(), junk_bytes.data(),
                                  junk_bytes.size()));

  // A new engine over the same directory must fail to initialize.
  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
  InitializeResultProto reinit_result = icing.Initialize();
  EXPECT_THAT(reinit_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
}
806 
// Verifies that Initialize() reports INTERNAL when the document log file has
// been overwritten with bytes that are not a valid log.
TEST_F(IcingSearchEngineInitializationTest,
       UnableToRecoverFromCorruptDocumentLog) {
  {
    // Bring up a healthy engine holding the message schema and one document.
    IcingSearchEngine setup_icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto setup_init_result = setup_icing.Initialize();
    EXPECT_THAT(setup_init_result.status(), ProtoIsOk());
    EXPECT_THAT(setup_icing.SetSchema(CreateMessageSchema()).status(),
                ProtoIsOk());

    DocumentProto message_document = CreateMessageDocument("namespace", "uri");
    EXPECT_THAT(setup_icing.Put(message_document).status(), ProtoIsOk());

    // Sanity check: the document just written can be read back verbatim.
    GetResultProto expected_get_result;
    expected_get_result.mutable_status()->set_code(StatusProto::OK);
    *expected_get_result.mutable_document() = message_document;

    GetResultProto actual_get_result = setup_icing.Get(
        "namespace", "uri", GetResultSpecProto::default_instance());
    EXPECT_THAT(actual_get_result, EqualsProto(expected_get_result));
  }  // Leaving the scope destroys the engine, which persists its state.

  // Clobber the document log with junk.
  const std::string junk_bytes = "1234";
  const std::string document_log_path = absl_ports::StrCat(
      GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
  EXPECT_TRUE(filesystem()->Write(document_log_path.c_str(), junk_bytes.data(),
                                  junk_bytes.size()));

  // A new engine over the same directory must fail to initialize.
  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
  InitializeResultProto reinit_result = icing.Initialize();
  EXPECT_THAT(reinit_result.status(), ProtoStatusIs(StatusProto::INTERNAL));
}
837 
TEST_F(IcingSearchEngineInitializationTest,RecoverFromInconsistentSchemaStore)838 TEST_F(IcingSearchEngineInitializationTest,
839        RecoverFromInconsistentSchemaStore) {
840   DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
841   DocumentProto document2_with_additional_property =
842       DocumentBuilder()
843           .SetKey("namespace", "uri2")
844           .SetSchema("Message")
845           .AddStringProperty("additional", "content")
846           .AddStringProperty("body", "message body")
847           .AddInt64Property("indexableInteger", 123)
848           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
849           .Build();
850 
851   IcingSearchEngineOptions options = GetDefaultIcingOptions();
852   {
853     // Initializes folder and schema
854     IcingSearchEngine icing(options, GetTestJniCache());
855     EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
856 
857     SchemaProto schema =
858         SchemaBuilder()
859             .AddType(
860                 SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
861                     // Add non-indexable property "additional"
862                     .AddProperty(PropertyConfigBuilder()
863                                      .SetName("additional")
864                                      .SetDataType(TYPE_STRING)
865                                      .SetCardinality(CARDINALITY_OPTIONAL)))
866             .Build();
867 
868     EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
869     EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
870     EXPECT_THAT(icing.Put(document2_with_additional_property).status(),
871                 ProtoIsOk());
872 
873     // Won't get us anything because "additional" isn't marked as an indexed
874     // property in the schema
875     SearchSpecProto search_spec;
876     search_spec.set_query("additional:content");
877     search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
878 
879     SearchResultProto expected_search_result_proto;
880     expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
881     SearchResultProto search_result_proto =
882         icing.Search(search_spec, GetDefaultScoringSpec(),
883                      ResultSpecProto::default_instance());
884     EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
885                                          expected_search_result_proto));
886   }  // This should shut down IcingSearchEngine and persist anything it needs to
887 
888   {
889     // This schema will change the SchemaTypeIds from the previous schema_
890     // (since SchemaTypeIds are assigned based on order of the types, and this
891     // new schema changes the ordering of previous types)
892     SchemaProto new_schema;
893     auto type = new_schema.add_types();
894     type->set_schema_type("Email");
895 
896     // Switching a non-indexable property to indexable changes the SectionIds
897     // (since SectionIds are assigned based on alphabetical order of indexed
898     // sections, marking "additional" as an indexed property will push the
899     // "body" and "indexableInteger" property to different SectionIds)
900     *new_schema.add_types() =
901         SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
902             .AddProperty(
903                 PropertyConfigBuilder()
904                     .SetName("additional")
905                     .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
906                     .SetCardinality(CARDINALITY_OPTIONAL))
907             .Build();
908 
909     // Write the marker file
910     std::string marker_filepath =
911         absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
912     ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
913     ASSERT_TRUE(sfd.is_valid());
914 
915     // Write the new schema
916     FakeClock fake_clock;
917     ICING_ASSERT_OK_AND_ASSIGN(
918         std::unique_ptr<SchemaStore> schema_store,
919         SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
920     ICING_EXPECT_OK(schema_store->SetSchema(
921         new_schema, /*ignore_errors_and_delete_documents=*/false,
922         /*allow_circular_schema_definitions=*/false));
923   }  // Will persist new schema
924 
925   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
926   EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
927 
928   // We can insert a Email document since we kept the new schema
929   DocumentProto email_document =
930       DocumentBuilder()
931           .SetKey("namespace", "email_uri")
932           .SetSchema("Email")
933           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
934           .Build();
935   EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
936 
937   GetResultProto expected_get_result_proto;
938   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
939   *expected_get_result_proto.mutable_document() = email_document;
940 
941   EXPECT_THAT(icing.Get("namespace", "email_uri",
942                         GetResultSpecProto::default_instance()),
943               EqualsProto(expected_get_result_proto));
944 
945   // Verify term search
946   SearchSpecProto search_spec1;
947 
948   // The section restrict will ensure we are using the correct, updated
949   // SectionId in the Index
950   search_spec1.set_query("additional:content");
951 
952   // Schema type filter will ensure we're using the correct, updated
953   // SchemaTypeId in the DocumentStore
954   search_spec1.add_schema_type_filters("Message");
955   search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
956 
957   SearchResultProto expected_search_result_proto1;
958   expected_search_result_proto1.mutable_status()->set_code(StatusProto::OK);
959   *expected_search_result_proto1.mutable_results()->Add()->mutable_document() =
960       document2_with_additional_property;
961 
962   SearchResultProto search_result_proto1 =
963       icing.Search(search_spec1, GetDefaultScoringSpec(),
964                    ResultSpecProto::default_instance());
965   EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
966                                         expected_search_result_proto1));
967 
968   // Verify numeric (integer) search
969   SearchSpecProto search_spec2;
970   search_spec2.set_query("indexableInteger == 123");
971   search_spec1.add_schema_type_filters("Message");
972   search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
973 
974   SearchResultProto expected_search_result_google::protobuf;
975   expected_search_result_google::protobuf.mutable_status()->set_code(StatusProto::OK);
976   *expected_search_result_google::protobuf.mutable_results()->Add()->mutable_document() =
977       document2_with_additional_property;
978   *expected_search_result_google::protobuf.mutable_results()->Add()->mutable_document() =
979       document1;
980 
981   SearchResultProto search_result_google::protobuf =
982       icing.Search(search_spec2, ScoringSpecProto::default_instance(),
983                    ResultSpecProto::default_instance());
984   EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
985                                         expected_search_result_google::protobuf));
986 }
987 
TEST_F(IcingSearchEngineInitializationTest,RecoverFromInconsistentDocumentStore)988 TEST_F(IcingSearchEngineInitializationTest,
989        RecoverFromInconsistentDocumentStore) {
990   // Test the following scenario: document store is ahead of term, integer and
991   // qualified id join index. IcingSearchEngine should be able to recover all
992   // indices. Several additional behaviors are also tested:
993   // - Index directory handling:
994   //   - Term index directory should be unaffected.
995   //   - Integer index directory should be unaffected.
996   //   - Qualified id join index directory should be unaffected.
997   // - Truncate indices:
998   //   - "TruncateTo()" for term index shouldn't take effect.
999   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
1000   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
1001   //     discarded.
1002   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
1003   //     underlying storage sub directory (path_expr =
1004   //     "*/qualified_id_join_index_dir/*") should be discarded.
1005   // - Still, we need to replay and reindex documents.
1006 
1007   SchemaProto schema =
1008       SchemaBuilder()
1009           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
1010               PropertyConfigBuilder()
1011                   .SetName("name")
1012                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
1013                   .SetCardinality(CARDINALITY_REQUIRED)))
1014           .AddType(SchemaTypeConfigBuilder()
1015                        .SetType("Message")
1016                        .AddProperty(PropertyConfigBuilder()
1017                                         .SetName("body")
1018                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1019                                                            TOKENIZER_PLAIN)
1020                                         .SetCardinality(CARDINALITY_REQUIRED))
1021                        .AddProperty(PropertyConfigBuilder()
1022                                         .SetName("indexableInteger")
1023                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
1024                                         .SetCardinality(CARDINALITY_REQUIRED))
1025                        .AddProperty(PropertyConfigBuilder()
1026                                         .SetName("senderQualifiedId")
1027                                         .SetDataTypeJoinableString(
1028                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
1029                                         .SetCardinality(CARDINALITY_REQUIRED)))
1030           .Build();
1031 
1032   DocumentProto person =
1033       DocumentBuilder()
1034           .SetKey("namespace", "person")
1035           .SetSchema("Person")
1036           .AddStringProperty("name", "person")
1037           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1038           .Build();
1039   DocumentProto message1 =
1040       DocumentBuilder()
1041           .SetKey("namespace", "message/1")
1042           .SetSchema("Message")
1043           .AddStringProperty("body", "message body one")
1044           .AddInt64Property("indexableInteger", 123)
1045           .AddStringProperty("senderQualifiedId", "namespace#person")
1046           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1047           .Build();
1048   DocumentProto message2 =
1049       DocumentBuilder()
1050           .SetKey("namespace", "message/2")
1051           .SetSchema("Message")
1052           .AddStringProperty("body", "message body two")
1053           .AddInt64Property("indexableInteger", 123)
1054           .AddStringProperty("senderQualifiedId", "namespace#person")
1055           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1056           .Build();
1057 
1058   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
1059 
1060   {
1061     // Initializes folder and schema, index one document
1062     TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
1063                                 std::make_unique<IcingFilesystem>(),
1064                                 std::make_unique<FakeClock>(),
1065                                 GetTestJniCache());
1066     EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
1067     EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
1068     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
1069     EXPECT_THAT(icing.Put(message1).status(), ProtoIsOk());
1070   }  // This should shut down IcingSearchEngine and persist anything it needs to
1071 
1072   {
1073     FakeClock fake_clock;
1074     ICING_ASSERT_OK_AND_ASSIGN(
1075         std::unique_ptr<SchemaStore> schema_store,
1076         SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
1077 
1078     // Puts message2 into DocumentStore but doesn't index it.
1079     ICING_ASSERT_OK_AND_ASSIGN(
1080         DocumentStore::CreateResult create_result,
1081         DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
1082                               schema_store.get(),
1083                               /*force_recovery_and_revalidate_documents=*/false,
1084                               /*namespace_id_fingerprint=*/true,
1085                               /*pre_mapping_fbv=*/false,
1086                               /*use_persistent_hash_map=*/true,
1087                               PortableFileBackedProtoLog<
1088                                   DocumentWrapper>::kDeflateCompressionLevel,
1089                               /*initialize_stats=*/nullptr));
1090     std::unique_ptr<DocumentStore> document_store =
1091         std::move(create_result.document_store);
1092 
1093     ICING_EXPECT_OK(document_store->Put(message2));
1094   }
1095 
1096   // Mock filesystem to observe and check the behavior of all indices.
1097   auto mock_filesystem = std::make_unique<MockFilesystem>();
1098   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
1099       .WillRepeatedly(DoDefault());
1100   // Ensure term index directory should never be discarded.
1101   EXPECT_CALL(*mock_filesystem,
1102               DeleteDirectoryRecursively(EndsWith("/index_dir")))
1103       .Times(0);
1104   // Ensure integer index directory should never be discarded, and Clear()
1105   // should never be called (i.e. storage sub directory
1106   // "*/integer_index_dir/*" should never be discarded).
1107   EXPECT_CALL(*mock_filesystem,
1108               DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
1109       .Times(0);
1110   EXPECT_CALL(*mock_filesystem,
1111               DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
1112       .Times(0);
1113   // Ensure qualified id join index directory should never be discarded, and
1114   // Clear() should never be called (i.e. storage sub directory
1115   // "*/qualified_id_join_index_dir/*" should never be discarded).
1116   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
1117                                     EndsWith("/qualified_id_join_index_dir")))
1118       .Times(0);
1119   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
1120                                     HasSubstr("/qualified_id_join_index_dir/")))
1121       .Times(0);
1122 
1123   TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
1124                               std::make_unique<IcingFilesystem>(),
1125                               std::make_unique<FakeClock>(), GetTestJniCache());
1126   InitializeResultProto initialize_result = icing.Initialize();
1127   EXPECT_THAT(initialize_result.status(), ProtoIsOk());
1128   // Index Restoration should be triggered here and document2 should be
1129   // indexed.
1130   EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
1131               Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
1132   EXPECT_THAT(
1133       initialize_result.initialize_stats().integer_index_restoration_cause(),
1134       Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
1135   EXPECT_THAT(initialize_result.initialize_stats()
1136                   .qualified_id_join_index_restoration_cause(),
1137               Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
1138 
1139   GetResultProto expected_get_result_proto;
1140   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
1141   *expected_get_result_proto.mutable_document() = message1;
1142 
1143   // DocumentStore kept the additional document
1144   EXPECT_THAT(icing.Get("namespace", "message/1",
1145                         GetResultSpecProto::default_instance()),
1146               EqualsProto(expected_get_result_proto));
1147 
1148   *expected_get_result_proto.mutable_document() = message2;
1149   EXPECT_THAT(icing.Get("namespace", "message/2",
1150                         GetResultSpecProto::default_instance()),
1151               EqualsProto(expected_get_result_proto));
1152 
1153   SearchResultProto expected_search_result_proto;
1154   expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
1155   *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
1156       message2;
1157   *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
1158       message1;
1159 
1160   // We indexed the additional document in all indices.
1161   // Verify term search
1162   SearchSpecProto search_spec1;
1163   search_spec1.set_query("message");
1164   search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
1165   SearchResultProto search_result_proto1 =
1166       icing.Search(search_spec1, GetDefaultScoringSpec(),
1167                    ResultSpecProto::default_instance());
1168   EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
1169                                         expected_search_result_proto));
1170 
1171   // Verify numeric (integer) search
1172   SearchSpecProto search_spec2;
1173   search_spec2.set_query("indexableInteger == 123");
1174   search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
1175 
1176   SearchResultProto search_result_google::protobuf =
1177       icing.Search(search_spec2, ScoringSpecProto::default_instance(),
1178                    ResultSpecProto::default_instance());
1179   EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
1180                                         expected_search_result_proto));
1181 
1182   // Verify join search: join a query for `name:person` with a child query for
1183   // `body:message` based on the child's `senderQualifiedId` field.
1184   SearchSpecProto search_spec3;
1185   search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
1186   search_spec3.set_query("name:person");
1187   JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
1188   join_spec->set_parent_property_expression(
1189       std::string(JoinProcessor::kQualifiedIdExpr));
1190   join_spec->set_child_property_expression("senderQualifiedId");
1191   join_spec->set_aggregation_scoring_strategy(
1192       JoinSpecProto::AggregationScoringStrategy::COUNT);
1193   JoinSpecProto::NestedSpecProto* nested_spec =
1194       join_spec->mutable_nested_spec();
1195   SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
1196   nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
1197   nested_search_spec->set_query("body:message");
1198   *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
1199   *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
1200 
1201   ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
1202   result_spec3.set_max_joined_children_per_parent_to_return(
1203       std::numeric_limits<int32_t>::max());
1204 
1205   SearchResultProto expected_join_search_result_proto;
1206   expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
1207   SearchResultProto::ResultProto* result_proto =
1208       expected_join_search_result_proto.mutable_results()->Add();
1209   *result_proto->mutable_document() = person;
1210   *result_proto->mutable_joined_results()->Add()->mutable_document() = message2;
1211   *result_proto->mutable_joined_results()->Add()->mutable_document() = message1;
1212 
1213   SearchResultProto search_result_proto3 = icing.Search(
1214       search_spec3, ScoringSpecProto::default_instance(), result_spec3);
1215   EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
1216                                         expected_join_search_result_proto));
1217 }
1218 
TEST_F(IcingSearchEngineInitializationTest,RecoverFromCorruptedDocumentStore)1219 TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptedDocumentStore) {
1220   // Test the following scenario: some document store derived files are
1221   // corrupted. IcingSearchEngine should be able to recover the document store,
1222   // and since NamespaceIds were reassigned, we should rebuild qualified id join
1223   // index as well. Several additional behaviors are also tested:
1224   // - Index directory handling:
1225   //   - Term index directory should be unaffected.
1226   //   - Integer index directory should be unaffected.
1227   //   - Should discard the entire qualified id join index directory and start
1228   //     it from scratch.
1229   // - Truncate indices:
1230   //   - "TruncateTo()" for term index shouldn't take effect.
1231   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
1232   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
1233   //     discarded.
1234   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
1235   //     underlying storage sub directory (path_expr =
1236   //     "*/qualified_id_join_index_dir/*") should be discarded.
1237   // - Still, we need to replay and reindex documents (for qualified id join
1238   //   index).
1239 
1240   SchemaProto schema =
1241       SchemaBuilder()
1242           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
1243               PropertyConfigBuilder()
1244                   .SetName("name")
1245                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
1246                   .SetCardinality(CARDINALITY_REQUIRED)))
1247           .AddType(SchemaTypeConfigBuilder()
1248                        .SetType("Message")
1249                        .AddProperty(PropertyConfigBuilder()
1250                                         .SetName("body")
1251                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1252                                                            TOKENIZER_PLAIN)
1253                                         .SetCardinality(CARDINALITY_REQUIRED))
1254                        .AddProperty(PropertyConfigBuilder()
1255                                         .SetName("indexableInteger")
1256                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
1257                                         .SetCardinality(CARDINALITY_REQUIRED))
1258                        .AddProperty(PropertyConfigBuilder()
1259                                         .SetName("senderQualifiedId")
1260                                         .SetDataTypeJoinableString(
1261                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
1262                                         .SetCardinality(CARDINALITY_REQUIRED)))
1263           .Build();
1264 
1265   DocumentProto personDummy =
1266       DocumentBuilder()
1267           .SetKey("namespace2", "personDummy")
1268           .SetSchema("Person")
1269           .AddStringProperty("name", "personDummy")
1270           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1271           .Build();
1272   DocumentProto person1 =
1273       DocumentBuilder()
1274           .SetKey("namespace1", "person")
1275           .SetSchema("Person")
1276           .AddStringProperty("name", "person")
1277           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1278           .Build();
1279   DocumentProto person2 =
1280       DocumentBuilder()
1281           .SetKey("namespace2", "person")
1282           .SetSchema("Person")
1283           .AddStringProperty("name", "person")
1284           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1285           .Build();
1286   DocumentProto message =
1287       DocumentBuilder()
1288           .SetKey("namespace2", "message/1")
1289           .SetSchema("Message")
1290           .AddStringProperty("body", "message body one")
1291           .AddInt64Property("indexableInteger", 123)
1292           .AddStringProperty("senderQualifiedId", "namespace2#person")
1293           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1294           .Build();
1295 
1296   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
1297 
1298   {
1299     // Initializes folder and schema, index one document
1300     TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
1301                                 std::make_unique<IcingFilesystem>(),
1302                                 std::make_unique<FakeClock>(),
1303                                 GetTestJniCache());
1304     EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
1305     EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
1306     // "namespace2" (in personDummy) will be assigned NamespaceId = 0.
1307     EXPECT_THAT(icing.Put(personDummy).status(), ProtoIsOk());
1308     // "namespace1" (in person1) will be assigned NamespaceId = 1.
1309     EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
1310     EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
1311     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
1312 
1313     // Now delete personDummy.
1314     EXPECT_THAT(
1315         icing.Delete(personDummy.namespace_(), personDummy.uri()).status(),
1316         ProtoIsOk());
1317   }  // This should shut down IcingSearchEngine and persist anything it needs to
1318 
1319   {
1320     FakeClock fake_clock;
1321     ICING_ASSERT_OK_AND_ASSIGN(
1322         std::unique_ptr<SchemaStore> schema_store,
1323         SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
1324 
1325     // Manually corrupt one of the derived files of DocumentStore without
1326     // updating checksum in DocumentStore header.
1327     std::string score_cache_filename = GetDocumentDir() + "/score_cache";
1328     ICING_ASSERT_OK_AND_ASSIGN(
1329         std::unique_ptr<FileBackedVector<DocumentAssociatedScoreData>>
1330             score_cache,
1331         FileBackedVector<DocumentAssociatedScoreData>::Create(
1332             *filesystem(), std::move(score_cache_filename),
1333             MemoryMappedFile::READ_WRITE_AUTO_SYNC));
1334     ICING_ASSERT_OK_AND_ASSIGN(const DocumentAssociatedScoreData* score_data,
1335                                score_cache->Get(/*idx=*/0));
1336     ICING_ASSERT_OK(score_cache->Set(
1337         /*idx=*/0,
1338         DocumentAssociatedScoreData(score_data->corpus_id(),
1339                                     score_data->document_score() + 1,
1340                                     score_data->creation_timestamp_ms(),
1341                                     score_data->length_in_tokens())));
1342     ICING_ASSERT_OK(score_cache->PersistToDisk());
1343   }
1344 
1345   // Mock filesystem to observe and check the behavior of all indices.
1346   auto mock_filesystem = std::make_unique<MockFilesystem>();
1347   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
1348       .WillRepeatedly(DoDefault());
1349   // Ensure term index directory should never be discarded.
1350   EXPECT_CALL(*mock_filesystem,
1351               DeleteDirectoryRecursively(EndsWith("/index_dir")))
1352       .Times(0);
1353   // Ensure integer index directory should never be discarded, and Clear()
1354   // should never be called (i.e. storage sub directory
1355   // "*/integer_index_dir/*" should never be discarded).
1356   EXPECT_CALL(*mock_filesystem,
1357               DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
1358       .Times(0);
1359   EXPECT_CALL(*mock_filesystem,
1360               DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
1361       .Times(0);
1362   // Ensure qualified id join index directory should be discarded once, and
1363   // Clear() should never be called (i.e. storage sub directory
1364   // "*/qualified_id_join_index_dir/*" should never be discarded).
1365   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
1366                                     EndsWith("/qualified_id_join_index_dir")))
1367       .Times(1);
1368   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
1369                                     HasSubstr("/qualified_id_join_index_dir/")))
1370       .Times(0);
1371 
1372   TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
1373                               std::make_unique<IcingFilesystem>(),
1374                               std::make_unique<FakeClock>(), GetTestJniCache());
1375   InitializeResultProto initialize_result = icing.Initialize();
1376   EXPECT_THAT(initialize_result.status(), ProtoIsOk());
1377   // DocumentStore should be recovered. When reassigning NamespaceId, the order
1378   // will be the document traversal order: [person1, person2, message].
1379   // Therefore, "namespace1" will have id = 0 and "namespace2" will have id = 1.
1380   EXPECT_THAT(
1381       initialize_result.initialize_stats().document_store_recovery_cause(),
1382       Eq(InitializeStatsProto::IO_ERROR));
1383   // Term, integer index should be unaffected.
1384   EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
1385               Eq(InitializeStatsProto::NONE));
1386   EXPECT_THAT(
1387       initialize_result.initialize_stats().integer_index_restoration_cause(),
1388       Eq(InitializeStatsProto::NONE));
1389   // Qualified id join index should be rebuilt.
1390   EXPECT_THAT(initialize_result.initialize_stats()
1391                   .qualified_id_join_index_restoration_cause(),
1392               Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
1393 
1394   // Verify join search: join a query for `name:person` with a child query for
1395   // `body:message` based on the child's `senderQualifiedId` field. message2
1396   // should be joined to person2 correctly.
1397   SearchSpecProto search_spec;
1398   search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
1399   search_spec.set_query("name:person");
1400   JoinSpecProto* join_spec = search_spec.mutable_join_spec();
1401   join_spec->set_parent_property_expression(
1402       std::string(JoinProcessor::kQualifiedIdExpr));
1403   join_spec->set_child_property_expression("senderQualifiedId");
1404   join_spec->set_aggregation_scoring_strategy(
1405       JoinSpecProto::AggregationScoringStrategy::COUNT);
1406   JoinSpecProto::NestedSpecProto* nested_spec =
1407       join_spec->mutable_nested_spec();
1408   SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
1409   nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
1410   nested_search_spec->set_query("body:message");
1411   *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
1412   *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
1413 
1414   ResultSpecProto result_spec = ResultSpecProto::default_instance();
1415   result_spec.set_max_joined_children_per_parent_to_return(
1416       std::numeric_limits<int32_t>::max());
1417 
1418   SearchResultProto expected_join_search_result_proto;
1419   expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
1420   SearchResultProto::ResultProto* result_proto =
1421       expected_join_search_result_proto.mutable_results()->Add();
1422   *result_proto->mutable_document() = person2;
1423   *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
1424 
1425   *expected_join_search_result_proto.mutable_results()
1426        ->Add()
1427        ->mutable_document() = person1;
1428 
1429   SearchResultProto search_result_proto = icing.Search(
1430       search_spec, ScoringSpecProto::default_instance(), result_spec);
1431   EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
1432                                        expected_join_search_result_proto));
1433 }
1434 
TEST_F(IcingSearchEngineInitializationTest,RecoverFromCorruptIndex)1435 TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) {
1436   // Test the following scenario: term index is corrupted (e.g. checksum doesn't
1437   // match). IcingSearchEngine should be able to recover term index. Several
1438   // additional behaviors are also tested:
1439   // - Index directory handling:
1440   //   - Should discard the entire term index directory and start it from
1441   //     scratch.
1442   //   - Integer index directory should be unaffected.
1443   //   - Qualified id join index directory should be unaffected.
1444   // - Truncate indices:
1445   //   - "TruncateTo()" for term index shouldn't take effect since we start it
1446   //     from scratch.
1447   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
1448   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
1449   //     discarded.
1450   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
1451   //     underlying storage sub directory (path_expr =
1452   //     "*/qualified_id_join_index_dir/*") should be discarded.
1453 
1454   SchemaProto schema =
1455       SchemaBuilder()
1456           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
1457               PropertyConfigBuilder()
1458                   .SetName("name")
1459                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
1460                   .SetCardinality(CARDINALITY_REQUIRED)))
1461           .AddType(SchemaTypeConfigBuilder()
1462                        .SetType("Message")
1463                        .AddProperty(PropertyConfigBuilder()
1464                                         .SetName("body")
1465                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1466                                                            TOKENIZER_PLAIN)
1467                                         .SetCardinality(CARDINALITY_REQUIRED))
1468                        .AddProperty(PropertyConfigBuilder()
1469                                         .SetName("indexableInteger")
1470                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
1471                                         .SetCardinality(CARDINALITY_REQUIRED))
1472                        .AddProperty(PropertyConfigBuilder()
1473                                         .SetName("senderQualifiedId")
1474                                         .SetDataTypeJoinableString(
1475                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
1476                                         .SetCardinality(CARDINALITY_REQUIRED)))
1477           .Build();
1478 
1479   DocumentProto person =
1480       DocumentBuilder()
1481           .SetKey("namespace", "person")
1482           .SetSchema("Person")
1483           .AddStringProperty("name", "person")
1484           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1485           .Build();
1486   DocumentProto message =
1487       DocumentBuilder()
1488           .SetKey("namespace", "message/1")
1489           .SetSchema("Message")
1490           .AddStringProperty("body", "message body")
1491           .AddInt64Property("indexableInteger", 123)
1492           .AddStringProperty("senderQualifiedId", "namespace#person")
1493           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1494           .Build();
1495 
1496   SearchSpecProto search_spec;
1497   search_spec.set_query("body:message");
1498   search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
1499 
1500   SearchResultProto expected_search_result_proto;
1501   expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
1502   *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
1503       message;
1504 
1505   {
1506     // Initializes folder and schema, index one document
1507     TestIcingSearchEngine icing(
1508         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
1509         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
1510         GetTestJniCache());
1511     EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
1512     EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
1513     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
1514     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
1515     SearchResultProto search_result_proto =
1516         icing.Search(search_spec, GetDefaultScoringSpec(),
1517                      ResultSpecProto::default_instance());
1518     EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
1519                                          expected_search_result_proto));
1520   }  // This should shut down IcingSearchEngine and persist anything it needs to
1521 
1522   // Manually corrupt term index
1523   {
1524     const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb";
1525     ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str()));
1526     ASSERT_TRUE(fd.is_valid());
1527     ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
1528   }
1529 
1530   // Mock filesystem to observe and check the behavior of all indices.
1531   auto mock_filesystem = std::make_unique<MockFilesystem>();
1532   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
1533       .WillRepeatedly(DoDefault());
1534   // Ensure term index directory should be discarded once.
1535   EXPECT_CALL(*mock_filesystem,
1536               DeleteDirectoryRecursively(EndsWith("/index_dir")))
1537       .Times(1);
1538   // Ensure integer index directory should never be discarded, and Clear()
1539   // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
1540   // should never be discarded).
1541   EXPECT_CALL(*mock_filesystem,
1542               DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
1543       .Times(0);
1544   EXPECT_CALL(*mock_filesystem,
1545               DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
1546       .Times(0);
1547   // Ensure qualified id join index directory should never be discarded, and
1548   // Clear() should never be called (i.e. storage sub directory
1549   // "*/qualified_id_join_index_dir/*" should never be discarded).
1550   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
1551                                     EndsWith("/qualified_id_join_index_dir")))
1552       .Times(0);
1553   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
1554                                     HasSubstr("/qualified_id_join_index_dir/")))
1555       .Times(0);
1556 
1557   TestIcingSearchEngine icing(GetDefaultIcingOptions(),
1558                               std::move(mock_filesystem),
1559                               std::make_unique<IcingFilesystem>(),
1560                               std::make_unique<FakeClock>(), GetTestJniCache());
1561   InitializeResultProto initialize_result = icing.Initialize();
1562   EXPECT_THAT(initialize_result.status(), ProtoIsOk());
1563   EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
1564               Eq(InitializeStatsProto::IO_ERROR));
1565   EXPECT_THAT(
1566       initialize_result.initialize_stats().integer_index_restoration_cause(),
1567       Eq(InitializeStatsProto::NONE));
1568   EXPECT_THAT(initialize_result.initialize_stats()
1569                   .qualified_id_join_index_restoration_cause(),
1570               Eq(InitializeStatsProto::NONE));
1571 
1572   // Check that our index is ok by searching over the restored index
1573   SearchResultProto search_result_proto =
1574       icing.Search(search_spec, GetDefaultScoringSpec(),
1575                    ResultSpecProto::default_instance());
1576   EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
1577                                        expected_search_result_proto));
1578 }
1579 
TEST_F(IcingSearchEngineInitializationTest,RecoverFromCorruptIntegerIndex)1580 TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) {
1581   // Test the following scenario: integer index is corrupted (e.g. checksum
1582   // doesn't match). IcingSearchEngine should be able to recover integer index.
1583   // Several additional behaviors are also tested:
1584   // - Index directory handling:
1585   //   - Term index directory should be unaffected.
1586   //   - Should discard the entire integer index directory and start it from
1587   //     scratch.
1588   //   - Qualified id join index directory should be unaffected.
1589   // - Truncate indices:
1590   //   - "TruncateTo()" for term index shouldn't take effect.
1591   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
1592   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
1593   //     discarded, since we start it from scratch.
1594   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
1595   //     underlying storage sub directory (path_expr =
1596   //     "*/qualified_id_join_index_dir/*") should be discarded.
1597 
1598   SchemaProto schema =
1599       SchemaBuilder()
1600           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
1601               PropertyConfigBuilder()
1602                   .SetName("name")
1603                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
1604                   .SetCardinality(CARDINALITY_REQUIRED)))
1605           .AddType(SchemaTypeConfigBuilder()
1606                        .SetType("Message")
1607                        .AddProperty(PropertyConfigBuilder()
1608                                         .SetName("body")
1609                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1610                                                            TOKENIZER_PLAIN)
1611                                         .SetCardinality(CARDINALITY_REQUIRED))
1612                        .AddProperty(PropertyConfigBuilder()
1613                                         .SetName("indexableInteger")
1614                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
1615                                         .SetCardinality(CARDINALITY_REQUIRED))
1616                        .AddProperty(PropertyConfigBuilder()
1617                                         .SetName("senderQualifiedId")
1618                                         .SetDataTypeJoinableString(
1619                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
1620                                         .SetCardinality(CARDINALITY_REQUIRED)))
1621           .Build();
1622 
1623   DocumentProto person =
1624       DocumentBuilder()
1625           .SetKey("namespace", "person")
1626           .SetSchema("Person")
1627           .AddStringProperty("name", "person")
1628           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1629           .Build();
1630   DocumentProto message =
1631       DocumentBuilder()
1632           .SetKey("namespace", "message/1")
1633           .SetSchema("Message")
1634           .AddStringProperty("body", "message body")
1635           .AddInt64Property("indexableInteger", 123)
1636           .AddStringProperty("senderQualifiedId", "namespace#person")
1637           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1638           .Build();
1639 
1640   SearchSpecProto search_spec;
1641   search_spec.set_query("indexableInteger == 123");
1642   search_spec.add_enabled_features(std::string(kNumericSearchFeature));
1643 
1644   SearchResultProto expected_search_result_proto;
1645   expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
1646   *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
1647       message;
1648 
1649   {
1650     // Initializes folder and schema, index one document
1651     TestIcingSearchEngine icing(
1652         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
1653         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
1654         GetTestJniCache());
1655     EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
1656     EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
1657     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
1658     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
1659     SearchResultProto search_result_proto =
1660         icing.Search(search_spec, GetDefaultScoringSpec(),
1661                      ResultSpecProto::default_instance());
1662     EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
1663                                          expected_search_result_proto));
1664   }  // This should shut down IcingSearchEngine and persist anything it needs to
1665 
1666   // Manually corrupt integer index
1667   {
1668     const std::string integer_index_metadata_file =
1669         GetIntegerIndexDir() + "/integer_index.m";
1670     ScopedFd fd(
1671         filesystem()->OpenForWrite(integer_index_metadata_file.c_str()));
1672     ASSERT_TRUE(fd.is_valid());
1673     ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
1674   }
1675 
1676   // Mock filesystem to observe and check the behavior of all indices.
1677   auto mock_filesystem = std::make_unique<MockFilesystem>();
1678   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
1679       .WillRepeatedly(DoDefault());
1680   // Ensure term index directory should never be discarded.
1681   EXPECT_CALL(*mock_filesystem,
1682               DeleteDirectoryRecursively(EndsWith("/index_dir")))
1683       .Times(0);
1684   // Ensure integer index directory should be discarded once, and Clear()
1685   // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
1686   // should never be discarded) since we start it from scratch.
1687   EXPECT_CALL(*mock_filesystem,
1688               DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
1689       .Times(1);
1690   EXPECT_CALL(*mock_filesystem,
1691               DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
1692       .Times(0);
1693   // Ensure qualified id join index directory should never be discarded, and
1694   // Clear() should never be called (i.e. storage sub directory
1695   // "*/qualified_id_join_index_dir/*" should never be discarded).
1696   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
1697                                     EndsWith("/qualified_id_join_index_dir")))
1698       .Times(0);
1699   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
1700                                     HasSubstr("/qualified_id_join_index_dir/")))
1701       .Times(0);
1702 
1703   TestIcingSearchEngine icing(GetDefaultIcingOptions(),
1704                               std::move(mock_filesystem),
1705                               std::make_unique<IcingFilesystem>(),
1706                               std::make_unique<FakeClock>(), GetTestJniCache());
1707   InitializeResultProto initialize_result = icing.Initialize();
1708   EXPECT_THAT(initialize_result.status(), ProtoIsOk());
1709   EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
1710               Eq(InitializeStatsProto::NONE));
1711   EXPECT_THAT(
1712       initialize_result.initialize_stats().integer_index_restoration_cause(),
1713       Eq(InitializeStatsProto::IO_ERROR));
1714   EXPECT_THAT(initialize_result.initialize_stats()
1715                   .qualified_id_join_index_restoration_cause(),
1716               Eq(InitializeStatsProto::NONE));
1717 
1718   // Check that our index is ok by searching over the restored index
1719   SearchResultProto search_result_proto =
1720       icing.Search(search_spec, GetDefaultScoringSpec(),
1721                    ResultSpecProto::default_instance());
1722   EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
1723                                        expected_search_result_proto));
1724 }
1725 
TEST_F(IcingSearchEngineInitializationTest,RecoverFromIntegerIndexBucketSplitThresholdChange)1726 TEST_F(IcingSearchEngineInitializationTest,
1727        RecoverFromIntegerIndexBucketSplitThresholdChange) {
1728   SchemaProto schema =
1729       SchemaBuilder()
1730           .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
1731               PropertyConfigBuilder()
1732                   .SetName("indexableInteger")
1733                   .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
1734                   .SetCardinality(CARDINALITY_REQUIRED)))
1735           .Build();
1736 
1737   DocumentProto message =
1738       DocumentBuilder()
1739           .SetKey("namespace", "message/1")
1740           .SetSchema("Message")
1741           .AddInt64Property("indexableInteger", 123)
1742           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1743           .Build();
1744 
1745   // 1. Create an index with a message document.
1746   {
1747     TestIcingSearchEngine icing(
1748         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
1749         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
1750         GetTestJniCache());
1751 
1752     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
1753     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
1754 
1755     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
1756   }
1757 
1758   // 2. Create the index again with different
1759   //    integer_index_bucket_split_threshold. This should trigger index
1760   //    restoration.
1761   {
1762     // Mock filesystem to observe and check the behavior of all indices.
1763     auto mock_filesystem = std::make_unique<MockFilesystem>();
1764     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
1765         .WillRepeatedly(DoDefault());
1766     // Ensure term index directory should never be discarded.
1767     EXPECT_CALL(*mock_filesystem,
1768                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
1769         .Times(0);
1770     // Ensure integer index directory should be discarded once, and Clear()
1771     // should never be called (i.e. storage sub directory
1772     // "*/integer_index_dir/*" should never be discarded) since we start it from
1773     // scratch.
1774     EXPECT_CALL(*mock_filesystem,
1775                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
1776         .Times(1);
1777     EXPECT_CALL(*mock_filesystem,
1778                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
1779         .Times(0);
1780     // Ensure qualified id join index directory should never be discarded, and
1781     // Clear() should never be called (i.e. storage sub directory
1782     // "*/qualified_id_join_index_dir/*" should never be discarded).
1783     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
1784                                       EndsWith("/qualified_id_join_index_dir")))
1785         .Times(0);
1786     EXPECT_CALL(
1787         *mock_filesystem,
1788         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
1789         .Times(0);
1790 
1791     static constexpr int32_t kNewIntegerIndexBucketSplitThreshold = 1000;
1792     IcingSearchEngineOptions options = GetDefaultIcingOptions();
1793     ASSERT_THAT(kNewIntegerIndexBucketSplitThreshold,
1794                 Ne(options.integer_index_bucket_split_threshold()));
1795     options.set_integer_index_bucket_split_threshold(
1796         kNewIntegerIndexBucketSplitThreshold);
1797 
1798     TestIcingSearchEngine icing(options, std::move(mock_filesystem),
1799                                 std::make_unique<IcingFilesystem>(),
1800                                 std::make_unique<FakeClock>(),
1801                                 GetTestJniCache());
1802     InitializeResultProto initialize_result = icing.Initialize();
1803     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
1804     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
1805                 Eq(InitializeStatsProto::NONE));
1806     EXPECT_THAT(
1807         initialize_result.initialize_stats().integer_index_restoration_cause(),
1808         Eq(InitializeStatsProto::IO_ERROR));
1809     EXPECT_THAT(initialize_result.initialize_stats()
1810                     .qualified_id_join_index_restoration_cause(),
1811                 Eq(InitializeStatsProto::NONE));
1812 
1813     // Verify integer index works normally
1814     SearchSpecProto search_spec;
1815     search_spec.set_query("indexableInteger == 123");
1816     search_spec.add_enabled_features(std::string(kNumericSearchFeature));
1817 
1818     SearchResultProto results =
1819         icing.Search(search_spec, ScoringSpecProto::default_instance(),
1820                      ResultSpecProto::default_instance());
1821     ASSERT_THAT(results.results(), SizeIs(1));
1822     EXPECT_THAT(results.results(0).document().uri(), Eq("message/1"));
1823   }
1824 }
1825 
TEST_F(IcingSearchEngineInitializationTest,RecoverFromCorruptQualifiedIdJoinIndex)1826 TEST_F(IcingSearchEngineInitializationTest,
1827        RecoverFromCorruptQualifiedIdJoinIndex) {
1828   // Test the following scenario: qualified id join index is corrupted (e.g.
1829   // checksum doesn't match). IcingSearchEngine should be able to recover
1830   // qualified id join index. Several additional behaviors are also tested:
1831   // - Index directory handling:
1832   //   - Term index directory should be unaffected.
1833   //   - Integer index directory should be unaffected.
1834   //   - Should discard the entire qualified id join index directory and start
1835   //     it from scratch.
1836   // - Truncate indices:
1837   //   - "TruncateTo()" for term index shouldn't take effect.
1838   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
1839   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
1840   //     discarded.
1841   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
1842   //     underlying storage sub directory (path_expr =
1843   //     "*/qualified_id_join_index_dir/*") should be discarded, since we start
1844   //     it from scratch.
1845 
1846   SchemaProto schema =
1847       SchemaBuilder()
1848           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
1849               PropertyConfigBuilder()
1850                   .SetName("name")
1851                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
1852                   .SetCardinality(CARDINALITY_REQUIRED)))
1853           .AddType(SchemaTypeConfigBuilder()
1854                        .SetType("Message")
1855                        .AddProperty(PropertyConfigBuilder()
1856                                         .SetName("body")
1857                                         .SetDataTypeString(TERM_MATCH_PREFIX,
1858                                                            TOKENIZER_PLAIN)
1859                                         .SetCardinality(CARDINALITY_REQUIRED))
1860                        .AddProperty(PropertyConfigBuilder()
1861                                         .SetName("indexableInteger")
1862                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
1863                                         .SetCardinality(CARDINALITY_REQUIRED))
1864                        .AddProperty(PropertyConfigBuilder()
1865                                         .SetName("senderQualifiedId")
1866                                         .SetDataTypeJoinableString(
1867                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
1868                                         .SetCardinality(CARDINALITY_REQUIRED)))
1869           .Build();
1870 
1871   DocumentProto person =
1872       DocumentBuilder()
1873           .SetKey("namespace", "person")
1874           .SetSchema("Person")
1875           .AddStringProperty("name", "person")
1876           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1877           .Build();
1878   DocumentProto message =
1879       DocumentBuilder()
1880           .SetKey("namespace", "message/1")
1881           .SetSchema("Message")
1882           .AddStringProperty("body", "message body")
1883           .AddInt64Property("indexableInteger", 123)
1884           .AddStringProperty("senderQualifiedId", "namespace#person")
1885           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
1886           .Build();
1887 
1888   // Prepare join search spec to join a query for `name:person` with a child
1889   // query for `body:message` based on the child's `senderQualifiedId` field.
1890   SearchSpecProto search_spec;
1891   search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
1892   search_spec.set_query("name:person");
1893   JoinSpecProto* join_spec = search_spec.mutable_join_spec();
1894   join_spec->set_parent_property_expression(
1895       std::string(JoinProcessor::kQualifiedIdExpr));
1896   join_spec->set_child_property_expression("senderQualifiedId");
1897   join_spec->set_aggregation_scoring_strategy(
1898       JoinSpecProto::AggregationScoringStrategy::COUNT);
1899   JoinSpecProto::NestedSpecProto* nested_spec =
1900       join_spec->mutable_nested_spec();
1901   SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
1902   nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
1903   nested_search_spec->set_query("body:message");
1904   *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
1905   *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
1906 
1907   ResultSpecProto result_spec = ResultSpecProto::default_instance();
1908   result_spec.set_max_joined_children_per_parent_to_return(
1909       std::numeric_limits<int32_t>::max());
1910 
1911   SearchResultProto expected_search_result_proto;
1912   expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
1913   SearchResultProto::ResultProto* result_proto =
1914       expected_search_result_proto.mutable_results()->Add();
1915   *result_proto->mutable_document() = person;
1916   *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
1917 
1918   {
1919     // Initializes folder and schema, index one document
1920     TestIcingSearchEngine icing(
1921         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
1922         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
1923         GetTestJniCache());
1924     EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
1925     EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
1926     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
1927     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
1928     SearchResultProto search_result_proto =
1929         icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
1930     EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
1931                                          expected_search_result_proto));
1932   }  // This should shut down IcingSearchEngine and persist anything it needs to
1933 
1934   // Manually corrupt qualified id join index
1935   {
1936     const std::string qualified_id_join_index_metadata_file =
1937         GetQualifiedIdJoinIndexDir() + "/metadata";
1938     ScopedFd fd(filesystem()->OpenForWrite(
1939         qualified_id_join_index_metadata_file.c_str()));
1940     ASSERT_TRUE(fd.is_valid());
1941     ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
1942   }
1943 
1944   // Mock filesystem to observe and check the behavior of all indices.
1945   auto mock_filesystem = std::make_unique<MockFilesystem>();
1946   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
1947       .WillRepeatedly(DoDefault());
1948   // Ensure term index directory should never be discarded.
1949   EXPECT_CALL(*mock_filesystem,
1950               DeleteDirectoryRecursively(EndsWith("/index_dir")))
1951       .Times(0);
1952   // Ensure integer index directory should never be discarded, and Clear()
1953   // should never be called (i.e. storage sub directory "*/integer_index_dir/*"
1954   // should never be discarded).
1955   EXPECT_CALL(*mock_filesystem,
1956               DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
1957       .Times(0);
1958   EXPECT_CALL(*mock_filesystem,
1959               DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
1960       .Times(0);
1961   // Ensure qualified id join index directory should be discarded once, and
1962   // Clear() should never be called (i.e. storage sub directory
1963   // "*/qualified_id_join_index_dir/*" should never be discarded).
1964   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
1965                                     EndsWith("/qualified_id_join_index_dir")))
1966       .Times(1);
1967   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
1968                                     HasSubstr("/qualified_id_join_index_dir/")))
1969       .Times(0);
1970 
1971   TestIcingSearchEngine icing(GetDefaultIcingOptions(),
1972                               std::move(mock_filesystem),
1973                               std::make_unique<IcingFilesystem>(),
1974                               std::make_unique<FakeClock>(), GetTestJniCache());
1975   InitializeResultProto initialize_result = icing.Initialize();
1976   EXPECT_THAT(initialize_result.status(), ProtoIsOk());
1977   EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
1978               Eq(InitializeStatsProto::NONE));
1979   EXPECT_THAT(
1980       initialize_result.initialize_stats().integer_index_restoration_cause(),
1981       Eq(InitializeStatsProto::NONE));
1982   EXPECT_THAT(initialize_result.initialize_stats()
1983                   .qualified_id_join_index_restoration_cause(),
1984               Eq(InitializeStatsProto::IO_ERROR));
1985 
1986   // Check that our index is ok by searching over the restored index
1987   SearchResultProto search_result_proto =
1988       icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
1989   EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
1990                                        expected_search_result_proto));
1991 }
1992 
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexLoseTermIndex)1993 TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) {
1994   // Test the following scenario: losing the entire term index. Since we need
1995   // flash index magic to determine the version, in this test we will throw out
1996   // the entire term index and re-initialize an empty one, to bypass
1997   // undetermined version state change and correctly trigger "lose term index"
1998   // scenario.
1999   // IcingSearchEngine should be able to recover term index. Several additional
2000   // behaviors are also tested:
2001   // - Index directory handling:
2002   //   - Term index directory should not be discarded (but instead just being
2003   //     rebuilt by replaying all docs).
2004   //   - Integer index directory should be unaffected.
2005   //   - Qualified id join index directory should be unaffected.
2006   // - Truncate indices:
2007   //   - "TruncateTo()" for term index shouldn't take effect since it is empty.
2008   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
2009   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
2010   //     discarded.
2011   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
2012   //     underlying storage sub directory (path_expr =
2013   //     "*/qualified_id_join_index_dir/*") should be discarded.
2014 
2015   SchemaProto schema =
2016       SchemaBuilder()
2017           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
2018               PropertyConfigBuilder()
2019                   .SetName("name")
2020                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
2021                   .SetCardinality(CARDINALITY_REQUIRED)))
2022           .AddType(SchemaTypeConfigBuilder()
2023                        .SetType("Message")
2024                        .AddProperty(PropertyConfigBuilder()
2025                                         .SetName("body")
2026                                         .SetDataTypeString(TERM_MATCH_PREFIX,
2027                                                            TOKENIZER_PLAIN)
2028                                         .SetCardinality(CARDINALITY_REQUIRED))
2029                        .AddProperty(PropertyConfigBuilder()
2030                                         .SetName("indexableInteger")
2031                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
2032                                         .SetCardinality(CARDINALITY_REQUIRED))
2033                        .AddProperty(PropertyConfigBuilder()
2034                                         .SetName("senderQualifiedId")
2035                                         .SetDataTypeJoinableString(
2036                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
2037                                         .SetCardinality(CARDINALITY_REQUIRED)))
2038           .Build();
2039 
2040   DocumentProto person =
2041       DocumentBuilder()
2042           .SetKey("namespace", "person")
2043           .SetSchema("Person")
2044           .AddStringProperty("name", "person")
2045           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2046           .Build();
2047   DocumentProto message =
2048       DocumentBuilder()
2049           .SetKey("namespace", "message/1")
2050           .SetSchema("Message")
2051           .AddStringProperty("body", kIpsumText)
2052           .AddInt64Property("indexableInteger", 123)
2053           .AddStringProperty("senderQualifiedId", "namespace#person")
2054           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2055           .Build();
2056 
2057   // 1. Create an index with 3 message documents.
2058   {
2059     TestIcingSearchEngine icing(
2060         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
2061         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
2062         GetTestJniCache());
2063 
2064     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
2065     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
2066 
2067     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
2068     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2069     message = DocumentBuilder(message).SetUri("message/2").Build();
2070     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2071     message = DocumentBuilder(message).SetUri("message/3").Build();
2072     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2073   }
2074 
2075   // 2. Delete and re-initialize an empty term index to trigger
2076   // RestoreIndexIfNeeded.
2077   {
2078     std::string idx_subdir = GetIndexDir() + "/idx";
2079     ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()));
2080     ICING_ASSERT_OK_AND_ASSIGN(
2081         std::unique_ptr<Index> index,
2082         Index::Create(Index::Options(GetIndexDir(),
2083                                      /*index_merge_size=*/100,
2084                                      /*lite_index_sort_at_indexing=*/true,
2085                                      /*lite_index_sort_size=*/50),
2086                       filesystem(), icing_filesystem()));
2087     ICING_ASSERT_OK(index->PersistToDisk());
2088   }
2089 
2090   // 3. Create the index again. This should trigger index restoration.
2091   {
2092     // Mock filesystem to observe and check the behavior of all indices.
2093     auto mock_filesystem = std::make_unique<MockFilesystem>();
2094     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
2095         .WillRepeatedly(DoDefault());
2096     // Ensure term index directory should never be discarded since we've already
2097     // lost it.
2098     EXPECT_CALL(*mock_filesystem,
2099                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
2100         .Times(0);
2101     // Ensure integer index directory should never be discarded, and Clear()
2102     // should never be called (i.e. storage sub directory
2103     // "*/integer_index_dir/*" should never be discarded).
2104     EXPECT_CALL(*mock_filesystem,
2105                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
2106         .Times(0);
2107     EXPECT_CALL(*mock_filesystem,
2108                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
2109         .Times(0);
2110     // Ensure qualified id join index directory should never be discarded, and
2111     // Clear() should never be called (i.e. storage sub directory
2112     // "*/qualified_id_join_index_dir/*" should never be discarded).
2113     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
2114                                       EndsWith("/qualified_id_join_index_dir")))
2115         .Times(0);
2116     EXPECT_CALL(
2117         *mock_filesystem,
2118         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
2119         .Times(0);
2120 
2121     TestIcingSearchEngine icing(
2122         GetDefaultIcingOptions(), std::move(mock_filesystem),
2123         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
2124         GetTestJniCache());
2125     InitializeResultProto initialize_result = icing.Initialize();
2126     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
2127     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
2128                 Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
2129     EXPECT_THAT(
2130         initialize_result.initialize_stats().integer_index_restoration_cause(),
2131         Eq(InitializeStatsProto::NONE));
2132     EXPECT_THAT(initialize_result.initialize_stats()
2133                     .qualified_id_join_index_restoration_cause(),
2134                 Eq(InitializeStatsProto::NONE));
2135 
2136     // Verify term index works normally
2137     SearchSpecProto search_spec1;
2138     search_spec1.set_query("body:consectetur");
2139     search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
2140     SearchResultProto results1 =
2141         icing.Search(search_spec1, ScoringSpecProto::default_instance(),
2142                      ResultSpecProto::default_instance());
2143     EXPECT_THAT(results1.status(), ProtoIsOk());
2144     EXPECT_THAT(results1.next_page_token(), Eq(0));
2145     // All documents should be retrievable.
2146     ASSERT_THAT(results1.results(), SizeIs(3));
2147     EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
2148     EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
2149     EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
2150 
2151     // Verify integer index works normally
2152     SearchSpecProto search_spec2;
2153     search_spec2.set_query("indexableInteger == 123");
2154     search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
2155 
2156     SearchResultProto results2 =
2157         icing.Search(search_spec2, ScoringSpecProto::default_instance(),
2158                      ResultSpecProto::default_instance());
2159     ASSERT_THAT(results2.results(), SizeIs(3));
2160     EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
2161     EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
2162     EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
2163 
2164     // Verify qualified id join index works normally: join a query for
2165     // `name:person` with a child query for `body:consectetur` based on the
2166     // child's `senderQualifiedId` field.
2167     SearchSpecProto search_spec3;
2168     search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
2169     search_spec3.set_query("name:person");
2170     JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
2171     join_spec->set_parent_property_expression(
2172         std::string(JoinProcessor::kQualifiedIdExpr));
2173     join_spec->set_child_property_expression("senderQualifiedId");
2174     join_spec->set_aggregation_scoring_strategy(
2175         JoinSpecProto::AggregationScoringStrategy::COUNT);
2176     JoinSpecProto::NestedSpecProto* nested_spec =
2177         join_spec->mutable_nested_spec();
2178     SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
2179     nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
2180     nested_search_spec->set_query("body:consectetur");
2181     *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
2182     *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
2183 
2184     ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
2185     result_spec3.set_max_joined_children_per_parent_to_return(
2186         std::numeric_limits<int32_t>::max());
2187 
2188     SearchResultProto results3 = icing.Search(
2189         search_spec3, ScoringSpecProto::default_instance(), result_spec3);
2190     ASSERT_THAT(results3.results(), SizeIs(1));
2191     EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
2192     EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
2193     EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
2194                 Eq("message/3"));
2195     EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
2196                 Eq("message/2"));
2197     EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
2198                 Eq("message/1"));
2199   }
2200 }
2201 
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexLoseIntegerIndex)2202 TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) {
2203   // Test the following scenario: losing the entire integer index directory.
2204   // IcingSearchEngine should be able to recover integer index. Several
2205   // additional behaviors are also tested:
2206   // - Index directory handling:
2207   //   - Term index directory should be unaffected.
2208   //   - Integer index directory should not be discarded since we've already
2209   //     lost it. Start it from scratch.
2210   //   - Qualified id join index directory should be unaffected.
2211   // - Truncate indices:
2212   //   - "TruncateTo()" for term index shouldn't take effect.
2213   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
2214   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
2215   //     discarded, since we start it from scratch.
2216   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
2217   //     underlying storage sub directory (path_expr =
2218   //     "*/qualified_id_join_index_dir/*") should be discarded.
2219 
2220   SchemaProto schema =
2221       SchemaBuilder()
2222           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
2223               PropertyConfigBuilder()
2224                   .SetName("name")
2225                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
2226                   .SetCardinality(CARDINALITY_REQUIRED)))
2227           .AddType(SchemaTypeConfigBuilder()
2228                        .SetType("Message")
2229                        .AddProperty(PropertyConfigBuilder()
2230                                         .SetName("body")
2231                                         .SetDataTypeString(TERM_MATCH_PREFIX,
2232                                                            TOKENIZER_PLAIN)
2233                                         .SetCardinality(CARDINALITY_REQUIRED))
2234                        .AddProperty(PropertyConfigBuilder()
2235                                         .SetName("indexableInteger")
2236                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
2237                                         .SetCardinality(CARDINALITY_REQUIRED))
2238                        .AddProperty(PropertyConfigBuilder()
2239                                         .SetName("senderQualifiedId")
2240                                         .SetDataTypeJoinableString(
2241                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
2242                                         .SetCardinality(CARDINALITY_REQUIRED)))
2243           .Build();
2244 
2245   DocumentProto person =
2246       DocumentBuilder()
2247           .SetKey("namespace", "person")
2248           .SetSchema("Person")
2249           .AddStringProperty("name", "person")
2250           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2251           .Build();
2252   DocumentProto message =
2253       DocumentBuilder()
2254           .SetKey("namespace", "message/1")
2255           .SetSchema("Message")
2256           .AddStringProperty("body", kIpsumText)
2257           .AddInt64Property("indexableInteger", 123)
2258           .AddStringProperty("senderQualifiedId", "namespace#person")
2259           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2260           .Build();
2261 
2262   // 1. Create an index with 3 message documents.
2263   {
2264     TestIcingSearchEngine icing(
2265         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
2266         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
2267         GetTestJniCache());
2268 
2269     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
2270     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
2271 
2272     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
2273     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2274     message = DocumentBuilder(message).SetUri("message/2").Build();
2275     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2276     message = DocumentBuilder(message).SetUri("message/3").Build();
2277     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2278   }
2279 
2280   // 2. Delete the integer index file to trigger RestoreIndexIfNeeded.
2281   std::string integer_index_dir = GetIntegerIndexDir();
2282   filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str());
2283 
2284   // 3. Create the index again. This should trigger index restoration.
2285   {
2286     // Mock filesystem to observe and check the behavior of all indices.
2287     auto mock_filesystem = std::make_unique<MockFilesystem>();
2288     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
2289         .WillRepeatedly(DoDefault());
2290     // Ensure term index directory should never be discarded.
2291     EXPECT_CALL(*mock_filesystem,
2292                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
2293         .Times(0);
2294     // Ensure integer index directory should never be discarded since we've
2295     // already lost it, and Clear() should never be called (i.e. storage sub
2296     // directory "*/integer_index_dir/*" should never be discarded) since we
2297     // start it from scratch.
2298     EXPECT_CALL(*mock_filesystem,
2299                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
2300         .Times(0);
2301     EXPECT_CALL(*mock_filesystem,
2302                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
2303         .Times(0);
2304     // Ensure qualified id join index directory should never be discarded, and
2305     // Clear() should never be called (i.e. storage sub directory
2306     // "*/qualified_id_join_index_dir/*" should never be discarded).
2307     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
2308                                       EndsWith("/qualified_id_join_index_dir")))
2309         .Times(0);
2310     EXPECT_CALL(
2311         *mock_filesystem,
2312         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
2313         .Times(0);
2314 
2315     TestIcingSearchEngine icing(
2316         GetDefaultIcingOptions(), std::move(mock_filesystem),
2317         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
2318         GetTestJniCache());
2319     InitializeResultProto initialize_result = icing.Initialize();
2320     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
2321     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
2322                 Eq(InitializeStatsProto::NONE));
2323     EXPECT_THAT(
2324         initialize_result.initialize_stats().integer_index_restoration_cause(),
2325         Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
2326     EXPECT_THAT(initialize_result.initialize_stats()
2327                     .qualified_id_join_index_restoration_cause(),
2328                 Eq(InitializeStatsProto::NONE));
2329 
2330     // Verify term index works normally
2331     SearchSpecProto search_spec1;
2332     search_spec1.set_query("body:consectetur");
2333     search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
2334     SearchResultProto results1 =
2335         icing.Search(search_spec1, ScoringSpecProto::default_instance(),
2336                      ResultSpecProto::default_instance());
2337     EXPECT_THAT(results1.status(), ProtoIsOk());
2338     EXPECT_THAT(results1.next_page_token(), Eq(0));
2339     // All documents should be retrievable.
2340     ASSERT_THAT(results1.results(), SizeIs(3));
2341     EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
2342     EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
2343     EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
2344 
2345     // Verify integer index works normally
2346     SearchSpecProto search_spec2;
2347     search_spec2.set_query("indexableInteger == 123");
2348     search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
2349 
2350     SearchResultProto results2 =
2351         icing.Search(search_spec2, ScoringSpecProto::default_instance(),
2352                      ResultSpecProto::default_instance());
2353     ASSERT_THAT(results2.results(), SizeIs(3));
2354     EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
2355     EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
2356     EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
2357 
2358     // Verify qualified id join index works normally: join a query for
2359     // `name:person` with a child query for `body:consectetur` based on the
2360     // child's `senderQualifiedId` field.
2361     SearchSpecProto search_spec3;
2362     search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
2363     search_spec3.set_query("name:person");
2364     JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
2365     join_spec->set_parent_property_expression(
2366         std::string(JoinProcessor::kQualifiedIdExpr));
2367     join_spec->set_child_property_expression("senderQualifiedId");
2368     join_spec->set_aggregation_scoring_strategy(
2369         JoinSpecProto::AggregationScoringStrategy::COUNT);
2370     JoinSpecProto::NestedSpecProto* nested_spec =
2371         join_spec->mutable_nested_spec();
2372     SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
2373     nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
2374     nested_search_spec->set_query("body:consectetur");
2375     *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
2376     *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
2377 
2378     ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
2379     result_spec3.set_max_joined_children_per_parent_to_return(
2380         std::numeric_limits<int32_t>::max());
2381 
2382     SearchResultProto results3 = icing.Search(
2383         search_spec3, ScoringSpecProto::default_instance(), result_spec3);
2384     ASSERT_THAT(results3.results(), SizeIs(1));
2385     EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
2386     EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
2387     EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
2388                 Eq("message/3"));
2389     EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
2390                 Eq("message/2"));
2391     EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
2392                 Eq("message/1"));
2393   }
2394 }
2395 
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexLoseQualifiedIdJoinIndex)2396 TEST_F(IcingSearchEngineInitializationTest,
2397        RestoreIndexLoseQualifiedIdJoinIndex) {
2398   // Test the following scenario: losing the entire qualified id join index
2399   // directory. IcingSearchEngine should be able to recover qualified id join
2400   // index. Several additional behaviors are also tested:
2401   // - Index directory handling:
2402   //   - Term index directory should be unaffected.
2403   //   - Integer index directory should be unaffected.
2404   //   - Qualified id join index directory should not be discarded since we've
2405   //     already lost it. Start it from scratch.
2406   // - Truncate indices:
2407   //   - "TruncateTo()" for term index shouldn't take effect.
2408   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
2409   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
2410   //     discarded.
2411   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
2412   //     underlying storage sub directory (path_expr =
2413   //     "*/qualified_id_join_index_dir/*") should be discarded, since we start
2414   //     it from scratch.
2415 
2416   SchemaProto schema =
2417       SchemaBuilder()
2418           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
2419               PropertyConfigBuilder()
2420                   .SetName("name")
2421                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
2422                   .SetCardinality(CARDINALITY_REQUIRED)))
2423           .AddType(SchemaTypeConfigBuilder()
2424                        .SetType("Message")
2425                        .AddProperty(PropertyConfigBuilder()
2426                                         .SetName("body")
2427                                         .SetDataTypeString(TERM_MATCH_PREFIX,
2428                                                            TOKENIZER_PLAIN)
2429                                         .SetCardinality(CARDINALITY_REQUIRED))
2430                        .AddProperty(PropertyConfigBuilder()
2431                                         .SetName("indexableInteger")
2432                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
2433                                         .SetCardinality(CARDINALITY_REQUIRED))
2434                        .AddProperty(PropertyConfigBuilder()
2435                                         .SetName("senderQualifiedId")
2436                                         .SetDataTypeJoinableString(
2437                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
2438                                         .SetCardinality(CARDINALITY_REQUIRED)))
2439           .Build();
2440 
2441   DocumentProto person =
2442       DocumentBuilder()
2443           .SetKey("namespace", "person")
2444           .SetSchema("Person")
2445           .AddStringProperty("name", "person")
2446           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2447           .Build();
2448   DocumentProto message =
2449       DocumentBuilder()
2450           .SetKey("namespace", "message/1")
2451           .SetSchema("Message")
2452           .AddStringProperty("body", kIpsumText)
2453           .AddInt64Property("indexableInteger", 123)
2454           .AddStringProperty("senderQualifiedId", "namespace#person")
2455           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2456           .Build();
2457 
2458   // 1. Create an index with 3 message documents.
2459   {
2460     TestIcingSearchEngine icing(
2461         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
2462         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
2463         GetTestJniCache());
2464 
2465     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
2466     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
2467 
2468     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
2469     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2470     message = DocumentBuilder(message).SetUri("message/2").Build();
2471     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2472     message = DocumentBuilder(message).SetUri("message/3").Build();
2473     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2474   }
2475 
2476   // 2. Delete the qualified id join index file to trigger RestoreIndexIfNeeded.
2477   std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir();
2478   filesystem()->DeleteDirectoryRecursively(qualified_id_join_index_dir.c_str());
2479 
2480   // 3. Create the index again. This should trigger index restoration.
2481   {
2482     // Mock filesystem to observe and check the behavior of all indices.
2483     auto mock_filesystem = std::make_unique<MockFilesystem>();
2484     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
2485         .WillRepeatedly(DoDefault());
2486     // Ensure term index directory should never be discarded.
2487     EXPECT_CALL(*mock_filesystem,
2488                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
2489         .Times(0);
2490     // Ensure integer index directory should never be discarded (it is intact
2491     // in this scenario), and Clear() should never be called (i.e. storage sub
2492     // directory "*/integer_index_dir/*" should never be discarded).
2493     EXPECT_CALL(*mock_filesystem,
2494                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
2495         .Times(0);
2496     EXPECT_CALL(*mock_filesystem,
2497                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
2498         .Times(0);
2499     // Ensure qualified id join index directory should never be discarded, and
2500     // Clear() should never be called (i.e. storage sub directory
2501     // "*/qualified_id_join_index_dir/*" should never be discarded)
2502     // since we start it from scratch.
2503     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
2504                                       EndsWith("/qualified_id_join_index_dir")))
2505         .Times(0);
2506     EXPECT_CALL(
2507         *mock_filesystem,
2508         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
2509         .Times(0);
2510 
2511     TestIcingSearchEngine icing(
2512         GetDefaultIcingOptions(), std::move(mock_filesystem),
2513         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
2514         GetTestJniCache());
2515     InitializeResultProto initialize_result = icing.Initialize();
2516     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
2517     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
2518                 Eq(InitializeStatsProto::NONE));
2519     EXPECT_THAT(
2520         initialize_result.initialize_stats().integer_index_restoration_cause(),
2521         Eq(InitializeStatsProto::NONE));
2522     EXPECT_THAT(initialize_result.initialize_stats()
2523                     .qualified_id_join_index_restoration_cause(),
2524                 Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
2525 
2526     // Verify term index works normally
2527     SearchSpecProto search_spec1;
2528     search_spec1.set_query("body:consectetur");
2529     search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
2530     SearchResultProto results1 =
2531         icing.Search(search_spec1, ScoringSpecProto::default_instance(),
2532                      ResultSpecProto::default_instance());
2533     EXPECT_THAT(results1.status(), ProtoIsOk());
2534     EXPECT_THAT(results1.next_page_token(), Eq(0));
2535     // All documents should be retrievable.
2536     ASSERT_THAT(results1.results(), SizeIs(3));
2537     EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
2538     EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
2539     EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
2540 
2541     // Verify integer index works normally
2542     SearchSpecProto search_spec2;
2543     search_spec2.set_query("indexableInteger == 123");
2544     search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
2545 
2546     SearchResultProto results2 =
2547         icing.Search(search_spec2, ScoringSpecProto::default_instance(),
2548                      ResultSpecProto::default_instance());
2549     ASSERT_THAT(results2.results(), SizeIs(3));
2550     EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
2551     EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
2552     EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
2553 
2554     // Verify qualified id join index works normally: join a query for
2555     // `name:person` with a child query for `body:consectetur` based on the
2556     // child's `senderQualifiedId` field.
2557     SearchSpecProto search_spec3;
2558     search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
2559     search_spec3.set_query("name:person");
2560     JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
2561     join_spec->set_parent_property_expression(
2562         std::string(JoinProcessor::kQualifiedIdExpr));
2563     join_spec->set_child_property_expression("senderQualifiedId");
2564     join_spec->set_aggregation_scoring_strategy(
2565         JoinSpecProto::AggregationScoringStrategy::COUNT);
2566     JoinSpecProto::NestedSpecProto* nested_spec =
2567         join_spec->mutable_nested_spec();
2568     SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
2569     nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
2570     nested_search_spec->set_query("body:consectetur");
2571     *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
2572     *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
2573 
2574     ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
2575     result_spec3.set_max_joined_children_per_parent_to_return(
2576         std::numeric_limits<int32_t>::max());
2577 
2578     SearchResultProto results3 = icing.Search(
2579         search_spec3, ScoringSpecProto::default_instance(), result_spec3);
2580     ASSERT_THAT(results3.results(), SizeIs(1));
2581     EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
2582     EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
2583     EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
2584                 Eq("message/3"));
2585     EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
2586                 Eq("message/2"));
2587     EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
2588                 Eq("message/1"));
2589   }
2590 }
2591 
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexTruncateLiteIndexWithoutReindexing)2592 TEST_F(IcingSearchEngineInitializationTest,
2593        RestoreIndexTruncateLiteIndexWithoutReindexing) {
2594   // Test the following scenario: term lite index is *completely* ahead of
2595   // document store. IcingSearchEngine should be able to recover term index.
2596   // Several additional behaviors are also tested:
2597   // - Index directory handling:
2598   //   - Term index directory should be unaffected.
2599   //   - Integer index directory should be unaffected.
2600   //   - Qualified id join index directory should be unaffected.
2601   // - Truncate indices:
2602   //   - "TruncateTo()" for term index should take effect and throw out the
2603   //     entire lite index. This should be sufficient to make term index
2604   //     consistent with document store, so reindexing should not take place.
2605   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
2606   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
2607   //     discarded.
2608   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
2609   //     underlying storage sub directory (path_expr =
2610   //     "*/qualified_id_join_index_dir/*") should be discarded.
2611 
2612   SchemaProto schema =
2613       SchemaBuilder()
2614           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
2615               PropertyConfigBuilder()
2616                   .SetName("name")
2617                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
2618                   .SetCardinality(CARDINALITY_REQUIRED)))
2619           .AddType(SchemaTypeConfigBuilder()
2620                        .SetType("Message")
2621                        .AddProperty(PropertyConfigBuilder()
2622                                         .SetName("body")
2623                                         .SetDataTypeString(TERM_MATCH_PREFIX,
2624                                                            TOKENIZER_PLAIN)
2625                                         .SetCardinality(CARDINALITY_REQUIRED))
2626                        .AddProperty(PropertyConfigBuilder()
2627                                         .SetName("indexableInteger")
2628                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
2629                                         .SetCardinality(CARDINALITY_REQUIRED))
2630                        .AddProperty(PropertyConfigBuilder()
2631                                         .SetName("senderQualifiedId")
2632                                         .SetDataTypeJoinableString(
2633                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
2634                                         .SetCardinality(CARDINALITY_REQUIRED)))
2635           .Build();
2636 
2637   DocumentProto person =
2638       DocumentBuilder()
2639           .SetKey("namespace", "person")
2640           .SetSchema("Person")
2641           .AddStringProperty("name", "person")
2642           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2643           .Build();
2644   DocumentProto message =
2645       DocumentBuilder()
2646           .SetKey("namespace", "message/1")
2647           .SetSchema("Message")
2648           .AddStringProperty("body", kIpsumText)
2649           .AddInt64Property("indexableInteger", 123)
2650           .AddStringProperty("senderQualifiedId", "namespace#person")
2651           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2652           .Build();
2653 
2654   // 1. Create an index with a LiteIndex that will only allow a person and a
2655   //    message document before needing a merge.
2656   {
2657     IcingSearchEngineOptions options = GetDefaultIcingOptions();
2658     options.set_index_merge_size(person.ByteSizeLong() +
2659                                  message.ByteSizeLong());
2660     TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
2661                                 std::make_unique<IcingFilesystem>(),
2662                                 std::make_unique<FakeClock>(),
2663                                 GetTestJniCache());
2664 
2665     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
2666     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
2667 
2668     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
2669     // Add two message documents. These should get merged into the main index.
2670     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2671     message = DocumentBuilder(message).SetUri("message/2").Build();
2672     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2673   }
2674 
2675   // 2. Manually add some data into term lite index and increment
2676   // last_added_document_id, but don't merge into the main index. This will
2677   // cause mismatched last_added_document_id with term index.
2678   //   - Document store: [0, 1, 2]
2679   //   - Term index
2680   //     - Main index: [0, 1, 2]
2681   //     - Lite index: [3]
2682   //   - Integer index: [0, 1, 2]
2683   //   - Qualified id join index: [0, 1, 2]
2684   {
2685     ICING_ASSERT_OK_AND_ASSIGN(
2686         std::unique_ptr<Index> index,
2687         Index::Create(
2688             Index::Options(GetIndexDir(),
2689                            /*index_merge_size=*/message.ByteSizeLong(),
2690                            /*lite_index_sort_at_indexing=*/true,
2691                            /*lite_index_sort_size=*/8),
2692             filesystem(), icing_filesystem()));
2693     DocumentId original_last_added_doc_id = index->last_added_document_id();
2694     index->set_last_added_document_id(original_last_added_doc_id + 1);
2695     Index::Editor editor =
2696         index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
2697                     TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
2698     ICING_ASSERT_OK(editor.BufferTerm("foo"));
2699     ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
2700   }
2701 
2702   // 3. Create the index again.
2703   {
2704     // Mock filesystem to observe and check the behavior of all indices.
2705     auto mock_filesystem = std::make_unique<MockFilesystem>();
2706     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
2707         .WillRepeatedly(DoDefault());
2708     // Ensure term index directory should never be discarded. since we only call
2709     // TruncateTo for term index.
2710     EXPECT_CALL(*mock_filesystem,
2711                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
2712         .Times(0);
2713     // Ensure integer index directory should never be discarded, and Clear()
2714     // should never be called (i.e. storage sub directory
2715     // "*/integer_index_dir/*" should never be discarded).
2716     EXPECT_CALL(*mock_filesystem,
2717                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
2718         .Times(0);
2719     EXPECT_CALL(*mock_filesystem,
2720                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
2721         .Times(0);
2722     // Ensure qualified id join index directory should never be discarded, and
2723     // Clear() should never be called (i.e. storage sub directory
2724     // "*/qualified_id_join_index_dir/*" should never be discarded).
2725     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
2726                                       EndsWith("/qualified_id_join_index_dir")))
2727         .Times(0);
2728     EXPECT_CALL(
2729         *mock_filesystem,
2730         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
2731         .Times(0);
2732 
2733     IcingSearchEngineOptions options = GetDefaultIcingOptions();
2734     options.set_index_merge_size(message.ByteSizeLong());
2735     TestIcingSearchEngine icing(options, std::move(mock_filesystem),
2736                                 std::make_unique<IcingFilesystem>(),
2737                                 std::make_unique<FakeClock>(),
2738                                 GetTestJniCache());
2739     InitializeResultProto initialize_result = icing.Initialize();
2740     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
2741     // Since truncating lite index is sufficient to make term index consistent
2742     // with document store, replaying documents or reindex shouldn't take place.
2743     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
2744                 Eq(InitializeStatsProto::NONE));
2745     EXPECT_THAT(
2746         initialize_result.initialize_stats().integer_index_restoration_cause(),
2747         Eq(InitializeStatsProto::NONE));
2748     EXPECT_THAT(initialize_result.initialize_stats()
2749                     .qualified_id_join_index_restoration_cause(),
2750                 Eq(InitializeStatsProto::NONE));
2751 
2752     // Verify term index works normally
2753     SearchSpecProto search_spec1;
2754     search_spec1.set_query("body:consectetur");
2755     search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
2756     SearchResultProto results1 =
2757         icing.Search(search_spec1, ScoringSpecProto::default_instance(),
2758                      ResultSpecProto::default_instance());
2759     EXPECT_THAT(results1.status(), ProtoIsOk());
2760     EXPECT_THAT(results1.next_page_token(), Eq(0));
2761     // Only the documents that were in the main index should be retrievable.
2762     ASSERT_THAT(results1.results(), SizeIs(2));
2763     EXPECT_THAT(results1.results(0).document().uri(), Eq("message/2"));
2764     EXPECT_THAT(results1.results(1).document().uri(), Eq("message/1"));
2765 
2766     // Verify integer index works normally
2767     SearchSpecProto search_spec2;
2768     search_spec2.set_query("indexableInteger == 123");
2769     search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
2770 
2771     SearchResultProto results2 =
2772         icing.Search(search_spec2, ScoringSpecProto::default_instance(),
2773                      ResultSpecProto::default_instance());
2774     ASSERT_THAT(results2.results(), SizeIs(2));
2775     EXPECT_THAT(results2.results(0).document().uri(), Eq("message/2"));
2776     EXPECT_THAT(results2.results(1).document().uri(), Eq("message/1"));
2777 
2778     // Verify qualified id join index works normally: join a query for
2779     // `name:person` with a child query for `body:consectetur` based on the
2780     // child's `senderQualifiedId` field.
2781     SearchSpecProto search_spec3;
2782     search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
2783     search_spec3.set_query("name:person");
2784     JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
2785     join_spec->set_parent_property_expression(
2786         std::string(JoinProcessor::kQualifiedIdExpr));
2787     join_spec->set_child_property_expression("senderQualifiedId");
2788     join_spec->set_aggregation_scoring_strategy(
2789         JoinSpecProto::AggregationScoringStrategy::COUNT);
2790     JoinSpecProto::NestedSpecProto* nested_spec =
2791         join_spec->mutable_nested_spec();
2792     SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
2793     nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
2794     nested_search_spec->set_query("body:consectetur");
2795     *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
2796     *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
2797 
2798     ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
2799     result_spec3.set_max_joined_children_per_parent_to_return(
2800         std::numeric_limits<int32_t>::max());
2801 
2802     SearchResultProto results3 = icing.Search(
2803         search_spec3, ScoringSpecProto::default_instance(), result_spec3);
2804     ASSERT_THAT(results3.results(), SizeIs(1));
2805     EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
2806     EXPECT_THAT(results3.results(0).joined_results(), SizeIs(2));
2807     EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
2808                 Eq("message/2"));
2809     EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
2810                 Eq("message/1"));
2811   }
2812 
2813   // 4. Since document 3 doesn't exist, testing query = "foo" is not enough to
2814   // verify the correctness of term index restoration. Instead, we have to check
2815   // hits for "foo" should not be found in term index.
2816   {
2817     ICING_ASSERT_OK_AND_ASSIGN(
2818         std::unique_ptr<Index> index,
2819         Index::Create(
2820             Index::Options(GetIndexDir(),
2821                            /*index_merge_size=*/message.ByteSizeLong(),
2822                            /*lite_index_sort_at_indexing=*/true,
2823                            /*lite_index_sort_size=*/8),
2824             filesystem(), icing_filesystem()));
2825     ICING_ASSERT_OK_AND_ASSIGN(
2826         std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
2827         index->GetIterator("foo", /*term_start_index=*/0,
2828                            /*unnormalized_term_length=*/0, kSectionIdMaskAll,
2829                            TermMatchType::EXACT_ONLY));
2830     EXPECT_THAT(doc_hit_info_iter->Advance(),
2831                 StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
2832   }
2833 }
2834 
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexTruncateLiteIndexWithReindexing)2835 TEST_F(IcingSearchEngineInitializationTest,
2836        RestoreIndexTruncateLiteIndexWithReindexing) {
2837   // Test the following scenario: term lite index is *partially* ahead of
2838   // document store. IcingSearchEngine should be able to recover term index.
2839   // Several additional behaviors are also tested:
2840   // - Index directory handling:
2841   //   - Term index directory should be unaffected.
2842   //   - Integer index directory should be unaffected.
2843   //   - Qualified id join index directory should be unaffected.
2844   // - Truncate indices:
2845   //   - "TruncateTo()" for term index should take effect and throw out the
2846   //     entire lite index. However, some valid data in term lite index were
2847   //     discarded together, so reindexing should still take place to recover
2848   //     them after truncating.
2849   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
2850   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
2851   //     discarded.
2852   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
2853   //     underlying storage sub directory (path_expr =
2854   //     "*/qualified_id_join_index_dir/*") should be discarded.
2855 
2856   SchemaProto schema =
2857       SchemaBuilder()
2858           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
2859               PropertyConfigBuilder()
2860                   .SetName("name")
2861                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
2862                   .SetCardinality(CARDINALITY_REQUIRED)))
2863           .AddType(SchemaTypeConfigBuilder()
2864                        .SetType("Message")
2865                        .AddProperty(PropertyConfigBuilder()
2866                                         .SetName("body")
2867                                         .SetDataTypeString(TERM_MATCH_PREFIX,
2868                                                            TOKENIZER_PLAIN)
2869                                         .SetCardinality(CARDINALITY_REQUIRED))
2870                        .AddProperty(PropertyConfigBuilder()
2871                                         .SetName("indexableInteger")
2872                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
2873                                         .SetCardinality(CARDINALITY_REQUIRED))
2874                        .AddProperty(PropertyConfigBuilder()
2875                                         .SetName("senderQualifiedId")
2876                                         .SetDataTypeJoinableString(
2877                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
2878                                         .SetCardinality(CARDINALITY_REQUIRED)))
2879           .Build();
2880 
2881   DocumentProto person =
2882       DocumentBuilder()
2883           .SetKey("namespace", "person")
2884           .SetSchema("Person")
2885           .AddStringProperty("name", "person")
2886           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2887           .Build();
2888   DocumentProto message =
2889       DocumentBuilder()
2890           .SetKey("namespace", "message/1")
2891           .SetSchema("Message")
2892           .AddStringProperty("body", kIpsumText)
2893           .AddInt64Property("indexableInteger", 123)
2894           .AddStringProperty("senderQualifiedId", "namespace#person")
2895           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
2896           .Build();
2897 
2898   // 1. Create an index with a LiteIndex that will only allow a person and a
2899   //    message document before needing a merge.
2900   {
2901     IcingSearchEngineOptions options = GetDefaultIcingOptions();
2902     options.set_index_merge_size(message.ByteSizeLong());
2903     TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
2904                                 std::make_unique<IcingFilesystem>(),
2905                                 std::make_unique<FakeClock>(),
2906                                 GetTestJniCache());
2907 
2908     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
2909     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
2910 
2911     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
2912     // Add two message documents. These should get merged into the main index.
2913     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2914     message = DocumentBuilder(message).SetUri("message/2").Build();
2915     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2916     // Add one document. This one should remain in the lite index.
2917     message = DocumentBuilder(message).SetUri("message/3").Build();
2918     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
2919   }
2920 
2921   // 2. Manually add some data into term lite index and increment
2922   //    last_added_document_id, but don't merge into the main index. This will
2923   //    cause mismatched last_added_document_id with term index.
2924   //   - Document store: [0, 1, 2, 3]
2925   //   - Term index
2926   //     - Main index: [0, 1, 2]
2927   //     - Lite index: [3, 4]
2928   //   - Integer index: [0, 1, 2, 3]
2929   //   - Qualified id join index: [0, 1, 2, 3]
2930   {
2931     ICING_ASSERT_OK_AND_ASSIGN(
2932         std::unique_ptr<Index> index,
2933         Index::Create(
2934             Index::Options(GetIndexDir(),
2935                            /*index_merge_size=*/message.ByteSizeLong(),
2936                            /*lite_index_sort_at_indexing=*/true,
2937                            /*lite_index_sort_size=*/8),
2938             filesystem(), icing_filesystem()));
2939     DocumentId original_last_added_doc_id = index->last_added_document_id();
2940     index->set_last_added_document_id(original_last_added_doc_id + 1);
2941     Index::Editor editor =
2942         index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
2943                     TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
2944     ICING_ASSERT_OK(editor.BufferTerm("foo"));
2945     ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
2946   }
2947 
2948   // 3. Create the index again.
2949   {
2950     // Mock filesystem to observe and check the behavior of all indices.
2951     auto mock_filesystem = std::make_unique<MockFilesystem>();
2952     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
2953         .WillRepeatedly(DoDefault());
2954     // Ensure term index directory should never be discarded, since we only call
2955     // TruncateTo for term index.
2956     EXPECT_CALL(*mock_filesystem,
2957                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
2958         .Times(0);
2959     // Ensure integer index directory should never be discarded, and Clear()
2960     // should never be called (i.e. storage sub directory
2961     // "*/integer_index_dir/*" should never be discarded).
2962     EXPECT_CALL(*mock_filesystem,
2963                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
2964         .Times(0);
2965     EXPECT_CALL(*mock_filesystem,
2966                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
2967         .Times(0);
2968     // Ensure qualified id join index directory should never be discarded, and
2969     // Clear() should never be called (i.e. storage sub directory
2970     // "*/qualified_id_join_index_dir/*" should never be discarded).
2971     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
2972                                       EndsWith("/qualified_id_join_index_dir")))
2973         .Times(0);
2974     EXPECT_CALL(
2975         *mock_filesystem,
2976         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
2977         .Times(0);
2978 
2979     IcingSearchEngineOptions options = GetDefaultIcingOptions();
2980     options.set_index_merge_size(message.ByteSizeLong());
2981     TestIcingSearchEngine icing(options, std::move(mock_filesystem),
2982                                 std::make_unique<IcingFilesystem>(),
2983                                 std::make_unique<FakeClock>(),
2984                                 GetTestJniCache());
2985     InitializeResultProto initialize_result = icing.Initialize();
2986     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
2987     // Truncating lite index not only deletes data ahead document store, but
2988     // also deletes valid data. Therefore, we still have to replay documents and
2989     // reindex.
2990     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
2991                 Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
2992     EXPECT_THAT(
2993         initialize_result.initialize_stats().integer_index_restoration_cause(),
2994         Eq(InitializeStatsProto::NONE));
2995     EXPECT_THAT(initialize_result.initialize_stats()
2996                     .qualified_id_join_index_restoration_cause(),
2997                 Eq(InitializeStatsProto::NONE));
2998 
2999     // Verify term index works normally
3000     SearchSpecProto search_spec1;
3001     search_spec1.set_query("body:consectetur");
3002     search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
3003     SearchResultProto results1 =
3004         icing.Search(search_spec1, ScoringSpecProto::default_instance(),
3005                      ResultSpecProto::default_instance());
3006     EXPECT_THAT(results1.status(), ProtoIsOk());
3007     EXPECT_THAT(results1.next_page_token(), Eq(0));
3008     // All documents should be retrievable since reindexing restores them.
3009     ASSERT_THAT(results1.results(), SizeIs(3));
3010     EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
3011     EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
3012     EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
3013 
3014     // Verify integer index works normally
3015     SearchSpecProto search_spec2;
3016     search_spec2.set_query("indexableInteger == 123");
3017     search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
3018 
3019     SearchResultProto results2 =
3020         icing.Search(search_spec2, ScoringSpecProto::default_instance(),
3021                      ResultSpecProto::default_instance());
3022     ASSERT_THAT(results2.results(), SizeIs(3));
3023     EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
3024     EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
3025     EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
3026 
3027     // Verify qualified id join index works normally: join a query for
3028     // `name:person` with a child query for `body:consectetur` based on the
3029     // child's `senderQualifiedId` field.
3030     SearchSpecProto search_spec3;
3031     search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
3032     search_spec3.set_query("name:person");
3033     JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
3034     join_spec->set_parent_property_expression(
3035         std::string(JoinProcessor::kQualifiedIdExpr));
3036     join_spec->set_child_property_expression("senderQualifiedId");
3037     join_spec->set_aggregation_scoring_strategy(
3038         JoinSpecProto::AggregationScoringStrategy::COUNT);
3039     JoinSpecProto::NestedSpecProto* nested_spec =
3040         join_spec->mutable_nested_spec();
3041     SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
3042     nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
3043     nested_search_spec->set_query("body:consectetur");
3044     *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
3045     *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
3046 
3047     ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
3048     result_spec3.set_max_joined_children_per_parent_to_return(
3049         std::numeric_limits<int32_t>::max());
3050 
3051     SearchResultProto results3 = icing.Search(
3052         search_spec3, ScoringSpecProto::default_instance(), result_spec3);
3053     ASSERT_THAT(results3.results(), SizeIs(1));
3054     EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
3055     EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
3056     EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
3057                 Eq("message/3"));
3058     EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
3059                 Eq("message/2"));
3060     EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
3061                 Eq("message/1"));
3062   }
3063 
3064   // 4. Since document 4 doesn't exist, testing query = "foo" is not enough to
3065   // verify the correctness of term index restoration. Instead, we have to check
3066   // hits for "foo" should not be found in term index.
3067   {
3068     ICING_ASSERT_OK_AND_ASSIGN(
3069         std::unique_ptr<Index> index,
3070         Index::Create(
3071             Index::Options(GetIndexDir(),
3072                            /*index_merge_size=*/message.ByteSizeLong(),
3073                            /*lite_index_sort_at_indexing=*/true,
3074                            /*lite_index_sort_size=*/8),
3075             filesystem(), icing_filesystem()));
3076     ICING_ASSERT_OK_AND_ASSIGN(
3077         std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
3078         index->GetIterator("foo", /*term_start_index=*/0,
3079                            /*unnormalized_term_length=*/0, kSectionIdMaskAll,
3080                            TermMatchType::EXACT_ONLY));
3081     EXPECT_THAT(doc_hit_info_iter->Advance(),
3082                 StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
3083   }
3084 }
3085 
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexTruncateMainIndexWithoutReindexing)3086 TEST_F(IcingSearchEngineInitializationTest,
3087        RestoreIndexTruncateMainIndexWithoutReindexing) {
3088   // Test the following scenario: term main index is *completely* ahead of
3089   // document store. IcingSearchEngine should be able to recover term index.
3090   // Several additional behaviors are also tested:
3091   // - Index directory handling:
3092   //   - Term index directory should be unaffected.
3093   //   - Integer index directory should be unaffected.
3094   //   - Qualified id join index directory should be unaffected.
3095   // - Truncate indices:
3096   //   - "TruncateTo()" for term index should take effect and throw out the
3097   //     entire lite and main index. This should be sufficient to make term
3098   //     index consistent with document store (in this case, document store is
3099   //     empty as well), so reindexing should not take place.
3100   //   - "Clear()" should be called for integer index. It is a special case when
3101   //     document store has no document. Since there is no integer index storage
3102   //     sub directories (path_expr = "*/integer_index_dir/*"), nothing will be
3103   //     discarded.
3104   //   - "Clear()" should be called for qualified id join index. It is a special
3105   //     case when document store has no document.
3106 
3107   // 1. Create an index with no document.
3108   {
3109     TestIcingSearchEngine icing(
3110         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
3111         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
3112         GetTestJniCache());
3113 
3114     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
3115     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
3116   }
3117 
3118   // 2. Manually add some data into term lite index and increment
3119   //    last_added_document_id. Merge some of them into the main index and keep
3120   //    others in the lite index. This will cause mismatched document id with
3121   //    document store.
3122   //   - Document store: []
3123   //   - Term index
3124   //     - Main index: [0]
3125   //     - Lite index: [1]
3126   //   - Integer index: []
3127   //   - Qualified id join index: []
3128   {
3129     ICING_ASSERT_OK_AND_ASSIGN(
3130         std::unique_ptr<Index> index,
3131         Index::Create(
3132             // index merge size is not important here because we will manually
3133             // invoke merge below.
3134             Index::Options(GetIndexDir(), /*index_merge_size=*/100,
3135                            /*lite_index_sort_at_indexing=*/true,
3136                            /*lite_index_sort_size=*/50),
3137             filesystem(), icing_filesystem()));
3138     // Add hits for document 0 and merge.
3139     ASSERT_THAT(index->last_added_document_id(), kInvalidDocumentId);
3140     index->set_last_added_document_id(0);
3141     Index::Editor editor =
3142         index->Edit(/*document_id=*/0, /*section_id=*/0,
3143                     TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
3144     ICING_ASSERT_OK(editor.BufferTerm("foo"));
3145     ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
3146     ICING_ASSERT_OK(index->Merge());
3147 
3148     // Add hits for document 1 and don't merge.
3149     index->set_last_added_document_id(1);
3150     editor = index->Edit(/*document_id=*/1, /*section_id=*/0,
3151                          TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
3152     ICING_ASSERT_OK(editor.BufferTerm("bar"));
3153     ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
3154   }
3155 
3156   // 3. Create the index again. This should throw out the lite and main index.
3157   {
3158     // Mock filesystem to observe and check the behavior of all indices.
3159     auto mock_filesystem = std::make_unique<MockFilesystem>();
3160     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
3161         .WillRepeatedly(DoDefault());
3162     // Ensure term index directory should never be discarded. since we only call
3163     // TruncateTo for term index.
3164     EXPECT_CALL(*mock_filesystem,
3165                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
3166         .Times(0);
3167     // Ensure integer index directory should never be discarded. Even though
3168     // Clear() was called, it shouldn't take effect since there is no storage
3169     // sub directory ("*/integer_index_dir/*") and nothing will be discarded.
3170     EXPECT_CALL(*mock_filesystem,
3171                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
3172         .Times(0);
3173     EXPECT_CALL(*mock_filesystem,
3174                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
3175         .Times(0);
3176     // Ensure qualified id join index directory should never be discarded.
3177     // Clear() was called and should discard and reinitialize the underlying
3178     // mapper.
3179     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
3180                                       EndsWith("/qualified_id_join_index_dir")))
3181         .Times(0);
3182     EXPECT_CALL(
3183         *mock_filesystem,
3184         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
3185         .Times(AtLeast(1));
3186 
3187     TestIcingSearchEngine icing(
3188         GetDefaultIcingOptions(), std::move(mock_filesystem),
3189         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
3190         GetTestJniCache());
3191     InitializeResultProto initialize_result = icing.Initialize();
3192     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
3193     // Since truncating main index is sufficient to make term index consistent
3194     // with document store, replaying documents or reindexing shouldn't take
3195     // place.
3196     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
3197                 Eq(InitializeStatsProto::NONE));
3198     EXPECT_THAT(
3199         initialize_result.initialize_stats().integer_index_restoration_cause(),
3200         Eq(InitializeStatsProto::NONE));
3201     EXPECT_THAT(initialize_result.initialize_stats()
3202                     .qualified_id_join_index_restoration_cause(),
3203                 Eq(InitializeStatsProto::NONE));
3204   }
3205 
3206   // 4. Since document 0, 1 don't exist, testing queries = "foo", "bar" are not
3207   // enough to verify the correctness of term index restoration. Instead, we
3208   // have to check hits for "foo", "bar" should not be found in term index.
3209   {
3210     ICING_ASSERT_OK_AND_ASSIGN(
3211         std::unique_ptr<Index> index,
3212         Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100,
3213                                      /*lite_index_sort_at_indexing=*/true,
3214                                      /*lite_index_sort_size=*/50),
3215                       filesystem(), icing_filesystem()));
3216     ICING_ASSERT_OK_AND_ASSIGN(
3217         std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
3218         index->GetIterator("foo", /*term_start_index=*/0,
3219                            /*unnormalized_term_length=*/0, kSectionIdMaskAll,
3220                            TermMatchType::EXACT_ONLY));
3221     EXPECT_THAT(doc_hit_info_iter->Advance(),
3222                 StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
3223 
3224     ICING_ASSERT_OK_AND_ASSIGN(
3225         doc_hit_info_iter,
3226         index->GetIterator("bar", /*term_start_index=*/0,
3227                            /*unnormalized_term_length=*/0, kSectionIdMaskAll,
3228                            TermMatchType::EXACT_ONLY));
3229     EXPECT_THAT(doc_hit_info_iter->Advance(),
3230                 StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
3231   }
3232 }
3233 
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexTruncateMainIndexWithReindexing)3234 TEST_F(IcingSearchEngineInitializationTest,
3235        RestoreIndexTruncateMainIndexWithReindexing) {
3236   // Test the following scenario: term main index is *partially* ahead of
3237   // document store. IcingSearchEngine should be able to recover term index.
3238   // Several additional behaviors are also tested:
3239   // - Index directory handling:
3240   //   - Term index directory should be unaffected.
3241   //   - Integer index directory should be unaffected.
3242   //   - Qualified id join index directory should be unaffected.
3243   // - In RestoreIndexIfNecessary():
3244   //   - "TruncateTo()" for term index should take effect and throw out the
3245   //     entire lite and main index. However, some valid data in term main index
3246   //     were discarded together, so reindexing should still take place to
3247   //     recover them after truncating.
3248   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
3249   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
3250   //     discarded.
3251   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
3252   //     underlying storage sub directory (path_expr =
3253   //     "*/qualified_id_join_index_dir/*") should be discarded.
3254 
3255   SchemaProto schema =
3256       SchemaBuilder()
3257           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
3258               PropertyConfigBuilder()
3259                   .SetName("name")
3260                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
3261                   .SetCardinality(CARDINALITY_REQUIRED)))
3262           .AddType(SchemaTypeConfigBuilder()
3263                        .SetType("Message")
3264                        .AddProperty(PropertyConfigBuilder()
3265                                         .SetName("body")
3266                                         .SetDataTypeString(TERM_MATCH_PREFIX,
3267                                                            TOKENIZER_PLAIN)
3268                                         .SetCardinality(CARDINALITY_REQUIRED))
3269                        .AddProperty(PropertyConfigBuilder()
3270                                         .SetName("indexableInteger")
3271                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
3272                                         .SetCardinality(CARDINALITY_REQUIRED))
3273                        .AddProperty(PropertyConfigBuilder()
3274                                         .SetName("senderQualifiedId")
3275                                         .SetDataTypeJoinableString(
3276                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
3277                                         .SetCardinality(CARDINALITY_REQUIRED)))
3278           .Build();
3279 
3280   DocumentProto person =
3281       DocumentBuilder()
3282           .SetKey("namespace", "person")
3283           .SetSchema("Person")
3284           .AddStringProperty("name", "person")
3285           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
3286           .Build();
3287   DocumentProto message =
3288       DocumentBuilder()
3289           .SetKey("namespace", "message/1")
3290           .SetSchema("Message")
3291           .AddStringProperty("body", kIpsumText)
3292           .AddInt64Property("indexableInteger", 123)
3293           .AddStringProperty("senderQualifiedId", "namespace#person")
3294           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
3295           .Build();
3296 
3297   // 1. Create an index with 3 message documents.
3298   {
3299     TestIcingSearchEngine icing(
3300         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
3301         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
3302         GetTestJniCache());
3303 
3304     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
3305     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
3306 
3307     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
3308     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
3309     message = DocumentBuilder(message).SetUri("message/2").Build();
3310     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
3311     message = DocumentBuilder(message).SetUri("message/3").Build();
3312     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
3313   }
3314 
3315   // 2. Manually add some data into term lite index and increment
3316   //    last_added_document_id. Merge some of them into the main index and keep
3317   //    others in the lite index. This will cause mismatched document id with
3318   //    document store.
3319   //   - Document store: [0, 1, 2, 3]
3320   //   - Term index
3321   //     - Main index: [0, 1, 2, 3, 4]
3322   //     - Lite index: [5]
3323   //   - Integer index: [0, 1, 2, 3]
3324   //   - Qualified id join index: [0, 1, 2, 3]
3325   {
3326     ICING_ASSERT_OK_AND_ASSIGN(
3327         std::unique_ptr<Index> index,
3328         Index::Create(
3329             Index::Options(GetIndexDir(),
3330                            /*index_merge_size=*/message.ByteSizeLong(),
3331                            /*lite_index_sort_at_indexing=*/true,
3332                            /*lite_index_sort_size=*/8),
3333             filesystem(), icing_filesystem()));
3334     // Add hits for document 4 and merge.
3335     DocumentId original_last_added_doc_id = index->last_added_document_id();
3336     index->set_last_added_document_id(original_last_added_doc_id + 1);
3337     Index::Editor editor =
3338         index->Edit(original_last_added_doc_id + 1, /*section_id=*/0,
3339                     TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
3340     ICING_ASSERT_OK(editor.BufferTerm("foo"));
3341     ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
3342     ICING_ASSERT_OK(index->Merge());
3343 
3344     // Add hits for document 5 and don't merge.
3345     index->set_last_added_document_id(original_last_added_doc_id + 2);
3346     editor = index->Edit(original_last_added_doc_id + 2, /*section_id=*/0,
3347                          TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
3348     ICING_ASSERT_OK(editor.BufferTerm("bar"));
3349     ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
3350   }
3351 
3352   // 3. Create the index again. This should throw out the lite and main index
3353   // and trigger index restoration.
3354   {
3355     // Mock filesystem to observe and check the behavior of all indices.
3356     auto mock_filesystem = std::make_unique<MockFilesystem>();
3357     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
3358         .WillRepeatedly(DoDefault());
3359     // Ensure term index directory should never be discarded. since we only call
3360     // TruncateTo for term index.
3361     EXPECT_CALL(*mock_filesystem,
3362                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
3363         .Times(0);
3364     // Ensure integer index directory should never be discarded, and Clear()
3365     // should never be called (i.e. storage sub directory
3366     // "*/integer_index_dir/*" should never be discarded).
3367     EXPECT_CALL(*mock_filesystem,
3368                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
3369         .Times(0);
3370     EXPECT_CALL(*mock_filesystem,
3371                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
3372         .Times(0);
3373     // Ensure qualified id join index directory should never be discarded, and
3374     // Clear() should never be called (i.e. storage sub directory
3375     // "*/qualified_id_join_index_dir/*" should never be discarded).
3376     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
3377                                       EndsWith("/qualified_id_join_index_dir")))
3378         .Times(0);
3379     EXPECT_CALL(
3380         *mock_filesystem,
3381         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
3382         .Times(0);
3383 
3384     TestIcingSearchEngine icing(
3385         GetDefaultIcingOptions(), std::move(mock_filesystem),
3386         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
3387         GetTestJniCache());
3388     InitializeResultProto initialize_result = icing.Initialize();
3389     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
3390     // Truncating main index not only deletes data ahead document store, but
3391     // also deletes valid data. Therefore, we still have to replay documents and
3392     // reindex.
3393     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
3394                 Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
3395     EXPECT_THAT(
3396         initialize_result.initialize_stats().integer_index_restoration_cause(),
3397         Eq(InitializeStatsProto::NONE));
3398     EXPECT_THAT(initialize_result.initialize_stats()
3399                     .qualified_id_join_index_restoration_cause(),
3400                 Eq(InitializeStatsProto::NONE));
3401 
3402     // Verify term index works normally
3403     SearchSpecProto search_spec1;
3404     search_spec1.set_query("body:consectetur");
3405     search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
3406     SearchResultProto results1 =
3407         icing.Search(search_spec1, ScoringSpecProto::default_instance(),
3408                      ResultSpecProto::default_instance());
3409     EXPECT_THAT(results1.status(), ProtoIsOk());
3410     EXPECT_THAT(results1.next_page_token(), Eq(0));
3411     // Only the first document should be retrievable.
3412     ASSERT_THAT(results1.results(), SizeIs(3));
3413     EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
3414     EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
3415     EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
3416 
3417     // Verify integer index works normally
3418     SearchSpecProto search_spec2;
3419     search_spec2.set_query("indexableInteger == 123");
3420     search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
3421 
3422     SearchResultProto results2 =
3423         icing.Search(search_spec2, ScoringSpecProto::default_instance(),
3424                      ResultSpecProto::default_instance());
3425     ASSERT_THAT(results2.results(), SizeIs(3));
3426     EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
3427     EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
3428     EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
3429 
3430     // Verify qualified id join index works normally: join a query for
3431     // `name:person` with a child query for `body:consectetur` based on the
3432     // child's `senderQualifiedId` field.
3433     SearchSpecProto search_spec3;
3434     search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
3435     search_spec3.set_query("name:person");
3436     JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
3437     join_spec->set_parent_property_expression(
3438         std::string(JoinProcessor::kQualifiedIdExpr));
3439     join_spec->set_child_property_expression("senderQualifiedId");
3440     join_spec->set_aggregation_scoring_strategy(
3441         JoinSpecProto::AggregationScoringStrategy::COUNT);
3442     JoinSpecProto::NestedSpecProto* nested_spec =
3443         join_spec->mutable_nested_spec();
3444     SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
3445     nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
3446     nested_search_spec->set_query("body:consectetur");
3447     *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
3448     *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
3449 
3450     ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
3451     result_spec3.set_max_joined_children_per_parent_to_return(
3452         std::numeric_limits<int32_t>::max());
3453 
3454     SearchResultProto results3 = icing.Search(
3455         search_spec3, ScoringSpecProto::default_instance(), result_spec3);
3456     ASSERT_THAT(results3.results(), SizeIs(1));
3457     EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
3458     EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
3459     EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
3460                 Eq("message/3"));
3461     EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
3462                 Eq("message/2"));
3463     EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
3464                 Eq("message/1"));
3465   }
3466 
3467   // 4. Since document 4, 5 don't exist, testing queries = "foo", "bar" are not
3468   // enough to verify the correctness of term index restoration. Instead, we
3469   // have to check hits for "foo", "bar" should not be found in term index.
3470   {
3471     ICING_ASSERT_OK_AND_ASSIGN(
3472         std::unique_ptr<Index> index,
3473         Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100,
3474                                      /*lite_index_sort_at_indexing=*/true,
3475                                      /*lite_index_sort_size=*/50),
3476                       filesystem(), icing_filesystem()));
3477     ICING_ASSERT_OK_AND_ASSIGN(
3478         std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter,
3479         index->GetIterator("foo", /*term_start_index=*/0,
3480                            /*unnormalized_term_length=*/0, kSectionIdMaskAll,
3481                            TermMatchType::EXACT_ONLY));
3482     EXPECT_THAT(doc_hit_info_iter->Advance(),
3483                 StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
3484 
3485     ICING_ASSERT_OK_AND_ASSIGN(
3486         doc_hit_info_iter,
3487         index->GetIterator("bar", /*term_start_index=*/0,
3488                            /*unnormalized_term_length=*/0, kSectionIdMaskAll,
3489                            TermMatchType::EXACT_ONLY));
3490     EXPECT_THAT(doc_hit_info_iter->Advance(),
3491                 StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
3492   }
3493 }
3494 
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexTruncateIntegerIndexWithoutReindexing)3495 TEST_F(IcingSearchEngineInitializationTest,
3496        RestoreIndexTruncateIntegerIndexWithoutReindexing) {
3497   // Test the following scenario: integer index is *completely* ahead of
3498   // document store. IcingSearchEngine should be able to recover integer index.
3499   // Several additional behaviors are also tested:
3500   // - Index directory handling:
3501   //   - Term index directory should be unaffected.
3502   //   - Integer index directory should be unaffected.
3503   //   - Qualified id join index directory should be unaffected.
3504   // - Truncate indices:
3505   //   - "TruncateTo()" for term index shouldn't take effect.
3506   //   - "Clear()" should be called for integer index and throw out all integer
3507   //     index storages, i.e. all storage sub directories (path_expr =
3508   //     "*/integer_index_dir/*") should be discarded. This should be sufficient
3509   //     to make integer index consistent with document store (in this case,
3510   //     document store is empty as well), so reindexing should not take place.
3511   //   - "Clear()" should be called for qualified id join index. It is a special
3512   //     case when document store has no document.
3513 
3514   // 1. Create an index with no document.
3515   {
3516     TestIcingSearchEngine icing(
3517         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
3518         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
3519         GetTestJniCache());
3520 
3521     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
3522     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
3523   }
3524 
3525   // 2. Manually add some data into integer index and increment
3526   //    last_added_document_id. This will cause mismatched document id with
3527   //    document store.
3528   //   - Document store: []
3529   //   - Term index: []
3530   //   - Integer index: [0]
3531   //   - Qualified id join index: []
3532   {
3533     Filesystem filesystem;
3534     ICING_ASSERT_OK_AND_ASSIGN(
3535         std::unique_ptr<IntegerIndex> integer_index,
3536         IntegerIndex::Create(filesystem, GetIntegerIndexDir(),
3537                              /*num_data_threshold_for_bucket_split=*/65536,
3538                              /*pre_mapping_fbv=*/false));
3539     // Add hits for document 0.
3540     ASSERT_THAT(integer_index->last_added_document_id(), kInvalidDocumentId);
3541     integer_index->set_last_added_document_id(0);
3542     std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit(
3543         /*property_path=*/"indexableInteger", /*document_id=*/0,
3544         /*section_id=*/0);
3545     ICING_ASSERT_OK(editor->BufferKey(123));
3546     ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
3547   }
3548 
3549   // 3. Create the index again. This should trigger index restoration.
3550   {
3551     // Mock filesystem to observe and check the behavior of all indices.
3552     auto mock_filesystem = std::make_unique<MockFilesystem>();
3553     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
3554         .WillRepeatedly(DoDefault());
3555     // Ensure term index directory should never be discarded.
3556     EXPECT_CALL(*mock_filesystem,
3557                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
3558         .Times(0);
3559     // Ensure integer index directory should never be discarded.
3560     EXPECT_CALL(*mock_filesystem,
3561                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
3562         .Times(0);
3563     // Clear() should be called to truncate integer index and thus storage sub
3564     // directory (path_expr = "*/integer_index_dir/*") should be discarded.
3565     EXPECT_CALL(*mock_filesystem,
3566                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
3567         .Times(1);
3568     // Ensure qualified id join index directory should never be discarded.
3569     // Clear() was called and should discard and reinitialize the underlying
3570     // mapper.
3571     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
3572                                       EndsWith("/qualified_id_join_index_dir")))
3573         .Times(0);
3574     EXPECT_CALL(
3575         *mock_filesystem,
3576         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
3577         .Times(AtLeast(1));
3578 
3579     TestIcingSearchEngine icing(
3580         GetDefaultIcingOptions(), std::move(mock_filesystem),
3581         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
3582         GetTestJniCache());
3583     InitializeResultProto initialize_result = icing.Initialize();
3584     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
3585     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
3586                 Eq(InitializeStatsProto::NONE));
3587     // Since truncating integer index is sufficient to make it consistent with
3588     // document store, replaying documents or reindexing shouldn't take place.
3589     EXPECT_THAT(
3590         initialize_result.initialize_stats().integer_index_restoration_cause(),
3591         Eq(InitializeStatsProto::NONE));
3592     EXPECT_THAT(initialize_result.initialize_stats()
3593                     .qualified_id_join_index_restoration_cause(),
3594                 Eq(InitializeStatsProto::NONE));
3595 
3596     // Verify that numeric query safely wiped out the pre-existing hit for
3597     // 'indexableInteger' == 123. Add a new document without that value for
3598     // 'indexableInteger' that will take docid=0. If the integer index was not
3599     // rebuilt correctly, then it will still have the previously added hit for
3600     // 'indexableInteger' == 123 for docid 0 and incorrectly return this new
3601     // doc in a query.
3602     DocumentProto another_message =
3603         DocumentBuilder()
3604             .SetKey("namespace", "message/1")
3605             .SetSchema("Message")
3606             .AddStringProperty("body", kIpsumText)
3607             .AddInt64Property("indexableInteger", 456)
3608             .SetCreationTimestampMs(kDefaultCreationTimestampMs)
3609             .Build();
3610     EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
3611     // Verify integer index works normally
3612     SearchSpecProto search_spec;
3613     search_spec.set_query("indexableInteger == 123");
3614     search_spec.add_enabled_features(std::string(kNumericSearchFeature));
3615 
3616     SearchResultProto results =
3617         icing.Search(search_spec, ScoringSpecProto::default_instance(),
3618                      ResultSpecProto::default_instance());
3619     EXPECT_THAT(results.results(), IsEmpty());
3620   }
3621 }
3622 
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexTruncateIntegerIndexWithReindexing)3623 TEST_F(IcingSearchEngineInitializationTest,
3624        RestoreIndexTruncateIntegerIndexWithReindexing) {
3625   // Test the following scenario: integer index is *partially* ahead of document
3626   // store. IcingSearchEngine should be able to recover integer index. Several
3627   // additional behaviors are also tested:
3628   // - Index directory handling:
3629   //   - Term index directory should be unaffected.
3630   //   - Integer index directory should be unaffected.
3631   //   - Qualified id join index directory should be unaffected.
3632   // - Truncate indices:
3633   //   - "TruncateTo()" for term index shouldn't take effect.
3634   //   - "Clear()" should be called for integer index and throw out all integer
3635   //     index storages, i.e. all storage sub directories (path_expr =
3636   //     "*/integer_index_dir/*") should be discarded. However, some valid data
3637   //     in integer index were discarded together, so reindexing should still
3638   //     take place to recover them after clearing.
3639   //   - "Clear()" shouldn't be called for qualified id join index, i.e. no
3640   //     underlying storage sub directory (path_expr =
3641   //     "*/qualified_id_join_index_dir/*") should be discarded.
3642 
3643   SchemaProto schema =
3644       SchemaBuilder()
3645           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
3646               PropertyConfigBuilder()
3647                   .SetName("name")
3648                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
3649                   .SetCardinality(CARDINALITY_REQUIRED)))
3650           .AddType(SchemaTypeConfigBuilder()
3651                        .SetType("Message")
3652                        .AddProperty(PropertyConfigBuilder()
3653                                         .SetName("body")
3654                                         .SetDataTypeString(TERM_MATCH_PREFIX,
3655                                                            TOKENIZER_PLAIN)
3656                                         .SetCardinality(CARDINALITY_REQUIRED))
3657                        .AddProperty(PropertyConfigBuilder()
3658                                         .SetName("indexableInteger")
3659                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
3660                                         .SetCardinality(CARDINALITY_OPTIONAL))
3661                        .AddProperty(PropertyConfigBuilder()
3662                                         .SetName("senderQualifiedId")
3663                                         .SetDataTypeJoinableString(
3664                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
3665                                         .SetCardinality(CARDINALITY_REQUIRED)))
3666           .Build();
3667 
3668   DocumentProto person =
3669       DocumentBuilder()
3670           .SetKey("namespace", "person")
3671           .SetSchema("Person")
3672           .AddStringProperty("name", "person")
3673           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
3674           .Build();
3675   DocumentProto message =
3676       DocumentBuilder()
3677           .SetKey("namespace", "message/1")
3678           .SetSchema("Message")
3679           .AddStringProperty("body", kIpsumText)
3680           .AddInt64Property("indexableInteger", 123)
3681           .AddStringProperty("senderQualifiedId", "namespace#person")
3682           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
3683           .Build();
3684 
3685   // 1. Create an index with message 3 documents.
3686   {
3687     TestIcingSearchEngine icing(
3688         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
3689         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
3690         GetTestJniCache());
3691 
3692     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
3693     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
3694 
3695     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
3696     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
3697     message = DocumentBuilder(message).SetUri("message/2").Build();
3698     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
3699     message = DocumentBuilder(message).SetUri("message/3").Build();
3700     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
3701   }
3702 
3703   // 2. Manually add some data into integer index and increment
3704   //    last_added_document_id. This will cause mismatched document id with
3705   //    document store.
3706   //   - Document store: [0, 1, 2, 3]
3707   //   - Term index: [0, 1, 2, 3]
3708   //   - Integer index: [0, 1, 2, 3, 4]
3709   //   - Qualified id join index: [0, 1, 2, 3]
3710   {
3711     Filesystem filesystem;
3712     ICING_ASSERT_OK_AND_ASSIGN(
3713         std::unique_ptr<IntegerIndex> integer_index,
3714         IntegerIndex::Create(filesystem, GetIntegerIndexDir(),
3715                              /*num_data_threshold_for_bucket_split=*/65536,
3716                              /*pre_mapping_fbv=*/false));
3717     // Add hits for document 4.
3718     DocumentId original_last_added_doc_id =
3719         integer_index->last_added_document_id();
3720     integer_index->set_last_added_document_id(original_last_added_doc_id + 1);
3721     std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit(
3722         /*property_path=*/"indexableInteger",
3723         /*document_id=*/original_last_added_doc_id + 1, /*section_id=*/0);
3724     ICING_ASSERT_OK(editor->BufferKey(456));
3725     ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys());
3726   }
3727 
3728   // 3. Create the index again. This should trigger index restoration.
3729   {
3730     // Mock filesystem to observe and check the behavior of all indices.
3731     auto mock_filesystem = std::make_unique<MockFilesystem>();
3732     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
3733         .WillRepeatedly(DoDefault());
3734     // Ensure term index directory should never be discarded.
3735     EXPECT_CALL(*mock_filesystem,
3736                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
3737         .Times(0);
3738     // Ensure integer index directory should never be discarded.
3739     EXPECT_CALL(*mock_filesystem,
3740                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
3741         .Times(0);
3742     // Clear() should be called to truncate integer index and thus storage sub
3743     // directory (path_expr = "*/integer_index_dir/*") should be discarded.
3744     EXPECT_CALL(*mock_filesystem,
3745                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
3746         .Times(1);
3747     // Ensure qualified id join index directory should never be discarded, and
3748     // Clear() should never be called (i.e. storage sub directory
3749     // "*/qualified_id_join_index_dir/*" should never be discarded).
3750     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
3751                                       EndsWith("/qualified_id_join_index_dir")))
3752         .Times(0);
3753     EXPECT_CALL(
3754         *mock_filesystem,
3755         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
3756         .Times(0);
3757 
3758     TestIcingSearchEngine icing(
3759         GetDefaultIcingOptions(), std::move(mock_filesystem),
3760         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
3761         GetTestJniCache());
3762     InitializeResultProto initialize_result = icing.Initialize();
3763     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
3764     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
3765                 Eq(InitializeStatsProto::NONE));
3766     EXPECT_THAT(
3767         initialize_result.initialize_stats().integer_index_restoration_cause(),
3768         Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
3769     EXPECT_THAT(initialize_result.initialize_stats()
3770                     .qualified_id_join_index_restoration_cause(),
3771                 Eq(InitializeStatsProto::NONE));
3772 
3773     // Verify term index works normally
3774     SearchSpecProto search_spec1;
3775     search_spec1.set_query("body:consectetur");
3776     search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
3777     SearchResultProto results1 =
3778         icing.Search(search_spec1, ScoringSpecProto::default_instance(),
3779                      ResultSpecProto::default_instance());
3780     EXPECT_THAT(results1.status(), ProtoIsOk());
3781     EXPECT_THAT(results1.next_page_token(), Eq(0));
3782     // All documents should be retrievable.
3783     ASSERT_THAT(results1.results(), SizeIs(3));
3784     EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
3785     EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
3786     EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
3787 
3788     // Verify integer index works normally
3789     SearchSpecProto search_spec2;
3790     search_spec2.set_query("indexableInteger == 123");
3791     search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
3792 
3793     SearchResultProto results2 =
3794         icing.Search(search_spec2, ScoringSpecProto::default_instance(),
3795                      ResultSpecProto::default_instance());
3796     ASSERT_THAT(results2.results(), SizeIs(3));
3797     EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
3798     EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
3799     EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
3800 
3801     // Verify qualified id join index works normally: join a query for
3802     // `name:person` with a child query for `body:consectetur` based on the
3803     // child's `senderQualifiedId` field.
3804     SearchSpecProto search_spec3;
3805     search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
3806     search_spec3.set_query("name:person");
3807     JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
3808     join_spec->set_parent_property_expression(
3809         std::string(JoinProcessor::kQualifiedIdExpr));
3810     join_spec->set_child_property_expression("senderQualifiedId");
3811     join_spec->set_aggregation_scoring_strategy(
3812         JoinSpecProto::AggregationScoringStrategy::COUNT);
3813     JoinSpecProto::NestedSpecProto* nested_spec =
3814         join_spec->mutable_nested_spec();
3815     SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
3816     nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
3817     nested_search_spec->set_query("body:consectetur");
3818     *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
3819     *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
3820 
3821     ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
3822     result_spec3.set_max_joined_children_per_parent_to_return(
3823         std::numeric_limits<int32_t>::max());
3824 
3825     SearchResultProto results3 = icing.Search(
3826         search_spec3, ScoringSpecProto::default_instance(), result_spec3);
3827     ASSERT_THAT(results3.results(), SizeIs(1));
3828     EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
3829     EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
3830     EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
3831                 Eq("message/3"));
3832     EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
3833                 Eq("message/2"));
3834     EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
3835                 Eq("message/1"));
3836 
3837     // Verify that numeric index safely wiped out the pre-existing hit for
3838     // 'indexableInteger' == 456. Add a new document without that value for
3839     // 'indexableInteger' that will take docid=0. If the integer index was not
3840     // rebuilt correctly, then it will still have the previously added hit for
3841     // 'indexableInteger' == 456 for docid 0 and incorrectly return this new
3842     // doc in a query.
3843     DocumentProto another_message =
3844         DocumentBuilder()
3845             .SetKey("namespace", "message/4")
3846             .SetSchema("Message")
3847             .AddStringProperty("body", kIpsumText)
3848             .AddStringProperty("senderQualifiedId", "namespace#person")
3849             .SetCreationTimestampMs(kDefaultCreationTimestampMs)
3850             .Build();
3851     EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
3852     // Verify integer index works normally
3853     SearchSpecProto search_spec;
3854     search_spec.set_query("indexableInteger == 456");
3855     search_spec.add_enabled_features(std::string(kNumericSearchFeature));
3856 
3857     SearchResultProto results =
3858         icing.Search(search_spec, ScoringSpecProto::default_instance(),
3859                      ResultSpecProto::default_instance());
3860     EXPECT_THAT(results.results(), IsEmpty());
3861   }
3862 }
3863 
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexTruncateQualifiedIdJoinIndexWithoutReindexing)3864 TEST_F(IcingSearchEngineInitializationTest,
3865        RestoreIndexTruncateQualifiedIdJoinIndexWithoutReindexing) {
3866   // Test the following scenario: qualified id join index is *completely* ahead
3867   // of document store. IcingSearchEngine should be able to recover qualified id
3868   // join index. Several additional behaviors are also tested:
3869   // - Index directory handling:
3870   //   - Term index directory should be unaffected.
3871   //   - Integer index directory should be unaffected.
3872   //   - Qualified id join index directory should be unaffected.
3873   // - Truncate indices:
3874   //   - "TruncateTo()" for term index shouldn't take effect.
3875   //   - "Clear()" should be called for integer index. It is a special case when
3876   //     document store has no document. Since there is no integer index storage
3877   //     sub directories (path_expr = "*/integer_index_dir/*"), nothing will be
3878   //     discarded.
3879   //   - "Clear()" should be called for qualified id join index and throw out
3880   //     all data, i.e. discarding the underlying mapper (path_expr =
3881   //     "*/qualified_id_join_index_dir/*") and reinitialize. This should be
3882   //     sufficient to make qualified id join index consistent with document
3883   //     store (in this case, document store is empty as well), so reindexing
3884   //     should not take place.
3885 
3886   // 1. Create an index with no document.
3887   {
3888     TestIcingSearchEngine icing(
3889         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
3890         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
3891         GetTestJniCache());
3892 
3893     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
3894     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
3895   }
3896 
3897   // 2. Manually add some data into integer index and increment
3898   //    last_added_document_id. This will cause mismatched document id with
3899   //    document store.
3900   //   - Document store: []
3901   //   - Term index: []
3902   //   - Integer index: []
3903   //   - Qualified id join index: [0]
3904   {
3905     Filesystem filesystem;
3906     ICING_ASSERT_OK_AND_ASSIGN(
3907         std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
3908         QualifiedIdJoinIndexImplV2::Create(filesystem,
3909                                            GetQualifiedIdJoinIndexDir(),
3910                                            /*pre_mapping_fbv=*/false));
3911     // Add data for document 0.
3912     ASSERT_THAT(qualified_id_join_index->last_added_document_id(),
3913                 kInvalidDocumentId);
3914     qualified_id_join_index->set_last_added_document_id(0);
3915     ICING_ASSERT_OK(qualified_id_join_index->Put(
3916         /*schema_type_id=*/0, /*joinable_property_id=*/0, /*document_id=*/0,
3917         /*ref_namespace_fingerprint_ids=*/
3918         {NamespaceFingerprintIdentifier(/*namespace_id=*/0,
3919                                         /*target_str=*/"uri")}));
3920   }
3921 
3922   // 3. Create the index again. This should trigger index restoration.
3923   {
3924     // Mock filesystem to observe and check the behavior of all indices.
3925     auto mock_filesystem = std::make_unique<MockFilesystem>();
3926     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
3927         .WillRepeatedly(DoDefault());
3928     // Ensure term index directory should never be discarded.
3929     EXPECT_CALL(*mock_filesystem,
3930                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
3931         .Times(0);
3932     // Ensure integer index directory should never be discarded. Even though
3933     // Clear() was called, it shouldn't take effect since there is no storage
3934     // sub directory ("*/integer_index_dir/*") and nothing will be discarded.
3935     EXPECT_CALL(*mock_filesystem,
3936                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
3937         .Times(0);
3938     EXPECT_CALL(*mock_filesystem,
3939                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
3940         .Times(0);
3941     // Ensure qualified id join index directory should never be discarded.
3942     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
3943                                       EndsWith("/qualified_id_join_index_dir")))
3944         .Times(0);
3945     // Clear() should be called to truncate qualified id join index and thus
3946     // underlying storage sub directory (path_expr =
3947     // "*/qualified_id_join_index_dir/*") should be discarded.
3948     EXPECT_CALL(
3949         *mock_filesystem,
3950         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
3951         .Times(AtLeast(1));
3952 
3953     TestIcingSearchEngine icing(
3954         GetDefaultIcingOptions(), std::move(mock_filesystem),
3955         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
3956         GetTestJniCache());
3957     InitializeResultProto initialize_result = icing.Initialize();
3958     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
3959     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
3960                 Eq(InitializeStatsProto::NONE));
3961     EXPECT_THAT(
3962         initialize_result.initialize_stats().integer_index_restoration_cause(),
3963         Eq(InitializeStatsProto::NONE));
3964     // Since truncating qualified id join index is sufficient to make it
3965     // consistent with document store, replaying documents or reindexing
3966     // shouldn't take place.
3967     EXPECT_THAT(initialize_result.initialize_stats()
3968                     .qualified_id_join_index_restoration_cause(),
3969                 Eq(InitializeStatsProto::NONE));
3970   }
3971 
3972   // 4. Since document 0 doesn't exist, testing join query is not enough to
3973   // verify the correctness of qualified id join index restoration. Instead, we
3974   // have to check the previously added data should not be found in qualified id
3975   // join index.
3976   {
3977     Filesystem filesystem;
3978     ICING_ASSERT_OK_AND_ASSIGN(
3979         std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
3980         QualifiedIdJoinIndexImplV2::Create(filesystem,
3981                                            GetQualifiedIdJoinIndexDir(),
3982                                            /*pre_mapping_fbv=*/false));
3983     ICING_ASSERT_OK_AND_ASSIGN(
3984         auto iterator, qualified_id_join_index->GetIterator(
3985                            /*schema_type_id=*/0, /*joinable_property_id=*/0));
3986     EXPECT_THAT(iterator->Advance(),
3987                 StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
3988   }
3989 }
3990 
TEST_F(IcingSearchEngineInitializationTest,RestoreIndexTruncateQualifiedIdJoinIndexWithReindexing)3991 TEST_F(IcingSearchEngineInitializationTest,
3992        RestoreIndexTruncateQualifiedIdJoinIndexWithReindexing) {
3993   // Test the following scenario: qualified id join index is *partially* ahead
3994   // of document store. IcingSearchEngine should be able to recover qualified id
3995   // join index. Several additional behaviors are also tested:
3996   // - Index directory handling:
3997   //   - Term index directory should be unaffected.
3998   //   - Integer index directory should be unaffected.
3999   //   - Qualified id join index directory should be unaffected.
4000   // - Truncate indices:
4001   //   - "TruncateTo()" for term index shouldn't take effect.
4002   //   - "Clear()" shouldn't be called for integer index, i.e. no integer index
4003   //     storage sub directories (path_expr = "*/integer_index_dir/*") should be
4004   //     discarded.
4005   //   - "Clear()" should be called for qualified id join index and throw out
4006   //     all data, i.e. discarding the underlying mapper (path_expr =
4007   //     "*/qualified_id_join_index_dir/*") and reinitialize. However, some
4008   //     valid data in qualified id join index were discarded together, so
4009   //     reindexing should still take place to recover them after clearing.
4010 
4011   SchemaProto schema =
4012       SchemaBuilder()
4013           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
4014               PropertyConfigBuilder()
4015                   .SetName("name")
4016                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
4017                   .SetCardinality(CARDINALITY_REQUIRED)))
4018           .AddType(SchemaTypeConfigBuilder()
4019                        .SetType("Message")
4020                        .AddProperty(PropertyConfigBuilder()
4021                                         .SetName("body")
4022                                         .SetDataTypeString(TERM_MATCH_PREFIX,
4023                                                            TOKENIZER_PLAIN)
4024                                         .SetCardinality(CARDINALITY_REQUIRED))
4025                        .AddProperty(PropertyConfigBuilder()
4026                                         .SetName("indexableInteger")
4027                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
4028                                         .SetCardinality(CARDINALITY_REQUIRED))
4029                        .AddProperty(PropertyConfigBuilder()
4030                                         .SetName("senderQualifiedId")
4031                                         .SetDataTypeJoinableString(
4032                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
4033                                         .SetCardinality(CARDINALITY_OPTIONAL)))
4034           .Build();
4035 
4036   DocumentProto person =
4037       DocumentBuilder()
4038           .SetKey("namespace", "person")
4039           .SetSchema("Person")
4040           .AddStringProperty("name", "person")
4041           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
4042           .Build();
4043   DocumentProto message =
4044       DocumentBuilder()
4045           .SetKey("namespace", "message/1")
4046           .SetSchema("Message")
4047           .AddStringProperty("body", kIpsumText)
4048           .AddInt64Property("indexableInteger", 123)
4049           .AddStringProperty("senderQualifiedId", "namespace#person")
4050           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
4051           .Build();
4052 
4053   // 1. Create an index with message 3 documents.
4054   {
4055     TestIcingSearchEngine icing(
4056         GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
4057         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
4058         GetTestJniCache());
4059 
4060     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
4061     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
4062 
4063     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
4064     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
4065     message = DocumentBuilder(message).SetUri("message/2").Build();
4066     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
4067     message = DocumentBuilder(message).SetUri("message/3").Build();
4068     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
4069   }
4070 
4071   // 2. Manually add some data into qualified id join index and increment
4072   //    last_added_document_id. This will cause mismatched document id with
4073   //    document store.
4074   //   - Document store: [0, 1, 2, 3]
4075   //   - Term index: [0, 1, 2, 3]
4076   //   - Integer index: [0, 1, 2, 3]
4077   //   - Qualified id join index: [0, 1, 2, 3, 4]
4078   {
4079     Filesystem filesystem;
4080     ICING_ASSERT_OK_AND_ASSIGN(
4081         std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
4082         QualifiedIdJoinIndexImplV2::Create(filesystem,
4083                                            GetQualifiedIdJoinIndexDir(),
4084                                            /*pre_mapping_fbv=*/false));
4085     // Add data for document 4.
4086     DocumentId original_last_added_doc_id =
4087         qualified_id_join_index->last_added_document_id();
4088     qualified_id_join_index->set_last_added_document_id(
4089         original_last_added_doc_id + 1);
4090     ICING_ASSERT_OK(qualified_id_join_index->Put(
4091         /*schema_type_id=*/1, /*joinable_property_id=*/0,
4092         /*document_id=*/original_last_added_doc_id + 1,
4093         /*ref_namespace_fingerprint_ids=*/
4094         {NamespaceFingerprintIdentifier(/*namespace_id=*/0,
4095                                         /*target_str=*/"person")}));
4096   }
4097 
4098   // 3. Create the index again. This should trigger index restoration.
4099   {
4100     // Mock filesystem to observe and check the behavior of all indices.
4101     auto mock_filesystem = std::make_unique<MockFilesystem>();
4102     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
4103         .WillRepeatedly(DoDefault());
4104     // Ensure term index directory should never be discarded.
4105     EXPECT_CALL(*mock_filesystem,
4106                 DeleteDirectoryRecursively(EndsWith("/index_dir")))
4107         .Times(0);
4108     // Ensure integer index directory should never be discarded, and Clear()
4109     // should never be called (i.e. storage sub directory
4110     // "*/integer_index_dir/*" should never be discarded).
4111     EXPECT_CALL(*mock_filesystem,
4112                 DeleteDirectoryRecursively(EndsWith("/integer_index_dir")))
4113         .Times(0);
4114     EXPECT_CALL(*mock_filesystem,
4115                 DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/")))
4116         .Times(0);
4117     // Ensure qualified id join index directory should never be discarded.
4118     EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(
4119                                       EndsWith("/qualified_id_join_index_dir")))
4120         .Times(0);
4121     // Clear() should be called to truncate qualified id join index and thus
4122     // underlying storage sub directory (path_expr =
4123     // "*/qualified_id_join_index_dir/*") should be discarded.
4124     EXPECT_CALL(
4125         *mock_filesystem,
4126         DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/")))
4127         .Times(AtLeast(1));
4128 
4129     TestIcingSearchEngine icing(
4130         GetDefaultIcingOptions(), std::move(mock_filesystem),
4131         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(),
4132         GetTestJniCache());
4133     InitializeResultProto initialize_result = icing.Initialize();
4134     ASSERT_THAT(initialize_result.status(), ProtoIsOk());
4135     EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
4136                 Eq(InitializeStatsProto::NONE));
4137     EXPECT_THAT(
4138         initialize_result.initialize_stats().integer_index_restoration_cause(),
4139         Eq(InitializeStatsProto::NONE));
4140     EXPECT_THAT(initialize_result.initialize_stats()
4141                     .qualified_id_join_index_restoration_cause(),
4142                 Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
4143 
4144     // Verify term index works normally
4145     SearchSpecProto search_spec1;
4146     search_spec1.set_query("body:consectetur");
4147     search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
4148     SearchResultProto results1 =
4149         icing.Search(search_spec1, ScoringSpecProto::default_instance(),
4150                      ResultSpecProto::default_instance());
4151     EXPECT_THAT(results1.status(), ProtoIsOk());
4152     EXPECT_THAT(results1.next_page_token(), Eq(0));
4153     // All documents should be retrievable.
4154     ASSERT_THAT(results1.results(), SizeIs(3));
4155     EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3"));
4156     EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2"));
4157     EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1"));
4158 
4159     // Verify integer index works normally
4160     SearchSpecProto search_spec2;
4161     search_spec2.set_query("indexableInteger == 123");
4162     search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
4163 
4164     SearchResultProto results2 =
4165         icing.Search(search_spec2, ScoringSpecProto::default_instance(),
4166                      ResultSpecProto::default_instance());
4167     ASSERT_THAT(results2.results(), SizeIs(3));
4168     EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3"));
4169     EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2"));
4170     EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1"));
4171 
4172     // Verify qualified id join index works normally: join a query for
4173     // `name:person` with a child query for `body:consectetur` based on the
4174     // child's `senderQualifiedId` field.
4175 
4176     // Add document 4 without "senderQualifiedId". If join index is not rebuilt
4177     // correctly, then it will still have the previously added senderQualifiedId
4178     // for document 4 and include document 4 incorrectly in the right side.
4179     DocumentProto another_message =
4180         DocumentBuilder()
4181             .SetKey("namespace", "message/4")
4182             .SetSchema("Message")
4183             .AddStringProperty("body", kIpsumText)
4184             .AddInt64Property("indexableInteger", 123)
4185             .SetCreationTimestampMs(kDefaultCreationTimestampMs)
4186             .Build();
4187     EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk());
4188 
4189     SearchSpecProto search_spec3;
4190     search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
4191     search_spec3.set_query("name:person");
4192     JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
4193     join_spec->set_parent_property_expression(
4194         std::string(JoinProcessor::kQualifiedIdExpr));
4195     join_spec->set_child_property_expression("senderQualifiedId");
4196     join_spec->set_aggregation_scoring_strategy(
4197         JoinSpecProto::AggregationScoringStrategy::COUNT);
4198     JoinSpecProto::NestedSpecProto* nested_spec =
4199         join_spec->mutable_nested_spec();
4200     SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
4201     nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
4202     nested_search_spec->set_query("body:consectetur");
4203     *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
4204     *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
4205 
4206     ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
4207     result_spec3.set_max_joined_children_per_parent_to_return(
4208         std::numeric_limits<int32_t>::max());
4209 
4210     SearchResultProto results3 = icing.Search(
4211         search_spec3, ScoringSpecProto::default_instance(), result_spec3);
4212     ASSERT_THAT(results3.results(), SizeIs(1));
4213     EXPECT_THAT(results3.results(0).document().uri(), Eq("person"));
4214     EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3));
4215     EXPECT_THAT(results3.results(0).joined_results(0).document().uri(),
4216                 Eq("message/3"));
4217     EXPECT_THAT(results3.results(0).joined_results(1).document().uri(),
4218                 Eq("message/2"));
4219     EXPECT_THAT(results3.results(0).joined_results(2).document().uri(),
4220                 Eq("message/1"));
4221   }
4222 }
4223 
TEST_F(IcingSearchEngineInitializationTest,DocumentWithNoIndexedPropertyDoesntCauseRestoreIndex)4224 TEST_F(IcingSearchEngineInitializationTest,
4225        DocumentWithNoIndexedPropertyDoesntCauseRestoreIndex) {
4226   // 1. Create an index with a single document in it that has no indexed
4227   // content.
4228   {
4229     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
4230     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
4231 
4232     // Set a schema for a single type that has no indexed properties.
4233     SchemaProto schema =
4234         SchemaBuilder()
4235             .AddType(
4236                 SchemaTypeConfigBuilder()
4237                     .SetType("Message")
4238                     .AddProperty(PropertyConfigBuilder()
4239                                      .SetName("unindexedField")
4240                                      .SetDataTypeString(TERM_MATCH_UNKNOWN,
4241                                                         TOKENIZER_NONE)
4242                                      .SetCardinality(CARDINALITY_REQUIRED))
4243                     .AddProperty(PropertyConfigBuilder()
4244                                      .SetName("unindexedInteger")
4245                                      .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN)
4246                                      .SetCardinality(CARDINALITY_REQUIRED)))
4247             .Build();
4248     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
4249 
4250     // Add a document that contains no indexed properties.
4251     DocumentProto document =
4252         DocumentBuilder()
4253             .SetKey("icing", "fake_type/0")
4254             .SetSchema("Message")
4255             .AddStringProperty("unindexedField",
4256                                "Don't you dare search over this!")
4257             .AddInt64Property("unindexedInteger", -123)
4258             .Build();
4259     EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
4260   }
4261 
4262   // 2. Create the index again. This should NOT trigger a recovery of any kind.
4263   {
4264     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
4265     InitializeResultProto init_result = icing.Initialize();
4266     EXPECT_THAT(init_result.status(), ProtoIsOk());
4267     EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
4268                 Eq(InitializeStatsProto::NO_DATA_LOSS));
4269     EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
4270                 Eq(InitializeStatsProto::NONE));
4271     EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
4272                 Eq(InitializeStatsProto::NONE));
4273     EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
4274                 Eq(InitializeStatsProto::NONE));
4275     EXPECT_THAT(
4276         init_result.initialize_stats().integer_index_restoration_cause(),
4277         Eq(InitializeStatsProto::NONE));
4278     EXPECT_THAT(init_result.initialize_stats()
4279                     .qualified_id_join_index_restoration_cause(),
4280                 Eq(InitializeStatsProto::NONE));
4281   }
4282 }
4283 
TEST_F(IcingSearchEngineInitializationTest,DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex)4284 TEST_F(IcingSearchEngineInitializationTest,
4285        DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) {
4286   // 1. Create an index with a single document in it that has no valid indexed
4287   // tokens in its content.
4288   {
4289     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
4290     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
4291 
4292     SchemaProto schema =
4293         SchemaBuilder()
4294             .AddType(
4295                 SchemaTypeConfigBuilder()
4296                     .SetType("Message")
4297                     .AddProperty(PropertyConfigBuilder()
4298                                      .SetName("body")
4299                                      .SetDataTypeString(TERM_MATCH_PREFIX,
4300                                                         TOKENIZER_PLAIN)
4301                                      .SetCardinality(CARDINALITY_REQUIRED))
4302                     .AddProperty(PropertyConfigBuilder()
4303                                      .SetName("indexableInteger")
4304                                      .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
4305                                      .SetCardinality(CARDINALITY_OPTIONAL))
4306                     .AddProperty(PropertyConfigBuilder()
4307                                      .SetName("senderQualifiedId")
4308                                      .SetDataTypeJoinableString(
4309                                          JOINABLE_VALUE_TYPE_QUALIFIED_ID)
4310                                      .SetCardinality(CARDINALITY_OPTIONAL)))
4311             .Build();
4312     // Set a schema for a single type that has no term, integer, join indexed
4313     // contents.
4314     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
4315 
4316     // Add a document that contains:
4317     // - No valid indexed string content - just punctuation
4318     // - No integer content - since it is an optional property
4319     // - No qualified id content - since it is an optional property
4320     DocumentProto document = DocumentBuilder()
4321                                  .SetKey("icing", "fake_type/0")
4322                                  .SetSchema("Message")
4323                                  .AddStringProperty("body", "?...!")
4324                                  .Build();
4325     EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
4326   }
4327 
4328   // 2. Create the index again. This should NOT trigger a recovery of any kind.
4329   {
4330     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
4331     InitializeResultProto init_result = icing.Initialize();
4332     EXPECT_THAT(init_result.status(), ProtoIsOk());
4333     EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
4334                 Eq(InitializeStatsProto::NO_DATA_LOSS));
4335     EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
4336                 Eq(InitializeStatsProto::NONE));
4337     EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
4338                 Eq(InitializeStatsProto::NONE));
4339     EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
4340                 Eq(InitializeStatsProto::NONE));
4341     EXPECT_THAT(
4342         init_result.initialize_stats().integer_index_restoration_cause(),
4343         Eq(InitializeStatsProto::NONE));
4344     EXPECT_THAT(init_result.initialize_stats()
4345                     .qualified_id_join_index_restoration_cause(),
4346                 Eq(InitializeStatsProto::NONE));
4347   }
4348 }
4349 
// Checks that Initialize() reports, as its latency, the elapsed time supplied
// by the injected clock.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogFunctionLatency) {
  // The fake clock is configured with an elapsed time of 10 ms.
  auto clock = std::make_unique<FakeClock>();
  clock->SetTimerElapsedMilliseconds(10);

  TestIcingSearchEngine engine(
      GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
      std::make_unique<IcingFilesystem>(), std::move(clock),
      GetTestJniCache());

  InitializeResultProto init_result = engine.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());
  EXPECT_THAT(init_result.initialize_stats().latency_ms(), Eq(10));
}
4362 
// Checks that the num_documents stat reported by Initialize() follows the
// number of documents put in earlier engine instances: 0, then 1, then 2.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogNumberOfDocuments) {
  DocumentProto first_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema("Message")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 123)
          .Build();
  DocumentProto second_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/2")
          .SetSchema("Message")
          .AddStringProperty("body", "message body")
          .AddInt64Property("indexableInteger", 456)
          .Build();

  // First initialization: nothing stored yet. Afterwards add one document.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    EXPECT_THAT(init_result.initialize_stats().num_documents(), Eq(0));

    ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
    ASSERT_THAT(engine.Put(first_document).status(), ProtoIsOk());
  }

  // Second initialization: one document expected. Afterwards add another.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    EXPECT_THAT(init_result.initialize_stats().num_documents(), Eq(1));

    ASSERT_THAT(engine.Put(second_document).status(), ProtoIsOk());
  }

  // Third initialization: both documents expected.
  {
    IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = engine.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    EXPECT_THAT(init_result.initialize_stats().num_documents(), Eq(2));
  }
}
4409 
// Checks that a first-time Initialize() reports neither recovery/restoration
// causes nor recovery/restoration latencies.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) {
  // The fake timer reports 10 ms, yet every recovery / restoration latency
  // must stay at 0 for a first-time initialization.
  auto clock = std::make_unique<FakeClock>();
  clock->SetTimerElapsedMilliseconds(10);

  TestIcingSearchEngine engine(
      GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
      std::make_unique<IcingFilesystem>(), std::move(clock),
      GetTestJniCache());

  InitializeResultProto init_result = engine.Initialize();
  EXPECT_THAT(init_result.status(), ProtoIsOk());

  const InitializeStatsProto& stats = init_result.initialize_stats();

  // Document store.
  EXPECT_THAT(stats.document_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.document_store_recovery_latency_ms(), Eq(0));
  EXPECT_THAT(stats.document_store_data_status(),
              Eq(InitializeStatsProto::NO_DATA_LOSS));

  // Term, integer and qualified id join indices.
  EXPECT_THAT(stats.index_restoration_cause(), Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.integer_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.qualified_id_join_index_restoration_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.index_restoration_latency_ms(), Eq(0));

  // Schema store.
  EXPECT_THAT(stats.schema_store_recovery_cause(),
              Eq(InitializeStatsProto::NONE));
  EXPECT_THAT(stats.schema_store_recovery_latency_ms(), Eq(0));
}
4450 
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCausePartialDataLoss)4451 TEST_F(IcingSearchEngineInitializationTest,
4452        InitializeShouldLogRecoveryCausePartialDataLoss) {
4453   DocumentProto document = DocumentBuilder()
4454                                .SetKey("icing", "fake_type/0")
4455                                .SetSchema("Message")
4456                                .AddStringProperty("body", "message body")
4457                                .AddInt64Property("indexableInteger", 123)
4458                                .Build();
4459 
4460   {
4461     // Initialize and put a document.
4462     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
4463     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
4464     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
4465     EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
4466   }
4467 
4468   {
4469     // Append a non-checksummed document. This will mess up the checksum of the
4470     // proto log, forcing it to rewind and later return a DATA_LOSS error.
4471     const std::string serialized_document = document.SerializeAsString();
4472     const std::string document_log_file = absl_ports::StrCat(
4473         GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
4474 
4475     int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str());
4476     filesystem()->PWrite(document_log_file.c_str(), file_size,
4477                          serialized_document.data(),
4478                          serialized_document.size());
4479   }
4480 
4481   {
4482     // Document store will rewind to previous checkpoint. The cause should be
4483     // DATA_LOSS and the data status should be PARTIAL_LOSS.
4484     auto fake_clock = std::make_unique<FakeClock>();
4485     fake_clock->SetTimerElapsedMilliseconds(10);
4486     TestIcingSearchEngine icing(GetDefaultIcingOptions(),
4487                                 std::make_unique<Filesystem>(),
4488                                 std::make_unique<IcingFilesystem>(),
4489                                 std::move(fake_clock), GetTestJniCache());
4490     InitializeResultProto initialize_result_proto = icing.Initialize();
4491     EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
4492     EXPECT_THAT(initialize_result_proto.initialize_stats()
4493                     .document_store_recovery_cause(),
4494                 Eq(InitializeStatsProto::DATA_LOSS));
4495     EXPECT_THAT(initialize_result_proto.initialize_stats()
4496                     .document_store_recovery_latency_ms(),
4497                 Eq(10));
4498     EXPECT_THAT(
4499         initialize_result_proto.initialize_stats().document_store_data_status(),
4500         Eq(InitializeStatsProto::PARTIAL_LOSS));
4501     // Document store rewinds to previous checkpoint and all derived files were
4502     // regenerated.
4503     // - Last stored doc id will be consistent with last added document ids in
4504     //   term/integer indices, so there will be no index restoration.
4505     // - Qualified id join index depends on document store derived files and
4506     //   since they were regenerated, we should rebuild qualified id join index.
4507     EXPECT_THAT(
4508         initialize_result_proto.initialize_stats().index_restoration_cause(),
4509         Eq(InitializeStatsProto::NONE));
4510     EXPECT_THAT(initialize_result_proto.initialize_stats()
4511                     .integer_index_restoration_cause(),
4512                 Eq(InitializeStatsProto::NONE));
4513     EXPECT_THAT(initialize_result_proto.initialize_stats()
4514                     .qualified_id_join_index_restoration_cause(),
4515                 Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
4516     EXPECT_THAT(initialize_result_proto.initialize_stats()
4517                     .index_restoration_latency_ms(),
4518                 Eq(10));
4519     EXPECT_THAT(initialize_result_proto.initialize_stats()
4520                     .schema_store_recovery_cause(),
4521                 Eq(InitializeStatsProto::NONE));
4522     EXPECT_THAT(initialize_result_proto.initialize_stats()
4523                     .schema_store_recovery_latency_ms(),
4524                 Eq(0));
4525   }
4526 }
4527 
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCauseCompleteDataLoss)4528 TEST_F(IcingSearchEngineInitializationTest,
4529        InitializeShouldLogRecoveryCauseCompleteDataLoss) {
4530   DocumentProto document1 = DocumentBuilder()
4531                                 .SetKey("icing", "fake_type/1")
4532                                 .SetSchema("Message")
4533                                 .AddStringProperty("body", "message body")
4534                                 .AddInt64Property("indexableInteger", 123)
4535                                 .Build();
4536 
4537   const std::string document_log_file = absl_ports::StrCat(
4538       GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename());
4539   int64_t corruptible_offset;
4540 
4541   {
4542     // Initialize and put a document.
4543     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
4544 
4545     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
4546 
4547     // There's some space at the beginning of the file (e.g. header, kmagic,
4548     // etc) that is necessary to initialize the FileBackedProtoLog. We can't
4549     // corrupt that region, so we need to figure out the offset at which
4550     // documents will be written to - which is the file size after
4551     // initialization.
4552     corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str());
4553 
4554     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
4555     EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
4556   }
4557 
4558   {
4559     // "Corrupt" the content written in the log. Make the corrupt document
4560     // smaller than our original one so we don't accidentally write past our
4561     // file.
4562     DocumentProto document =
4563         DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build();
4564     std::string serialized_document = document.SerializeAsString();
4565     ASSERT_TRUE(filesystem()->PWrite(
4566         document_log_file.c_str(), corruptible_offset,
4567         serialized_document.data(), serialized_document.size()));
4568 
4569     PortableFileBackedProtoLog<DocumentWrapper>::Header header =
4570         ReadDocumentLogHeader(*filesystem(), document_log_file);
4571 
4572     // Set dirty bit to true to reflect that something changed in the log.
4573     header.SetDirtyFlag(true);
4574     header.SetHeaderChecksum(header.CalculateHeaderChecksum());
4575 
4576     WriteDocumentLogHeader(*filesystem(), document_log_file, header);
4577   }
4578 
4579   {
4580     // Document store will completely rewind. The cause should be DATA_LOSS and
4581     // the data status should be COMPLETE_LOSS.
4582     auto fake_clock = std::make_unique<FakeClock>();
4583     fake_clock->SetTimerElapsedMilliseconds(10);
4584     TestIcingSearchEngine icing(GetDefaultIcingOptions(),
4585                                 std::make_unique<Filesystem>(),
4586                                 std::make_unique<IcingFilesystem>(),
4587                                 std::move(fake_clock), GetTestJniCache());
4588     InitializeResultProto initialize_result_proto = icing.Initialize();
4589     EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
4590     EXPECT_THAT(initialize_result_proto.initialize_stats()
4591                     .document_store_recovery_cause(),
4592                 Eq(InitializeStatsProto::DATA_LOSS));
4593     EXPECT_THAT(initialize_result_proto.initialize_stats()
4594                     .document_store_recovery_latency_ms(),
4595                 Eq(10));
4596     EXPECT_THAT(
4597         initialize_result_proto.initialize_stats().document_store_data_status(),
4598         Eq(InitializeStatsProto::COMPLETE_LOSS));
4599     // The complete rewind of ground truth causes us to clear the index, but
4600     // that's not considered a restoration.
4601     EXPECT_THAT(
4602         initialize_result_proto.initialize_stats().index_restoration_cause(),
4603         Eq(InitializeStatsProto::NONE));
4604     EXPECT_THAT(initialize_result_proto.initialize_stats()
4605                     .integer_index_restoration_cause(),
4606                 Eq(InitializeStatsProto::NONE));
4607     EXPECT_THAT(initialize_result_proto.initialize_stats()
4608                     .qualified_id_join_index_restoration_cause(),
4609                 Eq(InitializeStatsProto::NONE));
4610     EXPECT_THAT(initialize_result_proto.initialize_stats()
4611                     .index_restoration_latency_ms(),
4612                 Eq(0));
4613     EXPECT_THAT(initialize_result_proto.initialize_stats()
4614                     .schema_store_recovery_cause(),
4615                 Eq(InitializeStatsProto::NONE));
4616     EXPECT_THAT(initialize_result_proto.initialize_stats()
4617                     .schema_store_recovery_latency_ms(),
4618                 Eq(0));
4619   }
4620 }
4621 
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCauseIndexInconsistentWithGroundTruth)4622 TEST_F(IcingSearchEngineInitializationTest,
4623        InitializeShouldLogRecoveryCauseIndexInconsistentWithGroundTruth) {
4624   DocumentProto document = DocumentBuilder()
4625                                .SetKey("icing", "fake_type/0")
4626                                .SetSchema("Message")
4627                                .AddStringProperty("body", "message body")
4628                                .AddInt64Property("indexableInteger", 123)
4629                                .Build();
4630   {
4631     // Initialize and put a document.
4632     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
4633     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
4634     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
4635     EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
4636   }
4637 
4638   {
4639     // Delete and re-initialize an empty index file to trigger
4640     // RestoreIndexIfNeeded.
4641     std::string idx_subdir = GetIndexDir() + "/idx";
4642     ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str()));
4643     ICING_ASSERT_OK_AND_ASSIGN(
4644         std::unique_ptr<Index> index,
4645         Index::Create(Index::Options(GetIndexDir(),
4646                                      /*index_merge_size=*/100,
4647                                      /*lite_index_sort_at_indexing=*/true,
4648                                      /*lite_index_sort_size=*/50),
4649                       filesystem(), icing_filesystem()));
4650     ICING_ASSERT_OK(index->PersistToDisk());
4651   }
4652 
4653   {
4654     // Index is empty but ground truth is not. Index should be restored due to
4655     // the inconsistency.
4656     auto fake_clock = std::make_unique<FakeClock>();
4657     fake_clock->SetTimerElapsedMilliseconds(10);
4658     TestIcingSearchEngine icing(GetDefaultIcingOptions(),
4659                                 std::make_unique<Filesystem>(),
4660                                 std::make_unique<IcingFilesystem>(),
4661                                 std::move(fake_clock), GetTestJniCache());
4662     InitializeResultProto initialize_result_proto = icing.Initialize();
4663     EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
4664     EXPECT_THAT(
4665         initialize_result_proto.initialize_stats().index_restoration_cause(),
4666         Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
4667     EXPECT_THAT(initialize_result_proto.initialize_stats()
4668                     .integer_index_restoration_cause(),
4669                 Eq(InitializeStatsProto::NONE));
4670     EXPECT_THAT(initialize_result_proto.initialize_stats()
4671                     .qualified_id_join_index_restoration_cause(),
4672                 Eq(InitializeStatsProto::NONE));
4673     EXPECT_THAT(initialize_result_proto.initialize_stats()
4674                     .index_restoration_latency_ms(),
4675                 Eq(10));
4676     EXPECT_THAT(initialize_result_proto.initialize_stats()
4677                     .document_store_recovery_cause(),
4678                 Eq(InitializeStatsProto::NONE));
4679     EXPECT_THAT(initialize_result_proto.initialize_stats()
4680                     .document_store_recovery_latency_ms(),
4681                 Eq(0));
4682     EXPECT_THAT(
4683         initialize_result_proto.initialize_stats().document_store_data_status(),
4684         Eq(InitializeStatsProto::NO_DATA_LOSS));
4685     EXPECT_THAT(initialize_result_proto.initialize_stats()
4686                     .schema_store_recovery_cause(),
4687                 Eq(InitializeStatsProto::NONE));
4688     EXPECT_THAT(initialize_result_proto.initialize_stats()
4689                     .schema_store_recovery_latency_ms(),
4690                 Eq(0));
4691   }
4692 }
4693 
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCauseIntegerIndexInconsistentWithGroundTruth)4694 TEST_F(
4695     IcingSearchEngineInitializationTest,
4696     InitializeShouldLogRecoveryCauseIntegerIndexInconsistentWithGroundTruth) {
4697   DocumentProto document = DocumentBuilder()
4698                                .SetKey("icing", "fake_type/0")
4699                                .SetSchema("Message")
4700                                .AddStringProperty("body", "message body")
4701                                .AddInt64Property("indexableInteger", 123)
4702                                .Build();
4703   {
4704     // Initialize and put a document.
4705     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
4706     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
4707     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
4708     EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
4709   }
4710 
4711   {
4712     // Delete the integer index file to trigger RestoreIndexIfNeeded.
4713     std::string integer_index_dir = GetIntegerIndexDir();
4714     filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str());
4715   }
4716 
4717   {
4718     // Index is empty but ground truth is not. Index should be restored due to
4719     // the inconsistency.
4720     auto fake_clock = std::make_unique<FakeClock>();
4721     fake_clock->SetTimerElapsedMilliseconds(10);
4722     TestIcingSearchEngine icing(GetDefaultIcingOptions(),
4723                                 std::make_unique<Filesystem>(),
4724                                 std::make_unique<IcingFilesystem>(),
4725                                 std::move(fake_clock), GetTestJniCache());
4726     InitializeResultProto initialize_result_proto = icing.Initialize();
4727     EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
4728     EXPECT_THAT(
4729         initialize_result_proto.initialize_stats().index_restoration_cause(),
4730         Eq(InitializeStatsProto::NONE));
4731     EXPECT_THAT(initialize_result_proto.initialize_stats()
4732                     .integer_index_restoration_cause(),
4733                 Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
4734     EXPECT_THAT(initialize_result_proto.initialize_stats()
4735                     .qualified_id_join_index_restoration_cause(),
4736                 Eq(InitializeStatsProto::NONE));
4737     EXPECT_THAT(initialize_result_proto.initialize_stats()
4738                     .index_restoration_latency_ms(),
4739                 Eq(10));
4740     EXPECT_THAT(initialize_result_proto.initialize_stats()
4741                     .document_store_recovery_cause(),
4742                 Eq(InitializeStatsProto::NONE));
4743     EXPECT_THAT(initialize_result_proto.initialize_stats()
4744                     .document_store_recovery_latency_ms(),
4745                 Eq(0));
4746     EXPECT_THAT(
4747         initialize_result_proto.initialize_stats().document_store_data_status(),
4748         Eq(InitializeStatsProto::NO_DATA_LOSS));
4749     EXPECT_THAT(initialize_result_proto.initialize_stats()
4750                     .schema_store_recovery_cause(),
4751                 Eq(InitializeStatsProto::NONE));
4752     EXPECT_THAT(initialize_result_proto.initialize_stats()
4753                     .schema_store_recovery_latency_ms(),
4754                 Eq(0));
4755   }
4756 }
4757 
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexInconsistentWithGroundTruth)4758 TEST_F(
4759     IcingSearchEngineInitializationTest,
4760     InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexInconsistentWithGroundTruth) {
4761   SchemaProto schema =
4762       SchemaBuilder()
4763           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
4764               PropertyConfigBuilder()
4765                   .SetName("name")
4766                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
4767                   .SetCardinality(CARDINALITY_REQUIRED)))
4768           .AddType(SchemaTypeConfigBuilder()
4769                        .SetType("Message")
4770                        .AddProperty(PropertyConfigBuilder()
4771                                         .SetName("body")
4772                                         .SetDataTypeString(TERM_MATCH_PREFIX,
4773                                                            TOKENIZER_PLAIN)
4774                                         .SetCardinality(CARDINALITY_REQUIRED))
4775                        .AddProperty(PropertyConfigBuilder()
4776                                         .SetName("indexableInteger")
4777                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
4778                                         .SetCardinality(CARDINALITY_REQUIRED))
4779                        .AddProperty(PropertyConfigBuilder()
4780                                         .SetName("senderQualifiedId")
4781                                         .SetDataTypeJoinableString(
4782                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
4783                                         .SetCardinality(CARDINALITY_REQUIRED)))
4784           .Build();
4785 
4786   DocumentProto person =
4787       DocumentBuilder()
4788           .SetKey("namespace", "person")
4789           .SetSchema("Person")
4790           .AddStringProperty("name", "person")
4791           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
4792           .Build();
4793   DocumentProto message =
4794       DocumentBuilder()
4795           .SetKey("namespace", "message/1")
4796           .SetSchema("Message")
4797           .AddStringProperty("body", "message body")
4798           .AddInt64Property("indexableInteger", 123)
4799           .AddStringProperty("senderQualifiedId", "namespace#person")
4800           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
4801           .Build();
4802 
4803   {
4804     // Initialize and put documents.
4805     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
4806     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
4807     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
4808     EXPECT_THAT(icing.Put(person).status(), ProtoIsOk());
4809     EXPECT_THAT(icing.Put(message).status(), ProtoIsOk());
4810   }
4811 
4812   {
4813     // Delete the qualified id join index file to trigger RestoreIndexIfNeeded.
4814     std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir();
4815     filesystem()->DeleteDirectoryRecursively(
4816         qualified_id_join_index_dir.c_str());
4817   }
4818 
4819   {
4820     // Index is empty but ground truth is not. Index should be restored due to
4821     // the inconsistency.
4822     auto fake_clock = std::make_unique<FakeClock>();
4823     fake_clock->SetTimerElapsedMilliseconds(10);
4824     TestIcingSearchEngine icing(GetDefaultIcingOptions(),
4825                                 std::make_unique<Filesystem>(),
4826                                 std::make_unique<IcingFilesystem>(),
4827                                 std::move(fake_clock), GetTestJniCache());
4828     InitializeResultProto initialize_result_proto = icing.Initialize();
4829     EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
4830     EXPECT_THAT(
4831         initialize_result_proto.initialize_stats().index_restoration_cause(),
4832         Eq(InitializeStatsProto::NONE));
4833     EXPECT_THAT(initialize_result_proto.initialize_stats()
4834                     .integer_index_restoration_cause(),
4835                 Eq(InitializeStatsProto::NONE));
4836     EXPECT_THAT(initialize_result_proto.initialize_stats()
4837                     .qualified_id_join_index_restoration_cause(),
4838                 Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
4839     EXPECT_THAT(initialize_result_proto.initialize_stats()
4840                     .index_restoration_latency_ms(),
4841                 Eq(10));
4842     EXPECT_THAT(initialize_result_proto.initialize_stats()
4843                     .document_store_recovery_cause(),
4844                 Eq(InitializeStatsProto::NONE));
4845     EXPECT_THAT(initialize_result_proto.initialize_stats()
4846                     .document_store_recovery_latency_ms(),
4847                 Eq(0));
4848     EXPECT_THAT(
4849         initialize_result_proto.initialize_stats().document_store_data_status(),
4850         Eq(InitializeStatsProto::NO_DATA_LOSS));
4851     EXPECT_THAT(initialize_result_proto.initialize_stats()
4852                     .schema_store_recovery_cause(),
4853                 Eq(InitializeStatsProto::NONE));
4854     EXPECT_THAT(initialize_result_proto.initialize_stats()
4855                     .schema_store_recovery_latency_ms(),
4856                 Eq(0));
4857   }
4858 }
4859 
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCauseSchemaChangesOutOfSync)4860 TEST_F(IcingSearchEngineInitializationTest,
4861        InitializeShouldLogRecoveryCauseSchemaChangesOutOfSync) {
4862   DocumentProto document = DocumentBuilder()
4863                                .SetKey("icing", "fake_type/0")
4864                                .SetSchema("Message")
4865                                .AddStringProperty("body", "message body")
4866                                .AddInt64Property("indexableInteger", 123)
4867                                .Build();
4868   IcingSearchEngineOptions options = GetDefaultIcingOptions();
4869   {
4870     // Initialize and put one document.
4871     IcingSearchEngine icing(options, GetTestJniCache());
4872     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
4873     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
4874     ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
4875   }
4876 
4877   {
4878     // Simulate a schema change where power is lost after the schema is written.
4879     SchemaProto new_schema =
4880         SchemaBuilder()
4881             .AddType(
4882                 SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig())
4883                     .AddProperty(PropertyConfigBuilder()
4884                                      .SetName("subject")
4885                                      .SetDataTypeString(TERM_MATCH_PREFIX,
4886                                                         TOKENIZER_PLAIN)
4887                                      .SetCardinality(CARDINALITY_OPTIONAL)))
4888             .Build();
4889     // Write the marker file
4890     std::string marker_filepath =
4891         absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
4892     ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
4893     ASSERT_TRUE(sfd.is_valid());
4894 
4895     // Write the new schema
4896     FakeClock fake_clock;
4897     ICING_ASSERT_OK_AND_ASSIGN(
4898         std::unique_ptr<SchemaStore> schema_store,
4899         SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
4900     ICING_EXPECT_OK(schema_store->SetSchema(
4901         new_schema, /*ignore_errors_and_delete_documents=*/false,
4902         /*allow_circular_schema_definitions=*/false));
4903   }
4904 
4905   {
4906     // Both document store and index should be recovered from checksum mismatch.
4907     auto fake_clock = std::make_unique<FakeClock>();
4908     fake_clock->SetTimerElapsedMilliseconds(10);
4909     TestIcingSearchEngine icing(GetDefaultIcingOptions(),
4910                                 std::make_unique<Filesystem>(),
4911                                 std::make_unique<IcingFilesystem>(),
4912                                 std::move(fake_clock), GetTestJniCache());
4913     InitializeResultProto initialize_result_proto = icing.Initialize();
4914     EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
4915     EXPECT_THAT(
4916         initialize_result_proto.initialize_stats().index_restoration_cause(),
4917         Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
4918     EXPECT_THAT(initialize_result_proto.initialize_stats()
4919                     .integer_index_restoration_cause(),
4920                 Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
4921     EXPECT_THAT(initialize_result_proto.initialize_stats()
4922                     .qualified_id_join_index_restoration_cause(),
4923                 Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
4924     EXPECT_THAT(initialize_result_proto.initialize_stats()
4925                     .index_restoration_latency_ms(),
4926                 Eq(10));
4927     EXPECT_THAT(initialize_result_proto.initialize_stats()
4928                     .document_store_recovery_cause(),
4929                 Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
4930     EXPECT_THAT(initialize_result_proto.initialize_stats()
4931                     .document_store_recovery_latency_ms(),
4932                 Eq(10));
4933     EXPECT_THAT(
4934         initialize_result_proto.initialize_stats().document_store_data_status(),
4935         Eq(InitializeStatsProto::NO_DATA_LOSS));
4936     EXPECT_THAT(initialize_result_proto.initialize_stats()
4937                     .schema_store_recovery_cause(),
4938                 Eq(InitializeStatsProto::NONE));
4939     EXPECT_THAT(initialize_result_proto.initialize_stats()
4940                     .schema_store_recovery_latency_ms(),
4941                 Eq(0));
4942   }
4943 
4944   {
4945     // No recovery should be needed.
4946     auto fake_clock = std::make_unique<FakeClock>();
4947     fake_clock->SetTimerElapsedMilliseconds(10);
4948     TestIcingSearchEngine icing(GetDefaultIcingOptions(),
4949                                 std::make_unique<Filesystem>(),
4950                                 std::make_unique<IcingFilesystem>(),
4951                                 std::move(fake_clock), GetTestJniCache());
4952     InitializeResultProto initialize_result_proto = icing.Initialize();
4953     EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
4954     EXPECT_THAT(
4955         initialize_result_proto.initialize_stats().index_restoration_cause(),
4956         Eq(InitializeStatsProto::NONE));
4957     EXPECT_THAT(initialize_result_proto.initialize_stats()
4958                     .integer_index_restoration_cause(),
4959                 Eq(InitializeStatsProto::NONE));
4960     EXPECT_THAT(initialize_result_proto.initialize_stats()
4961                     .qualified_id_join_index_restoration_cause(),
4962                 Eq(InitializeStatsProto::NONE));
4963     EXPECT_THAT(initialize_result_proto.initialize_stats()
4964                     .index_restoration_latency_ms(),
4965                 Eq(0));
4966     EXPECT_THAT(initialize_result_proto.initialize_stats()
4967                     .document_store_recovery_cause(),
4968                 Eq(InitializeStatsProto::NONE));
4969     EXPECT_THAT(initialize_result_proto.initialize_stats()
4970                     .document_store_recovery_latency_ms(),
4971                 Eq(0));
4972     EXPECT_THAT(
4973         initialize_result_proto.initialize_stats().document_store_data_status(),
4974         Eq(InitializeStatsProto::NO_DATA_LOSS));
4975     EXPECT_THAT(initialize_result_proto.initialize_stats()
4976                     .schema_store_recovery_cause(),
4977                 Eq(InitializeStatsProto::NONE));
4978     EXPECT_THAT(initialize_result_proto.initialize_stats()
4979                     .schema_store_recovery_latency_ms(),
4980                 Eq(0));
4981   }
4982 }
4983 
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCauseIndexIOError)4984 TEST_F(IcingSearchEngineInitializationTest,
4985        InitializeShouldLogRecoveryCauseIndexIOError) {
4986   DocumentProto document = DocumentBuilder()
4987                                .SetKey("icing", "fake_type/0")
4988                                .SetSchema("Message")
4989                                .AddStringProperty("body", "message body")
4990                                .AddInt64Property("indexableInteger", 123)
4991                                .Build();
4992   {
4993     // Initialize and put one document.
4994     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
4995     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
4996     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
4997     ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
4998   }
4999 
5000   std::string lite_index_buffer_file_path =
5001       absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb");
5002   auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
5003   EXPECT_CALL(*mock_icing_filesystem, OpenForWrite(_))
5004       .WillRepeatedly(DoDefault());
5005   // This fails Index::Create() once.
5006   EXPECT_CALL(*mock_icing_filesystem,
5007               OpenForWrite(Eq(lite_index_buffer_file_path)))
5008       .WillOnce(Return(-1))
5009       .WillRepeatedly(DoDefault());
5010 
5011   auto fake_clock = std::make_unique<FakeClock>();
5012   fake_clock->SetTimerElapsedMilliseconds(10);
5013   TestIcingSearchEngine icing(GetDefaultIcingOptions(),
5014                               std::make_unique<Filesystem>(),
5015                               std::move(mock_icing_filesystem),
5016                               std::move(fake_clock), GetTestJniCache());
5017 
5018   InitializeResultProto initialize_result_proto = icing.Initialize();
5019   EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
5020   EXPECT_THAT(
5021       initialize_result_proto.initialize_stats().index_restoration_cause(),
5022       Eq(InitializeStatsProto::IO_ERROR));
5023   EXPECT_THAT(initialize_result_proto.initialize_stats()
5024                   .integer_index_restoration_cause(),
5025               Eq(InitializeStatsProto::NONE));
5026   EXPECT_THAT(initialize_result_proto.initialize_stats()
5027                   .qualified_id_join_index_restoration_cause(),
5028               Eq(InitializeStatsProto::NONE));
5029   EXPECT_THAT(
5030       initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
5031       Eq(10));
5032   EXPECT_THAT(initialize_result_proto.initialize_stats()
5033                   .document_store_recovery_cause(),
5034               Eq(InitializeStatsProto::NONE));
5035   EXPECT_THAT(initialize_result_proto.initialize_stats()
5036                   .document_store_recovery_latency_ms(),
5037               Eq(0));
5038   EXPECT_THAT(
5039       initialize_result_proto.initialize_stats().document_store_data_status(),
5040       Eq(InitializeStatsProto::NO_DATA_LOSS));
5041   EXPECT_THAT(
5042       initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
5043       Eq(InitializeStatsProto::NONE));
5044   EXPECT_THAT(initialize_result_proto.initialize_stats()
5045                   .schema_store_recovery_latency_ms(),
5046               Eq(0));
5047 }
5048 
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCauseIntegerIndexIOError)5049 TEST_F(IcingSearchEngineInitializationTest,
5050        InitializeShouldLogRecoveryCauseIntegerIndexIOError) {
5051   DocumentProto document = DocumentBuilder()
5052                                .SetKey("icing", "fake_type/0")
5053                                .SetSchema("Message")
5054                                .AddStringProperty("body", "message body")
5055                                .AddInt64Property("indexableInteger", 123)
5056                                .Build();
5057   {
5058     // Initialize and put one document.
5059     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
5060     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
5061     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
5062     ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
5063   }
5064 
5065   std::string integer_index_metadata_file =
5066       absl_ports::StrCat(GetIntegerIndexDir(), "/integer_index.m");
5067   auto mock_filesystem = std::make_unique<MockFilesystem>();
5068   EXPECT_CALL(*mock_filesystem, OpenForWrite(_)).WillRepeatedly(DoDefault());
5069   // This fails IntegerIndex::Create() once.
5070   EXPECT_CALL(*mock_filesystem, OpenForWrite(Eq(integer_index_metadata_file)))
5071       .WillOnce(Return(-1))
5072       .WillRepeatedly(DoDefault());
5073 
5074   auto fake_clock = std::make_unique<FakeClock>();
5075   fake_clock->SetTimerElapsedMilliseconds(10);
5076   TestIcingSearchEngine icing(GetDefaultIcingOptions(),
5077                               std::move(mock_filesystem),
5078                               std::make_unique<IcingFilesystem>(),
5079                               std::move(fake_clock), GetTestJniCache());
5080 
5081   InitializeResultProto initialize_result_proto = icing.Initialize();
5082   EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
5083   EXPECT_THAT(
5084       initialize_result_proto.initialize_stats().index_restoration_cause(),
5085       Eq(InitializeStatsProto::NONE));
5086   EXPECT_THAT(initialize_result_proto.initialize_stats()
5087                   .integer_index_restoration_cause(),
5088               Eq(InitializeStatsProto::IO_ERROR));
5089   EXPECT_THAT(initialize_result_proto.initialize_stats()
5090                   .qualified_id_join_index_restoration_cause(),
5091               Eq(InitializeStatsProto::NONE));
5092   EXPECT_THAT(
5093       initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
5094       Eq(10));
5095   EXPECT_THAT(initialize_result_proto.initialize_stats()
5096                   .document_store_recovery_cause(),
5097               Eq(InitializeStatsProto::NONE));
5098   EXPECT_THAT(initialize_result_proto.initialize_stats()
5099                   .document_store_recovery_latency_ms(),
5100               Eq(0));
5101   EXPECT_THAT(
5102       initialize_result_proto.initialize_stats().document_store_data_status(),
5103       Eq(InitializeStatsProto::NO_DATA_LOSS));
5104   EXPECT_THAT(
5105       initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
5106       Eq(InitializeStatsProto::NONE));
5107   EXPECT_THAT(initialize_result_proto.initialize_stats()
5108                   .schema_store_recovery_latency_ms(),
5109               Eq(0));
5110 }
5111 
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexIOError)5112 TEST_F(IcingSearchEngineInitializationTest,
5113        InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexIOError) {
5114   SchemaProto schema =
5115       SchemaBuilder()
5116           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
5117               PropertyConfigBuilder()
5118                   .SetName("name")
5119                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
5120                   .SetCardinality(CARDINALITY_REQUIRED)))
5121           .AddType(SchemaTypeConfigBuilder()
5122                        .SetType("Message")
5123                        .AddProperty(PropertyConfigBuilder()
5124                                         .SetName("body")
5125                                         .SetDataTypeString(TERM_MATCH_PREFIX,
5126                                                            TOKENIZER_PLAIN)
5127                                         .SetCardinality(CARDINALITY_REQUIRED))
5128                        .AddProperty(PropertyConfigBuilder()
5129                                         .SetName("indexableInteger")
5130                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
5131                                         .SetCardinality(CARDINALITY_REQUIRED))
5132                        .AddProperty(PropertyConfigBuilder()
5133                                         .SetName("senderQualifiedId")
5134                                         .SetDataTypeJoinableString(
5135                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
5136                                         .SetCardinality(CARDINALITY_REQUIRED)))
5137           .Build();
5138 
5139   DocumentProto person =
5140       DocumentBuilder()
5141           .SetKey("namespace", "person")
5142           .SetSchema("Person")
5143           .AddStringProperty("name", "person")
5144           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
5145           .Build();
5146   DocumentProto message =
5147       DocumentBuilder()
5148           .SetKey("namespace", "message/1")
5149           .SetSchema("Message")
5150           .AddStringProperty("body", "message body")
5151           .AddInt64Property("indexableInteger", 123)
5152           .AddStringProperty("senderQualifiedId", "namespace#person")
5153           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
5154           .Build();
5155 
5156   {
5157     // Initialize and put documents.
5158     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
5159     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
5160     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
5161     ASSERT_THAT(icing.Put(person).status(), ProtoIsOk());
5162     ASSERT_THAT(icing.Put(message).status(), ProtoIsOk());
5163   }
5164 
5165   std::string qualified_id_join_index_metadata_file =
5166       absl_ports::StrCat(GetQualifiedIdJoinIndexDir(), "/metadata");
5167   auto mock_filesystem = std::make_unique<MockFilesystem>();
5168   EXPECT_CALL(*mock_filesystem, PRead(A<const char*>(), _, _, _))
5169       .WillRepeatedly(DoDefault());
5170   // This fails QualifiedIdJoinIndexImplV2::Create() once.
5171   EXPECT_CALL(
5172       *mock_filesystem,
5173       PRead(Matcher<const char*>(Eq(qualified_id_join_index_metadata_file)), _,
5174             _, _))
5175       .WillOnce(Return(false))
5176       .WillRepeatedly(DoDefault());
5177 
5178   auto fake_clock = std::make_unique<FakeClock>();
5179   fake_clock->SetTimerElapsedMilliseconds(10);
5180   TestIcingSearchEngine icing(GetDefaultIcingOptions(),
5181                               std::move(mock_filesystem),
5182                               std::make_unique<IcingFilesystem>(),
5183                               std::move(fake_clock), GetTestJniCache());
5184 
5185   InitializeResultProto initialize_result_proto = icing.Initialize();
5186   EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
5187   EXPECT_THAT(
5188       initialize_result_proto.initialize_stats().index_restoration_cause(),
5189       Eq(InitializeStatsProto::NONE));
5190   EXPECT_THAT(initialize_result_proto.initialize_stats()
5191                   .integer_index_restoration_cause(),
5192               Eq(InitializeStatsProto::NONE));
5193   EXPECT_THAT(initialize_result_proto.initialize_stats()
5194                   .qualified_id_join_index_restoration_cause(),
5195               Eq(InitializeStatsProto::IO_ERROR));
5196   EXPECT_THAT(
5197       initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
5198       Eq(10));
5199   EXPECT_THAT(initialize_result_proto.initialize_stats()
5200                   .document_store_recovery_cause(),
5201               Eq(InitializeStatsProto::NONE));
5202   EXPECT_THAT(initialize_result_proto.initialize_stats()
5203                   .document_store_recovery_latency_ms(),
5204               Eq(0));
5205   EXPECT_THAT(
5206       initialize_result_proto.initialize_stats().document_store_data_status(),
5207       Eq(InitializeStatsProto::NO_DATA_LOSS));
5208   EXPECT_THAT(
5209       initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
5210       Eq(InitializeStatsProto::NONE));
5211   EXPECT_THAT(initialize_result_proto.initialize_stats()
5212                   .schema_store_recovery_latency_ms(),
5213               Eq(0));
5214 }
5215 
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCauseDocStoreIOError)5216 TEST_F(IcingSearchEngineInitializationTest,
5217        InitializeShouldLogRecoveryCauseDocStoreIOError) {
5218   DocumentProto document = DocumentBuilder()
5219                                .SetKey("icing", "fake_type/0")
5220                                .SetSchema("Message")
5221                                .AddStringProperty("body", "message body")
5222                                .AddInt64Property("indexableInteger", 123)
5223                                .Build();
5224   {
5225     // Initialize and put one document.
5226     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
5227     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
5228     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
5229     ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
5230   }
5231 
5232   std::string document_store_header_file_path =
5233       absl_ports::StrCat(GetDocumentDir(), "/document_store_header");
5234   auto mock_filesystem = std::make_unique<MockFilesystem>();
5235   EXPECT_CALL(*mock_filesystem, Read(A<const char*>(), _, _))
5236       .WillRepeatedly(DoDefault());
5237   // This fails DocumentStore::InitializeDerivedFiles() once.
5238   EXPECT_CALL(
5239       *mock_filesystem,
5240       Read(Matcher<const char*>(Eq(document_store_header_file_path)), _, _))
5241       .WillOnce(Return(false))
5242       .WillRepeatedly(DoDefault());
5243 
5244   auto fake_clock = std::make_unique<FakeClock>();
5245   fake_clock->SetTimerElapsedMilliseconds(10);
5246   TestIcingSearchEngine icing(GetDefaultIcingOptions(),
5247                               std::move(mock_filesystem),
5248                               std::make_unique<IcingFilesystem>(),
5249                               std::move(fake_clock), GetTestJniCache());
5250 
5251   InitializeResultProto initialize_result_proto = icing.Initialize();
5252   EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
5253   EXPECT_THAT(initialize_result_proto.initialize_stats()
5254                   .document_store_recovery_cause(),
5255               Eq(InitializeStatsProto::IO_ERROR));
5256   EXPECT_THAT(initialize_result_proto.initialize_stats()
5257                   .document_store_recovery_latency_ms(),
5258               Eq(10));
5259   EXPECT_THAT(
5260       initialize_result_proto.initialize_stats().document_store_data_status(),
5261       Eq(InitializeStatsProto::NO_DATA_LOSS));
5262   EXPECT_THAT(
5263       initialize_result_proto.initialize_stats().index_restoration_cause(),
5264       Eq(InitializeStatsProto::NONE));
5265   EXPECT_THAT(initialize_result_proto.initialize_stats()
5266                   .integer_index_restoration_cause(),
5267               Eq(InitializeStatsProto::NONE));
5268   EXPECT_THAT(initialize_result_proto.initialize_stats()
5269                   .qualified_id_join_index_restoration_cause(),
5270               Eq(InitializeStatsProto::DEPENDENCIES_CHANGED));
5271   EXPECT_THAT(
5272       initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
5273       Eq(10));
5274   EXPECT_THAT(
5275       initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
5276       Eq(InitializeStatsProto::NONE));
5277   EXPECT_THAT(initialize_result_proto.initialize_stats()
5278                   .schema_store_recovery_latency_ms(),
5279               Eq(0));
5280 }
5281 
TEST_F(IcingSearchEngineInitializationTest,InitializeShouldLogRecoveryCauseSchemaStoreIOError)5282 TEST_F(IcingSearchEngineInitializationTest,
5283        InitializeShouldLogRecoveryCauseSchemaStoreIOError) {
5284   {
5285     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
5286     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
5287     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
5288   }
5289 
5290   {
5291     // Delete the schema store type mapper to trigger an I/O error.
5292     std::string schema_store_header_file_path =
5293         GetSchemaDir() + "/schema_type_mapper";
5294     ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(
5295         schema_store_header_file_path.c_str()));
5296   }
5297 
5298   {
5299     auto fake_clock = std::make_unique<FakeClock>();
5300     fake_clock->SetTimerElapsedMilliseconds(10);
5301     TestIcingSearchEngine icing(GetDefaultIcingOptions(),
5302                                 std::make_unique<Filesystem>(),
5303                                 std::make_unique<IcingFilesystem>(),
5304                                 std::move(fake_clock), GetTestJniCache());
5305     InitializeResultProto initialize_result_proto = icing.Initialize();
5306     EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
5307     EXPECT_THAT(initialize_result_proto.initialize_stats()
5308                     .schema_store_recovery_cause(),
5309                 Eq(InitializeStatsProto::IO_ERROR));
5310     EXPECT_THAT(initialize_result_proto.initialize_stats()
5311                     .schema_store_recovery_latency_ms(),
5312                 Eq(10));
5313     EXPECT_THAT(initialize_result_proto.initialize_stats()
5314                     .document_store_recovery_cause(),
5315                 Eq(InitializeStatsProto::NONE));
5316     EXPECT_THAT(initialize_result_proto.initialize_stats()
5317                     .document_store_recovery_latency_ms(),
5318                 Eq(0));
5319     EXPECT_THAT(
5320         initialize_result_proto.initialize_stats().document_store_data_status(),
5321         Eq(InitializeStatsProto::NO_DATA_LOSS));
5322     EXPECT_THAT(
5323         initialize_result_proto.initialize_stats().index_restoration_cause(),
5324         Eq(InitializeStatsProto::NONE));
5325     EXPECT_THAT(initialize_result_proto.initialize_stats()
5326                     .integer_index_restoration_cause(),
5327                 Eq(InitializeStatsProto::NONE));
5328     EXPECT_THAT(initialize_result_proto.initialize_stats()
5329                     .qualified_id_join_index_restoration_cause(),
5330                 Eq(InitializeStatsProto::NONE));
5331     EXPECT_THAT(initialize_result_proto.initialize_stats()
5332                     .index_restoration_latency_ms(),
5333                 Eq(0));
5334   }
5335 }
5336 
// Checks that initialize stats report the number of schema types that were
// stored before each Initialize(): 0 on empty storage, 1 after a one-type
// schema was set, and 2 after a two-type schema was set.
TEST_F(IcingSearchEngineInitializationTest,
       InitializeShouldLogNumberOfSchemaTypes) {
  {
    // Initialize an empty storage.
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = icing.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    // There should be 0 schema types.
    EXPECT_THAT(init_result.initialize_stats().num_schema_types(), Eq(0));

    // Set a schema with one type config.
    ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
  }

  {
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = icing.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    // There should be 1 schema type.
    EXPECT_THAT(init_result.initialize_stats().num_schema_types(), Eq(1));

    // Create and set a schema with two type configs: Email and Message.
    SchemaProto two_type_schema = CreateEmailSchema();
    *two_type_schema.add_types() = CreateMessageSchemaTypeConfig();

    ASSERT_THAT(icing.SetSchema(two_type_schema).status(), ProtoIsOk());
  }

  {
    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
    InitializeResultProto init_result = icing.Initialize();
    EXPECT_THAT(init_result.status(), ProtoIsOk());
    // There should be 2 schema types.
    EXPECT_THAT(init_result.initialize_stats().num_schema_types(), Eq(2));
  }
}
5375 
5376 struct IcingSearchEngineInitializationVersionChangeTestParam {
5377   version_util::VersionInfo existing_version_info;
5378   std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
5379       existing_enabled_features;
5380 
IcingSearchEngineInitializationVersionChangeTestParamicing::lib::__anon2df096810111::IcingSearchEngineInitializationVersionChangeTestParam5381   explicit IcingSearchEngineInitializationVersionChangeTestParam(
5382       version_util::VersionInfo version_info_in,
5383       std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
5384           existing_enabled_features_in)
5385       : existing_version_info(std::move(version_info_in)),
5386         existing_enabled_features(std::move(existing_enabled_features_in)) {}
5387 };
5388 
5389 class IcingSearchEngineInitializationVersionChangeTest
5390     : public IcingSearchEngineInitializationTest,
5391       public ::testing::WithParamInterface<
5392           IcingSearchEngineInitializationVersionChangeTestParam> {};
5393 
TEST_P(IcingSearchEngineInitializationVersionChangeTest,RecoverFromVersionChangeOrUnknownFlagChange)5394 TEST_P(IcingSearchEngineInitializationVersionChangeTest,
5395        RecoverFromVersionChangeOrUnknownFlagChange) {
5396   // TODO(b/280697513): test backup schema migration
5397   // Test the following scenario: version change. All derived data should be
5398   // rebuilt. We test this by manually adding some invalid derived data and
5399   // verifying they're removed due to rebuild.
5400   SchemaProto schema =
5401       SchemaBuilder()
5402           .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty(
5403               PropertyConfigBuilder()
5404                   .SetName("name")
5405                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN)
5406                   .SetCardinality(CARDINALITY_REQUIRED)))
5407           .AddType(SchemaTypeConfigBuilder()
5408                        .SetType("Message")
5409                        .AddProperty(PropertyConfigBuilder()
5410                                         .SetName("body")
5411                                         .SetDataTypeString(TERM_MATCH_PREFIX,
5412                                                            TOKENIZER_PLAIN)
5413                                         .SetCardinality(CARDINALITY_REQUIRED))
5414                        .AddProperty(PropertyConfigBuilder()
5415                                         .SetName("indexableInteger")
5416                                         .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
5417                                         .SetCardinality(CARDINALITY_REQUIRED))
5418                        .AddProperty(PropertyConfigBuilder()
5419                                         .SetName("senderQualifiedId")
5420                                         .SetDataTypeJoinableString(
5421                                             JOINABLE_VALUE_TYPE_QUALIFIED_ID)
5422                                         .SetCardinality(CARDINALITY_REQUIRED)))
5423           .Build();
5424 
5425   DocumentProto person1 =
5426       DocumentBuilder()
5427           .SetKey("namespace", "person/1")
5428           .SetSchema("Person")
5429           .AddStringProperty("name", "person")
5430           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
5431           .Build();
5432   DocumentProto person2 =
5433       DocumentBuilder()
5434           .SetKey("namespace", "person/2")
5435           .SetSchema("Person")
5436           .AddStringProperty("name", "person")
5437           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
5438           .Build();
5439   DocumentProto message =
5440       DocumentBuilder()
5441           .SetKey("namespace", "message")
5442           .SetSchema("Message")
5443           .AddStringProperty("body", "correct message")
5444           .AddInt64Property("indexableInteger", 123)
5445           .AddStringProperty("senderQualifiedId", "namespace#person/1")
5446           .SetCreationTimestampMs(kDefaultCreationTimestampMs)
5447           .Build();
5448 
5449   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
5450 
5451   {
5452     // Initializes folder and schema, index person1 and person2
5453     TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
5454                                 std::make_unique<IcingFilesystem>(),
5455                                 std::make_unique<FakeClock>(),
5456                                 GetTestJniCache());
5457     EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
5458     EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
5459     EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk());
5460     EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk());
5461   }  // This should shut down IcingSearchEngine and persist anything it needs to
5462 
5463   {
5464     // Manually:
5465     // - Put message into DocumentStore
5466     // - But add some incorrect data for message into 3 indices
5467     // - Change version file
5468     //
5469     // These will make sure last_added_document_id is consistent with
5470     // last_stored_document_id, so if Icing didn't handle version change
5471     // correctly, then the index won't be rebuilt.
5472     FakeClock fake_clock;
5473     ICING_ASSERT_OK_AND_ASSIGN(
5474         std::unique_ptr<SchemaStore> schema_store,
5475         SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
5476 
5477     // Put message into DocumentStore
5478     ICING_ASSERT_OK_AND_ASSIGN(
5479         DocumentStore::CreateResult create_result,
5480         DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
5481                               schema_store.get(),
5482                               /*force_recovery_and_revalidate_documents=*/false,
5483                               /*namespace_id_fingerprint=*/true,
5484                               /*pre_mapping_fbv=*/false,
5485                               /*use_persistent_hash_map=*/true,
5486                               PortableFileBackedProtoLog<
5487                                   DocumentWrapper>::kDeflateCompressionLevel,
5488                               /*initialize_stats=*/nullptr));
5489     std::unique_ptr<DocumentStore> document_store =
5490         std::move(create_result.document_store);
5491     ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, document_store->Put(message));
5492 
5493     // Index doc_id with incorrect data
5494     Index::Options options(GetIndexDir(), /*index_merge_size=*/1024 * 1024,
5495                            /*lite_index_sort_at_indexing=*/true,
5496                            /*lite_index_sort_size=*/1024 * 8);
5497     ICING_ASSERT_OK_AND_ASSIGN(
5498         std::unique_ptr<Index> index,
5499         Index::Create(options, filesystem(), icing_filesystem()));
5500 
5501     ICING_ASSERT_OK_AND_ASSIGN(
5502         std::unique_ptr<IntegerIndex> integer_index,
5503         IntegerIndex::Create(*filesystem(), GetIntegerIndexDir(),
5504                              /*num_data_threshold_for_bucket_split=*/65536,
5505                              /*pre_mapping_fbv=*/false));
5506 
5507     ICING_ASSERT_OK_AND_ASSIGN(
5508         std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index,
5509         QualifiedIdJoinIndexImplV2::Create(*filesystem(),
5510                                            GetQualifiedIdJoinIndexDir(),
5511                                            /*pre_mapping_fbv=*/false));
5512 
5513     ICING_ASSERT_OK_AND_ASSIGN(
5514         std::unique_ptr<TermIndexingHandler> term_indexing_handler,
5515         TermIndexingHandler::Create(
5516             &fake_clock, normalizer_.get(), index.get(),
5517             /*build_property_existence_metadata_hits=*/true));
5518     ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler>
5519                                    integer_section_indexing_handler,
5520                                IntegerSectionIndexingHandler::Create(
5521                                    &fake_clock, integer_index.get()));
5522     ICING_ASSERT_OK_AND_ASSIGN(
5523         std::unique_ptr<QualifiedIdJoinIndexingHandler>
5524             qualified_id_join_indexing_handler,
5525         QualifiedIdJoinIndexingHandler::Create(
5526             &fake_clock, document_store.get(), qualified_id_join_index.get()));
5527     std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
5528     handlers.push_back(std::move(term_indexing_handler));
5529     handlers.push_back(std::move(integer_section_indexing_handler));
5530     handlers.push_back(std::move(qualified_id_join_indexing_handler));
5531     IndexProcessor index_processor(std::move(handlers), &fake_clock);
5532 
5533     DocumentProto incorrect_message =
5534         DocumentBuilder()
5535             .SetKey("namespace", "message")
5536             .SetSchema("Message")
5537             .AddStringProperty("body", "wrong message")
5538             .AddInt64Property("indexableInteger", 456)
5539             .AddStringProperty("senderQualifiedId", "namespace#person/2")
5540             .SetCreationTimestampMs(kDefaultCreationTimestampMs)
5541             .Build();
5542     ICING_ASSERT_OK_AND_ASSIGN(
5543         TokenizedDocument tokenized_document,
5544         TokenizedDocument::Create(schema_store.get(), lang_segmenter_.get(),
5545                                   std::move(incorrect_message)));
5546     ICING_ASSERT_OK(index_processor.IndexDocument(tokenized_document, doc_id));
5547 
5548     // Rewrite existing data's version files
5549     ICING_ASSERT_OK(
5550         version_util::DiscardVersionFiles(*filesystem(), GetVersionFileDir()));
5551     const version_util::VersionInfo& existing_version_info =
5552         GetParam().existing_version_info;
5553     ICING_ASSERT_OK(version_util::WriteV1Version(
5554         *filesystem(), GetVersionFileDir(), existing_version_info));
5555 
5556     if (existing_version_info.version >= version_util::kFirstV2Version) {
5557       IcingSearchEngineVersionProto version_proto;
5558       version_proto.set_version(existing_version_info.version);
5559       version_proto.set_max_version(existing_version_info.max_version);
5560       auto* enabled_features = version_proto.mutable_enabled_features();
5561       for (const auto& feature : GetParam().existing_enabled_features) {
5562         enabled_features->Add(version_util::GetFeatureInfoProto(feature));
5563       }
5564       version_util::WriteV2Version(
5565           *filesystem(), GetVersionFileDir(),
5566           std::make_unique<IcingSearchEngineVersionProto>(
5567               std::move(version_proto)));
5568     }
5569   }
5570 
5571   // Mock filesystem to observe and check the behavior of all indices.
5572   TestIcingSearchEngine icing(GetDefaultIcingOptions(),
5573                               std::make_unique<Filesystem>(),
5574                               std::make_unique<IcingFilesystem>(),
5575                               std::make_unique<FakeClock>(), GetTestJniCache());
5576   InitializeResultProto initialize_result = icing.Initialize();
5577   EXPECT_THAT(initialize_result.status(), ProtoIsOk());
5578 
5579   // Derived files restoration should be triggered here. Incorrect data should
5580   // be deleted and correct data of message should be indexed.
5581   // Here we're recovering from a version change or a flag change that requires
5582   // rebuilding all derived files.
5583   //
5584   // TODO(b/314816301): test individual derived files rebuilds due to change
5585   // in trunk stable feature flags.
5586   // i.e. Test individual rebuilding for each of:
5587   //  - document store
5588   //  - schema store
5589   //  - term index
5590   //  - numeric index
5591   //  - qualified id join index
5592   InitializeStatsProto::RecoveryCause expected_recovery_cause =
5593       GetParam().existing_version_info.version != version_util::kVersion
5594           ? InitializeStatsProto::VERSION_CHANGED
5595           : InitializeStatsProto::FEATURE_FLAG_CHANGED;
5596   EXPECT_THAT(
5597       initialize_result.initialize_stats().document_store_recovery_cause(),
5598       Eq(expected_recovery_cause));
5599   EXPECT_THAT(
5600       initialize_result.initialize_stats().schema_store_recovery_cause(),
5601       Eq(expected_recovery_cause));
5602   EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
5603               Eq(expected_recovery_cause));
5604   EXPECT_THAT(
5605       initialize_result.initialize_stats().integer_index_restoration_cause(),
5606       Eq(expected_recovery_cause));
5607   EXPECT_THAT(initialize_result.initialize_stats()
5608                   .qualified_id_join_index_restoration_cause(),
5609               Eq(expected_recovery_cause));
5610 
5611   // Manually check version file
5612   ICING_ASSERT_OK_AND_ASSIGN(
5613       IcingSearchEngineVersionProto version_proto_after_init,
5614       version_util::ReadVersion(*filesystem(), GetVersionFileDir(),
5615                                 GetIndexDir()));
5616   EXPECT_THAT(version_proto_after_init.version(), Eq(version_util::kVersion));
5617   EXPECT_THAT(version_proto_after_init.max_version(),
5618               Eq(std::max(version_util::kVersion,
5619                           GetParam().existing_version_info.max_version)));
5620 
5621   SearchResultProto expected_search_result_proto;
5622   expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
5623   *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
5624       message;
5625 
5626   // Verify term search
5627   SearchSpecProto search_spec1;
5628   search_spec1.set_query("body:correct");
5629   search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY);
5630   SearchResultProto search_result_proto1 =
5631       icing.Search(search_spec1, GetDefaultScoringSpec(),
5632                    ResultSpecProto::default_instance());
5633   EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores(
5634                                         expected_search_result_proto));
5635 
5636   // Verify numeric (integer) search
5637   SearchSpecProto search_spec2;
5638   search_spec2.set_query("indexableInteger == 123");
5639   search_spec2.add_enabled_features(std::string(kNumericSearchFeature));
5640 
5641   SearchResultProto search_result_google::protobuf =
5642       icing.Search(search_spec2, ScoringSpecProto::default_instance(),
5643                    ResultSpecProto::default_instance());
5644   EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores(
5645                                         expected_search_result_proto));
5646 
5647   // Verify join search: join a query for `name:person` with a child query for
5648   // `body:message` based on the child's `senderQualifiedId` field.
5649   SearchSpecProto search_spec3;
5650   search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY);
5651   search_spec3.set_query("name:person");
5652   JoinSpecProto* join_spec = search_spec3.mutable_join_spec();
5653   join_spec->set_parent_property_expression(
5654       std::string(JoinProcessor::kQualifiedIdExpr));
5655   join_spec->set_child_property_expression("senderQualifiedId");
5656   join_spec->set_aggregation_scoring_strategy(
5657       JoinSpecProto::AggregationScoringStrategy::COUNT);
5658   JoinSpecProto::NestedSpecProto* nested_spec =
5659       join_spec->mutable_nested_spec();
5660   SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec();
5661   nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY);
5662   nested_search_spec->set_query("body:message");
5663   *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec();
5664   *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance();
5665 
5666   ResultSpecProto result_spec3 = ResultSpecProto::default_instance();
5667   result_spec3.set_max_joined_children_per_parent_to_return(
5668       std::numeric_limits<int32_t>::max());
5669 
5670   SearchResultProto expected_join_search_result_proto;
5671   expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK);
5672   // Person 1 with message
5673   SearchResultProto::ResultProto* result_proto =
5674       expected_join_search_result_proto.mutable_results()->Add();
5675   *result_proto->mutable_document() = person1;
5676   *result_proto->mutable_joined_results()->Add()->mutable_document() = message;
5677   // Person 2 without children
5678   *expected_join_search_result_proto.mutable_results()
5679        ->Add()
5680        ->mutable_document() = person2;
5681 
5682   SearchResultProto search_result_proto3 = icing.Search(
5683       search_spec3, ScoringSpecProto::default_instance(), result_spec3);
5684   EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores(
5685                                         expected_join_search_result_proto));
5686 }
5687 
5688 INSTANTIATE_TEST_SUITE_P(
5689     IcingSearchEngineInitializationVersionChangeTest,
5690     IcingSearchEngineInitializationVersionChangeTest,
5691     testing::Values(
5692         // Manually change existing data set's version to kVersion + 1. When
5693         // initializing, it will detect "rollback".
5694         IcingSearchEngineInitializationVersionChangeTestParam(
5695             version_util::VersionInfo(
5696                 /*version_in=*/version_util::kVersion + 1,
5697                 /*max_version_in=*/version_util::kVersion + 1),
5698             /*existing_enabled_features_in=*/{}),
5699 
5700         // Currently we don't have any "upgrade" that requires rebuild derived
5701         // files, so skip this case until we have a case for it.
5702 
5703         // Manually change existing data set's version to kVersion - 1 and
5704         // max_version to kVersion. When initializing, it will detect "roll
5705         // forward".
5706         IcingSearchEngineInitializationVersionChangeTestParam(
5707             version_util::VersionInfo(
5708                 /*version_in=*/version_util::kVersion - 1,
5709                 /*max_version_in=*/version_util::kVersion),
5710             /*existing_enabled_features_in=*/{}),
5711 
5712         // Manually change existing data set's version to 0 and max_version to
5713         // 0. When initializing, it will detect "version 0 upgrade".
5714         //
5715         // Note: in reality, version 0 won't be written into version file, but
5716         // it is ok here since it is hack to simulate version 0 situation.
5717         IcingSearchEngineInitializationVersionChangeTestParam(
5718             version_util::VersionInfo(
5719                 /*version_in=*/0,
5720                 /*max_version_in=*/0),
5721             /*existing_enabled_features_in=*/{}),
5722 
5723         // Manually change existing data set's version to 0 and max_version to
5724         // kVersion. When initializing, it will detect "version 0 roll forward".
5725         //
5726         // Note: in reality, version 0 won't be written into version file, but
5727         // it is ok here since it is hack to simulate version 0 situation.
5728         IcingSearchEngineInitializationVersionChangeTestParam(
5729             version_util::VersionInfo(
5730                 /*version_in=*/0,
5731                 /*max_version_in=*/version_util::kVersion),
5732             /*existing_enabled_features_in=*/{}),
5733 
5734         // Manually write an unknown feature in the version proto while keeping
5735         // version the same as kVersion.
5736         //
5737         // Result: this will rebuild all derived files with restoration cause
5738         // FEATURE_FLAG_CHANGED
5739         IcingSearchEngineInitializationVersionChangeTestParam(
5740             version_util::VersionInfo(
5741                 /*version_in=*/version_util::kVersion,
5742                 /*max_version_in=*/version_util::kVersion),
5743             /*existing_enabled_features_in=*/{
5744                 IcingSearchEngineFeatureInfoProto::UNKNOWN})));
5745 
5746 class IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest
5747     : public IcingSearchEngineInitializationTest,
5748       public ::testing::WithParamInterface<std::tuple<bool, bool>> {};
TEST_P(IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,ChangePropertyExistenceHitsFlagTest)5749 TEST_P(IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
5750        ChangePropertyExistenceHitsFlagTest) {
5751   bool before_build_property_existence_metadata_hits = std::get<0>(GetParam());
5752   bool after_build_property_existence_metadata_hits = std::get<1>(GetParam());
5753   bool flag_changed = before_build_property_existence_metadata_hits !=
5754                       after_build_property_existence_metadata_hits;
5755 
5756   SchemaProto schema =
5757       SchemaBuilder()
5758           .AddType(SchemaTypeConfigBuilder()
5759                        .SetType("Value")
5760                        .AddProperty(PropertyConfigBuilder()
5761                                         .SetName("body")
5762                                         .SetDataTypeString(TERM_MATCH_EXACT,
5763                                                            TOKENIZER_PLAIN)
5764                                         .SetCardinality(CARDINALITY_REPEATED))
5765                        .AddProperty(PropertyConfigBuilder()
5766                                         .SetName("timestamp")
5767                                         .SetDataType(TYPE_INT64)
5768                                         .SetCardinality(CARDINALITY_OPTIONAL))
5769                        .AddProperty(PropertyConfigBuilder()
5770                                         .SetName("score")
5771                                         .SetDataType(TYPE_DOUBLE)
5772                                         .SetCardinality(CARDINALITY_OPTIONAL)))
5773           .Build();
5774 
5775   // Create a document with every property.
5776   DocumentProto document0 = DocumentBuilder()
5777                                 .SetKey("icing", "uri0")
5778                                 .SetSchema("Value")
5779                                 .SetCreationTimestampMs(1)
5780                                 .AddStringProperty("body", "foo")
5781                                 .AddInt64Property("timestamp", 123)
5782                                 .AddDoubleProperty("score", 456.789)
5783                                 .Build();
5784   // Create a document with missing body.
5785   DocumentProto document1 = DocumentBuilder()
5786                                 .SetKey("icing", "uri1")
5787                                 .SetSchema("Value")
5788                                 .SetCreationTimestampMs(1)
5789                                 .AddInt64Property("timestamp", 123)
5790                                 .AddDoubleProperty("score", 456.789)
5791                                 .Build();
5792   // Create a document with missing timestamp.
5793   DocumentProto document2 = DocumentBuilder()
5794                                 .SetKey("icing", "uri2")
5795                                 .SetSchema("Value")
5796                                 .SetCreationTimestampMs(1)
5797                                 .AddStringProperty("body", "foo")
5798                                 .AddDoubleProperty("score", 456.789)
5799                                 .Build();
5800 
5801   // 1. Create an index with the 3 documents.
5802   {
5803     IcingSearchEngineOptions options = GetDefaultIcingOptions();
5804     options.set_build_property_existence_metadata_hits(
5805         before_build_property_existence_metadata_hits);
5806     TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
5807                                 std::make_unique<IcingFilesystem>(),
5808                                 std::make_unique<FakeClock>(),
5809                                 GetTestJniCache());
5810 
5811     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
5812     ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
5813     ASSERT_THAT(icing.Put(document0).status(), ProtoIsOk());
5814     ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
5815     ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
5816   }
5817 
5818   // 2. Create the index again with
5819   // after_build_property_existence_metadata_hits.
5820   //
5821   // Mock filesystem to observe and check the behavior of all indices.
5822   auto mock_filesystem = std::make_unique<MockFilesystem>();
5823   EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_))
5824       .WillRepeatedly(DoDefault());
5825   // Ensure that the term index is rebuilt if the flag is changed.
5826   EXPECT_CALL(*mock_filesystem,
5827               DeleteDirectoryRecursively(EndsWith("/index_dir")))
5828       .Times(flag_changed ? 1 : 0);
5829 
5830   IcingSearchEngineOptions options = GetDefaultIcingOptions();
5831   options.set_build_property_existence_metadata_hits(
5832       after_build_property_existence_metadata_hits);
5833   TestIcingSearchEngine icing(options, std::move(mock_filesystem),
5834                               std::make_unique<IcingFilesystem>(),
5835                               std::make_unique<FakeClock>(), GetTestJniCache());
5836   InitializeResultProto initialize_result = icing.Initialize();
5837   ASSERT_THAT(initialize_result.status(), ProtoIsOk());
5838   // Ensure that the term index is rebuilt if the flag is changed.
5839   EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(),
5840               Eq(flag_changed ? InitializeStatsProto::FEATURE_FLAG_CHANGED
5841                               : InitializeStatsProto::NONE));
5842   EXPECT_THAT(
5843       initialize_result.initialize_stats().integer_index_restoration_cause(),
5844       Eq(InitializeStatsProto::NONE));
5845   EXPECT_THAT(initialize_result.initialize_stats()
5846                   .qualified_id_join_index_restoration_cause(),
5847               Eq(InitializeStatsProto::NONE));
5848 
5849   // Get all documents that have "body".
5850   SearchSpecProto search_spec;
5851   search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
5852   search_spec.add_enabled_features(std::string(kHasPropertyFunctionFeature));
5853   search_spec.add_enabled_features(
5854       std::string(kListFilterQueryLanguageFeature));
5855   search_spec.set_query("hasProperty(\"body\")");
5856   SearchResultProto results = icing.Search(search_spec, GetDefaultScoringSpec(),
5857                                            ResultSpecProto::default_instance());
5858   EXPECT_THAT(results.status(), ProtoIsOk());
5859   if (after_build_property_existence_metadata_hits) {
5860     EXPECT_THAT(results.results(), SizeIs(2));
5861     EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
5862     EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
5863   } else {
5864     EXPECT_THAT(results.results(), IsEmpty());
5865   }
5866 
5867   // Get all documents that have "timestamp".
5868   search_spec.set_query("hasProperty(\"timestamp\")");
5869   results = icing.Search(search_spec, GetDefaultScoringSpec(),
5870                          ResultSpecProto::default_instance());
5871   EXPECT_THAT(results.status(), ProtoIsOk());
5872   if (after_build_property_existence_metadata_hits) {
5873     EXPECT_THAT(results.results(), SizeIs(2));
5874     EXPECT_THAT(results.results(0).document(), EqualsProto(document1));
5875     EXPECT_THAT(results.results(1).document(), EqualsProto(document0));
5876   } else {
5877     EXPECT_THAT(results.results(), IsEmpty());
5878   }
5879 
5880   // Get all documents that have "score".
5881   search_spec.set_query("hasProperty(\"score\")");
5882   results = icing.Search(search_spec, GetDefaultScoringSpec(),
5883                          ResultSpecProto::default_instance());
5884   EXPECT_THAT(results.status(), ProtoIsOk());
5885   if (after_build_property_existence_metadata_hits) {
5886     EXPECT_THAT(results.results(), SizeIs(3));
5887     EXPECT_THAT(results.results(0).document(), EqualsProto(document2));
5888     EXPECT_THAT(results.results(1).document(), EqualsProto(document1));
5889     EXPECT_THAT(results.results(2).document(), EqualsProto(document0));
5890   } else {
5891     EXPECT_THAT(results.results(), IsEmpty());
5892   }
5893 }
5894 
5895 INSTANTIATE_TEST_SUITE_P(
5896     IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
5897     IcingSearchEngineInitializationChangePropertyExistenceHitsFlagTest,
5898     testing::Values(std::make_tuple(false, false), std::make_tuple(false, true),
5899                     std::make_tuple(true, false), std::make_tuple(true, true)));
5900 
5901 }  // namespace
5902 }  // namespace lib
5903 }  // namespace icing
5904