• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/join/qualified-id-join-indexing-handler.h"
16 
17 #include <memory>
18 #include <string>
19 #include <string_view>
20 #include <utility>
21 #include <vector>
22 
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/text_classifier/lib3/utils/base/statusor.h"
25 #include "gmock/gmock.h"
26 #include "gtest/gtest.h"
27 #include "icing/absl_ports/str_cat.h"
28 #include "icing/document-builder.h"
29 #include "icing/file/filesystem.h"
30 #include "icing/file/portable-file-backed-proto-log.h"
31 #include "icing/join/document-id-to-join-info.h"
32 #include "icing/join/qualified-id-join-index-impl-v2.h"
33 #include "icing/join/qualified-id-join-index.h"
34 #include "icing/join/qualified-id.h"
35 #include "icing/portable/platform.h"
36 #include "icing/proto/document.pb.h"
37 #include "icing/proto/schema.pb.h"
38 #include "icing/schema-builder.h"
39 #include "icing/schema/joinable-property.h"
40 #include "icing/schema/schema-store.h"
41 #include "icing/store/document-filter-data.h"
42 #include "icing/store/document-id.h"
43 #include "icing/store/document-store.h"
44 #include "icing/store/namespace-fingerprint-identifier.h"
45 #include "icing/store/namespace-id.h"
46 #include "icing/testing/common-matchers.h"
47 #include "icing/testing/fake-clock.h"
48 #include "icing/testing/icu-data-file-helper.h"
49 #include "icing/testing/test-data.h"
50 #include "icing/testing/tmp-directory.h"
51 #include "icing/tokenization/language-segmenter-factory.h"
52 #include "icing/tokenization/language-segmenter.h"
53 #include "icing/util/status-macros.h"
54 #include "icing/util/tokenized-document.h"
55 #include "unicode/uloc.h"
56 
57 namespace icing {
58 namespace lib {
59 
60 namespace {
61 
62 using ::testing::ElementsAre;
63 using ::testing::Eq;
64 using ::testing::IsEmpty;
65 using ::testing::IsTrue;
66 using ::testing::NotNull;
67 
// ReferencedType: the schema type of the documents that get referenced. It has
// a single string property.
constexpr std::string_view kReferencedType = "ReferencedType";
constexpr std::string_view kPropertyName = "name";

// FakeType: a schema type with one joinable property.
//
// Note on joinable property ids: they are determined by the lexicographical
// order of the joinable property paths.
constexpr std::string_view kFakeType = "FakeType";
constexpr std::string_view kPropertyQualifiedId = "qualifiedId";

// NestedType: a schema type that has a joinable property of its own and also
// nests a document property.
constexpr std::string_view kNestedType = "NestedType";
constexpr std::string_view kPropertyNestedDoc = "nested";
constexpr std::string_view kPropertyQualifiedId2 = "qualifiedId2";
82 
83 class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
84  protected:
SetUp()85   void SetUp() override {
86     if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
87       ICING_ASSERT_OK(
88           // File generated via icu_data_file rule in //icing/BUILD.
89           icu_data_file_helper::SetUpICUDataFile(
90               GetTestFilePath("icing/icu.dat")));
91     }
92 
93     base_dir_ = GetTestTempDir() + "/icing_test";
94     ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
95                 IsTrue());
96 
97     qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
98     schema_store_dir_ = base_dir_ + "/schema_store";
99     doc_store_dir_ = base_dir_ + "/doc_store";
100 
101     ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
102                                QualifiedIdJoinIndexImplV2::Create(
103                                    filesystem_, qualified_id_join_index_dir_,
104                                    /*pre_mapping_fbv=*/false));
105 
106     language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
107     ICING_ASSERT_OK_AND_ASSIGN(
108         lang_segmenter_,
109         language_segmenter_factory::Create(std::move(segmenter_options)));
110 
111     ASSERT_THAT(
112         filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
113         IsTrue());
114     ICING_ASSERT_OK_AND_ASSIGN(
115         schema_store_,
116         SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
117     SchemaProto schema =
118         SchemaBuilder()
119             .AddType(
120                 SchemaTypeConfigBuilder()
121                     .SetType(kReferencedType)
122                     .AddProperty(PropertyConfigBuilder()
123                                      .SetName(kPropertyName)
124                                      .SetDataTypeString(TERM_MATCH_EXACT,
125                                                         TOKENIZER_PLAIN)
126                                      .SetCardinality(CARDINALITY_OPTIONAL)))
127             .AddType(SchemaTypeConfigBuilder().SetType(kFakeType).AddProperty(
128                 PropertyConfigBuilder()
129                     .SetName(kPropertyQualifiedId)
130                     .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
131                     .SetCardinality(CARDINALITY_OPTIONAL)))
132             .AddType(
133                 SchemaTypeConfigBuilder()
134                     .SetType(kNestedType)
135                     .AddProperty(
136                         PropertyConfigBuilder()
137                             .SetName(kPropertyNestedDoc)
138                             .SetDataTypeDocument(
139                                 kFakeType, /*index_nested_properties=*/true)
140                             .SetCardinality(CARDINALITY_OPTIONAL))
141                     .AddProperty(PropertyConfigBuilder()
142                                      .SetName(kPropertyQualifiedId2)
143                                      .SetDataTypeJoinableString(
144                                          JOINABLE_VALUE_TYPE_QUALIFIED_ID)
145                                      .SetCardinality(CARDINALITY_OPTIONAL)))
146             .Build();
147     ICING_ASSERT_OK(schema_store_->SetSchema(
148         schema, /*ignore_errors_and_delete_documents=*/false,
149         /*allow_circular_schema_definitions=*/false));
150 
151     ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
152                 IsTrue());
153     ICING_ASSERT_OK_AND_ASSIGN(
154         DocumentStore::CreateResult create_result,
155         DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
156                               schema_store_.get(),
157                               /*force_recovery_and_revalidate_documents=*/false,
158                               /*namespace_id_fingerprint=*/true,
159                               /*pre_mapping_fbv=*/false,
160                               /*use_persistent_hash_map=*/true,
161                               PortableFileBackedProtoLog<
162                                   DocumentWrapper>::kDeflateCompressionLevel,
163                               /*initialize_stats=*/nullptr));
164     doc_store_ = std::move(create_result.document_store);
165 
166     // Get FakeType related ids.
167     ICING_ASSERT_OK_AND_ASSIGN(fake_type_id_,
168                                schema_store_->GetSchemaTypeId(kFakeType));
169     ICING_ASSERT_OK_AND_ASSIGN(
170         const JoinablePropertyMetadata* metadata1,
171         schema_store_->GetJoinablePropertyMetadata(
172             fake_type_id_, std::string(kPropertyQualifiedId)));
173     ASSERT_THAT(metadata1, NotNull());
174     fake_type_joinable_property_id_ = metadata1->id;
175 
176     // Get NestedType related ids.
177     ICING_ASSERT_OK_AND_ASSIGN(nested_type_id_,
178                                schema_store_->GetSchemaTypeId(kNestedType));
179     ICING_ASSERT_OK_AND_ASSIGN(
180         const JoinablePropertyMetadata* metadata2,
181         schema_store_->GetJoinablePropertyMetadata(
182             nested_type_id_,
183             absl_ports::StrCat(kPropertyNestedDoc, ".", kPropertyQualifiedId)));
184     ASSERT_THAT(metadata2, NotNull());
185     nested_type_nested_joinable_property_id_ = metadata2->id;
186     ICING_ASSERT_OK_AND_ASSIGN(
187         const JoinablePropertyMetadata* metadata3,
188         schema_store_->GetJoinablePropertyMetadata(
189             nested_type_id_, std::string(kPropertyQualifiedId2)));
190     ASSERT_THAT(metadata3, NotNull());
191     nested_type_joinable_property_id_ = metadata3->id;
192   }
193 
TearDown()194   void TearDown() override {
195     doc_store_.reset();
196     schema_store_.reset();
197     lang_segmenter_.reset();
198     qualified_id_join_index_.reset();
199 
200     filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
201   }
202 
203   Filesystem filesystem_;
204   FakeClock fake_clock_;
205   std::string base_dir_;
206   std::string qualified_id_join_index_dir_;
207   std::string schema_store_dir_;
208   std::string doc_store_dir_;
209 
210   std::unique_ptr<QualifiedIdJoinIndexImplV2> qualified_id_join_index_;
211   std::unique_ptr<LanguageSegmenter> lang_segmenter_;
212   std::unique_ptr<SchemaStore> schema_store_;
213   std::unique_ptr<DocumentStore> doc_store_;
214 
215   // FakeType related ids.
216   SchemaTypeId fake_type_id_;
217   JoinablePropertyId fake_type_joinable_property_id_;
218 
219   // NestedType related ids.
220   SchemaTypeId nested_type_id_;
221   JoinablePropertyId nested_type_nested_joinable_property_id_;
222   JoinablePropertyId nested_type_joinable_property_id_;
223 };
224 
225 libtextclassifier3::StatusOr<
226     std::vector<QualifiedIdJoinIndexImplV2::JoinDataType>>
GetJoinData(const QualifiedIdJoinIndexImplV2 & index,SchemaTypeId schema_type_id,JoinablePropertyId joinable_property_id)227 GetJoinData(const QualifiedIdJoinIndexImplV2& index,
228             SchemaTypeId schema_type_id,
229             JoinablePropertyId joinable_property_id) {
230   ICING_ASSIGN_OR_RETURN(
231       std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase> iter,
232       index.GetIterator(schema_type_id, joinable_property_id));
233 
234   std::vector<QualifiedIdJoinIndexImplV2::JoinDataType> result;
235   while (iter->Advance().ok()) {
236     result.push_back(iter->GetCurrent());
237   }
238 
239   return result;
240 }
241 
// Create() is expected to return FAILED_PRECONDITION when any one of its three
// pointer arguments is null.
TEST_F(QualifiedIdJoinIndexingHandlerTest, CreationWithNullPointerShouldFail) {
  const auto expected_code =
      libtextclassifier3::StatusCode::FAILED_PRECONDITION;

  // Null clock.
  EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
                  /*clock=*/nullptr, doc_store_.get(),
                  qualified_id_join_index_.get()),
              StatusIs(expected_code));

  // Null document store.
  EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
                  &fake_clock_, /*doc_store=*/nullptr,
                  qualified_id_join_index_.get()),
              StatusIs(expected_code));

  // Null qualified id join index.
  EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
                  &fake_clock_, doc_store_.get(),
                  /*qualified_id_join_index=*/nullptr),
              StatusIs(expected_code));
}
258 
TEST_F(QualifiedIdJoinIndexingHandlerTest,HandleJoinableProperty)259 TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleJoinableProperty) {
260   // Create and put referenced (parent) document. Get its document id and
261   // namespace id.
262   DocumentProto referenced_document =
263       DocumentBuilder()
264           .SetKey("pkg$db/ns", "ref_type/1")
265           .SetSchema(std::string(kReferencedType))
266           .AddStringProperty(std::string(kPropertyName), "one")
267           .Build();
268   ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
269                              doc_store_->Put(referenced_document));
270   ICING_ASSERT_OK_AND_ASSIGN(
271       NamespaceId ref_doc_ns_id,
272       doc_store_->GetNamespaceId(referenced_document.namespace_()));
273   NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
274       /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
275   ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
276               IsOkAndHolds(ref_doc_id));
277 
278   // Create and put (child) document. Also tokenize it.
279   DocumentProto document =
280       DocumentBuilder()
281           .SetKey("icing", "fake_type/1")
282           .SetSchema(std::string(kFakeType))
283           .AddStringProperty(std::string(kPropertyQualifiedId),
284                              "pkg$db/ns#ref_type/1")
285           .Build();
286   ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
287   ICING_ASSERT_OK_AND_ASSIGN(
288       TokenizedDocument tokenized_document,
289       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
290                                 std::move(document)));
291 
292   // Handle document.
293   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
294               Eq(kInvalidDocumentId));
295   ICING_ASSERT_OK_AND_ASSIGN(
296       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
297       QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
298                                              qualified_id_join_index_.get()));
299   EXPECT_THAT(
300       handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
301                       /*put_document_stats=*/nullptr),
302       IsOk());
303 
304   // Verify the state of qualified_id_join_index_ after Handle().
305   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
306   // (kFakeType, kPropertyQualifiedId) should contain
307   // [(doc_id, ref_doc_ns_fingerprint_id)].
308   EXPECT_THAT(
309       GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
310                   /*joinable_property_id=*/fake_type_joinable_property_id_),
311       IsOkAndHolds(
312           ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
313               /*document_id=*/doc_id,
314               /*join_info=*/ref_doc_ns_fingerprint_id))));
315 }
316 
TEST_F(QualifiedIdJoinIndexingHandlerTest,HandleNestedJoinableProperty)317 TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleNestedJoinableProperty) {
318   // Create and put referenced (parent) document1. Get its document id and
319   // namespace id.
320   DocumentProto referenced_document1 =
321       DocumentBuilder()
322           .SetKey("pkg$db/ns", "ref_type/1")
323           .SetSchema(std::string(kReferencedType))
324           .AddStringProperty(std::string(kPropertyName), "one")
325           .Build();
326   ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id1,
327                              doc_store_->Put(referenced_document1));
328   ICING_ASSERT_OK_AND_ASSIGN(
329       NamespaceId ref_doc_ns_id1,
330       doc_store_->GetNamespaceId(referenced_document1.namespace_()));
331   NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id1(
332       /*namespace_id=*/ref_doc_ns_id1,
333       /*target_str=*/referenced_document1.uri());
334   ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id1),
335               IsOkAndHolds(ref_doc_id1));
336 
337   // Create and put referenced (parent) document2. Get its document id and
338   // namespace id.
339   DocumentProto referenced_document2 =
340       DocumentBuilder()
341           .SetKey("pkg$db/ns", "ref_type/2")
342           .SetSchema(std::string(kReferencedType))
343           .AddStringProperty(std::string(kPropertyName), "two")
344           .Build();
345   ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id2,
346                              doc_store_->Put(referenced_document2));
347   ICING_ASSERT_OK_AND_ASSIGN(
348       NamespaceId ref_doc_ns_id2,
349       doc_store_->GetNamespaceId(referenced_document2.namespace_()));
350   NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id2(
351       /*namespace_id=*/ref_doc_ns_id2,
352       /*target_str=*/referenced_document2.uri());
353   ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id2),
354               IsOkAndHolds(ref_doc_id2));
355 
356   // Create and put (child) document:
357   // - kPropertyNestedDoc.kPropertyQualifiedId refers to referenced_document2.
358   // - kPropertyQualifiedId2 refers to referenced_document1.
359   //
360   // Also tokenize it.
361   DocumentProto nested_document =
362       DocumentBuilder()
363           .SetKey("pkg$db/ns", "nested_type/1")
364           .SetSchema(std::string(kNestedType))
365           .AddDocumentProperty(
366               std::string(kPropertyNestedDoc),
367               DocumentBuilder()
368                   .SetKey("pkg$db/ns", "nested_fake_type/1")
369                   .SetSchema(std::string(kFakeType))
370                   .AddStringProperty(std::string(kPropertyQualifiedId),
371                                      "pkg$db/ns#ref_type/2")
372                   .Build())
373           .AddStringProperty(std::string(kPropertyQualifiedId2),
374                              "pkg$db/ns#ref_type/1")
375           .Build();
376   ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
377                              doc_store_->Put(nested_document));
378   ICING_ASSERT_OK_AND_ASSIGN(
379       TokenizedDocument tokenized_document,
380       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
381                                 nested_document));
382 
383   // Handle nested_document.
384   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
385               Eq(kInvalidDocumentId));
386   ICING_ASSERT_OK_AND_ASSIGN(
387       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
388       QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
389                                              qualified_id_join_index_.get()));
390   EXPECT_THAT(
391       handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
392                       /*put_document_stats=*/nullptr),
393       IsOk());
394 
395   // Verify the state of qualified_id_join_index_ after Handle().
396   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
397   // (kFakeType, kPropertyQualifiedId) should contain nothing.
398   EXPECT_THAT(
399       GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
400                   /*joinable_property_id=*/fake_type_joinable_property_id_),
401       IsOkAndHolds(IsEmpty()));
402   // (kNestedType, kPropertyNestedDoc.kPropertyQualifiedId) should contain
403   // [(doc_id, ref_doc_ns_fingerprint_id2)].
404   EXPECT_THAT(
405       GetJoinData(
406           *qualified_id_join_index_, /*schema_type_id=*/nested_type_id_,
407           /*joinable_property_id=*/nested_type_nested_joinable_property_id_),
408       IsOkAndHolds(
409           ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
410               /*document_id=*/doc_id,
411               /*join_info=*/ref_doc_ns_fingerprint_id2))));
412   // (kNestedType, kPropertyQualifiedId2) should contain
413   // [(doc_id, ref_doc_ns_fingerprint_id1)].
414   EXPECT_THAT(
415       GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/nested_type_id_,
416                   /*joinable_property_id=*/nested_type_joinable_property_id_),
417       IsOkAndHolds(
418           ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
419               /*document_id=*/doc_id,
420               /*join_info=*/ref_doc_ns_fingerprint_id1))));
421 }
422 
// A qualified id string that QualifiedId::Parse() rejects should not make
// Handle() fail; no join data is added, but last_added_document_id still
// advances.
TEST_F(QualifiedIdJoinIndexingHandlerTest,
       HandleShouldSkipInvalidFormatQualifiedId) {
  static constexpr std::string_view kInvalidFormatQualifiedId =
      "invalid_format_qualified_id";
  // Make sure the string really is rejected by the parser.
  ASSERT_THAT(QualifiedId::Parse(kInvalidFormatQualifiedId),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));

  // Child: put a document whose qualified id has the invalid format, then
  // tokenize it.
  DocumentProto child_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyQualifiedId),
                             std::string(kInvalidFormatQualifiedId))
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId child_doc_id,
                             doc_store_->Put(child_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_child,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                child_document));

  // Precondition: nothing has been added to the join index yet.
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(kInvalidDocumentId));

  // Run the handler; the invalid format qualified id should be ignored.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> indexing_handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              IsOk());

  // last_added_document_id is updated even though there was no valid
  // qualified id to index.
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  // (kFakeType, kPropertyQualifiedId): expected to be empty.
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));
}
467 
// A qualified id whose namespace was never put into the document store should
// be skipped: Handle() succeeds, no join data is added, and
// last_added_document_id advances.
TEST_F(QualifiedIdJoinIndexingHandlerTest,
       HandleShouldSkipNonExistingNamespace) {
  static constexpr std::string_view kUnknownNamespace = "UnknownNamespace";

  // Child: put a document referencing "UnknownNamespace#ref_type/1", then
  // tokenize it. No document with that namespace is put in this test.
  DocumentProto child_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(
              std::string(kPropertyQualifiedId),
              absl_ports::StrCat(kUnknownNamespace, "#", "ref_type/1"))
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId child_doc_id,
                             doc_store_->Put(child_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_child,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(child_document)));

  // Precondition: nothing has been added to the join index yet.
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(kInvalidDocumentId));

  // Run the handler on the child document.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> indexing_handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              IsOk());

  // The child should be recorded as the last added document.
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  // (kFakeType, kPropertyQualifiedId): expected to be empty because
  // "UnknownNamespace#ref_type/1" should have been skipped.
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));
}
508 
// A document without any qualified id property value should be handled
// successfully: no join data is added, but last_added_document_id advances.
TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleShouldSkipEmptyQualifiedId) {
  // Child: put a kFakeType document that sets no properties, then tokenize it.
  DocumentProto child_document = DocumentBuilder()
                                     .SetKey("icing", "fake_type/1")
                                     .SetSchema(std::string(kFakeType))
                                     .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId child_doc_id,
                             doc_store_->Put(child_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_child,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                child_document));
  // The tokenized document must carry no qualified id join properties.
  ASSERT_THAT(tokenized_child.qualified_id_join_properties(), IsEmpty());

  // Precondition: nothing has been added to the join index yet.
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(kInvalidDocumentId));

  // Run the handler on the child document.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> indexing_handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              IsOk());

  // last_added_document_id is updated even though there was no qualified id
  // to index.
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  // (kFakeType, kPropertyQualifiedId): expected to be empty.
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));
}
544 
TEST_F(QualifiedIdJoinIndexingHandlerTest,HandleInvalidDocumentIdShouldReturnInvalidArgumentError)545 TEST_F(QualifiedIdJoinIndexingHandlerTest,
546        HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
547   // Create and put referenced (parent) document. Get its document id and
548   // namespace id.
549   DocumentProto referenced_document =
550       DocumentBuilder()
551           .SetKey("pkg$db/ns", "ref_type/1")
552           .SetSchema(std::string(kReferencedType))
553           .AddStringProperty(std::string(kPropertyName), "one")
554           .Build();
555   ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
556                              doc_store_->Put(referenced_document));
557   ICING_ASSERT_OK_AND_ASSIGN(
558       NamespaceId ref_doc_ns_id,
559       doc_store_->GetNamespaceId(referenced_document.namespace_()));
560   NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
561       /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
562   ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
563               IsOkAndHolds(ref_doc_id));
564 
565   // Create and put (child) document. Also tokenize it.
566   DocumentProto document =
567       DocumentBuilder()
568           .SetKey("icing", "fake_type/1")
569           .SetSchema(std::string(kFakeType))
570           .AddStringProperty(std::string(kPropertyQualifiedId),
571                              "pkg$db/ns#ref_type/1")
572           .Build();
573   ICING_ASSERT_OK(doc_store_->Put(document));
574   ICING_ASSERT_OK_AND_ASSIGN(
575       TokenizedDocument tokenized_document,
576       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
577                                 std::move(document)));
578 
579   qualified_id_join_index_->set_last_added_document_id(ref_doc_id);
580   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
581               Eq(ref_doc_id));
582 
583   ICING_ASSERT_OK_AND_ASSIGN(
584       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
585       QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
586                                              qualified_id_join_index_.get()));
587 
588   // Handling document with kInvalidDocumentId should cause a failure.
589   EXPECT_THAT(
590       handler->Handle(tokenized_document, kInvalidDocumentId,
591                       /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
592       StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
593   // Verify the state of qualified_id_join_index_ after Handle(). Both index
594   // data and last_added_document_id should remain unchanged.
595   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
596               Eq(ref_doc_id));
597   // (kFakeType, kPropertyQualifiedId) should contain nothing.
598   EXPECT_THAT(
599       GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
600                   /*joinable_property_id=*/fake_type_joinable_property_id_),
601       IsOkAndHolds(IsEmpty()));
602 
603   // Recovery mode should get the same result.
604   EXPECT_THAT(
605       handler->Handle(tokenized_document, kInvalidDocumentId,
606                       /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
607       StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
608   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
609               Eq(ref_doc_id));
610   // (kFakeType, kPropertyQualifiedId) should contain nothing.
611   EXPECT_THAT(
612       GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
613                   /*joinable_property_id=*/fake_type_joinable_property_id_),
614       IsOkAndHolds(IsEmpty()));
615 }
616 
TEST_F(QualifiedIdJoinIndexingHandlerTest,HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError)617 TEST_F(QualifiedIdJoinIndexingHandlerTest,
618        HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
619   // Create and put referenced (parent) document. Get its document id and
620   // namespace id.
621   DocumentProto referenced_document =
622       DocumentBuilder()
623           .SetKey("pkg$db/ns", "ref_type/1")
624           .SetSchema(std::string(kReferencedType))
625           .AddStringProperty(std::string(kPropertyName), "one")
626           .Build();
627   ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
628                              doc_store_->Put(referenced_document));
629   ICING_ASSERT_OK_AND_ASSIGN(
630       NamespaceId ref_doc_ns_id,
631       doc_store_->GetNamespaceId(referenced_document.namespace_()));
632   NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
633       /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
634   ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
635               IsOkAndHolds(ref_doc_id));
636 
637   // Create and put (child) document. Also tokenize it.
638   DocumentProto document =
639       DocumentBuilder()
640           .SetKey("icing", "fake_type/1")
641           .SetSchema(std::string(kFakeType))
642           .AddStringProperty(std::string(kPropertyQualifiedId),
643                              "pkg$db/ns#ref_type/1")
644           .Build();
645   ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
646   ICING_ASSERT_OK_AND_ASSIGN(
647       TokenizedDocument tokenized_document,
648       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
649                                 std::move(document)));
650 
651   ICING_ASSERT_OK_AND_ASSIGN(
652       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
653       QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
654                                              qualified_id_join_index_.get()));
655 
656   // Handling document with document_id == last_added_document_id should cause a
657   // failure.
658   qualified_id_join_index_->set_last_added_document_id(doc_id);
659   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
660   EXPECT_THAT(
661       handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
662                       /*put_document_stats=*/nullptr),
663       StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
664   // Verify the state of qualified_id_join_index_ after Handle(). Both index
665   // data and last_added_document_id should remain unchanged.
666   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
667   // (kFakeType, kPropertyQualifiedId) should contain nothing.
668   EXPECT_THAT(
669       GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
670                   /*joinable_property_id=*/fake_type_joinable_property_id_),
671       IsOkAndHolds(IsEmpty()));
672 
673   // Handling document with document_id < last_added_document_id should cause a
674   // failure.
675   qualified_id_join_index_->set_last_added_document_id(doc_id + 1);
676   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
677               Eq(doc_id + 1));
678   EXPECT_THAT(
679       handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/false,
680                       /*put_document_stats=*/nullptr),
681       StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
682   // Verify the state of qualified_id_join_index_ after Handle(). Both index
683   // data and last_added_document_id should remain unchanged.
684   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
685               Eq(doc_id + 1));
686   // (kFakeType, kPropertyQualifiedId) should contain nothing.
687   EXPECT_THAT(
688       GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
689                   /*joinable_property_id=*/fake_type_joinable_property_id_),
690       IsOkAndHolds(IsEmpty()));
691 }
692 
TEST_F(QualifiedIdJoinIndexingHandlerTest,HandleRecoveryModeShouldIndexDocsGtLastAddedDocId)693 TEST_F(QualifiedIdJoinIndexingHandlerTest,
694        HandleRecoveryModeShouldIndexDocsGtLastAddedDocId) {
695   // Create and put referenced (parent) document. Get its document id and
696   // namespace id.
697   DocumentProto referenced_document =
698       DocumentBuilder()
699           .SetKey("pkg$db/ns", "ref_type/1")
700           .SetSchema(std::string(kReferencedType))
701           .AddStringProperty(std::string(kPropertyName), "one")
702           .Build();
703   ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
704                              doc_store_->Put(referenced_document));
705   ICING_ASSERT_OK_AND_ASSIGN(
706       NamespaceId ref_doc_ns_id,
707       doc_store_->GetNamespaceId(referenced_document.namespace_()));
708   NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
709       /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
710   ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
711               IsOkAndHolds(ref_doc_id));
712 
713   // Create and put (child) document. Also tokenize it.
714   DocumentProto document =
715       DocumentBuilder()
716           .SetKey("icing", "fake_type/1")
717           .SetSchema(std::string(kFakeType))
718           .AddStringProperty(std::string(kPropertyQualifiedId),
719                              "pkg$db/ns#ref_type/1")
720           .Build();
721   ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
722   ICING_ASSERT_OK_AND_ASSIGN(
723       TokenizedDocument tokenized_document,
724       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
725                                 std::move(document)));
726 
727   ICING_ASSERT_OK_AND_ASSIGN(
728       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
729       QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
730                                              qualified_id_join_index_.get()));
731 
732   // Handle document with document_id > last_added_document_id in recovery mode.
733   // The handler should index this document and update last_added_document_id.
734   qualified_id_join_index_->set_last_added_document_id(doc_id - 1);
735   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
736               Eq(doc_id - 1));
737   EXPECT_THAT(
738       handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
739                       /*put_document_stats=*/nullptr),
740       IsOk());
741   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
742   EXPECT_THAT(
743       GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
744                   /*joinable_property_id=*/fake_type_joinable_property_id_),
745       IsOkAndHolds(
746           ElementsAre(DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>(
747               /*document_id=*/doc_id,
748               /*join_info=*/ref_doc_ns_fingerprint_id))));
749 }
750 
TEST_F(QualifiedIdJoinIndexingHandlerTest,HandleRecoveryModeShouldIgnoreDocsLeLastAddedDocId)751 TEST_F(QualifiedIdJoinIndexingHandlerTest,
752        HandleRecoveryModeShouldIgnoreDocsLeLastAddedDocId) {
753   // Create and put referenced (parent) document. Get its document id and
754   // namespace id.
755   DocumentProto referenced_document =
756       DocumentBuilder()
757           .SetKey("pkg$db/ns", "ref_type/1")
758           .SetSchema(std::string(kReferencedType))
759           .AddStringProperty(std::string(kPropertyName), "one")
760           .Build();
761   ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
762                              doc_store_->Put(referenced_document));
763   ICING_ASSERT_OK_AND_ASSIGN(
764       NamespaceId ref_doc_ns_id,
765       doc_store_->GetNamespaceId(referenced_document.namespace_()));
766   NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
767       /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
768   ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
769               IsOkAndHolds(ref_doc_id));
770 
771   // Create and put (child) document. Also tokenize it.
772   DocumentProto document =
773       DocumentBuilder()
774           .SetKey("icing", "fake_type/1")
775           .SetSchema(std::string(kFakeType))
776           .AddStringProperty(std::string(kPropertyQualifiedId),
777                              "pkg$db/ns#ref_type/1")
778           .Build();
779   ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, doc_store_->Put(document));
780   ICING_ASSERT_OK_AND_ASSIGN(
781       TokenizedDocument tokenized_document,
782       TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
783                                 std::move(document)));
784 
785   ICING_ASSERT_OK_AND_ASSIGN(
786       std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
787       QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
788                                              qualified_id_join_index_.get()));
789 
790   // Handle document with document_id == last_added_document_id in recovery
791   // mode. We should not get any error, but the handler should ignore the
792   // document, so both index data and last_added_document_id should remain
793   // unchanged.
794   qualified_id_join_index_->set_last_added_document_id(doc_id);
795   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
796   EXPECT_THAT(
797       handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
798                       /*put_document_stats=*/nullptr),
799       IsOk());
800   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(), Eq(doc_id));
801   // (kFakeType, kPropertyQualifiedId) should contain nothing.
802   EXPECT_THAT(
803       GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
804                   /*joinable_property_id=*/fake_type_joinable_property_id_),
805       IsOkAndHolds(IsEmpty()));
806 
807   // Handle document with document_id < last_added_document_id in recovery mode.
808   // We should not get any error, but the handler should ignore the document, so
809   // both index data and last_added_document_id should remain unchanged.
810   qualified_id_join_index_->set_last_added_document_id(doc_id + 1);
811   ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
812               Eq(doc_id + 1));
813   EXPECT_THAT(
814       handler->Handle(tokenized_document, doc_id, /*recovery_mode=*/true,
815                       /*put_document_stats=*/nullptr),
816       IsOk());
817   EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
818               Eq(doc_id + 1));
819   // (kFakeType, kPropertyQualifiedId) should contain nothing.
820   EXPECT_THAT(
821       GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
822                   /*joinable_property_id=*/fake_type_joinable_property_id_),
823       IsOkAndHolds(IsEmpty()));
824 }
825 
826 }  // namespace
827 
828 }  // namespace lib
829 }  // namespace icing
830