1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/join/qualified-id-join-indexing-handler.h"
16
17 #include <memory>
18 #include <string>
19 #include <string_view>
20 #include <utility>
21 #include <vector>
22
23 #include "icing/text_classifier/lib3/utils/base/status.h"
24 #include "icing/text_classifier/lib3/utils/base/statusor.h"
25 #include "gmock/gmock.h"
26 #include "gtest/gtest.h"
27 #include "icing/absl_ports/str_cat.h"
28 #include "icing/document-builder.h"
29 #include "icing/file/filesystem.h"
30 #include "icing/file/portable-file-backed-proto-log.h"
31 #include "icing/join/document-id-to-join-info.h"
32 #include "icing/join/qualified-id-join-index-impl-v2.h"
33 #include "icing/join/qualified-id-join-index.h"
34 #include "icing/join/qualified-id.h"
35 #include "icing/portable/platform.h"
36 #include "icing/proto/document.pb.h"
37 #include "icing/proto/schema.pb.h"
38 #include "icing/schema-builder.h"
39 #include "icing/schema/joinable-property.h"
40 #include "icing/schema/schema-store.h"
41 #include "icing/store/document-filter-data.h"
42 #include "icing/store/document-id.h"
43 #include "icing/store/document-store.h"
44 #include "icing/store/namespace-fingerprint-identifier.h"
45 #include "icing/store/namespace-id.h"
46 #include "icing/testing/common-matchers.h"
47 #include "icing/testing/fake-clock.h"
48 #include "icing/testing/icu-data-file-helper.h"
49 #include "icing/testing/test-data.h"
50 #include "icing/testing/tmp-directory.h"
51 #include "icing/tokenization/language-segmenter-factory.h"
52 #include "icing/tokenization/language-segmenter.h"
53 #include "icing/util/status-macros.h"
54 #include "icing/util/tokenized-document.h"
55 #include "unicode/uloc.h"
56
57 namespace icing {
58 namespace lib {
59
60 namespace {
61
62 using ::testing::ElementsAre;
63 using ::testing::Eq;
64 using ::testing::IsEmpty;
65 using ::testing::IsTrue;
66 using ::testing::NotNull;
67
// ReferencedType: schema type used for the referenced documents.
constexpr std::string_view kReferencedType{"ReferencedType"};
constexpr std::string_view kPropertyName{"name"};

// FakeType: schema type that carries a joinable property.
//
// Note on joinable property ids: they are determined by the lexicographical
// order of the joinable property paths.
constexpr std::string_view kFakeType{"FakeType"};
constexpr std::string_view kPropertyQualifiedId{"qualifiedId"};

// NestedType: schema type with nested joinable properties.
constexpr std::string_view kNestedType{"NestedType"};
constexpr std::string_view kPropertyNestedDoc{"nested"};
constexpr std::string_view kPropertyQualifiedId2{"qualifiedId2"};
82
83 class QualifiedIdJoinIndexingHandlerTest : public ::testing::Test {
84 protected:
SetUp()85 void SetUp() override {
86 if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
87 ICING_ASSERT_OK(
88 // File generated via icu_data_file rule in //icing/BUILD.
89 icu_data_file_helper::SetUpICUDataFile(
90 GetTestFilePath("icing/icu.dat")));
91 }
92
93 base_dir_ = GetTestTempDir() + "/icing_test";
94 ASSERT_THAT(filesystem_.CreateDirectoryRecursively(base_dir_.c_str()),
95 IsTrue());
96
97 qualified_id_join_index_dir_ = base_dir_ + "/qualified_id_join_index";
98 schema_store_dir_ = base_dir_ + "/schema_store";
99 doc_store_dir_ = base_dir_ + "/doc_store";
100
101 ICING_ASSERT_OK_AND_ASSIGN(qualified_id_join_index_,
102 QualifiedIdJoinIndexImplV2::Create(
103 filesystem_, qualified_id_join_index_dir_,
104 /*pre_mapping_fbv=*/false));
105
106 language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
107 ICING_ASSERT_OK_AND_ASSIGN(
108 lang_segmenter_,
109 language_segmenter_factory::Create(std::move(segmenter_options)));
110
111 ASSERT_THAT(
112 filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str()),
113 IsTrue());
114 ICING_ASSERT_OK_AND_ASSIGN(
115 schema_store_,
116 SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
117 SchemaProto schema =
118 SchemaBuilder()
119 .AddType(
120 SchemaTypeConfigBuilder()
121 .SetType(kReferencedType)
122 .AddProperty(PropertyConfigBuilder()
123 .SetName(kPropertyName)
124 .SetDataTypeString(TERM_MATCH_EXACT,
125 TOKENIZER_PLAIN)
126 .SetCardinality(CARDINALITY_OPTIONAL)))
127 .AddType(SchemaTypeConfigBuilder().SetType(kFakeType).AddProperty(
128 PropertyConfigBuilder()
129 .SetName(kPropertyQualifiedId)
130 .SetDataTypeJoinableString(JOINABLE_VALUE_TYPE_QUALIFIED_ID)
131 .SetCardinality(CARDINALITY_OPTIONAL)))
132 .AddType(
133 SchemaTypeConfigBuilder()
134 .SetType(kNestedType)
135 .AddProperty(
136 PropertyConfigBuilder()
137 .SetName(kPropertyNestedDoc)
138 .SetDataTypeDocument(
139 kFakeType, /*index_nested_properties=*/true)
140 .SetCardinality(CARDINALITY_OPTIONAL))
141 .AddProperty(PropertyConfigBuilder()
142 .SetName(kPropertyQualifiedId2)
143 .SetDataTypeJoinableString(
144 JOINABLE_VALUE_TYPE_QUALIFIED_ID)
145 .SetCardinality(CARDINALITY_OPTIONAL)))
146 .Build();
147 ICING_ASSERT_OK(schema_store_->SetSchema(
148 schema, /*ignore_errors_and_delete_documents=*/false,
149 /*allow_circular_schema_definitions=*/false));
150
151 ASSERT_THAT(filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str()),
152 IsTrue());
153 ICING_ASSERT_OK_AND_ASSIGN(
154 DocumentStore::CreateResult create_result,
155 DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
156 schema_store_.get(),
157 /*force_recovery_and_revalidate_documents=*/false,
158 /*namespace_id_fingerprint=*/true,
159 /*pre_mapping_fbv=*/false,
160 /*use_persistent_hash_map=*/true,
161 PortableFileBackedProtoLog<
162 DocumentWrapper>::kDeflateCompressionLevel,
163 /*initialize_stats=*/nullptr));
164 doc_store_ = std::move(create_result.document_store);
165
166 // Get FakeType related ids.
167 ICING_ASSERT_OK_AND_ASSIGN(fake_type_id_,
168 schema_store_->GetSchemaTypeId(kFakeType));
169 ICING_ASSERT_OK_AND_ASSIGN(
170 const JoinablePropertyMetadata* metadata1,
171 schema_store_->GetJoinablePropertyMetadata(
172 fake_type_id_, std::string(kPropertyQualifiedId)));
173 ASSERT_THAT(metadata1, NotNull());
174 fake_type_joinable_property_id_ = metadata1->id;
175
176 // Get NestedType related ids.
177 ICING_ASSERT_OK_AND_ASSIGN(nested_type_id_,
178 schema_store_->GetSchemaTypeId(kNestedType));
179 ICING_ASSERT_OK_AND_ASSIGN(
180 const JoinablePropertyMetadata* metadata2,
181 schema_store_->GetJoinablePropertyMetadata(
182 nested_type_id_,
183 absl_ports::StrCat(kPropertyNestedDoc, ".", kPropertyQualifiedId)));
184 ASSERT_THAT(metadata2, NotNull());
185 nested_type_nested_joinable_property_id_ = metadata2->id;
186 ICING_ASSERT_OK_AND_ASSIGN(
187 const JoinablePropertyMetadata* metadata3,
188 schema_store_->GetJoinablePropertyMetadata(
189 nested_type_id_, std::string(kPropertyQualifiedId2)));
190 ASSERT_THAT(metadata3, NotNull());
191 nested_type_joinable_property_id_ = metadata3->id;
192 }
193
TearDown()194 void TearDown() override {
195 doc_store_.reset();
196 schema_store_.reset();
197 lang_segmenter_.reset();
198 qualified_id_join_index_.reset();
199
200 filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
201 }
202
203 Filesystem filesystem_;
204 FakeClock fake_clock_;
205 std::string base_dir_;
206 std::string qualified_id_join_index_dir_;
207 std::string schema_store_dir_;
208 std::string doc_store_dir_;
209
210 std::unique_ptr<QualifiedIdJoinIndexImplV2> qualified_id_join_index_;
211 std::unique_ptr<LanguageSegmenter> lang_segmenter_;
212 std::unique_ptr<SchemaStore> schema_store_;
213 std::unique_ptr<DocumentStore> doc_store_;
214
215 // FakeType related ids.
216 SchemaTypeId fake_type_id_;
217 JoinablePropertyId fake_type_joinable_property_id_;
218
219 // NestedType related ids.
220 SchemaTypeId nested_type_id_;
221 JoinablePropertyId nested_type_nested_joinable_property_id_;
222 JoinablePropertyId nested_type_joinable_property_id_;
223 };
224
225 libtextclassifier3::StatusOr<
226 std::vector<QualifiedIdJoinIndexImplV2::JoinDataType>>
GetJoinData(const QualifiedIdJoinIndexImplV2 & index,SchemaTypeId schema_type_id,JoinablePropertyId joinable_property_id)227 GetJoinData(const QualifiedIdJoinIndexImplV2& index,
228 SchemaTypeId schema_type_id,
229 JoinablePropertyId joinable_property_id) {
230 ICING_ASSIGN_OR_RETURN(
231 std::unique_ptr<QualifiedIdJoinIndex::JoinDataIteratorBase> iter,
232 index.GetIterator(schema_type_id, joinable_property_id));
233
234 std::vector<QualifiedIdJoinIndexImplV2::JoinDataType> result;
235 while (iter->Advance().ok()) {
236 result.push_back(iter->GetCurrent());
237 }
238
239 return result;
240 }
241
// Create() must reject a null clock, a null document store and a null join
// index, each with FAILED_PRECONDITION.
TEST_F(QualifiedIdJoinIndexingHandlerTest, CreationWithNullPointerShouldFail) {
  DocumentStore* valid_doc_store = doc_store_.get();
  QualifiedIdJoinIndexImplV2* valid_join_index = qualified_id_join_index_.get();

  // Null clock.
  EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
                  /*clock=*/nullptr, valid_doc_store, valid_join_index),
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));

  // Null document store.
  EXPECT_THAT(QualifiedIdJoinIndexingHandler::Create(
                  &fake_clock_, /*doc_store=*/nullptr, valid_join_index),
              StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));

  // Null qualified id join index.
  EXPECT_THAT(
      QualifiedIdJoinIndexingHandler::Create(
          &fake_clock_, valid_doc_store, /*qualified_id_join_index=*/nullptr),
      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
258
// Handling a kFakeType document whose joinable property holds the qualified id
// of an existing parent document should add one join data entry for it.
TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleJoinableProperty) {
  // Store the parent (referenced) document. Build the namespace fingerprint
  // identifier from its namespace id and uri, and check that the document
  // store resolves that identifier to the parent's document id.
  DocumentProto parent_document =
      DocumentBuilder()
          .SetKey("pkg$db/ns", "ref_type/1")
          .SetSchema(std::string(kReferencedType))
          .AddStringProperty(std::string(kPropertyName), "one")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId parent_doc_id,
                             doc_store_->Put(parent_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      NamespaceId parent_namespace_id,
      doc_store_->GetNamespaceId(parent_document.namespace_()));
  NamespaceFingerprintIdentifier parent_fingerprint_id(
      /*namespace_id=*/parent_namespace_id,
      /*target_str=*/parent_document.uri());
  ASSERT_THAT(doc_store_->GetDocumentId(parent_fingerprint_id),
              IsOkAndHolds(parent_doc_id));

  // Store the child document that refers to the parent, then tokenize it.
  DocumentProto child_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyQualifiedId),
                             "pkg$db/ns#ref_type/1")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId child_doc_id,
                             doc_store_->Put(child_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_child,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(child_document)));

  // Precondition: the join index reports kInvalidDocumentId as its last added
  // document id.
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(kInvalidDocumentId));

  // Run the handler on the child document.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> indexing_handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              IsOk());

  // After Handle(): last_added_document_id is the child's id, and
  // (kFakeType, kPropertyQualifiedId) holds exactly
  // [(child_doc_id, parent_fingerprint_id)].
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  const DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> expected_entry(
      /*document_id=*/child_doc_id, /*join_info=*/parent_fingerprint_id);
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(ElementsAre(expected_entry)));
}
316
// Handling a kNestedType document should index both its nested joinable
// property and its top-level joinable property under kNestedType, and add
// nothing under kFakeType.
TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleNestedJoinableProperty) {
  // First parent (referenced) document: store it, build its namespace
  // fingerprint identifier and check that it resolves to the stored id.
  DocumentProto parent_document1 =
      DocumentBuilder()
          .SetKey("pkg$db/ns", "ref_type/1")
          .SetSchema(std::string(kReferencedType))
          .AddStringProperty(std::string(kPropertyName), "one")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId parent_doc_id1,
                             doc_store_->Put(parent_document1));
  ICING_ASSERT_OK_AND_ASSIGN(
      NamespaceId parent_namespace_id1,
      doc_store_->GetNamespaceId(parent_document1.namespace_()));
  NamespaceFingerprintIdentifier parent_fingerprint_id1(
      /*namespace_id=*/parent_namespace_id1,
      /*target_str=*/parent_document1.uri());
  ASSERT_THAT(doc_store_->GetDocumentId(parent_fingerprint_id1),
              IsOkAndHolds(parent_doc_id1));

  // Second parent (referenced) document, prepared the same way.
  DocumentProto parent_document2 =
      DocumentBuilder()
          .SetKey("pkg$db/ns", "ref_type/2")
          .SetSchema(std::string(kReferencedType))
          .AddStringProperty(std::string(kPropertyName), "two")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId parent_doc_id2,
                             doc_store_->Put(parent_document2));
  ICING_ASSERT_OK_AND_ASSIGN(
      NamespaceId parent_namespace_id2,
      doc_store_->GetNamespaceId(parent_document2.namespace_()));
  NamespaceFingerprintIdentifier parent_fingerprint_id2(
      /*namespace_id=*/parent_namespace_id2,
      /*target_str=*/parent_document2.uri());
  ASSERT_THAT(doc_store_->GetDocumentId(parent_fingerprint_id2),
              IsOkAndHolds(parent_doc_id2));

  // Child document of kNestedType:
  //   - kPropertyNestedDoc.kPropertyQualifiedId -> second parent document.
  //   - kPropertyQualifiedId2                   -> first parent document.
  // It is stored and then tokenized.
  DocumentProto child_document =
      DocumentBuilder()
          .SetKey("pkg$db/ns", "nested_type/1")
          .SetSchema(std::string(kNestedType))
          .AddDocumentProperty(
              std::string(kPropertyNestedDoc),
              DocumentBuilder()
                  .SetKey("pkg$db/ns", "nested_fake_type/1")
                  .SetSchema(std::string(kFakeType))
                  .AddStringProperty(std::string(kPropertyQualifiedId),
                                     "pkg$db/ns#ref_type/2")
                  .Build())
          .AddStringProperty(std::string(kPropertyQualifiedId2),
                             "pkg$db/ns#ref_type/1")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId child_doc_id,
                             doc_store_->Put(child_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_child,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                child_document));

  // Precondition: the join index reports kInvalidDocumentId as its last added
  // document id.
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(kInvalidDocumentId));

  // Run the handler on the child document.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> indexing_handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              IsOk());

  // After Handle(): last_added_document_id is the child's id.
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));

  // (kFakeType, kPropertyQualifiedId) stays empty.
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));

  // (kNestedType, kPropertyNestedDoc.kPropertyQualifiedId) holds
  // [(child_doc_id, parent_fingerprint_id2)].
  const DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>
      expected_nested_entry(/*document_id=*/child_doc_id,
                            /*join_info=*/parent_fingerprint_id2);
  EXPECT_THAT(
      GetJoinData(
          *qualified_id_join_index_, /*schema_type_id=*/nested_type_id_,
          /*joinable_property_id=*/nested_type_nested_joinable_property_id_),
      IsOkAndHolds(ElementsAre(expected_nested_entry)));

  // (kNestedType, kPropertyQualifiedId2) holds
  // [(child_doc_id, parent_fingerprint_id1)].
  const DocumentIdToJoinInfo<NamespaceFingerprintIdentifier>
      expected_top_level_entry(/*document_id=*/child_doc_id,
                               /*join_info=*/parent_fingerprint_id1);
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/nested_type_id_,
                  /*joinable_property_id=*/nested_type_joinable_property_id_),
      IsOkAndHolds(ElementsAre(expected_top_level_entry)));
}
422
// A joinable property value that QualifiedId::Parse() rejects should be
// skipped: Handle() succeeds, no join data is added, and
// last_added_document_id still advances.
TEST_F(QualifiedIdJoinIndexingHandlerTest,
       HandleShouldSkipInvalidFormatQualifiedId) {
  static constexpr std::string_view kMalformedQualifiedId =
      "invalid_format_qualified_id";
  // Sanity check: the value really is rejected by the parser.
  ASSERT_THAT(QualifiedId::Parse(kMalformedQualifiedId),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));

  // Store a child document whose joinable property holds the malformed
  // qualified id, then tokenize it.
  DocumentProto child_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyQualifiedId),
                             std::string(kMalformedQualifiedId))
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId child_doc_id,
                             doc_store_->Put(child_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_child,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                child_document));

  // Precondition: the join index reports kInvalidDocumentId as its last added
  // document id.
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(kInvalidDocumentId));

  // Run the handler; the malformed qualified id must not cause an error.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> indexing_handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              IsOk());

  // After Handle(): last_added_document_id is updated even though there was no
  // valid qualified id to index ...
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  // ... and (kFakeType, kPropertyQualifiedId) contains nothing.
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));
}
467
// A qualified id whose namespace was never put into the document store should
// be skipped: Handle() succeeds and no join data is added.
TEST_F(QualifiedIdJoinIndexingHandlerTest,
       HandleShouldSkipNonExistingNamespace) {
  static constexpr std::string_view kMissingNamespace = "UnknownNamespace";

  // Store a child document that refers to a parent qualified id in the
  // unknown namespace, then tokenize it.
  DocumentProto child_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(
              std::string(kPropertyQualifiedId),
              absl_ports::StrCat(kMissingNamespace, "#", "ref_type/1"))
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId child_doc_id,
                             doc_store_->Put(child_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_child,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(child_document)));

  // Precondition: the join index reports kInvalidDocumentId as its last added
  // document id.
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(kInvalidDocumentId));

  // Run the handler on the child document.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> indexing_handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              IsOk());

  // After Handle(): last_added_document_id is the child's id.
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  // "UnknownNamespace#ref_type/1" is skipped, so
  // (kFakeType, kPropertyQualifiedId) is empty.
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));
}
508
// A document without any qualified id join property should be handled without
// error: no join data is added, but last_added_document_id still advances.
TEST_F(QualifiedIdJoinIndexingHandlerTest, HandleShouldSkipEmptyQualifiedId) {
  // Store a kFakeType document that sets no properties, then tokenize it and
  // confirm that the tokenized form has no qualified id join properties.
  DocumentProto child_document = DocumentBuilder()
                                     .SetKey("icing", "fake_type/1")
                                     .SetSchema(std::string(kFakeType))
                                     .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId child_doc_id,
                             doc_store_->Put(child_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_child,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                child_document));
  ASSERT_THAT(tokenized_child.qualified_id_join_properties(), IsEmpty());

  // Precondition: the join index reports kInvalidDocumentId as its last added
  // document id.
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(kInvalidDocumentId));

  // Run the handler on the document.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> indexing_handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              IsOk());

  // After Handle(): last_added_document_id is updated even though there was no
  // qualified id to index ...
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  // ... and (kFakeType, kPropertyQualifiedId) contains nothing.
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));
}
544
// Handle() called with kInvalidDocumentId must return INVALID_ARGUMENT and
// leave both the index data and last_added_document_id untouched. This is
// checked in normal mode and in recovery mode.
TEST_F(QualifiedIdJoinIndexingHandlerTest,
       HandleInvalidDocumentIdShouldReturnInvalidArgumentError) {
  // Create and put referenced (parent) document. Get its document id and
  // namespace id, and check that its namespace fingerprint identifier resolves
  // to the stored document id.
  DocumentProto referenced_document =
      DocumentBuilder()
          .SetKey("pkg$db/ns", "ref_type/1")
          .SetSchema(std::string(kReferencedType))
          .AddStringProperty(std::string(kPropertyName), "one")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId ref_doc_id,
                             doc_store_->Put(referenced_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      NamespaceId ref_doc_ns_id,
      doc_store_->GetNamespaceId(referenced_document.namespace_()));
  NamespaceFingerprintIdentifier ref_doc_ns_fingerprint_id(
      /*namespace_id=*/ref_doc_ns_id, /*target_str=*/referenced_document.uri());
  ASSERT_THAT(doc_store_->GetDocumentId(ref_doc_ns_fingerprint_id),
              IsOkAndHolds(ref_doc_id));

  // Create and put (child) document. Also tokenize it. The document id that
  // Put() returns is deliberately unused: Handle() is called with
  // kInvalidDocumentId below.
  DocumentProto document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyQualifiedId),
                             "pkg$db/ns#ref_type/1")
          .Build();
  ICING_ASSERT_OK(doc_store_->Put(document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_document,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(document)));

  // Give the index a known last_added_document_id so that the checks below can
  // tell whether Handle() modified it.
  qualified_id_join_index_->set_last_added_document_id(ref_doc_id);
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(ref_doc_id));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));

  // Handling document with kInvalidDocumentId should cause a failure.
  EXPECT_THAT(
      handler->Handle(tokenized_document, kInvalidDocumentId,
                      /*recovery_mode=*/false, /*put_document_stats=*/nullptr),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  // Verify the state of qualified_id_join_index_ after Handle(). Both index
  // data and last_added_document_id should remain unchanged.
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(ref_doc_id));
  // (kFakeType, kPropertyQualifiedId) should contain nothing.
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));

  // Recovery mode should get the same result. Note that recovery_mode must be
  // true here; passing false would merely repeat the call above and leave the
  // recovery path untested.
  EXPECT_THAT(
      handler->Handle(tokenized_document, kInvalidDocumentId,
                      /*recovery_mode=*/true, /*put_document_stats=*/nullptr),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(ref_doc_id));
  // (kFakeType, kPropertyQualifiedId) should contain nothing.
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));
}
616
// In normal (non-recovery) mode, Handle() must return INVALID_ARGUMENT for a
// document id that is equal to or smaller than last_added_document_id, and must
// leave both the index data and last_added_document_id untouched.
TEST_F(QualifiedIdJoinIndexingHandlerTest,
       HandleOutOfOrderDocumentIdShouldReturnInvalidArgumentError) {
  // Store the parent (referenced) document. Build its namespace fingerprint
  // identifier and check that it resolves to the stored document id.
  DocumentProto parent_document =
      DocumentBuilder()
          .SetKey("pkg$db/ns", "ref_type/1")
          .SetSchema(std::string(kReferencedType))
          .AddStringProperty(std::string(kPropertyName), "one")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId parent_doc_id,
                             doc_store_->Put(parent_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      NamespaceId parent_namespace_id,
      doc_store_->GetNamespaceId(parent_document.namespace_()));
  NamespaceFingerprintIdentifier parent_fingerprint_id(
      /*namespace_id=*/parent_namespace_id,
      /*target_str=*/parent_document.uri());
  ASSERT_THAT(doc_store_->GetDocumentId(parent_fingerprint_id),
              IsOkAndHolds(parent_doc_id));

  // Store the child document that refers to the parent, then tokenize it.
  DocumentProto child_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyQualifiedId),
                             "pkg$db/ns#ref_type/1")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId child_doc_id,
                             doc_store_->Put(child_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_child,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(child_document)));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> indexing_handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));

  // Case 1: document_id == last_added_document_id.
  qualified_id_join_index_->set_last_added_document_id(child_doc_id);
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  // Neither last_added_document_id nor the index data may have changed:
  // (kFakeType, kPropertyQualifiedId) contains nothing.
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));

  // Case 2: document_id < last_added_document_id.
  qualified_id_join_index_->set_last_added_document_id(child_doc_id + 1);
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id + 1));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/false,
                                       /*put_document_stats=*/nullptr),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  // Again, neither last_added_document_id nor the index data may have changed:
  // (kFakeType, kPropertyQualifiedId) contains nothing.
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id + 1));
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));
}
692
// In recovery mode, a document whose id is greater than last_added_document_id
// should be indexed and last_added_document_id should advance to it.
TEST_F(QualifiedIdJoinIndexingHandlerTest,
       HandleRecoveryModeShouldIndexDocsGtLastAddedDocId) {
  // Store the parent (referenced) document. Build its namespace fingerprint
  // identifier and check that it resolves to the stored document id.
  DocumentProto parent_document =
      DocumentBuilder()
          .SetKey("pkg$db/ns", "ref_type/1")
          .SetSchema(std::string(kReferencedType))
          .AddStringProperty(std::string(kPropertyName), "one")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId parent_doc_id,
                             doc_store_->Put(parent_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      NamespaceId parent_namespace_id,
      doc_store_->GetNamespaceId(parent_document.namespace_()));
  NamespaceFingerprintIdentifier parent_fingerprint_id(
      /*namespace_id=*/parent_namespace_id,
      /*target_str=*/parent_document.uri());
  ASSERT_THAT(doc_store_->GetDocumentId(parent_fingerprint_id),
              IsOkAndHolds(parent_doc_id));

  // Store the child document that refers to the parent, then tokenize it.
  DocumentProto child_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyQualifiedId),
                             "pkg$db/ns#ref_type/1")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId child_doc_id,
                             doc_store_->Put(child_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_child,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(child_document)));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> indexing_handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));

  // Arrange for document_id > last_added_document_id, then handle the document
  // in recovery mode.
  qualified_id_join_index_->set_last_added_document_id(child_doc_id - 1);
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id - 1));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/true,
                                       /*put_document_stats=*/nullptr),
              IsOk());

  // The document was indexed: last_added_document_id is the child's id and
  // (kFakeType, kPropertyQualifiedId) holds
  // [(child_doc_id, parent_fingerprint_id)].
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  const DocumentIdToJoinInfo<NamespaceFingerprintIdentifier> expected_entry(
      /*document_id=*/child_doc_id, /*join_info=*/parent_fingerprint_id);
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(ElementsAre(expected_entry)));
}
750
// In recovery mode, a document whose id is equal to or smaller than
// last_added_document_id should be ignored: Handle() returns OK, and both the
// index data and last_added_document_id stay as they were.
TEST_F(QualifiedIdJoinIndexingHandlerTest,
       HandleRecoveryModeShouldIgnoreDocsLeLastAddedDocId) {
  // Store the parent (referenced) document. Build its namespace fingerprint
  // identifier and check that it resolves to the stored document id.
  DocumentProto parent_document =
      DocumentBuilder()
          .SetKey("pkg$db/ns", "ref_type/1")
          .SetSchema(std::string(kReferencedType))
          .AddStringProperty(std::string(kPropertyName), "one")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId parent_doc_id,
                             doc_store_->Put(parent_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      NamespaceId parent_namespace_id,
      doc_store_->GetNamespaceId(parent_document.namespace_()));
  NamespaceFingerprintIdentifier parent_fingerprint_id(
      /*namespace_id=*/parent_namespace_id,
      /*target_str=*/parent_document.uri());
  ASSERT_THAT(doc_store_->GetDocumentId(parent_fingerprint_id),
              IsOkAndHolds(parent_doc_id));

  // Store the child document that refers to the parent, then tokenize it.
  DocumentProto child_document =
      DocumentBuilder()
          .SetKey("icing", "fake_type/1")
          .SetSchema(std::string(kFakeType))
          .AddStringProperty(std::string(kPropertyQualifiedId),
                             "pkg$db/ns#ref_type/1")
          .Build();
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId child_doc_id,
                             doc_store_->Put(child_document));
  ICING_ASSERT_OK_AND_ASSIGN(
      TokenizedDocument tokenized_child,
      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
                                std::move(child_document)));

  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QualifiedIdJoinIndexingHandler> indexing_handler,
      QualifiedIdJoinIndexingHandler::Create(&fake_clock_, doc_store_.get(),
                                             qualified_id_join_index_.get()));

  // Case 1: document_id == last_added_document_id, recovery mode. No error is
  // expected and the document should be ignored.
  qualified_id_join_index_->set_last_added_document_id(child_doc_id);
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/true,
                                       /*put_document_stats=*/nullptr),
              IsOk());
  // last_added_document_id is unchanged and
  // (kFakeType, kPropertyQualifiedId) contains nothing.
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id));
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));

  // Case 2: document_id < last_added_document_id, recovery mode. Again no
  // error is expected and the document should be ignored.
  qualified_id_join_index_->set_last_added_document_id(child_doc_id + 1);
  ASSERT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id + 1));
  EXPECT_THAT(indexing_handler->Handle(tokenized_child, child_doc_id,
                                       /*recovery_mode=*/true,
                                       /*put_document_stats=*/nullptr),
              IsOk());
  // last_added_document_id is unchanged and
  // (kFakeType, kPropertyQualifiedId) contains nothing.
  EXPECT_THAT(qualified_id_join_index_->last_added_document_id(),
              Eq(child_doc_id + 1));
  EXPECT_THAT(
      GetJoinData(*qualified_id_join_index_, /*schema_type_id=*/fake_type_id_,
                  /*joinable_property_id=*/fake_type_joinable_property_id_),
      IsOkAndHolds(IsEmpty()));
}
825
826 } // namespace
827
828 } // namespace lib
829 } // namespace icing
830