• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/join/qualified-id-join-indexing-handler.h"
16 
17 #include <cstdint>
18 #include <limits>
19 #include <memory>
20 #include <optional>
21 #include <string_view>
22 #include <utility>
23 #include <vector>
24 
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/join/document-join-id-pair.h"
29 #include "icing/join/qualified-id-join-index.h"
30 #include "icing/join/qualified-id.h"
31 #include "icing/legacy/core/icing-string-util.h"
32 #include "icing/proto/logging.pb.h"
33 #include "icing/schema/joinable-property.h"
34 #include "icing/store/document-filter-data.h"
35 #include "icing/store/document-id.h"
36 #include "icing/store/document-store.h"
37 #include "icing/store/namespace-id-fingerprint.h"
38 #include "icing/store/namespace-id.h"
39 #include "icing/util/clock.h"
40 #include "icing/util/logging.h"
41 #include "icing/util/status-macros.h"
42 #include "icing/util/tokenized-document.h"
43 
44 namespace icing {
45 namespace lib {
46 
47 /* static */ libtextclassifier3::StatusOr<
48     std::unique_ptr<QualifiedIdJoinIndexingHandler>>
Create(const Clock * clock,const DocumentStore * doc_store,QualifiedIdJoinIndex * qualified_id_join_index)49 QualifiedIdJoinIndexingHandler::Create(
50     const Clock* clock, const DocumentStore* doc_store,
51     QualifiedIdJoinIndex* qualified_id_join_index) {
52   ICING_RETURN_ERROR_IF_NULL(clock);
53   ICING_RETURN_ERROR_IF_NULL(doc_store);
54   ICING_RETURN_ERROR_IF_NULL(qualified_id_join_index);
55 
56   return std::unique_ptr<QualifiedIdJoinIndexingHandler>(
57       new QualifiedIdJoinIndexingHandler(clock, doc_store,
58                                          qualified_id_join_index));
59 }
60 
Handle(const TokenizedDocument & tokenized_document,DocumentId document_id,DocumentId old_document_id,bool recovery_mode,PutDocumentStatsProto * put_document_stats)61 libtextclassifier3::Status QualifiedIdJoinIndexingHandler::Handle(
62     const TokenizedDocument& tokenized_document, DocumentId document_id,
63     DocumentId old_document_id, bool recovery_mode,
64     PutDocumentStatsProto* put_document_stats) {
65   std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
66 
67   if (!IsDocumentIdValid(document_id)) {
68     return absl_ports::InvalidArgumentError(
69         IcingStringUtil::StringPrintf("Invalid DocumentId %d", document_id));
70   }
71 
72   if (qualified_id_join_index_.last_added_document_id() != kInvalidDocumentId &&
73       document_id <= qualified_id_join_index_.last_added_document_id()) {
74     if (recovery_mode) {
75       // Skip the document if document_id <= last_added_document_id in recovery
76       // mode without returning an error.
77       return libtextclassifier3::Status::OK;
78     }
79     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
80         "DocumentId %d must be greater than last added document_id %d",
81         document_id, qualified_id_join_index_.last_added_document_id()));
82   }
83   qualified_id_join_index_.set_last_added_document_id(document_id);
84 
85   switch (qualified_id_join_index_.version()) {
86     case QualifiedIdJoinIndex::Version::kV2:
87       ICING_RETURN_IF_ERROR(HandleV2(tokenized_document, document_id));
88       break;
89     case QualifiedIdJoinIndex::Version::kV3:
90       ICING_RETURN_IF_ERROR(
91           HandleV3(tokenized_document, document_id, old_document_id));
92       break;
93   }
94 
95   if (put_document_stats != nullptr) {
96     put_document_stats->set_qualified_id_join_index_latency_ms(
97         index_timer->GetElapsedMilliseconds());
98   }
99 
100   return libtextclassifier3::Status::OK;
101 }
102 
HandleV2(const TokenizedDocument & tokenized_document,DocumentId document_id)103 libtextclassifier3::Status QualifiedIdJoinIndexingHandler::HandleV2(
104     const TokenizedDocument& tokenized_document, DocumentId document_id) {
105   std::optional<DocumentFilterData> filter_data =
106       doc_store_.GetAliveDocumentFilterData(
107           document_id,
108           /*current_time_ms=*/std::numeric_limits<int64_t>::min());
109   if (!filter_data) {
110     // This should not happen.
111     return absl_ports::InternalError(
112         "Failed to get alive document filter data when indexing");
113   }
114 
115   for (const JoinableProperty<std::string_view>& qualified_id_property :
116        tokenized_document.qualified_id_join_properties()) {
117     // Parse all qualified id strings and convert them to
118     // NamespaceIdFingerprint.
119     std::vector<NamespaceIdFingerprint> ref_doc_nsid_uri_fingerprints;
120     for (std::string_view ref_qualified_id_str : qualified_id_property.values) {
121       // Attempt to parse qualified id string to make sure the format is
122       // correct.
123       auto ref_qualified_id_or = QualifiedId::Parse(ref_qualified_id_str);
124       if (!ref_qualified_id_or.ok()) {
125         // Skip incorrect format of qualified id string.
126         continue;
127       }
128 
129       QualifiedId ref_qualified_id =
130           std::move(ref_qualified_id_or).ValueOrDie();
131       auto ref_namespace_id_or =
132           doc_store_.GetNamespaceId(ref_qualified_id.name_space());
133       if (!ref_namespace_id_or.ok()) {
134         // Skip invalid namespace id.
135         continue;
136       }
137       NamespaceId ref_namespace_id =
138           std::move(ref_namespace_id_or).ValueOrDie();
139 
140       ref_doc_nsid_uri_fingerprints.push_back(
141           NamespaceIdFingerprint(ref_namespace_id, ref_qualified_id.uri()));
142     }
143 
144     // Batch add all join data of this (schema_type_id, joinable_property_id)
145     // into to the index.
146     libtextclassifier3::Status status = qualified_id_join_index_.Put(
147         filter_data->schema_type_id(), qualified_id_property.metadata.id,
148         document_id, std::move(ref_doc_nsid_uri_fingerprints));
149     if (!status.ok()) {
150       ICING_LOG(WARNING)
151           << "Failed to add data into qualified id join index v2 due to: "
152           << status.error_message();
153       return status;
154     }
155   }
156   return libtextclassifier3::Status::OK;
157 }
158 
HandleV3(const TokenizedDocument & tokenized_document,DocumentId document_id,DocumentId old_document_id)159 libtextclassifier3::Status QualifiedIdJoinIndexingHandler::HandleV3(
160     const TokenizedDocument& tokenized_document, DocumentId document_id,
161     DocumentId old_document_id) {
162   // (Parent perspective)
163   // When replacement, if there were any existing child documents joining to it,
164   // then we need to migrate the old document id to the new document id.
165   if (IsDocumentIdValid(old_document_id)) {
166     ICING_RETURN_IF_ERROR(
167         qualified_id_join_index_.MigrateParent(old_document_id, document_id));
168   }
169 
170   // (Child perspective)
171   // Add child join data.
172   for (const JoinableProperty<std::string_view>& qualified_id_property :
173        tokenized_document.qualified_id_join_properties()) {
174     if (qualified_id_property.values.empty()) {
175       continue;
176     }
177 
178     DocumentJoinIdPair child_doc_join_id_pair(
179         document_id, qualified_id_property.metadata.id);
180 
181     // Extract parent qualified ids and lookup their corresponding document ids.
182     std::vector<DocumentId> parent_doc_ids;
183     parent_doc_ids.reserve(qualified_id_property.values.size());
184     for (std::string_view parent_qualified_id_str :
185          qualified_id_property.values) {
186       libtextclassifier3::StatusOr<QualifiedId> parent_qualified_id_or =
187           QualifiedId::Parse(parent_qualified_id_str);
188       if (!parent_qualified_id_or.ok()) {
189         // Skip incorrect format of qualified id string.
190         continue;
191       }
192       QualifiedId parent_qualified_id =
193           std::move(parent_qualified_id_or).ValueOrDie();
194 
195       // Lookup document store to get the parent document id.
196       libtextclassifier3::StatusOr<DocumentId> parent_doc_id_or =
197           doc_store_.GetDocumentId(parent_qualified_id.name_space(),
198                                    parent_qualified_id.uri());
199       if (!parent_doc_id_or.ok() ||
200           parent_doc_id_or.ValueOrDie() == kInvalidDocumentId) {
201         // Skip invalid parent document id or parent document does not exist.
202         continue;
203       }
204       parent_doc_ids.push_back(parent_doc_id_or.ValueOrDie());
205     }
206 
207     // Add all parent document ids to the index.
208     libtextclassifier3::Status status = qualified_id_join_index_.Put(
209         child_doc_join_id_pair, std::move(parent_doc_ids));
210     if (!status.ok()) {
211       ICING_LOG(WARNING)
212           << "Failed to add data into qualified id join index due to: "
213           << status.error_message();
214       return status;
215     }
216   }
217   return libtextclassifier3::Status::OK;
218 }
219 
220 }  // namespace lib
221 }  // namespace icing
222