• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_SCHEMA_PROPERTY_UTIL_H_
16 #define ICING_SCHEMA_PROPERTY_UTIL_H_
17 
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/proto/document.pb.h"
25 
26 namespace icing {
27 namespace lib {
28 
29 namespace property_util {
30 
// Definition:
// - Expr (short for expression): with or without index.
// - property_name: one level of property name without index. E.g. "abc", "def".
// - property_name_expr: one level of property name with or without index. E.g.
//                       "abc", "abc[0]", "def[1]".
// - property_path: multiple levels (including one) of property names without
//                  indices. E.g. "abc", "abc.def".
// - property_path_expr: multiple levels (including one) of property name
//                       expressions. E.g. "abc", "abc[0]", "abc.def",
//                       "abc[0].def", "abc[0].def[1]".
//
// Set relationship graph (A -> B: A is a subset of B):
//
// property_path -> property_path_expr
//      ^                   ^
//      |                   |
// property_name -> property_name_expr

// Separator placed between consecutive property name expressions in a path,
// e.g. the "." in "abc.def".
inline constexpr std::string_view kPropertyPathSeparator = ".";
// Opening and closing delimiters around a property (value) index, e.g. the
// "[" and "]" in "abc[0]".
inline constexpr std::string_view kLBracket = "[";
inline constexpr std::string_view kRBracket = "]";

// Index reported for a property name expression that carries no index
// expression (e.g. "abc" as opposed to "abc[0]").
inline constexpr int kWildcardPropertyIndex = -1;
53 
// One parsed property name expression: the property name together with the
// value index that was attached to it.
struct PropertyInfo {
  // Property name, without any index expression.
  std::string name;
  // Value index taken from the expression, or kWildcardPropertyIndex when the
  // expression had no index.
  int index;

  // Takes name_in by value and moves it into place, so callers may pass either
  // an lvalue (copied once) or a temporary (no copy).
  explicit PropertyInfo(std::string name_in, int index_in)
      : name(std::move(name_in)), index(index_in) {}
};
61 
// Converts a property (value) index to its string form, wrapped by kLBracket
// and kRBracket.
//
// REQUIRES: index should be valid or kWildcardPropertyIndex.
//
// Returns:
//   - "" if index is kWildcardPropertyIndex.
//   - kLBracket + std::to_string(index) + kRBracket for every index other
//     than kWildcardPropertyIndex.
std::string ConvertToPropertyExprIndexStr(int index);
72 
// Concatenates 2 property path expressions, inserting the path separator only
// when both sides are non-empty.
//
// Returns:
//   - property_path_expr1 + "." + property_path_expr2 if both are not empty.
//   - property_path_expr1 if property_path_expr2 is empty.
//   - property_path_expr2 if property_path_expr1 is empty.
//   - "" if both are empty.
std::string ConcatenatePropertyPathExpr(std::string_view property_path_expr1,
                                        std::string_view property_path_expr2);
82 
// Splits a property path expression into multiple property name expressions.
//
// NOTE(review): the result holds std::string_view elements, which presumably
// refer to the storage behind property_path_expr; keep that storage alive for
// as long as the result is used -- confirm against the definition.
//
// Returns: a vector of property name expressions.
std::vector<std::string_view> SplitPropertyPathExpr(
    std::string_view property_path_expr);
88 
89 // Parses a property name expression into (property name, property index). If
90 // the index expression is missing, then the returned property index will be
91 // kWildcardPropertyIndex.
92 //
93 // Examples:
94 //   - ParsePropertyNameExpr("foo") will return ("foo",
95 //     kWildcardPropertyIndex).
96 //   - ParsePropertyNameExpr("foo[5]") will return ("foo", 5).
97 //
98 // Returns: a PropertyInfo instance.
99 PropertyInfo ParsePropertyNameExpr(std::string_view property_name_expr);
100 
101 // Parses a property path expression into multiple (property name, property
102 // index). It is similar to ParsePropertyPathExpr, except property path
103 // expression can contain multiple name expressions.
104 //
105 // Examples:
106 //   - ParsePropertyPathExpr("foo") will return [("foo",
107 //     kWildcardPropertyIndex)].
108 //   - ParsePropertyPathExpr("foo[5]") will return [("foo", 5)].
109 //   - ParsePropertyPathExpr("foo.bar[2]") will return [("foo",
110 //     kWildcardPropertyIndex), ("bar", 2)]
111 //
112 // Returns: a vector of PropertyInfo instances.
113 std::vector<PropertyInfo> ParsePropertyPathExpr(
114     std::string_view property_path_expr);
115 
116 // Gets the desired PropertyProto from the document by given property name.
117 // Since the input parameter is property name, this function only deals with
118 // the first level of properties in the document and cannot deal with nested
119 // documents.
120 //
121 // Returns:
122 //   - const PropertyInfo* if property name exists in the document.
123 //   - nullptr if property name not found.
124 const PropertyProto* GetPropertyProto(const DocumentProto& document,
125                                       std::string_view property_name);
126 
127 template <typename T>
ExtractPropertyValues(const PropertyProto & property)128 libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValues(
129     const PropertyProto& property) {
130   return absl_ports::UnimplementedError(
131       "Unimplemented template type for ExtractPropertyValues");
132 }
133 
134 template <>
135 libtextclassifier3::StatusOr<std::vector<std::string>>
136 ExtractPropertyValues<std::string>(const PropertyProto& property);
137 
138 template <>
139 libtextclassifier3::StatusOr<std::vector<std::string_view>>
140 ExtractPropertyValues<std::string_view>(const PropertyProto& property);
141 
142 template <>
143 libtextclassifier3::StatusOr<std::vector<int64_t>>
144 ExtractPropertyValues<int64_t>(const PropertyProto& property);
145 
146 template <typename T>
ExtractPropertyValuesFromDocument(const DocumentProto & document,std::string_view property_path)147 libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValuesFromDocument(
148     const DocumentProto& document, std::string_view property_path) {
149   // Finds the first property name in property_path
150   size_t separator_position = property_path.find(kPropertyPathSeparator);
151   std::string_view current_property_name =
152       (separator_position == std::string::npos)
153           ? property_path
154           : property_path.substr(0, separator_position);
155 
156   const PropertyProto* property_proto =
157       GetPropertyProto(document, current_property_name);
158   if (property_proto == nullptr) {
159     // Property name not found, it could be one of the following 2 cases:
160     // 1. The property is optional and it's not in the document
161     // 2. The property name is invalid
162     return std::vector<T>();
163   }
164 
165   if (separator_position == std::string::npos) {
166     // Current property name is the last one in property path.
167     return ExtractPropertyValues<T>(*property_proto);
168   }
169 
170   // Extracts property values recursively
171   std::string_view sub_property_path =
172       property_path.substr(separator_position + 1);
173   std::vector<T> nested_document_content;
174   for (const DocumentProto& nested_document :
175        property_proto->document_values()) {
176     auto content_or = ExtractPropertyValuesFromDocument<T>(nested_document,
177                                                            sub_property_path);
178     if (content_or.ok()) {
179       std::vector<T> content = std::move(content_or).ValueOrDie();
180       std::move(content.begin(), content.end(),
181                 std::back_inserter(nested_document_content));
182     }
183   }
184   return nested_document_content;
185 }
186 
187 }  // namespace property_util
188 
189 }  // namespace lib
190 }  // namespace icing
191 
192 #endif  // ICING_SCHEMA_PROPERTY_UTIL_H_
193