1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ICING_SCHEMA_PROPERTY_UTIL_H_
16 #define ICING_SCHEMA_PROPERTY_UTIL_H_
17
18 #include <string>
19 #include <string_view>
20 #include <vector>
21
22 #include "icing/text_classifier/lib3/utils/base/statusor.h"
23 #include "icing/absl_ports/canonical_errors.h"
24 #include "icing/proto/document.pb.h"
25
26 namespace icing {
27 namespace lib {
28
29 namespace property_util {
30
// Definition:
// - Expr (short for expression): with or without index.
// - property_name: one level of property name without index. E.g. "abc", "def".
// - property_name_expr: one level of property name with or without index. E.g.
//   "abc", "abc[0]", "def[1]".
// - property_path: multiple levels (including one) of property names without
//   indices. E.g. "abc", "abc.def".
// - property_path_expr: multiple levels (including one) of property name
//   expressions. E.g. "abc", "abc[0]", "abc.def", "abc[0].def",
//   "abc[0].def[1]".
//
// Set relationship graph (A -> B: A is a subset of B):
//
// property_path -> property_path_expr
//       ^                   ^
//       |                   |
// property_name -> property_name_expr

// Separator placed between consecutive property name expressions inside a
// property path expression, e.g. the "." in "abc.def".
inline constexpr std::string_view kPropertyPathSeparator = ".";

// Opening and closing delimiters wrapped around a property (value) index,
// e.g. the "[" and "]" in "abc[0]".
inline constexpr std::string_view kLBracket = "[";
inline constexpr std::string_view kRBracket = "]";

// Sentinel index used when a property name expression carries no index
// expression.
inline constexpr int kWildcardPropertyIndex = -1;
53
// The parsed form of a single property name expression: the bare property name
// together with the index that followed it, if any.
struct PropertyInfo {
  // Property name with any index expression removed, e.g. "foo" for "foo[5]".
  std::string name;
  // Index taken from the expression, or kWildcardPropertyIndex when the
  // expression had no index.
  int index;

  // Takes name_in by value and moves it into place, so a caller passing a
  // temporary (or an explicitly moved string) pays for no extra copy.
  explicit PropertyInfo(std::string name_in, int index_in)
      : name(std::move(name_in)), index(index_in) {}
};
61
// Converts a property (value) index to its string form, wrapped by kLBracket
// and kRBracket.
//
// REQUIRES: index should be valid or kWildcardPropertyIndex.
//
// Examples:
//   - ConvertToPropertyExprIndexStr(5) will return "[5]".
//   - ConvertToPropertyExprIndexStr(kWildcardPropertyIndex) will return "".
//
// Returns:
//   - "" if index is kWildcardPropertyIndex.
//   - kLBracket + std::to_string(index) + kRBracket for all non
//     kWildcardPropertyIndex indices.
std::string ConvertToPropertyExprIndexStr(int index);
72
// Concatenates 2 property path expressions.
//
// Examples:
//   - ConcatenatePropertyPathExpr("abc[0]", "def") will return "abc[0].def".
//   - ConcatenatePropertyPathExpr("abc", "") will return "abc".
//
// Returns:
//   - property_path_expr1 + "." + property_path_expr2 if both are not empty.
//   - property_path_expr1 if property_path_expr2 is empty.
//   - property_path_expr2 if property_path_expr1 is empty.
//   - "" if both are empty.
std::string ConcatenatePropertyPathExpr(std::string_view property_path_expr1,
                                        std::string_view property_path_expr2);
82
// Splits a property path expression into multiple property name expressions.
//
// NOTE(review): the returned string_views presumably refer to the buffer
// behind property_path_expr and therefore must not outlive it -- confirm
// against the implementation.
//
// Returns: a vector of property name expressions.
std::vector<std::string_view> SplitPropertyPathExpr(
    std::string_view property_path_expr);
88
89 // Parses a property name expression into (property name, property index). If
90 // the index expression is missing, then the returned property index will be
91 // kWildcardPropertyIndex.
92 //
93 // Examples:
94 // - ParsePropertyNameExpr("foo") will return ("foo",
95 // kWildcardPropertyIndex).
96 // - ParsePropertyNameExpr("foo[5]") will return ("foo", 5).
97 //
98 // Returns: a PropertyInfo instance.
99 PropertyInfo ParsePropertyNameExpr(std::string_view property_name_expr);
100
101 // Parses a property path expression into multiple (property name, property
102 // index). It is similar to ParsePropertyPathExpr, except property path
103 // expression can contain multiple name expressions.
104 //
105 // Examples:
106 // - ParsePropertyPathExpr("foo") will return [("foo",
107 // kWildcardPropertyIndex)].
108 // - ParsePropertyPathExpr("foo[5]") will return [("foo", 5)].
109 // - ParsePropertyPathExpr("foo.bar[2]") will return [("foo",
110 // kWildcardPropertyIndex), ("bar", 2)]
111 //
112 // Returns: a vector of PropertyInfo instances.
113 std::vector<PropertyInfo> ParsePropertyPathExpr(
114 std::string_view property_path_expr);
115
116 // Gets the desired PropertyProto from the document by given property name.
117 // Since the input parameter is property name, this function only deals with
118 // the first level of properties in the document and cannot deal with nested
119 // documents.
120 //
121 // Returns:
122 // - const PropertyInfo* if property name exists in the document.
123 // - nullptr if property name not found.
124 const PropertyProto* GetPropertyProto(const DocumentProto& document,
125 std::string_view property_name);
126
127 template <typename T>
ExtractPropertyValues(const PropertyProto & property)128 libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValues(
129 const PropertyProto& property) {
130 return absl_ports::UnimplementedError(
131 "Unimplemented template type for ExtractPropertyValues");
132 }
133
134 template <>
135 libtextclassifier3::StatusOr<std::vector<std::string>>
136 ExtractPropertyValues<std::string>(const PropertyProto& property);
137
138 template <>
139 libtextclassifier3::StatusOr<std::vector<std::string_view>>
140 ExtractPropertyValues<std::string_view>(const PropertyProto& property);
141
142 template <>
143 libtextclassifier3::StatusOr<std::vector<int64_t>>
144 ExtractPropertyValues<int64_t>(const PropertyProto& property);
145
146 template <typename T>
ExtractPropertyValuesFromDocument(const DocumentProto & document,std::string_view property_path)147 libtextclassifier3::StatusOr<std::vector<T>> ExtractPropertyValuesFromDocument(
148 const DocumentProto& document, std::string_view property_path) {
149 // Finds the first property name in property_path
150 size_t separator_position = property_path.find(kPropertyPathSeparator);
151 std::string_view current_property_name =
152 (separator_position == std::string::npos)
153 ? property_path
154 : property_path.substr(0, separator_position);
155
156 const PropertyProto* property_proto =
157 GetPropertyProto(document, current_property_name);
158 if (property_proto == nullptr) {
159 // Property name not found, it could be one of the following 2 cases:
160 // 1. The property is optional and it's not in the document
161 // 2. The property name is invalid
162 return std::vector<T>();
163 }
164
165 if (separator_position == std::string::npos) {
166 // Current property name is the last one in property path.
167 return ExtractPropertyValues<T>(*property_proto);
168 }
169
170 // Extracts property values recursively
171 std::string_view sub_property_path =
172 property_path.substr(separator_position + 1);
173 std::vector<T> nested_document_content;
174 for (const DocumentProto& nested_document :
175 property_proto->document_values()) {
176 auto content_or = ExtractPropertyValuesFromDocument<T>(nested_document,
177 sub_property_path);
178 if (content_or.ok()) {
179 std::vector<T> content = std::move(content_or).ValueOrDie();
180 std::move(content.begin(), content.end(),
181 std::back_inserter(nested_document_content));
182 }
183 }
184 return nested_document_content;
185 }
186
187 } // namespace property_util
188
189 } // namespace lib
190 } // namespace icing
191
192 #endif // ICING_SCHEMA_PROPERTY_UTIL_H_
193