1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "annotator/types.h"
18
19 #include <vector>
20
21 #include "utils/optional.h"
22
23 namespace libtextclassifier3 {
24
25 const CodepointSpan CodepointSpan::kInvalid =
26 CodepointSpan(kInvalidIndex, kInvalidIndex);
27
28 const TokenSpan TokenSpan::kInvalid = TokenSpan(kInvalidIndex, kInvalidIndex);
29
operator <<(logging::LoggingStringStream & stream,const CodepointSpan & span)30 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
31 const CodepointSpan& span) {
32 return stream << "CodepointSpan(" << span.first << ", " << span.second << ")";
33 }
34
operator <<(logging::LoggingStringStream & stream,const TokenSpan & span)35 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
36 const TokenSpan& span) {
37 return stream << "TokenSpan(" << span.first << ", " << span.second << ")";
38 }
39
operator <<(logging::LoggingStringStream & stream,const Token & token)40 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
41 const Token& token) {
42 if (!token.is_padding) {
43 return stream << "Token(\"" << token.value << "\", " << token.start << ", "
44 << token.end << ")";
45 } else {
46 return stream << "Token()";
47 }
48 }
49
ShouldRoundToGranularity() const50 bool DatetimeComponent::ShouldRoundToGranularity() const {
51 // Don't round to the granularity for relative expressions that specify the
52 // distance. So that, e.g. "in 2 hours" when it's 8:35:03 will result in
53 // 10:35:03.
54 if (relative_qualifier == RelativeQualifier::UNSPECIFIED) {
55 return false;
56 }
57 if (relative_qualifier == RelativeQualifier::NEXT ||
58 relative_qualifier == RelativeQualifier::TOMORROW ||
59 relative_qualifier == RelativeQualifier::YESTERDAY ||
60 relative_qualifier == RelativeQualifier::LAST ||
61 relative_qualifier == RelativeQualifier::THIS ||
62 relative_qualifier == RelativeQualifier::NOW) {
63 return true;
64 }
65 return false;
66 }
67
68 namespace {
FormatMillis(int64 time_ms_utc)69 std::string FormatMillis(int64 time_ms_utc) {
70 long time_seconds = time_ms_utc / 1000; // NOLINT
71 char buffer[512];
72 strftime(buffer, sizeof(buffer), "%a %Y-%m-%d %H:%M:%S %Z",
73 localtime(&time_seconds));
74 return std::string(buffer);
75 }
76 } // namespace
77
ComponentTypeToString(const DatetimeComponent::ComponentType & component_type)78 std::string ComponentTypeToString(
79 const DatetimeComponent::ComponentType& component_type) {
80 switch (component_type) {
81 case DatetimeComponent::ComponentType::UNSPECIFIED:
82 return "UNSPECIFIED";
83 case DatetimeComponent::ComponentType::YEAR:
84 return "YEAR";
85 case DatetimeComponent::ComponentType::MONTH:
86 return "MONTH";
87 case DatetimeComponent::ComponentType::WEEK:
88 return "WEEK";
89 case DatetimeComponent::ComponentType::DAY_OF_WEEK:
90 return "DAY_OF_WEEK";
91 case DatetimeComponent::ComponentType::DAY_OF_MONTH:
92 return "DAY_OF_MONTH";
93 case DatetimeComponent::ComponentType::HOUR:
94 return "HOUR";
95 case DatetimeComponent::ComponentType::MINUTE:
96 return "MINUTE";
97 case DatetimeComponent::ComponentType::SECOND:
98 return "SECOND";
99 case DatetimeComponent::ComponentType::MERIDIEM:
100 return "MERIDIEM";
101 case DatetimeComponent::ComponentType::ZONE_OFFSET:
102 return "ZONE_OFFSET";
103 case DatetimeComponent::ComponentType::DST_OFFSET:
104 return "DST_OFFSET";
105 default:
106 return "";
107 }
108 }
109
RelativeQualifierToString(const DatetimeComponent::RelativeQualifier & relative_qualifier)110 std::string RelativeQualifierToString(
111 const DatetimeComponent::RelativeQualifier& relative_qualifier) {
112 switch (relative_qualifier) {
113 case DatetimeComponent::RelativeQualifier::UNSPECIFIED:
114 return "UNSPECIFIED";
115 case DatetimeComponent::RelativeQualifier::NEXT:
116 return "NEXT";
117 case DatetimeComponent::RelativeQualifier::THIS:
118 return "THIS";
119 case DatetimeComponent::RelativeQualifier::LAST:
120 return "LAST";
121 case DatetimeComponent::RelativeQualifier::NOW:
122 return "NOW";
123 case DatetimeComponent::RelativeQualifier::TOMORROW:
124 return "TOMORROW";
125 case DatetimeComponent::RelativeQualifier::YESTERDAY:
126 return "YESTERDAY";
127 case DatetimeComponent::RelativeQualifier::PAST:
128 return "PAST";
129 case DatetimeComponent::RelativeQualifier::FUTURE:
130 return "FUTURE";
131 default:
132 return "";
133 }
134 }
135
operator <<(logging::LoggingStringStream & stream,const DatetimeParseResultSpan & value)136 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
137 const DatetimeParseResultSpan& value) {
138 stream << "DatetimeParseResultSpan({" << value.span.first << ", "
139 << value.span.second << "}, "
140 << "/*target_classification_score=*/ "
141 << value.target_classification_score << "/*priority_score=*/"
142 << value.priority_score << " {";
143 for (const DatetimeParseResult& data : value.data) {
144 stream << "{/*time_ms_utc=*/ " << data.time_ms_utc << " /* "
145 << FormatMillis(data.time_ms_utc) << " */, /*granularity=*/ "
146 << data.granularity << ", /*datetime_components=*/ ";
147 for (const DatetimeComponent& datetime_comp : data.datetime_components) {
148 stream << "{/*component_type=*/ "
149 << ComponentTypeToString(datetime_comp.component_type)
150 << " /*relative_qualifier=*/ "
151 << RelativeQualifierToString(datetime_comp.relative_qualifier)
152 << " /*value=*/ " << datetime_comp.value << " /*relative_count=*/ "
153 << datetime_comp.relative_count << "}, ";
154 }
155 stream << "}, ";
156 }
157 stream << "})";
158 return stream;
159 }
160
operator ==(const ClassificationResult & other) const161 bool ClassificationResult::operator==(const ClassificationResult& other) const {
162 return ClassificationResultsEqualIgnoringScoresAndSerializedEntityData(
163 *this, other) &&
164 fabs(score - other.score) < 0.001 &&
165 fabs(priority_score - other.priority_score) < 0.001 &&
166 serialized_entity_data == other.serialized_entity_data;
167 }
168
ClassificationResultsEqualIgnoringScoresAndSerializedEntityData(const ClassificationResult & a,const ClassificationResult & b)169 bool ClassificationResultsEqualIgnoringScoresAndSerializedEntityData(
170 const ClassificationResult& a, const ClassificationResult& b) {
171 return a.collection == b.collection &&
172 a.datetime_parse_result == b.datetime_parse_result &&
173 a.serialized_knowledge_result == b.serialized_knowledge_result &&
174 a.contact_pointer == b.contact_pointer &&
175 a.contact_name == b.contact_name &&
176 a.contact_given_name == b.contact_given_name &&
177 a.contact_family_name == b.contact_family_name &&
178 a.contact_nickname == b.contact_nickname &&
179 a.contact_email_address == b.contact_email_address &&
180 a.contact_phone_number == b.contact_phone_number &&
181 a.contact_id == b.contact_id &&
182 a.app_package_name == b.app_package_name &&
183 a.numeric_value == b.numeric_value &&
184 fabs(a.numeric_double_value - b.numeric_double_value) < 0.001 &&
185 a.duration_ms == b.duration_ms;
186 }
187
operator <<(logging::LoggingStringStream & stream,const ClassificationResult & result)188 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
189 const ClassificationResult& result) {
190 return stream << "ClassificationResult(" << result.collection
191 << ", /*score=*/ " << result.score << ", /*priority_score=*/ "
192 << result.priority_score << ")";
193 }
194
operator <<(logging::LoggingStringStream & stream,const std::vector<ClassificationResult> & results)195 logging::LoggingStringStream& operator<<(
196 logging::LoggingStringStream& stream,
197 const std::vector<ClassificationResult>& results) {
198 stream = stream << "{\n";
199 for (const ClassificationResult& result : results) {
200 stream = stream << " " << result << "\n";
201 }
202 stream = stream << "}";
203 return stream;
204 }
205
operator <<(logging::LoggingStringStream & stream,const AnnotatedSpan & span)206 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
207 const AnnotatedSpan& span) {
208 std::string best_class;
209 float best_score = -1;
210 if (!span.classification.empty()) {
211 best_class = span.classification[0].collection;
212 best_score = span.classification[0].score;
213 }
214 return stream << "Span(" << span.span.first << ", " << span.span.second
215 << ", " << best_class << ", " << best_score << ")";
216 }
217
operator <<(logging::LoggingStringStream & stream,const DatetimeParsedData & data)218 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
219 const DatetimeParsedData& data) {
220 std::vector<DatetimeComponent> date_time_components;
221 data.GetDatetimeComponents(&date_time_components);
222 stream = stream << "DatetimeParsedData { \n";
223 for (const DatetimeComponent& c : date_time_components) {
224 stream = stream << " DatetimeComponent { \n";
225 stream = stream << " Component Type:" << static_cast<int>(c.component_type)
226 << "\n";
227 stream = stream << " Value:" << c.value << "\n";
228 stream = stream << " Relative Qualifier:"
229 << static_cast<int>(c.relative_qualifier) << "\n";
230 stream = stream << " Relative Count:" << c.relative_count << "\n";
231 stream = stream << " } \n";
232 }
233 stream = stream << "}";
234 return stream;
235 }
236
SetAbsoluteValue(const DatetimeComponent::ComponentType & field_type,int value)237 void DatetimeParsedData::SetAbsoluteValue(
238 const DatetimeComponent::ComponentType& field_type, int value) {
239 GetOrCreateDatetimeComponent(field_type).value = value;
240 }
241
SetRelativeValue(const DatetimeComponent::ComponentType & field_type,const DatetimeComponent::RelativeQualifier & relative_value)242 void DatetimeParsedData::SetRelativeValue(
243 const DatetimeComponent::ComponentType& field_type,
244 const DatetimeComponent::RelativeQualifier& relative_value) {
245 GetOrCreateDatetimeComponent(field_type).relative_qualifier = relative_value;
246 }
247
SetRelativeCount(const DatetimeComponent::ComponentType & field_type,int relative_count)248 void DatetimeParsedData::SetRelativeCount(
249 const DatetimeComponent::ComponentType& field_type, int relative_count) {
250 GetOrCreateDatetimeComponent(field_type).relative_count = relative_count;
251 }
252
AddDatetimeComponents(const std::vector<DatetimeComponent> & datetime_components)253 void DatetimeParsedData::AddDatetimeComponents(
254 const std::vector<DatetimeComponent>& datetime_components) {
255 for (const DatetimeComponent& datetime_component : datetime_components) {
256 date_time_components_.insert(
257 {datetime_component.component_type, datetime_component});
258 }
259 }
260
HasFieldType(const DatetimeComponent::ComponentType & field_type) const261 bool DatetimeParsedData::HasFieldType(
262 const DatetimeComponent::ComponentType& field_type) const {
263 if (date_time_components_.find(field_type) == date_time_components_.end()) {
264 return false;
265 }
266 return true;
267 }
268
GetFieldValue(const DatetimeComponent::ComponentType & field_type,int * field_value) const269 bool DatetimeParsedData::GetFieldValue(
270 const DatetimeComponent::ComponentType& field_type,
271 int* field_value) const {
272 if (HasFieldType(field_type)) {
273 *field_value = date_time_components_.at(field_type).value;
274 return true;
275 }
276 return false;
277 }
278
GetRelativeValue(const DatetimeComponent::ComponentType & field_type,DatetimeComponent::RelativeQualifier * relative_value) const279 bool DatetimeParsedData::GetRelativeValue(
280 const DatetimeComponent::ComponentType& field_type,
281 DatetimeComponent::RelativeQualifier* relative_value) const {
282 if (HasFieldType(field_type)) {
283 *relative_value = date_time_components_.at(field_type).relative_qualifier;
284 return true;
285 }
286 return false;
287 }
288
HasRelativeValue(const DatetimeComponent::ComponentType & field_type) const289 bool DatetimeParsedData::HasRelativeValue(
290 const DatetimeComponent::ComponentType& field_type) const {
291 if (HasFieldType(field_type)) {
292 return date_time_components_.at(field_type).relative_qualifier !=
293 DatetimeComponent::RelativeQualifier::UNSPECIFIED;
294 }
295 return false;
296 }
297
HasAbsoluteValue(const DatetimeComponent::ComponentType & field_type) const298 bool DatetimeParsedData::HasAbsoluteValue(
299 const DatetimeComponent::ComponentType& field_type) const {
300 return HasFieldType(field_type) && !HasRelativeValue(field_type);
301 }
302
IsEmpty() const303 bool DatetimeParsedData::IsEmpty() const {
304 return date_time_components_.empty();
305 }
306
GetRelativeDatetimeComponents(std::vector<DatetimeComponent> * date_time_components) const307 void DatetimeParsedData::GetRelativeDatetimeComponents(
308 std::vector<DatetimeComponent>* date_time_components) const {
309 for (auto it = date_time_components_.begin();
310 it != date_time_components_.end(); it++) {
311 if (it->second.relative_qualifier !=
312 DatetimeComponent::RelativeQualifier::UNSPECIFIED) {
313 date_time_components->push_back(it->second);
314 }
315 }
316 }
317
GetDatetimeComponents(std::vector<DatetimeComponent> * date_time_components) const318 void DatetimeParsedData::GetDatetimeComponents(
319 std::vector<DatetimeComponent>* date_time_components) const {
320 for (auto it = date_time_components_.begin();
321 it != date_time_components_.end(); it++) {
322 date_time_components->push_back(it->second);
323 }
324 }
325
// Returns a mutable reference to the stored component of type
// `component_type`. If none exists yet, one is inserted first, constructed
// with an UNSPECIFIED relative qualifier and zeros for the two remaining
// constructor arguments.
DatetimeComponent& DatetimeParsedData::GetOrCreateDatetimeComponent(
    const DatetimeComponent::ComponentType& component_type) {
  const DatetimeComponent blank_component(
      component_type, DatetimeComponent::RelativeQualifier::UNSPECIFIED, 0, 0);
  // insert() leaves an existing entry untouched and returns its position.
  const auto insertion =
      date_time_components_.insert({component_type, blank_component});
  return insertion.first->second;
}
338
339 namespace {
GetFinestGranularityFromComponentTypes(const std::vector<DatetimeComponent::ComponentType> & datetime_component_types)340 DatetimeGranularity GetFinestGranularityFromComponentTypes(
341 const std::vector<DatetimeComponent::ComponentType>&
342 datetime_component_types) {
343 DatetimeGranularity granularity = DatetimeGranularity::GRANULARITY_UNKNOWN;
344 for (const auto& component_type : datetime_component_types) {
345 switch (component_type) {
346 case DatetimeComponent::ComponentType::YEAR:
347 if (granularity < DatetimeGranularity::GRANULARITY_YEAR) {
348 granularity = DatetimeGranularity::GRANULARITY_YEAR;
349 }
350 break;
351
352 case DatetimeComponent::ComponentType::MONTH:
353 if (granularity < DatetimeGranularity::GRANULARITY_MONTH) {
354 granularity = DatetimeGranularity::GRANULARITY_MONTH;
355 }
356 break;
357
358 case DatetimeComponent::ComponentType::WEEK:
359 if (granularity < DatetimeGranularity::GRANULARITY_WEEK) {
360 granularity = DatetimeGranularity::GRANULARITY_WEEK;
361 }
362 break;
363
364 case DatetimeComponent::ComponentType::DAY_OF_WEEK:
365 case DatetimeComponent::ComponentType::DAY_OF_MONTH:
366 if (granularity < DatetimeGranularity::GRANULARITY_DAY) {
367 granularity = DatetimeGranularity::GRANULARITY_DAY;
368 }
369 break;
370
371 case DatetimeComponent::ComponentType::HOUR:
372 if (granularity < DatetimeGranularity::GRANULARITY_HOUR) {
373 granularity = DatetimeGranularity::GRANULARITY_HOUR;
374 }
375 break;
376
377 case DatetimeComponent::ComponentType::MINUTE:
378 if (granularity < DatetimeGranularity::GRANULARITY_MINUTE) {
379 granularity = DatetimeGranularity::GRANULARITY_MINUTE;
380 }
381 break;
382
383 case DatetimeComponent::ComponentType::SECOND:
384 if (granularity < DatetimeGranularity::GRANULARITY_SECOND) {
385 granularity = DatetimeGranularity::GRANULARITY_SECOND;
386 }
387 break;
388
389 case DatetimeComponent::ComponentType::MERIDIEM:
390 case DatetimeComponent::ComponentType::ZONE_OFFSET:
391 case DatetimeComponent::ComponentType::DST_OFFSET:
392 default:
393 break;
394 }
395 }
396 return granularity;
397 }
398 } // namespace
399
GetFinestGranularity() const400 DatetimeGranularity DatetimeParsedData::GetFinestGranularity() const {
401 std::vector<DatetimeComponent::ComponentType> component_types;
402 std::transform(date_time_components_.begin(), date_time_components_.end(),
403 std::back_inserter(component_types),
404 [](const std::map<DatetimeComponent::ComponentType,
405 DatetimeComponent>::value_type& pair) {
406 return pair.first;
407 });
408 return GetFinestGranularityFromComponentTypes(component_types);
409 }
410
GetDatetimeComponent(const std::vector<DatetimeComponent> & datetime_components,const DatetimeComponent::ComponentType & component_type)411 Optional<DatetimeComponent> GetDatetimeComponent(
412 const std::vector<DatetimeComponent>& datetime_components,
413 const DatetimeComponent::ComponentType& component_type) {
414 for (auto datetime_component : datetime_components) {
415 if (datetime_component.component_type == component_type) {
416 return Optional<DatetimeComponent>(datetime_component);
417 }
418 }
419 return Optional<DatetimeComponent>();
420 }
421
422 // Returns the granularity of the DatetimeComponents.
GetFinestGranularity(const std::vector<DatetimeComponent> & datetime_component)423 DatetimeGranularity GetFinestGranularity(
424 const std::vector<DatetimeComponent>& datetime_component) {
425 std::vector<DatetimeComponent::ComponentType> component_types;
426 std::transform(datetime_component.begin(), datetime_component.end(),
427 std::back_inserter(component_types),
428 [](const DatetimeComponent& component) {
429 return component.component_type;
430 });
431 return GetFinestGranularityFromComponentTypes(component_types);
432 }
433
434 } // namespace libtextclassifier3
435