1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_H_
18 #define LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_H_
19
20 #include <time.h>
21
22 #include <algorithm>
23 #include <cmath>
24 #include <functional>
25 #include <map>
26 #include <set>
27 #include <string>
28 #include <unordered_set>
29 #include <utility>
30 #include <vector>
31
32 #include "annotator/entity-data_generated.h"
33 #include "annotator/knowledge/knowledge-engine-types.h"
34 #include "utils/base/integral_types.h"
35 #include "utils/base/logging.h"
36 #include "utils/flatbuffers/flatbuffers.h"
37 #include "utils/optional.h"
38 #include "utils/variant.h"
39
40 namespace libtextclassifier3 {
41
// Sentinel stored in an index that does not refer to any position.
constexpr int kInvalidIndex{-1};

// Day-of-week numbering: Sunday is 1 and the values run through Saturday = 7.
constexpr int kSunday{1};
constexpr int kMonday{2};
constexpr int kTuesday{3};
constexpr int kWednesday{4};
constexpr int kThursday{5};
constexpr int kFriday{6};
constexpr int kSaturday{7};

// Position inside a 0-based array of tokens.
using TokenIndex = int;

// Position inside a 0-based array of codepoints.
using CodepointIndex = int;
56
57 // Marks a span in a sequence of codepoints. The first element is the index of
58 // the first codepoint of the span, and the second element is the index of the
59 // codepoint one past the end of the span.
60 struct CodepointSpan {
61 static const CodepointSpan kInvalid;
62
CodepointSpanCodepointSpan63 CodepointSpan() : first(kInvalidIndex), second(kInvalidIndex) {}
64
CodepointSpanCodepointSpan65 CodepointSpan(CodepointIndex start, CodepointIndex end)
66 : first(start), second(end) {}
67
68 CodepointSpan(const CodepointSpan& other) = default;
69 CodepointSpan& operator=(const CodepointSpan& other) = default;
70
71 bool operator==(const CodepointSpan& other) const {
72 return this->first == other.first && this->second == other.second;
73 }
74
75 bool operator!=(const CodepointSpan& other) const {
76 return !(*this == other);
77 }
78
79 bool operator<(const CodepointSpan& other) const {
80 if (this->first != other.first) {
81 return this->first < other.first;
82 }
83 return this->second < other.second;
84 }
85
IsValidCodepointSpan86 bool IsValid() const {
87 return this->first != kInvalidIndex && this->second != kInvalidIndex &&
88 this->first <= this->second && this->first >= 0;
89 }
90
IsEmptyCodepointSpan91 bool IsEmpty() const { return this->first == this->second; }
92
93 CodepointIndex first;
94 CodepointIndex second;
95 };
96
97 // Pretty-printing function for CodepointSpan.
98 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
99 const CodepointSpan& span);
100
SpansOverlap(const CodepointSpan & a,const CodepointSpan & b)101 inline bool SpansOverlap(const CodepointSpan& a, const CodepointSpan& b) {
102 return a.first < b.second && b.first < a.second;
103 }
104
SpanContains(const CodepointSpan & span,const CodepointSpan & sub_span)105 inline bool SpanContains(const CodepointSpan& span,
106 const CodepointSpan& sub_span) {
107 return span.first <= sub_span.first && span.second >= sub_span.second;
108 }
109
// Reports whether candidates[considered_candidate] overlaps one of the
// candidates whose indices are stored in `chosen_indices_set`.
//
// Only two chosen entries are inspected: the one found by lower_bound() for
// the considered index (the "right" neighbour) and the entry directly before
// it (the "left" neighbour).
// NOTE(review): this presumably relies on the set's comparator ordering the
// indices by span position and on the chosen spans not overlapping each
// other - confirm against the callers.
//
// T must expose a `span` member that SpansOverlap() accepts.
template <typename T>
bool DoesCandidateConflict(
    const int considered_candidate, const std::vector<T>& candidates,
    const std::set<int, std::function<bool(int, int)>>& chosen_indices_set) {
  // Nothing has been chosen yet, so nothing can conflict.
  if (chosen_indices_set.empty()) {
    return false;
  }

  const auto& considered_span = candidates[considered_candidate].span;
  auto neighbour = chosen_indices_set.lower_bound(considered_candidate);

  // Right neighbour: exists only when lower_bound() did not run off the end.
  if (neighbour != chosen_indices_set.end() &&
      SpansOverlap(considered_span, candidates[*neighbour].span)) {
    return true;
  }

  // Left neighbour: there is none when we already sit on the first entry.
  if (neighbour == chosen_indices_set.begin()) {
    return false;
  }
  --neighbour;
  return SpansOverlap(considered_span, candidates[*neighbour].span);
}
141
142 // Marks a span in a sequence of tokens. The first element is the index of the
143 // first token in the span, and the second element is the index of the token one
144 // past the end of the span.
145 struct TokenSpan {
146 static const TokenSpan kInvalid;
147
TokenSpanTokenSpan148 TokenSpan() : first(kInvalidIndex), second(kInvalidIndex) {}
149
TokenSpanTokenSpan150 TokenSpan(TokenIndex start, TokenIndex end) : first(start), second(end) {}
151
152 // Creates a token span consisting of one token.
TokenSpanTokenSpan153 explicit TokenSpan(int token_index)
154 : first(token_index), second(token_index + 1) {}
155
156 TokenSpan& operator=(const TokenSpan& other) = default;
157
158 bool operator==(const TokenSpan& other) const {
159 return this->first == other.first && this->second == other.second;
160 }
161
162 bool operator!=(const TokenSpan& other) const { return !(*this == other); }
163
164 bool operator<(const TokenSpan& other) const {
165 if (this->first != other.first) {
166 return this->first < other.first;
167 }
168 return this->second < other.second;
169 }
170
IsValidTokenSpan171 bool IsValid() const {
172 return this->first != kInvalidIndex && this->second != kInvalidIndex;
173 }
174
175 // Returns the size of the token span. Assumes that the span is valid.
SizeTokenSpan176 int Size() const { return this->second - this->first; }
177
178 // Returns an expanded token span by adding a certain number of tokens on its
179 // left and on its right.
ExpandTokenSpan180 TokenSpan Expand(int num_tokens_left, int num_tokens_right) const {
181 return {this->first - num_tokens_left, this->second + num_tokens_right};
182 }
183
184 TokenIndex first;
185 TokenIndex second;
186 };
187
188 // Pretty-printing function for TokenSpan.
189 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
190 const TokenSpan& span);
191
192 // Returns an intersection of two token spans. Assumes that both spans are
193 // valid and overlapping.
IntersectTokenSpans(const TokenSpan & token_span1,const TokenSpan & token_span2)194 inline TokenSpan IntersectTokenSpans(const TokenSpan& token_span1,
195 const TokenSpan& token_span2) {
196 return {std::max(token_span1.first, token_span2.first),
197 std::min(token_span1.second, token_span2.second)};
198 }
199
200 // Token holds a token, its position in the original string and whether it was
201 // part of the input span.
202 struct Token {
203 std::string value;
204 CodepointIndex start;
205 CodepointIndex end;
206
207 // Whether the token is a padding token.
208 bool is_padding;
209
210 // Whether the token contains only white characters.
211 bool is_whitespace;
212
213 // Default constructor constructs the padding-token.
TokenToken214 Token()
215 : Token(/*arg_value=*/"", /*arg_start=*/kInvalidIndex,
216 /*arg_end=*/kInvalidIndex, /*is_padding=*/true,
217 /*is_whitespace=*/false) {}
218
TokenToken219 Token(const std::string& arg_value, CodepointIndex arg_start,
220 CodepointIndex arg_end)
221 : Token(/*arg_value=*/arg_value, /*arg_start=*/arg_start,
222 /*arg_end=*/arg_end, /*is_padding=*/false,
223 /*is_whitespace=*/false) {}
224
TokenToken225 Token(const std::string& arg_value, CodepointIndex arg_start,
226 CodepointIndex arg_end, bool is_padding, bool is_whitespace)
227 : value(arg_value),
228 start(arg_start),
229 end(arg_end),
230 is_padding(is_padding),
231 is_whitespace(is_whitespace) {}
232
233 bool operator==(const Token& other) const {
234 return value == other.value && start == other.start && end == other.end &&
235 is_padding == other.is_padding;
236 }
237
IsContainedInSpanToken238 bool IsContainedInSpan(const CodepointSpan& span) const {
239 return start >= span.first && end <= span.second;
240 }
241 };
242
243 // Pretty-printing function for Token.
244 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
245 const Token& token);
246
247 // Returns a TokenSpan that merges all of the given token spans.
AllOf(const std::vector<Token> & tokens)248 inline TokenSpan AllOf(const std::vector<Token>& tokens) {
249 return {0, static_cast<TokenIndex>(tokens.size())};
250 }
251
// Granularity of a datetime, listed from the coarsest unit (year) to the
// finest one (second).
enum DatetimeGranularity {
  // Used as a proxy for the owning structure being uninitialized.
  GRANULARITY_UNKNOWN = -1,

  GRANULARITY_YEAR = 0,
  GRANULARITY_MONTH = 1,
  GRANULARITY_WEEK = 2,
  GRANULARITY_DAY = 3,
  GRANULARITY_HOUR = 4,
  GRANULARITY_MINUTE = 5,
  GRANULARITY_SECOND = 6
};
263
// One unit of a date and time expression. For example:
//   - {March 21, 2019} is made of the components month: {March},
//     day of month: {21} and year: {2019}.
//   - {8:00 am} is made of hour: {8}, minutes: {0} and am/pm: {am}.
struct DatetimeComponent {
  enum class ComponentType {
    UNSPECIFIED = 0,
    // Year of the date seen in the text match.
    YEAR = 1,
    // Month of the year starting with January = 1.
    MONTH = 2,
    // Week (7 days).
    WEEK = 3,
    // Day of week, start of the week is Sunday & its value is 1.
    DAY_OF_WEEK = 4,
    // Day of the month starting with 1.
    DAY_OF_MONTH = 5,
    // Hour of the day with a range of 0-23,
    // values less than 12 need the AMPM field below or heuristics
    // to definitively determine the time.
    HOUR = 6,
    // Minute of the hour with a range of 0-59.
    MINUTE = 7,
    // Seconds of the minute with a range of 0-59.
    SECOND = 8,
    // Meridiem field where 0 == AM, 1 == PM.
    MERIDIEM = 9,
    // Offset in number of minutes from UTC this date time is in.
    ZONE_OFFSET = 10,
    // Offset in number of hours for DST.
    DST_OFFSET = 11,
  };

  // TODO(hassan): Remove RelativeQualifier as in the presence of relative
  //               count RelativeQualifier is redundant.
  // Describes a relative DateTimeComponent, e.g. "next Monday",
  // "the following day", "tomorrow".
  enum class RelativeQualifier {
    UNSPECIFIED = 0,
    NEXT = 1,
    THIS = 2,
    LAST = 3,
    NOW = 4,
    TOMORROW = 5,
    YESTERDAY = 6,
    PAST = 7,
    FUTURE = 8
  };

  // Leaves every field at its in-class default (UNSPECIFIED / 0).
  DatetimeComponent() = default;

  // Creates a component with every field given explicitly.
  explicit DatetimeComponent(ComponentType arg_component_type,
                             RelativeQualifier arg_relative_qualifier,
                             int arg_value, int arg_relative_count)
      : component_type(arg_component_type),
        relative_qualifier(arg_relative_qualifier),
        value(arg_value),
        relative_count(arg_relative_count) {}

  // Components are equal when all four fields match.
  bool operator==(const DatetimeComponent& other) const {
    if (component_type != other.component_type) {
      return false;
    }
    if (relative_qualifier != other.relative_qualifier) {
      return false;
    }
    return value == other.value && relative_count == other.relative_count;
  }

  // Defined outside of this header.
  bool ShouldRoundToGranularity() const;

  ComponentType component_type = ComponentType::UNSPECIFIED;
  RelativeQualifier relative_qualifier = RelativeQualifier::UNSPECIFIED;

  // Represents the absolute value of DateTime components.
  int value = 0;
  // The number of units of change present in the relative DateTimeComponent.
  int relative_count = 0;
};
340
// Utility method that returns the finest granularity of the given
// DatetimeComponents.
343 DatetimeGranularity GetFinestGranularity(
344 const std::vector<DatetimeComponent>& datetime_component);
345
// Returns the 'DatetimeComponent' from the collection, filtered by component
// type.
347 Optional<DatetimeComponent> GetDatetimeComponent(
348 const std::vector<DatetimeComponent>& datetime_components,
349 const DatetimeComponent::ComponentType& component_type);
350
351 struct DatetimeParseResult {
352 // The absolute time in milliseconds since the epoch in UTC.
353 int64 time_ms_utc;
354
355 // The precision of the estimate then in to calculating the milliseconds
356 DatetimeGranularity granularity;
357
358 // List of parsed DateTimeComponent.
359 std::vector<DatetimeComponent> datetime_components;
360
DatetimeParseResultDatetimeParseResult361 DatetimeParseResult() : time_ms_utc(0), granularity(GRANULARITY_UNKNOWN) {}
362
DatetimeParseResultDatetimeParseResult363 DatetimeParseResult(int64 arg_time_ms_utc,
364 DatetimeGranularity arg_granularity,
365 std::vector<DatetimeComponent> arg_datetime__components)
366 : time_ms_utc(arg_time_ms_utc),
367 granularity(arg_granularity),
368 datetime_components(arg_datetime__components) {}
369
IsSetDatetimeParseResult370 bool IsSet() const { return granularity != GRANULARITY_UNKNOWN; }
371
372 bool operator==(const DatetimeParseResult& other) const {
373 return granularity == other.granularity &&
374 time_ms_utc == other.time_ms_utc &&
375 datetime_components == other.datetime_components;
376 }
377 };
378
// Tolerance used when comparing float scores for equality.
constexpr float kFloatCompareEpsilon = 1e-5;
380
381 struct DatetimeParseResultSpan {
382 CodepointSpan span;
383 std::vector<DatetimeParseResult> data;
384 float target_classification_score;
385 float priority_score;
386
DatetimeParseResultSpanDatetimeParseResultSpan387 DatetimeParseResultSpan()
388 : span(CodepointSpan::kInvalid),
389 target_classification_score(-1.0),
390 priority_score(-1.0) {}
391
DatetimeParseResultSpanDatetimeParseResultSpan392 DatetimeParseResultSpan(const CodepointSpan& span,
393 const std::vector<DatetimeParseResult>& data,
394 const float target_classification_score,
395 const float priority_score)
396 : span(span),
397 data(data),
398 target_classification_score(target_classification_score),
399 priority_score(priority_score) {}
400
401 bool operator==(const DatetimeParseResultSpan& other) const {
402 return span == other.span && data == other.data &&
403 std::abs(target_classification_score -
404 other.target_classification_score) < kFloatCompareEpsilon &&
405 std::abs(priority_score - other.priority_score) <
406 kFloatCompareEpsilon;
407 }
408 };
409
410 // Pretty-printing function for DatetimeParseResultSpan.
411 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
412 const DatetimeParseResultSpan& value);
413
// Information intended to uniquely identify a device contact. Instances are
// created by the Knowledge Engine, and dereferenced by the Contact Engine.
struct ContactPointer {
  std::string focus_contact_id;
  std::string device_id;
  std::string device_contact_id;
  std::string contact_name;
  std::string contact_name_hash;

  // Pointers are equal when all five string fields match.
  bool operator==(const ContactPointer& other) const {
    if (focus_contact_id != other.focus_contact_id) {
      return false;
    }
    if (device_id != other.device_id) {
      return false;
    }
    if (device_contact_id != other.device_contact_id) {
      return false;
    }
    if (contact_name != other.contact_name) {
      return false;
    }
    return contact_name_hash == other.contact_name_hash;
  }
};
432
433 struct ClassificationResult {
434 std::string collection;
435 float score;
436 DatetimeParseResult datetime_parse_result;
437 std::string serialized_knowledge_result;
438 ContactPointer contact_pointer;
439 std::string contact_name, contact_given_name, contact_family_name,
440 contact_nickname, contact_email_address, contact_phone_number,
441 contact_account_type, contact_account_name, contact_id,
442 contact_alternate_name;
443 int64 contact_recognition_source;
444 float contact_neural_match_score;
445 std::string app_name, app_package_name;
446 int64 numeric_value;
447 double numeric_double_value;
448
449 // Length of the parsed duration in milliseconds.
450 int64 duration_ms;
451
452 // Internal score used for conflict resolution.
453 float priority_score;
454
455
456 // Entity data information.
457 std::string serialized_entity_data;
entity_dataClassificationResult458 const EntityData* entity_data() const {
459 return LoadAndVerifyFlatbuffer<EntityData>(serialized_entity_data.data(),
460 serialized_entity_data.size());
461 }
462
ClassificationResultClassificationResult463 explicit ClassificationResult()
464 : score(-1.0f),
465 numeric_value(0),
466 numeric_double_value(0.),
467 duration_ms(0),
468 priority_score(-1.0) {}
469
ClassificationResultClassificationResult470 ClassificationResult(const std::string& arg_collection, float arg_score)
471 : collection(arg_collection),
472 score(arg_score),
473 numeric_value(0),
474 numeric_double_value(0.),
475 duration_ms(0),
476 priority_score(arg_score) {}
477
ClassificationResultClassificationResult478 ClassificationResult(const std::string& arg_collection, float arg_score,
479 float arg_priority_score)
480 : collection(arg_collection),
481 score(arg_score),
482 numeric_value(0),
483 numeric_double_value(0.),
484 duration_ms(0),
485 priority_score(arg_priority_score) {}
486
487 bool operator!=(const ClassificationResult& other) const {
488 return !(*this == other);
489 }
490
491 bool operator==(const ClassificationResult& other) const;
492 };
493
494 // Aliases for long enum values.
495 const AnnotationUsecase ANNOTATION_USECASE_SMART =
496 AnnotationUsecase_ANNOTATION_USECASE_SMART;
497 const AnnotationUsecase ANNOTATION_USECASE_RAW =
498 AnnotationUsecase_ANNOTATION_USECASE_RAW;
499
// Location of the user that accompanies an annotation request.
struct LocationContext {
  // User location latitude in degrees.
  double user_location_lat = 180.;

  // User location longitude in degrees.
  double user_location_lng = 360.;

  // The estimated horizontal accuracy of the user location in meters.
  // Analogous to android.location.Location accuracy.
  float user_location_accuracy_meters = 0.f;

  // Contexts are equal when each of the three fields differs by less than
  // 1e-8 from its counterpart.
  bool operator==(const LocationContext& other) const {
    constexpr double kTolerance = 1e-8;
    if (!(std::fabs(user_location_lat - other.user_location_lat) <
          kTolerance)) {
      return false;
    }
    if (!(std::fabs(user_location_lng - other.user_location_lng) <
          kTolerance)) {
      return false;
    }
    return std::fabs(user_location_accuracy_meters -
                     other.user_location_accuracy_meters) < kTolerance;
  }
};
520
521 struct BaseOptions {
522 // Comma-separated list of locale specification for the input text (BCP 47
523 // tags).
524 std::string locales;
525
526 // Comma-separated list of BCP 47 language tags.
527 std::string detected_text_language_tags;
528
529 // Tailors the output annotations according to the specified use-case.
530 AnnotationUsecase annotation_usecase = ANNOTATION_USECASE_SMART;
531
532 // The location context passed along with each annotation.
533 Optional<LocationContext> location_context;
534
535 // If true, the POD NER annotator is used.
536 bool use_pod_ner = true;
537
538 // If true and the model file supports that, the new vocab annotator is used
539 // to annotate "Dictionary". Otherwise, we use the FFModel to do so.
540 bool use_vocab_annotator = true;
541
542 bool operator==(const BaseOptions& other) const {
543 bool location_context_equality = this->location_context.has_value() ==
544 other.location_context.has_value();
545 if (this->location_context.has_value() &&
546 other.location_context.has_value()) {
547 location_context_equality =
548 this->location_context.value() == other.location_context.value();
549 }
550 return this->locales == other.locales &&
551 this->annotation_usecase == other.annotation_usecase &&
552 this->detected_text_language_tags ==
553 other.detected_text_language_tags &&
554 location_context_equality &&
555 this->use_pod_ner == other.use_pod_ner &&
556 this->use_vocab_annotator == other.use_vocab_annotator;
557 }
558 };
559
560 struct DatetimeOptions {
561 // For parsing relative datetimes, the reference now time against which the
562 // relative datetimes get resolved.
563 // UTC milliseconds since epoch.
564 int64 reference_time_ms_utc = 0;
565
566 // Timezone in which the input text was written (format as accepted by ICU).
567 std::string reference_timezone;
568
569 bool operator==(const DatetimeOptions& other) const {
570 return this->reference_time_ms_utc == other.reference_time_ms_utc &&
571 this->reference_timezone == other.reference_timezone;
572 }
573 };
574
575 struct SelectionOptions : public BaseOptions {};
576
577 struct ClassificationOptions : public BaseOptions, public DatetimeOptions {
578 // Comma-separated list of language tags which the user can read and
579 // understand (BCP 47).
580 std::string user_familiar_language_tags;
581 // If true, trigger dictionary on words that are of beginner level.
582 bool trigger_dictionary_on_beginner_words = false;
583 // If true, generate *Add* contact intent for email/phone entity.
584 bool enable_add_contact_intent;
585 // If true, generate *Search* intent for named entities.
586 bool enable_search_intent;
587
588 bool operator==(const ClassificationOptions& other) const {
589 return this->user_familiar_language_tags ==
590 other.user_familiar_language_tags &&
591 this->trigger_dictionary_on_beginner_words ==
592 other.trigger_dictionary_on_beginner_words &&
593 this->enable_add_contact_intent == other.enable_add_contact_intent &&
594 this->enable_search_intent == other.enable_search_intent &&
595 BaseOptions::operator==(other) && DatetimeOptions::operator==(other);
596 }
597 };
598
// Permissions granted to the annotators. Both default to true.
struct Permissions {
  // If true the user location can be used to provide better annotations.
  bool has_location_permission = true;
  // If true, annotators can use personal data to provide personalized
  // annotations.
  bool has_personalization_permission = true;

  // Permissions are equal when both flags match.
  bool operator==(const Permissions& other) const {
    if (has_location_permission != other.has_location_permission) {
      return false;
    }
    return has_personalization_permission ==
           other.has_personalization_permission;
  }
};
612
613 struct AnnotationOptions : public BaseOptions, public DatetimeOptions {
614 // List of entity types that should be used for annotation.
615 std::unordered_set<std::string> entity_types;
616
617 // If true, serialized_entity_data in the results is populated."
618 bool is_serialized_entity_data_enabled = false;
619
620 // Defines the permissions for the annotators.
621 Permissions permissions;
622
623 AnnotateMode annotate_mode = AnnotateMode::kEntityAnnotation;
624
625 // If true, trigger dictionary on words that are of beginner level.
626 bool trigger_dictionary_on_beginner_words = false;
627
628 bool operator==(const AnnotationOptions& other) const {
629 return this->is_serialized_entity_data_enabled ==
630 other.is_serialized_entity_data_enabled &&
631 this->permissions == other.permissions &&
632 this->entity_types == other.entity_types &&
633 this->annotate_mode == other.annotate_mode &&
634 this->trigger_dictionary_on_beginner_words ==
635 other.trigger_dictionary_on_beginner_words &&
636 BaseOptions::operator==(other) && DatetimeOptions::operator==(other);
637 }
638 };
639
// Returns true when ClassificationResults are equal up to scores.
641 bool ClassificationResultsEqualIgnoringScoresAndSerializedEntityData(
642 const ClassificationResult& a, const ClassificationResult& b);
643
644 // Pretty-printing function for ClassificationResult.
645 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
646 const ClassificationResult& result);
647
648 // Pretty-printing function for std::vector<ClassificationResult>.
649 logging::LoggingStringStream& operator<<(
650 logging::LoggingStringStream& stream,
651 const std::vector<ClassificationResult>& results);
652
653 // Represents a result of Annotate call.
654 struct AnnotatedSpan {
655 enum class Source { OTHER, KNOWLEDGE, DURATION, DATETIME, PERSON_NAME };
656
657 // Unicode codepoint indices in the input string.
658 CodepointSpan span = CodepointSpan::kInvalid;
659
660 // Classification result for the span.
661 std::vector<ClassificationResult> classification;
662
663 // The source of the annotation, used in conflict resolution.
664 Source source = Source::OTHER;
665
666 AnnotatedSpan() = default;
667
AnnotatedSpanAnnotatedSpan668 AnnotatedSpan(CodepointSpan arg_span,
669 std::vector<ClassificationResult> arg_classification)
670 : span(arg_span), classification(std::move(arg_classification)) {}
671
AnnotatedSpanAnnotatedSpan672 AnnotatedSpan(CodepointSpan arg_span,
673 std::vector<ClassificationResult> arg_classification,
674 Source arg_source)
675 : span(arg_span),
676 classification(std::move(arg_classification)),
677 source(arg_source) {}
678 };
679
680 // Represents Annotations that correspond to all input fragments.
681 struct Annotations {
682 // List of annotations found in the corresponding input fragments. For these
683 // annotations, topicality score will not be set.
684 std::vector<std::vector<AnnotatedSpan>> annotated_spans;
685
686 // List of topicality results found across all input fragments.
687 std::vector<ClassificationResult> topicality_results;
688
689 Annotations() = default;
690
AnnotationsAnnotations691 explicit Annotations(
692 std::vector<std::vector<AnnotatedSpan>> arg_annotated_spans)
693 : annotated_spans(std::move(arg_annotated_spans)) {}
694
AnnotationsAnnotations695 Annotations(std::vector<std::vector<AnnotatedSpan>> arg_annotated_spans,
696 std::vector<ClassificationResult> arg_topicality_results)
697 : annotated_spans(std::move(arg_annotated_spans)),
698 topicality_results(std::move(arg_topicality_results)) {}
699 };
700
701 struct InputFragment {
702 std::string text;
703 float bounding_box_top;
704 float bounding_box_height;
705
706 // If present will override the AnnotationOptions reference time and timezone
707 // when annotating this specific string fragment.
708 Optional<DatetimeOptions> datetime_options;
709 };
710
711 // Pretty-printing function for AnnotatedSpan.
712 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
713 const AnnotatedSpan& span);
714
// StringPiece analogue for std::vector<T>: a pair of const iterators into a
// vector. The span stores only the iterators, so the vector has to outlive it.
template <class T>
class VectorSpan {
 public:
  // Creates an empty span.
  VectorSpan() : begin_(), end_() {}

  // Creates a span over the whole of `v`.
  explicit VectorSpan(const std::vector<T>& v)
      : begin_(v.begin()), end_(v.end()) {}

  // Creates a span over the iterator range from `begin` up to, but not
  // including, `end`.
  VectorSpan(typename std::vector<T>::const_iterator begin,
             typename std::vector<T>::const_iterator end)
      : begin_(begin), end_(end) {}

  // Returns the i-th element of the span. No bounds checking is performed.
  const T& operator[](typename std::vector<T>::size_type i) const {
    return *(begin_ + i);
  }

  // Number of elements in the span.
  int size() const { return static_cast<int>(end_ - begin_); }

  typename std::vector<T>::const_iterator begin() const { return begin_; }
  typename std::vector<T>::const_iterator end() const { return end_; }

  // Pointer to the first element of the span, or nullptr when the span is
  // empty (an empty span has no element that could be dereferenced).
  const T* data() const { return begin_ == end_ ? nullptr : &(*begin_); }

 private:
  typename std::vector<T>::const_iterator begin_;
  typename std::vector<T>::const_iterator end_;
};
739
// Class to provide representation of date and time expressions. The parsed
// components are kept in a map keyed by component type, so there is at most
// one DatetimeComponent per type.
class DatetimeParsedData {
 public:
  // Function to set the absolute value of DateTimeComponent for the given
  // FieldType, if the field is not present it will create the field and set
  // the value.
  void SetAbsoluteValue(const DatetimeComponent::ComponentType& field_type,
                        int value);

  // Function to set the relative value of DateTimeComponent, if the field is
  // not present the function will create the field and set the relative value.
  void SetRelativeValue(
      const DatetimeComponent::ComponentType& field_type,
      const DatetimeComponent::RelativeQualifier& relative_value);

  // Add collection of 'DatetimeComponent' to 'DatetimeParsedData'.
  void AddDatetimeComponents(
      const std::vector<DatetimeComponent>& datetime_components);

  // Function to set the relative count of DateTimeComponent, if the field is
  // not present the function will create the field and set the count.
  void SetRelativeCount(const DatetimeComponent::ComponentType& field_type,
                        int relative_count);

  // Function to populate the absolute value of the FieldType and return true.
  // In case of no FieldType function will return false.
  bool GetFieldValue(const DatetimeComponent::ComponentType& field_type,
                     int* field_value) const;

  // Function to populate the relative value of the FieldType and return true.
  // In case of no relative value function will return false.
  bool GetRelativeValue(
      const DatetimeComponent::ComponentType& field_type,
      DatetimeComponent::RelativeQualifier* relative_value) const;

  // Returns relative DateTimeComponent from the parsed DateTime span. The
  // result is delivered through the `date_time_components` output parameter.
  void GetRelativeDatetimeComponents(
      std::vector<DatetimeComponent>* date_time_components) const;

  // Returns DateTimeComponent from the parsed DateTime span. The result is
  // delivered through the `date_time_components` output parameter.
  void GetDatetimeComponents(
      std::vector<DatetimeComponent>* date_time_components) const;

  // Represent the granularity of the Parsed DateTime span. The function will
  // return "GRANULARITY_UNKNOWN" if no datetime field is set.
  DatetimeGranularity GetFinestGranularity() const;

  // Utility function to check if DateTimeParsedData has FieldType initialized.
  bool HasFieldType(const DatetimeComponent::ComponentType& field_type) const;

  // Function to check if DateTimeParsedData has relative DateTimeComponent for
  // given FieldType.
  bool HasRelativeValue(
      const DatetimeComponent::ComponentType& field_type) const;

  // Function to check if DateTimeParsedData has absolute value
  // DateTimeComponent for given FieldType.
  bool HasAbsoluteValue(
      const DatetimeComponent::ComponentType& field_type) const;

  // Function to check if DateTimeParsedData has any DateTimeComponent.
  bool IsEmpty() const;

 private:
  // Gives mutable access to the component stored for `component_type`.
  // NOTE(review): judging by the name, a missing component is created first -
  // the definition is not visible in this header, confirm there.
  DatetimeComponent& GetOrCreateDatetimeComponent(
      const DatetimeComponent::ComponentType& component_type);

  // The parsed components, keyed by their component type.
  std::map<DatetimeComponent::ComponentType, DatetimeComponent>
      date_time_components_;
};
811
812 // Pretty-printing function for DateTimeParsedData.
813 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
814 const DatetimeParsedData& data);
815
816 } // namespace libtextclassifier3
817
818 #endif // LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_H_
819