• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_
18 #define LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_
19 
20 #include <memory>
21 #include <string>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <vector>
25 
26 #include "annotator/datetime/extractor.h"
27 #include "annotator/model_generated.h"
28 #include "annotator/types.h"
29 #include "utils/base/integral_types.h"
30 #include "utils/calendar/calendar.h"
31 #include "utils/utf8/unicodetext.h"
32 #include "utils/utf8/unilib.h"
33 #include "utils/zlib/zlib.h"
34 
35 namespace libtextclassifier3 {
36 
37 // Parses datetime expressions in the input and resolves them to actual absolute
38 // time.
39 class DatetimeParser {
40  public:
41   static std::unique_ptr<DatetimeParser> Instance(
42       const DatetimeModel* model, const UniLib* unilib,
43       const CalendarLib* calendarlib, ZlibDecompressor* decompressor);
44 
45   // Parses the dates in 'input' and fills result. Makes sure that the results
46   // do not overlap.
47   // If 'anchor_start_end' is true the extracted results need to start at the
48   // beginning of 'input' and end at the end of it.
49   bool Parse(const std::string& input, int64 reference_time_ms_utc,
50              const std::string& reference_timezone, const std::string& locales,
51              ModeFlag mode, AnnotationUsecase annotation_usecase,
52              bool anchor_start_end,
53              std::vector<DatetimeParseResultSpan>* results) const;
54 
55   // Same as above but takes UnicodeText.
56   bool Parse(const UnicodeText& input, int64 reference_time_ms_utc,
57              const std::string& reference_timezone, const std::string& locales,
58              ModeFlag mode, AnnotationUsecase annotation_usecase,
59              bool anchor_start_end,
60              std::vector<DatetimeParseResultSpan>* results) const;
61 
62  protected:
63   explicit DatetimeParser(const DatetimeModel* model, const UniLib* unilib,
64                           const CalendarLib* calendarlib,
65                           ZlibDecompressor* decompressor);
66 
67   // Returns a list of locale ids for given locale spec string (comma-separated
68   // locale names). Assigns the first parsed locale to reference_locale.
69   std::vector<int> ParseAndExpandLocales(const std::string& locales,
70                                          std::string* reference_locale) const;
71 
72   // Helper function that finds datetime spans, only using the rules associated
73   // with the given locales.
74   bool FindSpansUsingLocales(
75       const std::vector<int>& locale_ids, const UnicodeText& input,
76       const int64 reference_time_ms_utc, const std::string& reference_timezone,
77       ModeFlag mode, AnnotationUsecase annotation_usecase,
78       bool anchor_start_end, const std::string& reference_locale,
79       std::unordered_set<int>* executed_rules,
80       std::vector<DatetimeParseResultSpan>* found_spans) const;
81 
82   bool ParseWithRule(const CompiledRule& rule, const UnicodeText& input,
83                      int64 reference_time_ms_utc,
84                      const std::string& reference_timezone,
85                      const std::string& reference_locale, const int locale_id,
86                      bool anchor_start_end,
87                      std::vector<DatetimeParseResultSpan>* result) const;
88 
89   // Converts the current match in 'matcher' into DatetimeParseResult.
90   bool ExtractDatetime(const CompiledRule& rule,
91                        const UniLib::RegexMatcher& matcher,
92                        int64 reference_time_ms_utc,
93                        const std::string& reference_timezone,
94                        const std::string& reference_locale, int locale_id,
95                        std::vector<DatetimeParseResult>* results,
96                        CodepointSpan* result_span) const;
97 
98   // Parse and extract information from current match in 'matcher'.
99   bool HandleParseMatch(const CompiledRule& rule,
100                         const UniLib::RegexMatcher& matcher,
101                         int64 reference_time_ms_utc,
102                         const std::string& reference_timezone,
103                         const std::string& reference_locale, int locale_id,
104                         std::vector<DatetimeParseResultSpan>* result) const;
105 
106  private:
107   bool initialized_;
108   const UniLib& unilib_;
109   const CalendarLib& calendarlib_;
110   std::vector<CompiledRule> rules_;
111   std::unordered_map<int, std::vector<int>> locale_to_rules_;
112   std::vector<std::unique_ptr<const UniLib::RegexPattern>> extractor_rules_;
113   std::unordered_map<DatetimeExtractorType, std::unordered_map<int, int>>
114       type_and_locale_to_extractor_rule_;
115   std::unordered_map<std::string, int> locale_string_to_id_;
116   std::vector<int> default_locale_ids_;
117   bool use_extractors_for_locating_;
118   bool generate_alternative_interpretations_when_ambiguous_;
119   bool prefer_future_for_unspecified_date_;
120 };
121 
122 }  // namespace libtextclassifier3
123 
124 #endif  // LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_
125