1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "utils/grammar/rules-utils.h"
18
19 namespace libtextclassifier3::grammar {
20
ParseRulesLocales(const RulesSet * rules)21 std::vector<std::vector<Locale>> ParseRulesLocales(const RulesSet* rules) {
22 if (rules == nullptr || rules->rules() == nullptr) {
23 return {};
24 }
25 std::vector<std::vector<Locale>> locales(rules->rules()->size());
26 for (int i = 0; i < rules->rules()->size(); i++) {
27 const grammar::RulesSet_::Rules* rules_shard = rules->rules()->Get(i);
28 if (rules_shard->locale() == nullptr) {
29 continue;
30 }
31 for (const LanguageTag* tag : *rules_shard->locale()) {
32 locales[i].push_back(Locale::FromLanguageTag(tag));
33 }
34 }
35 return locales;
36 }
37
SelectLocaleMatchingShards(const RulesSet * rules,const std::vector<std::vector<Locale>> & shard_locales,const std::vector<Locale> & locales)38 std::vector<const grammar::RulesSet_::Rules*> SelectLocaleMatchingShards(
39 const RulesSet* rules,
40 const std::vector<std::vector<Locale>>& shard_locales,
41 const std::vector<Locale>& locales) {
42 std::vector<const grammar::RulesSet_::Rules*> shards;
43 if (rules->rules() == nullptr) {
44 return shards;
45 }
46 for (int i = 0; i < shard_locales.size(); i++) {
47 if (shard_locales[i].empty() ||
48 Locale::IsAnyLocaleSupported(locales,
49 /*supported_locales=*/shard_locales[i],
50 /*default_value=*/false)) {
51 shards.push_back(rules->rules()->Get(i));
52 }
53 }
54 return shards;
55 }
56
DeduplicateDerivations(const std::vector<Derivation> & derivations)57 std::vector<Derivation> DeduplicateDerivations(
58 const std::vector<Derivation>& derivations) {
59 std::vector<Derivation> sorted_candidates = derivations;
60 std::stable_sort(
61 sorted_candidates.begin(), sorted_candidates.end(),
62 [](const Derivation& a, const Derivation& b) {
63 // Sort by id.
64 if (a.rule_id != b.rule_id) {
65 return a.rule_id < b.rule_id;
66 }
67
68 // Sort by increasing start.
69 if (a.match->codepoint_span.first != b.match->codepoint_span.first) {
70 return a.match->codepoint_span.first < b.match->codepoint_span.first;
71 }
72
73 // Sort by decreasing end.
74 return a.match->codepoint_span.second > b.match->codepoint_span.second;
75 });
76
77 // Deduplicate by overlap.
78 std::vector<Derivation> result;
79 for (int i = 0; i < sorted_candidates.size(); i++) {
80 const Derivation& candidate = sorted_candidates[i];
81 bool eliminated = false;
82
83 // Due to the sorting above, the candidate can only be completely
84 // intersected by a match before it in the sorted order.
85 for (int j = i - 1; j >= 0; j--) {
86 if (sorted_candidates[j].rule_id != candidate.rule_id) {
87 break;
88 }
89 if (sorted_candidates[j].match->codepoint_span.first <=
90 candidate.match->codepoint_span.first &&
91 sorted_candidates[j].match->codepoint_span.second >=
92 candidate.match->codepoint_span.second) {
93 eliminated = true;
94 break;
95 }
96 }
97
98 if (!eliminated) {
99 result.push_back(candidate);
100 }
101 }
102 return result;
103 }
104
VerifyAssertions(const Match * match)105 bool VerifyAssertions(const Match* match) {
106 bool result = true;
107 grammar::Traverse(match, [&result](const Match* node) {
108 if (node->type != Match::kAssertionMatch) {
109 // Only validation if all checks so far passed.
110 return result;
111 }
112
113 // Positive assertions are by definition fulfilled,
114 // fail if the assertion is negative.
115 if (static_cast<const AssertionMatch*>(node)->negative) {
116 result = false;
117 }
118 return result;
119 });
120 return result;
121 }
122
123 } // namespace libtextclassifier3::grammar
124