1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include <iostream>
5 #include <stack>
6
7 #include "filterrb.h"
8 #include "errmsg.h"
9
10
11 const char* PathFilter::kEInclusionNames[] = {
12 "INCLUDE",
13 "PARTIAL",
14 "EXCLUDE"
15 };
16
17
ResKeyPath()18 ResKeyPath::ResKeyPath() {}
19
ResKeyPath(const std::string & path,UErrorCode & status)20 ResKeyPath::ResKeyPath(const std::string& path, UErrorCode& status) {
21 if (path.empty() || path[0] != '/') {
22 std::cerr << "genrb error: path must start with /: " << path << std::endl;
23 status = U_PARSE_ERROR;
24 return;
25 }
26 if (path.length() == 1) {
27 return;
28 }
29 size_t i;
30 size_t j = 0;
31 while (true) {
32 i = j + 1;
33 j = path.find('/', i);
34 std::string key = path.substr(i, j - i);
35 if (key.empty()) {
36 std::cerr << "genrb error: empty subpaths and trailing slashes are not allowed: " << path << std::endl;
37 status = U_PARSE_ERROR;
38 return;
39 }
40 push(key);
41 if (j == std::string::npos) {
42 break;
43 }
44 }
45 }
46
push(const std::string & key)47 void ResKeyPath::push(const std::string& key) {
48 fPath.push_back(key);
49 }
50
pop()51 void ResKeyPath::pop() {
52 fPath.pop_back();
53 }
54
pieces() const55 const std::list<std::string>& ResKeyPath::pieces() const {
56 return fPath;
57 }
58
operator <<(std::ostream & out,const ResKeyPath & value)59 std::ostream& operator<<(std::ostream& out, const ResKeyPath& value) {
60 if (value.pieces().empty()) {
61 out << "/";
62 } else for (auto& key : value.pieces()) {
63 out << "/" << key;
64 }
65 return out;
66 }
67
68
69 PathFilter::~PathFilter() = default;
70
71
addRule(const std::string & ruleLine,UErrorCode & status)72 void SimpleRuleBasedPathFilter::addRule(const std::string& ruleLine, UErrorCode& status) {
73 if (ruleLine.empty()) {
74 std::cerr << "genrb error: empty filter rules are not allowed" << std::endl;
75 status = U_PARSE_ERROR;
76 return;
77 }
78 bool inclusionRule = false;
79 if (ruleLine[0] == '+') {
80 inclusionRule = true;
81 } else if (ruleLine[0] != '-') {
82 std::cerr << "genrb error: rules must start with + or -: " << ruleLine << std::endl;
83 status = U_PARSE_ERROR;
84 return;
85 }
86 ResKeyPath path(ruleLine.substr(1), status);
87 addRule(path, inclusionRule, status);
88 }
89
addRule(const ResKeyPath & path,bool inclusionRule,UErrorCode & status)90 void SimpleRuleBasedPathFilter::addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status) {
91 if (U_FAILURE(status)) {
92 return;
93 }
94 fRoot.applyRule(path, path.pieces().begin(), inclusionRule, status);
95 }
96
match(const ResKeyPath & path) const97 PathFilter::EInclusion SimpleRuleBasedPathFilter::match(const ResKeyPath& path) const {
98 const Tree* node = &fRoot;
99
100 // defaultResult "bubbles up" the nearest "definite" inclusion/exclusion rule
101 EInclusion defaultResult = INCLUDE;
102 if (node->fIncluded != PARTIAL) {
103 // rules handled here: "+/" and "-/"
104 defaultResult = node->fIncluded;
105 }
106
107 // isLeaf is whether the filter tree can provide no additional information
108 // even if additional subpaths are added to the given key
109 bool isLeaf = false;
110
111 for (auto& key : path.pieces()) {
112 auto child = node->fChildren.find(key);
113 // Leaf case 1: input path descends outside the filter tree
114 if (child == node->fChildren.end()) {
115 if (node->fWildcard) {
116 // A wildcard pattern is present; continue checking
117 node = node->fWildcard.get();
118 } else {
119 isLeaf = true;
120 break;
121 }
122 } else {
123 node = &child->second;
124 }
125 if (node->fIncluded != PARTIAL) {
126 defaultResult = node->fIncluded;
127 }
128 }
129
130 // Leaf case 2: input path exactly matches a filter leaf
131 if (node->isLeaf()) {
132 isLeaf = true;
133 }
134
135 // Always return PARTIAL if we are not at a leaf
136 if (!isLeaf) {
137 return PARTIAL;
138 }
139
140 // If leaf node is PARTIAL, return the default
141 if (node->fIncluded == PARTIAL) {
142 return defaultResult;
143 }
144
145 return node->fIncluded;
146 }
147
148
Tree(const Tree & other)149 SimpleRuleBasedPathFilter::Tree::Tree(const Tree& other)
150 : fIncluded(other.fIncluded), fChildren(other.fChildren) {
151 // Note: can't use the default copy assignment because of the std::unique_ptr
152 if (other.fWildcard) {
153 fWildcard.reset(new Tree(*other.fWildcard));
154 }
155 }
156
isLeaf() const157 bool SimpleRuleBasedPathFilter::Tree::isLeaf() const {
158 return fChildren.empty() && !fWildcard;
159 }
160
applyRule(const ResKeyPath & path,std::list<std::string>::const_iterator it,bool inclusionRule,UErrorCode & status)161 void SimpleRuleBasedPathFilter::Tree::applyRule(
162 const ResKeyPath& path,
163 std::list<std::string>::const_iterator it,
164 bool inclusionRule,
165 UErrorCode& status) {
166
167 // Base Case
168 if (it == path.pieces().end()) {
169 if (isVerbose() && (fIncluded != PARTIAL || !isLeaf())) {
170 std::cout << "genrb info: rule on path " << path
171 << " overrides previous rules" << std::endl;
172 }
173 fIncluded = inclusionRule ? INCLUDE : EXCLUDE;
174 fChildren.clear();
175 fWildcard.reset();
176 return;
177 }
178
179 // Recursive Step
180 auto& key = *it;
181 if (key == "*") {
182 // Case 1: Wildcard
183 if (!fWildcard) {
184 fWildcard.reset(new Tree());
185 }
186 // Apply the rule to fWildcard and also to all existing children.
187 it++;
188 fWildcard->applyRule(path, it, inclusionRule, status);
189 for (auto& child : fChildren) {
190 child.second.applyRule(path, it, inclusionRule, status);
191 }
192 it--;
193
194 } else {
195 // Case 2: Normal Key
196 auto search = fChildren.find(key);
197 if (search == fChildren.end()) {
198 if (fWildcard) {
199 // Deep-copy the existing wildcard tree into the new key
200 search = fChildren.emplace(key, Tree(*fWildcard)).first;
201 } else {
202 search = fChildren.emplace(key, Tree()).first;
203 }
204 }
205 it++;
206 search->second.applyRule(path, it, inclusionRule, status);
207 it--;
208 }
209 }
210
print(std::ostream & out,int32_t indent) const211 void SimpleRuleBasedPathFilter::Tree::print(std::ostream& out, int32_t indent) const {
212 for (int32_t i=0; i<indent; i++) out << "\t";
213 out << "included: " << kEInclusionNames[fIncluded] << std::endl;
214 for (auto& child : fChildren) {
215 for (int32_t i=0; i<indent; i++) out << "\t";
216 out << child.first << ": {" << std::endl;
217 child.second.print(out, indent + 1);
218 for (int32_t i=0; i<indent; i++) out << "\t";
219 out << "}" << std::endl;
220 }
221 if (fWildcard) {
222 for (int32_t i=0; i<indent; i++) out << "\t";
223 out << "* {" << std::endl;
224 fWildcard->print(out, indent + 1);
225 for (int32_t i=0; i<indent; i++) out << "\t";
226 out << "}" << std::endl;
227 }
228 }
229
print(std::ostream & out) const230 void SimpleRuleBasedPathFilter::print(std::ostream& out) const {
231 out << "SimpleRuleBasedPathFilter {" << std::endl;
232 fRoot.print(out, 1);
233 out << "}" << std::endl;
234 }
235
operator <<(std::ostream & out,const SimpleRuleBasedPathFilter & value)236 std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value) {
237 value.print(out);
238 return out;
239 }
240