1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_CMDLINE_TOKEN_RANGE_H_ 18 #define ART_CMDLINE_TOKEN_RANGE_H_ 19 20 #include <assert.h> 21 #include <algorithm> 22 #include <memory> 23 #include <string> 24 #include <vector> 25 26 #include "android-base/strings.h" 27 28 namespace art { 29 // A range of tokens to make token matching algorithms easier. 30 // 31 // We try really hard to avoid copying and store only a pointer and iterators to the 32 // interiors of the vector, so a typical copy constructor never ends up doing a deep copy. 33 // It is up to the user to play nice and not to mutate the strings in-place. 34 // 35 // Tokens are only copied if a mutating operation is performed (and even then only 36 // if it *actually* mutates the token). 37 struct TokenRange { 38 // Short-hand for a vector of strings. A single string and a token is synonymous. 39 using TokenList = std::vector<std::string>; 40 41 // Copying-from-vector constructor. TokenRangeTokenRange42 explicit TokenRange(const TokenList& token_list) 43 : token_list_(new TokenList(token_list)), 44 begin_(token_list_->begin()), 45 end_(token_list_->end()) 46 {} 47 48 // Copying-from-iterator constructor 49 template <typename ForwardIterator> TokenRangeTokenRange50 TokenRange(ForwardIterator it_begin, ForwardIterator it_end) 51 : token_list_(new TokenList(it_begin, it_end)), 52 begin_(token_list_->begin()), 53 end_(token_list_->end()) 54 {} 55 56 #if 0 57 // Copying-from-vector constructor. 58 TokenRange(const TokenList& token_list ATTRIBUTE_UNUSED, 59 TokenList::const_iterator it_begin, 60 TokenList::const_iterator it_end) 61 : token_list_(new TokenList(it_begin, it_end)), 62 begin_(token_list_->begin()), 63 end_(token_list_->end()) { 64 assert(it_begin >= token_list.begin()); 65 assert(it_end <= token_list.end()); 66 } 67 #endif 68 69 // Copying from char array constructor, convertings into tokens (strings) along the way. TokenRangeTokenRange70 TokenRange(const char* token_list[], size_t length) 71 : token_list_(new TokenList(&token_list[0], &token_list[length])), 72 begin_(token_list_->begin()), 73 end_(token_list_->end()) 74 {} 75 76 // Non-copying move-from-vector constructor. Takes over the token vector. TokenRangeTokenRange77 explicit TokenRange(TokenList&& token_list) 78 : token_list_(new TokenList(std::forward<TokenList>(token_list))), 79 begin_(token_list_->begin()), 80 end_(token_list_->end()) 81 {} 82 83 // Non-copying constructor. Retain reference to existing list of tokens. TokenRangeTokenRange84 TokenRange(const std::shared_ptr<TokenList>& token_list, 85 TokenList::const_iterator it_begin, 86 TokenList::const_iterator it_end) 87 : token_list_(token_list), 88 begin_(it_begin), 89 end_(it_end) { 90 assert(it_begin >= token_list->begin()); 91 assert(it_end <= token_list->end()); 92 } 93 94 // Non-copying copy constructor. 95 TokenRange(const TokenRange&) = default; 96 97 // Non-copying move constructor. 98 TokenRange(TokenRange&&) = default; 99 100 // Non-copying constructor. Retains reference to an existing list of tokens, with offset. TokenRangeTokenRange101 explicit TokenRange(const std::shared_ptr<TokenList>& token_list) 102 : token_list_(token_list), 103 begin_(token_list_->begin()), 104 end_(token_list_->end()) 105 {} 106 107 // Iterator type for begin() and end(). Guaranteed to be a RandomAccessIterator. 108 using iterator = TokenList::const_iterator; 109 110 // Iterator type for const begin() and const end(). Guaranteed to be a RandomAccessIterator. 111 using const_iterator = iterator; 112 113 // Create a token range by splitting a string. Each separator gets their own token. 114 // Since the separator are retained as tokens, it might be useful to call 115 // RemoveToken afterwards. SplitTokenRange116 static TokenRange Split(const std::string& string, std::initializer_list<char> separators) { 117 TokenList new_token_list; 118 119 std::string tok; 120 for (auto&& c : string) { 121 for (char sep : separators) { 122 if (c == sep) { 123 // We spotted a separator character. 124 // Push back everything before the last separator as a new token. 125 // Push back the separator as a token. 126 if (!tok.empty()) { 127 new_token_list.push_back(tok); 128 tok = ""; 129 } 130 new_token_list.push_back(std::string() + sep); 131 } else { 132 // Build up the token with another character. 133 tok += c; 134 } 135 } 136 } 137 138 if (!tok.empty()) { 139 new_token_list.push_back(tok); 140 } 141 142 return TokenRange(std::move(new_token_list)); 143 } 144 145 // A RandomAccessIterator to the first element in this range. beginTokenRange146 iterator begin() const { 147 return begin_; 148 } 149 150 // A RandomAccessIterator to one past the last element in this range. endTokenRange151 iterator end() const { 152 return end_; 153 } 154 155 // The size of the range, i.e. how many tokens are in it. SizeTokenRange156 size_t Size() const { 157 return std::distance(begin_, end_); 158 } 159 160 // Are there 0 tokens in this range? IsEmptyTokenRange161 bool IsEmpty() const { 162 return Size() > 0; 163 } 164 165 // Look up a token by it's offset. GetTokenTokenRange166 const std::string& GetToken(size_t offset) const { 167 assert(offset < Size()); 168 return *(begin_ + offset); 169 } 170 171 // Does this token range equal the other range? 172 // Equality is defined as having both the same size, and 173 // each corresponding token being equal. 174 bool operator==(const TokenRange& other) const { 175 if (this == &other) { 176 return true; 177 } 178 179 if (Size() != other.Size()) { 180 return false; 181 } 182 183 return std::equal(begin(), end(), other.begin()); 184 } 185 186 // Look up the token at the requested index. 187 const std::string& operator[](int index) const { 188 assert(index >= 0 && static_cast<size_t>(index) < Size()); 189 return *(begin() + index); 190 } 191 192 // Does this current range start with the other range? StartsWithTokenRange193 bool StartsWith(const TokenRange& other) const { 194 if (this == &other) { 195 return true; 196 } 197 198 if (Size() < other.Size()) { 199 return false; 200 } 201 202 auto& smaller = Size() < other.Size() ? *this : other; 203 auto& greater = Size() < other.Size() ? other : *this; 204 205 return std::equal(smaller.begin(), smaller.end(), greater.begin()); 206 } 207 208 // Remove all characters 'c' from each token, potentially copying the underlying tokens. RemoveCharacterTokenRange209 TokenRange RemoveCharacter(char c) const { 210 TokenList new_token_list(begin(), end()); 211 212 bool changed = false; 213 for (auto&& token : new_token_list) { 214 auto it = std::remove_if(token.begin(), token.end(), [&](char ch) { 215 if (ch == c) { 216 changed = true; 217 return true; 218 } 219 return false; 220 }); 221 token.erase(it, token.end()); 222 } 223 224 if (!changed) { 225 return *this; 226 } 227 228 return TokenRange(std::move(new_token_list)); 229 } 230 231 // Remove all tokens matching this one, potentially copying the underlying tokens. RemoveTokenTokenRange232 TokenRange RemoveToken(const std::string& token) { 233 return RemoveIf([&](const std::string& tok) { return tok == token; }); 234 } 235 236 // Discard all empty tokens, potentially copying the underlying tokens. DiscardEmptyTokenRange237 TokenRange DiscardEmpty() const { 238 return RemoveIf([](const std::string& token) { return token.empty(); }); 239 } 240 241 // Create a non-copying subset of this range. 242 // Length is trimmed so that the Slice does not go out of range. 243 TokenRange Slice(size_t offset, size_t length = std::string::npos) const { 244 assert(offset < Size()); 245 246 if (length != std::string::npos && offset + length > Size()) { 247 length = Size() - offset; 248 } 249 250 iterator it_end; 251 if (length == std::string::npos) { 252 it_end = end(); 253 } else { 254 it_end = begin() + offset + length; 255 } 256 257 return TokenRange(token_list_, begin() + offset, it_end); 258 } 259 260 // Try to match the string with tokens from this range. 261 // Each token is used to match exactly once (after which the next token is used, and so on). 262 // The matching happens from left-to-right in a non-greedy fashion. 263 // If the currently-matched token is the wildcard, then the new outputted token will 264 // contain as much as possible until the next token is matched. 265 // 266 // For example, if this == ["a:", "_", "b:] and "_" is the match string, then 267 // MatchSubstrings on "a:foob:" will yield: ["a:", "foo", "b:"] 268 // 269 // Since the string matching can fail (e.g. ["foo"] against "bar"), then this 270 // function can fail, in which cause it will return null. MatchSubstringsTokenRange271 std::unique_ptr<TokenRange> MatchSubstrings(const std::string& string, 272 const std::string& wildcard) const { 273 TokenList new_token_list; 274 275 size_t wildcard_idx = std::string::npos; 276 size_t string_idx = 0; 277 278 // Function to push all the characters matched as a wildcard so far 279 // as a brand new token. It resets the wildcard matching. 280 // Empty wildcards are possible and ok, but only if wildcard matching was on. 281 auto maybe_push_wildcard_token = [&]() { 282 if (wildcard_idx != std::string::npos) { 283 size_t wildcard_length = string_idx - wildcard_idx; 284 std::string wildcard_substr = string.substr(wildcard_idx, wildcard_length); 285 new_token_list.push_back(std::move(wildcard_substr)); 286 287 wildcard_idx = std::string::npos; 288 } 289 }; 290 291 for (iterator it = begin(); it != end(); ++it) { 292 const std::string& tok = *it; 293 294 if (tok == wildcard) { 295 maybe_push_wildcard_token(); 296 wildcard_idx = string_idx; 297 continue; 298 } 299 300 size_t next_token_idx = string.find(tok); 301 if (next_token_idx == std::string::npos) { 302 // Could not find token at all 303 return nullptr; 304 } else if (next_token_idx != string_idx && wildcard_idx == std::string::npos) { 305 // Found the token at a non-starting location, and we weren't 306 // trying to parse the wildcard. 307 return nullptr; 308 } 309 310 new_token_list.push_back(string.substr(next_token_idx, tok.size())); 311 maybe_push_wildcard_token(); 312 string_idx += tok.size(); 313 } 314 315 size_t remaining = string.size() - string_idx; 316 if (remaining > 0) { 317 if (wildcard_idx == std::string::npos) { 318 // Some characters were still remaining in the string, 319 // but it wasn't trying to match a wildcard. 320 return nullptr; 321 } 322 } 323 324 // If some characters are remaining, the rest must be a wildcard. 325 string_idx += remaining; 326 maybe_push_wildcard_token(); 327 328 return std::make_unique<TokenRange>(std::move(new_token_list)); 329 } 330 331 // Do a quick match token-by-token, and see if they match. 332 // Any tokens with a wildcard in them are only matched up until the wildcard. 333 // If this is true, then the wildcard matching later on can still fail, so this is not 334 // a guarantee that the argument is correct, it's more of a strong hint that the 335 // user-provided input *probably* was trying to match this argument. 336 // 337 // Returns how many tokens were either matched (or ignored because there was a 338 // wildcard present). 0 means no match. If the size() tokens are returned. MaybeMatchesTokenRange339 size_t MaybeMatches(const TokenRange& token_list, const std::string& wildcard) const { 340 auto token_it = token_list.begin(); 341 auto token_end = token_list.end(); 342 auto name_it = begin(); 343 auto name_end = end(); 344 345 size_t matched_tokens = 0; 346 347 while (token_it != token_end && name_it != name_end) { 348 // Skip token matching when the corresponding name has a wildcard in it. 349 const std::string& name = *name_it; 350 351 size_t wildcard_idx = name.find(wildcard); 352 if (wildcard_idx == std::string::npos) { // No wildcard present 353 // Did the definition token match the user token? 354 if (name != *token_it) { 355 return matched_tokens; 356 } 357 } else { 358 std::string name_prefix = name.substr(0, wildcard_idx); 359 360 // Did the user token start with the up-to-the-wildcard prefix? 361 if (!StartsWith(*token_it, name_prefix)) { 362 return matched_tokens; 363 } 364 } 365 366 ++token_it; 367 ++name_it; 368 ++matched_tokens; 369 } 370 371 // If we got this far, it's either a full match or the token list was too short. 372 return matched_tokens; 373 } 374 375 // Flatten the token range by joining every adjacent token with the separator character. 376 // e.g. ["hello", "world"].join('$') == "hello$world" JoinTokenRange377 std::string Join(char separator) const { 378 TokenList tmp(begin(), end()); 379 return android::base::Join(tmp, separator); 380 // TODO: Join should probably take an offset or iterators 381 } 382 383 private: StartsWithTokenRange384 static bool StartsWith(const std::string& larger, const std::string& smaller) { 385 if (larger.size() >= smaller.size()) { 386 return std::equal(smaller.begin(), smaller.end(), larger.begin()); 387 } 388 389 return false; 390 } 391 392 template <typename TPredicate> RemoveIfTokenRange393 TokenRange RemoveIf(const TPredicate& predicate) const { 394 // If any of the tokens in the token lists are empty, then 395 // we need to remove them and compress the token list into a smaller one. 396 bool remove = false; 397 for (auto it = begin_; it != end_; ++it) { 398 auto&& token = *it; 399 400 if (predicate(token)) { 401 remove = true; 402 break; 403 } 404 } 405 406 // Actually copy the token list and remove the tokens that don't match our predicate. 407 if (remove) { 408 auto token_list = std::make_shared<TokenList>(begin(), end()); 409 TokenList::iterator new_end = 410 std::remove_if(token_list->begin(), token_list->end(), predicate); 411 token_list->erase(new_end, token_list->end()); 412 413 assert(token_list_->size() > token_list->size() && "Nothing was actually removed!"); 414 415 return TokenRange(token_list); 416 } 417 418 return *this; 419 } 420 421 const std::shared_ptr<std::vector<std::string>> token_list_; 422 const iterator begin_; 423 const iterator end_; 424 }; 425 } // namespace art 426 427 #endif // ART_CMDLINE_TOKEN_RANGE_H_ 428