1 /* 2 * Copyright (C) 2022 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include <memory> 20 #include <string> 21 #include <unordered_set> 22 23 #include <cvd_server.pb.h> 24 25 #include "common/libs/utils/result.h" 26 27 namespace cuttlefish { 28 namespace selector { 29 30 /** 31 * A "token" is each piece of command line argument that is mostly 32 * separated by " ". 33 * 34 * Each token has a type. The type is a useful information for the 35 * grammar parser, which will use this lexer. 36 * 37 * Before going into the details, we assume that a set of flags are 38 * pre-registered, and the user may still give unregisterred flags. 39 * 40 * Note that the purpose of this lexer/parser is to separate cvd 41 * client specific arguments and the "subcmd" from the rest. So, 42 * "registered" arguments would be the cvd client specific arguments. 43 * The unregisterred arguments would be for the sub tool. 44 * 45 * Also, in terms of lexing, boolean flags are different from other 46 * value-taking flags. A boolean flag --foo could be --nofoo. 47 * 48 * 1. kKnownValueFlag 49 * --foo, -foo that may take a non-boolean value 50 * 2. kKnownFlagAndValue 51 * --foo=value, -foo=value, which does not take more values 52 * 3. kKnownBoolFlag 53 * --daemon, -daemon, etc, which may take a boolean arg 54 * 4. kKnownBoolNoFlag 55 * --nodaemon, -nodaemon, etc, which does not take another argument. 56 * 5. kUnknownFlag 57 * -anything_else or --anything_else 58 * --anything_else=any_value, etc 59 * Note that if we don't know the type of the flag, we will have to forward 60 * the entire thing to the subcmd as is. 61 * 6. kPositional 62 * mostly without leading "-" or "--" 63 * 7. kDoubleDash 64 * A literally "--" 65 * cvd and its subtools as of not are not really using that. 66 * However, it might be useful in the future for any subtool of cvd, so 67 * we allow "--" in the subcmd arguments only in the parser level. 68 * In the lexer level, we simply returns kDoubleDash token. 69 * 8. kError 70 * The rest. 71 * 72 */ 73 enum class ArgType : int { 74 kKnownValueFlag, 75 kKnownFlagAndValue, 76 kKnownBoolFlag, 77 kKnownBoolNoFlag, 78 kUnknownFlag, 79 kPositional, 80 kDoubleDash, 81 kError 82 }; 83 84 class ArgToken { 85 public: 86 ArgToken() = delete; ArgToken(const ArgType arg_type,const std::string & token)87 ArgToken(const ArgType arg_type, const std::string& token) 88 : type_(arg_type), token_(token) {} 89 ArgToken(const ArgToken& src) = default; 90 ArgToken(ArgToken&& src) = default; 91 ArgToken& operator=(const ArgToken& src) { 92 type_ = src.type_; 93 token_ = src.token_; 94 return *this; 95 } 96 ArgToken& operator=(ArgToken&& src) { 97 type_ = std::move(src.type_); 98 token_ = std::move(src.token_); 99 return *this; 100 } 101 Type()102 auto Type() const { return type_; } Token()103 const auto& Token() const { return token_; } Token()104 auto& Token() { return token_; } 105 bool operator==(const ArgToken& dst) const { 106 return Type() == dst.Type() && Token() == dst.Token(); 107 } 108 109 private: 110 ArgType type_; 111 std::string token_; 112 }; 113 114 class ArgumentsLexer { 115 friend class ArgumentsLexerBuilder; 116 using CvdProtobufArg = google::protobuf::RepeatedPtrField<std::string>; 117 118 public: 119 Result<std::vector<ArgToken>> Tokenize(const std::vector<std::string>& args); 120 Result<std::vector<ArgToken>> Tokenize(const CvdProtobufArg& args); 121 Result<std::vector<ArgToken>> Tokenize(const std::string& args, 122 const std::string delim = " "); 123 124 private: 125 // Lexer factory function will internally generate this, 126 // and give it to ArgumentsLexer. 127 struct FlagPatterns { 128 /* represents flags that takes values 129 * e.g. -group_name, --group_name (which may take an additional 130 * positional arg, or use its default value.) 131 * 132 * With the given example, this set shall be: 133 * {"-group_name", "--group_name"} 134 */ 135 std::unordered_set<std::string> value_patterns; 136 /* boolean flags 137 * e.g. --daemon, --nodaemon 138 * 139 * With the given example, this set shall be: 140 * {"-daemon", "--daemon"} 141 */ 142 std::unordered_set<std::string> bool_patterns; 143 // e.g. {"-nodaemon", "--nodaemon"} 144 std::unordered_set<std::string> bool_no_patterns; 145 }; 146 ArgumentsLexer(FlagPatterns&& flag_patterns); 147 148 // preprocess boolean flags: 149 // e.g. --help=yes --> --help 150 // --help=faLSe --> --nohelp 151 Result<std::vector<std::string>> Preprocess( 152 const std::vector<std::string>& args); 153 Result<ArgToken> Process(const std::string& token) const; 154 155 struct FlagValuePair { 156 std::string flag_string; 157 std::string value; 158 }; 159 Result<FlagValuePair> Separate( 160 const std::string& equal_included_string) const; 161 // flag_string starts with "-" or "--" 162 static bool Registered(const std::string& flag_string, 163 const FlagPatterns& flag_patterns); Registered(const std::string & flag_string)164 bool Registered(const std::string& flag_string) const { 165 return Registered(flag_string, flag_patterns_); 166 } 167 std::unordered_set<std::string> valid_bool_values_in_lower_cases_; 168 FlagPatterns flag_patterns_; 169 }; 170 171 // input to the lexer factory function 172 struct LexerFlagsSpecification { 173 std::unordered_set<std::string> known_boolean_flags; 174 std::unordered_set<std::string> known_value_flags; 175 }; 176 177 /* 178 * At the top level, there are only two tokens: flag and positional tokens. 179 * 180 * A flag token starts with "-" or "--" followed by one or more non "-" letters. 181 * A positional token starts with any character other than "-". 182 * 183 * Between flag tokens, there are "known" and "unknown" flag tokens. 184 * 185 */ 186 class ArgumentsLexerBuilder { 187 using FlagPatterns = ArgumentsLexer::FlagPatterns; 188 189 public: 190 static Result<std::unique_ptr<ArgumentsLexer>> Build( 191 const LexerFlagsSpecification& known_flags); 192 193 private: 194 static Result<FlagPatterns> GenerateFlagPatterns( 195 const LexerFlagsSpecification& known_flags); 196 }; 197 198 } // namespace selector 199 } // namespace cuttlefish 200