1 /*
2 * Copyright (C) 2022 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "host/commands/cvd/selector/arguments_lexer.h"
18
19 #include <algorithm>
20 #include <regex>
21 #include <vector>
22
23 #include <android-base/strings.h>
24
25 #include "host/commands/cvd/selector/instance_database_utils.h"
26
27 namespace cuttlefish {
28 namespace selector {
29 namespace {
30
/*
 * Tells whether `item` is found in at least one of `containers`.
 *
 * The containers are probed with Contains() from left to right and the search
 * stops at the first hit. When no container is given, the answer is false.
 */
template <typename... Sets>
bool Included(const std::string& item, Sets&&... containers) {
  // A unary fold over || evaluates to false for an empty pack.
  return (Contains(std::forward<Sets>(containers), item) || ...);
}
35
36 } // namespace
37
38 /*
39 * Eventually, we get two sets, each include strings start with "-" or "--".
40 *
41 * Say, the two sets are BaseSet and NoPrependedSet.
42 *
43 * Given a boolean flag --foo, these will happen:
44 * BaseSet = BaseSet U {"--foo", "-foo"}
45 * NoPrependedSet = NoPrependedSet U {"--nofoo", "-nofoo"}
46 * Given a non boolean flag --bar, these will happen:
47 * BaseSet = BaseSet U {"--bar", "-bar"}
48 *
49 * Later on, when the parser reads a token, the parser will look up the
50 * two sets to see if the token that is supposedly a flag is a known
51 * flag.
52 */
53 Result<ArgumentsLexerBuilder::FlagPatterns>
GenerateFlagPatterns(const LexerFlagsSpecification & known_flags)54 ArgumentsLexerBuilder::GenerateFlagPatterns(
55 const LexerFlagsSpecification& known_flags) {
56 FlagPatterns flag_patterns;
57 for (const auto& non_bool_flag : known_flags.known_value_flags) {
58 const auto one_dash = "-" + non_bool_flag;
59 const auto two_dashes = "--" + non_bool_flag;
60 CF_EXPECT(!ArgumentsLexer::Registered(one_dash, flag_patterns));
61 CF_EXPECT(!ArgumentsLexer::Registered(two_dashes, flag_patterns));
62 flag_patterns.value_patterns.insert(one_dash);
63 flag_patterns.value_patterns.insert(two_dashes);
64 }
65 for (const auto& bool_flag : known_flags.known_boolean_flags) {
66 const auto one_dash = "-" + bool_flag;
67 const auto two_dashes = "--" + bool_flag;
68 const auto one_dash_with_no = "-no" + bool_flag;
69 const auto two_dashes_with_no = "--no" + bool_flag;
70 CF_EXPECT(!ArgumentsLexer::Registered(one_dash, flag_patterns));
71 CF_EXPECT(!ArgumentsLexer::Registered(two_dashes, flag_patterns));
72 CF_EXPECT(!ArgumentsLexer::Registered(one_dash_with_no, flag_patterns));
73 CF_EXPECT(!ArgumentsLexer::Registered(two_dashes_with_no, flag_patterns));
74 flag_patterns.bool_patterns.insert(one_dash);
75 flag_patterns.bool_patterns.insert(two_dashes);
76 flag_patterns.bool_no_patterns.insert(one_dash_with_no);
77 flag_patterns.bool_no_patterns.insert(two_dashes_with_no);
78 }
79 return flag_patterns;
80 }
81
Build(const LexerFlagsSpecification & known_flags)82 Result<std::unique_ptr<ArgumentsLexer>> ArgumentsLexerBuilder::Build(
83 const LexerFlagsSpecification& known_flags) {
84 auto flag_patterns = CF_EXPECT(GenerateFlagPatterns(known_flags));
85 ArgumentsLexer* new_lexer = new ArgumentsLexer(std::move(flag_patterns));
86 CF_EXPECT(new_lexer != nullptr,
87 "Memory allocation for ArgumentsLexer failed.");
88 return std::unique_ptr<ArgumentsLexer>{new_lexer};
89 }
90
ArgumentsLexer(FlagPatterns && flag_patterns)91 ArgumentsLexer::ArgumentsLexer(FlagPatterns&& flag_patterns)
92 : flag_patterns_{std::move(flag_patterns)} {
93 valid_bool_values_in_lower_cases_ = std::move(
94 std::unordered_set<std::string>{"true", "false", "yes", "no", "y", "n"});
95 }
96
Registered(const std::string & flag_string,const FlagPatterns & flag_patterns)97 bool ArgumentsLexer::Registered(const std::string& flag_string,
98 const FlagPatterns& flag_patterns) {
99 return Included(flag_string, flag_patterns.value_patterns,
100 flag_patterns.bool_patterns, flag_patterns.bool_no_patterns);
101 }
102
Process(const std::string & token) const103 Result<ArgToken> ArgumentsLexer::Process(const std::string& token) const {
104 if (token == "--") {
105 return ArgToken{ArgType::kDoubleDash, token};
106 }
107 std::regex flag_and_value_pattern("[\\-][\\-]?[^\\-]+.*=.*");
108 std::regex flag_pattern("[\\-][\\-]?[^\\-]+.*");
109 std::regex base_pattern("[^\\-]+.*");
110 if (std::regex_match(token, base_pattern)) {
111 return ArgToken{ArgType::kPositional, token};
112 }
113 if (!std::regex_match(token, flag_pattern)) {
114 return ArgToken{ArgType::kError, token};
115 }
116 // --flag=value
117 if (std::regex_match(token, flag_and_value_pattern)) {
118 auto [flag_string, value] = CF_EXPECT(Separate(token));
119 // is --flag registered?
120 if (Contains(flag_patterns_.value_patterns, flag_string)) {
121 return ArgToken{ArgType::kKnownFlagAndValue, token};
122 }
123 return ArgToken{ArgType::kUnknownFlag, token};
124 }
125 if (Contains(flag_patterns_.value_patterns, token)) {
126 return ArgToken{ArgType::kKnownValueFlag, token};
127 }
128 if (Contains(flag_patterns_.bool_patterns, token)) {
129 return ArgToken{ArgType::kKnownBoolFlag, token};
130 }
131 if (Contains(flag_patterns_.bool_no_patterns, token)) {
132 return ArgToken{ArgType::kKnownBoolNoFlag, token};
133 }
134 return ArgToken{ArgType::kUnknownFlag, token};
135 }
136
Tokenize(const CvdProtobufArg & args)137 Result<std::vector<ArgToken>> ArgumentsLexer::Tokenize(
138 const CvdProtobufArg& args) {
139 std::vector<std::string> args_vec;
140 args_vec.reserve(args.size());
141 for (const auto& arg : args) {
142 args_vec.emplace_back(arg);
143 }
144 auto arg_tokens = CF_EXPECT(Tokenize(args_vec));
145 return arg_tokens;
146 }
147
Tokenize(const std::string & args,const std::string delim)148 Result<std::vector<ArgToken>> ArgumentsLexer::Tokenize(
149 const std::string& args, const std::string delim) {
150 auto args_vec = android::base::Tokenize(args, delim);
151 auto arg_tokens = CF_EXPECT(Tokenize(args_vec));
152 return arg_tokens;
153 }
154
Tokenize(const std::vector<std::string> & args)155 Result<std::vector<ArgToken>> ArgumentsLexer::Tokenize(
156 const std::vector<std::string>& args) {
157 std::vector<ArgToken> tokenized;
158 auto intersection =
159 Intersection(flag_patterns_.value_patterns, flag_patterns_.bool_patterns);
160 CF_EXPECT(intersection.empty());
161 auto preprocessed_args = CF_EXPECT(Preprocess(args));
162 for (const auto& arg : preprocessed_args) {
163 auto arg_token = CF_EXPECT(Process(arg));
164 tokenized.emplace_back(arg_token);
165 }
166 return tokenized;
167 }
168
/*
 * Returns a copy of `src` in which every character has been passed through
 * ::tolower().
 *
 * Each character is converted to unsigned char before the call: handing
 * tolower() a negative value other than EOF (which is what any byte >= 0x80
 * becomes where plain char is signed) is undefined behavior.
 */
static std::string ToLower(const std::string& src) {
  std::string lower_cased_value(src);
  std::transform(lower_cased_value.begin(), lower_cased_value.end(),
                 lower_cased_value.begin(), [](unsigned char c) {
                   return static_cast<char>(::tolower(c));
                 });
  return lower_cased_value;
}
175
Separate(const std::string & equal_included_string) const176 Result<ArgumentsLexer::FlagValuePair> ArgumentsLexer::Separate(
177 const std::string& equal_included_string) const {
178 CF_EXPECT(Contains(equal_included_string, "="));
179 auto equal_sign_pos = equal_included_string.find_first_of('=');
180 auto first_token = equal_included_string.substr(0, equal_sign_pos);
181 auto second_token = equal_included_string.substr(equal_sign_pos + 1);
182 return FlagValuePair{.flag_string = first_token, .value = second_token};
183 }
184
Preprocess(const std::vector<std::string> & args)185 Result<std::vector<std::string>> ArgumentsLexer::Preprocess(
186 const std::vector<std::string>& args) {
187 std::vector<std::string> new_args;
188 std::regex pattern("[\\-][\\-]?[^\\-]+.*=.*");
189 for (const auto& arg : args) {
190 if (!std::regex_match(arg, pattern)) {
191 new_args.emplace_back(arg);
192 continue;
193 }
194 // needs to split based on the first '='
195 // --something=another_thing or
196 // -something=another_thing
197 const auto [flag_string, value] = CF_EXPECT(Separate(arg));
198
199 if (Contains(flag_patterns_.bool_patterns, flag_string)) {
200 const auto low_cased_value = ToLower(value);
201 CF_EXPECT(Contains(valid_bool_values_in_lower_cases_, low_cased_value),
202 "The value for the boolean flag " << flag_string << ", "
203 << value << " is not valid");
204 if (low_cased_value == "true" || low_cased_value == "yes") {
205 new_args.emplace_back(flag_string);
206 continue;
207 }
208 auto base_pos = flag_string.find_first_not_of('-');
209 auto base = flag_string.substr(base_pos);
210 new_args.emplace_back("--no" + base);
211 continue;
212 }
213
214 if (Contains(flag_patterns_.bool_no_patterns, flag_string)) {
215 CF_EXPECT(android::base::StartsWith(flag_string, "-no") ||
216 android::base::StartsWith(flag_string, "--no"));
217 // if --nohelp=XYZ, the "=XYZ" is ignored.
218 new_args.emplace_back(flag_string);
219 continue;
220 }
221
222 new_args.emplace_back(arg);
223 }
224 return new_args;
225 }
226
227 } // namespace selector
228 } // namespace cuttlefish
229