• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>
22 
23 #include <cvd_server.pb.h>
24 
25 #include "common/libs/utils/result.h"
26 
27 namespace cuttlefish {
28 namespace selector {
29 
30 /**
31  * A "token" is each piece of command line argument that is mostly
32  * separated by " ".
33  *
34  * Each token has a type. The type is a useful information for the
35  * grammar parser, which will use this lexer.
36  *
 * Before going into the details, we assume that a set of flags is
 * pre-registered, and the user may still give unregistered flags.
39  *
 * Note that the purpose of this lexer/parser is to separate cvd
 * client specific arguments and the "subcmd" from the rest. So,
 * "registered" arguments would be the cvd client specific arguments.
 * The unregistered arguments would be for the sub tool.
44  *
45  * Also, in terms of lexing, boolean flags are different from other
46  * value-taking flags. A boolean flag --foo could be --nofoo.
47  *
48  * 1. kKnownValueFlag
49  *    --foo, -foo that may take a non-boolean value
50  * 2. kKnownFlagAndValue
51  *    --foo=value, -foo=value, which does not take more values
52  * 3. kKnownBoolFlag
53  *    --daemon, -daemon, etc, which may take a boolean arg
54  * 4. kKnownBoolNoFlag
55  *    --nodaemon, -nodaemon, etc, which does not take another argument.
56  * 5. kUnknownFlag
57  *    -anything_else or --anything_else
58  *    --anything_else=any_value, etc
59  *    Note that if we don't know the type of the flag, we will have to forward
60  *    the entire thing to the subcmd as is.
61  * 6. kPositional
62  *    mostly without leading "-" or "--"
 * 7. kDoubleDash
 *    A literal "--"
 *    cvd and its subtools as of now are not really using that.
 *    However, it might be useful in the future for any subtool of cvd, so
 *    we allow "--" in the subcmd arguments only in the parser level.
 *    In the lexer level, we simply return a kDoubleDash token.
69  * 8. kError
70  *    The rest.
71  *
72  */
// Category assigned to each command line token by the lexer.
// Values are spelled out explicitly; they match the implicit numbering.
enum class ArgType : int {
  // registered --foo / -foo that may take a non-boolean value
  kKnownValueFlag = 0,
  // registered --foo=value / -foo=value, which takes no further value
  kKnownFlagAndValue = 1,
  // registered boolean flag such as --daemon, which may take a boolean arg
  kKnownBoolFlag = 2,
  // registered negated boolean flag such as --nodaemon; takes no argument
  kKnownBoolNoFlag = 3,
  // any other -flag / --flag (with or without =value); forwarded as is
  kUnknownFlag = 4,
  // mostly tokens without a leading "-" or "--"
  kPositional = 5,
  // a literal "--"
  kDoubleDash = 6,
  // the rest
  kError = 7,
};
83 
84 class ArgToken {
85  public:
86   ArgToken() = delete;
ArgToken(const ArgType arg_type,const std::string & token)87   ArgToken(const ArgType arg_type, const std::string& token)
88       : type_(arg_type), token_(token) {}
89   ArgToken(const ArgToken& src) = default;
90   ArgToken(ArgToken&& src) = default;
91   ArgToken& operator=(const ArgToken& src) {
92     type_ = src.type_;
93     token_ = src.token_;
94     return *this;
95   }
96   ArgToken& operator=(ArgToken&& src) {
97     type_ = std::move(src.type_);
98     token_ = std::move(src.token_);
99     return *this;
100   }
101 
Type()102   auto Type() const { return type_; }
Token()103   const auto& Token() const { return token_; }
Token()104   auto& Token() { return token_; }
105   bool operator==(const ArgToken& dst) const {
106     return Type() == dst.Type() && Token() == dst.Token();
107   }
108 
109  private:
110   ArgType type_;
111   std::string token_;
112 };
113 
114 class ArgumentsLexer {
115   friend class ArgumentsLexerBuilder;
116   using CvdProtobufArg = google::protobuf::RepeatedPtrField<std::string>;
117 
118  public:
119   Result<std::vector<ArgToken>> Tokenize(const std::vector<std::string>& args);
120   Result<std::vector<ArgToken>> Tokenize(const CvdProtobufArg& args);
121   Result<std::vector<ArgToken>> Tokenize(const std::string& args,
122                                          const std::string delim = " ");
123 
124  private:
125   // Lexer factory function will internally generate this,
126   // and give it to ArgumentsLexer.
127   struct FlagPatterns {
128     /* represents flags that takes values
129      * e.g. -group_name, --group_name (which may take an additional
130      * positional arg, or use its default value.)
131      *
132      * With the given example, this set shall be:
133      *  {"-group_name", "--group_name"}
134      */
135     std::unordered_set<std::string> value_patterns;
136     /* boolean flags
137      * e.g. --daemon, --nodaemon
138      *
139      * With the given example, this set shall be:
140      *  {"-daemon", "--daemon"}
141      */
142     std::unordered_set<std::string> bool_patterns;
143     // e.g. {"-nodaemon", "--nodaemon"}
144     std::unordered_set<std::string> bool_no_patterns;
145   };
146   ArgumentsLexer(FlagPatterns&& flag_patterns);
147 
148   // preprocess boolean flags:
149   //  e.g. --help=yes --> --help
150   //       --help=faLSe --> --nohelp
151   Result<std::vector<std::string>> Preprocess(
152       const std::vector<std::string>& args);
153   Result<ArgToken> Process(const std::string& token) const;
154 
155   struct FlagValuePair {
156     std::string flag_string;
157     std::string value;
158   };
159   Result<FlagValuePair> Separate(
160       const std::string& equal_included_string) const;
161   // flag_string starts with "-" or "--"
162   static bool Registered(const std::string& flag_string,
163                          const FlagPatterns& flag_patterns);
Registered(const std::string & flag_string)164   bool Registered(const std::string& flag_string) const {
165     return Registered(flag_string, flag_patterns_);
166   }
167   std::unordered_set<std::string> valid_bool_values_in_lower_cases_;
168   FlagPatterns flag_patterns_;
169 };
170 
// Input to the lexer factory function (ArgumentsLexerBuilder::Build):
// the flags the lexer should treat as known.
// NOTE(review): whether the names carry the leading "-"/"--" is not visible
// from this header; confirm against GenerateFlagPatterns.
struct LexerFlagsSpecification {
  // known flags of boolean type
  std::unordered_set<std::string> known_boolean_flags;
  // known flags that take a value
  std::unordered_set<std::string> known_value_flags;
};
176 
177 /*
178  * At the top level, there are only two tokens: flag and positional tokens.
179  *
180  * A flag token starts with "-" or "--" followed by one or more non "-" letters.
181  * A positional token starts with any character other than "-".
182  *
 * Among flag tokens, there are "known" and "unknown" flag tokens.
184  *
185  */
// Factory for ArgumentsLexer. It is a friend of ArgumentsLexer, whose
// constructor and FlagPatterns type are private.
class ArgumentsLexerBuilder {
  using FlagPatterns = ArgumentsLexer::FlagPatterns;

 public:
  // Creates a lexer from the given specification of known flags.
  // Returns a Result, so a failure is reported through it rather than by
  // a null pointer (NOTE(review): failure conditions are defined in the
  // implementation file, not visible here).
  static Result<std::unique_ptr<ArgumentsLexer>> Build(
      const LexerFlagsSpecification& known_flags);

 private:
  // Derives the FlagPatterns from the specification of known flags.
  // NOTE(review): presumably expands each flag name into its "-"/"--"
  // (and "no"-prefixed, for booleans) spellings, per the FlagPatterns
  // comments; confirm against the definition.
  static Result<FlagPatterns> GenerateFlagPatterns(
      const LexerFlagsSpecification& known_flags);
};
197 
198 }  // namespace selector
199 }  // namespace cuttlefish
200