• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2022 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_
16 #define ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_
17 
18 #include <memory>
19 #include <vector>
20 
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 #include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
23 #include "icing/query/advanced_query_parser/lexer.h"
24 
25 namespace icing {
26 namespace lib {
27 
28 class Parser {
29  public:
Create(std::vector<Lexer::LexerToken> && lexer_tokens)30   static Parser Create(std::vector<Lexer::LexerToken>&& lexer_tokens) {
31     return Parser(std::move(lexer_tokens));
32   }
33 
34   // Returns:
35   //   On success, pointer to the root node of the AST
36   //   INVALID_ARGUMENT for input that does not conform to the grammar
37   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeQuery();
38 
39   // Returns:
40   //   On success, pointer to the root node of the AST
41   //   INVALID_ARGUMENT for input that does not conform to the grammar
42   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeScoring();
43 
44  private:
Parser(std::vector<Lexer::LexerToken> && lexer_tokens)45   explicit Parser(std::vector<Lexer::LexerToken>&& lexer_tokens)
46       : lexer_tokens_(std::move(lexer_tokens)),
47         current_token_(lexer_tokens_.begin()) {}
48 
49   // Match Functions
50   // These functions are used to test whether the current_token matches a member
51   // of the FIRST set of a particular symbol in our grammar.
Match(Lexer::TokenType token_type)52   bool Match(Lexer::TokenType token_type) const {
53     return current_token_ != lexer_tokens_.end() &&
54            current_token_->type == token_type;
55   }
56 
MatchMember()57   bool MatchMember() const { return Match(Lexer::TokenType::TEXT); }
58 
MatchFunction()59   bool MatchFunction() const { return Match(Lexer::TokenType::FUNCTION_NAME); }
60 
MatchComparable()61   bool MatchComparable() const {
62     return Match(Lexer::TokenType::STRING) || MatchMember() || MatchFunction();
63   }
64 
MatchComposite()65   bool MatchComposite() const { return Match(Lexer::TokenType::LPAREN); }
66 
MatchRestriction()67   bool MatchRestriction() const { return MatchComparable(); }
68 
MatchSimple()69   bool MatchSimple() const { return MatchRestriction() || MatchComposite(); }
70 
MatchTerm()71   bool MatchTerm() const {
72     return MatchSimple() || Match(Lexer::TokenType::NOT) ||
73            Match(Lexer::TokenType::MINUS);
74   }
75 
MatchFactor()76   bool MatchFactor() const { return MatchTerm(); }
77 
78   // Consume Functions
79   // These functions attempt to parse the token sequence starting at
80   // current_token_.
81   // Returns INVALID_ARGUMENT if unable to parse the token sequence starting at
82   // current_token_ as that particular grammar symbol. There are no guarantees
83   // about what state current_token and lexer_tokens_ are in when returning an
84   // error.
85   //
86   // Consume functions for terminal symbols. These are the only Consume
87   // functions that will directly modify current_token_.
88   // The Consume functions for terminals will guarantee not to modify
89   // current_token_ and lexer_tokens_ when returning an error.
90   libtextclassifier3::Status Consume(Lexer::TokenType token_type);
91 
92   libtextclassifier3::StatusOr<std::unique_ptr<TextNode>> ConsumeText();
93 
94   libtextclassifier3::StatusOr<std::unique_ptr<FunctionNameNode>>
95   ConsumeFunctionName();
96 
97   libtextclassifier3::StatusOr<std::unique_ptr<StringNode>>
98   ConsumeStringElement();
99 
100   libtextclassifier3::StatusOr<std::string> ConsumeComparator();
101 
102   // Consume functions for non-terminal symbols.
103   libtextclassifier3::StatusOr<std::unique_ptr<MemberNode>> ConsumeMember();
104 
105   libtextclassifier3::StatusOr<std::unique_ptr<FunctionNode>> ConsumeFunction();
106 
107   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeComparable();
108 
109   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeComposite();
110 
111   libtextclassifier3::StatusOr<std::vector<std::unique_ptr<Node>>>
112   ConsumeArgs();
113 
114   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeRestriction();
115 
116   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeSimple();
117 
118   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeTerm();
119 
120   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeFactor();
121 
122   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeSequence();
123 
124   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeQueryExpression();
125 
126   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeMultExpr();
127 
128   libtextclassifier3::StatusOr<std::unique_ptr<Node>>
129   ConsumeScoringExpression();
130 
131   libtextclassifier3::StatusOr<std::unique_ptr<Node>> ConsumeExpression();
132 
133   std::vector<Lexer::LexerToken> lexer_tokens_;
134   std::vector<Lexer::LexerToken>::const_iterator current_token_;
135   Lexer::Language language_ = Lexer::Language::QUERY;
136 };
137 
138 }  // namespace lib
139 }  // namespace icing
140 
141 #endif  // ICING_QUERY_ADVANCED_QUERY_PARSER_PARSER_H_
142