// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #ifndef COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
6 #define COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
7 
8 #include <vector>
9 
10 #include "base/basictypes.h"
11 #include "base/strings/string16.h"
12 #include "components/query_parser/snippet.h"
13 
14 namespace query_parser {
15 
// Forward declaration only: this header refers to QueryNodeList solely through
// a pointer parameter (see QueryParser::ParseQueryImpl), so the full definition
// is not needed here.
class QueryNodeList;
17 
18 // Used by HasMatchIn.
19 struct QueryWord {
20   // The work to match against.
21   base::string16 word;
22 
23   // The starting position of the word in the original text.
24   size_t position;
25 };
26 
27 typedef std::vector<query_parser::QueryWord> QueryWordVector;
28 
29 // QueryNode is used by QueryParser to represent the elements that constitute a
30 // query. While QueryNode is exposed by way of ParseQuery, it really isn't meant
31 // for external usage.
32 class QueryNode {
33  public:
~QueryNode()34   virtual ~QueryNode() {}
35 
36   // Serialize ourselves out to a string that can be passed to SQLite. Returns
37   // the number of words in this node.
38   virtual int AppendToSQLiteQuery(base::string16* query) const = 0;
39 
40   // Return true if this is a QueryNodeWord, false if it's a QueryNodeList.
41   virtual bool IsWord() const = 0;
42 
43   // Returns true if this node matches |word|. If |exact| is true, the string
44   // must exactly match. Otherwise, this uses a starts with comparison.
45   virtual bool Matches(const base::string16& word, bool exact) const = 0;
46 
47   // Returns true if this node matches at least one of the words in |words|. An
48   // entry is added to |match_positions| for all matching words giving the
49   // matching regions.
50   virtual bool HasMatchIn(const QueryWordVector& words,
51                           Snippet::MatchPositions* match_positions) const = 0;
52 
53   // Returns true if this node matches at least one of the words in |words|.
54   virtual bool HasMatchIn(const QueryWordVector& words) const = 0;
55 
56   // Appends the words that make up this node in |words|.
57   virtual void AppendWords(std::vector<base::string16>* words) const = 0;
58 };
59 
60 typedef std::vector<query_parser::QueryNode*> QueryNodeStarVector;
61 
62 // This class is used to parse queries entered into the history search into more
63 // normalized queries that can be passed to the SQLite backend.
64 class QueryParser {
65  public:
66   QueryParser();
67 
68   // For CJK ideographs and Korean Hangul, even a single character
69   // can be useful in prefix matching, but that may give us too many
70   // false positives. Moreover, the current ICU word breaker gives us
71   // back every single Chinese character as a word so that there's no
72   // point doing anything for them and we only adjust the minimum length
73   // to 2 for Korean Hangul while using 3 for others. This is a temporary
74   // hack until we have a segmentation support.
75   static bool IsWordLongEnoughForPrefixSearch(const base::string16& word);
76 
77   // Parse a query into a SQLite query. The resulting query is placed in
78   // |sqlite_query| and the number of words is returned.
79   int ParseQuery(const base::string16& query, base::string16* sqlite_query);
80 
81   // Parses |query|, returning the words that make up it. Any words in quotes
82   // are put in |words| without the quotes. For example, the query text
83   // "foo bar" results in two entries being added to words, one for foo and one
84   // for bar.
85   void ParseQueryWords(const base::string16& query,
86                        std::vector<base::string16>* words);
87 
88   // Parses |query|, returning the nodes that constitute the valid words in the
89   // query. This is intended for later usage with DoesQueryMatch. Ownership of
90   // the nodes passes to the caller.
91   void ParseQueryNodes(const base::string16& query,
92                        QueryNodeStarVector* nodes);
93 
94   // Returns true if the string text matches the query nodes created by a call
95   // to ParseQuery. If the query does match, each of the matching positions in
96   // the text is added to |match_positions|.
97   bool DoesQueryMatch(const base::string16& text,
98                       const QueryNodeStarVector& nodes,
99                       Snippet::MatchPositions* match_positions);
100 
101   // Returns true if all of the |words| match the query |nodes| created by a
102   // call to ParseQuery.
103   bool DoesQueryMatch(const QueryWordVector& words,
104                       const QueryNodeStarVector& nodes);
105 
106   // Extracts the words from |text|, placing each word into |words|.
107   void ExtractQueryWords(const base::string16& text,
108                          QueryWordVector* words);
109 
110   // Sorts the match positions in |matches| by their first index, then
111   // coalesces any match positions that intersect each other.
112   static void SortAndCoalesceMatchPositions(Snippet::MatchPositions* matches);
113 
114  private:
115   // Does the work of parsing |query|; creates nodes in |root| as appropriate.
116   // This is invoked from both of the ParseQuery methods.
117   bool ParseQueryImpl(const base::string16& query, QueryNodeList* root);
118 
119   DISALLOW_COPY_AND_ASSIGN(QueryParser);
120 };
121 
122 }  // namespace query_parser
123 
124 #endif  // COMPONENTS_QUERY_PARSER_QUERY_PARSER_H_
125