• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
6 #define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
7 
8 #include <vector>
9 
10 #include "base/basictypes.h"
11 #include "base/strings/string16.h"
12 #include "chrome/browser/history/snippet.h"
13 
14 class QueryNodeList;
15 
16 // Used by HasMatchIn.
17 struct QueryWord {
18   // The work to match against.
19   base::string16 word;
20 
21   // The starting position of the word in the original text.
22   size_t position;
23 };
24 
25 // QueryNode is used by QueryParser to represent the elements that constitute a
26 // query. While QueryNode is exposed by way of ParseQuery, it really isn't meant
27 // for external usage.
28 class QueryNode {
29  public:
~QueryNode()30   virtual ~QueryNode() {}
31 
32   // Serialize ourselves out to a string that can be passed to SQLite. Returns
33   // the number of words in this node.
34   virtual int AppendToSQLiteQuery(base::string16* query) const = 0;
35 
36   // Return true if this is a QueryNodeWord, false if it's a QueryNodeList.
37   virtual bool IsWord() const = 0;
38 
39   // Returns true if this node matches |word|. If |exact| is true, the string
40   // must exactly match. Otherwise, this uses a starts with comparison.
41   virtual bool Matches(const base::string16& word, bool exact) const = 0;
42 
43   // Returns true if this node matches at least one of the words in |words|. An
44   // entry is added to |match_positions| for all matching words giving the
45   // matching regions.
46   virtual bool HasMatchIn(const std::vector<QueryWord>& words,
47                           Snippet::MatchPositions* match_positions) const = 0;
48 
49   // Returns true if this node matches at least one of the words in |words|.
50   virtual bool HasMatchIn(const std::vector<QueryWord>& words) const = 0;
51 
52   // Appends the words that make up this node in |words|.
53   virtual void AppendWords(std::vector<base::string16>* words) const = 0;
54 };
55 
56 // This class is used to parse queries entered into the history search into more
57 // normalized queries that can be passed to the SQLite backend.
58 class QueryParser {
59  public:
60   QueryParser();
61 
62   // For CJK ideographs and Korean Hangul, even a single character
63   // can be useful in prefix matching, but that may give us too many
64   // false positives. Moreover, the current ICU word breaker gives us
65   // back every single Chinese character as a word so that there's no
66   // point doing anything for them and we only adjust the minimum length
67   // to 2 for Korean Hangul while using 3 for others. This is a temporary
68   // hack until we have a segmentation support.
69   static bool IsWordLongEnoughForPrefixSearch(const base::string16& word);
70 
71   // Parse a query into a SQLite query. The resulting query is placed in
72   // |sqlite_query| and the number of words is returned.
73   int ParseQuery(const base::string16& query, base::string16* sqlite_query);
74 
75   // Parses |query|, returning the words that make up it. Any words in quotes
76   // are put in |words| without the quotes. For example, the query text
77   // "foo bar" results in two entries being added to words, one for foo and one
78   // for bar.
79   void ParseQueryWords(const base::string16& query,
80                        std::vector<base::string16>* words);
81 
82   // Parses |query|, returning the nodes that constitute the valid words in the
83   // query. This is intended for later usage with DoesQueryMatch. Ownership of
84   // the nodes passes to the caller.
85   void ParseQueryNodes(const base::string16& query,
86                        std::vector<QueryNode*>* nodes);
87 
88   // Returns true if the string text matches the query nodes created by a call
89   // to ParseQuery. If the query does match, each of the matching positions in
90   // the text is added to |match_positions|.
91   bool DoesQueryMatch(const base::string16& text,
92                       const std::vector<QueryNode*>& nodes,
93                       Snippet::MatchPositions* match_positions);
94 
95   // Returns true if all of the |words| match the query |nodes| created by a
96   // call to ParseQuery.
97   bool DoesQueryMatch(const std::vector<QueryWord>& words,
98                       const std::vector<QueryNode*>& nodes);
99 
100   // Extracts the words from |text|, placing each word into |words|.
101   void ExtractQueryWords(const base::string16& text,
102                          std::vector<QueryWord>* words);
103 
104  private:
105   // Does the work of parsing |query|; creates nodes in |root| as appropriate.
106   // This is invoked from both of the ParseQuery methods.
107   bool ParseQueryImpl(const base::string16& query, QueryNodeList* root);
108 
109   DISALLOW_COPY_AND_ASSIGN(QueryParser);
110 };
111 
112 #endif  // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
113