1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 6 #define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 7 8 #include <vector> 9 10 #include "base/basictypes.h" 11 #include "base/strings/string16.h" 12 #include "chrome/browser/history/snippet.h" 13 14 class QueryNodeList; 15 16 // Used by HasMatchIn. 17 struct QueryWord { 18 // The work to match against. 19 base::string16 word; 20 21 // The starting position of the word in the original text. 22 size_t position; 23 }; 24 25 // QueryNode is used by QueryParser to represent the elements that constitute a 26 // query. While QueryNode is exposed by way of ParseQuery, it really isn't meant 27 // for external usage. 28 class QueryNode { 29 public: ~QueryNode()30 virtual ~QueryNode() {} 31 32 // Serialize ourselves out to a string that can be passed to SQLite. Returns 33 // the number of words in this node. 34 virtual int AppendToSQLiteQuery(base::string16* query) const = 0; 35 36 // Return true if this is a QueryNodeWord, false if it's a QueryNodeList. 37 virtual bool IsWord() const = 0; 38 39 // Returns true if this node matches |word|. If |exact| is true, the string 40 // must exactly match. Otherwise, this uses a starts with comparison. 41 virtual bool Matches(const base::string16& word, bool exact) const = 0; 42 43 // Returns true if this node matches at least one of the words in |words|. An 44 // entry is added to |match_positions| for all matching words giving the 45 // matching regions. 46 virtual bool HasMatchIn(const std::vector<QueryWord>& words, 47 Snippet::MatchPositions* match_positions) const = 0; 48 49 // Returns true if this node matches at least one of the words in |words|. 50 virtual bool HasMatchIn(const std::vector<QueryWord>& words) const = 0; 51 52 // Appends the words that make up this node in |words|. 53 virtual void AppendWords(std::vector<base::string16>* words) const = 0; 54 }; 55 56 // This class is used to parse queries entered into the history search into more 57 // normalized queries that can be passed to the SQLite backend. 58 class QueryParser { 59 public: 60 QueryParser(); 61 62 // For CJK ideographs and Korean Hangul, even a single character 63 // can be useful in prefix matching, but that may give us too many 64 // false positives. Moreover, the current ICU word breaker gives us 65 // back every single Chinese character as a word so that there's no 66 // point doing anything for them and we only adjust the minimum length 67 // to 2 for Korean Hangul while using 3 for others. This is a temporary 68 // hack until we have a segmentation support. 69 static bool IsWordLongEnoughForPrefixSearch(const base::string16& word); 70 71 // Parse a query into a SQLite query. The resulting query is placed in 72 // |sqlite_query| and the number of words is returned. 73 int ParseQuery(const base::string16& query, base::string16* sqlite_query); 74 75 // Parses |query|, returning the words that make up it. Any words in quotes 76 // are put in |words| without the quotes. For example, the query text 77 // "foo bar" results in two entries being added to words, one for foo and one 78 // for bar. 79 void ParseQueryWords(const base::string16& query, 80 std::vector<base::string16>* words); 81 82 // Parses |query|, returning the nodes that constitute the valid words in the 83 // query. This is intended for later usage with DoesQueryMatch. Ownership of 84 // the nodes passes to the caller. 85 void ParseQueryNodes(const base::string16& query, 86 std::vector<QueryNode*>* nodes); 87 88 // Returns true if the string text matches the query nodes created by a call 89 // to ParseQuery. If the query does match, each of the matching positions in 90 // the text is added to |match_positions|. 91 bool DoesQueryMatch(const base::string16& text, 92 const std::vector<QueryNode*>& nodes, 93 Snippet::MatchPositions* match_positions); 94 95 // Returns true if all of the |words| match the query |nodes| created by a 96 // call to ParseQuery. 97 bool DoesQueryMatch(const std::vector<QueryWord>& words, 98 const std::vector<QueryNode*>& nodes); 99 100 // Extracts the words from |text|, placing each word into |words|. 101 void ExtractQueryWords(const base::string16& text, 102 std::vector<QueryWord>* words); 103 104 private: 105 // Does the work of parsing |query|; creates nodes in |root| as appropriate. 106 // This is invoked from both of the ParseQuery methods. 107 bool ParseQueryImpl(const base::string16& query, QueryNodeList* root); 108 109 DISALLOW_COPY_AND_ASSIGN(QueryParser); 110 }; 111 112 #endif // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 113