1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // The query parser is used to parse queries entered into the history 6 // search into more normalized queries can be passed to the SQLite backend. 7 8 #ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 9 #define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 10 #pragma once 11 12 #include <vector> 13 14 #include "base/string16.h" 15 #include "chrome/browser/history/snippet.h" 16 17 class QueryNodeList; 18 19 // Used by HasMatchIn. 20 struct QueryWord { 21 // The work to match against. 22 string16 word; 23 24 // The starting position of the word in the original text. 25 size_t position; 26 }; 27 28 // QueryNode is used by QueryNodeParser to represent the elements that 29 // constitute a query. While QueryNode is exposed by way of ParseQuery, it 30 // really isn't meant for external usage. 31 class QueryNode { 32 public: ~QueryNode()33 virtual ~QueryNode() {} 34 35 // Serialize ourselves out to a string that can be passed to SQLite. Returns 36 // the number of words in this node. 37 virtual int AppendToSQLiteQuery(string16* query) const = 0; 38 39 // Return true if this is a word node, false if it's a QueryNodeList. 40 virtual bool IsWord() const = 0; 41 42 // Returns true if this node matches the specified text. If exact is true, 43 // the string must exactly match. Otherwise, this uses a starts with 44 // comparison. 45 virtual bool Matches(const string16& word, bool exact) const = 0; 46 47 // Returns true if this node matches at least one of the words in words. If 48 // the node matches at least one word, an entry is added to match_positions 49 // giving the matching region. 50 virtual bool HasMatchIn(const std::vector<QueryWord>& words, 51 Snippet::MatchPositions* match_positions) const = 0; 52 53 // Appends the words that make up this node in |words|. 54 virtual void AppendWords(std::vector<string16>* words) const = 0; 55 }; 56 57 58 class QueryParser { 59 public: 60 QueryParser(); 61 62 // For CJK ideographs and Korean Hangul, even a single character 63 // can be useful in prefix matching, but that may give us too many 64 // false positives. Moreover, the current ICU word breaker gives us 65 // back every single Chinese character as a word so that there's no 66 // point doing anything for them and we only adjust the minimum length 67 // to 2 for Korean Hangul while using 3 for others. This is a temporary 68 // hack until we have a segmentation support. 69 static bool IsWordLongEnoughForPrefixSearch(const string16& word); 70 71 // Parse a query into a SQLite query. The resulting query is placed in 72 // sqlite_query and the number of words is returned. 73 int ParseQuery(const string16& query, 74 string16* sqlite_query); 75 76 // Parses the query words in query, returning the nodes that constitute the 77 // valid words in the query. This is intended for later usage with 78 // DoesQueryMatch. 79 // Ownership of the nodes passes to the caller. 80 void ParseQuery(const string16& query, 81 std::vector<QueryNode*>* nodes); 82 83 // Parses a query returning the words that make up the query. Any words in 84 // quotes are put in |words| without the quotes. For example, the query text 85 // "foo bar" results in two entries being added to words, one for foo and one 86 // for bar. 87 void ExtractQueryWords(const string16& query, 88 std::vector<string16>* words); 89 90 // Returns true if the string text matches the query nodes created by a call 91 // to ParseQuery. If the query does match each of the matching positions in 92 // the text is added to |match_positions|. 93 bool DoesQueryMatch(const string16& text, 94 const std::vector<QueryNode*>& nodes, 95 Snippet::MatchPositions* match_positions); 96 97 private: 98 // Does the work of parsing a query; creates nodes in QueryNodeList as 99 // appropriate. This is invoked from both of the ParseQuery methods. 100 bool ParseQueryImpl(const string16& query, 101 QueryNodeList* root); 102 103 // Extracts the words from text, placing each word into words. 104 void ExtractQueryWords(const string16& text, 105 std::vector<QueryWord>* words); 106 }; 107 108 #endif // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_ 109