1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/autocomplete/bookmark_provider.h"
6
7 #include <algorithm>
8 #include <functional>
9 #include <vector>
10
11 #include "base/prefs/pref_service.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "chrome/browser/autocomplete/chrome_autocomplete_scheme_classifier.h"
15 #include "chrome/browser/autocomplete/history_provider.h"
16 #include "chrome/browser/bookmarks/bookmark_model_factory.h"
17 #include "chrome/browser/profiles/profile.h"
18 #include "chrome/common/pref_names.h"
19 #include "components/bookmarks/browser/bookmark_match.h"
20 #include "components/bookmarks/browser/bookmark_model.h"
21 #include "components/metrics/proto/omnibox_input_type.pb.h"
22 #include "components/omnibox/autocomplete_result.h"
23 #include "components/omnibox/url_prefix.h"
24 #include "net/base/net_util.h"
25
26 using bookmarks::BookmarkMatch;
27
28 typedef std::vector<BookmarkMatch> BookmarkMatches;
29
30 namespace {
31
32 // Removes leading spaces from |title| before displaying, otherwise it looks
33 // funny. In the process, corrects |title_match_positions| so the correct
34 // characters are highlighted.
CorrectTitleAndMatchPositions(base::string16 * title,BookmarkMatch::MatchPositions * title_match_positions)35 void CorrectTitleAndMatchPositions(
36 base::string16* title,
37 BookmarkMatch::MatchPositions* title_match_positions) {
38 size_t leading_whitespace_chars = title->length();
39 base::TrimWhitespace(*title, base::TRIM_LEADING, title);
40 leading_whitespace_chars-= title->length();
41 if (leading_whitespace_chars == 0)
42 return;
43 for (query_parser::Snippet::MatchPositions::iterator it =
44 title_match_positions->begin();
45 it != title_match_positions->end(); ++it) {
46 (*it) = query_parser::Snippet::MatchPosition(
47 it->first - leading_whitespace_chars,
48 it->second - leading_whitespace_chars);
49 }
50 }
51
52 } // namespace
53
54 // BookmarkProvider ------------------------------------------------------------
55
BookmarkProvider(Profile * profile)56 BookmarkProvider::BookmarkProvider(Profile* profile)
57 : AutocompleteProvider(AutocompleteProvider::TYPE_BOOKMARK),
58 profile_(profile),
59 bookmark_model_(NULL) {
60 if (profile) {
61 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile);
62 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
63 }
64 }
65
Start(const AutocompleteInput & input,bool minimal_changes)66 void BookmarkProvider::Start(const AutocompleteInput& input,
67 bool minimal_changes) {
68 if (minimal_changes)
69 return;
70 matches_.clear();
71
72 if (input.text().empty() ||
73 (input.type() == metrics::OmniboxInputType::FORCED_QUERY))
74 return;
75
76 DoAutocomplete(input);
77 }
78
~BookmarkProvider()79 BookmarkProvider::~BookmarkProvider() {}
80
DoAutocomplete(const AutocompleteInput & input)81 void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input) {
82 // We may not have a bookmark model for some unit tests.
83 if (!bookmark_model_)
84 return;
85
86 BookmarkMatches matches;
87 // Retrieve enough bookmarks so that we have a reasonable probability of
88 // suggesting the one that the user desires.
89 const size_t kMaxBookmarkMatches = 50;
90
91 // GetBookmarksMatching returns bookmarks matching the user's
92 // search terms using the following rules:
93 // - The search text is broken up into search terms. Each term is searched
94 // for separately.
95 // - Term matches are always performed against the start of a word. 'def'
96 // will match against 'define' but not against 'indefinite'.
97 // - Terms must be at least three characters in length in order to perform
98 // partial word matches. Any term of lesser length will only be used as an
99 // exact match. 'def' will match against 'define' but 'de' will not match.
100 // - A search containing multiple terms will return results with those words
101 // occuring in any order.
102 // - Terms enclosed in quotes comprises a phrase that must match exactly.
103 // - Multiple terms enclosed in quotes will require those exact words in that
104 // exact order to match.
105 //
106 // Please refer to the code for BookmarkIndex::GetBookmarksMatching for
107 // complete details of how searches are performed against the user's
108 // bookmarks.
109 bookmark_model_->GetBookmarksMatching(input.text(),
110 kMaxBookmarkMatches,
111 &matches);
112 if (matches.empty())
113 return; // There were no matches.
114 const base::string16 fixed_up_input(FixupUserInput(input).second);
115 for (BookmarkMatches::const_iterator i = matches.begin(); i != matches.end();
116 ++i) {
117 // Create and score the AutocompleteMatch. If its score is 0 then the
118 // match is discarded.
119 AutocompleteMatch match(BookmarkMatchToACMatch(input, fixed_up_input, *i));
120 if (match.relevance > 0)
121 matches_.push_back(match);
122 }
123
124 // Sort and clip the resulting matches.
125 size_t num_matches =
126 std::min(matches_.size(), AutocompleteProvider::kMaxMatches);
127 std::partial_sort(matches_.begin(), matches_.begin() + num_matches,
128 matches_.end(), AutocompleteMatch::MoreRelevant);
129 matches_.resize(num_matches);
130 }
131
132 namespace {
133
134 // for_each helper functor that calculates a match factor for each query term
135 // when calculating the final score.
136 //
137 // Calculate a 'factor' from 0 to the bookmark's title length for a match
138 // based on 1) how many characters match and 2) where the match is positioned.
139 class ScoringFunctor {
140 public:
141 // |title_length| is the length of the bookmark title against which this
142 // match will be scored.
ScoringFunctor(size_t title_length)143 explicit ScoringFunctor(size_t title_length)
144 : title_length_(static_cast<double>(title_length)),
145 scoring_factor_(0.0) {
146 }
147
operator ()(const query_parser::Snippet::MatchPosition & match)148 void operator()(const query_parser::Snippet::MatchPosition& match) {
149 double term_length = static_cast<double>(match.second - match.first);
150 scoring_factor_ += term_length *
151 (title_length_ - match.first) / title_length_;
152 }
153
ScoringFactor()154 double ScoringFactor() { return scoring_factor_; }
155
156 private:
157 double title_length_;
158 double scoring_factor_;
159 };
160
161 } // namespace
162
BookmarkMatchToACMatch(const AutocompleteInput & input,const base::string16 & fixed_up_input_text,const BookmarkMatch & bookmark_match)163 AutocompleteMatch BookmarkProvider::BookmarkMatchToACMatch(
164 const AutocompleteInput& input,
165 const base::string16& fixed_up_input_text,
166 const BookmarkMatch& bookmark_match) {
167 // The AutocompleteMatch we construct is non-deletable because the only
168 // way to support this would be to delete the underlying bookmark, which is
169 // unlikely to be what the user intends.
170 AutocompleteMatch match(this, 0, false,
171 AutocompleteMatchType::BOOKMARK_TITLE);
172 base::string16 title(bookmark_match.node->GetTitle());
173 BookmarkMatch::MatchPositions new_title_match_positions =
174 bookmark_match.title_match_positions;
175 CorrectTitleAndMatchPositions(&title, &new_title_match_positions);
176 const GURL& url(bookmark_match.node->url());
177 const base::string16& url_utf16 = base::UTF8ToUTF16(url.spec());
178 size_t inline_autocomplete_offset = URLPrefix::GetInlineAutocompleteOffset(
179 input.text(), fixed_up_input_text, false, url_utf16);
180 match.destination_url = url;
181 const size_t match_start = bookmark_match.url_match_positions.empty() ?
182 0 : bookmark_match.url_match_positions[0].first;
183 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()) &&
184 ((match_start == base::string16::npos) || (match_start != 0));
185 std::vector<size_t> offsets = BookmarkMatch::OffsetsFromMatchPositions(
186 bookmark_match.url_match_positions);
187 // In addition to knowing how |offsets| is transformed, we need to know how
188 // |inline_autocomplete_offset| is transformed. We add it to the end of
189 // |offsets|, compute how everything is transformed, then remove it from the
190 // end.
191 offsets.push_back(inline_autocomplete_offset);
192 match.contents = net::FormatUrlWithOffsets(url, languages_,
193 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP),
194 net::UnescapeRule::SPACES, NULL, NULL, &offsets);
195 inline_autocomplete_offset = offsets.back();
196 offsets.pop_back();
197 BookmarkMatch::MatchPositions new_url_match_positions =
198 BookmarkMatch::ReplaceOffsetsInMatchPositions(
199 bookmark_match.url_match_positions, offsets);
200 match.contents_class =
201 ClassificationsFromMatch(new_url_match_positions,
202 match.contents.size(),
203 true);
204 match.fill_into_edit =
205 AutocompleteInput::FormattedStringWithEquivalentMeaning(
206 url, match.contents, ChromeAutocompleteSchemeClassifier(profile_));
207 if (inline_autocomplete_offset != base::string16::npos) {
208 // |inline_autocomplete_offset| may be beyond the end of the
209 // |fill_into_edit| if the user has typed an URL with a scheme and the
210 // last character typed is a slash. That slash is removed by the
211 // FormatURLWithOffsets call above.
212 if (inline_autocomplete_offset < match.fill_into_edit.length()) {
213 match.inline_autocompletion =
214 match.fill_into_edit.substr(inline_autocomplete_offset);
215 }
216 match.allowed_to_be_default_match = match.inline_autocompletion.empty() ||
217 !HistoryProvider::PreventInlineAutocomplete(input);
218 }
219 match.description = title;
220 match.description_class =
221 ClassificationsFromMatch(bookmark_match.title_match_positions,
222 match.description.size(),
223 false);
224
225 // Summary on how a relevance score is determined for the match:
226 //
227 // For each match within the bookmark's title or URL (or both), calculate a
228 // 'factor', sum up those factors, then use the sum to figure out a value
229 // between the base score and the maximum score.
230 //
231 // The factor for each match is the product of:
232 //
233 // 1) how many characters in the bookmark's title/URL are part of this match.
234 // This is capped at the length of the bookmark's title
235 // to prevent terms that match in both the title and the URL from
236 // scoring too strongly.
237 //
238 // 2) where the match occurs within the bookmark's title or URL,
239 // giving more points for matches that appear earlier in the string:
240 // ((string_length - position of match start) / string_length).
241 //
242 // Example: Given a bookmark title of 'abcde fghijklm', with a title length
243 // of 14, and two different search terms, 'abcde' and 'fghij', with
244 // start positions of 0 and 6, respectively, 'abcde' will score higher
245 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with
246 // a partial factor of (14-6)/14 = 0.571 ). (In this example neither
247 // term matches in the URL.)
248 //
249 // Once all match factors have been calculated they are summed. If there
250 // are no URL matches, the resulting sum will never be greater than the
251 // length of the bookmark title because of the way the bookmark model matches
252 // and removes overlaps. (In particular, the bookmark model only
253 // matches terms to the beginning of words and it removes all overlapping
254 // matches, keeping only the longest. Together these mean that each
255 // character is included in at most one match.) If there are matches in the
256 // URL, the sum can be greater.
257 //
258 // This sum is then normalized by the length of the bookmark title + 10
259 // and capped at 1.0. The +10 is to expand the scoring range so fewer
260 // bookmarks will hit the 1.0 cap and hence lose all ability to distinguish
261 // between these high-quality bookmarks.
262 //
263 // The normalized value is multiplied against the scoring range available,
264 // which is 299. The 299 is calculated by subtracting the minimum possible
265 // score, 900, from the maximum possible score, 1199. This product, ranging
266 // from 0 to 299, is added to the minimum possible score, 900, giving the
267 // preliminary score.
268 //
269 // If the preliminary score is less than the maximum possible score, 1199,
270 // it can be boosted up to that maximum possible score if the URL referenced
271 // by the bookmark is also referenced by any of the user's other bookmarks.
272 // A count of how many times the bookmark's URL is referenced is determined
273 // and, for each additional reference beyond the one for the bookmark being
274 // scored up to a maximum of three, the score is boosted by a fixed amount
275 // given by |kURLCountBoost|, below.
276 //
277
278 // Pretend empty titles are identical to the URL.
279 if (title.empty())
280 title = base::ASCIIToUTF16(url.spec());
281 ScoringFunctor title_position_functor =
282 for_each(bookmark_match.title_match_positions.begin(),
283 bookmark_match.title_match_positions.end(),
284 ScoringFunctor(title.size()));
285 ScoringFunctor url_position_functor =
286 for_each(bookmark_match.url_match_positions.begin(),
287 bookmark_match.url_match_positions.end(),
288 ScoringFunctor(bookmark_match.node->url().spec().length()));
289 const double summed_factors = title_position_functor.ScoringFactor() +
290 url_position_functor.ScoringFactor();
291 const double normalized_sum =
292 std::min(summed_factors / (title.size() + 10), 1.0);
293 const int kBaseBookmarkScore = 900;
294 const int kMaxBookmarkScore = 1199;
295 const double kBookmarkScoreRange =
296 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore);
297 match.relevance = static_cast<int>(normalized_sum * kBookmarkScoreRange) +
298 kBaseBookmarkScore;
299 // Don't waste any time searching for additional referenced URLs if we
300 // already have a perfect title match.
301 if (match.relevance >= kMaxBookmarkScore)
302 return match;
303 // Boost the score if the bookmark's URL is referenced by other bookmarks.
304 const int kURLCountBoost[4] = { 0, 75, 125, 150 };
305 std::vector<const BookmarkNode*> nodes;
306 bookmark_model_->GetNodesByURL(url, &nodes);
307 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U);
308 match.relevance +=
309 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1];
310 match.relevance = std::min(kMaxBookmarkScore, match.relevance);
311 return match;
312 }
313
314 // static
ClassificationsFromMatch(const query_parser::Snippet::MatchPositions & positions,size_t text_length,bool is_url)315 ACMatchClassifications BookmarkProvider::ClassificationsFromMatch(
316 const query_parser::Snippet::MatchPositions& positions,
317 size_t text_length,
318 bool is_url) {
319 ACMatchClassification::Style url_style =
320 is_url ? ACMatchClassification::URL : ACMatchClassification::NONE;
321 ACMatchClassifications classifications;
322 if (positions.empty()) {
323 if (text_length > 0)
324 classifications.push_back(ACMatchClassification(0, url_style));
325 return classifications;
326 }
327
328 for (query_parser::Snippet::MatchPositions::const_iterator i =
329 positions.begin();
330 i != positions.end();
331 ++i) {
332 AutocompleteMatch::ACMatchClassifications new_class;
333 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first,
334 text_length, url_style, &new_class);
335 classifications = AutocompleteMatch::MergeClassifications(
336 classifications, new_class);
337 }
338 return classifications;
339 }
340