1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // This file implements utility functions for eliding and formatting UI text.
6 //
7 // Note that several of the functions declared in text_elider.h are implemented
8 // in this file using helper classes in an unnamed namespace.
9
10 #include "ui/gfx/text_elider.h"
11
12 #include <string>
13 #include <vector>
14
15 #include "base/files/file_path.h"
16 #include "base/i18n/break_iterator.h"
17 #include "base/i18n/char_iterator.h"
18 #include "base/i18n/rtl.h"
19 #include "base/memory/scoped_ptr.h"
20 #include "base/strings/string_split.h"
21 #include "base/strings/string_util.h"
22 #include "base/strings/sys_string_conversions.h"
23 #include "base/strings/utf_string_conversions.h"
24 #include "net/base/escape.h"
25 #include "net/base/net_util.h"
26 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
27 #include "third_party/icu/source/common/unicode/rbbi.h"
28 #include "third_party/icu/source/common/unicode/uloc.h"
29 #include "ui/gfx/font_list.h"
30 #include "ui/gfx/text_utils.h"
31 #include "url/gurl.h"
32
33 namespace gfx {
34
// The horizontal ellipsis character (U+2026), encoded as UTF-8.
const char kEllipsis[] = "\xE2\x80\xA6";
// The same ellipsis character as a NUL-terminated UTF-16 string.
const base::char16 kEllipsisUTF16[] = { 0x2026, 0 };
// Separator used in this file when splitting and rebuilding URL paths.
const base::char16 kForwardSlash = '/';
39
40 namespace {
41
42 // Helper class to split + elide text, while respecting UTF16 surrogate pairs.
43 class StringSlicer {
44 public:
StringSlicer(const base::string16 & text,const base::string16 & ellipsis,bool elide_in_middle)45 StringSlicer(const base::string16& text,
46 const base::string16& ellipsis,
47 bool elide_in_middle)
48 : text_(text),
49 ellipsis_(ellipsis),
50 elide_in_middle_(elide_in_middle) {
51 }
52
53 // Cuts |text_| to be |length| characters long. If |elide_in_middle_| is true,
54 // the middle of the string is removed to leave equal-length pieces from the
55 // beginning and end of the string; otherwise, the end of the string is
56 // removed and only the beginning remains. If |insert_ellipsis| is true,
57 // then an ellipsis character will be inserted at the cut point.
CutString(size_t length,bool insert_ellipsis)58 base::string16 CutString(size_t length, bool insert_ellipsis) {
59 const base::string16 ellipsis_text = insert_ellipsis ? ellipsis_
60 : base::string16();
61
62 if (!elide_in_middle_)
63 return text_.substr(0, FindValidBoundaryBefore(length)) + ellipsis_text;
64
65 // We put the extra character, if any, before the cut.
66 const size_t half_length = length / 2;
67 const size_t prefix_length = FindValidBoundaryBefore(length - half_length);
68 const size_t suffix_start_guess = text_.length() - half_length;
69 const size_t suffix_start = FindValidBoundaryAfter(suffix_start_guess);
70 const size_t suffix_length =
71 half_length - (suffix_start_guess - suffix_start);
72 return text_.substr(0, prefix_length) + ellipsis_text +
73 text_.substr(suffix_start, suffix_length);
74 }
75
76 private:
77 // Returns a valid cut boundary at or before |index|.
FindValidBoundaryBefore(size_t index) const78 size_t FindValidBoundaryBefore(size_t index) const {
79 DCHECK_LE(index, text_.length());
80 if (index != text_.length())
81 U16_SET_CP_START(text_.data(), 0, index);
82 return index;
83 }
84
85 // Returns a valid cut boundary at or after |index|.
FindValidBoundaryAfter(size_t index) const86 size_t FindValidBoundaryAfter(size_t index) const {
87 DCHECK_LE(index, text_.length());
88 if (index != text_.length())
89 U16_SET_CP_LIMIT(text_.data(), 0, index, text_.length());
90 return index;
91 }
92
93 // The text to be sliced.
94 const base::string16& text_;
95
96 // Ellipsis string to use.
97 const base::string16& ellipsis_;
98
99 // If true, the middle of the string will be elided.
100 bool elide_in_middle_;
101
102 DISALLOW_COPY_AND_ASSIGN(StringSlicer);
103 };
104
105 // Build a path from the first |num_components| elements in |path_elements|.
106 // Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate.
BuildPathFromComponents(const base::string16 & path_prefix,const std::vector<base::string16> & path_elements,const base::string16 & filename,size_t num_components)107 base::string16 BuildPathFromComponents(
108 const base::string16& path_prefix,
109 const std::vector<base::string16>& path_elements,
110 const base::string16& filename,
111 size_t num_components) {
112 // Add the initial elements of the path.
113 base::string16 path = path_prefix;
114
115 // Build path from first |num_components| elements.
116 for (size_t j = 0; j < num_components; ++j)
117 path += path_elements[j] + kForwardSlash;
118
119 // Add |filename|, ellipsis if necessary.
120 if (num_components != (path_elements.size() - 1))
121 path += base::string16(kEllipsisUTF16) + kForwardSlash;
122 path += filename;
123
124 return path;
125 }
126
127 // Takes a prefix (Domain, or Domain+subdomain) and a collection of path
128 // components and elides if possible. Returns a string containing the longest
129 // possible elided path, or an empty string if elision is not possible.
ElideComponentizedPath(const base::string16 & url_path_prefix,const std::vector<base::string16> & url_path_elements,const base::string16 & url_filename,const base::string16 & url_query,const FontList & font_list,float available_pixel_width)130 base::string16 ElideComponentizedPath(
131 const base::string16& url_path_prefix,
132 const std::vector<base::string16>& url_path_elements,
133 const base::string16& url_filename,
134 const base::string16& url_query,
135 const FontList& font_list,
136 float available_pixel_width) {
137 const size_t url_path_number_of_elements = url_path_elements.size();
138
139 CHECK(url_path_number_of_elements);
140 for (size_t i = url_path_number_of_elements - 1; i > 0; --i) {
141 base::string16 elided_path = BuildPathFromComponents(url_path_prefix,
142 url_path_elements, url_filename, i);
143 if (available_pixel_width >= GetStringWidthF(elided_path, font_list))
144 return ElideText(elided_path + url_query, font_list,
145 available_pixel_width, ELIDE_AT_END);
146 }
147
148 return base::string16();
149 }
150
151 } // namespace
152
// Shortens |email| so that it fits in |available_pixel_width| when rendered
// with |font_list|. The username is elided at its end and, only if that is not
// enough, the domain is elided in its middle; the '@' is always kept. Returns
// |email| unchanged if it already fits, and a lone ellipsis if the domain
// cannot be elided down to something with more than one code unit.
// |email| is expected to contain an '@' with non-empty text on both sides.
base::string16 ElideEmail(const base::string16& email,
                          const FontList& font_list,
                          float available_pixel_width) {
  if (GetStringWidthF(email, font_list) <= available_pixel_width)
    return email;

  // Split at the last '@': the local part may legally contain '@' under some
  // special rules, but the domain part never does.
  const size_t at_pos = email.rfind('@');
  DCHECK_NE(at_pos, base::string16::npos);
  base::string16 username = email.substr(0, at_pos);
  base::string16 domain = email.substr(at_pos + 1);
  DCHECK(!username.empty());
  DCHECK(!domain.empty());

  // The '@' is mandatory, so reserve its width up front.
  const base::string16 kAtSignUTF16 = ASCIIToUTF16("@");
  float remaining_width =
      available_pixel_width - GetStringWidthF(kAtSignUTF16, font_list);

  // The domain may use whatever is left once the username has been reduced to
  // its smallest allowed form: the full username, or its first code unit plus
  // an ellipsis, whichever is narrower.
  const float username_width = GetStringWidthF(username, font_list);
  const float minimal_username_width =
      GetStringWidthF(username.substr(0, 1) + kEllipsisUTF16, font_list);
  const float domain_budget =
      remaining_width - std::min(username_width, minimal_username_width);

  if (GetStringWidthF(domain, font_list) > domain_budget) {
    // Aim for half of the width, plus whatever the username leaves unused in
    // its own half, but never more than |domain_budget|, otherwise the minimal
    // username form would no longer fit.
    const float preferred_domain_width =
        std::max(remaining_width - username_width, remaining_width / 2);
    const float domain_target =
        std::min(domain_budget, preferred_domain_width);
    domain = ElideText(domain, font_list, domain_target, ELIDE_IN_MIDDLE);

    // If nothing but (at most) the ellipsis survived, give up and return a
    // single ellipsis.
    if (domain.length() <= 1U)
      return base::string16(kEllipsisUTF16);
  }

  // Whatever width the (possibly elided) domain does not use goes to the
  // username.
  remaining_width -= GetStringWidthF(domain, font_list);
  username = ElideText(username, font_list, remaining_width, ELIDE_AT_END);

  return username + kAtSignUTF16 + domain;
}
211
212 // TODO(pkasting): http://crbug.com/77883 This whole function gets
213 // kerning/ligatures/etc. issues potentially wrong by assuming that the width of
214 // a rendered string is always the sum of the widths of its substrings. Also I
215 // suspect it could be made simpler.
ElideUrl(const GURL & url,const FontList & font_list,float available_pixel_width,const std::string & languages)216 base::string16 ElideUrl(const GURL& url,
217 const FontList& font_list,
218 float available_pixel_width,
219 const std::string& languages) {
220 // Get a formatted string and corresponding parsing of the url.
221 url_parse::Parsed parsed;
222 const base::string16 url_string =
223 net::FormatUrl(url, languages, net::kFormatUrlOmitAll,
224 net::UnescapeRule::SPACES, &parsed, NULL, NULL);
225 if (available_pixel_width <= 0)
226 return url_string;
227
228 // If non-standard, return plain eliding.
229 if (!url.IsStandard())
230 return ElideText(url_string, font_list, available_pixel_width,
231 ELIDE_AT_END);
232
233 // Now start eliding url_string to fit within available pixel width.
234 // Fist pass - check to see whether entire url_string fits.
235 const float pixel_width_url_string = GetStringWidthF(url_string, font_list);
236 if (available_pixel_width >= pixel_width_url_string)
237 return url_string;
238
239 // Get the path substring, including query and reference.
240 const size_t path_start_index = parsed.path.begin;
241 const size_t path_len = parsed.path.len;
242 base::string16 url_path_query_etc = url_string.substr(path_start_index);
243 base::string16 url_path = url_string.substr(path_start_index, path_len);
244
245 // Return general elided text if url minus the query fits.
246 const base::string16 url_minus_query =
247 url_string.substr(0, path_start_index + path_len);
248 if (available_pixel_width >= GetStringWidthF(url_minus_query, font_list))
249 return ElideText(url_string, font_list, available_pixel_width,
250 ELIDE_AT_END);
251
252 // Get Host.
253 base::string16 url_host = UTF8ToUTF16(url.host());
254
255 // Get domain and registry information from the URL.
256 base::string16 url_domain = UTF8ToUTF16(
257 net::registry_controlled_domains::GetDomainAndRegistry(
258 url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES));
259 if (url_domain.empty())
260 url_domain = url_host;
261
262 // Add port if required.
263 if (!url.port().empty()) {
264 url_host += UTF8ToUTF16(":" + url.port());
265 url_domain += UTF8ToUTF16(":" + url.port());
266 }
267
268 // Get sub domain.
269 base::string16 url_subdomain;
270 const size_t domain_start_index = url_host.find(url_domain);
271 if (domain_start_index != base::string16::npos)
272 url_subdomain = url_host.substr(0, domain_start_index);
273 const base::string16 kWwwPrefix = UTF8ToUTF16("www.");
274 if ((url_subdomain == kWwwPrefix || url_subdomain.empty() ||
275 url.SchemeIsFile())) {
276 url_subdomain.clear();
277 }
278
279 // If this is a file type, the path is now defined as everything after ":".
280 // For example, "C:/aa/aa/bb", the path is "/aa/bb/cc". Interesting, the
281 // domain is now C: - this is a nice hack for eliding to work pleasantly.
282 if (url.SchemeIsFile()) {
283 // Split the path string using ":"
284 std::vector<base::string16> file_path_split;
285 base::SplitString(url_path, ':', &file_path_split);
286 if (file_path_split.size() > 1) { // File is of type "file:///C:/.."
287 url_host.clear();
288 url_domain.clear();
289 url_subdomain.clear();
290
291 const base::string16 kColon = UTF8ToUTF16(":");
292 url_host = url_domain = file_path_split.at(0).substr(1) + kColon;
293 url_path_query_etc = url_path = file_path_split.at(1);
294 }
295 }
296
297 // Second Pass - remove scheme - the rest fits.
298 const float pixel_width_url_host = GetStringWidthF(url_host, font_list);
299 const float pixel_width_url_path = GetStringWidthF(url_path_query_etc,
300 font_list);
301 if (available_pixel_width >=
302 pixel_width_url_host + pixel_width_url_path)
303 return url_host + url_path_query_etc;
304
305 // Third Pass: Subdomain, domain and entire path fits.
306 const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list);
307 const float pixel_width_url_subdomain =
308 GetStringWidthF(url_subdomain, font_list);
309 if (available_pixel_width >=
310 pixel_width_url_subdomain + pixel_width_url_domain +
311 pixel_width_url_path)
312 return url_subdomain + url_domain + url_path_query_etc;
313
314 // Query element.
315 base::string16 url_query;
316 const float kPixelWidthDotsTrailer = GetStringWidthF(
317 base::string16(kEllipsisUTF16), font_list);
318 if (parsed.query.is_nonempty()) {
319 url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin);
320 if (available_pixel_width >=
321 (pixel_width_url_subdomain + pixel_width_url_domain +
322 pixel_width_url_path - GetStringWidthF(url_query, font_list))) {
323 return ElideText(url_subdomain + url_domain + url_path_query_etc,
324 font_list, available_pixel_width, ELIDE_AT_END);
325 }
326 }
327
328 // Parse url_path using '/'.
329 std::vector<base::string16> url_path_elements;
330 base::SplitString(url_path, kForwardSlash, &url_path_elements);
331
332 // Get filename - note that for a path ending with /
333 // such as www.google.com/intl/ads/, the file name is ads/.
334 size_t url_path_number_of_elements = url_path_elements.size();
335 DCHECK(url_path_number_of_elements != 0);
336 base::string16 url_filename;
337 if ((url_path_elements.at(url_path_number_of_elements - 1)).length() > 0) {
338 url_filename = *(url_path_elements.end() - 1);
339 } else if (url_path_number_of_elements > 1) { // Path ends with a '/'.
340 url_filename = url_path_elements.at(url_path_number_of_elements - 2) +
341 kForwardSlash;
342 url_path_number_of_elements--;
343 }
344 DCHECK(url_path_number_of_elements != 0);
345
346 const size_t kMaxNumberOfUrlPathElementsAllowed = 1024;
347 if (url_path_number_of_elements <= 1 ||
348 url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) {
349 // No path to elide, or too long of a path (could overflow in loop below)
350 // Just elide this as a text string.
351 return ElideText(url_subdomain + url_domain + url_path_query_etc, font_list,
352 available_pixel_width, ELIDE_AT_END);
353 }
354
355 // Start eliding the path and replacing elements by ".../".
356 const base::string16 kEllipsisAndSlash =
357 base::string16(kEllipsisUTF16) + kForwardSlash;
358 const float pixel_width_ellipsis_slash =
359 GetStringWidthF(kEllipsisAndSlash, font_list);
360
361 // Check with both subdomain and domain.
362 base::string16 elided_path =
363 ElideComponentizedPath(url_subdomain + url_domain, url_path_elements,
364 url_filename, url_query, font_list,
365 available_pixel_width);
366 if (!elided_path.empty())
367 return elided_path;
368
369 // Check with only domain.
370 // If a subdomain is present, add an ellipsis before domain.
371 // This is added only if the subdomain pixel width is larger than
372 // the pixel width of kEllipsis. Otherwise, subdomain remains,
373 // which means that this case has been resolved earlier.
374 base::string16 url_elided_domain = url_subdomain + url_domain;
375 if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) {
376 if (!url_subdomain.empty())
377 url_elided_domain = kEllipsisAndSlash[0] + url_domain;
378 else
379 url_elided_domain = url_domain;
380
381 elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements,
382 url_filename, url_query, font_list,
383 available_pixel_width);
384
385 if (!elided_path.empty())
386 return elided_path;
387 }
388
389 // Return elided domain/.../filename anyway.
390 base::string16 final_elided_url_string(url_elided_domain);
391 const float url_elided_domain_width = GetStringWidthF(url_elided_domain,
392 font_list);
393
394 // A hack to prevent trailing ".../...".
395 if ((available_pixel_width - url_elided_domain_width) >
396 pixel_width_ellipsis_slash + kPixelWidthDotsTrailer +
397 GetStringWidthF(ASCIIToUTF16("UV"), font_list)) {
398 final_elided_url_string += BuildPathFromComponents(base::string16(),
399 url_path_elements, url_filename, 1);
400 } else {
401 final_elided_url_string += url_path;
402 }
403
404 return ElideText(final_elided_url_string, font_list, available_pixel_width,
405 ELIDE_AT_END);
406 }
407
// Shortens |filename| for display within |available_pixel_width|, preferring
// to keep the extension visible. In order of preference the result is: the
// full value if it fits; the base name plus extension if that fits; the base
// name elided at its end followed by the intact extension; or, when the
// extension alone is at least as wide as the available space, base name plus
// extension elided in the middle. Names lacking a root or an extension are
// simply elided at the end. The result is always passed through
// GetDisplayStringInLTRDirectionality().
base::string16 ElideFilename(const base::FilePath& filename,
                             const FontList& font_list,
                             float available_pixel_width) {
#if defined(OS_WIN)
  base::string16 filename_utf16 = filename.value();
  base::string16 extension = filename.Extension();
  base::string16 rootname = filename.BaseName().RemoveExtension().value();
#elif defined(OS_POSIX)
  base::string16 filename_utf16 = WideToUTF16(base::SysNativeMBToWide(
      filename.value()));
  base::string16 extension = WideToUTF16(base::SysNativeMBToWide(
      filename.Extension()));
  base::string16 rootname = WideToUTF16(base::SysNativeMBToWide(
      filename.BaseName().RemoveExtension().value()));
#endif

  base::string16 display_name;
  if (GetStringWidthF(filename_utf16, font_list) <= available_pixel_width) {
    // Everything fits.
    display_name = filename_utf16;
  } else if (rootname.empty() || extension.empty()) {
    // No root/extension split to exploit; elide the whole value at the end.
    display_name = ElideText(filename_utf16, font_list, available_pixel_width,
                             ELIDE_AT_END);
  } else {
    const float root_width = GetStringWidthF(rootname, font_list);
    const float ext_width = GetStringWidthF(extension, font_list);

    if (root_width + ext_width <= available_pixel_width) {
      // Dropping the leading path was enough.
      display_name = rootname + extension;
    } else if (ext_width >= available_pixel_width) {
      // The extension alone does not leave room for any of the root.
      display_name = ElideText(rootname + extension, font_list,
                               available_pixel_width, ELIDE_IN_MIDDLE);
    } else {
      // Elide the root into the space left beside the full extension.
      const float available_root_width = available_pixel_width - ext_width;
      display_name = ElideText(rootname, font_list, available_root_width,
                               ELIDE_AT_END);
      display_name += extension;
    }
  }

  return base::i18n::GetDisplayStringInLTRDirectionality(display_name);
}
456
// Shortens |text| so that it renders within |available_pixel_width| using
// |font_list|. |elide_behavior| selects where text is removed (middle or end)
// and whether an ellipsis marks the cut (it does unless TRUNCATE_AT_END).
// Returns |text| unchanged when it is empty or already fits, and an empty
// string when an ellipsis is wanted but is itself too wide.
base::string16 ElideText(const base::string16& text,
                         const FontList& font_list,
                         float available_pixel_width,
                         ElideBehavior elide_behavior) {
  if (text.empty())
    return text;

  const float text_width = GetStringWidthF(text, font_list);
  const bool cut_in_middle = elide_behavior == ELIDE_IN_MIDDLE;
  const bool use_ellipsis = elide_behavior != TRUNCATE_AT_END;

  const base::string16 ellipsis(kEllipsisUTF16);
  StringSlicer slicer(text, ellipsis, cut_in_middle);

  // Pango will return 0 width for absurdly long strings. Cut the string in
  // half and try again.
  // This is caused by an int overflow in Pango (specifically, in
  // pango_glyph_string_extents_range). It's actually more subtle than just
  // returning 0, since on super absurdly long strings, the int can wrap and
  // return positive numbers again. Detecting that is probably not worth it
  // (eliding way too much from a ridiculous string is probably still
  // ridiculous), but we should check other widths for bogus values as well.
  // (|text| is known to be non-empty at this point.)
  if (text_width <= 0) {
    const base::string16 halved = slicer.CutString(text.length() / 2, false);
    return ElideText(halved, font_list, available_pixel_width, elide_behavior);
  }

  if (text_width <= available_pixel_width)
    return text;

  if (use_ellipsis &&
      GetStringWidthF(ellipsis, font_list) > available_pixel_width) {
    return base::string16();
  }

  // Binary search over the number of code units to keep.
  size_t lo = 0;
  size_t hi = text.length() - 1;
  size_t guess = (lo + hi) / 2;
  while (lo <= hi) {
    // Measure the whole candidate at once so that kerning/ligatures/etc. are
    // accounted for.
    const base::string16 candidate = slicer.CutString(guess, use_ellipsis);
    const float candidate_width = GetStringWidthF(candidate, font_list);

    // Same Pango overflow guard as above: halve the current cut and start
    // over.
    if (candidate_width <= 0) {
      return ElideText(slicer.CutString(guess / 2, false),
                       font_list, available_pixel_width, elide_behavior);
    }

    if (candidate_width > available_pixel_width)
      hi = guess - 1;
    else
      lo = guess + 1;
    guess = (lo + hi) / 2;
  }

  return slicer.CutString(guess, use_ellipsis);
}
514
// Single-font convenience overload: wraps |font| in a FontList and forwards to
// the FontList-based ElideText().
base::string16 ElideText(const base::string16& text,
                         const Font& font,
                         float available_pixel_width,
                         ElideBehavior elide_behavior) {
  const FontList font_list(font);
  return ElideText(text, font_list, available_pixel_width, elide_behavior);
}
521
SortedDisplayURL(const GURL & url,const std::string & languages)522 SortedDisplayURL::SortedDisplayURL(const GURL& url,
523 const std::string& languages) {
524 net::AppendFormattedHost(url, languages, &sort_host_);
525 base::string16 host_minus_www = net::StripWWW(sort_host_);
526 url_parse::Parsed parsed;
527 display_url_ =
528 net::FormatUrl(url, languages, net::kFormatUrlOmitAll,
529 net::UnescapeRule::SPACES, &parsed, &prefix_end_, NULL);
530 if (sort_host_.length() > host_minus_www.length()) {
531 prefix_end_ += sort_host_.length() - host_minus_www.length();
532 sort_host_.swap(host_minus_www);
533 }
534 }
535
SortedDisplayURL()536 SortedDisplayURL::SortedDisplayURL() : prefix_end_(0) {
537 }
538
~SortedDisplayURL()539 SortedDisplayURL::~SortedDisplayURL() {
540 }
541
Compare(const SortedDisplayURL & other,icu::Collator * collator) const542 int SortedDisplayURL::Compare(const SortedDisplayURL& other,
543 icu::Collator* collator) const {
544 // Compare on hosts first. The host won't contain 'www.'.
545 UErrorCode compare_status = U_ZERO_ERROR;
546 UCollationResult host_compare_result = collator->compare(
547 static_cast<const UChar*>(sort_host_.c_str()),
548 static_cast<int>(sort_host_.length()),
549 static_cast<const UChar*>(other.sort_host_.c_str()),
550 static_cast<int>(other.sort_host_.length()),
551 compare_status);
552 DCHECK(U_SUCCESS(compare_status));
553 if (host_compare_result != 0)
554 return host_compare_result;
555
556 // Hosts match, compare on the portion of the url after the host.
557 base::string16 path = this->AfterHost();
558 base::string16 o_path = other.AfterHost();
559 compare_status = U_ZERO_ERROR;
560 UCollationResult path_compare_result = collator->compare(
561 static_cast<const UChar*>(path.c_str()),
562 static_cast<int>(path.length()),
563 static_cast<const UChar*>(o_path.c_str()),
564 static_cast<int>(o_path.length()),
565 compare_status);
566 DCHECK(U_SUCCESS(compare_status));
567 if (path_compare_result != 0)
568 return path_compare_result;
569
570 // Hosts and paths match, compare on the complete url. This'll push the www.
571 // ones to the end.
572 compare_status = U_ZERO_ERROR;
573 UCollationResult display_url_compare_result = collator->compare(
574 static_cast<const UChar*>(display_url_.c_str()),
575 static_cast<int>(display_url_.length()),
576 static_cast<const UChar*>(other.display_url_.c_str()),
577 static_cast<int>(other.display_url_.length()),
578 compare_status);
579 DCHECK(U_SUCCESS(compare_status));
580 return display_url_compare_result;
581 }
582
AfterHost() const583 base::string16 SortedDisplayURL::AfterHost() const {
584 const size_t slash_index = display_url_.find(sort_host_, prefix_end_);
585 if (slash_index == base::string16::npos) {
586 NOTREACHED();
587 return base::string16();
588 }
589 return display_url_.substr(slash_index + sort_host_.length());
590 }
591
ElideString(const base::string16 & input,int max_len,base::string16 * output)592 bool ElideString(const base::string16& input, int max_len,
593 base::string16* output) {
594 DCHECK_GE(max_len, 0);
595 if (static_cast<int>(input.length()) <= max_len) {
596 output->assign(input);
597 return false;
598 }
599
600 switch (max_len) {
601 case 0:
602 output->clear();
603 break;
604 case 1:
605 output->assign(input.substr(0, 1));
606 break;
607 case 2:
608 output->assign(input.substr(0, 2));
609 break;
610 case 3:
611 output->assign(input.substr(0, 1) + ASCIIToUTF16(".") +
612 input.substr(input.length() - 1));
613 break;
614 case 4:
615 output->assign(input.substr(0, 1) + ASCIIToUTF16("..") +
616 input.substr(input.length() - 1));
617 break;
618 default: {
619 int rstr_len = (max_len - 3) / 2;
620 int lstr_len = rstr_len + ((max_len - 3) % 2);
621 output->assign(input.substr(0, lstr_len) + ASCIIToUTF16("...") +
622 input.substr(input.length() - rstr_len));
623 break;
624 }
625 }
626
627 return true;
628 }
629
630 namespace {
631
632 // Internal class used to track progress of a rectangular string elide
633 // operation. Exists so the top-level ElideRectangleString() function
634 // can be broken into smaller methods sharing this state.
635 class RectangleString {
636 public:
RectangleString(size_t max_rows,size_t max_cols,bool strict,base::string16 * output)637 RectangleString(size_t max_rows, size_t max_cols,
638 bool strict, base::string16 *output)
639 : max_rows_(max_rows),
640 max_cols_(max_cols),
641 current_row_(0),
642 current_col_(0),
643 strict_(strict),
644 suppressed_(false),
645 output_(output) {}
646
647 // Perform deferred initializations following creation. Must be called
648 // before any input can be added via AddString().
Init()649 void Init() { output_->clear(); }
650
651 // Add an input string, reformatting to fit the desired dimensions.
652 // AddString() may be called multiple times to concatenate together
653 // multiple strings into the region (the current caller doesn't do
654 // this, however).
655 void AddString(const base::string16& input);
656
657 // Perform any deferred output processing. Must be called after the
658 // last AddString() call has occurred.
659 bool Finalize();
660
661 private:
662 // Add a line to the rectangular region at the current position,
663 // either by itself or by breaking it into words.
664 void AddLine(const base::string16& line);
665
666 // Add a word to the rectangular region at the current position,
667 // either by itself or by breaking it into characters.
668 void AddWord(const base::string16& word);
669
670 // Add text to the output string if the rectangular boundaries
671 // have not been exceeded, advancing the current position.
672 void Append(const base::string16& string);
673
674 // Set the current position to the beginning of the next line. If
675 // |output| is true, add a newline to the output string if the rectangular
676 // boundaries have not been exceeded. If |output| is false, we assume
677 // some other mechanism will (likely) do similar breaking after the fact.
678 void NewLine(bool output);
679
680 // Maximum number of rows allowed in the output string.
681 size_t max_rows_;
682
683 // Maximum number of characters allowed in the output string.
684 size_t max_cols_;
685
686 // Current row position, always incremented and may exceed max_rows_
687 // when the input can not fit in the region. We stop appending to
688 // the output string, however, when this condition occurs. In the
689 // future, we may want to expose this value to allow the caller to
690 // determine how many rows would actually be required to hold the
691 // formatted string.
692 size_t current_row_;
693
694 // Current character position, should never exceed max_cols_.
695 size_t current_col_;
696
697 // True when we do whitespace to newline conversions ourselves.
698 bool strict_;
699
700 // True when some of the input has been truncated.
701 bool suppressed_;
702
703 // String onto which the output is accumulated.
704 base::string16* output_;
705
706 DISALLOW_COPY_AND_ASSIGN(RectangleString);
707 };
708
AddString(const base::string16 & input)709 void RectangleString::AddString(const base::string16& input) {
710 base::i18n::BreakIterator lines(input,
711 base::i18n::BreakIterator::BREAK_NEWLINE);
712 if (lines.Init()) {
713 while (lines.Advance())
714 AddLine(lines.GetString());
715 } else {
716 NOTREACHED() << "BreakIterator (lines) init failed";
717 }
718 }
719
Finalize()720 bool RectangleString::Finalize() {
721 if (suppressed_) {
722 output_->append(ASCIIToUTF16("..."));
723 return true;
724 }
725 return false;
726 }
727
AddLine(const base::string16 & line)728 void RectangleString::AddLine(const base::string16& line) {
729 if (line.length() < max_cols_) {
730 Append(line);
731 } else {
732 base::i18n::BreakIterator words(line,
733 base::i18n::BreakIterator::BREAK_SPACE);
734 if (words.Init()) {
735 while (words.Advance())
736 AddWord(words.GetString());
737 } else {
738 NOTREACHED() << "BreakIterator (words) init failed";
739 }
740 }
741 // Account for naturally-occuring newlines.
742 ++current_row_;
743 current_col_ = 0;
744 }
745
AddWord(const base::string16 & word)746 void RectangleString::AddWord(const base::string16& word) {
747 if (word.length() < max_cols_) {
748 // Word can be made to fit, no need to fragment it.
749 if (current_col_ + word.length() >= max_cols_)
750 NewLine(strict_);
751 Append(word);
752 } else {
753 // Word is so big that it must be fragmented.
754 int array_start = 0;
755 int char_start = 0;
756 base::i18n::UTF16CharIterator chars(&word);
757 while (!chars.end()) {
758 // When boundary is hit, add as much as will fit on this line.
759 if (current_col_ + (chars.char_pos() - char_start) >= max_cols_) {
760 Append(word.substr(array_start, chars.array_pos() - array_start));
761 NewLine(true);
762 array_start = chars.array_pos();
763 char_start = chars.char_pos();
764 }
765 chars.Advance();
766 }
767 // Add the last remaining fragment, if any.
768 if (array_start != chars.array_pos())
769 Append(word.substr(array_start, chars.array_pos() - array_start));
770 }
771 }
772
Append(const base::string16 & string)773 void RectangleString::Append(const base::string16& string) {
774 if (current_row_ < max_rows_)
775 output_->append(string);
776 else
777 suppressed_ = true;
778 current_col_ += string.length();
779 }
780
NewLine(bool output)781 void RectangleString::NewLine(bool output) {
782 if (current_row_ < max_rows_) {
783 if (output)
784 output_->append(ASCIIToUTF16("\n"));
785 } else {
786 suppressed_ = true;
787 }
788 ++current_row_;
789 current_col_ = 0;
790 }
791
792 // Internal class used to track progress of a rectangular text elide
793 // operation. Exists so the top-level ElideRectangleText() function
794 // can be broken into smaller methods sharing this state.
795 class RectangleText {
796 public:
RectangleText(const FontList & font_list,float available_pixel_width,int available_pixel_height,WordWrapBehavior wrap_behavior,std::vector<base::string16> * lines)797 RectangleText(const FontList& font_list,
798 float available_pixel_width,
799 int available_pixel_height,
800 WordWrapBehavior wrap_behavior,
801 std::vector<base::string16>* lines)
802 : font_list_(font_list),
803 line_height_(font_list.GetHeight()),
804 available_pixel_width_(available_pixel_width),
805 available_pixel_height_(available_pixel_height),
806 wrap_behavior_(wrap_behavior),
807 current_width_(0),
808 current_height_(0),
809 last_line_ended_in_lf_(false),
810 lines_(lines),
811 insufficient_width_(false),
812 insufficient_height_(false) {}
813
814 // Perform deferred initializions following creation. Must be called
815 // before any input can be added via AddString().
Init()816 void Init() { lines_->clear(); }
817
818 // Add an input string, reformatting to fit the desired dimensions.
819 // AddString() may be called multiple times to concatenate together
820 // multiple strings into the region (the current caller doesn't do
821 // this, however).
822 void AddString(const base::string16& input);
823
824 // Perform any deferred output processing. Must be called after the last
825 // AddString() call has occured. Returns a combination of
826 // |ReformattingResultFlags| indicating whether the given width or height was
827 // insufficient, leading to elision or truncation.
828 int Finalize();
829
830 private:
831 // Add a line to the rectangular region at the current position,
832 // either by itself or by breaking it into words.
833 void AddLine(const base::string16& line);
834
835 // Wrap the specified word across multiple lines.
836 int WrapWord(const base::string16& word);
837
838 // Add a long word - wrapping, eliding or truncating per the wrap behavior.
839 int AddWordOverflow(const base::string16& word);
840
841 // Add a word to the rectangluar region at the current position.
842 int AddWord(const base::string16& word);
843
844 // Append the specified |text| to the current output line, incrementing the
845 // running width by the specified amount. This is an optimization over
846 // |AddToCurrentLine()| when |text_width| is already known.
847 void AddToCurrentLineWithWidth(const base::string16& text, float text_width);
848
849 // Append the specified |text| to the current output line.
850 void AddToCurrentLine(const base::string16& text);
851
852 // Set the current position to the beginning of the next line.
853 bool NewLine();
854
855 // The font list used for measuring text width.
856 const FontList& font_list_;
857
858 // The height of each line of text.
859 const int line_height_;
860
861 // The number of pixels of available width in the rectangle.
862 const float available_pixel_width_;
863
864 // The number of pixels of available height in the rectangle.
865 const int available_pixel_height_;
866
867 // The wrap behavior for words that are too long to fit on a single line.
868 const WordWrapBehavior wrap_behavior_;
869
870 // The current running width.
871 float current_width_;
872
873 // The current running height.
874 int current_height_;
875
876 // The current line of text.
877 base::string16 current_line_;
878
879 // Indicates whether the last line ended with \n.
880 bool last_line_ended_in_lf_;
881
882 // The output vector of lines.
883 std::vector<base::string16>* lines_;
884
885 // Indicates whether a word was so long that it had to be truncated or elided
886 // to fit the available width.
887 bool insufficient_width_;
888
889 // Indicates whether there were too many lines for the available height.
890 bool insufficient_height_;
891
892 DISALLOW_COPY_AND_ASSIGN(RectangleText);
893 };
894
AddString(const base::string16 & input)895 void RectangleText::AddString(const base::string16& input) {
896 base::i18n::BreakIterator lines(input,
897 base::i18n::BreakIterator::BREAK_NEWLINE);
898 if (lines.Init()) {
899 while (!insufficient_height_ && lines.Advance()) {
900 base::string16 line = lines.GetString();
901 // The BREAK_NEWLINE iterator will keep the trailing newline character,
902 // except in the case of the last line, which may not have one. Remove
903 // the newline character, if it exists.
904 last_line_ended_in_lf_ = !line.empty() && line[line.length() - 1] == '\n';
905 if (last_line_ended_in_lf_)
906 line.resize(line.length() - 1);
907 AddLine(line);
908 }
909 } else {
910 NOTREACHED() << "BreakIterator (lines) init failed";
911 }
912 }
913
Finalize()914 int RectangleText::Finalize() {
915 // Remove trailing whitespace from the last line or remove the last line
916 // completely, if it's just whitespace.
917 if (!insufficient_height_ && !lines_->empty()) {
918 TrimWhitespace(lines_->back(), TRIM_TRAILING, &lines_->back());
919 if (lines_->back().empty() && !last_line_ended_in_lf_)
920 lines_->pop_back();
921 }
922 if (last_line_ended_in_lf_)
923 lines_->push_back(base::string16());
924 return (insufficient_width_ ? INSUFFICIENT_SPACE_HORIZONTAL : 0) |
925 (insufficient_height_ ? INSUFFICIENT_SPACE_VERTICAL : 0);
926 }
927
AddLine(const base::string16 & line)928 void RectangleText::AddLine(const base::string16& line) {
929 const float line_width = GetStringWidthF(line, font_list_);
930 if (line_width <= available_pixel_width_) {
931 AddToCurrentLineWithWidth(line, line_width);
932 } else {
933 // Iterate over positions that are valid to break the line at. In general,
934 // these are word boundaries but after any punctuation following the word.
935 base::i18n::BreakIterator words(line,
936 base::i18n::BreakIterator::BREAK_LINE);
937 if (words.Init()) {
938 while (words.Advance()) {
939 const bool truncate = !current_line_.empty();
940 const base::string16& word = words.GetString();
941 const int lines_added = AddWord(word);
942 if (lines_added) {
943 if (truncate) {
944 // Trim trailing whitespace from the line that was added.
945 const int line = lines_->size() - lines_added;
946 TrimWhitespace(lines_->at(line), TRIM_TRAILING, &lines_->at(line));
947 }
948 if (ContainsOnlyWhitespace(word)) {
949 // Skip the first space if the previous line was carried over.
950 current_width_ = 0;
951 current_line_.clear();
952 }
953 }
954 }
955 } else {
956 NOTREACHED() << "BreakIterator (words) init failed";
957 }
958 }
959 // Account for naturally-occuring newlines.
960 NewLine();
961 }
962
WrapWord(const base::string16 & word)963 int RectangleText::WrapWord(const base::string16& word) {
964 // Word is so wide that it must be fragmented.
965 base::string16 text = word;
966 int lines_added = 0;
967 bool first_fragment = true;
968 while (!insufficient_height_ && !text.empty()) {
969 base::string16 fragment =
970 ElideText(text, font_list_, available_pixel_width_,
971 TRUNCATE_AT_END);
972 // At least one character has to be added at every line, even if the
973 // available space is too small.
974 if(fragment.empty())
975 fragment = text.substr(0, 1);
976 if (!first_fragment && NewLine())
977 lines_added++;
978 AddToCurrentLine(fragment);
979 text = text.substr(fragment.length());
980 first_fragment = false;
981 }
982 return lines_added;
983 }
984
AddWordOverflow(const base::string16 & word)985 int RectangleText::AddWordOverflow(const base::string16& word) {
986 int lines_added = 0;
987
988 // Unless this is the very first word, put it on a new line.
989 if (!current_line_.empty()) {
990 if (!NewLine())
991 return 0;
992 lines_added++;
993 }
994
995 if (wrap_behavior_ == IGNORE_LONG_WORDS) {
996 current_line_ = word;
997 current_width_ = available_pixel_width_;
998 } else if (wrap_behavior_ == WRAP_LONG_WORDS) {
999 lines_added += WrapWord(word);
1000 } else {
1001 const ElideBehavior elide_behavior =
1002 (wrap_behavior_ == ELIDE_LONG_WORDS ? ELIDE_AT_END : TRUNCATE_AT_END);
1003 const base::string16 elided_word =
1004 ElideText(word, font_list_, available_pixel_width_, elide_behavior);
1005 AddToCurrentLine(elided_word);
1006 insufficient_width_ = true;
1007 }
1008
1009 return lines_added;
1010 }
1011
AddWord(const base::string16 & word)1012 int RectangleText::AddWord(const base::string16& word) {
1013 int lines_added = 0;
1014 base::string16 trimmed;
1015 TrimWhitespace(word, TRIM_TRAILING, &trimmed);
1016 const float trimmed_width = GetStringWidthF(trimmed, font_list_);
1017 if (trimmed_width <= available_pixel_width_) {
1018 // Word can be made to fit, no need to fragment it.
1019 if ((current_width_ + trimmed_width > available_pixel_width_) && NewLine())
1020 lines_added++;
1021 // Append the non-trimmed word, in case more words are added after.
1022 AddToCurrentLine(word);
1023 } else {
1024 lines_added = AddWordOverflow(wrap_behavior_ == IGNORE_LONG_WORDS ?
1025 trimmed : word);
1026 }
1027 return lines_added;
1028 }
1029
AddToCurrentLine(const base::string16 & text)1030 void RectangleText::AddToCurrentLine(const base::string16& text) {
1031 AddToCurrentLineWithWidth(text, GetStringWidthF(text, font_list_));
1032 }
1033
AddToCurrentLineWithWidth(const base::string16 & text,float text_width)1034 void RectangleText::AddToCurrentLineWithWidth(const base::string16& text,
1035 float text_width) {
1036 if (current_height_ >= available_pixel_height_) {
1037 insufficient_height_ = true;
1038 return;
1039 }
1040 current_line_.append(text);
1041 current_width_ += text_width;
1042 }
1043
NewLine()1044 bool RectangleText::NewLine() {
1045 bool line_added = false;
1046 if (current_height_ < available_pixel_height_) {
1047 lines_->push_back(current_line_);
1048 current_line_.clear();
1049 line_added = true;
1050 } else {
1051 insufficient_height_ = true;
1052 }
1053 current_height_ += line_height_;
1054 current_width_ = 0;
1055 return line_added;
1056 }
1057
1058 } // namespace
1059
ElideRectangleString(const base::string16 & input,size_t max_rows,size_t max_cols,bool strict,base::string16 * output)1060 bool ElideRectangleString(const base::string16& input, size_t max_rows,
1061 size_t max_cols, bool strict,
1062 base::string16* output) {
1063 RectangleString rect(max_rows, max_cols, strict, output);
1064 rect.Init();
1065 rect.AddString(input);
1066 return rect.Finalize();
1067 }
1068
ElideRectangleText(const base::string16 & input,const FontList & font_list,float available_pixel_width,int available_pixel_height,WordWrapBehavior wrap_behavior,std::vector<base::string16> * lines)1069 int ElideRectangleText(const base::string16& input,
1070 const FontList& font_list,
1071 float available_pixel_width,
1072 int available_pixel_height,
1073 WordWrapBehavior wrap_behavior,
1074 std::vector<base::string16>* lines) {
1075 RectangleText rect(font_list,
1076 available_pixel_width,
1077 available_pixel_height,
1078 wrap_behavior,
1079 lines);
1080 rect.Init();
1081 rect.AddString(input);
1082 return rect.Finalize();
1083 }
1084
// Shortens |string| to at most |length| UTF-16 code units, ending the result
// with an ellipsis (U+2026) whenever text was removed. The cut is placed at a
// line-break opportunity where one exists, and whitespace, control characters
// and non-spacing marks immediately before the cut are dropped.
base::string16 TruncateString(const base::string16& string, size_t length) {
  // Appended to strings that are too big.
  static const base::char16 kElideString[] = { 0x2026, 0 };

  // The string already fits: hand it back unchanged.
  if (string.size() <= length)
    return string;

  // No room at all, not even for the elide string.
  if (length == 0)
    return base::string16();

  // One slot is reserved for the elide string; with nothing else left, the
  // elide string is the entire result.
  const size_t max = length - 1;
  if (max == 0)
    return kElideString;

  // Locate the last line-break opportunity before |max|.
  UErrorCode status = U_ZERO_ERROR;
  scoped_ptr<icu::RuleBasedBreakIterator> bi(
      static_cast<icu::RuleBasedBreakIterator*>(
          icu::RuleBasedBreakIterator::createLineInstance(
              icu::Locale::getDefault(), status)));
  if (U_FAILURE(status))
    return string.substr(0, max) + kElideString;

  bi->setText(string.c_str());
  const int32_t limit = static_cast<int32_t>(max);
  const int32_t line_break = bi->preceding(limit);
  if (line_break == icu::BreakIterator::DONE)
    return string.substr(0, limit) + kElideString;

  // A break at 0 means there was no usable break inside the string; in that
  // case start scanning from |limit| instead.
  icu::StringCharacterIterator cursor(string.c_str());
  cursor.setIndex(line_break == 0 ? limit : line_break);

  // Step backwards over whitespace, control characters and non-spacing marks
  // until a character worth keeping is found.
  while (cursor.hasPrevious()) {
    cursor.previous();
    const int32_t ch = cursor.current();
    const bool skippable = u_isspace(ch) ||
                           u_charType(ch) == U_CONTROL_CHAR ||
                           u_charType(ch) == U_NON_SPACING_MARK;
    if (!skippable) {
      // Step forward again so that this character is kept in the result.
      cursor.next();
      break;
    }
  }

  // Everything before the starting point was skippable: only the elide
  // string is left.
  if (!cursor.hasPrevious())
    return kElideString;

  const int32_t cut = cursor.getIndex();
  return string.substr(0, cut) + kElideString;
}
1146
1147 } // namespace gfx
1148