1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/i18n/break_iterator.h"
6
7 #include "base/logging.h"
8 #include "unicode/ubrk.h"
9 #include "unicode/uchar.h"
10 #include "unicode/ustring.h"
11
12 namespace base {
13
// "No position" sentinel: the largest value representable in size_t.
// The code below uses the unqualified name `npos` for prev_ before the first
// Advance() and for pos_ once the underlying iterator is exhausted.
const size_t npos = static_cast<size_t>(-1);
15
BreakIterator(const string16 * str,BreakType break_type)16 BreakIterator::BreakIterator(const string16* str, BreakType break_type)
17 : iter_(NULL),
18 string_(str),
19 break_type_(break_type),
20 prev_(npos),
21 pos_(0) {
22 }
23
~BreakIterator()24 BreakIterator::~BreakIterator() {
25 if (iter_)
26 ubrk_close(static_cast<UBreakIterator*>(iter_));
27 }
28
Init()29 bool BreakIterator::Init() {
30 UErrorCode status = U_ZERO_ERROR;
31 UBreakIteratorType break_type;
32 switch (break_type_) {
33 case BREAK_WORD:
34 break_type = UBRK_WORD;
35 break;
36 case BREAK_LINE:
37 case BREAK_NEWLINE:
38 break_type = UBRK_LINE;
39 break;
40 default:
41 NOTREACHED() << "invalid break_type_";
42 return false;
43 }
44 iter_ = ubrk_open(break_type, NULL,
45 string_->data(), static_cast<int32_t>(string_->size()),
46 &status);
47 if (U_FAILURE(status)) {
48 NOTREACHED() << "ubrk_open failed";
49 return false;
50 }
51 // Move the iterator to the beginning of the string.
52 ubrk_first(static_cast<UBreakIterator*>(iter_));
53 return true;
54 }
55
Advance()56 bool BreakIterator::Advance() {
57 int32_t pos;
58 int32_t status;
59 prev_ = pos_;
60 switch (break_type_) {
61 case BREAK_WORD:
62 case BREAK_LINE:
63 pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
64 if (pos == UBRK_DONE) {
65 pos_ = npos;
66 return false;
67 }
68 pos_ = static_cast<size_t>(pos);
69 return true;
70 case BREAK_NEWLINE:
71 do {
72 pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
73 if (pos == UBRK_DONE) {
74 break;
75 }
76 pos_ = static_cast<size_t>(pos);
77 status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
78 } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT);
79 if (pos == UBRK_DONE && prev_ == pos_) {
80 pos_ = npos;
81 return false;
82 }
83 return true;
84 default:
85 NOTREACHED() << "invalid break_type_";
86 return false;
87 }
88 }
89
IsWord() const90 bool BreakIterator::IsWord() const {
91 return (break_type_ == BREAK_WORD &&
92 ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_)) !=
93 UBRK_WORD_NONE);
94 }
95
GetString() const96 string16 BreakIterator::GetString() const {
97 DCHECK(prev_ != npos && pos_ != npos);
98 return string_->substr(prev_, pos_ - prev_);
99 }
100
101 } // namespace base
102