• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #include "base/i18n/break_iterator.h"
6 
7 #include "base/logging.h"
8 #include "third_party/icu/source/common/unicode/ubrk.h"
9 #include "third_party/icu/source/common/unicode/uchar.h"
10 #include "third_party/icu/source/common/unicode/ustring.h"
11 
12 namespace base {
13 namespace i18n {
14 
// Sentinel stored in |prev_| / |pos_| to mean "no valid position". It is the
// largest size_t value, i.e. -1 converted to an unsigned type.
const size_t npos = static_cast<size_t>(-1);
16 
BreakIterator(const string16 & str,BreakType break_type)17 BreakIterator::BreakIterator(const string16& str, BreakType break_type)
18     : iter_(NULL),
19       string_(str),
20       break_type_(break_type),
21       prev_(npos),
22       pos_(0) {
23 }
24 
~BreakIterator()25 BreakIterator::~BreakIterator() {
26   if (iter_)
27     ubrk_close(static_cast<UBreakIterator*>(iter_));
28 }
29 
Init()30 bool BreakIterator::Init() {
31   UErrorCode status = U_ZERO_ERROR;
32   UBreakIteratorType break_type;
33   switch (break_type_) {
34     case BREAK_CHARACTER:
35       break_type = UBRK_CHARACTER;
36       break;
37     case BREAK_WORD:
38       break_type = UBRK_WORD;
39       break;
40     case BREAK_LINE:
41     case BREAK_NEWLINE:
42       break_type = UBRK_LINE;
43       break;
44     default:
45       NOTREACHED() << "invalid break_type_";
46       return false;
47   }
48   iter_ = ubrk_open(break_type, NULL,
49                     string_.data(), static_cast<int32_t>(string_.size()),
50                     &status);
51   if (U_FAILURE(status)) {
52     NOTREACHED() << "ubrk_open failed";
53     return false;
54   }
55   // Move the iterator to the beginning of the string.
56   ubrk_first(static_cast<UBreakIterator*>(iter_));
57   return true;
58 }
59 
Advance()60 bool BreakIterator::Advance() {
61   int32_t pos;
62   int32_t status;
63   prev_ = pos_;
64   switch (break_type_) {
65     case BREAK_CHARACTER:
66     case BREAK_WORD:
67     case BREAK_LINE:
68       pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
69       if (pos == UBRK_DONE) {
70         pos_ = npos;
71         return false;
72       }
73       pos_ = static_cast<size_t>(pos);
74       return true;
75     case BREAK_NEWLINE:
76       do {
77         pos = ubrk_next(static_cast<UBreakIterator*>(iter_));
78         if (pos == UBRK_DONE)
79           break;
80         pos_ = static_cast<size_t>(pos);
81         status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
82       } while (status >= UBRK_LINE_SOFT && status < UBRK_LINE_SOFT_LIMIT);
83       if (pos == UBRK_DONE && prev_ == pos_) {
84         pos_ = npos;
85         return false;
86       }
87       return true;
88     default:
89       NOTREACHED() << "invalid break_type_";
90       return false;
91   }
92 }
93 
IsWord() const94 bool BreakIterator::IsWord() const {
95   int32_t status = ubrk_getRuleStatus(static_cast<UBreakIterator*>(iter_));
96   return (break_type_ == BREAK_WORD && status != UBRK_WORD_NONE);
97 }
98 
IsEndOfWord(size_t position) const99 bool BreakIterator::IsEndOfWord(size_t position) const {
100   if (break_type_ != BREAK_WORD)
101     return false;
102 
103   UBreakIterator* iter = static_cast<UBreakIterator*>(iter_);
104   UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position));
105   int32_t status = ubrk_getRuleStatus(iter);
106   return (!!boundary && status != UBRK_WORD_NONE);
107 }
108 
IsStartOfWord(size_t position) const109 bool BreakIterator::IsStartOfWord(size_t position) const {
110   if (break_type_ != BREAK_WORD)
111     return false;
112 
113   UBreakIterator* iter = static_cast<UBreakIterator*>(iter_);
114   UBool boundary = ubrk_isBoundary(iter, static_cast<int32_t>(position));
115   ubrk_next(iter);
116   int32_t next_status = ubrk_getRuleStatus(iter);
117   return (!!boundary && next_status != UBRK_WORD_NONE);
118 }
119 
GetString() const120 string16 BreakIterator::GetString() const {
121   DCHECK(prev_ != npos && pos_ != npos);
122   return string_.substr(prev_, pos_ - prev_);
123 }
124 
125 }  // namespace i18n
126 }  // namespace base
127