1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ********************************************************************************
5 * Copyright (C) 1996-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ********************************************************************************
8 */
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_BREAK_ITERATION
13
14 #include "unicode/ubrk.h"
15
16 #include "unicode/brkiter.h"
17 #include "unicode/uloc.h"
18 #include "unicode/ustring.h"
19 #include "unicode/uchriter.h"
20 #include "unicode/rbbi.h"
21 #include "rbbirb.h"
22 #include "uassert.h"
23 #include "cmemory.h"
24
25 U_NAMESPACE_USE
26
27 //------------------------------------------------------------------------------
28 //
29 // ubrk_open Create a canned type of break iterator based on type (word, line, etc.)
30 // and locale.
31 //
32 //------------------------------------------------------------------------------
33 U_CAPI UBreakIterator* U_EXPORT2
ubrk_open(UBreakIteratorType type,const char * locale,const UChar * text,int32_t textLength,UErrorCode * status)34 ubrk_open(UBreakIteratorType type,
35 const char *locale,
36 const UChar *text,
37 int32_t textLength,
38 UErrorCode *status)
39 {
40
41 if(U_FAILURE(*status)) return 0;
42
43 BreakIterator *result = 0;
44
45 switch(type) {
46
47 case UBRK_CHARACTER:
48 result = BreakIterator::createCharacterInstance(Locale(locale), *status);
49 break;
50
51 case UBRK_WORD:
52 result = BreakIterator::createWordInstance(Locale(locale), *status);
53 break;
54
55 case UBRK_LINE:
56 result = BreakIterator::createLineInstance(Locale(locale), *status);
57 break;
58
59 case UBRK_SENTENCE:
60 result = BreakIterator::createSentenceInstance(Locale(locale), *status);
61 break;
62
63 case UBRK_TITLE:
64 result = BreakIterator::createTitleInstance(Locale(locale), *status);
65 break;
66
67 default:
68 *status = U_ILLEGAL_ARGUMENT_ERROR;
69 }
70
71 // check for allocation error
72 if (U_FAILURE(*status)) {
73 return 0;
74 }
75 if(result == 0) {
76 *status = U_MEMORY_ALLOCATION_ERROR;
77 return 0;
78 }
79
80
81 UBreakIterator *uBI = (UBreakIterator *)result;
82 if (text != NULL) {
83 ubrk_setText(uBI, text, textLength, status);
84 }
85 return uBI;
86 }
87
88
89
90 //------------------------------------------------------------------------------
91 //
92 // ubrk_openRules open a break iterator from a set of break rules.
93 // Invokes the rule builder.
94 //
95 //------------------------------------------------------------------------------
96 U_CAPI UBreakIterator* U_EXPORT2
ubrk_openRules(const UChar * rules,int32_t rulesLength,const UChar * text,int32_t textLength,UParseError * parseErr,UErrorCode * status)97 ubrk_openRules( const UChar *rules,
98 int32_t rulesLength,
99 const UChar *text,
100 int32_t textLength,
101 UParseError *parseErr,
102 UErrorCode *status) {
103
104 if (status == NULL || U_FAILURE(*status)){
105 return 0;
106 }
107
108 BreakIterator *result = 0;
109 UnicodeString ruleString(rules, rulesLength);
110 result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status);
111 if(U_FAILURE(*status)) {
112 return 0;
113 }
114
115 UBreakIterator *uBI = (UBreakIterator *)result;
116 if (text != NULL) {
117 ubrk_setText(uBI, text, textLength, status);
118 }
119 return uBI;
120 }
121
122
123 U_CAPI UBreakIterator* U_EXPORT2
ubrk_openBinaryRules(const uint8_t * binaryRules,int32_t rulesLength,const UChar * text,int32_t textLength,UErrorCode * status)124 ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
125 const UChar * text, int32_t textLength,
126 UErrorCode * status)
127 {
128 if (U_FAILURE(*status)) {
129 return NULL;
130 }
131 if (rulesLength < 0) {
132 *status = U_ILLEGAL_ARGUMENT_ERROR;
133 return NULL;
134 }
135 LocalPointer<RuleBasedBreakIterator> lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status);
136 if (U_FAILURE(*status)) {
137 return NULL;
138 }
139 UBreakIterator *uBI = reinterpret_cast<UBreakIterator *>(lpRBBI.orphan());
140 if (text != NULL) {
141 ubrk_setText(uBI, text, textLength, status);
142 }
143 return uBI;
144 }
145
146
147 U_CAPI UBreakIterator * U_EXPORT2
ubrk_safeClone(const UBreakIterator * bi,void *,int32_t * pBufferSize,UErrorCode * status)148 ubrk_safeClone(
149 const UBreakIterator *bi,
150 void * /*stackBuffer*/,
151 int32_t *pBufferSize,
152 UErrorCode *status)
153 {
154 if (status == NULL || U_FAILURE(*status)){
155 return NULL;
156 }
157 if (bi == NULL) {
158 *status = U_ILLEGAL_ARGUMENT_ERROR;
159 return NULL;
160 }
161 if (pBufferSize != NULL) {
162 int32_t inputSize = *pBufferSize;
163 *pBufferSize = 1;
164 if (inputSize == 0) {
165 return NULL; // preflighting for deprecated functionality
166 }
167 }
168 BreakIterator *newBI = ((BreakIterator *)bi)->clone();
169 if (newBI == NULL) {
170 *status = U_MEMORY_ALLOCATION_ERROR;
171 } else {
172 *status = U_SAFECLONE_ALLOCATED_WARNING;
173 }
174 return (UBreakIterator *)newBI;
175 }
176
177
178
179 U_CAPI void U_EXPORT2
ubrk_close(UBreakIterator * bi)180 ubrk_close(UBreakIterator *bi)
181 {
182 delete (BreakIterator *)bi;
183 }
184
185 U_CAPI void U_EXPORT2
ubrk_setText(UBreakIterator * bi,const UChar * text,int32_t textLength,UErrorCode * status)186 ubrk_setText(UBreakIterator* bi,
187 const UChar* text,
188 int32_t textLength,
189 UErrorCode* status)
190 {
191 UText ut = UTEXT_INITIALIZER;
192 utext_openUChars(&ut, text, textLength, status);
193 ((BreakIterator*)bi)->setText(&ut, *status);
194 // A stack allocated UText wrapping a UChar * string
195 // can be dumped without explicitly closing it.
196 }
197
198
199
200 U_CAPI void U_EXPORT2
ubrk_setUText(UBreakIterator * bi,UText * text,UErrorCode * status)201 ubrk_setUText(UBreakIterator *bi,
202 UText *text,
203 UErrorCode *status)
204 {
205 ((BreakIterator*)bi)->setText(text, *status);
206 }
207
208
209
210
211
212 U_CAPI int32_t U_EXPORT2
ubrk_current(const UBreakIterator * bi)213 ubrk_current(const UBreakIterator *bi)
214 {
215
216 return ((BreakIterator*)bi)->current();
217 }
218
219 U_CAPI int32_t U_EXPORT2
ubrk_next(UBreakIterator * bi)220 ubrk_next(UBreakIterator *bi)
221 {
222
223 return ((BreakIterator*)bi)->next();
224 }
225
226 U_CAPI int32_t U_EXPORT2
ubrk_previous(UBreakIterator * bi)227 ubrk_previous(UBreakIterator *bi)
228 {
229
230 return ((BreakIterator*)bi)->previous();
231 }
232
233 U_CAPI int32_t U_EXPORT2
ubrk_first(UBreakIterator * bi)234 ubrk_first(UBreakIterator *bi)
235 {
236
237 return ((BreakIterator*)bi)->first();
238 }
239
240 U_CAPI int32_t U_EXPORT2
ubrk_last(UBreakIterator * bi)241 ubrk_last(UBreakIterator *bi)
242 {
243
244 return ((BreakIterator*)bi)->last();
245 }
246
247 U_CAPI int32_t U_EXPORT2
ubrk_preceding(UBreakIterator * bi,int32_t offset)248 ubrk_preceding(UBreakIterator *bi,
249 int32_t offset)
250 {
251
252 return ((BreakIterator*)bi)->preceding(offset);
253 }
254
255 U_CAPI int32_t U_EXPORT2
ubrk_following(UBreakIterator * bi,int32_t offset)256 ubrk_following(UBreakIterator *bi,
257 int32_t offset)
258 {
259
260 return ((BreakIterator*)bi)->following(offset);
261 }
262
263 U_CAPI const char* U_EXPORT2
ubrk_getAvailable(int32_t index)264 ubrk_getAvailable(int32_t index)
265 {
266
267 return uloc_getAvailable(index);
268 }
269
270 U_CAPI int32_t U_EXPORT2
ubrk_countAvailable()271 ubrk_countAvailable()
272 {
273
274 return uloc_countAvailable();
275 }
276
277
278 U_CAPI UBool U_EXPORT2
ubrk_isBoundary(UBreakIterator * bi,int32_t offset)279 ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
280 {
281 return ((BreakIterator*)bi)->isBoundary(offset);
282 }
283
284
285 U_CAPI int32_t U_EXPORT2
ubrk_getRuleStatus(UBreakIterator * bi)286 ubrk_getRuleStatus(UBreakIterator *bi)
287 {
288 return ((BreakIterator*)bi)->getRuleStatus();
289 }
290
291 U_CAPI int32_t U_EXPORT2
ubrk_getRuleStatusVec(UBreakIterator * bi,int32_t * fillInVec,int32_t capacity,UErrorCode * status)292 ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
293 {
294 return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status);
295 }
296
297
298 U_CAPI const char* U_EXPORT2
ubrk_getLocaleByType(const UBreakIterator * bi,ULocDataLocaleType type,UErrorCode * status)299 ubrk_getLocaleByType(const UBreakIterator *bi,
300 ULocDataLocaleType type,
301 UErrorCode* status)
302 {
303 if (bi == NULL) {
304 if (U_SUCCESS(*status)) {
305 *status = U_ILLEGAL_ARGUMENT_ERROR;
306 }
307 return NULL;
308 }
309 return ((BreakIterator*)bi)->getLocaleID(type, *status);
310 }
311
312
313 U_CAPI void U_EXPORT2
ubrk_refreshUText(UBreakIterator * bi,UText * text,UErrorCode * status)314 ubrk_refreshUText(UBreakIterator *bi,
315 UText *text,
316 UErrorCode *status)
317 {
318 BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi);
319 bii->refreshInputText(text, *status);
320 }
321
322 U_CAPI int32_t U_EXPORT2
ubrk_getBinaryRules(UBreakIterator * bi,uint8_t * binaryRules,int32_t rulesCapacity,UErrorCode * status)323 ubrk_getBinaryRules(UBreakIterator *bi,
324 uint8_t * binaryRules, int32_t rulesCapacity,
325 UErrorCode * status)
326 {
327 if (U_FAILURE(*status)) {
328 return 0;
329 }
330 if ((binaryRules == NULL && rulesCapacity > 0) || rulesCapacity < 0) {
331 *status = U_ILLEGAL_ARGUMENT_ERROR;
332 return 0;
333 }
334 RuleBasedBreakIterator* rbbi;
335 if ((rbbi = dynamic_cast<RuleBasedBreakIterator*>(reinterpret_cast<BreakIterator*>(bi))) == NULL) {
336 *status = U_ILLEGAL_ARGUMENT_ERROR;
337 return 0;
338 }
339 uint32_t rulesLength;
340 const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength);
341 if (rulesLength > INT32_MAX) {
342 *status = U_INDEX_OUTOFBOUNDS_ERROR;
343 return 0;
344 }
345 if (binaryRules != NULL) { // if not preflighting
346 // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely
347 if ((int32_t)rulesLength > rulesCapacity) {
348 *status = U_BUFFER_OVERFLOW_ERROR;
349 } else {
350 uprv_memcpy(binaryRules, returnedRules, rulesLength);
351 }
352 }
353 return (int32_t)rulesLength;
354 }
355
356
357 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
358