1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 * Copyright (C) 2010 , Yahoo! Inc.
6 ********************************************************************
7 *
8 * File SELFMT.CPP
9 *
10 * Modification History:
11 *
12 * Date Name Description
13 * 11/11/09 kirtig Finished first cut of implementation.
14 * 11/16/09 kirtig Improved version
15 ********************************************************************/
16
17 #include "unicode/utypeinfo.h" // for 'typeid' to work
18
19 #include "unicode/utypes.h"
20 #include "unicode/ustring.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/uchar.h"
23 #include "unicode/umsg.h"
24 #include "unicode/rbnf.h"
25 #include "cmemory.h"
26 #include "util.h"
27 #include "uassert.h"
28 #include "ustrfmt.h"
29 #include "uvector.h"
30
31 #include "unicode/selfmt.h"
32 #include "selfmtimpl.h"
33
34 #if !UCONFIG_NO_FORMATTING
35
36 U_NAMESPACE_BEGIN
37
38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat)
39
40 #define MAX_KEYWORD_SIZE 30
41 static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0};
42
SelectFormat(const UnicodeString & pat,UErrorCode & status)43 SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) : parsedValuesHash(NULL) {
44 if (U_FAILURE(status)) {
45 return;
46 }
47 initHashTable(status);
48 applyPattern(pat, status);
49 }
50
/**
 * Copy constructor: duplicates the pattern string and deep-copies the
 * keyword/phrase table of another SelectFormat.
 *
 * @param other The object to copy from.
 */
SelectFormat::SelectFormat(const SelectFormat& other) : Format(other), parsedValuesHash(NULL) {
    // A constructor has no way to hand an error code back to the caller, so
    // the outcome of the table copy is collected locally and then discarded.
    UErrorCode localStatus = U_ZERO_ERROR;
    copyHashtable(other.parsedValuesHash, localStatus);
    pattern = other.pattern;
}
56
~SelectFormat()57 SelectFormat::~SelectFormat() {
58 cleanHashTable();
59 }
60
initHashTable(UErrorCode & status)61 void SelectFormat::initHashTable(UErrorCode &status) {
62 if (U_FAILURE(status)) {
63 return;
64 }
65 // has inited
66 if (parsedValuesHash != NULL) {
67 return;
68 }
69
70 parsedValuesHash = new Hashtable(TRUE, status);
71 if (U_FAILURE(status)) {
72 cleanHashTable();
73 return;
74 } else {
75 if (parsedValuesHash == NULL) {
76 status = U_MEMORY_ALLOCATION_ERROR;
77 return;
78 }
79 }
80 // to use hashtable->equals(), must set Value Compartor.
81 parsedValuesHash->setValueComparator(uhash_compareCaselessUnicodeString);
82 }
83
cleanHashTable()84 void SelectFormat::cleanHashTable() {
85 if (parsedValuesHash != NULL) {
86 delete parsedValuesHash;
87 parsedValuesHash = NULL;
88 }
89 }
90
91 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)92 SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
93 if (U_FAILURE(status)) {
94 return;
95 }
96
97 pattern = newPattern;
98 enum State{ startState, keywordState, pastKeywordState, phraseState};
99
100 //Initialization
101 UnicodeString keyword ;
102 UnicodeString phrase ;
103 UnicodeString* ptrPhrase ;
104 int32_t braceCount = 0;
105
106 if (parsedValuesHash == NULL) {
107 initHashTable(status);
108 if (U_FAILURE(status)) {
109 return;
110 }
111 }
112 parsedValuesHash->removeAll();
113 parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
114
115 //Process the state machine
116 State state = startState;
117 for (int32_t i = 0; i < pattern.length(); ++i) {
118 //Get the character and check its type
119 UChar ch = pattern.charAt(i);
120 CharacterClass type = classifyCharacter(ch);
121
122 //Allow any character in phrase but nowhere else
123 if ( type == tOther ) {
124 if ( state == phraseState ){
125 phrase += ch;
126 continue;
127 }else {
128 status = U_PATTERN_SYNTAX_ERROR;
129 cleanHashTable();
130 return;
131 }
132 }
133
134 //Process the state machine
135 switch (state) {
136 //At the start of pattern
137 case startState:
138 switch (type) {
139 case tSpace:
140 break;
141 case tStartKeyword:
142 state = keywordState;
143 keyword += ch;
144 break;
145 //If anything else is encountered, it's a syntax error
146 default:
147 status = U_PATTERN_SYNTAX_ERROR;
148 cleanHashTable();
149 return;
150 }//end of switch(type)
151 break;
152
153 //Handle the keyword state
154 case keywordState:
155 switch (type) {
156 case tSpace:
157 state = pastKeywordState;
158 break;
159 case tStartKeyword:
160 case tContinueKeyword:
161 keyword += ch;
162 break;
163 case tLeftBrace:
164 state = phraseState;
165 break;
166 //If anything else is encountered, it's a syntax error
167 default:
168 status = U_PATTERN_SYNTAX_ERROR;
169 cleanHashTable();
170 return;
171 }//end of switch(type)
172 break;
173
174 //Handle the pastkeyword state
175 case pastKeywordState:
176 switch (type) {
177 case tSpace:
178 break;
179 case tLeftBrace:
180 state = phraseState;
181 break;
182 //If anything else is encountered, it's a syntax error
183 default:
184 status = U_PATTERN_SYNTAX_ERROR;
185 cleanHashTable();
186 return;
187 }//end of switch(type)
188 break;
189
190 //Handle the phrase state
191 case phraseState:
192 switch (type) {
193 case tLeftBrace:
194 braceCount++;
195 phrase += ch;
196 break;
197 case tRightBrace:
198 //Matching keyword, phrase pair found
199 if (braceCount == 0){
200 //Check validity of keyword
201 if (parsedValuesHash->get(keyword) != NULL) {
202 status = U_DUPLICATE_KEYWORD;
203 cleanHashTable();
204 return;
205 }
206 if (keyword.length() == 0) {
207 status = U_PATTERN_SYNTAX_ERROR;
208 cleanHashTable();
209 return;
210 }
211
212 //Store the keyword, phrase pair in hashTable
213 ptrPhrase = new UnicodeString(phrase);
214 parsedValuesHash->put( keyword, ptrPhrase, status);
215
216 //Reinitialize
217 keyword.remove();
218 phrase.remove();
219 ptrPhrase = NULL;
220 state = startState;
221 }
222
223 if (braceCount > 0){
224 braceCount-- ;
225 phrase += ch;
226 }
227 break;
228 default:
229 phrase += ch;
230 }//end of switch(type)
231 break;
232
233 //Handle the default case of switch(state)
234 default:
235 status = U_PATTERN_SYNTAX_ERROR;
236 cleanHashTable();
237 return;
238
239 }//end of switch(state)
240 }
241
242 //Check if the state machine is back to startState
243 if ( state != startState){
244 status = U_PATTERN_SYNTAX_ERROR;
245 cleanHashTable();
246 return;
247 }
248
249 //Check if "other" keyword is present
250 if ( !checkSufficientDefinition() ) {
251 status = U_DEFAULT_KEYWORD_MISSING;
252 cleanHashTable();
253 }
254 return;
255 }
256
257 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const258 SelectFormat::format(const Formattable& obj,
259 UnicodeString& appendTo,
260 FieldPosition& pos,
261 UErrorCode& status) const
262 {
263 switch (obj.getType())
264 {
265 case Formattable::kString:
266 return format(obj.getString(), appendTo, pos, status);
267 default:
268 if( U_SUCCESS(status) ){
269 status = U_ILLEGAL_ARGUMENT_ERROR;
270 }
271 return appendTo;
272 }
273 }
274
275 UnicodeString&
format(const UnicodeString & keyword,UnicodeString & appendTo,FieldPosition &,UErrorCode & status) const276 SelectFormat::format(const UnicodeString& keyword,
277 UnicodeString& appendTo,
278 FieldPosition& /*pos */,
279 UErrorCode& status) const {
280
281 if (U_FAILURE(status)) return appendTo;
282
283 if (parsedValuesHash == NULL) {
284 status = U_INVALID_FORMAT_ERROR;
285 return appendTo;
286 }
287
288 //Check for the validity of the keyword
289 if ( !checkValidKeyword(keyword) ){
290 status = U_ILLEGAL_ARGUMENT_ERROR;
291 return appendTo;
292 }
293
294 UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(keyword);
295 if (selectedPattern == NULL) {
296 selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER);
297 }
298
299 return appendTo += *selectedPattern;
300 }
301
302 UnicodeString&
toPattern(UnicodeString & appendTo)303 SelectFormat::toPattern(UnicodeString& appendTo) {
304 return appendTo += pattern;
305 }
306
307 SelectFormat::CharacterClass
classifyCharacter(UChar ch) const308 SelectFormat::classifyCharacter(UChar ch) const{
309 if ((ch >= CAP_A) && (ch <= CAP_Z)) {
310 return tStartKeyword;
311 }
312 if ((ch >= LOW_A) && (ch <= LOW_Z)) {
313 return tStartKeyword;
314 }
315 if ((ch >= U_ZERO) && (ch <= U_NINE)) {
316 return tContinueKeyword;
317 }
318 if ( uprv_isRuleWhiteSpace(ch) ){
319 return tSpace;
320 }
321 switch (ch) {
322 case LEFTBRACE:
323 return tLeftBrace;
324 case RIGHTBRACE:
325 return tRightBrace;
326 case HYPHEN:
327 case LOWLINE:
328 return tContinueKeyword;
329 default :
330 return tOther;
331 }
332 }
333
334 UBool
checkSufficientDefinition()335 SelectFormat::checkSufficientDefinition() {
336 // Check that at least the default rule is defined.
337 return (parsedValuesHash != NULL &&
338 parsedValuesHash->get(SELECT_KEYWORD_OTHER) != NULL) ;
339 }
340
341 UBool
checkValidKeyword(const UnicodeString & argKeyword) const342 SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{
343 int32_t len = argKeyword.length();
344 if (len < 1){
345 return FALSE;
346 }
347 CharacterClass type = classifyCharacter(argKeyword.charAt(0));
348 if( type != tStartKeyword ){
349 return FALSE;
350 }
351
352 for (int32_t i = 0; i < argKeyword.length(); ++i) {
353 type = classifyCharacter(argKeyword.charAt(i));
354 if( type != tStartKeyword && type != tContinueKeyword ){
355 return FALSE;
356 }
357 }
358 return TRUE;
359 }
360
clone() const361 Format* SelectFormat::clone() const
362 {
363 return new SelectFormat(*this);
364 }
365
366 SelectFormat&
operator =(const SelectFormat & other)367 SelectFormat::operator=(const SelectFormat& other) {
368 if (this != &other) {
369 UErrorCode status = U_ZERO_ERROR;
370 pattern = other.pattern;
371 copyHashtable(other.parsedValuesHash, status);
372 }
373 return *this;
374 }
375
376 UBool
operator ==(const Format & other) const377 SelectFormat::operator==(const Format& other) const {
378 if( this == &other){
379 return TRUE;
380 }
381 if (typeid(*this) != typeid(other)) {
382 return FALSE;
383 }
384 SelectFormat* fmt = (SelectFormat*)&other;
385 Hashtable* hashOther = fmt->parsedValuesHash;
386 if ( parsedValuesHash == NULL && hashOther == NULL)
387 return TRUE;
388 if ( parsedValuesHash == NULL || hashOther == NULL)
389 return FALSE;
390 return parsedValuesHash->equals(*hashOther);
391 }
392
393 UBool
operator !=(const Format & other) const394 SelectFormat::operator!=(const Format& other) const {
395 return !operator==(other);
396 }
397
398 void
parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const399 SelectFormat::parseObject(const UnicodeString& /*source*/,
400 Formattable& /*result*/,
401 ParsePosition& pos) const
402 {
403 // TODO: not yet supported in icu4j and icu4c
404 pos.setErrorIndex(pos.getIndex());
405 }
406
407 void
copyHashtable(Hashtable * other,UErrorCode & status)408 SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) {
409 if (U_FAILURE(status)) {
410 return;
411 }
412 if (other == NULL) {
413 cleanHashTable();
414 return;
415 }
416 if (parsedValuesHash == NULL) {
417 initHashTable(status);
418 if (U_FAILURE(status)) {
419 return;
420 }
421 }
422
423 parsedValuesHash->removeAll();
424 parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
425
426 int32_t pos = -1;
427 const UHashElement* elem = NULL;
428
429 // walk through the hash table and create a deep clone
430 while ((elem = other->nextElement(pos)) != NULL){
431 const UHashTok otherKeyTok = elem->key;
432 UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
433 const UHashTok otherKeyToVal = elem->value;
434 UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
435 parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status);
436 if (U_FAILURE(status)){
437 cleanHashTable();
438 return;
439 }
440 }
441 }
442
443 U_NAMESPACE_END
444
445 #endif /* #if !UCONFIG_NO_FORMATTING */
446
447 //eof
448