1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 * Copyright (C) 2010 , Yahoo! Inc.
6 ********************************************************************
7 *
8 * File SELFMT.CPP
9 *
10 * Modification History:
11 *
12 * Date Name Description
13 * 11/11/09 kirtig Finished first cut of implementation.
14 * 11/16/09 kirtig Improved version
15 ********************************************************************/
16
17 #include "unicode/utypes.h"
18 #include "unicode/ustring.h"
19 #include "unicode/ucnv_err.h"
20 #include "unicode/uchar.h"
21 #include "unicode/umsg.h"
22 #include "unicode/rbnf.h"
23 #include "cmemory.h"
24 #include "util.h"
25 #include "uassert.h"
26 #include "ustrfmt.h"
27 #include "uvector.h"
28
29 #include "unicode/selfmt.h"
30 #include "selfmtimpl.h"
31
32 #if !UCONFIG_NO_FORMATTING
33
34 U_NAMESPACE_BEGIN
35
// ICU RTTI boilerplate for SelectFormat; presumably this supplies the
// getStaticClassID()/getDynamicClassID() pair that operator== relies on.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat)

// NOTE(review): MAX_KEYWORD_SIZE is not referenced by any code visible in this file.
#define MAX_KEYWORD_SIZE 30
// NUL-terminated UChar spelling of the keyword "other". It is the fallback
// keyword looked up by format() and required by checkSufficientDefinition().
static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0};
40
SelectFormat(const UnicodeString & pat,UErrorCode & status)41 SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) {
42 if (U_FAILURE(status)) {
43 return;
44 }
45 init(status);
46 applyPattern(pat, status);
47 }
48
/**
 * Copy constructor: copies the pattern text and deep-copies the parsed
 * keyword/phrase table of `other`.
 *
 * A constructor has no way to report an error code, so if the deep copy
 * fails the new object is left without a table (parsedValuesHash == NULL)
 * instead of keeping a half-populated one.
 */
SelectFormat::SelectFormat(const SelectFormat& other)
        : Format(other), parsedValuesHash(NULL) {
    UErrorCode status = U_ZERO_ERROR;
    pattern = other.pattern;
    copyHashtable(other.parsedValuesHash, status);
    if (U_FAILURE(status)) {
        // Never keep a partially copied table: a table lacking the "other"
        // entry would make format() misbehave.
        delete parsedValuesHash;
        parsedValuesHash = NULL;
    }
}
54
~SelectFormat()55 SelectFormat::~SelectFormat() {
56 delete parsedValuesHash;
57 }
58
59 void
init(UErrorCode & status)60 SelectFormat::init(UErrorCode& status) {
61 if (U_FAILURE(status)) {
62 return;
63 }
64 parsedValuesHash = NULL;
65 pattern.remove();
66 }
67
68
69 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)70 SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
71 if (U_FAILURE(status)) {
72 return;
73 }
74
75 this->pattern = newPattern;
76 enum State{ startState, keywordState, pastKeywordState, phraseState};
77
78 //Initialization
79 UnicodeString keyword ;
80 UnicodeString phrase ;
81 UnicodeString* ptrPhrase ;
82 int32_t braceCount = 0;
83
84 delete parsedValuesHash;
85 this->parsedValuesHash = NULL;
86 parsedValuesHash = new Hashtable(TRUE, status);
87 if (U_FAILURE(status)) {
88 return;
89 }
90 parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
91
92 //Process the state machine
93 State state = startState;
94 for (int32_t i = 0; i < pattern.length(); ++i) {
95 //Get the character and check its type
96 UChar ch = pattern.charAt(i);
97 CharacterClass type = classifyCharacter(ch);
98
99 //Allow any character in phrase but nowhere else
100 if ( type == tOther ) {
101 if ( state == phraseState ){
102 phrase += ch;
103 continue;
104 }else {
105 status = U_PATTERN_SYNTAX_ERROR;
106 return;
107 }
108 }
109
110 //Process the state machine
111 switch (state) {
112 //At the start of pattern
113 case startState:
114 switch (type) {
115 case tSpace:
116 break;
117 case tStartKeyword:
118 state = keywordState;
119 keyword += ch;
120 break;
121 //If anything else is encountered, it's a syntax error
122 default:
123 status = U_PATTERN_SYNTAX_ERROR;
124 return;
125 }//end of switch(type)
126 break;
127
128 //Handle the keyword state
129 case keywordState:
130 switch (type) {
131 case tSpace:
132 state = pastKeywordState;
133 break;
134 case tStartKeyword:
135 case tContinueKeyword:
136 keyword += ch;
137 break;
138 case tLeftBrace:
139 state = phraseState;
140 break;
141 //If anything else is encountered, it's a syntax error
142 default:
143 status = U_PATTERN_SYNTAX_ERROR;
144 return;
145 }//end of switch(type)
146 break;
147
148 //Handle the pastkeyword state
149 case pastKeywordState:
150 switch (type) {
151 case tSpace:
152 break;
153 case tLeftBrace:
154 state = phraseState;
155 break;
156 //If anything else is encountered, it's a syntax error
157 default:
158 status = U_PATTERN_SYNTAX_ERROR;
159 return;
160 }//end of switch(type)
161 break;
162
163 //Handle the phrase state
164 case phraseState:
165 switch (type) {
166 case tLeftBrace:
167 braceCount++;
168 phrase += ch;
169 break;
170 case tRightBrace:
171 //Matching keyword, phrase pair found
172 if (braceCount == 0){
173 //Check validity of keyword
174 if (parsedValuesHash->get(keyword) != NULL) {
175 status = U_DUPLICATE_KEYWORD;
176 return;
177 }
178 if (keyword.length() == 0) {
179 status = U_PATTERN_SYNTAX_ERROR;
180 return;
181 }
182
183 //Store the keyword, phrase pair in hashTable
184 ptrPhrase = new UnicodeString(phrase);
185 parsedValuesHash->put( keyword, ptrPhrase, status);
186
187 //Reinitialize
188 keyword.remove();
189 phrase.remove();
190 ptrPhrase = NULL;
191 state = startState;
192 }
193
194 if (braceCount > 0){
195 braceCount-- ;
196 phrase += ch;
197 }
198 break;
199 default:
200 phrase += ch;
201 }//end of switch(type)
202 break;
203
204 //Handle the default case of switch(state)
205 default:
206 status = U_PATTERN_SYNTAX_ERROR;
207 return;
208
209 }//end of switch(state)
210 }
211
212 //Check if the state machine is back to startState
213 if ( state != startState){
214 status = U_PATTERN_SYNTAX_ERROR;
215 return;
216 }
217
218 //Check if "other" keyword is present
219 if ( !checkSufficientDefinition() ) {
220 status = U_DEFAULT_KEYWORD_MISSING;
221 }
222 return;
223 }
224
225 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const226 SelectFormat::format(const Formattable& obj,
227 UnicodeString& appendTo,
228 FieldPosition& pos,
229 UErrorCode& status) const
230 {
231 switch (obj.getType())
232 {
233 case Formattable::kString:
234 return format(obj.getString(), appendTo, pos, status);
235 default:
236 if( U_SUCCESS(status) ){
237 status = U_ILLEGAL_ARGUMENT_ERROR;
238 }
239 return appendTo;
240 }
241 }
242
243 UnicodeString&
format(const UnicodeString & keyword,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const244 SelectFormat::format(const UnicodeString& keyword,
245 UnicodeString& appendTo,
246 FieldPosition& pos,
247 UErrorCode& status) const {
248
249 if (U_FAILURE(status)) return appendTo;
250
251 //Check for the validity of the keyword
252 if ( !checkValidKeyword(keyword) ){
253 status = U_ILLEGAL_ARGUMENT_ERROR;
254 return appendTo;
255 }
256
257 if (parsedValuesHash == NULL) {
258 status = U_INVALID_FORMAT_ERROR;
259 return appendTo;
260 }
261
262 UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(keyword);
263 if (selectedPattern == NULL) {
264 selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER);
265 }
266
267 return appendTo += *selectedPattern;
268 }
269
270 UnicodeString&
toPattern(UnicodeString & appendTo)271 SelectFormat::toPattern(UnicodeString& appendTo) {
272 return appendTo += pattern;
273 }
274
275 SelectFormat::CharacterClass
classifyCharacter(UChar ch) const276 SelectFormat::classifyCharacter(UChar ch) const{
277 if ((ch >= CAP_A) && (ch <= CAP_Z)) {
278 return tStartKeyword;
279 }
280 if ((ch >= LOW_A) && (ch <= LOW_Z)) {
281 return tStartKeyword;
282 }
283 if ((ch >= U_ZERO) && (ch <= U_NINE)) {
284 return tContinueKeyword;
285 }
286 if ( uprv_isRuleWhiteSpace(ch) ){
287 return tSpace;
288 }
289 switch (ch) {
290 case LEFTBRACE:
291 return tLeftBrace;
292 case RIGHTBRACE:
293 return tRightBrace;
294 case HYPHEN:
295 case LOWLINE:
296 return tContinueKeyword;
297 default :
298 return tOther;
299 }
300 }
301
302 UBool
checkSufficientDefinition()303 SelectFormat::checkSufficientDefinition() {
304 // Check that at least the default rule is defined.
305 return (parsedValuesHash != NULL &&
306 parsedValuesHash->get(SELECT_KEYWORD_OTHER) != NULL) ;
307 }
308
309 UBool
checkValidKeyword(const UnicodeString & argKeyword) const310 SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{
311 int32_t len = argKeyword.length();
312 if (len < 1){
313 return FALSE;
314 }
315 CharacterClass type = classifyCharacter(argKeyword.charAt(0));
316 if( type != tStartKeyword ){
317 return FALSE;
318 }
319
320 for (int32_t i = 0; i < argKeyword.length(); ++i) {
321 type = classifyCharacter(argKeyword.charAt(i));
322 if( type != tStartKeyword && type != tContinueKeyword ){
323 return FALSE;
324 }
325 }
326 return TRUE;
327 }
328
clone() const329 Format* SelectFormat::clone() const
330 {
331 return new SelectFormat(*this);
332 }
333
334 SelectFormat&
operator =(const SelectFormat & other)335 SelectFormat::operator=(const SelectFormat& other) {
336 if (this != &other) {
337 UErrorCode status = U_ZERO_ERROR;
338 delete parsedValuesHash;
339 pattern = other.pattern;
340 copyHashtable(other.parsedValuesHash, status);
341 }
342 return *this;
343 }
344
345 UBool
operator ==(const Format & other) const346 SelectFormat::operator==(const Format& other) const {
347 if( this == &other){
348 return TRUE;
349 }
350 if( other.getDynamicClassID() != SelectFormat::getStaticClassID() ){
351 return FALSE;
352 }
353 SelectFormat* fmt = (SelectFormat*)&other;
354 Hashtable* hashOther = fmt->parsedValuesHash;
355 if ( parsedValuesHash == NULL && hashOther == NULL)
356 return TRUE;
357 if ( parsedValuesHash == NULL || hashOther == NULL)
358 return FALSE;
359 if ( hashOther->count() != parsedValuesHash->count() ){
360 return FALSE;
361 }
362
363 const UHashElement* elem = NULL;
364 int32_t pos = -1;
365 while ((elem = hashOther->nextElement(pos)) != NULL) {
366 const UHashTok otherKeyTok = elem->key;
367 UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
368 const UHashTok otherKeyToVal = elem->value;
369 UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
370
371 UnicodeString* thisElemValue = (UnicodeString*)parsedValuesHash->get(*otherKey);
372 if ( thisElemValue == NULL ){
373 return FALSE;
374 }
375 if ( *thisElemValue != *otherValue){
376 return FALSE;
377 }
378
379 }
380 pos = -1;
381 while ((elem = parsedValuesHash->nextElement(pos)) != NULL) {
382 const UHashTok thisKeyTok = elem->key;
383 UnicodeString* thisKey = (UnicodeString*)thisKeyTok.pointer;
384 const UHashTok thisKeyToVal = elem->value;
385 UnicodeString* thisValue = (UnicodeString*)thisKeyToVal.pointer;
386
387 UnicodeString* otherElemValue = (UnicodeString*)hashOther->get(*thisKey);
388 if ( otherElemValue == NULL ){
389 return FALSE;
390 }
391 if ( *otherElemValue != *thisValue){
392 return FALSE;
393 }
394
395 }
396 return TRUE;
397 }
398
399 UBool
operator !=(const Format & other) const400 SelectFormat::operator!=(const Format& other) const {
401 return !operator==(other);
402 }
403
404 void
parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const405 SelectFormat::parseObject(const UnicodeString& /*source*/,
406 Formattable& /*result*/,
407 ParsePosition& pos) const
408 {
409 // TODO: not yet supported in icu4j and icu4c
410 pos.setErrorIndex(pos.getIndex());
411 }
412
413 void
copyHashtable(Hashtable * other,UErrorCode & status)414 SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) {
415 if (other == NULL) {
416 parsedValuesHash = NULL;
417 return;
418 }
419 parsedValuesHash = new Hashtable(TRUE, status);
420 if (U_FAILURE(status)){
421 return;
422 }
423 parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
424
425 int32_t pos = -1;
426 const UHashElement* elem = NULL;
427
428 // walk through the hash table and create a deep clone
429 while ((elem = other->nextElement(pos)) != NULL){
430 const UHashTok otherKeyTok = elem->key;
431 UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
432 const UHashTok otherKeyToVal = elem->value;
433 UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
434 parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status);
435 if (U_FAILURE(status)){
436 return;
437 }
438 }
439 }
440
441 U_NAMESPACE_END
442
443 #endif /* #if !UCONFIG_NO_FORMATTING */
444
445 //eof
446