• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2010, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  * Copyright (C) 2010 , Yahoo! Inc.
6  ********************************************************************
7  *
8  * File SELFMT.CPP
9  *
10  * Modification History:
11  *
12  *   Date        Name        Description
13  *   11/11/09    kirtig      Finished first cut of implementation.
14  *   11/16/09    kirtig      Improved version
15  ********************************************************************/
16 
17 #include "unicode/utypeinfo.h"  // for 'typeid' to work
18 
19 #include "unicode/utypes.h"
20 #include "unicode/ustring.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/uchar.h"
23 #include "unicode/umsg.h"
24 #include "unicode/rbnf.h"
25 #include "cmemory.h"
26 #include "util.h"
27 #include "uassert.h"
28 #include "ustrfmt.h"
29 #include "uvector.h"
30 
31 #include "unicode/selfmt.h"
32 #include "selfmtimpl.h"
33 
34 #if !UCONFIG_NO_FORMATTING
35 
36 U_NAMESPACE_BEGIN
37 
// ICU macro invocation for SelectFormat.
// NOTE(review): presumably supplies the class-ID/RTTI member definitions declared
// in the class header -- confirm against unicode/uobject.h.
38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat)
39 
// Size limit constant for keywords. It is not referenced by any code visible in
// this file; NOTE(review): confirm whether anything else still depends on it.
40 #define MAX_KEYWORD_SIZE 30
// Zero-terminated UChar array used as the lookup key for the fallback entry of a
// pattern: format() falls back to it when the requested keyword has no entry, and
// checkSufficientDefinition() requires it to be present.
// NOTE(review): assumes LOW_O..LOW_R are the code points of the letters spelling
// "other" -- confirm against selfmtimpl.h.
41 static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0};
42 
SelectFormat(const UnicodeString & pat,UErrorCode & status)43 SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) : parsedValuesHash(NULL) {
44    if (U_FAILURE(status)) {
45       return;
46    }
47    initHashTable(status);
48    applyPattern(pat, status);
49 }
50 
/**
 * Copy constructor: copies the pattern text and deep-copies the keyword table.
 *
 * A constructor has no way to return an error code, so the status produced
 * by the table copy is discarded.
 *
 * @param other  the SelectFormat to copy from
 */
SelectFormat::SelectFormat(const SelectFormat& other)
        : Format(other), parsedValuesHash(NULL) {
    pattern = other.pattern;

    UErrorCode copyStatus = U_ZERO_ERROR;
    copyHashtable(other.parsedValuesHash, copyStatus);
}
56 
~SelectFormat()57 SelectFormat::~SelectFormat() {
58   cleanHashTable();
59 }
60 
initHashTable(UErrorCode & status)61 void SelectFormat::initHashTable(UErrorCode &status) {
62   if (U_FAILURE(status)) {
63     return;
64   }
65   // has inited
66   if (parsedValuesHash != NULL) {
67     return;
68   }
69 
70   parsedValuesHash = new Hashtable(TRUE, status);
71   if (U_FAILURE(status)) {
72     cleanHashTable();
73     return;
74   } else {
75     if (parsedValuesHash == NULL) {
76       status = U_MEMORY_ALLOCATION_ERROR;
77       return;
78     }
79   }
80   // to use hashtable->equals(), must set Value Compartor.
81   parsedValuesHash->setValueComparator(uhash_compareCaselessUnicodeString);
82 }
83 
cleanHashTable()84 void SelectFormat::cleanHashTable() {
85   if (parsedValuesHash != NULL) {
86     delete parsedValuesHash;
87     parsedValuesHash = NULL;
88   }
89 }
90 
91 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)92 SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
93     if (U_FAILURE(status)) {
94       return;
95     }
96 
97     pattern = newPattern;
98     enum State{ startState, keywordState, pastKeywordState, phraseState};
99 
100     //Initialization
101     UnicodeString keyword ;
102     UnicodeString phrase ;
103     UnicodeString* ptrPhrase ;
104     int32_t braceCount = 0;
105 
106     if (parsedValuesHash == NULL) {
107       initHashTable(status);
108       if (U_FAILURE(status)) {
109         return;
110       }
111     }
112     parsedValuesHash->removeAll();
113     parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
114 
115     //Process the state machine
116     State state = startState;
117     for (int32_t i = 0; i < pattern.length(); ++i) {
118         //Get the character and check its type
119         UChar ch = pattern.charAt(i);
120         CharacterClass type = classifyCharacter(ch);
121 
122         //Allow any character in phrase but nowhere else
123         if ( type == tOther ) {
124             if ( state == phraseState ){
125                 phrase += ch;
126                 continue;
127             }else {
128                 status = U_PATTERN_SYNTAX_ERROR;
129                 cleanHashTable();
130                 return;
131             }
132         }
133 
134         //Process the state machine
135         switch (state) {
136             //At the start of pattern
137             case startState:
138                 switch (type) {
139                     case tSpace:
140                         break;
141                     case tStartKeyword:
142                         state = keywordState;
143                         keyword += ch;
144                         break;
145                     //If anything else is encountered, it's a syntax error
146                     default:
147                         status = U_PATTERN_SYNTAX_ERROR;
148                         cleanHashTable();
149                         return;
150                 }//end of switch(type)
151                 break;
152 
153             //Handle the keyword state
154             case keywordState:
155                 switch (type) {
156                     case tSpace:
157                         state = pastKeywordState;
158                         break;
159                     case tStartKeyword:
160                     case tContinueKeyword:
161                         keyword += ch;
162                         break;
163                     case tLeftBrace:
164                         state = phraseState;
165                         break;
166                     //If anything else is encountered, it's a syntax error
167                     default:
168                         status = U_PATTERN_SYNTAX_ERROR;
169                         cleanHashTable();
170                         return;
171                 }//end of switch(type)
172                 break;
173 
174             //Handle the pastkeyword state
175             case pastKeywordState:
176                 switch (type) {
177                     case tSpace:
178                         break;
179                     case tLeftBrace:
180                         state = phraseState;
181                         break;
182                     //If anything else is encountered, it's a syntax error
183                     default:
184                         status = U_PATTERN_SYNTAX_ERROR;
185                         cleanHashTable();
186                         return;
187                 }//end of switch(type)
188                 break;
189 
190             //Handle the phrase state
191             case phraseState:
192                 switch (type) {
193                     case tLeftBrace:
194                         braceCount++;
195                         phrase += ch;
196                         break;
197                     case tRightBrace:
198                         //Matching keyword, phrase pair found
199                         if (braceCount == 0){
200                             //Check validity of keyword
201                             if (parsedValuesHash->get(keyword) != NULL) {
202                                 status = U_DUPLICATE_KEYWORD;
203                                 cleanHashTable();
204                                 return;
205                             }
206                             if (keyword.length() == 0) {
207                                 status = U_PATTERN_SYNTAX_ERROR;
208                                 cleanHashTable();
209                                 return;
210                             }
211 
212                             //Store the keyword, phrase pair in hashTable
213                             ptrPhrase = new UnicodeString(phrase);
214                             parsedValuesHash->put( keyword, ptrPhrase, status);
215 
216                             //Reinitialize
217                             keyword.remove();
218                             phrase.remove();
219                             ptrPhrase = NULL;
220                             state = startState;
221                         }
222 
223                         if (braceCount > 0){
224                             braceCount-- ;
225                             phrase += ch;
226                         }
227                         break;
228                     default:
229                         phrase += ch;
230                 }//end of switch(type)
231                 break;
232 
233             //Handle the  default case of switch(state)
234             default:
235                 status = U_PATTERN_SYNTAX_ERROR;
236                 cleanHashTable();
237                 return;
238 
239         }//end of switch(state)
240     }
241 
242     //Check if the state machine is back to startState
243     if ( state != startState){
244         status = U_PATTERN_SYNTAX_ERROR;
245         cleanHashTable();
246         return;
247     }
248 
249     //Check if "other" keyword is present
250     if ( !checkSufficientDefinition() ) {
251         status = U_DEFAULT_KEYWORD_MISSING;
252         cleanHashTable();
253     }
254     return;
255 }
256 
257 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const258 SelectFormat::format(const Formattable& obj,
259                    UnicodeString& appendTo,
260                    FieldPosition& pos,
261                    UErrorCode& status) const
262 {
263     switch (obj.getType())
264     {
265     case Formattable::kString:
266         return format(obj.getString(), appendTo, pos, status);
267     default:
268         if( U_SUCCESS(status) ){
269             status = U_ILLEGAL_ARGUMENT_ERROR;
270         }
271         return appendTo;
272     }
273 }
274 
275 UnicodeString&
format(const UnicodeString & keyword,UnicodeString & appendTo,FieldPosition &,UErrorCode & status) const276 SelectFormat::format(const UnicodeString& keyword,
277                      UnicodeString& appendTo,
278                      FieldPosition& /*pos */,
279                      UErrorCode& status) const {
280 
281     if (U_FAILURE(status)) return appendTo;
282 
283     if (parsedValuesHash == NULL) {
284         status = U_INVALID_FORMAT_ERROR;
285         return appendTo;
286     }
287 
288     //Check for the validity of the keyword
289     if ( !checkValidKeyword(keyword) ){
290         status = U_ILLEGAL_ARGUMENT_ERROR;
291         return appendTo;
292     }
293 
294     UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(keyword);
295     if (selectedPattern == NULL) {
296         selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER);
297     }
298 
299     return appendTo += *selectedPattern;
300 }
301 
302 UnicodeString&
toPattern(UnicodeString & appendTo)303 SelectFormat::toPattern(UnicodeString& appendTo) {
304     return appendTo += pattern;
305 }
306 
307 SelectFormat::CharacterClass
classifyCharacter(UChar ch) const308 SelectFormat::classifyCharacter(UChar ch) const{
309     if ((ch >= CAP_A) && (ch <= CAP_Z)) {
310         return tStartKeyword;
311     }
312     if ((ch >= LOW_A) && (ch <= LOW_Z)) {
313         return tStartKeyword;
314     }
315     if ((ch >= U_ZERO) && (ch <= U_NINE)) {
316         return tContinueKeyword;
317     }
318     if ( uprv_isRuleWhiteSpace(ch) ){
319         return tSpace;
320     }
321     switch (ch) {
322         case LEFTBRACE:
323             return tLeftBrace;
324         case RIGHTBRACE:
325             return tRightBrace;
326         case HYPHEN:
327         case LOWLINE:
328             return tContinueKeyword;
329         default :
330             return tOther;
331     }
332 }
333 
334 UBool
checkSufficientDefinition()335 SelectFormat::checkSufficientDefinition() {
336     // Check that at least the default rule is defined.
337     return (parsedValuesHash != NULL &&
338            parsedValuesHash->get(SELECT_KEYWORD_OTHER) != NULL) ;
339 }
340 
341 UBool
checkValidKeyword(const UnicodeString & argKeyword) const342 SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{
343     int32_t len = argKeyword.length();
344     if (len < 1){
345         return FALSE;
346     }
347     CharacterClass type = classifyCharacter(argKeyword.charAt(0));
348     if( type != tStartKeyword ){
349         return FALSE;
350     }
351 
352     for (int32_t i = 0; i < argKeyword.length(); ++i) {
353         type = classifyCharacter(argKeyword.charAt(i));
354         if( type != tStartKeyword && type != tContinueKeyword ){
355             return FALSE;
356         }
357     }
358     return TRUE;
359 }
360 
clone() const361 Format* SelectFormat::clone() const
362 {
363     return new SelectFormat(*this);
364 }
365 
366 SelectFormat&
operator =(const SelectFormat & other)367 SelectFormat::operator=(const SelectFormat& other) {
368     if (this != &other) {
369         UErrorCode status = U_ZERO_ERROR;
370         pattern = other.pattern;
371         copyHashtable(other.parsedValuesHash, status);
372     }
373     return *this;
374 }
375 
376 UBool
operator ==(const Format & other) const377 SelectFormat::operator==(const Format& other) const {
378     if( this == &other){
379         return TRUE;
380     }
381     if (typeid(*this) != typeid(other)) {
382         return  FALSE;
383     }
384     SelectFormat* fmt = (SelectFormat*)&other;
385     Hashtable* hashOther = fmt->parsedValuesHash;
386     if ( parsedValuesHash == NULL && hashOther == NULL)
387         return TRUE;
388     if ( parsedValuesHash == NULL || hashOther == NULL)
389         return FALSE;
390     return parsedValuesHash->equals(*hashOther);
391 }
392 
393 UBool
operator !=(const Format & other) const394 SelectFormat::operator!=(const Format& other) const {
395     return  !operator==(other);
396 }
397 
398 void
parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const399 SelectFormat::parseObject(const UnicodeString& /*source*/,
400                         Formattable& /*result*/,
401                         ParsePosition& pos) const
402 {
403     // TODO: not yet supported in icu4j and icu4c
404     pos.setErrorIndex(pos.getIndex());
405 }
406 
407 void
copyHashtable(Hashtable * other,UErrorCode & status)408 SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) {
409     if (U_FAILURE(status)) {
410       return;
411     }
412     if (other == NULL) {
413       cleanHashTable();
414       return;
415     }
416     if (parsedValuesHash == NULL) {
417       initHashTable(status);
418       if (U_FAILURE(status)) {
419         return;
420       }
421     }
422 
423     parsedValuesHash->removeAll();
424     parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString);
425 
426     int32_t pos = -1;
427     const UHashElement* elem = NULL;
428 
429     // walk through the hash table and create a deep clone
430     while ((elem = other->nextElement(pos)) != NULL){
431         const UHashTok otherKeyTok = elem->key;
432         UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer;
433         const UHashTok otherKeyToVal = elem->value;
434         UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer;
435         parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status);
436         if (U_FAILURE(status)){
437             cleanHashTable();
438             return;
439         }
440     }
441 }
442 
443 U_NAMESPACE_END
444 
445 #endif /* #if !UCONFIG_NO_FORMATTING */
446 
447 //eof
448