• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2003, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File line.cpp
12 *
13 * Modification History:
14 *
15 *   Date        Name        Description
16 *   03/18/2003  weiv        Creation.
17 *******************************************************************************
18 */
19 
20 #include "line.h"
21 #include <stdio.h>
22 
23 UnicodeSet * Line::needsQuoting = NULL;
24 
25 void
init()26 Line::init()
27 {
28         len = 0;
29         expLen = 0;
30         strength = UCOL_OFF;
31         strengthFromEmpty = UCOL_OFF;
32         cumulativeStrength = UCOL_OFF;
33         expStrength = UCOL_OFF;
34         previous = NULL;
35         next = NULL;
36         left = NULL;
37         right = NULL;
38         isContraction = FALSE;
39         isExpansion = FALSE;
40         isRemoved = FALSE;
41         isReset = FALSE;
42         expIndex = 0;
43         firstCC = 0;
44         lastCC = 0;
45         sortKey = NULL;
46 }
47 
Line()48 Line::Line()
49 {
50   init();
51   memset(name, 0, 25*sizeof(UChar));
52   memset(expansionString, 0, 25*sizeof(UChar));
53 }
54 
Line(const UChar * name,int32_t len)55 Line::Line(const UChar* name, int32_t len)
56 {
57   init();
58   this->len = len;
59   u_memcpy(this->name, name, len);
60   memset(expansionString, 0, 25*sizeof(UChar));
61   UChar32 c;
62   U16_GET(name, 0, 0, len, c);
63   firstCC = u_getCombiningClass(c);
64   U16_GET(name, 0, len-1, len, c);
65   lastCC = u_getCombiningClass(c);
66 }
67 
Line(const UChar name)68 Line::Line(const UChar name)
69 {
70     init();
71     len = 1;
72     this->name[0] = name;
73     this->name[1] = 0;
74   memset(expansionString, 0, 25*sizeof(UChar));
75   firstCC = u_getCombiningClass(name);
76   lastCC = firstCC;
77 }
78 
Line(const UnicodeString & string)79 Line::Line(const UnicodeString &string)
80 {
81   init();
82   setTo(string);
83 }
84 
Line(const char * buff,int32_t buffLen,UErrorCode & status)85 Line::Line(const char *buff, int32_t buffLen, UErrorCode &status) :
86 previous(NULL),
87 next(NULL),
88 left(NULL),
89 right(NULL)
90 {
91   initFromString(buff, buffLen, status);
92 }
93 
// Copy constructor: starts with cleared neighbour pointers and then
// delegates to the assignment operator (which overwrites left/right and
// leaves previous/next untouched).
Line::Line(const Line &other)
  : previous(NULL),
    next(NULL),
    left(NULL),
    right(NULL)
{
  operator=(other);
}
102 
103 Line &
operator =(const Line & other)104 Line::operator=(const Line &other) {
105   len = other.len;
106   expLen = other.expLen;
107   strength = other.strength;
108   strengthFromEmpty = other.strengthFromEmpty;
109   cumulativeStrength = other.cumulativeStrength;
110   expStrength = other.expStrength;
111   isContraction = other.isContraction;
112   isExpansion = other.isExpansion;
113   isRemoved = other.isRemoved;
114   isReset = other.isReset;
115   expIndex = other.expIndex;
116   firstCC = other.firstCC;
117   lastCC = other.lastCC;
118   u_strcpy(name, other.name);
119   u_strcpy(expansionString, other.expansionString);
120   sortKey = other.sortKey;
121   left = other.left;
122   right = other.right;
123   return *this;
124 }
125 
126 UBool
operator ==(const Line & other) const127 Line::operator==(const Line &other) const {
128   if(this == &other) {
129     return TRUE;
130   }
131   if(len != other.len) {
132     return FALSE;
133   }
134   if(u_strcmp(name, other.name) != 0) {
135     return FALSE;
136   }
137   return TRUE;
138 }
139 
140 UBool
equals(const Line & other) const141 Line::equals(const Line &other) const {
142   if(this == &other) {
143     return TRUE;
144   }
145   if(len != other.len) {
146     return FALSE;
147   }
148   if(u_strcmp(name, other.name) != 0) {
149     return FALSE;
150   }
151   if(strength != other.strength) {
152     return FALSE;
153   }
154   if(expLen != other.expLen) {
155     return FALSE;
156   }
157   if(u_strcmp(expansionString, other.expansionString)) {
158     return FALSE;
159   }
160   return TRUE;
161 }
162 
163 UBool
operator !=(const Line & other) const164 Line::operator!=(const Line &other) const {
165   return !(*this == other);
166 }
167 
168 
~Line()169 Line::~Line() {
170 }
171 
172 void
copyArray(Line * dest,const Line * src,int32_t size)173 Line::copyArray(Line *dest, const Line *src, int32_t size) {
174   int32_t i = 0;
175   for(i = 0; i < size; i++) {
176     dest[i] = src[i];
177   }
178 }
179 
180 void
setName(const UChar * name,int32_t len)181 Line::setName(const UChar* name, int32_t len) {
182   this->len = len;
183   u_memcpy(this->name, name, len);
184   UChar32 c;
185   U16_GET(name, 0, 0, len, c);
186   firstCC = u_getCombiningClass(c);
187   U16_GET(name, 0, len-1, len, c);
188   lastCC = u_getCombiningClass(c);
189 }
190 
191 void
setToConcat(const Line * first,const Line * second)192 Line::setToConcat(const Line *first, const Line *second) {
193   u_strcpy(name, first->name);
194   u_strcat(name, second->name);
195   len = first->len + second->len;
196   firstCC = first->firstCC;
197   lastCC = second->lastCC;
198 }
199 
200 UnicodeString
stringToName(UChar * string,int32_t len)201 Line::stringToName(UChar *string, int32_t len) {
202   UErrorCode status = U_ZERO_ERROR;
203   UnicodeString result;
204   char buffer[256];
205   int32_t i = 0;
206   UChar32 c;
207   while(i < len) {
208     U16_NEXT(string, i, len, c);
209     if(c < 0x10000) {
210       sprintf(buffer, "%04X ", c);
211     } else {
212       sprintf(buffer, "%06X ", c);
213     }
214     result.append(buffer);
215   }
216   i = 0;
217   while(i < len) {
218     U16_NEXT(string, i, len, c);
219     u_charName(c, U_EXTENDED_CHAR_NAME, buffer, 256, &status);
220     result.append("{");
221     result.append(buffer);
222     result.append("} ");
223   }
224 /*
225   for(i = 0; i < len; i++) {
226     sprintf(buffer, "%04X ", string[i]);
227     result.append(buffer);
228   }
229   for(i = 0; i < len; i++) {
230     u_charName(string[i], U_EXTENDED_CHAR_NAME, buffer, 256, &status);
231     result.append("{");
232     result.append(buffer);
233     result.append("} ");
234   }
235 */
236   return result;
237 }
238 
239 UnicodeString
toBundleString()240 Line::toBundleString()
241 {
242 
243   UnicodeString result;
244   UErrorCode status = U_ZERO_ERROR;
245   if(!needsQuoting) {
246     needsQuoting = new UnicodeSet("[[:whitespace:][:c:][:z:][[:ascii:]-[a-zA-Z0-9]]]", status);
247   }
248   UChar NFC[50];
249   int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
250   result.append("\"");
251   if(isReset) {
252     result.append("&");
253   } else {
254     result.append(strengthToString(strength, FALSE, FALSE));
255   }
256   UBool quote = needsQuoting->containsSome(name) || needsQuoting->containsSome(NFC);
257   if(quote) {
258     result.append("'");
259   }
260   if(NFC[0] == 0x22) {
261     result.append("\\u0022");
262   } else {
263     result.append(NFC, NFCLen);
264   }
265   if(quote && NFC[0] != 0x0027) {
266     result.append("'");
267   }
268   if(expLen && !isReset) {
269     quote = needsQuoting->containsSome(expansionString);
270     result.append(" / ");
271     if(quote) {
272       result.append("'");
273     }
274     result.append(expansionString);
275     if(quote) {
276       result.append("'");
277     }
278   }
279   result.append("\" //");
280 
281   result.append(stringToName(NFC, NFCLen));
282   if(expLen && !isReset) {
283     result.append(" / ");
284     result.append(stringToName(expansionString, expLen));
285   }
286   result.append("\n");
287   return result;
288 }
289 
290 UnicodeString
toHTMLString()291 Line::toHTMLString()
292 {
293   UnicodeString result;
294   UErrorCode status = U_ZERO_ERROR;
295   UChar NFC[50];
296   int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
297   result.append("<span title=\"");
298   result.append(stringToName(NFC, NFCLen));
299   if(expLen && !isReset) {
300     result.append(" / ");
301     result.append(stringToName(expansionString, expLen));
302   }
303   result.append("\">");
304   if(isReset) {
305     result.append("&amp;");
306   } else {
307     result.append(strengthToString(strength, FALSE, TRUE));
308   }
309   result.append(NFC, NFCLen);
310   if(expLen && !isReset) {
311     result.append("&nbsp;/&nbsp;");
312     result.append(expansionString);
313   }
314   result.append("</span><br>\n");
315   return result;
316 }
317 
318 UnicodeString
toString(UBool pretty)319 Line::toString(UBool pretty) {
320   UnicodeString result;
321   if(!pretty) {
322     result.setTo(name);
323     if(expLen) {
324       result.append("/");
325       result.append(expansionString);
326     }
327   } else {
328     UErrorCode status = U_ZERO_ERROR;
329     UChar NFC[50];
330     int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
331     result.setTo(NFC, NFCLen);
332     if(expLen) {
333       result.append("/");
334       result.append(expansionString);
335     }
336     /*
337     if(NFCLen != len || u_strncmp(name, NFC, len) != 0) {
338       result.append("(NFC: ");
339       result.append(NFC, NFCLen);
340       result.append(stringToName(NFC, NFCLen));
341       result.append(")");
342     }
343     */
344     result.append("    # ");
345     result.append(stringToName(NFC, NFCLen));
346     if(expLen) {
347       result.append("/ ");
348       result.append(stringToName(expansionString, expLen));
349     }
350   }
351   return result;
352 }
353 
354 
355 void
setTo(const UnicodeString & string)356 Line::setTo(const UnicodeString &string) {
357   int32_t len = string.length();
358   u_strncpy(name, string.getBuffer(), len);
359   name[len] = 0;
360   this->len = len;
361   UChar32 c;
362   U16_GET(name, 0, 0, len, c);
363   firstCC = u_getCombiningClass(c);
364   U16_GET(name, 0, len-1, len, c);
365   lastCC = u_getCombiningClass(c);
366 }
367 
368 void
setTo(const UChar32 n)369 Line::setTo(const UChar32 n) {
370   UBool isError = FALSE;
371   len = 0; // we are setting the line to char, not appending
372   U16_APPEND(name, len, 25, n, isError);
373   name[len] = 0;
374   firstCC = u_getCombiningClass(n);
375   lastCC = firstCC;
376 }
377 
378 
379 UnicodeString
strengthIndent(UColAttributeValue strength,int indentSize,UnicodeString & result)380 Line::strengthIndent(UColAttributeValue strength, int indentSize, UnicodeString &result)
381 {
382   int i;
383   int numIndents = strength+1;
384   if(strength > UCOL_IDENTICAL) {
385     return result;
386   } else if(strength == UCOL_IDENTICAL) {
387     numIndents = 5;
388   }
389   for(i = 0; i < numIndents*indentSize; i++) {
390     result.append(" ");
391   }
392   return result;
393 }
394 
395 UnicodeString
strengthToString(UColAttributeValue strength,UBool pretty,UBool html)396 Line::strengthToString(UColAttributeValue strength, UBool pretty, UBool html) {
397   UnicodeString result;
398   if(html) {
399     switch(strength) {
400     case UCOL_IDENTICAL:
401       result.append("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;=&nbsp;");
402       break;
403     case UCOL_QUATERNARY:
404       result.append("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;&lt;&lt;&lt;&nbsp;");
405       break;
406     case UCOL_TERTIARY:
407       result.append("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&lt;&lt;&lt;&nbsp;");
408       break;
409     case UCOL_SECONDARY:
410       result.append("&nbsp;&nbsp;&nbsp;&nbsp;&lt;&lt;&nbsp;");
411       break;
412     case UCOL_PRIMARY:
413       result.append("&nbsp;&nbsp;&lt;&nbsp;");
414       break;
415     case UCOL_OFF:
416       result.append("&nbsp;&gt;?&nbsp;");
417       break;
418     default:
419       result.append("&nbsp;?!&nbsp;");
420       break;
421     }
422   } else {
423     switch(strength) {
424     case UCOL_IDENTICAL:
425       if(pretty) {
426         result.append("        ");
427       }
428       result.append(" = ");
429       break;
430     case UCOL_QUATERNARY:
431       if(pretty) {
432         result.append("        ");
433       }
434       result.append(" <<<< ");
435       break;
436     case UCOL_TERTIARY:
437       //u_fprintf(file, "<3");
438       if(pretty) {
439         result.append("      ");
440       }
441       result.append(" <<< ");
442       break;
443     case UCOL_SECONDARY:
444       //u_fprintf(file, "<2");
445       if(pretty) {
446         result.append("    ");
447       }
448       result.append(" << ");
449       break;
450     case UCOL_PRIMARY:
451       //u_fprintf(file, "<1");
452       if(pretty) {
453         result.append("  ");
454       }
455       result.append(" < ");
456       break;
457     case UCOL_OFF:
458       result.append(" >? ");
459       break;
460     default:
461       result.append(" ?! ");
462       break;
463     }
464   }
465   return result;
466 }
467 
468 Line *
nextInteresting()469 Line::nextInteresting() {
470   Line *result = this->next;
471   while(result && result->strength != UCOL_IDENTICAL) {
472     result = result->next;
473   }
474   return result;
475 }
476 
477 void
append(const UChar * n,int32_t length)478 Line::append(const UChar* n, int32_t length)
479 {
480   u_strncat(name, n, length);
481   name[len+length] = 0;
482   len += length;
483   UChar32 end;
484   U16_GET(n, 0, length-1, length, end);
485   lastCC = u_getCombiningClass(end);
486 }
487 
488 void
append(const UChar n)489 Line::append(const UChar n)
490 {
491   name[len] = n;
492   name[len+1] = 0;
493   len++;
494   lastCC = u_getCombiningClass(n);
495 }
496 
497 void
append(const Line & l)498 Line::append(const Line &l)
499 {
500   append(l.name, l.len);
501   lastCC = l.lastCC;
502 }
503 
504 void
clear()505 Line::clear()
506 {
507   name[0] = 0;
508   len = 0;
509 }
510 
511 int32_t
write(char * buff,int32_t,UErrorCode &)512 Line::write(char *buff, int32_t, UErrorCode &)
513 {
514   /*
515     UChar     name[25];
516     int32_t   len;
517     UChar     expansionString[25];
518     int32_t   expLen;
519 
520     UColAttributeValue strength;
521     UColAttributeValue strengthFromEmpty;
522     UColAttributeValue cumulativeStrength;
523     UColAttributeValue expStrength;
524 
525     Line *previous;
526     Line *next;
527 
528     UBool   isContraction;
529     UBool   isExpansion;
530     UBool   isRemoved;
531     UBool   isReset;
532 
533     int32_t expIndex;
534     uint8_t firstCC;
535     uint8_t lastCC;
536 */
537   int32_t resLen = 0;
538   int32_t i = 0;
539   sprintf(buff+resLen, "%04X", name[0]);
540   resLen += 4;
541   for(i = 1; i < len; i++) {
542     sprintf(buff+resLen, " %04X", name[i]);
543     resLen += 5;
544   }
545   sprintf(buff+resLen, "/");
546   resLen += 1;
547 
548   i = 0;
549   if(expLen) {
550     sprintf(buff+resLen, "%04X", expansionString[0]);
551     resLen += 4;
552     for(i = 1; i < expLen; i++) {
553       sprintf(buff+resLen, " %04X", expansionString[i]);
554       resLen += 5;
555     }
556   }
557   sprintf(buff+resLen, "; ");
558   resLen += 2;
559 
560   sprintf(buff+resLen, "%02i ", strength);
561   resLen += 3;
562   sprintf(buff+resLen, "%02i", strengthFromEmpty);
563   resLen += 2;
564   sprintf(buff+resLen, "%02i", cumulativeStrength);
565   resLen += 2;
566   sprintf(buff+resLen, "%02i", expStrength);
567   resLen += 2;
568 
569   // Various flags. The only interesting ones are isReset and isRemoved. We will not output removed lines
570   //sprintf(buff+resLen, "%1i%1i%1i%1i ", isContraction, isExpansion, isRemoved, isReset);
571   //resLen += 5;
572   sprintf(buff+resLen, "%1i%1i ", isRemoved, isReset);
573   resLen += 3;
574 
575   // first and last CC
576   // can be calculated on reading
577   //sprintf(buff+resLen, "%03i %03i ", firstCC, lastCC);
578   //resLen += 8;
579 
580   sprintf(buff+resLen, "%08X", expIndex);
581   resLen += 8;
582 
583   buff[resLen] = 0;
584 
585   return resLen;
586 }
587 
588 void
initFromString(const char * buff,int32_t,UErrorCode &)589 Line::initFromString(const char *buff, int32_t, UErrorCode &)
590 {
591   int32_t bufIndex = 0;
592   int32_t i = 0;
593 
594   sscanf(buff+bufIndex, "%04X", &name[i]);
595   i++;
596   bufIndex += 4;
597   while(buff[bufIndex] != '/') {
598     sscanf(buff+bufIndex, " %04X", &name[i]);
599     i++;
600     bufIndex += 5;
601   }
602   len = i;
603   name[len] = 0;
604   bufIndex++;
605 
606   if(i > 1) {
607     isContraction = TRUE;
608   } else {
609     isContraction = FALSE;
610   }
611 
612   if(buff[bufIndex] == ';') {
613     isExpansion = FALSE;
614     bufIndex += 2;
615     expansionString[0] = 0;
616     expLen = 0;
617   } else {
618     i = 0;
619     sscanf(buff+bufIndex, "%04X", &expansionString[i]);
620     i++;
621     bufIndex += 4;
622     while(buff[bufIndex] != ';') {
623       sscanf(buff+bufIndex, " %04X", &expansionString[i]);
624       i++;
625       bufIndex += 5;
626     }
627     expLen = i;
628     expansionString[expLen] = 0;
629     bufIndex += 2;
630   }
631   sscanf(buff+bufIndex, "%02i ", &strength);
632   bufIndex += 3;
633   sscanf(buff+bufIndex, "%02i", &strengthFromEmpty);
634   bufIndex += 2;
635   sscanf(buff+bufIndex, "%02i", &cumulativeStrength);
636   bufIndex += 2;
637   sscanf(buff+bufIndex, "%02i", &expStrength);
638   bufIndex += 2;
639 
640   sscanf(buff+bufIndex, "%1i%1i ", &isRemoved, &isReset);
641   bufIndex += 3;
642 
643   sscanf(buff+bufIndex, "%08X", &expIndex);
644   bufIndex += 8;
645 
646   // calculate first and last CC
647   UChar32 c;
648   U16_GET(name, 0, 0, len, c);
649   firstCC = u_getCombiningClass(c);
650   U16_GET(name, 0, len-1, len, c);
651   lastCC = u_getCombiningClass(c);
652 }
653 
654 void
swapCase(UChar * string,int32_t & sLen)655 Line::swapCase(UChar *string, int32_t &sLen)
656 {
657   UChar32 c = 0;
658   int32_t i = 0, j = 0;
659   UChar buff[256];
660   UBool isError = FALSE;
661   while(i < sLen) {
662     U16_NEXT(string, i, sLen, c);
663     if(u_isUUppercase(c)) {
664       c = u_tolower(c);
665     } else if(u_isULowercase(c)) {
666       c = u_toupper(c);
667     }
668     U16_APPEND(buff, j, 256, c, isError);
669   }
670   buff[j] = 0;
671   u_strcpy(string, buff);
672   sLen = j;
673 }
674 
675 
676 void
swapCase()677 Line::swapCase()
678 {
679   swapCase(name, len);
680   swapCase(expansionString, expLen);
681 }
682 
683 UnicodeString
dumpSortkey()684 Line::dumpSortkey()
685 {
686 
687   char buffer[256];
688   char *buff = buffer;
689   *buff = 0;
690   uint8_t *key = sortKey;
691   if(sortKey) {
692     while(*key) {
693       sprintf(buff, "%02X ", *key);
694       key++;
695       buff += 3;
696       if(buff - buffer > 252) {
697         break;
698       }
699     }
700   }
701   return UnicodeString(buffer);
702 }
703 
704