1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File line.cpp
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 03/18/2003 weiv Creation.
17 *******************************************************************************
18 */
19
20 #include "line.h"
21 #include <stdio.h>
22
23 UnicodeSet * Line::needsQuoting = NULL;
24
25 void
init()26 Line::init()
27 {
28 len = 0;
29 expLen = 0;
30 strength = UCOL_OFF;
31 strengthFromEmpty = UCOL_OFF;
32 cumulativeStrength = UCOL_OFF;
33 expStrength = UCOL_OFF;
34 previous = NULL;
35 next = NULL;
36 left = NULL;
37 right = NULL;
38 isContraction = false;
39 isExpansion = false;
40 isRemoved = false;
41 isReset = false;
42 expIndex = 0;
43 firstCC = 0;
44 lastCC = 0;
45 sortKey = NULL;
46 }
47
Line()48 Line::Line()
49 {
50 init();
51 memset(name, 0, 25*sizeof(UChar));
52 memset(expansionString, 0, 25*sizeof(UChar));
53 }
54
Line(const UChar * name,int32_t len)55 Line::Line(const UChar* name, int32_t len)
56 {
57 init();
58 this->len = len;
59 u_memcpy(this->name, name, len);
60 memset(expansionString, 0, 25*sizeof(UChar));
61 UChar32 c;
62 U16_GET(name, 0, 0, len, c);
63 firstCC = u_getCombiningClass(c);
64 U16_GET(name, 0, len-1, len, c);
65 lastCC = u_getCombiningClass(c);
66 }
67
Line(const UChar name)68 Line::Line(const UChar name)
69 {
70 init();
71 len = 1;
72 this->name[0] = name;
73 this->name[1] = 0;
74 memset(expansionString, 0, 25*sizeof(UChar));
75 firstCC = u_getCombiningClass(name);
76 lastCC = firstCC;
77 }
78
Line(const UnicodeString & string)79 Line::Line(const UnicodeString &string)
80 {
81 init();
82 setTo(string);
83 }
84
Line(const char * buff,int32_t buffLen,UErrorCode & status)85 Line::Line(const char *buff, int32_t buffLen, UErrorCode &status) :
86 previous(NULL),
87 next(NULL),
88 left(NULL),
89 right(NULL)
90 {
91 initFromString(buff, buffLen, status);
92 }
93
/**
 * Copy constructor. The link pointers start as NULL and the assignment
 * operator then copies the remaining state from other.
 */
Line::Line(const Line &other)
  : previous(NULL), next(NULL), left(NULL), right(NULL)
{
  operator=(other);
}
102
103 Line &
operator =(const Line & other)104 Line::operator=(const Line &other) {
105 len = other.len;
106 expLen = other.expLen;
107 strength = other.strength;
108 strengthFromEmpty = other.strengthFromEmpty;
109 cumulativeStrength = other.cumulativeStrength;
110 expStrength = other.expStrength;
111 isContraction = other.isContraction;
112 isExpansion = other.isExpansion;
113 isRemoved = other.isRemoved;
114 isReset = other.isReset;
115 expIndex = other.expIndex;
116 firstCC = other.firstCC;
117 lastCC = other.lastCC;
118 u_strcpy(name, other.name);
119 u_strcpy(expansionString, other.expansionString);
120 sortKey = other.sortKey;
121 left = other.left;
122 right = other.right;
123 return *this;
124 }
125
126 UBool
operator ==(const Line & other) const127 Line::operator==(const Line &other) const {
128 if(this == &other) {
129 return true;
130 }
131 if(len != other.len) {
132 return false;
133 }
134 if(u_strcmp(name, other.name) != 0) {
135 return false;
136 }
137 return true;
138 }
139
140 UBool
equals(const Line & other) const141 Line::equals(const Line &other) const {
142 if(this == &other) {
143 return true;
144 }
145 if(len != other.len) {
146 return false;
147 }
148 if(u_strcmp(name, other.name) != 0) {
149 return false;
150 }
151 if(strength != other.strength) {
152 return false;
153 }
154 if(expLen != other.expLen) {
155 return false;
156 }
157 if(u_strcmp(expansionString, other.expansionString)) {
158 return false;
159 }
160 return true;
161 }
162
163 UBool
operator !=(const Line & other) const164 Line::operator!=(const Line &other) const {
165 return !(*this == other);
166 }
167
168
~Line()169 Line::~Line() {
170 }
171
172 void
copyArray(Line * dest,const Line * src,int32_t size)173 Line::copyArray(Line *dest, const Line *src, int32_t size) {
174 int32_t i = 0;
175 for(i = 0; i < size; i++) {
176 dest[i] = src[i];
177 }
178 }
179
180 void
setName(const UChar * name,int32_t len)181 Line::setName(const UChar* name, int32_t len) {
182 this->len = len;
183 u_memcpy(this->name, name, len);
184 UChar32 c;
185 U16_GET(name, 0, 0, len, c);
186 firstCC = u_getCombiningClass(c);
187 U16_GET(name, 0, len-1, len, c);
188 lastCC = u_getCombiningClass(c);
189 }
190
191 void
setToConcat(const Line * first,const Line * second)192 Line::setToConcat(const Line *first, const Line *second) {
193 u_strcpy(name, first->name);
194 u_strcat(name, second->name);
195 len = first->len + second->len;
196 firstCC = first->firstCC;
197 lastCC = second->lastCC;
198 }
199
200 UnicodeString
stringToName(UChar * string,int32_t len)201 Line::stringToName(UChar *string, int32_t len) {
202 UErrorCode status = U_ZERO_ERROR;
203 UnicodeString result;
204 char buffer[256];
205 int32_t i = 0;
206 UChar32 c;
207 while(i < len) {
208 U16_NEXT(string, i, len, c);
209 if(c < 0x10000) {
210 sprintf(buffer, "%04X ", c);
211 } else {
212 sprintf(buffer, "%06X ", c);
213 }
214 result.append(buffer);
215 }
216 i = 0;
217 while(i < len) {
218 U16_NEXT(string, i, len, c);
219 u_charName(c, U_EXTENDED_CHAR_NAME, buffer, 256, &status);
220 result.append("{");
221 result.append(buffer);
222 result.append("} ");
223 }
224 /*
225 for(i = 0; i < len; i++) {
226 sprintf(buffer, "%04X ", string[i]);
227 result.append(buffer);
228 }
229 for(i = 0; i < len; i++) {
230 u_charName(string[i], U_EXTENDED_CHAR_NAME, buffer, 256, &status);
231 result.append("{");
232 result.append(buffer);
233 result.append("} ");
234 }
235 */
236 return result;
237 }
238
239 UnicodeString
toBundleString()240 Line::toBundleString()
241 {
242
243 UnicodeString result;
244 UErrorCode status = U_ZERO_ERROR;
245 if(!needsQuoting) {
246 needsQuoting = new UnicodeSet("[[:whitespace:][:c:][:z:][[:ascii:]-[a-zA-Z0-9]]]", status);
247 }
248 UChar NFC[50];
249 int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
250 result.append("\"");
251 if(isReset) {
252 result.append("&");
253 } else {
254 result.append(strengthToString(strength, false, false));
255 }
256 UBool quote = needsQuoting->containsSome(name) || needsQuoting->containsSome(NFC);
257 if(quote) {
258 result.append("'");
259 }
260 if(NFC[0] == 0x22) {
261 result.append("\\u0022");
262 } else {
263 result.append(NFC, NFCLen);
264 }
265 if(quote && NFC[0] != 0x0027) {
266 result.append("'");
267 }
268 if(expLen && !isReset) {
269 quote = needsQuoting->containsSome(expansionString);
270 result.append(" / ");
271 if(quote) {
272 result.append("'");
273 }
274 result.append(expansionString);
275 if(quote) {
276 result.append("'");
277 }
278 }
279 result.append("\" //");
280
281 result.append(stringToName(NFC, NFCLen));
282 if(expLen && !isReset) {
283 result.append(" / ");
284 result.append(stringToName(expansionString, expLen));
285 }
286 result.append("\n");
287 return result;
288 }
289
290 UnicodeString
toHTMLString()291 Line::toHTMLString()
292 {
293 UnicodeString result;
294 UErrorCode status = U_ZERO_ERROR;
295 UChar NFC[50];
296 int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
297 result.append("<span title=\"");
298 result.append(stringToName(NFC, NFCLen));
299 if(expLen && !isReset) {
300 result.append(" / ");
301 result.append(stringToName(expansionString, expLen));
302 }
303 result.append("\">");
304 if(isReset) {
305 result.append("&");
306 } else {
307 result.append(strengthToString(strength, false, true));
308 }
309 result.append(NFC, NFCLen);
310 if(expLen && !isReset) {
311 result.append(" / ");
312 result.append(expansionString);
313 }
314 result.append("</span><br>\n");
315 return result;
316 }
317
318 UnicodeString
toString(UBool pretty)319 Line::toString(UBool pretty) {
320 UnicodeString result;
321 if(!pretty) {
322 result.setTo(name);
323 if(expLen) {
324 result.append("/");
325 result.append(expansionString);
326 }
327 } else {
328 UErrorCode status = U_ZERO_ERROR;
329 UChar NFC[50];
330 int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
331 result.setTo(NFC, NFCLen);
332 if(expLen) {
333 result.append("/");
334 result.append(expansionString);
335 }
336 /*
337 if(NFCLen != len || u_strncmp(name, NFC, len) != 0) {
338 result.append("(NFC: ");
339 result.append(NFC, NFCLen);
340 result.append(stringToName(NFC, NFCLen));
341 result.append(")");
342 }
343 */
344 result.append(" # ");
345 result.append(stringToName(NFC, NFCLen));
346 if(expLen) {
347 result.append("/ ");
348 result.append(stringToName(expansionString, expLen));
349 }
350 }
351 return result;
352 }
353
354
355 void
setTo(const UnicodeString & string)356 Line::setTo(const UnicodeString &string) {
357 int32_t len = string.length();
358 u_strncpy(name, string.getBuffer(), len);
359 name[len] = 0;
360 this->len = len;
361 UChar32 c;
362 U16_GET(name, 0, 0, len, c);
363 firstCC = u_getCombiningClass(c);
364 U16_GET(name, 0, len-1, len, c);
365 lastCC = u_getCombiningClass(c);
366 }
367
368 void
setTo(const UChar32 n)369 Line::setTo(const UChar32 n) {
370 UBool isError = false;
371 len = 0; // we are setting the line to char, not appending
372 U16_APPEND(name, len, 25, n, isError);
373 name[len] = 0;
374 firstCC = u_getCombiningClass(n);
375 lastCC = firstCC;
376 }
377
378
379 UnicodeString
strengthIndent(UColAttributeValue strength,int indentSize,UnicodeString & result)380 Line::strengthIndent(UColAttributeValue strength, int indentSize, UnicodeString &result)
381 {
382 int i;
383 int numIndents = strength+1;
384 if(strength > UCOL_IDENTICAL) {
385 return result;
386 } else if(strength == UCOL_IDENTICAL) {
387 numIndents = 5;
388 }
389 for(i = 0; i < numIndents*indentSize; i++) {
390 result.append(" ");
391 }
392 return result;
393 }
394
395 UnicodeString
strengthToString(UColAttributeValue strength,UBool pretty,UBool html)396 Line::strengthToString(UColAttributeValue strength, UBool pretty, UBool html) {
397 UnicodeString result;
398 if(html) {
399 switch(strength) {
400 case UCOL_IDENTICAL:
401 result.append(" = ");
402 break;
403 case UCOL_QUATERNARY:
404 result.append(" <<<< ");
405 break;
406 case UCOL_TERTIARY:
407 result.append(" <<< ");
408 break;
409 case UCOL_SECONDARY:
410 result.append(" << ");
411 break;
412 case UCOL_PRIMARY:
413 result.append(" < ");
414 break;
415 case UCOL_OFF:
416 result.append(" >? ");
417 break;
418 default:
419 result.append(" ?! ");
420 break;
421 }
422 } else {
423 switch(strength) {
424 case UCOL_IDENTICAL:
425 if(pretty) {
426 result.append(" ");
427 }
428 result.append(" = ");
429 break;
430 case UCOL_QUATERNARY:
431 if(pretty) {
432 result.append(" ");
433 }
434 result.append(" <<<< ");
435 break;
436 case UCOL_TERTIARY:
437 //u_fprintf(file, "<3");
438 if(pretty) {
439 result.append(" ");
440 }
441 result.append(" <<< ");
442 break;
443 case UCOL_SECONDARY:
444 //u_fprintf(file, "<2");
445 if(pretty) {
446 result.append(" ");
447 }
448 result.append(" << ");
449 break;
450 case UCOL_PRIMARY:
451 //u_fprintf(file, "<1");
452 if(pretty) {
453 result.append(" ");
454 }
455 result.append(" < ");
456 break;
457 case UCOL_OFF:
458 result.append(" >? ");
459 break;
460 default:
461 result.append(" ?! ");
462 break;
463 }
464 }
465 return result;
466 }
467
468 Line *
nextInteresting()469 Line::nextInteresting() {
470 Line *result = this->next;
471 while(result && result->strength != UCOL_IDENTICAL) {
472 result = result->next;
473 }
474 return result;
475 }
476
477 void
append(const UChar * n,int32_t length)478 Line::append(const UChar* n, int32_t length)
479 {
480 u_strncat(name, n, length);
481 name[len+length] = 0;
482 len += length;
483 UChar32 end;
484 U16_GET(n, 0, length-1, length, end);
485 lastCC = u_getCombiningClass(end);
486 }
487
488 void
append(const UChar n)489 Line::append(const UChar n)
490 {
491 name[len] = n;
492 name[len+1] = 0;
493 len++;
494 lastCC = u_getCombiningClass(n);
495 }
496
497 void
append(const Line & l)498 Line::append(const Line &l)
499 {
500 append(l.name, l.len);
501 lastCC = l.lastCC;
502 }
503
504 void
clear()505 Line::clear()
506 {
507 name[0] = 0;
508 len = 0;
509 }
510
511 int32_t
write(char * buff,int32_t,UErrorCode &)512 Line::write(char *buff, int32_t, UErrorCode &)
513 {
514 /*
515 UChar name[25];
516 int32_t len;
517 UChar expansionString[25];
518 int32_t expLen;
519
520 UColAttributeValue strength;
521 UColAttributeValue strengthFromEmpty;
522 UColAttributeValue cumulativeStrength;
523 UColAttributeValue expStrength;
524
525 Line *previous;
526 Line *next;
527
528 UBool isContraction;
529 UBool isExpansion;
530 UBool isRemoved;
531 UBool isReset;
532
533 int32_t expIndex;
534 uint8_t firstCC;
535 uint8_t lastCC;
536 */
537 int32_t resLen = 0;
538 int32_t i = 0;
539 sprintf(buff+resLen, "%04X", name[0]);
540 resLen += 4;
541 for(i = 1; i < len; i++) {
542 sprintf(buff+resLen, " %04X", name[i]);
543 resLen += 5;
544 }
545 sprintf(buff+resLen, "/");
546 resLen += 1;
547
548 i = 0;
549 if(expLen) {
550 sprintf(buff+resLen, "%04X", expansionString[0]);
551 resLen += 4;
552 for(i = 1; i < expLen; i++) {
553 sprintf(buff+resLen, " %04X", expansionString[i]);
554 resLen += 5;
555 }
556 }
557 sprintf(buff+resLen, "; ");
558 resLen += 2;
559
560 sprintf(buff+resLen, "%02i ", strength);
561 resLen += 3;
562 sprintf(buff+resLen, "%02i", strengthFromEmpty);
563 resLen += 2;
564 sprintf(buff+resLen, "%02i", cumulativeStrength);
565 resLen += 2;
566 sprintf(buff+resLen, "%02i", expStrength);
567 resLen += 2;
568
569 // Various flags. The only interesting ones are isReset and isRemoved. We will not output removed lines
570 //sprintf(buff+resLen, "%1i%1i%1i%1i ", isContraction, isExpansion, isRemoved, isReset);
571 //resLen += 5;
572 sprintf(buff+resLen, "%1i%1i ", isRemoved, isReset);
573 resLen += 3;
574
575 // first and last CC
576 // can be calculated on reading
577 //sprintf(buff+resLen, "%03i %03i ", firstCC, lastCC);
578 //resLen += 8;
579
580 sprintf(buff+resLen, "%08X", expIndex);
581 resLen += 8;
582
583 buff[resLen] = 0;
584
585 return resLen;
586 }
587
588 void
initFromString(const char * buff,int32_t,UErrorCode &)589 Line::initFromString(const char *buff, int32_t, UErrorCode &)
590 {
591 int32_t bufIndex = 0;
592 int32_t i = 0;
593
594 sscanf(buff+bufIndex, "%04X", &name[i]);
595 i++;
596 bufIndex += 4;
597 while(buff[bufIndex] != '/') {
598 sscanf(buff+bufIndex, " %04X", &name[i]);
599 i++;
600 bufIndex += 5;
601 }
602 len = i;
603 name[len] = 0;
604 bufIndex++;
605
606 if(i > 1) {
607 isContraction = true;
608 } else {
609 isContraction = false;
610 }
611
612 if(buff[bufIndex] == ';') {
613 isExpansion = false;
614 bufIndex += 2;
615 expansionString[0] = 0;
616 expLen = 0;
617 } else {
618 i = 0;
619 sscanf(buff+bufIndex, "%04X", &expansionString[i]);
620 i++;
621 bufIndex += 4;
622 while(buff[bufIndex] != ';') {
623 sscanf(buff+bufIndex, " %04X", &expansionString[i]);
624 i++;
625 bufIndex += 5;
626 }
627 expLen = i;
628 expansionString[expLen] = 0;
629 bufIndex += 2;
630 }
631 sscanf(buff+bufIndex, "%02i ", &strength);
632 bufIndex += 3;
633 sscanf(buff+bufIndex, "%02i", &strengthFromEmpty);
634 bufIndex += 2;
635 sscanf(buff+bufIndex, "%02i", &cumulativeStrength);
636 bufIndex += 2;
637 sscanf(buff+bufIndex, "%02i", &expStrength);
638 bufIndex += 2;
639
640 sscanf(buff+bufIndex, "%1i%1i ", &isRemoved, &isReset);
641 bufIndex += 3;
642
643 sscanf(buff+bufIndex, "%08X", &expIndex);
644 bufIndex += 8;
645
646 // calculate first and last CC
647 UChar32 c;
648 U16_GET(name, 0, 0, len, c);
649 firstCC = u_getCombiningClass(c);
650 U16_GET(name, 0, len-1, len, c);
651 lastCC = u_getCombiningClass(c);
652 }
653
654 void
swapCase(UChar * string,int32_t & sLen)655 Line::swapCase(UChar *string, int32_t &sLen)
656 {
657 UChar32 c = 0;
658 int32_t i = 0, j = 0;
659 UChar buff[256];
660 UBool isError = false;
661 while(i < sLen) {
662 U16_NEXT(string, i, sLen, c);
663 if(u_isUUppercase(c)) {
664 c = u_tolower(c);
665 } else if(u_isULowercase(c)) {
666 c = u_toupper(c);
667 }
668 U16_APPEND(buff, j, 256, c, isError);
669 }
670 buff[j] = 0;
671 u_strcpy(string, buff);
672 sLen = j;
673 }
674
675
676 void
swapCase()677 Line::swapCase()
678 {
679 swapCase(name, len);
680 swapCase(expansionString, expLen);
681 }
682
683 UnicodeString
dumpSortkey()684 Line::dumpSortkey()
685 {
686
687 char buffer[256];
688 char *buff = buffer;
689 *buff = 0;
690 uint8_t *key = sortKey;
691 if(sortKey) {
692 while(*key) {
693 sprintf(buff, "%02X ", *key);
694 key++;
695 buff += 3;
696 if(buff - buffer > 252) {
697 break;
698 }
699 }
700 }
701 return UnicodeString(buffer);
702 }
703
704