1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2001-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: ucol_cnt.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created 02/22/2001
14 * created by: Vladimir Weinstein
15 *
16 * This module maintains a contraction table structure in expanded form
17 * and provides means to flatten this structure
18 *
19 */
20
21 #include "unicode/utypes.h"
22
23 #if !UCONFIG_NO_COLLATION
24
25 #include "unicode/uchar.h"
26 #include "ucol_cnt.h"
27 #include "cmemory.h"
28
uprv_growTable(ContractionTable * tbl,UErrorCode * status)29 static void uprv_growTable(ContractionTable *tbl, UErrorCode *status) {
30 if(tbl->position == tbl->size) {
31 uint32_t *newData = (uint32_t *)uprv_realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t));
32 if(newData == NULL) {
33 *status = U_MEMORY_ALLOCATION_ERROR;
34 return;
35 }
36 UChar *newCPs = (UChar *)uprv_realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar));
37 if(newCPs == NULL) {
38 uprv_free(newData);
39 *status = U_MEMORY_ALLOCATION_ERROR;
40 return;
41 }
42 tbl->CEs = newData;
43 tbl->codePoints = newCPs;
44 tbl->size *= 2;
45 }
46 }
47
48 U_CAPI CntTable* U_EXPORT2
49 /*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/
uprv_cnttab_open(UNewTrie * mapping,UErrorCode * status)50 uprv_cnttab_open(UNewTrie *mapping, UErrorCode *status) {
51 if(U_FAILURE(*status)) {
52 return 0;
53 }
54 CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable));
55 if(tbl == NULL) {
56 *status = U_MEMORY_ALLOCATION_ERROR;
57 return NULL;
58 }
59 tbl->mapping = mapping;
60 tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
61 if(tbl->elements == NULL) {
62 *status = U_MEMORY_ALLOCATION_ERROR;
63 uprv_free(tbl);
64 return NULL;
65 }
66 tbl->capacity = INIT_EXP_TABLE_SIZE;
67 uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
68 tbl->size = 0;
69 tbl->position = 0;
70 tbl->CEs = NULL;
71 tbl->codePoints = NULL;
72 tbl->offsets = NULL;
73 tbl->currentTag = NOT_FOUND_TAG;
74 return tbl;
75 }
76
addATableElement(CntTable * table,uint32_t * key,UErrorCode * status)77 static ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) {
78 ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
79 if(el == NULL) {
80 goto outOfMemory;
81 }
82 el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
83 if(el->CEs == NULL) {
84 goto outOfMemory;
85 }
86
87 el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar));
88 if(el->codePoints == NULL) {
89 uprv_free(el->CEs);
90 goto outOfMemory;
91 }
92
93 el->position = 0;
94 el->size = INIT_EXP_TABLE_SIZE;
95 uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
96 uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar));
97
98 table->elements[table->size] = el;
99
100 //uhash_put(table->elements, (void *)table->size, el, status);
101
102 *key = table->size++;
103
104 if(table->size == table->capacity) {
105 ContractionTable **newElements = (ContractionTable **)uprv_malloc(table->capacity*2*sizeof(ContractionTable *));
106 // do realloc
107 /* table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/
108 if(newElements == NULL) {
109 uprv_free(el->codePoints);
110 uprv_free(el->CEs);
111 goto outOfMemory;
112 }
113 ContractionTable **oldElements = table->elements;
114 uprv_memcpy(newElements, oldElements, table->capacity*sizeof(ContractionTable *));
115 uprv_memset(newElements+table->capacity, 0, table->capacity*sizeof(ContractionTable *));
116 table->capacity *= 2;
117 table->elements = newElements;
118 uprv_free(oldElements);
119 }
120
121 return el;
122
123 outOfMemory:
124 *status = U_MEMORY_ALLOCATION_ERROR;
125 if (el) uprv_free(el);
126 return NULL;
127 }
128
129 U_CAPI int32_t U_EXPORT2
uprv_cnttab_constructTable(CntTable * table,uint32_t mainOffset,UErrorCode * status)130 uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
131 int32_t i = 0, j = 0;
132 if(U_FAILURE(*status) || table->size == 0) {
133 return 0;
134 }
135
136 table->position = 0;
137
138 if(table->offsets != NULL) {
139 uprv_free(table->offsets);
140 }
141 table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
142 if(table->offsets == NULL) {
143 *status = U_MEMORY_ALLOCATION_ERROR;
144 return 0;
145 }
146
147
148 /* See how much memory we need */
149 for(i = 0; i<table->size; i++) {
150 table->offsets[i] = table->position+mainOffset;
151 table->position += table->elements[i]->position;
152 }
153
154 /* Allocate it */
155 if(table->CEs != NULL) {
156 uprv_free(table->CEs);
157 }
158 table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
159 if(table->CEs == NULL) {
160 *status = U_MEMORY_ALLOCATION_ERROR;
161 uprv_free(table->offsets);
162 table->offsets = NULL;
163 return 0;
164 }
165 uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));
166
167 if(table->codePoints != NULL) {
168 uprv_free(table->codePoints);
169 }
170 table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
171 if(table->codePoints == NULL) {
172 *status = U_MEMORY_ALLOCATION_ERROR;
173 uprv_free(table->offsets);
174 table->offsets = NULL;
175 uprv_free(table->CEs);
176 table->CEs = NULL;
177 return 0;
178 }
179 uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));
180
181 /* Now stuff the things in*/
182
183 UChar *cpPointer = table->codePoints;
184 uint32_t *CEPointer = table->CEs;
185 for(i = 0; i<table->size; i++) {
186 int32_t size = table->elements[i]->position;
187 uint8_t ccMax = 0, ccMin = 255, cc = 0;
188 for(j = 1; j<size; j++) {
189 cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
190 if(cc>ccMax) {
191 ccMax = cc;
192 }
193 if(cc<ccMin) {
194 ccMin = cc;
195 }
196 *(cpPointer+j) = table->elements[i]->codePoints[j];
197 }
198 *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax;
199
200 uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
201 for(j = 0; j<size; j++) {
202 if(isCntTableElement(*(CEPointer+j))) {
203 *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
204 }
205 }
206 cpPointer += size;
207 CEPointer += size;
208 }
209
210 // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
211 // start of the flat file). However, what is done below is just wrong and it affects building of
212 // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
213 // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
214 // feasible.
215 uint32_t CE;
216 for(i = 0; i<=0x10FFFF; i++) {
217 /*CE = ucmpe32_get(table->mapping, i);*/
218 CE = utrie_get32(table->mapping, i, NULL);
219 if(isCntTableElement(CE)) {
220 CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
221 /*ucmpe32_set(table->mapping, i, CE);*/
222 utrie_set32(table->mapping, i, CE);
223 }
224 }
225
226
227 return table->position;
228 }
229
uprv_cnttab_cloneContraction(ContractionTable * t,UErrorCode * status)230 static ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t, UErrorCode *status) {
231 ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
232 if(r == NULL) {
233 goto outOfMemory;
234 }
235
236 r->position = t->position;
237 r->size = t->size;
238
239 r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size);
240 if(r->codePoints == NULL) {
241 goto outOfMemory;
242 }
243 r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size);
244 if(r->CEs == NULL) {
245 uprv_free(r->codePoints);
246 goto outOfMemory;
247 }
248 uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size);
249 uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size);
250
251 return r;
252
253 outOfMemory:
254 *status = U_MEMORY_ALLOCATION_ERROR;
255 if (r) uprv_free(r);
256 return NULL;
257 }
258
259 U_CAPI CntTable* U_EXPORT2
uprv_cnttab_clone(CntTable * t,UErrorCode * status)260 uprv_cnttab_clone(CntTable *t, UErrorCode *status) {
261 if(U_FAILURE(*status)) {
262 return NULL;
263 }
264 int32_t i = 0;
265 CntTable *r = (CntTable *)uprv_malloc(sizeof(CntTable));
266 /* test for NULL */
267 if (r == NULL) {
268 goto outOfMemory;
269 }
270 r->position = t->position;
271 r->size = t->size;
272 r->capacity = t->capacity;
273
274 r->mapping = t->mapping;
275
276 r->elements = (ContractionTable **)uprv_malloc(t->capacity*sizeof(ContractionTable *));
277 /* test for NULL */
278 if (r->elements == NULL) {
279 goto outOfMemory;
280 }
281 //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
282
283 for(i = 0; i<t->size; i++) {
284 r->elements[i] = uprv_cnttab_cloneContraction(t->elements[i], status);
285 }
286
287 if(t->CEs != NULL) {
288 r->CEs = (uint32_t *)uprv_malloc(t->position*sizeof(uint32_t));
289 /* test for NULL */
290 if (r->CEs == NULL) {
291 uprv_free(r->elements);
292 goto outOfMemory;
293 }
294 uprv_memcpy(r->CEs, t->CEs, t->position*sizeof(uint32_t));
295 } else {
296 r->CEs = NULL;
297 }
298
299 if(t->codePoints != NULL) {
300 r->codePoints = (UChar *)uprv_malloc(t->position*sizeof(UChar));
301 /* test for NULL */
302 if (r->codePoints == NULL) {
303 uprv_free(r->CEs);
304 uprv_free(r->elements);
305 goto outOfMemory;
306 }
307 uprv_memcpy(r->codePoints, t->codePoints, t->position*sizeof(UChar));
308 } else {
309 r->codePoints = NULL;
310 }
311
312 if(t->offsets != NULL) {
313 r->offsets = (int32_t *)uprv_malloc(t->size*sizeof(int32_t));
314 /* test for NULL */
315 if (r->offsets == NULL) {
316 uprv_free(r->codePoints);
317 uprv_free(r->CEs);
318 uprv_free(r->elements);
319 goto outOfMemory;
320 }
321 uprv_memcpy(r->offsets, t->offsets, t->size*sizeof(int32_t));
322 } else {
323 r->offsets = NULL;
324 }
325
326 return r;
327
328 outOfMemory:
329 *status = U_MEMORY_ALLOCATION_ERROR;
330 if (r) uprv_free(r);
331 return NULL;
332 }
333
334 U_CAPI void U_EXPORT2
uprv_cnttab_close(CntTable * table)335 uprv_cnttab_close(CntTable *table) {
336 int32_t i = 0;
337 for(i = 0; i<table->size; i++) {
338 uprv_free(table->elements[i]->CEs);
339 uprv_free(table->elements[i]->codePoints);
340 uprv_free(table->elements[i]);
341 }
342 uprv_free(table->elements);
343 uprv_free(table->CEs);
344 uprv_free(table->offsets);
345 uprv_free(table->codePoints);
346 uprv_free(table);
347 }
348
349 /* this is for adding non contractions */
350 U_CAPI uint32_t U_EXPORT2
uprv_cnttab_changeLastCE(CntTable * table,uint32_t element,uint32_t value,UErrorCode * status)351 uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UErrorCode *status) {
352 element &= 0xFFFFFF;
353
354 ContractionTable *tbl = NULL;
355 if(U_FAILURE(*status)) {
356 return 0;
357 }
358
359 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
360 return 0;
361 }
362
363 tbl->CEs[tbl->position-1] = value;
364
365 return(constructContractCE(table->currentTag, element));
366 }
367
368
369 /* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
370 U_CAPI uint32_t U_EXPORT2
uprv_cnttab_insertContraction(CntTable * table,uint32_t element,UChar codePoint,uint32_t value,UErrorCode * status)371 uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
372
373 ContractionTable *tbl = NULL;
374
375 if(U_FAILURE(*status)) {
376 return 0;
377 }
378 element &= 0xFFFFFF;
379
380 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
381 tbl = addATableElement(table, &element, status);
382 if (U_FAILURE(*status)) {
383 return 0;
384 }
385 }
386
387 uprv_growTable(tbl, status);
388
389 uint32_t offset = 0;
390
391
392 while(tbl->codePoints[offset] < codePoint && offset<tbl->position) {
393 offset++;
394 }
395
396 uint32_t i = tbl->position;
397 for(i = tbl->position; i > offset; i--) {
398 tbl->CEs[i] = tbl->CEs[i-1];
399 tbl->codePoints[i] = tbl->codePoints[i-1];
400 }
401
402 tbl->CEs[offset] = value;
403 tbl->codePoints[offset] = codePoint;
404
405 tbl->position++;
406
407 return(constructContractCE(table->currentTag, element));
408 }
409
410
411 /* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
412 U_CAPI uint32_t U_EXPORT2
uprv_cnttab_addContraction(CntTable * table,uint32_t element,UChar codePoint,uint32_t value,UErrorCode * status)413 uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
414
415 element &= 0xFFFFFF;
416
417 ContractionTable *tbl = NULL;
418
419 if(U_FAILURE(*status)) {
420 return 0;
421 }
422
423 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
424 tbl = addATableElement(table, &element, status);
425 if (U_FAILURE(*status)) {
426 return 0;
427 }
428 }
429
430 uprv_growTable(tbl, status);
431
432 tbl->CEs[tbl->position] = value;
433 tbl->codePoints[tbl->position] = codePoint;
434
435 tbl->position++;
436
437 return(constructContractCE(table->currentTag, element));
438 }
439
440 /* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
441 U_CAPI uint32_t U_EXPORT2
uprv_cnttab_setContraction(CntTable * table,uint32_t element,uint32_t offset,UChar codePoint,uint32_t value,UErrorCode * status)442 uprv_cnttab_setContraction(CntTable *table, uint32_t element, uint32_t offset, UChar codePoint, uint32_t value, UErrorCode *status) {
443
444 element &= 0xFFFFFF;
445 ContractionTable *tbl = NULL;
446
447 if(U_FAILURE(*status)) {
448 return 0;
449 }
450
451 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
452 tbl = addATableElement(table, &element, status);
453 if (U_FAILURE(*status)) {
454 return 0;
455 }
456
457 }
458
459 if(offset >= tbl->size) {
460 *status = U_INDEX_OUTOFBOUNDS_ERROR;
461 return 0;
462 }
463 tbl->CEs[offset] = value;
464 tbl->codePoints[offset] = codePoint;
465
466 //return(offset);
467 return(constructContractCE(table->currentTag, element));
468 }
469
_cnttab_getContractionTable(CntTable * table,uint32_t element)470 static ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t element) {
471 element &= 0xFFFFFF;
472 ContractionTable *tbl = NULL;
473
474 if(element != 0xFFFFFF) {
475 tbl = table->elements[element]; /* This could also return NULL */
476 }
477 return tbl;
478 }
479
_cnttab_findCP(ContractionTable * tbl,UChar codePoint)480 static int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) {
481 uint32_t position = 0;
482 if(tbl == NULL) {
483 return -1;
484 }
485
486 while(codePoint > tbl->codePoints[position]) {
487 position++;
488 if(position > tbl->position) {
489 return -1;
490 }
491 }
492 if (codePoint == tbl->codePoints[position]) {
493 return position;
494 } else {
495 return -1;
496 }
497 }
498
_cnttab_getCE(ContractionTable * tbl,int32_t position)499 static uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) {
500 if(tbl == NULL) {
501 return UCOL_NOT_FOUND;
502 }
503 if((uint32_t)position > tbl->position || position == -1) {
504 return UCOL_NOT_FOUND;
505 } else {
506 return tbl->CEs[position];
507 }
508 }
509
510 U_CAPI int32_t U_EXPORT2
uprv_cnttab_findCP(CntTable * table,uint32_t element,UChar codePoint,UErrorCode * status)511 uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
512
513 if(U_FAILURE(*status)) {
514 return 0;
515 }
516
517 return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint);
518 }
519
520 U_CAPI uint32_t U_EXPORT2
uprv_cnttab_getCE(CntTable * table,uint32_t element,uint32_t position,UErrorCode * status)521 uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCode *status) {
522 if(U_FAILURE(*status)) {
523 return UCOL_NOT_FOUND;
524 }
525
526 return(_cnttab_getCE(_cnttab_getContractionTable(table, element), position));
527 }
528
529 U_CAPI uint32_t U_EXPORT2
uprv_cnttab_findCE(CntTable * table,uint32_t element,UChar codePoint,UErrorCode * status)530 uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
531 if(U_FAILURE(*status)) {
532 return UCOL_NOT_FOUND;
533 }
534 ContractionTable *tbl = _cnttab_getContractionTable(table, element);
535 return _cnttab_getCE(tbl, _cnttab_findCP(tbl, codePoint));
536 }
537
538 U_CAPI UBool U_EXPORT2
uprv_cnttab_isTailored(CntTable * table,uint32_t element,UChar * ztString,UErrorCode * status)539 uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErrorCode *status) {
540 if(U_FAILURE(*status)) {
541 return FALSE;
542 }
543
544 while(*(ztString)!=0) {
545 element = uprv_cnttab_findCE(table, element, *(ztString), status);
546 if(element == UCOL_NOT_FOUND) {
547 return FALSE;
548 }
549 if(!isCntTableElement(element)) {
550 return TRUE;
551 }
552 ztString++;
553 }
554 return (UBool)(uprv_cnttab_getCE(table, element, 0, status) != UCOL_NOT_FOUND);
555 }
556
557 U_CAPI uint32_t U_EXPORT2
uprv_cnttab_changeContraction(CntTable * table,uint32_t element,UChar codePoint,uint32_t newCE,UErrorCode * status)558 uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t newCE, UErrorCode *status) {
559
560 element &= 0xFFFFFF;
561 ContractionTable *tbl = NULL;
562
563 if(U_FAILURE(*status)) {
564 return 0;
565 }
566
567 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
568 return 0;
569 }
570
571 uint32_t position = 0;
572
573 while(codePoint > tbl->codePoints[position]) {
574 position++;
575 if(position > tbl->position) {
576 return UCOL_NOT_FOUND;
577 }
578 }
579 if (codePoint == tbl->codePoints[position]) {
580 tbl->CEs[position] = newCE;
581 return element;
582 } else {
583 return UCOL_NOT_FOUND;
584 }
585 }
586
587 #endif /* #if !UCONFIG_NO_COLLATION */
588