• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  *
4  *   Copyright (C) 2001-2008, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *******************************************************************************
8  *   file name:  ucol_cnt.cpp
9  *   encoding:   US-ASCII
10  *   tab size:   8 (not used)
11  *   indentation:4
12  *
13  *   created 02/22/2001
14  *   created by: Vladimir Weinstein
15  *
16  * This module maintains a contraction table structure in expanded form
17  * and provides means to flatten this structure
18  *
19  */
20 
21 #include "unicode/utypes.h"
22 
23 #if !UCONFIG_NO_COLLATION
24 
25 #include "unicode/uchar.h"
26 #include "ucol_cnt.h"
27 #include "cmemory.h"
28 
uprv_growTable(ContractionTable * tbl,UErrorCode * status)29 static void uprv_growTable(ContractionTable *tbl, UErrorCode *status) {
30     if(tbl->position == tbl->size) {
31         uint32_t *newData = (uint32_t *)uprv_realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t));
32         if(newData == NULL) {
33             *status = U_MEMORY_ALLOCATION_ERROR;
34             return;
35         }
36         UChar *newCPs = (UChar *)uprv_realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar));
37         if(newCPs == NULL) {
38             uprv_free(newData);
39             *status = U_MEMORY_ALLOCATION_ERROR;
40             return;
41         }
42         tbl->CEs = newData;
43         tbl->codePoints = newCPs;
44         tbl->size *= 2;
45     }
46 }
47 
48 U_CAPI CntTable*  U_EXPORT2
49 /*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/
uprv_cnttab_open(UNewTrie * mapping,UErrorCode * status)50 uprv_cnttab_open(UNewTrie *mapping, UErrorCode *status) {
51     if(U_FAILURE(*status)) {
52         return 0;
53     }
54     CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable));
55     if(tbl == NULL) {
56         *status = U_MEMORY_ALLOCATION_ERROR;
57         return NULL;
58     }
59     tbl->mapping = mapping;
60     tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
61     if(tbl->elements == NULL) {
62         *status = U_MEMORY_ALLOCATION_ERROR;
63         uprv_free(tbl);
64         return NULL;
65     }
66     tbl->capacity = INIT_EXP_TABLE_SIZE;
67     uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
68     tbl->size = 0;
69     tbl->position = 0;
70     tbl->CEs = NULL;
71     tbl->codePoints = NULL;
72     tbl->offsets = NULL;
73     tbl->currentTag = NOT_FOUND_TAG;
74     return tbl;
75 }
76 
addATableElement(CntTable * table,uint32_t * key,UErrorCode * status)77 static ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) {
78     ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
79     if(el == NULL) {
80         goto outOfMemory;
81     }
82     el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
83     if(el->CEs == NULL) {
84         goto outOfMemory;
85     }
86 
87     el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar));
88     if(el->codePoints == NULL) {
89         uprv_free(el->CEs);
90         goto outOfMemory;
91     }
92 
93     el->position = 0;
94     el->size = INIT_EXP_TABLE_SIZE;
95     uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
96     uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar));
97 
98     table->elements[table->size] = el;
99 
100     //uhash_put(table->elements, (void *)table->size, el, status);
101 
102     *key = table->size++;
103 
104     if(table->size == table->capacity) {
105         ContractionTable **newElements = (ContractionTable **)uprv_malloc(table->capacity*2*sizeof(ContractionTable *));
106         // do realloc
107         /*        table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/
108         if(newElements == NULL) {
109             uprv_free(el->codePoints);
110             uprv_free(el->CEs);
111             goto outOfMemory;
112         }
113         ContractionTable **oldElements = table->elements;
114         uprv_memcpy(newElements, oldElements, table->capacity*sizeof(ContractionTable *));
115         uprv_memset(newElements+table->capacity, 0, table->capacity*sizeof(ContractionTable *));
116         table->capacity *= 2;
117         table->elements = newElements;
118         uprv_free(oldElements);
119     }
120 
121     return el;
122 
123 outOfMemory:
124     *status = U_MEMORY_ALLOCATION_ERROR;
125     if (el) uprv_free(el);
126     return NULL;
127 }
128 
129 U_CAPI int32_t  U_EXPORT2
uprv_cnttab_constructTable(CntTable * table,uint32_t mainOffset,UErrorCode * status)130 uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
131     int32_t i = 0, j = 0;
132     if(U_FAILURE(*status) || table->size == 0) {
133         return 0;
134     }
135 
136     table->position = 0;
137 
138     if(table->offsets != NULL) {
139         uprv_free(table->offsets);
140     }
141     table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
142     if(table->offsets == NULL) {
143         *status = U_MEMORY_ALLOCATION_ERROR;
144         return 0;
145     }
146 
147 
148     /* See how much memory we need */
149     for(i = 0; i<table->size; i++) {
150         table->offsets[i] = table->position+mainOffset;
151         table->position += table->elements[i]->position;
152     }
153 
154     /* Allocate it */
155     if(table->CEs != NULL) {
156         uprv_free(table->CEs);
157     }
158     table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
159     if(table->CEs == NULL) {
160         *status = U_MEMORY_ALLOCATION_ERROR;
161         uprv_free(table->offsets);
162         table->offsets = NULL;
163         return 0;
164     }
165     uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));
166 
167     if(table->codePoints != NULL) {
168         uprv_free(table->codePoints);
169     }
170     table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
171     if(table->codePoints == NULL) {
172         *status = U_MEMORY_ALLOCATION_ERROR;
173         uprv_free(table->offsets);
174         table->offsets = NULL;
175         uprv_free(table->CEs);
176         table->CEs = NULL;
177         return 0;
178     }
179     uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));
180 
181     /* Now stuff the things in*/
182 
183     UChar *cpPointer = table->codePoints;
184     uint32_t *CEPointer = table->CEs;
185     for(i = 0; i<table->size; i++) {
186         int32_t size = table->elements[i]->position;
187         uint8_t ccMax = 0, ccMin = 255, cc = 0;
188         for(j = 1; j<size; j++) {
189             cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
190             if(cc>ccMax) {
191                 ccMax = cc;
192             }
193             if(cc<ccMin) {
194                 ccMin = cc;
195             }
196             *(cpPointer+j) = table->elements[i]->codePoints[j];
197         }
198         *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax;
199 
200         uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
201         for(j = 0; j<size; j++) {
202             if(isCntTableElement(*(CEPointer+j))) {
203                 *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
204             }
205         }
206         cpPointer += size;
207         CEPointer += size;
208     }
209 
210     // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
211     // start of the flat file). However, what is done below is just wrong and it affects building of
212     // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
213     // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
214     // feasible.
215     uint32_t CE;
216     for(i = 0; i<=0x10FFFF; i++) {
217         /*CE = ucmpe32_get(table->mapping, i);*/
218         CE = utrie_get32(table->mapping, i, NULL);
219         if(isCntTableElement(CE)) {
220             CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
221             /*ucmpe32_set(table->mapping, i, CE);*/
222             utrie_set32(table->mapping, i, CE);
223         }
224     }
225 
226 
227     return table->position;
228 }
229 
uprv_cnttab_cloneContraction(ContractionTable * t,UErrorCode * status)230 static ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t, UErrorCode *status) {
231     ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
232     if(r == NULL) {
233         goto outOfMemory;
234     }
235 
236     r->position = t->position;
237     r->size = t->size;
238 
239     r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size);
240     if(r->codePoints == NULL) {
241         goto outOfMemory;
242     }
243     r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size);
244     if(r->CEs == NULL) {
245         uprv_free(r->codePoints);
246         goto outOfMemory;
247     }
248     uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size);
249     uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size);
250 
251     return r;
252 
253 outOfMemory:
254     *status = U_MEMORY_ALLOCATION_ERROR;
255     if (r) uprv_free(r);
256     return NULL;
257 }
258 
259 U_CAPI CntTable* U_EXPORT2
uprv_cnttab_clone(CntTable * t,UErrorCode * status)260 uprv_cnttab_clone(CntTable *t, UErrorCode *status) {
261     if(U_FAILURE(*status)) {
262         return NULL;
263     }
264     int32_t i = 0;
265     CntTable *r = (CntTable *)uprv_malloc(sizeof(CntTable));
266     /* test for NULL */
267     if (r == NULL) {
268         goto outOfMemory;
269     }
270     r->position = t->position;
271     r->size = t->size;
272     r->capacity = t->capacity;
273 
274     r->mapping = t->mapping;
275 
276     r->elements = (ContractionTable **)uprv_malloc(t->capacity*sizeof(ContractionTable *));
277     /* test for NULL */
278     if (r->elements == NULL) {
279         goto outOfMemory;
280     }
281     //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
282 
283     for(i = 0; i<t->size; i++) {
284         r->elements[i] = uprv_cnttab_cloneContraction(t->elements[i], status);
285     }
286 
287     if(t->CEs != NULL) {
288         r->CEs = (uint32_t *)uprv_malloc(t->position*sizeof(uint32_t));
289         /* test for NULL */
290         if (r->CEs == NULL) {
291             uprv_free(r->elements);
292             goto outOfMemory;
293         }
294         uprv_memcpy(r->CEs, t->CEs, t->position*sizeof(uint32_t));
295     } else {
296         r->CEs = NULL;
297     }
298 
299     if(t->codePoints != NULL) {
300         r->codePoints = (UChar *)uprv_malloc(t->position*sizeof(UChar));
301         /* test for NULL */
302         if (r->codePoints == NULL) {
303             uprv_free(r->CEs);
304             uprv_free(r->elements);
305             goto outOfMemory;
306         }
307         uprv_memcpy(r->codePoints, t->codePoints, t->position*sizeof(UChar));
308     } else {
309         r->codePoints = NULL;
310     }
311 
312     if(t->offsets != NULL) {
313         r->offsets = (int32_t *)uprv_malloc(t->size*sizeof(int32_t));
314         /* test for NULL */
315         if (r->offsets == NULL) {
316             uprv_free(r->codePoints);
317             uprv_free(r->CEs);
318             uprv_free(r->elements);
319             goto outOfMemory;
320         }
321         uprv_memcpy(r->offsets, t->offsets, t->size*sizeof(int32_t));
322     } else {
323         r->offsets = NULL;
324     }
325 
326     return r;
327 
328 outOfMemory:
329     *status = U_MEMORY_ALLOCATION_ERROR;
330     if (r) uprv_free(r);
331     return NULL;
332 }
333 
334 U_CAPI void  U_EXPORT2
uprv_cnttab_close(CntTable * table)335 uprv_cnttab_close(CntTable *table) {
336     int32_t i = 0;
337     for(i = 0; i<table->size; i++) {
338         uprv_free(table->elements[i]->CEs);
339         uprv_free(table->elements[i]->codePoints);
340         uprv_free(table->elements[i]);
341     }
342     uprv_free(table->elements);
343     uprv_free(table->CEs);
344     uprv_free(table->offsets);
345     uprv_free(table->codePoints);
346     uprv_free(table);
347 }
348 
349 /* this is for adding non contractions */
350 U_CAPI uint32_t  U_EXPORT2
uprv_cnttab_changeLastCE(CntTable * table,uint32_t element,uint32_t value,UErrorCode * status)351 uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UErrorCode *status) {
352     element &= 0xFFFFFF;
353 
354     ContractionTable *tbl = NULL;
355     if(U_FAILURE(*status)) {
356         return 0;
357     }
358 
359     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
360         return 0;
361     }
362 
363     tbl->CEs[tbl->position-1] = value;
364 
365     return(constructContractCE(table->currentTag, element));
366 }
367 
368 
369 /* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
370 U_CAPI uint32_t  U_EXPORT2
uprv_cnttab_insertContraction(CntTable * table,uint32_t element,UChar codePoint,uint32_t value,UErrorCode * status)371 uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
372 
373     ContractionTable *tbl = NULL;
374 
375     if(U_FAILURE(*status)) {
376         return 0;
377     }
378     element &= 0xFFFFFF;
379 
380     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
381         tbl = addATableElement(table, &element, status);
382         if (U_FAILURE(*status)) {
383             return 0;
384         }
385     }
386 
387     uprv_growTable(tbl, status);
388 
389     uint32_t offset = 0;
390 
391 
392     while(tbl->codePoints[offset] < codePoint && offset<tbl->position) {
393         offset++;
394     }
395 
396     uint32_t i = tbl->position;
397     for(i = tbl->position; i > offset; i--) {
398         tbl->CEs[i] = tbl->CEs[i-1];
399         tbl->codePoints[i] = tbl->codePoints[i-1];
400     }
401 
402     tbl->CEs[offset] = value;
403     tbl->codePoints[offset] = codePoint;
404 
405     tbl->position++;
406 
407     return(constructContractCE(table->currentTag, element));
408 }
409 
410 
411 /* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
412 U_CAPI uint32_t  U_EXPORT2
uprv_cnttab_addContraction(CntTable * table,uint32_t element,UChar codePoint,uint32_t value,UErrorCode * status)413 uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
414 
415     element &= 0xFFFFFF;
416 
417     ContractionTable *tbl = NULL;
418 
419     if(U_FAILURE(*status)) {
420         return 0;
421     }
422 
423     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
424         tbl = addATableElement(table, &element, status);
425         if (U_FAILURE(*status)) {
426             return 0;
427         }
428     }
429 
430     uprv_growTable(tbl, status);
431 
432     tbl->CEs[tbl->position] = value;
433     tbl->codePoints[tbl->position] = codePoint;
434 
435     tbl->position++;
436 
437     return(constructContractCE(table->currentTag, element));
438 }
439 
440 /* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
441 U_CAPI uint32_t  U_EXPORT2
uprv_cnttab_setContraction(CntTable * table,uint32_t element,uint32_t offset,UChar codePoint,uint32_t value,UErrorCode * status)442 uprv_cnttab_setContraction(CntTable *table, uint32_t element, uint32_t offset, UChar codePoint, uint32_t value, UErrorCode *status) {
443 
444     element &= 0xFFFFFF;
445     ContractionTable *tbl = NULL;
446 
447     if(U_FAILURE(*status)) {
448         return 0;
449     }
450 
451     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
452         tbl = addATableElement(table, &element, status);
453         if (U_FAILURE(*status)) {
454             return 0;
455         }
456 
457     }
458 
459     if(offset >= tbl->size) {
460         *status = U_INDEX_OUTOFBOUNDS_ERROR;
461         return 0;
462     }
463     tbl->CEs[offset] = value;
464     tbl->codePoints[offset] = codePoint;
465 
466     //return(offset);
467     return(constructContractCE(table->currentTag, element));
468 }
469 
_cnttab_getContractionTable(CntTable * table,uint32_t element)470 static ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t element) {
471     element &= 0xFFFFFF;
472     ContractionTable *tbl = NULL;
473 
474     if(element != 0xFFFFFF) {
475         tbl = table->elements[element]; /* This could also return NULL */
476     }
477     return tbl;
478 }
479 
_cnttab_findCP(ContractionTable * tbl,UChar codePoint)480 static int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) {
481     uint32_t position = 0;
482     if(tbl == NULL) {
483         return -1;
484     }
485 
486     while(codePoint > tbl->codePoints[position]) {
487         position++;
488         if(position > tbl->position) {
489             return -1;
490         }
491     }
492     if (codePoint == tbl->codePoints[position]) {
493         return position;
494     } else {
495         return -1;
496     }
497 }
498 
_cnttab_getCE(ContractionTable * tbl,int32_t position)499 static uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) {
500     if(tbl == NULL) {
501         return UCOL_NOT_FOUND;
502     }
503     if((uint32_t)position > tbl->position || position == -1) {
504         return UCOL_NOT_FOUND;
505     } else {
506         return tbl->CEs[position];
507     }
508 }
509 
510 U_CAPI int32_t  U_EXPORT2
uprv_cnttab_findCP(CntTable * table,uint32_t element,UChar codePoint,UErrorCode * status)511 uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
512 
513     if(U_FAILURE(*status)) {
514         return 0;
515     }
516 
517     return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint);
518 }
519 
520 U_CAPI uint32_t  U_EXPORT2
uprv_cnttab_getCE(CntTable * table,uint32_t element,uint32_t position,UErrorCode * status)521 uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCode *status) {
522     if(U_FAILURE(*status)) {
523         return UCOL_NOT_FOUND;
524     }
525 
526     return(_cnttab_getCE(_cnttab_getContractionTable(table, element), position));
527 }
528 
529 U_CAPI uint32_t  U_EXPORT2
uprv_cnttab_findCE(CntTable * table,uint32_t element,UChar codePoint,UErrorCode * status)530 uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
531     if(U_FAILURE(*status)) {
532         return UCOL_NOT_FOUND;
533     }
534     ContractionTable *tbl = _cnttab_getContractionTable(table, element);
535     return _cnttab_getCE(tbl, _cnttab_findCP(tbl, codePoint));
536 }
537 
538 U_CAPI UBool  U_EXPORT2
uprv_cnttab_isTailored(CntTable * table,uint32_t element,UChar * ztString,UErrorCode * status)539 uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErrorCode *status) {
540     if(U_FAILURE(*status)) {
541         return FALSE;
542     }
543 
544     while(*(ztString)!=0) {
545         element = uprv_cnttab_findCE(table, element, *(ztString), status);
546         if(element == UCOL_NOT_FOUND) {
547             return FALSE;
548         }
549         if(!isCntTableElement(element)) {
550             return TRUE;
551         }
552         ztString++;
553     }
554     return (UBool)(uprv_cnttab_getCE(table, element, 0, status) != UCOL_NOT_FOUND);
555 }
556 
557 U_CAPI uint32_t  U_EXPORT2
uprv_cnttab_changeContraction(CntTable * table,uint32_t element,UChar codePoint,uint32_t newCE,UErrorCode * status)558 uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t newCE, UErrorCode *status) {
559 
560     element &= 0xFFFFFF;
561     ContractionTable *tbl = NULL;
562 
563     if(U_FAILURE(*status)) {
564         return 0;
565     }
566 
567     if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
568         return 0;
569     }
570 
571     uint32_t position = 0;
572 
573     while(codePoint > tbl->codePoints[position]) {
574         position++;
575         if(position > tbl->position) {
576             return UCOL_NOT_FOUND;
577         }
578     }
579     if (codePoint == tbl->codePoints[position]) {
580         tbl->CEs[position] = newCE;
581         return element;
582     } else {
583         return UCOL_NOT_FOUND;
584     }
585 }
586 
587 #endif /* #if !UCONFIG_NO_COLLATION */
588