• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2010-2014, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * collation.cpp
9 *
10 * created on: 2010oct27
11 * created by: Markus W. Scherer
12 */
13 
14 #include "unicode/utypes.h"
15 
16 #if !UCONFIG_NO_COLLATION
17 
18 #include "collation.h"
19 #include "uassert.h"
20 
21 U_NAMESPACE_BEGIN
22 
23 uint32_t
incTwoBytePrimaryByOffset(uint32_t basePrimary,UBool isCompressible,int32_t offset)24 Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
25     // Extract the second byte, minus the minimum byte value,
26     // plus the offset, modulo the number of usable byte values, plus the minimum.
27     // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
28     uint32_t primary;
29     if(isCompressible) {
30         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
31         primary = (uint32_t)((offset % 251) + 4) << 16;
32         offset /= 251;
33     } else {
34         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
35         primary = (uint32_t)((offset % 254) + 2) << 16;
36         offset /= 254;
37     }
38     // First byte, assume no further overflow.
39     return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
40 }
41 
42 uint32_t
incThreeBytePrimaryByOffset(uint32_t basePrimary,UBool isCompressible,int32_t offset)43 Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) {
44     // Extract the third byte, minus the minimum byte value,
45     // plus the offset, modulo the number of usable byte values, plus the minimum.
46     offset += ((int32_t)(basePrimary >> 8) & 0xff) - 2;
47     uint32_t primary = (uint32_t)((offset % 254) + 2) << 8;
48     offset /= 254;
49     // Same with the second byte,
50     // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
51     if(isCompressible) {
52         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4;
53         primary |= (uint32_t)((offset % 251) + 4) << 16;
54         offset /= 251;
55     } else {
56         offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2;
57         primary |= (uint32_t)((offset % 254) + 2) << 16;
58         offset /= 254;
59     }
60     // First byte, assume no further overflow.
61     return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24));
62 }
63 
64 uint32_t
decTwoBytePrimaryByOneStep(uint32_t basePrimary,UBool isCompressible,int32_t step)65 Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
66     // Extract the second byte, minus the minimum byte value,
67     // minus the step, modulo the number of usable byte values, plus the minimum.
68     // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
69     // Assume no further underflow for the first byte.
70     U_ASSERT(0 < step && step <= 0x7f);
71     int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - step;
72     if(isCompressible) {
73         if(byte2 < 4) {
74             byte2 += 251;
75             basePrimary -= 0x1000000;
76         }
77     } else {
78         if(byte2 < 2) {
79             byte2 += 254;
80             basePrimary -= 0x1000000;
81         }
82     }
83     return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16);
84 }
85 
86 uint32_t
decThreeBytePrimaryByOneStep(uint32_t basePrimary,UBool isCompressible,int32_t step)87 Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) {
88     // Extract the third byte, minus the minimum byte value,
89     // minus the step, modulo the number of usable byte values, plus the minimum.
90     U_ASSERT(0 < step && step <= 0x7f);
91     int32_t byte3 = ((int32_t)(basePrimary >> 8) & 0xff) - step;
92     if(byte3 >= 2) {
93         return (basePrimary & 0xffff0000) | ((uint32_t)byte3 << 8);
94     }
95     byte3 += 254;
96     // Same with the second byte,
97     // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary.
98     int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - 1;
99     if(isCompressible) {
100         if(byte2 < 4) {
101             byte2 = 0xfe;
102             basePrimary -= 0x1000000;
103         }
104     } else {
105         if(byte2 < 2) {
106             byte2 = 0xff;
107             basePrimary -= 0x1000000;
108         }
109     }
110     // First byte, assume no further underflow.
111     return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16) | ((uint32_t)byte3 << 8);
112 }
113 
114 uint32_t
getThreeBytePrimaryForOffsetData(UChar32 c,int64_t dataCE)115 Collation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) {
116     uint32_t p = (uint32_t)(dataCE >> 32);  // three-byte primary pppppp00
117     int32_t lower32 = (int32_t)dataCE;  // base code point b & step s: bbbbbbss (bit 7: isCompressible)
118     int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f);  // delta * increment
119     UBool isCompressible = (lower32 & 0x80) != 0;
120     return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset);
121 }
122 
123 uint32_t
unassignedPrimaryFromCodePoint(UChar32 c)124 Collation::unassignedPrimaryFromCodePoint(UChar32 c) {
125     // Create a gap before U+0000. Use c=-1 for [first unassigned].
126     ++c;
127     // Fourth byte: 18 values, every 14th byte value (gap of 13).
128     uint32_t primary = 2 + (c % 18) * 14;
129     c /= 18;
130     // Third byte: 254 values.
131     primary |= (2 + (c % 254)) << 8;
132     c /= 254;
133     // Second byte: 251 values 04..FE excluding the primary compression bytes.
134     primary |= (4 + (c % 251)) << 16;
135     // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
136     return primary | (UNASSIGNED_IMPLICIT_BYTE << 24);
137 }
138 
139 U_NAMESPACE_END
140 
141 #endif  // !UCONFIG_NO_COLLATION
142