• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // layoutpropsbuilder.cpp
5 // created: 2018aug30 Markus W. Scherer
6 
7 #include <stdio.h>
8 #include <string.h>
9 #include "unicode/utypes.h"
10 #include "unicode/uchar.h"
11 #include "unicode/ucptrie.h"
12 #include "unicode/udata.h"
13 #include "unicode/umutablecptrie.h"
14 #include "unicode/uniset.h"
15 #include "cmemory.h"
16 #include "genprops.h"
17 #include "ppucd.h"
18 #include "uassert.h"
19 #include "ulayout_props.h"
20 #include "unewdata.h"
21 
22 /* Unicode layout properties file format ---------------------------------------
23 
24 The file format prepared and written here contains several data
25 structures that store indexes or data.
26 
27 Before the data contents described below, there are the headers required by
28 the udata API for loading ICU data. Especially, a UDataInfo structure
29 precedes the actual data. It contains platform properties values and the
30 file format version.
31 
32 The following is a description of format version 1.0 .
33 
34 The file contains the following structures:
35 
36     const int32_t indexes[i0] with values i0, i1, ...:
37     (see ULAYOUT_IX_... constants for names of indexes)
38 
39     i0 indexesLength; -- length of indexes[] (ULAYOUT_IX_COUNT)
40     i1 inpcTop; -- limit byte offset of the InPC trie
41     i2 inscTop; -- limit byte offset of the InSC trie
42     i3 voTop; -- limit byte offset of the vo trie
43     i4..i7 -- reserved, same as the last limit byte offset
44     i8 -- reserved, 0
45 
46     i9 maxValues; -- max values of the InPC, InSC, vo properties
47         (8 bits each; lowest 8 bits reserved, 0)
48     i10..i11 -- reserved, 0
49 
50     After the indexes array follow consecutive, serialized,
51     single-property code point tries for the following properties,
52     each built "small" or "fast",
53     each padded to a multiple of 16 bytes:
54     - InPC
55     - InSC
56     - vo
57 
58 ----------------------------------------------------------------------------- */
59 
60 U_NAMESPACE_USE
61 
62 // UDataInfo cf. udata.h
63 static UDataInfo dataInfo = {
64     sizeof(UDataInfo),
65     0,
66 
67     U_IS_BIG_ENDIAN,
68     U_CHARSET_FAMILY,
69     U_SIZEOF_UCHAR,
70     0,
71 
72     // dataFormat="Layo"
73     { ULAYOUT_FMT_0, ULAYOUT_FMT_1, ULAYOUT_FMT_2, ULAYOUT_FMT_3 },
74     { 1, 0, 0, 0 },  // formatVersion
75     { 12, 0, 0, 0 }  // dataVersion
76 };
77 
78 class LayoutPropsBuilder : public PropsBuilder {
79 public:
80     LayoutPropsBuilder(UErrorCode &errorCode);
81     virtual ~LayoutPropsBuilder() U_OVERRIDE;
82 
83     virtual void setUnicodeVersion(const UVersionInfo version) U_OVERRIDE;
84     virtual void setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) U_OVERRIDE;
85     virtual void build(UErrorCode &errorCode) U_OVERRIDE;
86     virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) U_OVERRIDE;
87 
88 private:
89     void setIntProp(const UniProps &, const UnicodeSet &newValues,
90                     UProperty prop, UMutableCPTrie *trie,
91                     UErrorCode &errorCode);
getMaxIntValue(UProperty prop) const92     int32_t getMaxIntValue(UProperty prop) const {
93         return maxIntValues[prop - UCHAR_INT_START];
94     }
95     void checkMaxIntValue(UProperty prop, int32_t maxMax, UErrorCode &errorCode) const;
96 
97     int32_t maxIntValues[UCHAR_INT_LIMIT - UCHAR_INT_START];
98     UMutableCPTrie *inpcMutableTrie;
99     UMutableCPTrie *inscMutableTrie;
100     UMutableCPTrie *voMutableTrie;
101 
102     UCPTrie *inpcTrie;
103     UCPTrie *inscTrie;
104     UCPTrie *voTrie;
105 };
106 
LayoutPropsBuilder(UErrorCode & errorCode)107 LayoutPropsBuilder::LayoutPropsBuilder(UErrorCode &errorCode) :
108         inpcTrie(nullptr), inscTrie(nullptr), voTrie(nullptr) {
109     memset(maxIntValues, 0, sizeof(maxIntValues));
110     inpcMutableTrie = umutablecptrie_open(0, 0, &errorCode);
111     inscMutableTrie = umutablecptrie_open(0, 0, &errorCode);
112     voMutableTrie = umutablecptrie_open(0, 0, &errorCode);
113     if (U_FAILURE(errorCode)) {
114         fprintf(stderr, "genprops error: layoutpropsbuilder umutablecptrie_open() failed - %s\n",
115                 u_errorName(errorCode));
116     }
117 }
118 
~LayoutPropsBuilder()119 LayoutPropsBuilder::~LayoutPropsBuilder() {
120     umutablecptrie_close(inpcMutableTrie);
121     umutablecptrie_close(inscMutableTrie);
122     umutablecptrie_close(voMutableTrie);
123     ucptrie_close(inpcTrie);
124     ucptrie_close(inscTrie);
125     ucptrie_close(voTrie);
126 }
127 
128 void
setUnicodeVersion(const UVersionInfo version)129 LayoutPropsBuilder::setUnicodeVersion(const UVersionInfo version) {
130     uprv_memcpy(dataInfo.dataVersion, version, 4);
131 }
132 
133 void
setProps(const UniProps & props,const UnicodeSet & newValues,UErrorCode & errorCode)134 LayoutPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
135                              UErrorCode &errorCode) {
136     setIntProp(props, newValues, UCHAR_INDIC_POSITIONAL_CATEGORY, inpcMutableTrie, errorCode);
137     setIntProp(props, newValues, UCHAR_INDIC_SYLLABIC_CATEGORY, inscMutableTrie, errorCode);
138     setIntProp(props, newValues, UCHAR_VERTICAL_ORIENTATION, voMutableTrie, errorCode);
139 }
140 
setIntProp(const UniProps & props,const UnicodeSet & newValues,UProperty prop,UMutableCPTrie * trie,UErrorCode & errorCode)141 void LayoutPropsBuilder::setIntProp(const UniProps &props, const UnicodeSet &newValues,
142                                     UProperty prop, UMutableCPTrie *trie,
143                                     UErrorCode &errorCode) {
144     if (U_SUCCESS(errorCode) && newValues.contains(prop)) {
145         UChar32 start=props.start;
146         UChar32 end=props.end;
147         int32_t value = props.getIntProp(prop);
148         if (value < 0) {
149             fprintf(stderr, "error: unencodable negative value for property 0x%x %04lX..%04lX=%ld\n",
150                     (int)prop, (long)start, (long)end, (long)value);
151             errorCode = U_ILLEGAL_ARGUMENT_ERROR;
152             return;
153         }
154         if (value > maxIntValues[prop - UCHAR_INT_START]) {
155             maxIntValues[prop - UCHAR_INT_START] = value;
156         }
157         if (start == end) {
158             umutablecptrie_set(trie, start, value, &errorCode);
159         } else {
160             umutablecptrie_setRange(trie, start, end, value, &errorCode);
161         }
162         if (U_FAILURE(errorCode)) {
163             fprintf(stderr, "error: umutablecptrie_set(prop 0x%x trie %04lX..%04lX) failed - %s\n",
164                     (int)prop, (long)start, (long)end, u_errorName(errorCode));
165         }
166     }
167 }
168 
169 namespace {
170 
buildUCPTrie(const char * name,UMutableCPTrie * mutableTrie,UCPTrieType type,UCPTrieValueWidth valueWidth,UErrorCode & errorCode)171 UCPTrie *buildUCPTrie(const char *name, UMutableCPTrie *mutableTrie,
172                       UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
173     UCPTrie *trie = umutablecptrie_buildImmutable(mutableTrie, type, valueWidth, &errorCode);
174     if(U_FAILURE(errorCode)) {
175         fprintf(stderr, "genprops error: %s trie buildImmutable() failed: %s\n",
176                 name, u_errorName(errorCode));
177         return trie;
178     }
179     if (!beQuiet) {
180         UErrorCode overflow = U_ZERO_ERROR;
181         int32_t length = ucptrie_toBinary(trie, nullptr, 0, &overflow);
182         printf("%11s trie size in bytes:        %5u\n", name, (int)length);
183     }
184     return trie;
185 }
186 
// Size in bytes of each static buffer that receives one serialized trie.
constexpr int32_t TRIE_BLOCK_CAPACITY = 100000;

// One buffer plus its used length per property.
// build() fills them via writeTrieBytes(); writeBinaryData() writes them out.
uint8_t inpcBytes[TRIE_BLOCK_CAPACITY];
int32_t inpcLength = 0;

uint8_t inscBytes[TRIE_BLOCK_CAPACITY];
int32_t inscLength = 0;

uint8_t voBytes[TRIE_BLOCK_CAPACITY];
int32_t voLength = 0;
196 
writeTrieBytes(const UCPTrie * trie,uint8_t block[],UErrorCode & errorCode)197 int32_t writeTrieBytes(const UCPTrie *trie, uint8_t block[], UErrorCode &errorCode) {
198     int32_t length = ucptrie_toBinary(trie, block, TRIE_BLOCK_CAPACITY, &errorCode);
199     while ((length & 0xf) != 0) {
200         block[length++] = 0xaa;
201     }
202     return length;
203 }
204 
205 }  // namespace
206 
207 void
build(UErrorCode & errorCode)208 LayoutPropsBuilder::build(UErrorCode &errorCode) {
209     if (U_FAILURE(errorCode)) { return; }
210     if (!beQuiet) {
211         puts("* text layout properties stats *");
212     }
213 
214     checkMaxIntValue(UCHAR_INDIC_POSITIONAL_CATEGORY, 0xff, errorCode);
215     checkMaxIntValue(UCHAR_INDIC_SYLLABIC_CATEGORY, 0xff, errorCode);
216     checkMaxIntValue(UCHAR_VERTICAL_ORIENTATION, 0xff, errorCode);
217     inpcTrie = buildUCPTrie("inpc", inpcMutableTrie,
218                             UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode);
219     inscTrie = buildUCPTrie("insc", inscMutableTrie,
220                             UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode);
221     voTrie = buildUCPTrie("vo", voMutableTrie,
222                           UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode);
223 
224     inpcLength = writeTrieBytes(inpcTrie, inpcBytes, errorCode);
225     inscLength = writeTrieBytes(inscTrie, inscBytes, errorCode);
226     voLength = writeTrieBytes(voTrie, voBytes, errorCode);
227 
228     if (!beQuiet) {
229         int32_t size = ULAYOUT_IX_COUNT * 4 + inpcLength + inscLength + voLength;
230         printf("data size:                             %5d\n", (int)size);
231     }
232 }
233 
checkMaxIntValue(UProperty prop,int32_t maxMax,UErrorCode & errorCode) const234 void LayoutPropsBuilder::checkMaxIntValue(UProperty prop, int32_t maxMax,
235                                           UErrorCode &errorCode) const {
236     int32_t max = getMaxIntValue(prop);
237     if (max > maxMax) {
238         fprintf(stderr, "genprops error: 0x%x max value = %d overflow\n", (int)prop, (int)max);
239         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
240     }
241 }
242 
243 // In ICU 63, we had functions writeCSourceFile() and writeJavaSourceFile().
244 // For Java, each serialized trie was written as a String constant with
245 // one byte per char and an optimization for byte 0,
246 // to optimize for Java .class file size.
247 // (See ICU 63 if we need to resurrect some of that code.)
248 // Since ICU 64, we write a binary ulayout.icu file for use in both C++ & Java.
249 
250 void
writeBinaryData(const char * path,UBool withCopyright,UErrorCode & errorCode)251 LayoutPropsBuilder::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
252     if (U_FAILURE(errorCode)) { return; }
253 
254     UNewDataMemory *pData = udata_create(
255         path, ULAYOUT_DATA_TYPE, ULAYOUT_DATA_NAME, &dataInfo,
256         withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
257     if (U_FAILURE(errorCode)) {
258         fprintf(stderr, "genprops: udata_create(%s, ulayout.icu) failed - %s\n",
259                 path, u_errorName(errorCode));
260         return;
261     }
262 
263     int32_t indexes[ULAYOUT_IX_COUNT] = { ULAYOUT_IX_COUNT };
264     int32_t top = ULAYOUT_IX_COUNT * 4;
265 
266     indexes[ULAYOUT_IX_INPC_TRIE_TOP] = (top += inpcLength);
267     indexes[ULAYOUT_IX_INSC_TRIE_TOP] = (top += inscLength);
268     indexes[ULAYOUT_IX_VO_TRIE_TOP] = (top += voLength);
269 
270     // Set reserved trie-top values to the top of the last trie
271     // so that they look empty until a later file format version
272     // uses one or more of these slots.
273     for (int32_t i = ULAYOUT_IX_RESERVED_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) {
274         indexes[i] = top;
275     }
276 
277     indexes[ULAYOUT_IX_MAX_VALUES] =
278         ((getMaxIntValue(UCHAR_INDIC_POSITIONAL_CATEGORY)) << ULAYOUT_MAX_INPC_SHIFT) |
279         ((getMaxIntValue(UCHAR_INDIC_SYLLABIC_CATEGORY)) << ULAYOUT_MAX_INSC_SHIFT) |
280         ((getMaxIntValue(UCHAR_VERTICAL_ORIENTATION)) << ULAYOUT_MAX_VO_SHIFT);
281 
282     udata_writeBlock(pData, indexes, sizeof(indexes));
283     udata_writeBlock(pData, inpcBytes, inpcLength);
284     udata_writeBlock(pData, inscBytes, inscLength);
285     udata_writeBlock(pData, voBytes, voLength);
286 
287     long dataLength = udata_finish(pData, &errorCode);
288     if (U_FAILURE(errorCode)) {
289         fprintf(stderr, "genprops: error %s writing the output file\n", u_errorName(errorCode));
290         return;
291     }
292 
293     if (dataLength != (long)top) {
294         fprintf(stderr,
295                 "udata_finish(ulayout.icu) reports %ld bytes written but should be %ld\n",
296                 dataLength, (long)top);
297         errorCode = U_INTERNAL_PROGRAM_ERROR;
298     }
299 }
300 
301 PropsBuilder *
createLayoutPropsBuilder(UErrorCode & errorCode)302 createLayoutPropsBuilder(UErrorCode &errorCode) {
303     if(U_FAILURE(errorCode)) { return nullptr; }
304     PropsBuilder *pb=new LayoutPropsBuilder(errorCode);
305     if(pb==nullptr) {
306         errorCode=U_MEMORY_ALLOCATION_ERROR;
307     }
308     return pb;
309 }
310