1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // layoutpropsbuilder.cpp
5 // created: 2018aug30 Markus W. Scherer
6
7 #include <stdio.h>
8 #include <string.h>
9 #include "unicode/utypes.h"
10 #include "unicode/uchar.h"
11 #include "unicode/ucptrie.h"
12 #include "unicode/udata.h"
13 #include "unicode/umutablecptrie.h"
14 #include "unicode/uniset.h"
15 #include "cmemory.h"
16 #include "genprops.h"
17 #include "ppucd.h"
18 #include "uassert.h"
19 #include "ulayout_props.h"
20 #include "unewdata.h"
21
22 /* Unicode layout properties file format ---------------------------------------
23
24 The file format prepared and written here contains several data
25 structures that store indexes or data.
26
Before the data contents described below, there are the headers required by
the udata API for loading ICU data. In particular, a UDataInfo structure
precedes the actual data. It contains platform property values and the
file format version.
31
32 The following is a description of format version 1.0 .
33
34 The file contains the following structures:
35
36 const int32_t indexes[i0] with values i0, i1, ...:
37 (see ULAYOUT_IX_... constants for names of indexes)
38
39 i0 indexesLength; -- length of indexes[] (ULAYOUT_IX_COUNT)
40 i1 inpcTop; -- limit byte offset of the InPC trie
41 i2 inscTop; -- limit byte offset of the InSC trie
42 i3 voTop; -- limit byte offset of the vo trie
43 i4..i7 -- reserved, same as the last limit byte offset
44 i8 -- reserved, 0
45
46 i9 maxValues; -- max values of the InPC, InSC, vo properties
47 (8 bits each; lowest 8 bits reserved, 0)
48 i10..i11 -- reserved, 0
49
50 After the indexes array follow consecutive, serialized,
51 single-property code point tries for the following properties,
52 each built "small" or "fast",
53 each padded to a multiple of 16 bytes:
54 - InPC
55 - InSC
56 - vo
57
58 ----------------------------------------------------------------------------- */
59
60 U_NAMESPACE_USE
61
62 // UDataInfo cf. udata.h
63 static UDataInfo dataInfo = {
64 sizeof(UDataInfo),
65 0,
66
67 U_IS_BIG_ENDIAN,
68 U_CHARSET_FAMILY,
69 U_SIZEOF_UCHAR,
70 0,
71
72 // dataFormat="Layo"
73 { ULAYOUT_FMT_0, ULAYOUT_FMT_1, ULAYOUT_FMT_2, ULAYOUT_FMT_3 },
74 { 1, 0, 0, 0 }, // formatVersion
75 { 12, 0, 0, 0 } // dataVersion
76 };
77
78 class LayoutPropsBuilder : public PropsBuilder {
79 public:
80 LayoutPropsBuilder(UErrorCode &errorCode);
81 virtual ~LayoutPropsBuilder() U_OVERRIDE;
82
83 virtual void setUnicodeVersion(const UVersionInfo version) U_OVERRIDE;
84 virtual void setProps(const UniProps &props, const UnicodeSet &newValues, UErrorCode &errorCode) U_OVERRIDE;
85 virtual void build(UErrorCode &errorCode) U_OVERRIDE;
86 virtual void writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) U_OVERRIDE;
87
88 private:
89 void setIntProp(const UniProps &, const UnicodeSet &newValues,
90 UProperty prop, UMutableCPTrie *trie,
91 UErrorCode &errorCode);
getMaxIntValue(UProperty prop) const92 int32_t getMaxIntValue(UProperty prop) const {
93 return maxIntValues[prop - UCHAR_INT_START];
94 }
95 void checkMaxIntValue(UProperty prop, int32_t maxMax, UErrorCode &errorCode) const;
96
97 int32_t maxIntValues[UCHAR_INT_LIMIT - UCHAR_INT_START];
98 UMutableCPTrie *inpcMutableTrie;
99 UMutableCPTrie *inscMutableTrie;
100 UMutableCPTrie *voMutableTrie;
101
102 UCPTrie *inpcTrie;
103 UCPTrie *inscTrie;
104 UCPTrie *voTrie;
105 };
106
LayoutPropsBuilder(UErrorCode & errorCode)107 LayoutPropsBuilder::LayoutPropsBuilder(UErrorCode &errorCode) :
108 inpcTrie(nullptr), inscTrie(nullptr), voTrie(nullptr) {
109 memset(maxIntValues, 0, sizeof(maxIntValues));
110 inpcMutableTrie = umutablecptrie_open(0, 0, &errorCode);
111 inscMutableTrie = umutablecptrie_open(0, 0, &errorCode);
112 voMutableTrie = umutablecptrie_open(0, 0, &errorCode);
113 if (U_FAILURE(errorCode)) {
114 fprintf(stderr, "genprops error: layoutpropsbuilder umutablecptrie_open() failed - %s\n",
115 u_errorName(errorCode));
116 }
117 }
118
~LayoutPropsBuilder()119 LayoutPropsBuilder::~LayoutPropsBuilder() {
120 umutablecptrie_close(inpcMutableTrie);
121 umutablecptrie_close(inscMutableTrie);
122 umutablecptrie_close(voMutableTrie);
123 ucptrie_close(inpcTrie);
124 ucptrie_close(inscTrie);
125 ucptrie_close(voTrie);
126 }
127
128 void
setUnicodeVersion(const UVersionInfo version)129 LayoutPropsBuilder::setUnicodeVersion(const UVersionInfo version) {
130 uprv_memcpy(dataInfo.dataVersion, version, 4);
131 }
132
133 void
setProps(const UniProps & props,const UnicodeSet & newValues,UErrorCode & errorCode)134 LayoutPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
135 UErrorCode &errorCode) {
136 setIntProp(props, newValues, UCHAR_INDIC_POSITIONAL_CATEGORY, inpcMutableTrie, errorCode);
137 setIntProp(props, newValues, UCHAR_INDIC_SYLLABIC_CATEGORY, inscMutableTrie, errorCode);
138 setIntProp(props, newValues, UCHAR_VERTICAL_ORIENTATION, voMutableTrie, errorCode);
139 }
140
setIntProp(const UniProps & props,const UnicodeSet & newValues,UProperty prop,UMutableCPTrie * trie,UErrorCode & errorCode)141 void LayoutPropsBuilder::setIntProp(const UniProps &props, const UnicodeSet &newValues,
142 UProperty prop, UMutableCPTrie *trie,
143 UErrorCode &errorCode) {
144 if (U_SUCCESS(errorCode) && newValues.contains(prop)) {
145 UChar32 start=props.start;
146 UChar32 end=props.end;
147 int32_t value = props.getIntProp(prop);
148 if (value < 0) {
149 fprintf(stderr, "error: unencodable negative value for property 0x%x %04lX..%04lX=%ld\n",
150 (int)prop, (long)start, (long)end, (long)value);
151 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
152 return;
153 }
154 if (value > maxIntValues[prop - UCHAR_INT_START]) {
155 maxIntValues[prop - UCHAR_INT_START] = value;
156 }
157 if (start == end) {
158 umutablecptrie_set(trie, start, value, &errorCode);
159 } else {
160 umutablecptrie_setRange(trie, start, end, value, &errorCode);
161 }
162 if (U_FAILURE(errorCode)) {
163 fprintf(stderr, "error: umutablecptrie_set(prop 0x%x trie %04lX..%04lX) failed - %s\n",
164 (int)prop, (long)start, (long)end, u_errorName(errorCode));
165 }
166 }
167 }
168
169 namespace {
170
buildUCPTrie(const char * name,UMutableCPTrie * mutableTrie,UCPTrieType type,UCPTrieValueWidth valueWidth,UErrorCode & errorCode)171 UCPTrie *buildUCPTrie(const char *name, UMutableCPTrie *mutableTrie,
172 UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
173 UCPTrie *trie = umutablecptrie_buildImmutable(mutableTrie, type, valueWidth, &errorCode);
174 if(U_FAILURE(errorCode)) {
175 fprintf(stderr, "genprops error: %s trie buildImmutable() failed: %s\n",
176 name, u_errorName(errorCode));
177 return trie;
178 }
179 if (!beQuiet) {
180 UErrorCode overflow = U_ZERO_ERROR;
181 int32_t length = ucptrie_toBinary(trie, nullptr, 0, &overflow);
182 printf("%11s trie size in bytes: %5u\n", name, (int)length);
183 }
184 return trie;
185 }
186
187 constexpr int32_t TRIE_BLOCK_CAPACITY = 100000;
188
189 uint8_t inpcBytes[TRIE_BLOCK_CAPACITY];
190 uint8_t inscBytes[TRIE_BLOCK_CAPACITY];
191 uint8_t voBytes[TRIE_BLOCK_CAPACITY];
192
193 int32_t inpcLength = 0;
194 int32_t inscLength = 0;
195 int32_t voLength = 0;
196
writeTrieBytes(const UCPTrie * trie,uint8_t block[],UErrorCode & errorCode)197 int32_t writeTrieBytes(const UCPTrie *trie, uint8_t block[], UErrorCode &errorCode) {
198 int32_t length = ucptrie_toBinary(trie, block, TRIE_BLOCK_CAPACITY, &errorCode);
199 while ((length & 0xf) != 0) {
200 block[length++] = 0xaa;
201 }
202 return length;
203 }
204
205 } // namespace
206
207 void
build(UErrorCode & errorCode)208 LayoutPropsBuilder::build(UErrorCode &errorCode) {
209 if (U_FAILURE(errorCode)) { return; }
210 if (!beQuiet) {
211 puts("* text layout properties stats *");
212 }
213
214 checkMaxIntValue(UCHAR_INDIC_POSITIONAL_CATEGORY, 0xff, errorCode);
215 checkMaxIntValue(UCHAR_INDIC_SYLLABIC_CATEGORY, 0xff, errorCode);
216 checkMaxIntValue(UCHAR_VERTICAL_ORIENTATION, 0xff, errorCode);
217 inpcTrie = buildUCPTrie("inpc", inpcMutableTrie,
218 UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode);
219 inscTrie = buildUCPTrie("insc", inscMutableTrie,
220 UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode);
221 voTrie = buildUCPTrie("vo", voMutableTrie,
222 UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_8, errorCode);
223
224 inpcLength = writeTrieBytes(inpcTrie, inpcBytes, errorCode);
225 inscLength = writeTrieBytes(inscTrie, inscBytes, errorCode);
226 voLength = writeTrieBytes(voTrie, voBytes, errorCode);
227
228 if (!beQuiet) {
229 int32_t size = ULAYOUT_IX_COUNT * 4 + inpcLength + inscLength + voLength;
230 printf("data size: %5d\n", (int)size);
231 }
232 }
233
checkMaxIntValue(UProperty prop,int32_t maxMax,UErrorCode & errorCode) const234 void LayoutPropsBuilder::checkMaxIntValue(UProperty prop, int32_t maxMax,
235 UErrorCode &errorCode) const {
236 int32_t max = getMaxIntValue(prop);
237 if (max > maxMax) {
238 fprintf(stderr, "genprops error: 0x%x max value = %d overflow\n", (int)prop, (int)max);
239 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
240 }
241 }
242
243 // In ICU 63, we had functions writeCSourceFile() and writeJavaSourceFile().
244 // For Java, each serialized trie was written as a String constant with
245 // one byte per char and an optimization for byte 0,
246 // to optimize for Java .class file size.
247 // (See ICU 63 if we need to resurrect some of that code.)
248 // Since ICU 64, we write a binary ulayout.icu file for use in both C++ & Java.
249
250 void
writeBinaryData(const char * path,UBool withCopyright,UErrorCode & errorCode)251 LayoutPropsBuilder::writeBinaryData(const char *path, UBool withCopyright, UErrorCode &errorCode) {
252 if (U_FAILURE(errorCode)) { return; }
253
254 UNewDataMemory *pData = udata_create(
255 path, ULAYOUT_DATA_TYPE, ULAYOUT_DATA_NAME, &dataInfo,
256 withCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
257 if (U_FAILURE(errorCode)) {
258 fprintf(stderr, "genprops: udata_create(%s, ulayout.icu) failed - %s\n",
259 path, u_errorName(errorCode));
260 return;
261 }
262
263 int32_t indexes[ULAYOUT_IX_COUNT] = { ULAYOUT_IX_COUNT };
264 int32_t top = ULAYOUT_IX_COUNT * 4;
265
266 indexes[ULAYOUT_IX_INPC_TRIE_TOP] = (top += inpcLength);
267 indexes[ULAYOUT_IX_INSC_TRIE_TOP] = (top += inscLength);
268 indexes[ULAYOUT_IX_VO_TRIE_TOP] = (top += voLength);
269
270 // Set reserved trie-top values to the top of the last trie
271 // so that they look empty until a later file format version
272 // uses one or more of these slots.
273 for (int32_t i = ULAYOUT_IX_RESERVED_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) {
274 indexes[i] = top;
275 }
276
277 indexes[ULAYOUT_IX_MAX_VALUES] =
278 ((getMaxIntValue(UCHAR_INDIC_POSITIONAL_CATEGORY)) << ULAYOUT_MAX_INPC_SHIFT) |
279 ((getMaxIntValue(UCHAR_INDIC_SYLLABIC_CATEGORY)) << ULAYOUT_MAX_INSC_SHIFT) |
280 ((getMaxIntValue(UCHAR_VERTICAL_ORIENTATION)) << ULAYOUT_MAX_VO_SHIFT);
281
282 udata_writeBlock(pData, indexes, sizeof(indexes));
283 udata_writeBlock(pData, inpcBytes, inpcLength);
284 udata_writeBlock(pData, inscBytes, inscLength);
285 udata_writeBlock(pData, voBytes, voLength);
286
287 long dataLength = udata_finish(pData, &errorCode);
288 if (U_FAILURE(errorCode)) {
289 fprintf(stderr, "genprops: error %s writing the output file\n", u_errorName(errorCode));
290 return;
291 }
292
293 if (dataLength != (long)top) {
294 fprintf(stderr,
295 "udata_finish(ulayout.icu) reports %ld bytes written but should be %ld\n",
296 dataLength, (long)top);
297 errorCode = U_INTERNAL_PROGRAM_ERROR;
298 }
299 }
300
301 PropsBuilder *
createLayoutPropsBuilder(UErrorCode & errorCode)302 createLayoutPropsBuilder(UErrorCode &errorCode) {
303 if(U_FAILURE(errorCode)) { return nullptr; }
304 PropsBuilder *pb=new LayoutPropsBuilder(errorCode);
305 if(pb==nullptr) {
306 errorCode=U_MEMORY_ALLOCATION_ERROR;
307 }
308 return pb;
309 }
310