1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2005-2012, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: writesrc.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2005apr23
16 * created by: Markus W. Scherer
17 *
18 * Helper functions for writing source code for data.
19 */
20
21 #include <stdio.h>
22 #include <time.h>
23 #include "unicode/utypes.h"
24 #include "unicode/putil.h"
25 #include "unicode/ucptrie.h"
26 #include "unicode/errorcode.h"
27 #include "unicode/uniset.h"
28 #include "unicode/usetiter.h"
29 #include "unicode/utf16.h"
30 #include "utrie2.h"
31 #include "cstring.h"
32 #include "writesrc.h"
33 #include "util.h"
34
35 U_NAMESPACE_BEGIN
36
~ValueNameGetter()37 ValueNameGetter::~ValueNameGetter() {}
38
39 U_NAMESPACE_END
40
41 U_NAMESPACE_USE
42
43 static FILE *
usrc_createWithoutHeader(const char * path,const char * filename)44 usrc_createWithoutHeader(const char *path, const char *filename) {
45 char buffer[1024];
46 const char *p;
47 char *q;
48 FILE *f;
49 char c;
50
51 if(path==NULL) {
52 p=filename;
53 } else {
54 /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */
55 uprv_strcpy(buffer, path);
56 q=buffer+uprv_strlen(buffer);
57 if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
58 *q++=U_FILE_SEP_CHAR;
59 }
60 uprv_strcpy(q, filename);
61 p=buffer;
62 }
63
64 f=fopen(p, "w");
65 if (f==NULL) {
66 fprintf(
67 stderr,
68 "usrc_create(%s, %s): unable to create file\n",
69 path!=NULL ? path : "", filename);
70 }
71 return f;
72 }
73
74 U_CAPI FILE * U_EXPORT2
usrc_create(const char * path,const char * filename,int32_t copyrightYear,const char * generator)75 usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
76 FILE *f = usrc_createWithoutHeader(path, filename);
77 if (f == NULL) {
78 return f;
79 }
80 usrc_writeCopyrightHeader(f, "//", copyrightYear);
81 usrc_writeFileNameGeneratedBy(f, "//", filename, generator);
82 return f;
83 }
84
85 U_CAPI FILE * U_EXPORT2
usrc_createTextData(const char * path,const char * filename,int32_t copyrightYear,const char * generator)86 usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
87 FILE *f = usrc_createWithoutHeader(path, filename);
88 if (f == NULL) {
89 return f;
90 }
91 usrc_writeCopyrightHeader(f, "#", copyrightYear);
92 usrc_writeFileNameGeneratedBy(f, "#", filename, generator);
93 return f;
94 }
95
96 U_CAPI void U_EXPORT2
usrc_writeCopyrightHeader(FILE * f,const char * prefix,int32_t copyrightYear)97 usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) {
98 fprintf(f,
99 "%s Copyright (C) %d and later: Unicode, Inc. and others.\n"
100 "%s License & terms of use: http://www.unicode.org/copyright.html\n",
101 prefix, copyrightYear, prefix);
102 if (copyrightYear <= 2016) {
103 fprintf(f,
104 "%s Copyright (C) 1999-2016, International Business Machines\n"
105 "%s Corporation and others. All Rights Reserved.\n",
106 prefix, prefix);
107 }
108 }
109
110 U_CAPI void U_EXPORT2
usrc_writeFileNameGeneratedBy(FILE * f,const char * prefix,const char * filename,const char * generator)111 usrc_writeFileNameGeneratedBy(
112 FILE *f,
113 const char *prefix,
114 const char *filename,
115 const char *generator) {
116 char buffer[1024];
117 const struct tm *lt;
118 time_t t;
119
120 const char *pattern =
121 "%s\n"
122 "%s file name: %s\n"
123 "%s\n"
124 "%s machine-generated by: %s\n"
125 "\n";
126
127 time(&t);
128 lt=localtime(&t);
129 if(generator==NULL) {
130 strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt);
131 fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, buffer);
132 } else {
133 fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, generator);
134 }
135 }
136
137 U_CAPI void U_EXPORT2
usrc_writeArray(FILE * f,const char * prefix,const void * p,int32_t width,int32_t length,const char * indent,const char * postfix)138 usrc_writeArray(FILE *f,
139 const char *prefix,
140 const void *p, int32_t width, int32_t length,
141 const char *indent,
142 const char *postfix) {
143 const uint8_t *p8;
144 const uint16_t *p16;
145 const uint32_t *p32;
146 uint32_t value;
147 int32_t i, col;
148
149 p8=NULL;
150 p16=NULL;
151 p32=NULL;
152 switch(width) {
153 case 8:
154 p8=(const uint8_t *)p;
155 break;
156 case 16:
157 p16=(const uint16_t *)p;
158 break;
159 case 32:
160 p32=(const uint32_t *)p;
161 break;
162 default:
163 fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width);
164 return;
165 }
166 if(prefix!=NULL) {
167 fprintf(f, prefix, (long)length);
168 }
169 for(i=col=0; i<length; ++i, ++col) {
170 if(i>0) {
171 if(col<16) {
172 fputc(',', f);
173 } else {
174 fputs(",\n", f);
175 fputs(indent, f);
176 col=0;
177 }
178 }
179 switch(width) {
180 case 8:
181 value=p8[i];
182 break;
183 case 16:
184 value=p16[i];
185 break;
186 case 32:
187 value=p32[i];
188 break;
189 default:
190 value=0; /* unreachable */
191 break;
192 }
193 fprintf(f, value<=9 ? "%lu" : "0x%lx", (unsigned long)value);
194 }
195 if(postfix!=NULL) {
196 fputs(postfix, f);
197 }
198 }
199
200 U_CAPI void U_EXPORT2
usrc_writeUTrie2Arrays(FILE * f,const char * indexPrefix,const char * data32Prefix,const UTrie2 * pTrie,const char * postfix)201 usrc_writeUTrie2Arrays(FILE *f,
202 const char *indexPrefix, const char *data32Prefix,
203 const UTrie2 *pTrie,
204 const char *postfix) {
205 if(pTrie->data32==NULL) {
206 /* 16-bit trie */
207 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, "", postfix);
208 } else {
209 /* 32-bit trie */
210 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, "", postfix);
211 usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix);
212 }
213 }
214
215 U_CAPI void U_EXPORT2
usrc_writeUTrie2Struct(FILE * f,const char * prefix,const UTrie2 * pTrie,const char * indexName,const char * data32Name,const char * postfix)216 usrc_writeUTrie2Struct(FILE *f,
217 const char *prefix,
218 const UTrie2 *pTrie,
219 const char *indexName, const char *data32Name,
220 const char *postfix) {
221 if(prefix!=NULL) {
222 fputs(prefix, f);
223 }
224 if(pTrie->data32==NULL) {
225 /* 16-bit trie */
226 fprintf(
227 f,
228 " %s,\n" /* index */
229 " %s+%ld,\n" /* data16 */
230 " NULL,\n", /* data32 */
231 indexName,
232 indexName,
233 (long)pTrie->indexLength);
234 } else {
235 /* 32-bit trie */
236 fprintf(
237 f,
238 " %s,\n" /* index */
239 " NULL,\n" /* data16 */
240 " %s,\n", /* data32 */
241 indexName,
242 data32Name);
243 }
244 fprintf(
245 f,
246 " %ld,\n" /* indexLength */
247 " %ld,\n" /* dataLength */
248 " 0x%hx,\n" /* index2NullOffset */
249 " 0x%hx,\n" /* dataNullOffset */
250 " 0x%lx,\n" /* initialValue */
251 " 0x%lx,\n" /* errorValue */
252 " 0x%lx,\n" /* highStart */
253 " 0x%lx,\n" /* highValueIndex */
254 " NULL, 0, FALSE, FALSE, 0, NULL\n",
255 (long)pTrie->indexLength, (long)pTrie->dataLength,
256 (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
257 (long)pTrie->initialValue, (long)pTrie->errorValue,
258 (long)pTrie->highStart, (long)pTrie->highValueIndex);
259 if(postfix!=NULL) {
260 fputs(postfix, f);
261 }
262 }
263
264 U_CAPI void U_EXPORT2
usrc_writeUCPTrieArrays(FILE * f,const char * indexPrefix,const char * dataPrefix,const UCPTrie * pTrie,const char * postfix,UTargetSyntax syntax)265 usrc_writeUCPTrieArrays(FILE *f,
266 const char *indexPrefix, const char *dataPrefix,
267 const UCPTrie *pTrie,
268 const char *postfix,
269 UTargetSyntax syntax) {
270 const char* indent = (syntax == UPRV_TARGET_SYNTAX_TOML) ? " " : "";
271 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, indent, postfix);
272 int32_t width=
273 pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
274 pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
275 pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
276 usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, indent, postfix);
277 }
278
279 U_CAPI void U_EXPORT2
usrc_writeUCPTrieStruct(FILE * f,const char * prefix,const UCPTrie * pTrie,const char * indexName,const char * dataName,const char * postfix,UTargetSyntax syntax)280 usrc_writeUCPTrieStruct(FILE *f,
281 const char *prefix,
282 const UCPTrie *pTrie,
283 const char *indexName, const char *dataName,
284 const char *postfix,
285 UTargetSyntax syntax) {
286 if(prefix!=NULL) {
287 fputs(prefix, f);
288 }
289 if (syntax == UPRV_TARGET_SYNTAX_CCODE) {
290 fprintf(
291 f,
292 " %s,\n" // index
293 " { %s },\n", // data (union)
294 indexName,
295 dataName);
296 }
297 const char* pattern =
298 (syntax == UPRV_TARGET_SYNTAX_CCODE) ?
299 " %ld, %ld,\n" // indexLength, dataLength
300 " 0x%lx, 0x%x,\n" // highStart, shifted12HighStart
301 " %d, %d,\n" // type, valueWidth
302 " 0, 0,\n" // reserved32, reserved16
303 " 0x%x, 0x%lx,\n" // index3NullOffset, dataNullOffset
304 " 0x%lx,\n" // nullValue
305 :
306 "indexLength = %ld\n"
307 "dataLength = %ld\n"
308 "highStart = 0x%lx\n"
309 "shifted12HighStart = 0x%x\n"
310 "type = %d\n"
311 "valueWidth = %d\n"
312 "index3NullOffset = 0x%x\n"
313 "dataNullOffset = 0x%lx\n"
314 "nullValue = 0x%lx\n"
315 ;
316 fprintf(
317 f,
318 pattern,
319 (long)pTrie->indexLength, (long)pTrie->dataLength,
320 (long)pTrie->highStart, pTrie->shifted12HighStart,
321 pTrie->type, pTrie->valueWidth,
322 pTrie->index3NullOffset, (long)pTrie->dataNullOffset,
323 (long)pTrie->nullValue);
324 if(postfix!=NULL) {
325 fputs(postfix, f);
326 }
327 }
328
329 U_CAPI void U_EXPORT2
usrc_writeUCPTrie(FILE * f,const char * name,const UCPTrie * pTrie,UTargetSyntax syntax)330 usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) {
331 int32_t width=
332 pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
333 pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
334 pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
335 char line[100], line2[100], line3[100], line4[100];
336
337 switch (syntax) {
338 case UPRV_TARGET_SYNTAX_CCODE:
339 sprintf(line, "static const uint16_t %s_trieIndex[%%ld]={\n", name);
340 sprintf(line2, "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name);
341 sprintf(line3, "\n};\n\n");
342 break;
343 case UPRV_TARGET_SYNTAX_TOML:
344 sprintf(line, "index = [\n ");
345 sprintf(line2, "data_%d = [\n ", (int)width);
346 sprintf(line3, "\n]\n");
347 break;
348 default:
349 UPRV_UNREACHABLE_EXIT;
350 }
351 usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax);
352
353 switch (syntax) {
354 case UPRV_TARGET_SYNTAX_CCODE:
355 sprintf(line, "static const UCPTrie %s_trie={\n", name);
356 sprintf(line2, "%s_trieIndex", name);
357 sprintf(line3, "%s_trieData", name);
358 sprintf(line4, "};\n\n");
359 break;
360 case UPRV_TARGET_SYNTAX_TOML:
361 line[0] = 0;
362 line2[0] = 0;
363 line3[0] = 0;
364 line4[0] = 0;
365 break;
366 default:
367 UPRV_UNREACHABLE_EXIT;
368 }
369 usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax);
370 }
371
372 U_CAPI void U_EXPORT2
usrc_writeUnicodeSet(FILE * f,const USet * pSet,UTargetSyntax syntax)373 usrc_writeUnicodeSet(
374 FILE *f,
375 const USet *pSet,
376 UTargetSyntax syntax) {
377 // ccode is not yet supported
378 U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
379
380 // Write out a list of ranges
381 const UnicodeSet* set = UnicodeSet::fromUSet(pSet);
382 UnicodeSetIterator it(*set);
383 fprintf(f, "# Inclusive ranges of the code points in the set.\n");
384 fprintf(f, "ranges = [\n");
385 bool seenFirstString = false;
386 while (it.nextRange()) {
387 if (it.isString()) {
388 if (!seenFirstString) {
389 seenFirstString = true;
390 fprintf(f, "]\nstrings = [\n");
391 }
392 const UnicodeString& str = it.getString();
393 fprintf(f, " ");
394 usrc_writeStringAsASCII(f, str.getBuffer(), str.length(), syntax);
395 fprintf(f, ",\n");
396 } else {
397 U_ASSERT(!seenFirstString);
398 UChar32 start = it.getCodepoint();
399 UChar32 end = it.getCodepointEnd();
400 fprintf(f, " [0x%x, 0x%x],\n", start, end);
401 }
402 }
403 fprintf(f, "]\n");
404 }
405
406 U_CAPI void U_EXPORT2
usrc_writeUCPMap(FILE * f,const UCPMap * pMap,icu::ValueNameGetter * valueNameGetter,UTargetSyntax syntax)407 usrc_writeUCPMap(
408 FILE *f,
409 const UCPMap *pMap,
410 icu::ValueNameGetter *valueNameGetter,
411 UTargetSyntax syntax) {
412 // ccode is not yet supported
413 U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
414 (void) syntax; // silence unused variable errors
415
416 // Print out list of ranges
417 UChar32 start = 0, end;
418 uint32_t value;
419 fprintf(f, "# Code points `a` through `b` have value `v`, corresponding to `name`.\n");
420 fprintf(f, "ranges = [\n");
421 while ((end = ucpmap_getRange(pMap, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value)) >= 0) {
422 if (valueNameGetter != nullptr) {
423 const char *name = valueNameGetter->getName(value);
424 fprintf(f, " {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", start, end, value, name);
425 } else {
426 fprintf(f, " {a=0x%x, b=0x%x, v=%u},\n", start, end, value);
427 }
428 start = end + 1;
429 }
430 fprintf(f, "]\n");
431 }
432
433 U_CAPI void U_EXPORT2
usrc_writeArrayOfMostlyInvChars(FILE * f,const char * prefix,const char * p,int32_t length,const char * postfix)434 usrc_writeArrayOfMostlyInvChars(FILE *f,
435 const char *prefix,
436 const char *p, int32_t length,
437 const char *postfix) {
438 int32_t i, col;
439 int prev2, prev, c;
440
441 if(prefix!=NULL) {
442 fprintf(f, prefix, (long)length);
443 }
444 prev2=prev=-1;
445 for(i=col=0; i<length; ++i, ++col) {
446 c=(uint8_t)p[i];
447 if(i>0) {
448 /* Break long lines. Try to break at interesting places, to minimize revision diffs. */
449 if(
450 /* Very long line. */
451 col>=32 ||
452 /* Long line, break after terminating NUL. */
453 (col>=24 && prev2>=0x20 && prev==0) ||
454 /* Medium-long line, break before non-NUL, non-character byte. */
455 (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20)
456 ) {
457 fputs(",\n", f);
458 col=0;
459 } else {
460 fputc(',', f);
461 }
462 }
463 fprintf(f, c<0x20 ? "%u" : "'%c'", c);
464 prev2=prev;
465 prev=c;
466 }
467 if(postfix!=NULL) {
468 fputs(postfix, f);
469 }
470 }
471
472 U_CAPI void U_EXPORT2
usrc_writeStringAsASCII(FILE * f,const UChar * ptr,int32_t length,UTargetSyntax)473 usrc_writeStringAsASCII(FILE *f,
474 const UChar* ptr, int32_t length,
475 UTargetSyntax) {
476 // For now, assume all UTargetSyntax values are valid here.
477 fprintf(f, "\"");
478 int32_t i = 0;
479 UChar32 cp;
480 while (i < length) {
481 U16_NEXT(ptr, i, length, cp);
482 if (cp == u'"') {
483 fprintf(f, "\\\"");
484 } else if (ICU_Utility::isUnprintable(cp)) {
485 UnicodeString u16result;
486 ICU_Utility::escapeUnprintable(u16result, cp);
487 std::string u8result;
488 u16result.toUTF8String(u8result);
489 fprintf(f, "%s", u8result.data());
490 } else {
491 U_ASSERT(cp < 0x80);
492 char s[2] = {static_cast<char>(cp), 0};
493 fprintf(f, "%s", s);
494 }
495 }
496 fprintf(f, "\"");
497 }
498