1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2005-2012, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: writesrc.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2005apr23
16 * created by: Markus W. Scherer
17 *
18 * Helper functions for writing source code for data.
19 */
20
21 #include <stdio.h>
22 #include <time.h>
23
24 // The C99 standard suggested that C++ implementations not define PRId64 etc. constants
25 // unless this macro is defined.
26 // See the Notes at https://en.cppreference.com/w/cpp/types/integer .
27 // Similar to defining __STDC_LIMIT_MACROS in unicode/ptypes.h .
28 #ifndef __STDC_FORMAT_MACROS
29 # define __STDC_FORMAT_MACROS
30 #endif
31 #include <cinttypes>
32
33 #include "unicode/utypes.h"
34 #include "unicode/putil.h"
35 #include "unicode/ucptrie.h"
36 #include "unicode/errorcode.h"
37 #include "unicode/uniset.h"
38 #include "unicode/usetiter.h"
39 #include "unicode/utf16.h"
40 #include "utrie2.h"
41 #include "cstring.h"
42 #include "writesrc.h"
43 #include "util.h"
44
45 U_NAMESPACE_BEGIN
46
~ValueNameGetter()47 ValueNameGetter::~ValueNameGetter() {}
48
49 U_NAMESPACE_END
50
51 U_NAMESPACE_USE
52
53 static FILE *
usrc_createWithoutHeader(const char * path,const char * filename)54 usrc_createWithoutHeader(const char *path, const char *filename) {
55 char buffer[1024];
56 const char *p;
57 char *q;
58 FILE *f;
59 char c;
60
61 if(path==nullptr) {
62 p=filename;
63 } else {
64 /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */
65 uprv_strcpy(buffer, path);
66 q=buffer+uprv_strlen(buffer);
67 if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
68 *q++=U_FILE_SEP_CHAR;
69 }
70 uprv_strcpy(q, filename);
71 p=buffer;
72 }
73
74 f=fopen(p, "w");
75 if (f==nullptr) {
76 fprintf(
77 stderr,
78 "usrc_create(%s, %s): unable to create file\n",
79 path!=nullptr ? path : "", filename);
80 }
81 return f;
82 }
83
84 U_CAPI FILE * U_EXPORT2
usrc_create(const char * path,const char * filename,int32_t copyrightYear,const char * generator)85 usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
86 FILE *f = usrc_createWithoutHeader(path, filename);
87 if (f == nullptr) {
88 return f;
89 }
90 usrc_writeCopyrightHeader(f, "//", copyrightYear);
91 usrc_writeFileNameGeneratedBy(f, "//", filename, generator);
92 return f;
93 }
94
95 U_CAPI FILE * U_EXPORT2
usrc_createTextData(const char * path,const char * filename,int32_t copyrightYear,const char * generator)96 usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
97 FILE *f = usrc_createWithoutHeader(path, filename);
98 if (f == nullptr) {
99 return f;
100 }
101 usrc_writeCopyrightHeader(f, "#", copyrightYear);
102 usrc_writeFileNameGeneratedBy(f, "#", filename, generator);
103 return f;
104 }
105
106 U_CAPI void U_EXPORT2
usrc_writeCopyrightHeader(FILE * f,const char * prefix,int32_t copyrightYear)107 usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) {
108 fprintf(f,
109 "%s Copyright (C) %d and later: Unicode, Inc. and others.\n"
110 "%s License & terms of use: http://www.unicode.org/copyright.html\n",
111 prefix, copyrightYear, prefix);
112 if (copyrightYear <= 2016) {
113 fprintf(f,
114 "%s Copyright (C) 1999-2016, International Business Machines\n"
115 "%s Corporation and others. All Rights Reserved.\n",
116 prefix, prefix);
117 }
118 }
119
120 U_CAPI void U_EXPORT2
usrc_writeFileNameGeneratedBy(FILE * f,const char * prefix,const char * filename,const char * generator)121 usrc_writeFileNameGeneratedBy(
122 FILE *f,
123 const char *prefix,
124 const char *filename,
125 const char *generator) {
126 char buffer[1024];
127 const struct tm *lt;
128 time_t t;
129
130 const char *pattern =
131 "%s\n"
132 "%s file name: %s\n"
133 "%s\n"
134 "%s machine-generated by: %s\n"
135 "\n";
136
137 time(&t);
138 lt=localtime(&t);
139 if(generator==nullptr) {
140 strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt);
141 fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, buffer);
142 } else {
143 fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, generator);
144 }
145 }
146
147 U_CAPI void U_EXPORT2
usrc_writeArray(FILE * f,const char * prefix,const void * p,int32_t width,int32_t length,const char * indent,const char * postfix)148 usrc_writeArray(FILE *f,
149 const char *prefix,
150 const void *p, int32_t width, int32_t length,
151 const char *indent,
152 const char *postfix) {
153 const uint8_t *p8;
154 const uint16_t *p16;
155 const uint32_t *p32;
156 const int64_t *p64; // Signed due to TOML!
157 int64_t value; // Signed due to TOML!
158 int32_t i, col;
159
160 p8=nullptr;
161 p16=nullptr;
162 p32=nullptr;
163 p64=nullptr;
164 switch(width) {
165 case 8:
166 p8=(const uint8_t *)p;
167 break;
168 case 16:
169 p16=(const uint16_t *)p;
170 break;
171 case 32:
172 p32=(const uint32_t *)p;
173 break;
174 case 64:
175 p64=(const int64_t *)p;
176 break;
177 default:
178 fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width);
179 return;
180 }
181 if(prefix!=nullptr) {
182 fprintf(f, prefix, (long)length);
183 }
184 for(i=col=0; i<length; ++i, ++col) {
185 if(i>0) {
186 if(col<16) {
187 fputc(',', f);
188 } else {
189 fputs(",\n", f);
190 fputs(indent, f);
191 col=0;
192 }
193 }
194 switch(width) {
195 case 8:
196 value=p8[i];
197 break;
198 case 16:
199 value=p16[i];
200 break;
201 case 32:
202 value=p32[i];
203 break;
204 case 64:
205 value=p64[i];
206 break;
207 default:
208 value=0; /* unreachable */
209 break;
210 }
211 fprintf(f, value<=9 ? "%" PRId64 : "0x%" PRIx64, value);
212 }
213 if(postfix!=nullptr) {
214 fputs(postfix, f);
215 }
216 }
217
218 U_CAPI void U_EXPORT2
usrc_writeUTrie2Arrays(FILE * f,const char * indexPrefix,const char * data32Prefix,const UTrie2 * pTrie,const char * postfix)219 usrc_writeUTrie2Arrays(FILE *f,
220 const char *indexPrefix, const char *data32Prefix,
221 const UTrie2 *pTrie,
222 const char *postfix) {
223 if(pTrie->data32==nullptr) {
224 /* 16-bit trie */
225 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, "", postfix);
226 } else {
227 /* 32-bit trie */
228 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, "", postfix);
229 usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix);
230 }
231 }
232
233 U_CAPI void U_EXPORT2
usrc_writeUTrie2Struct(FILE * f,const char * prefix,const UTrie2 * pTrie,const char * indexName,const char * data32Name,const char * postfix)234 usrc_writeUTrie2Struct(FILE *f,
235 const char *prefix,
236 const UTrie2 *pTrie,
237 const char *indexName, const char *data32Name,
238 const char *postfix) {
239 if(prefix!=nullptr) {
240 fputs(prefix, f);
241 }
242 if(pTrie->data32==nullptr) {
243 /* 16-bit trie */
244 fprintf(
245 f,
246 " %s,\n" /* index */
247 " %s+%ld,\n" /* data16 */
248 " nullptr,\n", /* data32 */
249 indexName,
250 indexName,
251 (long)pTrie->indexLength);
252 } else {
253 /* 32-bit trie */
254 fprintf(
255 f,
256 " %s,\n" /* index */
257 " nullptr,\n" /* data16 */
258 " %s,\n", /* data32 */
259 indexName,
260 data32Name);
261 }
262 fprintf(
263 f,
264 " %ld,\n" /* indexLength */
265 " %ld,\n" /* dataLength */
266 " 0x%hx,\n" /* index2NullOffset */
267 " 0x%hx,\n" /* dataNullOffset */
268 " 0x%lx,\n" /* initialValue */
269 " 0x%lx,\n" /* errorValue */
270 " 0x%lx,\n" /* highStart */
271 " 0x%lx,\n" /* highValueIndex */
272 " nullptr, 0, false, false, 0, nullptr\n",
273 (long)pTrie->indexLength, (long)pTrie->dataLength,
274 (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
275 (long)pTrie->initialValue, (long)pTrie->errorValue,
276 (long)pTrie->highStart, (long)pTrie->highValueIndex);
277 if(postfix!=nullptr) {
278 fputs(postfix, f);
279 }
280 }
281
282 U_CAPI void U_EXPORT2
usrc_writeUCPTrieArrays(FILE * f,const char * indexPrefix,const char * dataPrefix,const UCPTrie * pTrie,const char * postfix,UTargetSyntax syntax)283 usrc_writeUCPTrieArrays(FILE *f,
284 const char *indexPrefix, const char *dataPrefix,
285 const UCPTrie *pTrie,
286 const char *postfix,
287 UTargetSyntax syntax) {
288 const char* indent = (syntax == UPRV_TARGET_SYNTAX_TOML) ? " " : "";
289 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, indent, postfix);
290 int32_t width=
291 pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
292 pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
293 pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
294 usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, indent, postfix);
295 }
296
297 U_CAPI void U_EXPORT2
usrc_writeUCPTrieStruct(FILE * f,const char * prefix,const UCPTrie * pTrie,const char * indexName,const char * dataName,const char * postfix,UTargetSyntax syntax)298 usrc_writeUCPTrieStruct(FILE *f,
299 const char *prefix,
300 const UCPTrie *pTrie,
301 const char *indexName, const char *dataName,
302 const char *postfix,
303 UTargetSyntax syntax) {
304 if(prefix!=nullptr) {
305 fputs(prefix, f);
306 }
307 if (syntax == UPRV_TARGET_SYNTAX_CCODE) {
308 fprintf(
309 f,
310 " %s,\n" // index
311 " { %s },\n", // data (union)
312 indexName,
313 dataName);
314 }
315 const char* pattern =
316 (syntax == UPRV_TARGET_SYNTAX_CCODE) ?
317 " %ld, %ld,\n" // indexLength, dataLength
318 " 0x%lx, 0x%x,\n" // highStart, shifted12HighStart
319 " %d, %d,\n" // type, valueWidth
320 " 0, 0,\n" // reserved32, reserved16
321 " 0x%x, 0x%lx,\n" // index3NullOffset, dataNullOffset
322 " 0x%lx,\n" // nullValue
323 :
324 "indexLength = %ld\n"
325 "dataLength = %ld\n"
326 "highStart = 0x%lx\n"
327 "shifted12HighStart = 0x%x\n"
328 "type = %d\n"
329 "valueWidth = %d\n"
330 "index3NullOffset = 0x%x\n"
331 "dataNullOffset = 0x%lx\n"
332 "nullValue = 0x%lx\n"
333 ;
334 fprintf(
335 f,
336 pattern,
337 (long)pTrie->indexLength, (long)pTrie->dataLength,
338 (long)pTrie->highStart, pTrie->shifted12HighStart,
339 pTrie->type, pTrie->valueWidth,
340 pTrie->index3NullOffset, (long)pTrie->dataNullOffset,
341 (long)pTrie->nullValue);
342 if(postfix!=nullptr) {
343 fputs(postfix, f);
344 }
345 }
346
347 U_CAPI void U_EXPORT2
usrc_writeUCPTrie(FILE * f,const char * name,const UCPTrie * pTrie,UTargetSyntax syntax)348 usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) {
349 int32_t width=
350 pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
351 pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
352 pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
353 char line[100], line2[100], line3[100], line4[100];
354
355 switch (syntax) {
356 case UPRV_TARGET_SYNTAX_CCODE:
357 snprintf(line, sizeof(line), "static const uint16_t %s_trieIndex[%%ld]={\n", name);
358 snprintf(line2, sizeof(line2), "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name);
359 snprintf(line3, sizeof(line3), "\n};\n\n");
360 break;
361 case UPRV_TARGET_SYNTAX_TOML:
362 snprintf(line, sizeof(line), "index = [\n ");
363 snprintf(line2, sizeof(line2), "data_%d = [\n ", (int)width);
364 snprintf(line3, sizeof(line3), "\n]\n");
365 break;
366 default:
367 UPRV_UNREACHABLE_EXIT;
368 }
369 usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax);
370
371 switch (syntax) {
372 case UPRV_TARGET_SYNTAX_CCODE:
373 snprintf(line, sizeof(line), "static const UCPTrie %s_trie={\n", name);
374 snprintf(line2, sizeof(line2), "%s_trieIndex", name);
375 snprintf(line3, sizeof(line3), "%s_trieData", name);
376 snprintf(line4, sizeof(line4), "};\n\n");
377 break;
378 case UPRV_TARGET_SYNTAX_TOML:
379 line[0] = 0;
380 line2[0] = 0;
381 line3[0] = 0;
382 line4[0] = 0;
383 break;
384 default:
385 UPRV_UNREACHABLE_EXIT;
386 }
387 usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax);
388 }
389
390 U_CAPI void U_EXPORT2
usrc_writeUnicodeSet(FILE * f,const USet * pSet,UTargetSyntax syntax)391 usrc_writeUnicodeSet(
392 FILE *f,
393 const USet *pSet,
394 UTargetSyntax syntax) {
395 // ccode is not yet supported
396 U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
397
398 // Write out a list of ranges
399 const UnicodeSet* set = UnicodeSet::fromUSet(pSet);
400 UnicodeSetIterator it(*set);
401 fprintf(f, "# Inclusive ranges of the code points in the set.\n");
402 fprintf(f, "ranges = [\n");
403 bool seenFirstString = false;
404 while (it.nextRange()) {
405 if (it.isString()) {
406 if (!seenFirstString) {
407 seenFirstString = true;
408 fprintf(f, "]\nstrings = [\n");
409 }
410 const UnicodeString& str = it.getString();
411 fprintf(f, " ");
412 usrc_writeStringAsASCII(f, str.getBuffer(), str.length(), syntax);
413 fprintf(f, ",\n");
414 } else {
415 U_ASSERT(!seenFirstString);
416 UChar32 start = it.getCodepoint();
417 UChar32 end = it.getCodepointEnd();
418 fprintf(f, " [0x%x, 0x%x],\n", start, end);
419 }
420 }
421 fprintf(f, "]\n");
422 }
423
424 U_CAPI void U_EXPORT2
usrc_writeUCPMap(FILE * f,const UCPMap * pMap,icu::ValueNameGetter * valueNameGetter,UTargetSyntax syntax)425 usrc_writeUCPMap(
426 FILE *f,
427 const UCPMap *pMap,
428 icu::ValueNameGetter *valueNameGetter,
429 UTargetSyntax syntax) {
430 // ccode is not yet supported
431 U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
432 (void) syntax; // silence unused variable errors
433
434 // Print out list of ranges
435 UChar32 start = 0, end;
436 uint32_t value;
437 fprintf(f, "# Code points `a` through `b` have value `v`, corresponding to `name`.\n");
438 fprintf(f, "ranges = [\n");
439 while ((end = ucpmap_getRange(pMap, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value)) >= 0) {
440 if (valueNameGetter != nullptr) {
441 const char *name = valueNameGetter->getName(value);
442 fprintf(f, " {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", start, end, value, name);
443 } else {
444 fprintf(f, " {a=0x%x, b=0x%x, v=%u},\n", start, end, value);
445 }
446 start = end + 1;
447 }
448 fprintf(f, "]\n");
449 }
450
451 U_CAPI void U_EXPORT2
usrc_writeArrayOfMostlyInvChars(FILE * f,const char * prefix,const char * p,int32_t length,const char * postfix)452 usrc_writeArrayOfMostlyInvChars(FILE *f,
453 const char *prefix,
454 const char *p, int32_t length,
455 const char *postfix) {
456 int32_t i, col;
457 int prev2, prev, c;
458
459 if(prefix!=nullptr) {
460 fprintf(f, prefix, (long)length);
461 }
462 prev2=prev=-1;
463 for(i=col=0; i<length; ++i, ++col) {
464 c=(uint8_t)p[i];
465 if(i>0) {
466 /* Break long lines. Try to break at interesting places, to minimize revision diffs. */
467 if(
468 /* Very long line. */
469 col>=32 ||
470 /* Long line, break after terminating NUL. */
471 (col>=24 && prev2>=0x20 && prev==0) ||
472 /* Medium-long line, break before non-NUL, non-character byte. */
473 (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20)
474 ) {
475 fputs(",\n", f);
476 col=0;
477 } else {
478 fputc(',', f);
479 }
480 }
481 fprintf(f, c<0x20 ? "%u" : "'%c'", c);
482 prev2=prev;
483 prev=c;
484 }
485 if(postfix!=nullptr) {
486 fputs(postfix, f);
487 }
488 }
489
490 U_CAPI void U_EXPORT2
usrc_writeStringAsASCII(FILE * f,const char16_t * ptr,int32_t length,UTargetSyntax)491 usrc_writeStringAsASCII(FILE *f,
492 const char16_t* ptr, int32_t length,
493 UTargetSyntax) {
494 // For now, assume all UTargetSyntax values are valid here.
495 fprintf(f, "\"");
496 int32_t i = 0;
497 UChar32 cp;
498 while (i < length) {
499 U16_NEXT(ptr, i, length, cp);
500 if (cp == u'"') {
501 fprintf(f, "\\\"");
502 } else if (ICU_Utility::isUnprintable(cp)) {
503 UnicodeString u16result;
504 ICU_Utility::escapeUnprintable(u16result, cp);
505 std::string u8result;
506 u16result.toUTF8String(u8result);
507 fprintf(f, "%s", u8result.data());
508 } else {
509 U_ASSERT(cp < 0x80);
510 char s[2] = {static_cast<char>(cp), 0};
511 fprintf(f, "%s", s);
512 }
513 }
514 fprintf(f, "\"");
515 }
516