1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2005-2012, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: writesrc.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2005apr23
16 * created by: Markus W. Scherer
17 *
18 * Helper functions for writing source code for data.
19 */
20
21 #include <stdio.h>
22 #include <inttypes.h>
23 #include <time.h>
24 #include "unicode/utypes.h"
25 #include "unicode/putil.h"
26 #include "unicode/ucptrie.h"
27 #include "unicode/errorcode.h"
28 #include "unicode/uniset.h"
29 #include "unicode/usetiter.h"
30 #include "unicode/utf16.h"
31 #include "utrie2.h"
32 #include "cstring.h"
33 #include "writesrc.h"
34 #include "util.h"
35
36 U_NAMESPACE_BEGIN
37
~ValueNameGetter()38 ValueNameGetter::~ValueNameGetter() {}
39
40 U_NAMESPACE_END
41
42 U_NAMESPACE_USE
43
44 static FILE *
usrc_createWithoutHeader(const char * path,const char * filename)45 usrc_createWithoutHeader(const char *path, const char *filename) {
46 char buffer[1024];
47 const char *p;
48 char *q;
49 FILE *f;
50 char c;
51
52 if(path==NULL) {
53 p=filename;
54 } else {
55 /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */
56 uprv_strcpy(buffer, path);
57 q=buffer+uprv_strlen(buffer);
58 if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
59 *q++=U_FILE_SEP_CHAR;
60 }
61 uprv_strcpy(q, filename);
62 p=buffer;
63 }
64
65 f=fopen(p, "w");
66 if (f==NULL) {
67 fprintf(
68 stderr,
69 "usrc_create(%s, %s): unable to create file\n",
70 path!=NULL ? path : "", filename);
71 }
72 return f;
73 }
74
75 U_CAPI FILE * U_EXPORT2
usrc_create(const char * path,const char * filename,int32_t copyrightYear,const char * generator)76 usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
77 FILE *f = usrc_createWithoutHeader(path, filename);
78 if (f == NULL) {
79 return f;
80 }
81 usrc_writeCopyrightHeader(f, "//", copyrightYear);
82 usrc_writeFileNameGeneratedBy(f, "//", filename, generator);
83 return f;
84 }
85
86 U_CAPI FILE * U_EXPORT2
usrc_createTextData(const char * path,const char * filename,int32_t copyrightYear,const char * generator)87 usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
88 FILE *f = usrc_createWithoutHeader(path, filename);
89 if (f == NULL) {
90 return f;
91 }
92 usrc_writeCopyrightHeader(f, "#", copyrightYear);
93 usrc_writeFileNameGeneratedBy(f, "#", filename, generator);
94 return f;
95 }
96
97 U_CAPI void U_EXPORT2
usrc_writeCopyrightHeader(FILE * f,const char * prefix,int32_t copyrightYear)98 usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) {
99 fprintf(f,
100 "%s Copyright (C) %d and later: Unicode, Inc. and others.\n"
101 "%s License & terms of use: http://www.unicode.org/copyright.html\n",
102 prefix, copyrightYear, prefix);
103 if (copyrightYear <= 2016) {
104 fprintf(f,
105 "%s Copyright (C) 1999-2016, International Business Machines\n"
106 "%s Corporation and others. All Rights Reserved.\n",
107 prefix, prefix);
108 }
109 }
110
111 U_CAPI void U_EXPORT2
usrc_writeFileNameGeneratedBy(FILE * f,const char * prefix,const char * filename,const char * generator)112 usrc_writeFileNameGeneratedBy(
113 FILE *f,
114 const char *prefix,
115 const char *filename,
116 const char *generator) {
117 char buffer[1024];
118 const struct tm *lt;
119 time_t t;
120
121 const char *pattern =
122 "%s\n"
123 "%s file name: %s\n"
124 "%s\n"
125 "%s machine-generated by: %s\n"
126 "\n";
127
128 time(&t);
129 lt=localtime(&t);
130 if(generator==NULL) {
131 strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt);
132 fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, buffer);
133 } else {
134 fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, generator);
135 }
136 }
137
138 U_CAPI void U_EXPORT2
usrc_writeArray(FILE * f,const char * prefix,const void * p,int32_t width,int32_t length,const char * indent,const char * postfix)139 usrc_writeArray(FILE *f,
140 const char *prefix,
141 const void *p, int32_t width, int32_t length,
142 const char *indent,
143 const char *postfix) {
144 const uint8_t *p8;
145 const uint16_t *p16;
146 const uint32_t *p32;
147 const int64_t *p64; // Signed due to TOML!
148 int64_t value; // Signed due to TOML!
149 int32_t i, col;
150
151 p8=NULL;
152 p16=NULL;
153 p32=NULL;
154 p64=NULL;
155 switch(width) {
156 case 8:
157 p8=(const uint8_t *)p;
158 break;
159 case 16:
160 p16=(const uint16_t *)p;
161 break;
162 case 32:
163 p32=(const uint32_t *)p;
164 break;
165 case 64:
166 p64=(const int64_t *)p;
167 break;
168 default:
169 fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width);
170 return;
171 }
172 if(prefix!=NULL) {
173 fprintf(f, prefix, (long)length);
174 }
175 for(i=col=0; i<length; ++i, ++col) {
176 if(i>0) {
177 if(col<16) {
178 fputc(',', f);
179 } else {
180 fputs(",\n", f);
181 fputs(indent, f);
182 col=0;
183 }
184 }
185 switch(width) {
186 case 8:
187 value=p8[i];
188 break;
189 case 16:
190 value=p16[i];
191 break;
192 case 32:
193 value=p32[i];
194 break;
195 case 64:
196 value=p64[i];
197 break;
198 default:
199 value=0; /* unreachable */
200 break;
201 }
202 fprintf(f, value<=9 ? "%" PRId64 : "0x%" PRIx64, value);
203 }
204 if(postfix!=NULL) {
205 fputs(postfix, f);
206 }
207 }
208
209 U_CAPI void U_EXPORT2
usrc_writeUTrie2Arrays(FILE * f,const char * indexPrefix,const char * data32Prefix,const UTrie2 * pTrie,const char * postfix)210 usrc_writeUTrie2Arrays(FILE *f,
211 const char *indexPrefix, const char *data32Prefix,
212 const UTrie2 *pTrie,
213 const char *postfix) {
214 if(pTrie->data32==NULL) {
215 /* 16-bit trie */
216 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, "", postfix);
217 } else {
218 /* 32-bit trie */
219 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, "", postfix);
220 usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix);
221 }
222 }
223
224 U_CAPI void U_EXPORT2
usrc_writeUTrie2Struct(FILE * f,const char * prefix,const UTrie2 * pTrie,const char * indexName,const char * data32Name,const char * postfix)225 usrc_writeUTrie2Struct(FILE *f,
226 const char *prefix,
227 const UTrie2 *pTrie,
228 const char *indexName, const char *data32Name,
229 const char *postfix) {
230 if(prefix!=NULL) {
231 fputs(prefix, f);
232 }
233 if(pTrie->data32==NULL) {
234 /* 16-bit trie */
235 fprintf(
236 f,
237 " %s,\n" /* index */
238 " %s+%ld,\n" /* data16 */
239 " NULL,\n", /* data32 */
240 indexName,
241 indexName,
242 (long)pTrie->indexLength);
243 } else {
244 /* 32-bit trie */
245 fprintf(
246 f,
247 " %s,\n" /* index */
248 " NULL,\n" /* data16 */
249 " %s,\n", /* data32 */
250 indexName,
251 data32Name);
252 }
253 fprintf(
254 f,
255 " %ld,\n" /* indexLength */
256 " %ld,\n" /* dataLength */
257 " 0x%hx,\n" /* index2NullOffset */
258 " 0x%hx,\n" /* dataNullOffset */
259 " 0x%lx,\n" /* initialValue */
260 " 0x%lx,\n" /* errorValue */
261 " 0x%lx,\n" /* highStart */
262 " 0x%lx,\n" /* highValueIndex */
263 " NULL, 0, false, false, 0, NULL\n",
264 (long)pTrie->indexLength, (long)pTrie->dataLength,
265 (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
266 (long)pTrie->initialValue, (long)pTrie->errorValue,
267 (long)pTrie->highStart, (long)pTrie->highValueIndex);
268 if(postfix!=NULL) {
269 fputs(postfix, f);
270 }
271 }
272
273 U_CAPI void U_EXPORT2
usrc_writeUCPTrieArrays(FILE * f,const char * indexPrefix,const char * dataPrefix,const UCPTrie * pTrie,const char * postfix,UTargetSyntax syntax)274 usrc_writeUCPTrieArrays(FILE *f,
275 const char *indexPrefix, const char *dataPrefix,
276 const UCPTrie *pTrie,
277 const char *postfix,
278 UTargetSyntax syntax) {
279 const char* indent = (syntax == UPRV_TARGET_SYNTAX_TOML) ? " " : "";
280 usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, indent, postfix);
281 int32_t width=
282 pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
283 pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
284 pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
285 usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, indent, postfix);
286 }
287
288 U_CAPI void U_EXPORT2
usrc_writeUCPTrieStruct(FILE * f,const char * prefix,const UCPTrie * pTrie,const char * indexName,const char * dataName,const char * postfix,UTargetSyntax syntax)289 usrc_writeUCPTrieStruct(FILE *f,
290 const char *prefix,
291 const UCPTrie *pTrie,
292 const char *indexName, const char *dataName,
293 const char *postfix,
294 UTargetSyntax syntax) {
295 if(prefix!=NULL) {
296 fputs(prefix, f);
297 }
298 if (syntax == UPRV_TARGET_SYNTAX_CCODE) {
299 fprintf(
300 f,
301 " %s,\n" // index
302 " { %s },\n", // data (union)
303 indexName,
304 dataName);
305 }
306 const char* pattern =
307 (syntax == UPRV_TARGET_SYNTAX_CCODE) ?
308 " %ld, %ld,\n" // indexLength, dataLength
309 " 0x%lx, 0x%x,\n" // highStart, shifted12HighStart
310 " %d, %d,\n" // type, valueWidth
311 " 0, 0,\n" // reserved32, reserved16
312 " 0x%x, 0x%lx,\n" // index3NullOffset, dataNullOffset
313 " 0x%lx,\n" // nullValue
314 :
315 "indexLength = %ld\n"
316 "dataLength = %ld\n"
317 "highStart = 0x%lx\n"
318 "shifted12HighStart = 0x%x\n"
319 "type = %d\n"
320 "valueWidth = %d\n"
321 "index3NullOffset = 0x%x\n"
322 "dataNullOffset = 0x%lx\n"
323 "nullValue = 0x%lx\n"
324 ;
325 fprintf(
326 f,
327 pattern,
328 (long)pTrie->indexLength, (long)pTrie->dataLength,
329 (long)pTrie->highStart, pTrie->shifted12HighStart,
330 pTrie->type, pTrie->valueWidth,
331 pTrie->index3NullOffset, (long)pTrie->dataNullOffset,
332 (long)pTrie->nullValue);
333 if(postfix!=NULL) {
334 fputs(postfix, f);
335 }
336 }
337
338 U_CAPI void U_EXPORT2
usrc_writeUCPTrie(FILE * f,const char * name,const UCPTrie * pTrie,UTargetSyntax syntax)339 usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) {
340 int32_t width=
341 pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
342 pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
343 pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
344 char line[100], line2[100], line3[100], line4[100];
345
346 switch (syntax) {
347 case UPRV_TARGET_SYNTAX_CCODE:
348 sprintf(line, "static const uint16_t %s_trieIndex[%%ld]={\n", name);
349 sprintf(line2, "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name);
350 sprintf(line3, "\n};\n\n");
351 break;
352 case UPRV_TARGET_SYNTAX_TOML:
353 sprintf(line, "index = [\n ");
354 sprintf(line2, "data_%d = [\n ", (int)width);
355 sprintf(line3, "\n]\n");
356 break;
357 default:
358 UPRV_UNREACHABLE_EXIT;
359 }
360 usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax);
361
362 switch (syntax) {
363 case UPRV_TARGET_SYNTAX_CCODE:
364 sprintf(line, "static const UCPTrie %s_trie={\n", name);
365 sprintf(line2, "%s_trieIndex", name);
366 sprintf(line3, "%s_trieData", name);
367 sprintf(line4, "};\n\n");
368 break;
369 case UPRV_TARGET_SYNTAX_TOML:
370 line[0] = 0;
371 line2[0] = 0;
372 line3[0] = 0;
373 line4[0] = 0;
374 break;
375 default:
376 UPRV_UNREACHABLE_EXIT;
377 }
378 usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax);
379 }
380
381 U_CAPI void U_EXPORT2
usrc_writeUnicodeSet(FILE * f,const USet * pSet,UTargetSyntax syntax)382 usrc_writeUnicodeSet(
383 FILE *f,
384 const USet *pSet,
385 UTargetSyntax syntax) {
386 // ccode is not yet supported
387 U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
388
389 // Write out a list of ranges
390 const UnicodeSet* set = UnicodeSet::fromUSet(pSet);
391 UnicodeSetIterator it(*set);
392 fprintf(f, "# Inclusive ranges of the code points in the set.\n");
393 fprintf(f, "ranges = [\n");
394 bool seenFirstString = false;
395 while (it.nextRange()) {
396 if (it.isString()) {
397 if (!seenFirstString) {
398 seenFirstString = true;
399 fprintf(f, "]\nstrings = [\n");
400 }
401 const UnicodeString& str = it.getString();
402 fprintf(f, " ");
403 usrc_writeStringAsASCII(f, str.getBuffer(), str.length(), syntax);
404 fprintf(f, ",\n");
405 } else {
406 U_ASSERT(!seenFirstString);
407 UChar32 start = it.getCodepoint();
408 UChar32 end = it.getCodepointEnd();
409 fprintf(f, " [0x%x, 0x%x],\n", start, end);
410 }
411 }
412 fprintf(f, "]\n");
413 }
414
415 U_CAPI void U_EXPORT2
usrc_writeUCPMap(FILE * f,const UCPMap * pMap,icu::ValueNameGetter * valueNameGetter,UTargetSyntax syntax)416 usrc_writeUCPMap(
417 FILE *f,
418 const UCPMap *pMap,
419 icu::ValueNameGetter *valueNameGetter,
420 UTargetSyntax syntax) {
421 // ccode is not yet supported
422 U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
423 (void) syntax; // silence unused variable errors
424
425 // Print out list of ranges
426 UChar32 start = 0, end;
427 uint32_t value;
428 fprintf(f, "# Code points `a` through `b` have value `v`, corresponding to `name`.\n");
429 fprintf(f, "ranges = [\n");
430 while ((end = ucpmap_getRange(pMap, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value)) >= 0) {
431 if (valueNameGetter != nullptr) {
432 const char *name = valueNameGetter->getName(value);
433 fprintf(f, " {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", start, end, value, name);
434 } else {
435 fprintf(f, " {a=0x%x, b=0x%x, v=%u},\n", start, end, value);
436 }
437 start = end + 1;
438 }
439 fprintf(f, "]\n");
440 }
441
442 U_CAPI void U_EXPORT2
usrc_writeArrayOfMostlyInvChars(FILE * f,const char * prefix,const char * p,int32_t length,const char * postfix)443 usrc_writeArrayOfMostlyInvChars(FILE *f,
444 const char *prefix,
445 const char *p, int32_t length,
446 const char *postfix) {
447 int32_t i, col;
448 int prev2, prev, c;
449
450 if(prefix!=NULL) {
451 fprintf(f, prefix, (long)length);
452 }
453 prev2=prev=-1;
454 for(i=col=0; i<length; ++i, ++col) {
455 c=(uint8_t)p[i];
456 if(i>0) {
457 /* Break long lines. Try to break at interesting places, to minimize revision diffs. */
458 if(
459 /* Very long line. */
460 col>=32 ||
461 /* Long line, break after terminating NUL. */
462 (col>=24 && prev2>=0x20 && prev==0) ||
463 /* Medium-long line, break before non-NUL, non-character byte. */
464 (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20)
465 ) {
466 fputs(",\n", f);
467 col=0;
468 } else {
469 fputc(',', f);
470 }
471 }
472 fprintf(f, c<0x20 ? "%u" : "'%c'", c);
473 prev2=prev;
474 prev=c;
475 }
476 if(postfix!=NULL) {
477 fputs(postfix, f);
478 }
479 }
480
481 U_CAPI void U_EXPORT2
usrc_writeStringAsASCII(FILE * f,const UChar * ptr,int32_t length,UTargetSyntax)482 usrc_writeStringAsASCII(FILE *f,
483 const UChar* ptr, int32_t length,
484 UTargetSyntax) {
485 // For now, assume all UTargetSyntax values are valid here.
486 fprintf(f, "\"");
487 int32_t i = 0;
488 UChar32 cp;
489 while (i < length) {
490 U16_NEXT(ptr, i, length, cp);
491 if (cp == u'"') {
492 fprintf(f, "\\\"");
493 } else if (ICU_Utility::isUnprintable(cp)) {
494 UnicodeString u16result;
495 ICU_Utility::escapeUnprintable(u16result, cp);
496 std::string u8result;
497 u16result.toUTF8String(u8result);
498 fprintf(f, "%s", u8result.data());
499 } else {
500 U_ASSERT(cp < 0x80);
501 char s[2] = {static_cast<char>(cp), 0};
502 fprintf(f, "%s", s);
503 }
504 }
505 fprintf(f, "\"");
506 }
507