• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2005-2012, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  writesrc.c
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2005apr23
16 *   created by: Markus W. Scherer
17 *
18 *   Helper functions for writing source code for data.
19 */
20 
21 #include <stdio.h>
22 #include <time.h>
23 
24 // The C99 standard suggested that C++ implementations not define PRId64 etc. constants
25 // unless this macro is defined.
26 // See the Notes at https://en.cppreference.com/w/cpp/types/integer .
27 // Similar to defining __STDC_LIMIT_MACROS in unicode/ptypes.h .
28 #ifndef __STDC_FORMAT_MACROS
29 #   define __STDC_FORMAT_MACROS
30 #endif
31 #include <cinttypes>
32 
33 #include "unicode/utypes.h"
34 #include "unicode/putil.h"
35 #include "unicode/ucptrie.h"
36 #include "unicode/errorcode.h"
37 #include "unicode/uniset.h"
38 #include "unicode/usetiter.h"
39 #include "unicode/utf16.h"
40 #include "utrie2.h"
41 #include "cstring.h"
42 #include "writesrc.h"
43 #include "util.h"
44 
45 U_NAMESPACE_BEGIN
46 
~ValueNameGetter()47 ValueNameGetter::~ValueNameGetter() {}
48 
49 U_NAMESPACE_END
50 
51 U_NAMESPACE_USE
52 
53 static FILE *
usrc_createWithoutHeader(const char * path,const char * filename)54 usrc_createWithoutHeader(const char *path, const char *filename) {
55     char buffer[1024];
56     const char *p;
57     char *q;
58     FILE *f;
59     char c;
60 
61     if(path==nullptr) {
62         p=filename;
63     } else {
64         /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */
65         uprv_strcpy(buffer, path);
66         q=buffer+uprv_strlen(buffer);
67         if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
68             *q++=U_FILE_SEP_CHAR;
69         }
70         uprv_strcpy(q, filename);
71         p=buffer;
72     }
73 
74     f=fopen(p, "w");
75     if (f==nullptr) {
76         fprintf(
77             stderr,
78             "usrc_create(%s, %s): unable to create file\n",
79             path!=nullptr ? path : "", filename);
80     }
81     return f;
82 }
83 
84 U_CAPI FILE * U_EXPORT2
usrc_create(const char * path,const char * filename,int32_t copyrightYear,const char * generator)85 usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
86     FILE *f = usrc_createWithoutHeader(path, filename);
87     if (f == nullptr) {
88         return f;
89     }
90     usrc_writeCopyrightHeader(f, "//", copyrightYear);
91     usrc_writeFileNameGeneratedBy(f, "//", filename, generator);
92     return f;
93 }
94 
95 U_CAPI FILE * U_EXPORT2
usrc_createTextData(const char * path,const char * filename,int32_t copyrightYear,const char * generator)96 usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
97     FILE *f = usrc_createWithoutHeader(path, filename);
98     if (f == nullptr) {
99         return f;
100     }
101     usrc_writeCopyrightHeader(f, "#", copyrightYear);
102     usrc_writeFileNameGeneratedBy(f, "#", filename, generator);
103     return f;
104 }
105 
106 U_CAPI void U_EXPORT2
usrc_writeCopyrightHeader(FILE * f,const char * prefix,int32_t copyrightYear)107 usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) {
108     fprintf(f,
109         "%s Copyright (C) %d and later: Unicode, Inc. and others.\n"
110         "%s License & terms of use: http://www.unicode.org/copyright.html\n",
111         prefix, copyrightYear, prefix);
112     if (copyrightYear <= 2016) {
113         fprintf(f,
114             "%s Copyright (C) 1999-2016, International Business Machines\n"
115             "%s Corporation and others.  All Rights Reserved.\n",
116             prefix, prefix);
117     }
118 }
119 
120 U_CAPI void U_EXPORT2
usrc_writeFileNameGeneratedBy(FILE * f,const char * prefix,const char * filename,const char * generator)121 usrc_writeFileNameGeneratedBy(
122         FILE *f,
123         const char *prefix,
124         const char *filename,
125         const char *generator) {
126     char buffer[1024];
127     const struct tm *lt;
128     time_t t;
129 
130     const char *pattern =
131         "%s\n"
132         "%s file name: %s\n"
133         "%s\n"
134         "%s machine-generated by: %s\n"
135         "\n";
136 
137     time(&t);
138     lt=localtime(&t);
139     if(generator==nullptr) {
140         strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt);
141         fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, buffer);
142     } else {
143         fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, generator);
144     }
145 }
146 
147 U_CAPI void U_EXPORT2
usrc_writeArray(FILE * f,const char * prefix,const void * p,int32_t width,int32_t length,const char * indent,const char * postfix)148 usrc_writeArray(FILE *f,
149                 const char *prefix,
150                 const void *p, int32_t width, int32_t length,
151                 const char *indent,
152                 const char *postfix) {
153     const uint8_t *p8;
154     const uint16_t *p16;
155     const uint32_t *p32;
156     const int64_t *p64; // Signed due to TOML!
157     int64_t value; // Signed due to TOML!
158     int32_t i, col;
159 
160     p8=nullptr;
161     p16=nullptr;
162     p32=nullptr;
163     p64=nullptr;
164     switch(width) {
165     case 8:
166         p8=(const uint8_t *)p;
167         break;
168     case 16:
169         p16=(const uint16_t *)p;
170         break;
171     case 32:
172         p32=(const uint32_t *)p;
173         break;
174     case 64:
175         p64=(const int64_t *)p;
176         break;
177     default:
178         fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width);
179         return;
180     }
181     if(prefix!=nullptr) {
182         fprintf(f, prefix, (long)length);
183     }
184     for(i=col=0; i<length; ++i, ++col) {
185         if(i>0) {
186             if(col<16) {
187                 fputc(',', f);
188             } else {
189                 fputs(",\n", f);
190                 fputs(indent, f);
191                 col=0;
192             }
193         }
194         switch(width) {
195         case 8:
196             value=p8[i];
197             break;
198         case 16:
199             value=p16[i];
200             break;
201         case 32:
202             value=p32[i];
203             break;
204         case 64:
205             value=p64[i];
206             break;
207         default:
208             value=0; /* unreachable */
209             break;
210         }
211         fprintf(f, value<=9 ? "%" PRId64 : "0x%" PRIx64, value);
212     }
213     if(postfix!=nullptr) {
214         fputs(postfix, f);
215     }
216 }
217 
218 U_CAPI void U_EXPORT2
usrc_writeUTrie2Arrays(FILE * f,const char * indexPrefix,const char * data32Prefix,const UTrie2 * pTrie,const char * postfix)219 usrc_writeUTrie2Arrays(FILE *f,
220                        const char *indexPrefix, const char *data32Prefix,
221                        const UTrie2 *pTrie,
222                        const char *postfix) {
223     if(pTrie->data32==nullptr) {
224         /* 16-bit trie */
225         usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, "", postfix);
226     } else {
227         /* 32-bit trie */
228         usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, "", postfix);
229         usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix);
230     }
231 }
232 
233 U_CAPI void U_EXPORT2
usrc_writeUTrie2Struct(FILE * f,const char * prefix,const UTrie2 * pTrie,const char * indexName,const char * data32Name,const char * postfix)234 usrc_writeUTrie2Struct(FILE *f,
235                        const char *prefix,
236                        const UTrie2 *pTrie,
237                        const char *indexName, const char *data32Name,
238                        const char *postfix) {
239     if(prefix!=nullptr) {
240         fputs(prefix, f);
241     }
242     if(pTrie->data32==nullptr) {
243         /* 16-bit trie */
244         fprintf(
245             f,
246             "    %s,\n"         /* index */
247             "    %s+%ld,\n"     /* data16 */
248             "    nullptr,\n",      /* data32 */
249             indexName,
250             indexName,
251             (long)pTrie->indexLength);
252     } else {
253         /* 32-bit trie */
254         fprintf(
255             f,
256             "    %s,\n"         /* index */
257             "    nullptr,\n"       /* data16 */
258             "    %s,\n",        /* data32 */
259             indexName,
260             data32Name);
261     }
262     fprintf(
263         f,
264         "    %ld,\n"            /* indexLength */
265         "    %ld,\n"            /* dataLength */
266         "    0x%hx,\n"          /* index2NullOffset */
267         "    0x%hx,\n"          /* dataNullOffset */
268         "    0x%lx,\n"          /* initialValue */
269         "    0x%lx,\n"          /* errorValue */
270         "    0x%lx,\n"          /* highStart */
271         "    0x%lx,\n"          /* highValueIndex */
272         "    nullptr, 0, false, false, 0, nullptr\n",
273         (long)pTrie->indexLength, (long)pTrie->dataLength,
274         (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
275         (long)pTrie->initialValue, (long)pTrie->errorValue,
276         (long)pTrie->highStart, (long)pTrie->highValueIndex);
277     if(postfix!=nullptr) {
278         fputs(postfix, f);
279     }
280 }
281 
282 U_CAPI void U_EXPORT2
usrc_writeUCPTrieArrays(FILE * f,const char * indexPrefix,const char * dataPrefix,const UCPTrie * pTrie,const char * postfix,UTargetSyntax syntax)283 usrc_writeUCPTrieArrays(FILE *f,
284                         const char *indexPrefix, const char *dataPrefix,
285                         const UCPTrie *pTrie,
286                         const char *postfix,
287                         UTargetSyntax syntax) {
288     const char* indent = (syntax == UPRV_TARGET_SYNTAX_TOML) ? "  " : "";
289     usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, indent, postfix);
290     int32_t width=
291         pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
292         pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
293         pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
294     usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, indent, postfix);
295 }
296 
297 U_CAPI void U_EXPORT2
usrc_writeUCPTrieStruct(FILE * f,const char * prefix,const UCPTrie * pTrie,const char * indexName,const char * dataName,const char * postfix,UTargetSyntax syntax)298 usrc_writeUCPTrieStruct(FILE *f,
299                         const char *prefix,
300                         const UCPTrie *pTrie,
301                         const char *indexName, const char *dataName,
302                         const char *postfix,
303                         UTargetSyntax syntax) {
304     if(prefix!=nullptr) {
305         fputs(prefix, f);
306     }
307     if (syntax == UPRV_TARGET_SYNTAX_CCODE) {
308         fprintf(
309             f,
310             "    %s,\n"             // index
311             "    { %s },\n",        // data (union)
312             indexName,
313             dataName);
314     }
315     const char* pattern =
316         (syntax == UPRV_TARGET_SYNTAX_CCODE) ?
317         "    %ld, %ld,\n"       // indexLength, dataLength
318         "    0x%lx, 0x%x,\n"    // highStart, shifted12HighStart
319         "    %d, %d,\n"         // type, valueWidth
320         "    0, 0,\n"           // reserved32, reserved16
321         "    0x%x, 0x%lx,\n"    // index3NullOffset, dataNullOffset
322         "    0x%lx,\n"          // nullValue
323         :
324         "indexLength = %ld\n"
325         "dataLength = %ld\n"
326         "highStart = 0x%lx\n"
327         "shifted12HighStart = 0x%x\n"
328         "type = %d\n"
329         "valueWidth = %d\n"
330         "index3NullOffset = 0x%x\n"
331         "dataNullOffset = 0x%lx\n"
332         "nullValue = 0x%lx\n"
333         ;
334     fprintf(
335         f,
336         pattern,
337         (long)pTrie->indexLength, (long)pTrie->dataLength,
338         (long)pTrie->highStart, pTrie->shifted12HighStart,
339         pTrie->type, pTrie->valueWidth,
340         pTrie->index3NullOffset, (long)pTrie->dataNullOffset,
341         (long)pTrie->nullValue);
342     if(postfix!=nullptr) {
343         fputs(postfix, f);
344     }
345 }
346 
347 U_CAPI void U_EXPORT2
usrc_writeUCPTrie(FILE * f,const char * name,const UCPTrie * pTrie,UTargetSyntax syntax)348 usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) {
349     int32_t width=
350         pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
351         pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
352         pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
353     char line[100], line2[100], line3[100], line4[100];
354 
355     switch (syntax) {
356     case UPRV_TARGET_SYNTAX_CCODE:
357         snprintf(line, sizeof(line), "static const uint16_t %s_trieIndex[%%ld]={\n", name);
358         snprintf(line2, sizeof(line2), "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name);
359         snprintf(line3, sizeof(line3), "\n};\n\n");
360         break;
361     case UPRV_TARGET_SYNTAX_TOML:
362         snprintf(line, sizeof(line), "index = [\n  ");
363         snprintf(line2, sizeof(line2), "data_%d = [\n  ", (int)width);
364         snprintf(line3, sizeof(line3), "\n]\n");
365         break;
366     default:
367         UPRV_UNREACHABLE_EXIT;
368     }
369     usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax);
370 
371     switch (syntax) {
372     case UPRV_TARGET_SYNTAX_CCODE:
373         snprintf(line, sizeof(line), "static const UCPTrie %s_trie={\n", name);
374         snprintf(line2, sizeof(line2), "%s_trieIndex", name);
375         snprintf(line3, sizeof(line3), "%s_trieData", name);
376         snprintf(line4, sizeof(line4), "};\n\n");
377         break;
378     case UPRV_TARGET_SYNTAX_TOML:
379         line[0] = 0;
380         line2[0] = 0;
381         line3[0] = 0;
382         line4[0] = 0;
383         break;
384     default:
385         UPRV_UNREACHABLE_EXIT;
386     }
387     usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax);
388 }
389 
390 U_CAPI void U_EXPORT2
usrc_writeUnicodeSet(FILE * f,const USet * pSet,UTargetSyntax syntax)391 usrc_writeUnicodeSet(
392         FILE *f,
393         const USet *pSet,
394         UTargetSyntax syntax) {
395     // ccode is not yet supported
396     U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
397 
398     // Write out a list of ranges
399     const UnicodeSet* set = UnicodeSet::fromUSet(pSet);
400     UnicodeSetIterator it(*set);
401     fprintf(f, "# Inclusive ranges of the code points in the set.\n");
402     fprintf(f, "ranges = [\n");
403     bool seenFirstString = false;
404     while (it.nextRange()) {
405         if (it.isString()) {
406             if (!seenFirstString) {
407                 seenFirstString = true;
408                 fprintf(f, "]\nstrings = [\n");
409             }
410             const UnicodeString& str = it.getString();
411             fprintf(f, "  ");
412             usrc_writeStringAsASCII(f, str.getBuffer(), str.length(), syntax);
413             fprintf(f, ",\n");
414         } else {
415             U_ASSERT(!seenFirstString);
416             UChar32 start = it.getCodepoint();
417             UChar32 end = it.getCodepointEnd();
418             fprintf(f, "  [0x%x, 0x%x],\n", start, end);
419         }
420     }
421     fprintf(f, "]\n");
422 }
423 
424 U_CAPI void U_EXPORT2
usrc_writeUCPMap(FILE * f,const UCPMap * pMap,icu::ValueNameGetter * valueNameGetter,UTargetSyntax syntax)425 usrc_writeUCPMap(
426         FILE *f,
427         const UCPMap *pMap,
428         icu::ValueNameGetter *valueNameGetter,
429         UTargetSyntax syntax) {
430     // ccode is not yet supported
431     U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
432     (void) syntax; // silence unused variable errors
433 
434     // Print out list of ranges
435     UChar32 start = 0, end;
436     uint32_t value;
437     fprintf(f, "# Code points `a` through `b` have value `v`, corresponding to `name`.\n");
438     fprintf(f, "ranges = [\n");
439     while ((end = ucpmap_getRange(pMap, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value)) >= 0) {
440         if (valueNameGetter != nullptr) {
441             const char *name = valueNameGetter->getName(value);
442             fprintf(f, "  {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", start, end, value, name);
443         } else {
444             fprintf(f, "  {a=0x%x, b=0x%x, v=%u},\n", start, end, value);
445         }
446         start = end + 1;
447     }
448     fprintf(f, "]\n");
449 }
450 
451 U_CAPI void U_EXPORT2
usrc_writeArrayOfMostlyInvChars(FILE * f,const char * prefix,const char * p,int32_t length,const char * postfix)452 usrc_writeArrayOfMostlyInvChars(FILE *f,
453                                 const char *prefix,
454                                 const char *p, int32_t length,
455                                 const char *postfix) {
456     int32_t i, col;
457     int prev2, prev, c;
458 
459     if(prefix!=nullptr) {
460         fprintf(f, prefix, (long)length);
461     }
462     prev2=prev=-1;
463     for(i=col=0; i<length; ++i, ++col) {
464         c=(uint8_t)p[i];
465         if(i>0) {
466             /* Break long lines. Try to break at interesting places, to minimize revision diffs. */
467             if(
468                 /* Very long line. */
469                 col>=32 ||
470                 /* Long line, break after terminating NUL. */
471                 (col>=24 && prev2>=0x20 && prev==0) ||
472                 /* Medium-long line, break before non-NUL, non-character byte. */
473                 (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20)
474             ) {
475                 fputs(",\n", f);
476                 col=0;
477             } else {
478                 fputc(',', f);
479             }
480         }
481         fprintf(f, c<0x20 ? "%u" : "'%c'", c);
482         prev2=prev;
483         prev=c;
484     }
485     if(postfix!=nullptr) {
486         fputs(postfix, f);
487     }
488 }
489 
490 U_CAPI void U_EXPORT2
usrc_writeStringAsASCII(FILE * f,const char16_t * ptr,int32_t length,UTargetSyntax)491 usrc_writeStringAsASCII(FILE *f,
492         const char16_t* ptr, int32_t length,
493         UTargetSyntax) {
494     // For now, assume all UTargetSyntax values are valid here.
495     fprintf(f, "\"");
496     int32_t i = 0;
497     UChar32 cp;
498     while (i < length) {
499         U16_NEXT(ptr, i, length, cp);
500         if (cp == u'"') {
501             fprintf(f, "\\\"");
502         } else if (ICU_Utility::isUnprintable(cp)) {
503             UnicodeString u16result;
504             ICU_Utility::escapeUnprintable(u16result, cp);
505             std::string u8result;
506             u16result.toUTF8String(u8result);
507             fprintf(f, "%s", u8result.data());
508         } else {
509             U_ASSERT(cp < 0x80);
510             char s[2] = {static_cast<char>(cp), 0};
511             fprintf(f, "%s", s);
512         }
513     }
514     fprintf(f, "\"");
515 }
516