• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2005-2012, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  writesrc.c
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2005apr23
16 *   created by: Markus W. Scherer
17 *
18 *   Helper functions for writing source code for data.
19 */
20 
21 #include <stdio.h>
22 #include <inttypes.h>
23 #include <time.h>
24 #include "unicode/utypes.h"
25 #include "unicode/putil.h"
26 #include "unicode/ucptrie.h"
27 #include "unicode/errorcode.h"
28 #include "unicode/uniset.h"
29 #include "unicode/usetiter.h"
30 #include "unicode/utf16.h"
31 #include "utrie2.h"
32 #include "cstring.h"
33 #include "writesrc.h"
34 #include "util.h"
35 
36 U_NAMESPACE_BEGIN
37 
~ValueNameGetter()38 ValueNameGetter::~ValueNameGetter() {}
39 
40 U_NAMESPACE_END
41 
42 U_NAMESPACE_USE
43 
44 static FILE *
usrc_createWithoutHeader(const char * path,const char * filename)45 usrc_createWithoutHeader(const char *path, const char *filename) {
46     char buffer[1024];
47     const char *p;
48     char *q;
49     FILE *f;
50     char c;
51 
52     if(path==NULL) {
53         p=filename;
54     } else {
55         /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */
56         uprv_strcpy(buffer, path);
57         q=buffer+uprv_strlen(buffer);
58         if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
59             *q++=U_FILE_SEP_CHAR;
60         }
61         uprv_strcpy(q, filename);
62         p=buffer;
63     }
64 
65     f=fopen(p, "w");
66     if (f==NULL) {
67         fprintf(
68             stderr,
69             "usrc_create(%s, %s): unable to create file\n",
70             path!=NULL ? path : "", filename);
71     }
72     return f;
73 }
74 
75 U_CAPI FILE * U_EXPORT2
usrc_create(const char * path,const char * filename,int32_t copyrightYear,const char * generator)76 usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
77     FILE *f = usrc_createWithoutHeader(path, filename);
78     if (f == NULL) {
79         return f;
80     }
81     usrc_writeCopyrightHeader(f, "//", copyrightYear);
82     usrc_writeFileNameGeneratedBy(f, "//", filename, generator);
83     return f;
84 }
85 
86 U_CAPI FILE * U_EXPORT2
usrc_createTextData(const char * path,const char * filename,int32_t copyrightYear,const char * generator)87 usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
88     FILE *f = usrc_createWithoutHeader(path, filename);
89     if (f == NULL) {
90         return f;
91     }
92     usrc_writeCopyrightHeader(f, "#", copyrightYear);
93     usrc_writeFileNameGeneratedBy(f, "#", filename, generator);
94     return f;
95 }
96 
97 U_CAPI void U_EXPORT2
usrc_writeCopyrightHeader(FILE * f,const char * prefix,int32_t copyrightYear)98 usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) {
99     fprintf(f,
100         "%s Copyright (C) %d and later: Unicode, Inc. and others.\n"
101         "%s License & terms of use: http://www.unicode.org/copyright.html\n",
102         prefix, copyrightYear, prefix);
103     if (copyrightYear <= 2016) {
104         fprintf(f,
105             "%s Copyright (C) 1999-2016, International Business Machines\n"
106             "%s Corporation and others.  All Rights Reserved.\n",
107             prefix, prefix);
108     }
109 }
110 
111 U_CAPI void U_EXPORT2
usrc_writeFileNameGeneratedBy(FILE * f,const char * prefix,const char * filename,const char * generator)112 usrc_writeFileNameGeneratedBy(
113         FILE *f,
114         const char *prefix,
115         const char *filename,
116         const char *generator) {
117     char buffer[1024];
118     const struct tm *lt;
119     time_t t;
120 
121     const char *pattern =
122         "%s\n"
123         "%s file name: %s\n"
124         "%s\n"
125         "%s machine-generated by: %s\n"
126         "\n";
127 
128     time(&t);
129     lt=localtime(&t);
130     if(generator==NULL) {
131         strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt);
132         fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, buffer);
133     } else {
134         fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, generator);
135     }
136 }
137 
138 U_CAPI void U_EXPORT2
usrc_writeArray(FILE * f,const char * prefix,const void * p,int32_t width,int32_t length,const char * indent,const char * postfix)139 usrc_writeArray(FILE *f,
140                 const char *prefix,
141                 const void *p, int32_t width, int32_t length,
142                 const char *indent,
143                 const char *postfix) {
144     const uint8_t *p8;
145     const uint16_t *p16;
146     const uint32_t *p32;
147     const int64_t *p64; // Signed due to TOML!
148     int64_t value; // Signed due to TOML!
149     int32_t i, col;
150 
151     p8=NULL;
152     p16=NULL;
153     p32=NULL;
154     p64=NULL;
155     switch(width) {
156     case 8:
157         p8=(const uint8_t *)p;
158         break;
159     case 16:
160         p16=(const uint16_t *)p;
161         break;
162     case 32:
163         p32=(const uint32_t *)p;
164         break;
165     case 64:
166         p64=(const int64_t *)p;
167         break;
168     default:
169         fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width);
170         return;
171     }
172     if(prefix!=NULL) {
173         fprintf(f, prefix, (long)length);
174     }
175     for(i=col=0; i<length; ++i, ++col) {
176         if(i>0) {
177             if(col<16) {
178                 fputc(',', f);
179             } else {
180                 fputs(",\n", f);
181                 fputs(indent, f);
182                 col=0;
183             }
184         }
185         switch(width) {
186         case 8:
187             value=p8[i];
188             break;
189         case 16:
190             value=p16[i];
191             break;
192         case 32:
193             value=p32[i];
194             break;
195         case 64:
196             value=p64[i];
197             break;
198         default:
199             value=0; /* unreachable */
200             break;
201         }
202         fprintf(f, value<=9 ? "%" PRId64 : "0x%" PRIx64, value);
203     }
204     if(postfix!=NULL) {
205         fputs(postfix, f);
206     }
207 }
208 
209 U_CAPI void U_EXPORT2
usrc_writeUTrie2Arrays(FILE * f,const char * indexPrefix,const char * data32Prefix,const UTrie2 * pTrie,const char * postfix)210 usrc_writeUTrie2Arrays(FILE *f,
211                        const char *indexPrefix, const char *data32Prefix,
212                        const UTrie2 *pTrie,
213                        const char *postfix) {
214     if(pTrie->data32==NULL) {
215         /* 16-bit trie */
216         usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, "", postfix);
217     } else {
218         /* 32-bit trie */
219         usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, "", postfix);
220         usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix);
221     }
222 }
223 
224 U_CAPI void U_EXPORT2
usrc_writeUTrie2Struct(FILE * f,const char * prefix,const UTrie2 * pTrie,const char * indexName,const char * data32Name,const char * postfix)225 usrc_writeUTrie2Struct(FILE *f,
226                        const char *prefix,
227                        const UTrie2 *pTrie,
228                        const char *indexName, const char *data32Name,
229                        const char *postfix) {
230     if(prefix!=NULL) {
231         fputs(prefix, f);
232     }
233     if(pTrie->data32==NULL) {
234         /* 16-bit trie */
235         fprintf(
236             f,
237             "    %s,\n"         /* index */
238             "    %s+%ld,\n"     /* data16 */
239             "    NULL,\n",      /* data32 */
240             indexName,
241             indexName,
242             (long)pTrie->indexLength);
243     } else {
244         /* 32-bit trie */
245         fprintf(
246             f,
247             "    %s,\n"         /* index */
248             "    NULL,\n"       /* data16 */
249             "    %s,\n",        /* data32 */
250             indexName,
251             data32Name);
252     }
253     fprintf(
254         f,
255         "    %ld,\n"            /* indexLength */
256         "    %ld,\n"            /* dataLength */
257         "    0x%hx,\n"          /* index2NullOffset */
258         "    0x%hx,\n"          /* dataNullOffset */
259         "    0x%lx,\n"          /* initialValue */
260         "    0x%lx,\n"          /* errorValue */
261         "    0x%lx,\n"          /* highStart */
262         "    0x%lx,\n"          /* highValueIndex */
263         "    NULL, 0, false, false, 0, NULL\n",
264         (long)pTrie->indexLength, (long)pTrie->dataLength,
265         (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
266         (long)pTrie->initialValue, (long)pTrie->errorValue,
267         (long)pTrie->highStart, (long)pTrie->highValueIndex);
268     if(postfix!=NULL) {
269         fputs(postfix, f);
270     }
271 }
272 
273 U_CAPI void U_EXPORT2
usrc_writeUCPTrieArrays(FILE * f,const char * indexPrefix,const char * dataPrefix,const UCPTrie * pTrie,const char * postfix,UTargetSyntax syntax)274 usrc_writeUCPTrieArrays(FILE *f,
275                         const char *indexPrefix, const char *dataPrefix,
276                         const UCPTrie *pTrie,
277                         const char *postfix,
278                         UTargetSyntax syntax) {
279     const char* indent = (syntax == UPRV_TARGET_SYNTAX_TOML) ? "  " : "";
280     usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, indent, postfix);
281     int32_t width=
282         pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
283         pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
284         pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
285     usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, indent, postfix);
286 }
287 
288 U_CAPI void U_EXPORT2
usrc_writeUCPTrieStruct(FILE * f,const char * prefix,const UCPTrie * pTrie,const char * indexName,const char * dataName,const char * postfix,UTargetSyntax syntax)289 usrc_writeUCPTrieStruct(FILE *f,
290                         const char *prefix,
291                         const UCPTrie *pTrie,
292                         const char *indexName, const char *dataName,
293                         const char *postfix,
294                         UTargetSyntax syntax) {
295     if(prefix!=NULL) {
296         fputs(prefix, f);
297     }
298     if (syntax == UPRV_TARGET_SYNTAX_CCODE) {
299         fprintf(
300             f,
301             "    %s,\n"             // index
302             "    { %s },\n",        // data (union)
303             indexName,
304             dataName);
305     }
306     const char* pattern =
307         (syntax == UPRV_TARGET_SYNTAX_CCODE) ?
308         "    %ld, %ld,\n"       // indexLength, dataLength
309         "    0x%lx, 0x%x,\n"    // highStart, shifted12HighStart
310         "    %d, %d,\n"         // type, valueWidth
311         "    0, 0,\n"           // reserved32, reserved16
312         "    0x%x, 0x%lx,\n"    // index3NullOffset, dataNullOffset
313         "    0x%lx,\n"          // nullValue
314         :
315         "indexLength = %ld\n"
316         "dataLength = %ld\n"
317         "highStart = 0x%lx\n"
318         "shifted12HighStart = 0x%x\n"
319         "type = %d\n"
320         "valueWidth = %d\n"
321         "index3NullOffset = 0x%x\n"
322         "dataNullOffset = 0x%lx\n"
323         "nullValue = 0x%lx\n"
324         ;
325     fprintf(
326         f,
327         pattern,
328         (long)pTrie->indexLength, (long)pTrie->dataLength,
329         (long)pTrie->highStart, pTrie->shifted12HighStart,
330         pTrie->type, pTrie->valueWidth,
331         pTrie->index3NullOffset, (long)pTrie->dataNullOffset,
332         (long)pTrie->nullValue);
333     if(postfix!=NULL) {
334         fputs(postfix, f);
335     }
336 }
337 
338 U_CAPI void U_EXPORT2
usrc_writeUCPTrie(FILE * f,const char * name,const UCPTrie * pTrie,UTargetSyntax syntax)339 usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) {
340     int32_t width=
341         pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
342         pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
343         pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
344     char line[100], line2[100], line3[100], line4[100];
345 
346     switch (syntax) {
347     case UPRV_TARGET_SYNTAX_CCODE:
348         sprintf(line, "static const uint16_t %s_trieIndex[%%ld]={\n", name);
349         sprintf(line2, "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name);
350         sprintf(line3, "\n};\n\n");
351         break;
352     case UPRV_TARGET_SYNTAX_TOML:
353         sprintf(line, "index = [\n  ");
354         sprintf(line2, "data_%d = [\n  ", (int)width);
355         sprintf(line3, "\n]\n");
356         break;
357     default:
358         UPRV_UNREACHABLE_EXIT;
359     }
360     usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax);
361 
362     switch (syntax) {
363     case UPRV_TARGET_SYNTAX_CCODE:
364         sprintf(line, "static const UCPTrie %s_trie={\n", name);
365         sprintf(line2, "%s_trieIndex", name);
366         sprintf(line3, "%s_trieData", name);
367         sprintf(line4, "};\n\n");
368         break;
369     case UPRV_TARGET_SYNTAX_TOML:
370         line[0] = 0;
371         line2[0] = 0;
372         line3[0] = 0;
373         line4[0] = 0;
374         break;
375     default:
376         UPRV_UNREACHABLE_EXIT;
377     }
378     usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax);
379 }
380 
381 U_CAPI void U_EXPORT2
usrc_writeUnicodeSet(FILE * f,const USet * pSet,UTargetSyntax syntax)382 usrc_writeUnicodeSet(
383         FILE *f,
384         const USet *pSet,
385         UTargetSyntax syntax) {
386     // ccode is not yet supported
387     U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
388 
389     // Write out a list of ranges
390     const UnicodeSet* set = UnicodeSet::fromUSet(pSet);
391     UnicodeSetIterator it(*set);
392     fprintf(f, "# Inclusive ranges of the code points in the set.\n");
393     fprintf(f, "ranges = [\n");
394     bool seenFirstString = false;
395     while (it.nextRange()) {
396         if (it.isString()) {
397             if (!seenFirstString) {
398                 seenFirstString = true;
399                 fprintf(f, "]\nstrings = [\n");
400             }
401             const UnicodeString& str = it.getString();
402             fprintf(f, "  ");
403             usrc_writeStringAsASCII(f, str.getBuffer(), str.length(), syntax);
404             fprintf(f, ",\n");
405         } else {
406             U_ASSERT(!seenFirstString);
407             UChar32 start = it.getCodepoint();
408             UChar32 end = it.getCodepointEnd();
409             fprintf(f, "  [0x%x, 0x%x],\n", start, end);
410         }
411     }
412     fprintf(f, "]\n");
413 }
414 
415 U_CAPI void U_EXPORT2
usrc_writeUCPMap(FILE * f,const UCPMap * pMap,icu::ValueNameGetter * valueNameGetter,UTargetSyntax syntax)416 usrc_writeUCPMap(
417         FILE *f,
418         const UCPMap *pMap,
419         icu::ValueNameGetter *valueNameGetter,
420         UTargetSyntax syntax) {
421     // ccode is not yet supported
422     U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
423     (void) syntax; // silence unused variable errors
424 
425     // Print out list of ranges
426     UChar32 start = 0, end;
427     uint32_t value;
428     fprintf(f, "# Code points `a` through `b` have value `v`, corresponding to `name`.\n");
429     fprintf(f, "ranges = [\n");
430     while ((end = ucpmap_getRange(pMap, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value)) >= 0) {
431         if (valueNameGetter != nullptr) {
432             const char *name = valueNameGetter->getName(value);
433             fprintf(f, "  {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", start, end, value, name);
434         } else {
435             fprintf(f, "  {a=0x%x, b=0x%x, v=%u},\n", start, end, value);
436         }
437         start = end + 1;
438     }
439     fprintf(f, "]\n");
440 }
441 
442 U_CAPI void U_EXPORT2
usrc_writeArrayOfMostlyInvChars(FILE * f,const char * prefix,const char * p,int32_t length,const char * postfix)443 usrc_writeArrayOfMostlyInvChars(FILE *f,
444                                 const char *prefix,
445                                 const char *p, int32_t length,
446                                 const char *postfix) {
447     int32_t i, col;
448     int prev2, prev, c;
449 
450     if(prefix!=NULL) {
451         fprintf(f, prefix, (long)length);
452     }
453     prev2=prev=-1;
454     for(i=col=0; i<length; ++i, ++col) {
455         c=(uint8_t)p[i];
456         if(i>0) {
457             /* Break long lines. Try to break at interesting places, to minimize revision diffs. */
458             if(
459                 /* Very long line. */
460                 col>=32 ||
461                 /* Long line, break after terminating NUL. */
462                 (col>=24 && prev2>=0x20 && prev==0) ||
463                 /* Medium-long line, break before non-NUL, non-character byte. */
464                 (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20)
465             ) {
466                 fputs(",\n", f);
467                 col=0;
468             } else {
469                 fputc(',', f);
470             }
471         }
472         fprintf(f, c<0x20 ? "%u" : "'%c'", c);
473         prev2=prev;
474         prev=c;
475     }
476     if(postfix!=NULL) {
477         fputs(postfix, f);
478     }
479 }
480 
481 U_CAPI void U_EXPORT2
usrc_writeStringAsASCII(FILE * f,const UChar * ptr,int32_t length,UTargetSyntax)482 usrc_writeStringAsASCII(FILE *f,
483         const UChar* ptr, int32_t length,
484         UTargetSyntax) {
485     // For now, assume all UTargetSyntax values are valid here.
486     fprintf(f, "\"");
487     int32_t i = 0;
488     UChar32 cp;
489     while (i < length) {
490         U16_NEXT(ptr, i, length, cp);
491         if (cp == u'"') {
492             fprintf(f, "\\\"");
493         } else if (ICU_Utility::isUnprintable(cp)) {
494             UnicodeString u16result;
495             ICU_Utility::escapeUnprintable(u16result, cp);
496             std::string u8result;
497             u16result.toUTF8String(u8result);
498             fprintf(f, "%s", u8result.data());
499         } else {
500             U_ASSERT(cp < 0x80);
501             char s[2] = {static_cast<char>(cp), 0};
502             fprintf(f, "%s", s);
503         }
504     }
505     fprintf(f, "\"");
506 }
507