• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2005-2012, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  writesrc.c
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2005apr23
16 *   created by: Markus W. Scherer
17 *
18 *   Helper functions for writing source code for data.
19 */
20 
21 #include <stdio.h>
22 #include <time.h>
23 #include "unicode/utypes.h"
24 #include "unicode/putil.h"
25 #include "unicode/ucptrie.h"
26 #include "unicode/errorcode.h"
27 #include "unicode/uniset.h"
28 #include "unicode/usetiter.h"
29 #include "unicode/utf16.h"
30 #include "utrie2.h"
31 #include "cstring.h"
32 #include "writesrc.h"
33 #include "util.h"
34 
35 U_NAMESPACE_BEGIN
36 
~ValueNameGetter()37 ValueNameGetter::~ValueNameGetter() {}
38 
39 U_NAMESPACE_END
40 
41 U_NAMESPACE_USE
42 
43 static FILE *
usrc_createWithoutHeader(const char * path,const char * filename)44 usrc_createWithoutHeader(const char *path, const char *filename) {
45     char buffer[1024];
46     const char *p;
47     char *q;
48     FILE *f;
49     char c;
50 
51     if(path==NULL) {
52         p=filename;
53     } else {
54         /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */
55         uprv_strcpy(buffer, path);
56         q=buffer+uprv_strlen(buffer);
57         if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
58             *q++=U_FILE_SEP_CHAR;
59         }
60         uprv_strcpy(q, filename);
61         p=buffer;
62     }
63 
64     f=fopen(p, "w");
65     if (f==NULL) {
66         fprintf(
67             stderr,
68             "usrc_create(%s, %s): unable to create file\n",
69             path!=NULL ? path : "", filename);
70     }
71     return f;
72 }
73 
74 U_CAPI FILE * U_EXPORT2
usrc_create(const char * path,const char * filename,int32_t copyrightYear,const char * generator)75 usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
76     FILE *f = usrc_createWithoutHeader(path, filename);
77     if (f == NULL) {
78         return f;
79     }
80     usrc_writeCopyrightHeader(f, "//", copyrightYear);
81     usrc_writeFileNameGeneratedBy(f, "//", filename, generator);
82     return f;
83 }
84 
85 U_CAPI FILE * U_EXPORT2
usrc_createTextData(const char * path,const char * filename,int32_t copyrightYear,const char * generator)86 usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
87     FILE *f = usrc_createWithoutHeader(path, filename);
88     if (f == NULL) {
89         return f;
90     }
91     usrc_writeCopyrightHeader(f, "#", copyrightYear);
92     usrc_writeFileNameGeneratedBy(f, "#", filename, generator);
93     return f;
94 }
95 
96 U_CAPI void U_EXPORT2
usrc_writeCopyrightHeader(FILE * f,const char * prefix,int32_t copyrightYear)97 usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) {
98     fprintf(f,
99         "%s Copyright (C) %d and later: Unicode, Inc. and others.\n"
100         "%s License & terms of use: http://www.unicode.org/copyright.html\n",
101         prefix, copyrightYear, prefix);
102     if (copyrightYear <= 2016) {
103         fprintf(f,
104             "%s Copyright (C) 1999-2016, International Business Machines\n"
105             "%s Corporation and others.  All Rights Reserved.\n",
106             prefix, prefix);
107     }
108 }
109 
110 U_CAPI void U_EXPORT2
usrc_writeFileNameGeneratedBy(FILE * f,const char * prefix,const char * filename,const char * generator)111 usrc_writeFileNameGeneratedBy(
112         FILE *f,
113         const char *prefix,
114         const char *filename,
115         const char *generator) {
116     char buffer[1024];
117     const struct tm *lt;
118     time_t t;
119 
120     const char *pattern =
121         "%s\n"
122         "%s file name: %s\n"
123         "%s\n"
124         "%s machine-generated by: %s\n"
125         "\n";
126 
127     time(&t);
128     lt=localtime(&t);
129     if(generator==NULL) {
130         strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt);
131         fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, buffer);
132     } else {
133         fprintf(f, pattern, prefix, prefix, filename, prefix, prefix, generator);
134     }
135 }
136 
137 U_CAPI void U_EXPORT2
usrc_writeArray(FILE * f,const char * prefix,const void * p,int32_t width,int32_t length,const char * indent,const char * postfix)138 usrc_writeArray(FILE *f,
139                 const char *prefix,
140                 const void *p, int32_t width, int32_t length,
141                 const char *indent,
142                 const char *postfix) {
143     const uint8_t *p8;
144     const uint16_t *p16;
145     const uint32_t *p32;
146     uint32_t value;
147     int32_t i, col;
148 
149     p8=NULL;
150     p16=NULL;
151     p32=NULL;
152     switch(width) {
153     case 8:
154         p8=(const uint8_t *)p;
155         break;
156     case 16:
157         p16=(const uint16_t *)p;
158         break;
159     case 32:
160         p32=(const uint32_t *)p;
161         break;
162     default:
163         fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width);
164         return;
165     }
166     if(prefix!=NULL) {
167         fprintf(f, prefix, (long)length);
168     }
169     for(i=col=0; i<length; ++i, ++col) {
170         if(i>0) {
171             if(col<16) {
172                 fputc(',', f);
173             } else {
174                 fputs(",\n", f);
175                 fputs(indent, f);
176                 col=0;
177             }
178         }
179         switch(width) {
180         case 8:
181             value=p8[i];
182             break;
183         case 16:
184             value=p16[i];
185             break;
186         case 32:
187             value=p32[i];
188             break;
189         default:
190             value=0; /* unreachable */
191             break;
192         }
193         fprintf(f, value<=9 ? "%lu" : "0x%lx", (unsigned long)value);
194     }
195     if(postfix!=NULL) {
196         fputs(postfix, f);
197     }
198 }
199 
200 U_CAPI void U_EXPORT2
usrc_writeUTrie2Arrays(FILE * f,const char * indexPrefix,const char * data32Prefix,const UTrie2 * pTrie,const char * postfix)201 usrc_writeUTrie2Arrays(FILE *f,
202                        const char *indexPrefix, const char *data32Prefix,
203                        const UTrie2 *pTrie,
204                        const char *postfix) {
205     if(pTrie->data32==NULL) {
206         /* 16-bit trie */
207         usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, "", postfix);
208     } else {
209         /* 32-bit trie */
210         usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, "", postfix);
211         usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix);
212     }
213 }
214 
215 U_CAPI void U_EXPORT2
usrc_writeUTrie2Struct(FILE * f,const char * prefix,const UTrie2 * pTrie,const char * indexName,const char * data32Name,const char * postfix)216 usrc_writeUTrie2Struct(FILE *f,
217                        const char *prefix,
218                        const UTrie2 *pTrie,
219                        const char *indexName, const char *data32Name,
220                        const char *postfix) {
221     if(prefix!=NULL) {
222         fputs(prefix, f);
223     }
224     if(pTrie->data32==NULL) {
225         /* 16-bit trie */
226         fprintf(
227             f,
228             "    %s,\n"         /* index */
229             "    %s+%ld,\n"     /* data16 */
230             "    NULL,\n",      /* data32 */
231             indexName,
232             indexName,
233             (long)pTrie->indexLength);
234     } else {
235         /* 32-bit trie */
236         fprintf(
237             f,
238             "    %s,\n"         /* index */
239             "    NULL,\n"       /* data16 */
240             "    %s,\n",        /* data32 */
241             indexName,
242             data32Name);
243     }
244     fprintf(
245         f,
246         "    %ld,\n"            /* indexLength */
247         "    %ld,\n"            /* dataLength */
248         "    0x%hx,\n"          /* index2NullOffset */
249         "    0x%hx,\n"          /* dataNullOffset */
250         "    0x%lx,\n"          /* initialValue */
251         "    0x%lx,\n"          /* errorValue */
252         "    0x%lx,\n"          /* highStart */
253         "    0x%lx,\n"          /* highValueIndex */
254         "    NULL, 0, FALSE, FALSE, 0, NULL\n",
255         (long)pTrie->indexLength, (long)pTrie->dataLength,
256         (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
257         (long)pTrie->initialValue, (long)pTrie->errorValue,
258         (long)pTrie->highStart, (long)pTrie->highValueIndex);
259     if(postfix!=NULL) {
260         fputs(postfix, f);
261     }
262 }
263 
264 U_CAPI void U_EXPORT2
usrc_writeUCPTrieArrays(FILE * f,const char * indexPrefix,const char * dataPrefix,const UCPTrie * pTrie,const char * postfix,UTargetSyntax syntax)265 usrc_writeUCPTrieArrays(FILE *f,
266                         const char *indexPrefix, const char *dataPrefix,
267                         const UCPTrie *pTrie,
268                         const char *postfix,
269                         UTargetSyntax syntax) {
270     const char* indent = (syntax == UPRV_TARGET_SYNTAX_TOML) ? "  " : "";
271     usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, indent, postfix);
272     int32_t width=
273         pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
274         pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
275         pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
276     usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, indent, postfix);
277 }
278 
279 U_CAPI void U_EXPORT2
usrc_writeUCPTrieStruct(FILE * f,const char * prefix,const UCPTrie * pTrie,const char * indexName,const char * dataName,const char * postfix,UTargetSyntax syntax)280 usrc_writeUCPTrieStruct(FILE *f,
281                         const char *prefix,
282                         const UCPTrie *pTrie,
283                         const char *indexName, const char *dataName,
284                         const char *postfix,
285                         UTargetSyntax syntax) {
286     if(prefix!=NULL) {
287         fputs(prefix, f);
288     }
289     if (syntax == UPRV_TARGET_SYNTAX_CCODE) {
290         fprintf(
291             f,
292             "    %s,\n"             // index
293             "    { %s },\n",        // data (union)
294             indexName,
295             dataName);
296     }
297     const char* pattern =
298         (syntax == UPRV_TARGET_SYNTAX_CCODE) ?
299         "    %ld, %ld,\n"       // indexLength, dataLength
300         "    0x%lx, 0x%x,\n"    // highStart, shifted12HighStart
301         "    %d, %d,\n"         // type, valueWidth
302         "    0, 0,\n"           // reserved32, reserved16
303         "    0x%x, 0x%lx,\n"    // index3NullOffset, dataNullOffset
304         "    0x%lx,\n"          // nullValue
305         :
306         "indexLength = %ld\n"
307         "dataLength = %ld\n"
308         "highStart = 0x%lx\n"
309         "shifted12HighStart = 0x%x\n"
310         "type = %d\n"
311         "valueWidth = %d\n"
312         "index3NullOffset = 0x%x\n"
313         "dataNullOffset = 0x%lx\n"
314         "nullValue = 0x%lx\n"
315         ;
316     fprintf(
317         f,
318         pattern,
319         (long)pTrie->indexLength, (long)pTrie->dataLength,
320         (long)pTrie->highStart, pTrie->shifted12HighStart,
321         pTrie->type, pTrie->valueWidth,
322         pTrie->index3NullOffset, (long)pTrie->dataNullOffset,
323         (long)pTrie->nullValue);
324     if(postfix!=NULL) {
325         fputs(postfix, f);
326     }
327 }
328 
329 U_CAPI void U_EXPORT2
usrc_writeUCPTrie(FILE * f,const char * name,const UCPTrie * pTrie,UTargetSyntax syntax)330 usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) {
331     int32_t width=
332         pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
333         pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
334         pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
335     char line[100], line2[100], line3[100], line4[100];
336 
337     switch (syntax) {
338     case UPRV_TARGET_SYNTAX_CCODE:
339         sprintf(line, "static const uint16_t %s_trieIndex[%%ld]={\n", name);
340         sprintf(line2, "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name);
341         sprintf(line3, "\n};\n\n");
342         break;
343     case UPRV_TARGET_SYNTAX_TOML:
344         sprintf(line, "index = [\n  ");
345         sprintf(line2, "data_%d = [\n  ", (int)width);
346         sprintf(line3, "\n]\n");
347         break;
348     default:
349         UPRV_UNREACHABLE_EXIT;
350     }
351     usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax);
352 
353     switch (syntax) {
354     case UPRV_TARGET_SYNTAX_CCODE:
355         sprintf(line, "static const UCPTrie %s_trie={\n", name);
356         sprintf(line2, "%s_trieIndex", name);
357         sprintf(line3, "%s_trieData", name);
358         sprintf(line4, "};\n\n");
359         break;
360     case UPRV_TARGET_SYNTAX_TOML:
361         line[0] = 0;
362         line2[0] = 0;
363         line3[0] = 0;
364         line4[0] = 0;
365         break;
366     default:
367         UPRV_UNREACHABLE_EXIT;
368     }
369     usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax);
370 }
371 
372 U_CAPI void U_EXPORT2
usrc_writeUnicodeSet(FILE * f,const USet * pSet,UTargetSyntax syntax)373 usrc_writeUnicodeSet(
374         FILE *f,
375         const USet *pSet,
376         UTargetSyntax syntax) {
377     // ccode is not yet supported
378     U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
379 
380     // Write out a list of ranges
381     const UnicodeSet* set = UnicodeSet::fromUSet(pSet);
382     UnicodeSetIterator it(*set);
383     fprintf(f, "# Inclusive ranges of the code points in the set.\n");
384     fprintf(f, "ranges = [\n");
385     bool seenFirstString = false;
386     while (it.nextRange()) {
387         if (it.isString()) {
388             if (!seenFirstString) {
389                 seenFirstString = true;
390                 fprintf(f, "]\nstrings = [\n");
391             }
392             const UnicodeString& str = it.getString();
393             fprintf(f, "  ");
394             usrc_writeStringAsASCII(f, str.getBuffer(), str.length(), syntax);
395             fprintf(f, ",\n");
396         } else {
397             U_ASSERT(!seenFirstString);
398             UChar32 start = it.getCodepoint();
399             UChar32 end = it.getCodepointEnd();
400             fprintf(f, "  [0x%x, 0x%x],\n", start, end);
401         }
402     }
403     fprintf(f, "]\n");
404 }
405 
406 U_CAPI void U_EXPORT2
usrc_writeUCPMap(FILE * f,const UCPMap * pMap,icu::ValueNameGetter * valueNameGetter,UTargetSyntax syntax)407 usrc_writeUCPMap(
408         FILE *f,
409         const UCPMap *pMap,
410         icu::ValueNameGetter *valueNameGetter,
411         UTargetSyntax syntax) {
412     // ccode is not yet supported
413     U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
414     (void) syntax; // silence unused variable errors
415 
416     // Print out list of ranges
417     UChar32 start = 0, end;
418     uint32_t value;
419     fprintf(f, "# Code points `a` through `b` have value `v`, corresponding to `name`.\n");
420     fprintf(f, "ranges = [\n");
421     while ((end = ucpmap_getRange(pMap, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &value)) >= 0) {
422         if (valueNameGetter != nullptr) {
423             const char *name = valueNameGetter->getName(value);
424             fprintf(f, "  {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", start, end, value, name);
425         } else {
426             fprintf(f, "  {a=0x%x, b=0x%x, v=%u},\n", start, end, value);
427         }
428         start = end + 1;
429     }
430     fprintf(f, "]\n");
431 }
432 
433 U_CAPI void U_EXPORT2
usrc_writeArrayOfMostlyInvChars(FILE * f,const char * prefix,const char * p,int32_t length,const char * postfix)434 usrc_writeArrayOfMostlyInvChars(FILE *f,
435                                 const char *prefix,
436                                 const char *p, int32_t length,
437                                 const char *postfix) {
438     int32_t i, col;
439     int prev2, prev, c;
440 
441     if(prefix!=NULL) {
442         fprintf(f, prefix, (long)length);
443     }
444     prev2=prev=-1;
445     for(i=col=0; i<length; ++i, ++col) {
446         c=(uint8_t)p[i];
447         if(i>0) {
448             /* Break long lines. Try to break at interesting places, to minimize revision diffs. */
449             if(
450                 /* Very long line. */
451                 col>=32 ||
452                 /* Long line, break after terminating NUL. */
453                 (col>=24 && prev2>=0x20 && prev==0) ||
454                 /* Medium-long line, break before non-NUL, non-character byte. */
455                 (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20)
456             ) {
457                 fputs(",\n", f);
458                 col=0;
459             } else {
460                 fputc(',', f);
461             }
462         }
463         fprintf(f, c<0x20 ? "%u" : "'%c'", c);
464         prev2=prev;
465         prev=c;
466     }
467     if(postfix!=NULL) {
468         fputs(postfix, f);
469     }
470 }
471 
472 U_CAPI void U_EXPORT2
usrc_writeStringAsASCII(FILE * f,const UChar * ptr,int32_t length,UTargetSyntax)473 usrc_writeStringAsASCII(FILE *f,
474         const UChar* ptr, int32_t length,
475         UTargetSyntax) {
476     // For now, assume all UTargetSyntax values are valid here.
477     fprintf(f, "\"");
478     int32_t i = 0;
479     UChar32 cp;
480     while (i < length) {
481         U16_NEXT(ptr, i, length, cp);
482         if (cp == u'"') {
483             fprintf(f, "\\\"");
484         } else if (ICU_Utility::isUnprintable(cp)) {
485             UnicodeString u16result;
486             ICU_Utility::escapeUnprintable(u16result, cp);
487             std::string u8result;
488             u16result.toUTF8String(u8result);
489             fprintf(f, "%s", u8result.data());
490         } else {
491             U_ASSERT(cp < 0x80);
492             char s[2] = {static_cast<char>(cp), 0};
493             fprintf(f, "%s", s);
494         }
495     }
496     fprintf(f, "\"");
497 }
498