• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *   Copyright (C) 2009-2012, International Business Machines
3  *   Corporation and others.  All Rights Reserved.
4  *******************************************************************************
5  */
6 #include "unicode/utypes.h"
7 
8 #if U_PLATFORM_HAS_WIN32_API
9 #   define VC_EXTRALEAN
10 #   define WIN32_LEAN_AND_MEAN
11 #   define NOUSER
12 #   define NOSERVICE
13 #   define NOIME
14 #   define NOMCX
15 #include <windows.h>
16 #include <time.h>
17 #   ifdef __GNUC__
18 #       define WINDOWS_WITH_GNUC
19 #   endif
20 #endif
21 
22 #if U_PLATFORM_IS_LINUX_BASED
23 #   define U_ELF
24 #endif
25 
26 #ifdef U_ELF
27 #   include <elf.h>
28 #   if defined(ELFCLASS64)
29 #       define U_ELF64
30 #   endif
31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32 #   ifndef EM_X86_64
33 #       define EM_X86_64 62
34 #   endif
35 #   define ICU_ENTRY_OFFSET 0
36 #endif
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "unicode/putil.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 #include "filestrm.h"
44 #include "toolutil.h"
45 #include "unicode/uclean.h"
46 #include "uoptions.h"
47 #include "pkg_genc.h"
48 
49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
50 
51 #define HEX_0X 0 /*  0x1234 */
52 #define HEX_0H 1 /*  01234h */
53 
54 /* prototypes --------------------------------------------------------------- */
55 static void
56 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
57 
58 static uint32_t
59 write8(FileStream *out, uint8_t byte, uint32_t column);
60 
61 static uint32_t
62 write32(FileStream *out, uint32_t byte, uint32_t column);
63 
64 #if U_PLATFORM == U_PF_OS400
65 static uint32_t
66 write8str(FileStream *out, uint8_t byte, uint32_t column);
67 #endif
68 /* -------------------------------------------------------------------------- */
69 
70 /*
71 Creating Template Files for New Platforms
72 
73 Let the cc compiler help you get started.
74 Compile this program
75     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
76 with the -S option to produce assembly output.
77 
78 For example, this will generate array.s:
79 gcc -S array.c
80 
81 This will produce a .s file that may look like this:
82 
83     .file   "array.c"
84     .version        "01.01"
85 gcc2_compiled.:
86     .globl x
87     .section        .rodata
88     .align 4
89     .type    x,@object
90     .size    x,20
91 x:
92     .long   1
93     .long   2
94     .long   -559038737
95     .long   -1
96     .long   16
97     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
98 
99 which gives a starting point that will compile, and can be transformed
100 to become the template, generally with some consulting of as docs and
101 some experimentation.
102 
103 If you want ICU to automatically use this assembly, you should
104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
105 where the name is the compiler or platform that you used in this
106 assemblyHeader data structure.
107 */
108 static const struct AssemblyType {
109     const char *name;
110     const char *header;
111     const char *beginLine;
112     const char *footer;
113     int8_t      hexType; /* HEX_0X or HEX_0h */
114 } assemblyHeader[] = {
115     {"gcc",
116         ".globl %s\n"
117         "\t.section .note.GNU-stack,\"\",%%progbits\n"
118         "\t.section .rodata\n"
119         "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
120         "\t.type %s,%%object\n"
121         "%s:\n\n",
122 
123         ".long ","",HEX_0X
124     },
125     {"gcc-darwin",
126         /*"\t.section __TEXT,__text,regular,pure_instructions\n"
127         "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
128         ".globl _%s\n"
129         "\t.data\n"
130         "\t.const\n"
131         "\t.align 4\n"  /* 1<<4 = 16 */
132         "_%s:\n\n",
133 
134         ".long ","",HEX_0X
135     },
136     {"gcc-cygwin",
137         ".globl _%s\n"
138         "\t.section .rodata\n"
139         "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
140         "_%s:\n\n",
141 
142         ".long ","",HEX_0X
143     },
144     {"gcc-mingw64",
145         ".globl %s\n"
146         "\t.section .rodata\n"
147         "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
148         "%s:\n\n",
149 
150         ".long ","",HEX_0X
151     },
152     {"sun",
153         "\t.section \".rodata\"\n"
154         "\t.align   8\n"
155         ".globl     %s\n"
156         "%s:\n",
157 
158         ".word ","",HEX_0X
159     },
160     {"sun-x86",
161         "Drodata.rodata:\n"
162         "\t.type   Drodata.rodata,@object\n"
163         "\t.size   Drodata.rodata,0\n"
164         "\t.globl  %s\n"
165         "\t.align  8\n"
166         "%s:\n",
167 
168         ".4byte ","",HEX_0X
169     },
170     {"xlc",
171         ".globl %s{RO}\n"
172         "\t.toc\n"
173         "%s:\n"
174         "\t.csect %s{RO}, 4\n",
175 
176         ".long ","",HEX_0X
177     },
178     {"aCC-ia64",
179         "\t.file   \"%s.s\"\n"
180         "\t.type   %s,@object\n"
181         "\t.global %s\n"
182         "\t.secalias .abe$0.rodata, \".rodata\"\n"
183         "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
184         "\t.align  16\n"
185         "%s::\t",
186 
187         "data4 ","",HEX_0X
188     },
189     {"aCC-parisc",
190         "\t.SPACE  $TEXT$\n"
191         "\t.SUBSPA $LIT$\n"
192         "%s\n"
193         "\t.EXPORT %s\n"
194         "\t.ALIGN  16\n",
195 
196         ".WORD ","",HEX_0X
197     },
198     { "masm",
199       "\tTITLE %s\n"
200       "; generated by genccode\n"
201       ".386\n"
202       ".model flat\n"
203       "\tPUBLIC _%s\n"
204       "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
205       "\tALIGN 16\n"
206       "_%s\tLABEL DWORD\n",
207       "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
208     }
209 };
210 
211 static int32_t assemblyHeaderIndex = -1;
212 static int32_t hexType = HEX_0X;
213 
214 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)215 checkAssemblyHeaderName(const char* optAssembly) {
216     int32_t idx;
217     assemblyHeaderIndex = -1;
218     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
219         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
220             assemblyHeaderIndex = idx;
221             hexType = assemblyHeader[idx].hexType; /* set the hex type */
222             return TRUE;
223         }
224     }
225 
226     return FALSE;
227 }
228 
229 
230 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)231 printAssemblyHeadersToStdErr(void) {
232     int32_t idx;
233     fprintf(stderr, "%s", assemblyHeader[0].name);
234     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
235         fprintf(stderr, ", %s", assemblyHeader[idx].name);
236     }
237     fprintf(stderr,
238         ")\n");
239 }
240 
241 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)242 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
243     uint32_t column = MAX_COLUMN;
244     char entry[64];
245     uint32_t buffer[1024];
246     char *bufferStr = (char *)buffer;
247     FileStream *in, *out;
248     size_t i, length;
249 
250     in=T_FileStream_open(filename, "rb");
251     if(in==NULL) {
252         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
253         exit(U_FILE_ACCESS_ERROR);
254     }
255 
256     getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
257     out=T_FileStream_open(bufferStr, "w");
258     if(out==NULL) {
259         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
260         exit(U_FILE_ACCESS_ERROR);
261     }
262 
263     if (outFilePath != NULL) {
264         uprv_strcpy(outFilePath, bufferStr);
265     }
266 
267 #ifdef WINDOWS_WITH_GNUC
268     /* Need to fix the file seperator character when using MinGW. */
269     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
270 #endif
271 
272     if(optEntryPoint != NULL) {
273         uprv_strcpy(entry, optEntryPoint);
274         uprv_strcat(entry, "_dat");
275     }
276 
277     /* turn dashes or dots in the entry name into underscores */
278     length=uprv_strlen(entry);
279     for(i=0; i<length; ++i) {
280         if(entry[i]=='-' || entry[i]=='.') {
281             entry[i]='_';
282         }
283     }
284 
285     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
286         entry, entry, entry, entry,
287         entry, entry, entry, entry);
288     T_FileStream_writeLine(out, bufferStr);
289     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
290 
291     for(;;) {
292         length=T_FileStream_read(in, buffer, sizeof(buffer));
293         if(length==0) {
294             break;
295         }
296         if (length != sizeof(buffer)) {
297             /* pad with extra 0's when at the end of the file */
298             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
299                 buffer[length+i] = 0;
300             }
301         }
302         for(i=0; i<(length/sizeof(buffer[0])); i++) {
303             column = write32(out, buffer[i], column);
304         }
305     }
306 
307     T_FileStream_writeLine(out, "\n");
308 
309     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
310         entry, entry, entry, entry,
311         entry, entry, entry, entry);
312     T_FileStream_writeLine(out, bufferStr);
313 
314     if(T_FileStream_error(in)) {
315         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
316         exit(U_FILE_ACCESS_ERROR);
317     }
318 
319     if(T_FileStream_error(out)) {
320         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
321         exit(U_FILE_ACCESS_ERROR);
322     }
323 
324     T_FileStream_close(out);
325     T_FileStream_close(in);
326 }
327 
328 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)329 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
330     uint32_t column = MAX_COLUMN;
331     char buffer[4096], entry[64];
332     FileStream *in, *out;
333     size_t i, length;
334 
335     in=T_FileStream_open(filename, "rb");
336     if(in==NULL) {
337         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
338         exit(U_FILE_ACCESS_ERROR);
339     }
340 
341     if(optName != NULL) { /* prepend  'icudt28_' */
342       strcpy(entry, optName);
343       strcat(entry, "_");
344     } else {
345       entry[0] = 0;
346     }
347 
348     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
349     if (outFilePath != NULL) {
350         uprv_strcpy(outFilePath, buffer);
351     }
352     out=T_FileStream_open(buffer, "w");
353     if(out==NULL) {
354         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
355         exit(U_FILE_ACCESS_ERROR);
356     }
357 
358     /* turn dashes or dots in the entry name into underscores */
359     length=uprv_strlen(entry);
360     for(i=0; i<length; ++i) {
361         if(entry[i]=='-' || entry[i]=='.') {
362             entry[i]='_';
363         }
364     }
365 
366 #if U_PLATFORM == U_PF_OS400
367     /*
368     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
369 
370     This is here because this platform can't currently put
371     const data into the read-only pages of an object or
372     shared library (service program). Only strings are allowed in read-only
373     pages, so we use char * strings to store the data.
374 
375     In order to prevent the beginning of the data from ever matching the
376     magic numbers we must still use the initial double.
377     [grhoten 4/24/2003]
378     */
379     sprintf(buffer,
380         "#ifndef IN_GENERATED_CCODE\n"
381         "#define IN_GENERATED_CCODE\n"
382         "#define U_DISABLE_RENAMING 1\n"
383         "#include \"unicode/umachine.h\"\n"
384         "#endif\n"
385         "U_CDECL_BEGIN\n"
386         "const struct {\n"
387         "    double bogus;\n"
388         "    const char *bytes; \n"
389         "} %s={ 0.0, \n",
390         entry);
391     T_FileStream_writeLine(out, buffer);
392 
393     for(;;) {
394         length=T_FileStream_read(in, buffer, sizeof(buffer));
395         if(length==0) {
396             break;
397         }
398         for(i=0; i<length; ++i) {
399             column = write8str(out, (uint8_t)buffer[i], column);
400         }
401     }
402 
403     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
404 #else
405     /* Function renaming shouldn't be done in data */
406     sprintf(buffer,
407         "#ifndef IN_GENERATED_CCODE\n"
408         "#define IN_GENERATED_CCODE\n"
409         "#define U_DISABLE_RENAMING 1\n"
410         "#include \"unicode/umachine.h\"\n"
411         "#endif\n"
412         "U_CDECL_BEGIN\n"
413         "const struct {\n"
414         "    double bogus;\n"
415         "    uint8_t bytes[%ld]; \n"
416         "} %s={ 0.0, {\n",
417         (long)T_FileStream_size(in), entry);
418     T_FileStream_writeLine(out, buffer);
419 
420     for(;;) {
421         length=T_FileStream_read(in, buffer, sizeof(buffer));
422         if(length==0) {
423             break;
424         }
425         for(i=0; i<length; ++i) {
426             column = write8(out, (uint8_t)buffer[i], column);
427         }
428     }
429 
430     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
431 #endif
432 
433     if(T_FileStream_error(in)) {
434         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
435         exit(U_FILE_ACCESS_ERROR);
436     }
437 
438     if(T_FileStream_error(out)) {
439         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
440         exit(U_FILE_ACCESS_ERROR);
441     }
442 
443     T_FileStream_close(out);
444     T_FileStream_close(in);
445 }
446 
447 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)448 write32(FileStream *out, uint32_t bitField, uint32_t column) {
449     int32_t i;
450     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
451     char *s = bitFieldStr;
452     uint8_t *ptrIdx = (uint8_t *)&bitField;
453     static const char hexToStr[16] = {
454         '0','1','2','3',
455         '4','5','6','7',
456         '8','9','A','B',
457         'C','D','E','F'
458     };
459 
460     /* write the value, possibly with comma and newline */
461     if(column==MAX_COLUMN) {
462         /* first byte */
463         column=1;
464     } else if(column<32) {
465         *(s++)=',';
466         ++column;
467     } else {
468         *(s++)='\n';
469         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
470         s+=uprv_strlen(s);
471         column=1;
472     }
473 
474     if (bitField < 10) {
475         /* It's a small number. Don't waste the space for 0x */
476         *(s++)=hexToStr[bitField];
477     }
478     else {
479         int seenNonZero = 0; /* This is used to remove leading zeros */
480 
481         if(hexType==HEX_0X) {
482          *(s++)='0';
483          *(s++)='x';
484         } else if(hexType==HEX_0H) {
485          *(s++)='0';
486         }
487 
488         /* This creates a 32-bit field */
489 #if U_IS_BIG_ENDIAN
490         for (i = 0; i < sizeof(uint32_t); i++)
491 #else
492         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
493 #endif
494         {
495             uint8_t value = ptrIdx[i];
496             if (value || seenNonZero) {
497                 *(s++)=hexToStr[value>>4];
498                 *(s++)=hexToStr[value&0xF];
499                 seenNonZero = 1;
500             }
501         }
502         if(hexType==HEX_0H) {
503          *(s++)='h';
504         }
505     }
506 
507     *(s++)=0;
508     T_FileStream_writeLine(out, bitFieldStr);
509     return column;
510 }
511 
512 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)513 write8(FileStream *out, uint8_t byte, uint32_t column) {
514     char s[4];
515     int i=0;
516 
517     /* convert the byte value to a string */
518     if(byte>=100) {
519         s[i++]=(char)('0'+byte/100);
520         byte%=100;
521     }
522     if(i>0 || byte>=10) {
523         s[i++]=(char)('0'+byte/10);
524         byte%=10;
525     }
526     s[i++]=(char)('0'+byte);
527     s[i]=0;
528 
529     /* write the value, possibly with comma and newline */
530     if(column==MAX_COLUMN) {
531         /* first byte */
532         column=1;
533     } else if(column<16) {
534         T_FileStream_writeLine(out, ",");
535         ++column;
536     } else {
537         T_FileStream_writeLine(out, ",\n");
538         column=1;
539     }
540     T_FileStream_writeLine(out, s);
541     return column;
542 }
543 
544 #if U_PLATFORM == U_PF_OS400
545 static uint32_t
write8str(FileStream * out,uint8_t byte,uint32_t column)546 write8str(FileStream *out, uint8_t byte, uint32_t column) {
547     char s[8];
548 
549     if (byte > 7)
550         sprintf(s, "\\x%X", byte);
551     else
552         sprintf(s, "\\%X", byte);
553 
554     /* write the value, possibly with comma and newline */
555     if(column==MAX_COLUMN) {
556         /* first byte */
557         column=1;
558         T_FileStream_writeLine(out, "\"");
559     } else if(column<24) {
560         ++column;
561     } else {
562         T_FileStream_writeLine(out, "\"\n\"");
563         column=1;
564     }
565     T_FileStream_writeLine(out, s);
566     return column;
567 }
568 #endif
569 
570 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)571 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
572     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
573 
574     /* copy path */
575     if(destdir!=NULL && *destdir!=0) {
576         do {
577             *outFilename++=*destdir++;
578         } while(*destdir!=0);
579         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
580             *outFilename++=U_FILE_SEP_CHAR;
581         }
582         inFilename=basename;
583     } else {
584         while(inFilename<basename) {
585             *outFilename++=*inFilename++;
586         }
587     }
588 
589     if(suffix==NULL) {
590         /* the filename does not have a suffix */
591         uprv_strcpy(entryName, inFilename);
592         if(optFilename != NULL) {
593           uprv_strcpy(outFilename, optFilename);
594         } else {
595           uprv_strcpy(outFilename, inFilename);
596         }
597         uprv_strcat(outFilename, newSuffix);
598     } else {
599         char *saveOutFilename = outFilename;
600         /* copy basename */
601         while(inFilename<suffix) {
602             if(*inFilename=='-') {
603                 /* iSeries cannot have '-' in the .o objects. */
604                 *outFilename++=*entryName++='_';
605                 inFilename++;
606             }
607             else {
608                 *outFilename++=*entryName++=*inFilename++;
609             }
610         }
611 
612         /* replace '.' by '_' */
613         *outFilename++=*entryName++='_';
614         ++inFilename;
615 
616         /* copy suffix */
617         while(*inFilename!=0) {
618             *outFilename++=*entryName++=*inFilename++;
619         }
620 
621         *entryName=0;
622 
623         if(optFilename != NULL) {
624             uprv_strcpy(saveOutFilename, optFilename);
625             uprv_strcat(saveOutFilename, newSuffix);
626         } else {
627             /* add ".c" */
628             uprv_strcpy(outFilename, newSuffix);
629         }
630     }
631 }
632 
633 #ifdef CAN_GENERATE_OBJECTS
634 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)635 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
636     union {
637         char        bytes[2048];
638 #ifdef U_ELF
639         Elf32_Ehdr  header32;
640         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
641 #elif U_PLATFORM_HAS_WIN32_API
642         IMAGE_FILE_HEADER header;
643 #endif
644     } buffer;
645 
646     const char *filename;
647     FileStream *in;
648     int32_t length;
649 
650 #ifdef U_ELF
651 
652 #elif U_PLATFORM_HAS_WIN32_API
653     const IMAGE_FILE_HEADER *pHeader;
654 #else
655 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
656 #endif
657 
658     if(optMatchArch != NULL) {
659         filename=optMatchArch;
660     } else {
661         /* set defaults */
662 #ifdef U_ELF
663         /* set EM_386 because elf.h does not provide better defaults */
664         *pCPU=EM_386;
665         *pBits=32;
666         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
667 #elif U_PLATFORM_HAS_WIN32_API
668 /* _M_IA64 should be defined in windows.h */
669 #   if defined(_M_IA64)
670         *pCPU=IMAGE_FILE_MACHINE_IA64;
671 #   elif defined(_M_AMD64)
672         *pCPU=IMAGE_FILE_MACHINE_AMD64;
673 #   else
674         *pCPU=IMAGE_FILE_MACHINE_I386;
675 #   endif
676         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
677         *pIsBigEndian=FALSE;
678 #else
679 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
680 #endif
681         return;
682     }
683 
684     in=T_FileStream_open(filename, "rb");
685     if(in==NULL) {
686         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
687         exit(U_FILE_ACCESS_ERROR);
688     }
689     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
690 
691 #ifdef U_ELF
692     if(length<sizeof(Elf32_Ehdr)) {
693         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
694         exit(U_UNSUPPORTED_ERROR);
695     }
696     if(
697         buffer.header32.e_ident[0]!=ELFMAG0 ||
698         buffer.header32.e_ident[1]!=ELFMAG1 ||
699         buffer.header32.e_ident[2]!=ELFMAG2 ||
700         buffer.header32.e_ident[3]!=ELFMAG3 ||
701         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
702     ) {
703         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
704         exit(U_UNSUPPORTED_ERROR);
705     }
706 
707     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
708 #ifdef U_ELF64
709     if(*pBits!=32 && *pBits!=64) {
710         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
711         exit(U_UNSUPPORTED_ERROR);
712     }
713 #else
714     if(*pBits!=32) {
715         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
716         exit(U_UNSUPPORTED_ERROR);
717     }
718 #endif
719 
720     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
721     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
722         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
723         exit(U_UNSUPPORTED_ERROR);
724     }
725     /* TODO: Support byte swapping */
726 
727     *pCPU=buffer.header32.e_machine;
728 #elif U_PLATFORM_HAS_WIN32_API
729     if(length<sizeof(IMAGE_FILE_HEADER)) {
730         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
731         exit(U_UNSUPPORTED_ERROR);
732     }
733     /* TODO: Use buffer.header.  Keep aliasing legal.  */
734     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
735     *pCPU=pHeader->Machine;
736     /*
737      * The number of bits is implicit with the Machine value.
738      * *pBits is ignored in the calling code, so this need not be precise.
739      */
740     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
741     /* Windows always runs on little-endian CPUs. */
742     *pIsBigEndian=FALSE;
743 #else
744 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
745 #endif
746 
747     T_FileStream_close(in);
748 }
749 
750 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)751 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
752     /* common variables */
753     char buffer[4096], entry[40]={ 0 };
754     FileStream *in, *out;
755     const char *newSuffix;
756     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
757 
758     uint16_t cpu, bits;
759     UBool makeBigEndian;
760 
761     /* platform-specific variables and initialization code */
762 #ifdef U_ELF
763     /* 32-bit Elf file header */
764     static Elf32_Ehdr header32={
765         {
766             /* e_ident[] */
767             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
768             ELFCLASS32,
769             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
770             EV_CURRENT /* EI_VERSION */
771         },
772         ET_REL,
773         EM_386,
774         EV_CURRENT, /* e_version */
775         0, /* e_entry */
776         0, /* e_phoff */
777         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
778         0, /* e_flags */
779         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
780         0, /* e_phentsize */
781         0, /* e_phnum */
782         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
783         5, /* e_shnum */
784         2 /* e_shstrndx */
785     };
786 
787     /* 32-bit Elf section header table */
788     static Elf32_Shdr sectionHeaders32[5]={
789         { /* SHN_UNDEF */
790             0
791         },
792         { /* .symtab */
793             1, /* sh_name */
794             SHT_SYMTAB,
795             0, /* sh_flags */
796             0, /* sh_addr */
797             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
798             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
799             3, /* sh_link=sect hdr index of .strtab */
800             1, /* sh_info=One greater than the symbol table index of the last
801                 * local symbol (with STB_LOCAL). */
802             4, /* sh_addralign */
803             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
804         },
805         { /* .shstrtab */
806             9, /* sh_name */
807             SHT_STRTAB,
808             0, /* sh_flags */
809             0, /* sh_addr */
810             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
811             40, /* sh_size */
812             0, /* sh_link */
813             0, /* sh_info */
814             1, /* sh_addralign */
815             0 /* sh_entsize */
816         },
817         { /* .strtab */
818             19, /* sh_name */
819             SHT_STRTAB,
820             0, /* sh_flags */
821             0, /* sh_addr */
822             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
823             (Elf32_Word)sizeof(entry), /* sh_size */
824             0, /* sh_link */
825             0, /* sh_info */
826             1, /* sh_addralign */
827             0 /* sh_entsize */
828         },
829         { /* .rodata */
830             27, /* sh_name */
831             SHT_PROGBITS,
832             SHF_ALLOC, /* sh_flags */
833             0, /* sh_addr */
834             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
835             0, /* sh_size */
836             0, /* sh_link */
837             0, /* sh_info */
838             16, /* sh_addralign */
839             0 /* sh_entsize */
840         }
841     };
842 
843     /* symbol table */
844     static Elf32_Sym symbols32[2]={
845         { /* STN_UNDEF */
846             0
847         },
848         { /* data entry point */
849             1, /* st_name */
850             0, /* st_value */
851             0, /* st_size */
852             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
853             0, /* st_other */
854             4 /* st_shndx=index of related section table entry */
855         }
856     };
857 
858     /* section header string table, with decimal string offsets */
859     static const char sectionStrings[40]=
860         /*  0 */ "\0"
861         /*  1 */ ".symtab\0"
862         /*  9 */ ".shstrtab\0"
863         /* 19 */ ".strtab\0"
864         /* 27 */ ".rodata\0"
865         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
866         /* 40: padded to multiple of 8 bytes */
867 
868     /*
869      * Use entry[] for the string table which will contain only the
870      * entry point name.
871      * entry[0] must be 0 (NUL)
872      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
873      */
874 
875     /* 16-align .rodata in the .o file, just in case */
876     static const char padding[16]={ 0 };
877     int32_t paddingSize;
878 
879 #ifdef U_ELF64
880     /* 64-bit Elf file header */
881     static Elf64_Ehdr header64={
882         {
883             /* e_ident[] */
884             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
885             ELFCLASS64,
886             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
887             EV_CURRENT /* EI_VERSION */
888         },
889         ET_REL,
890         EM_X86_64,
891         EV_CURRENT, /* e_version */
892         0, /* e_entry */
893         0, /* e_phoff */
894         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
895         0, /* e_flags */
896         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
897         0, /* e_phentsize */
898         0, /* e_phnum */
899         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
900         5, /* e_shnum */
901         2 /* e_shstrndx */
902     };
903 
904     /* 64-bit Elf section header table */
905     static Elf64_Shdr sectionHeaders64[5]={
906         { /* SHN_UNDEF */
907             0
908         },
909         { /* .symtab */
910             1, /* sh_name */
911             SHT_SYMTAB,
912             0, /* sh_flags */
913             0, /* sh_addr */
914             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
915             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
916             3, /* sh_link=sect hdr index of .strtab */
917             1, /* sh_info=One greater than the symbol table index of the last
918                 * local symbol (with STB_LOCAL). */
919             4, /* sh_addralign */
920             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
921         },
922         { /* .shstrtab */
923             9, /* sh_name */
924             SHT_STRTAB,
925             0, /* sh_flags */
926             0, /* sh_addr */
927             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
928             40, /* sh_size */
929             0, /* sh_link */
930             0, /* sh_info */
931             1, /* sh_addralign */
932             0 /* sh_entsize */
933         },
934         { /* .strtab */
935             19, /* sh_name */
936             SHT_STRTAB,
937             0, /* sh_flags */
938             0, /* sh_addr */
939             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
940             (Elf64_Xword)sizeof(entry), /* sh_size */
941             0, /* sh_link */
942             0, /* sh_info */
943             1, /* sh_addralign */
944             0 /* sh_entsize */
945         },
946         { /* .rodata */
947             27, /* sh_name */
948             SHT_PROGBITS,
949             SHF_ALLOC, /* sh_flags */
950             0, /* sh_addr */
951             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
952             0, /* sh_size */
953             0, /* sh_link */
954             0, /* sh_info */
955             16, /* sh_addralign */
956             0 /* sh_entsize */
957         }
958     };
959 
960     /*
961      * 64-bit symbol table
962      * careful: different order of items compared with Elf32_sym!
963      */
964     static Elf64_Sym symbols64[2]={
965         { /* STN_UNDEF */
966             0
967         },
968         { /* data entry point */
969             1, /* st_name */
970             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
971             0, /* st_other */
972             4, /* st_shndx=index of related section table entry */
973             0, /* st_value */
974             0 /* st_size */
975         }
976     };
977 
978 #endif /* U_ELF64 */
979 
980     /* entry[] has a leading NUL */
981     entryOffset=1;
982 
983     /* in the common code, count entryLength from after the NUL */
984     entryLengthOffset=1;
985 
986     newSuffix=".o";
987 
988 #elif U_PLATFORM_HAS_WIN32_API
989     struct {
990         IMAGE_FILE_HEADER fileHeader;
991         IMAGE_SECTION_HEADER sections[2];
992         char linkerOptions[100];
993     } objHeader;
994     IMAGE_SYMBOL symbols[1];
995     struct {
996         DWORD sizeofLongNames;
997         char longNames[100];
998     } symbolNames;
999 
1000     /*
1001      * entry sometimes has a leading '_'
1002      * overwritten if entryOffset==0 depending on the target platform
1003      * see check for cpu below
1004      */
1005     entry[0]='_';
1006 
1007     newSuffix=".obj";
1008 #else
1009 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1010 #endif
1011 
1012     /* deal with options, files and the entry point name */
1013     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1014     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1015 #if U_PLATFORM_HAS_WIN32_API
1016     if(cpu==IMAGE_FILE_MACHINE_I386) {
1017         entryOffset=1;
1018     }
1019 #endif
1020 
1021     in=T_FileStream_open(filename, "rb");
1022     if(in==NULL) {
1023         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1024         exit(U_FILE_ACCESS_ERROR);
1025     }
1026     size=T_FileStream_size(in);
1027 
1028     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1029     if (outFilePath != NULL) {
1030         uprv_strcpy(outFilePath, buffer);
1031     }
1032 
1033     if(optEntryPoint != NULL) {
1034         uprv_strcpy(entry+entryOffset, optEntryPoint);
1035         uprv_strcat(entry+entryOffset, "_dat");
1036     }
1037     /* turn dashes in the entry name into underscores */
1038     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1039     for(i=0; i<entryLength; ++i) {
1040         if(entry[entryLengthOffset+i]=='-') {
1041             entry[entryLengthOffset+i]='_';
1042         }
1043     }
1044 
1045     /* open the output file */
1046     out=T_FileStream_open(buffer, "wb");
1047     if(out==NULL) {
1048         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1049         exit(U_FILE_ACCESS_ERROR);
1050     }
1051 
1052 #ifdef U_ELF
1053     if(bits==32) {
1054         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1055         header32.e_machine=cpu;
1056 
1057         /* 16-align .rodata in the .o file, just in case */
1058         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1059         if(paddingSize!=0) {
1060                 paddingSize=0x10-paddingSize;
1061                 sectionHeaders32[4].sh_offset+=paddingSize;
1062         }
1063 
1064         sectionHeaders32[4].sh_size=(Elf32_Word)size;
1065 
1066         symbols32[1].st_size=(Elf32_Word)size;
1067 
1068         /* write .o headers */
1069         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1070         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1071         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1072     } else /* bits==64 */ {
1073 #ifdef U_ELF64
1074         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1075         header64.e_machine=cpu;
1076 
1077         /* 16-align .rodata in the .o file, just in case */
1078         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1079         if(paddingSize!=0) {
1080                 paddingSize=0x10-paddingSize;
1081                 sectionHeaders64[4].sh_offset+=paddingSize;
1082         }
1083 
1084         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1085 
1086         symbols64[1].st_size=(Elf64_Xword)size;
1087 
1088         /* write .o headers */
1089         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1090         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1091         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1092 #endif
1093     }
1094 
1095     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1096     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1097     if(paddingSize!=0) {
1098         T_FileStream_write(out, padding, paddingSize);
1099     }
1100 #elif U_PLATFORM_HAS_WIN32_API
1101     /* populate the .obj headers */
1102     uprv_memset(&objHeader, 0, sizeof(objHeader));
1103     uprv_memset(&symbols, 0, sizeof(symbols));
1104     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1105 
1106     /* write the linker export directive */
1107     uprv_strcpy(objHeader.linkerOptions, "-export:");
1108     length=8;
1109     uprv_strcpy(objHeader.linkerOptions+length, entry);
1110     length+=entryLength;
1111     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1112     length+=6;
1113 
1114     /* set the file header */
1115     objHeader.fileHeader.Machine=cpu;
1116     objHeader.fileHeader.NumberOfSections=2;
1117     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1118     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1119     objHeader.fileHeader.NumberOfSymbols=1;
1120 
1121     /* set the section for the linker options */
1122     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1123     objHeader.sections[0].SizeOfRawData=length;
1124     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1125     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1126 
1127     /* set the data section */
1128     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1129     objHeader.sections[1].SizeOfRawData=size;
1130     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1131     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1132 
1133     /* set the symbol table */
1134     if(entryLength<=8) {
1135         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1136         symbolNames.sizeofLongNames=4;
1137     } else {
1138         symbols[0].N.Name.Short=0;
1139         symbols[0].N.Name.Long=4;
1140         symbolNames.sizeofLongNames=4+entryLength+1;
1141         uprv_strcpy(symbolNames.longNames, entry);
1142     }
1143     symbols[0].SectionNumber=2;
1144     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1145 
1146     /* write the file header and the linker options section */
1147     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1148 #else
1149 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1150 #endif
1151 
1152     /* copy the data file into section 2 */
1153     for(;;) {
1154         length=T_FileStream_read(in, buffer, sizeof(buffer));
1155         if(length==0) {
1156             break;
1157         }
1158         T_FileStream_write(out, buffer, (int32_t)length);
1159     }
1160 
1161 #if U_PLATFORM_HAS_WIN32_API
1162     /* write the symbol table */
1163     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1164     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1165 #endif
1166 
1167     if(T_FileStream_error(in)) {
1168         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1169         exit(U_FILE_ACCESS_ERROR);
1170     }
1171 
1172     if(T_FileStream_error(out)) {
1173         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1174         exit(U_FILE_ACCESS_ERROR);
1175     }
1176 
1177     T_FileStream_close(out);
1178     T_FileStream_close(in);
1179 }
1180 #endif
1181