• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *   Copyright (C) 2009-2012, International Business Machines
3  *   Corporation and others.  All Rights Reserved.
4  *******************************************************************************
5  */
6 #include "unicode/utypes.h"
7 
8 #if U_PLATFORM_HAS_WIN32_API
9 #   define VC_EXTRALEAN
10 #   define WIN32_LEAN_AND_MEAN
11 #   define NOUSER
12 #   define NOSERVICE
13 #   define NOIME
14 #   define NOMCX
15 #include <windows.h>
16 #include <time.h>
17 #   ifdef __GNUC__
18 #       define WINDOWS_WITH_GNUC
19 #   endif
20 #endif
21 
22 #if U_PLATFORM_IS_LINUX_BASED
23 #   define U_ELF
24 #endif
25 
26 #ifdef U_ELF
27 #   include <elf.h>
28 #   if defined(ELFCLASS64)
29 #       define U_ELF64
30 #   endif
31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32 #   ifndef EM_X86_64
33 #       define EM_X86_64 62
34 #   endif
35 #   define ICU_ENTRY_OFFSET 0
36 #endif
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "unicode/putil.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 #include "filestrm.h"
44 #include "toolutil.h"
45 #include "unicode/uclean.h"
46 #include "uoptions.h"
47 #include "pkg_genc.h"
48 
49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
50 
51 #define HEX_0X 0 /*  0x1234 */
52 #define HEX_0H 1 /*  01234h */
53 
54 /*
55  * The following is needed by MinGW64
56  */
57 #ifndef __USER_LABEL_PREFIX__
58 #define __USER_LABEL_PREFIX__ _
59 #endif
60 #define GCC_LABEL_PREFIX_INTERNAL(a) #a
61 #define GCC_LABEL_PREFIX(a) GCC_LABEL_PREFIX_INTERNAL(a)
62 
63 /* prototypes --------------------------------------------------------------- */
64 static void
65 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
66 
67 static uint32_t
68 write8(FileStream *out, uint8_t byte, uint32_t column);
69 
70 static uint32_t
71 write32(FileStream *out, uint32_t byte, uint32_t column);
72 
73 #if U_PLATFORM == U_PF_OS400
74 static uint32_t
75 write8str(FileStream *out, uint8_t byte, uint32_t column);
76 #endif
77 /* -------------------------------------------------------------------------- */
78 
79 /*
80 Creating Template Files for New Platforms
81 
82 Let the cc compiler help you get started.
83 Compile this program
84     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
85 with the -S option to produce assembly output.
86 
87 For example, this will generate array.s:
88 gcc -S array.c
89 
90 This will produce a .s file that may look like this:
91 
92     .file   "array.c"
93     .version        "01.01"
94 gcc2_compiled.:
95     .globl x
96     .section        .rodata
97     .align 4
98     .type    x,@object
99     .size    x,20
100 x:
101     .long   1
102     .long   2
103     .long   -559038737
104     .long   -1
105     .long   16
106     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
107 
108 which gives a starting point that will compile, and can be transformed
109 to become the template, generally with some consulting of as docs and
110 some experimentation.
111 
112 If you want ICU to automatically use this assembly, you should
113 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
114 where the name is the compiler or platform that you used in this
115 assemblyHeader data structure.
116 */
117 static const struct AssemblyType {
118     const char *name;
119     const char *header;
120     const char *beginLine;
121     const char *footer;
122     int8_t      hexType; /* HEX_0X or HEX_0h */
123 } assemblyHeader[] = {
124     {"gcc",
125         ".globl %s\n"
126         "\t.section .note.GNU-stack,\"\",%%progbits\n"
127         "\t.section .rodata\n"
128         "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
129         "\t.type %s,%%object\n"
130         "%s:\n\n",
131 
132         ".long ","",HEX_0X
133     },
134     {"gcc-darwin",
135         /*"\t.section __TEXT,__text,regular,pure_instructions\n"
136         "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
137         ".globl _%s\n"
138         "\t.data\n"
139         "\t.const\n"
140         "\t.align 4\n"  /* 1<<4 = 16 */
141         "_%s:\n\n",
142 
143         ".long ","",HEX_0X
144     },
145     {"gcc-cygwin",
146         ".globl "GCC_LABEL_PREFIX(__USER_LABEL_PREFIX__) "%s\n"
147         "\t.section .rodata\n"
148         "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
149         GCC_LABEL_PREFIX(__USER_LABEL_PREFIX__) "%s:\n\n",
150 
151         ".long ","",HEX_0X
152     },
153     {"sun",
154         "\t.section \".rodata\"\n"
155         "\t.align   8\n"
156         ".globl     %s\n"
157         "%s:\n",
158 
159         ".word ","",HEX_0X
160     },
161     {"sun-x86",
162         "Drodata.rodata:\n"
163         "\t.type   Drodata.rodata,@object\n"
164         "\t.size   Drodata.rodata,0\n"
165         "\t.globl  %s\n"
166         "\t.align  8\n"
167         "%s:\n",
168 
169         ".4byte ","",HEX_0X
170     },
171     {"xlc",
172         ".globl %s{RO}\n"
173         "\t.toc\n"
174         "%s:\n"
175         "\t.csect %s{RO}, 4\n",
176 
177         ".long ","",HEX_0X
178     },
179     {"aCC-ia64",
180         "\t.file   \"%s.s\"\n"
181         "\t.type   %s,@object\n"
182         "\t.global %s\n"
183         "\t.secalias .abe$0.rodata, \".rodata\"\n"
184         "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
185         "\t.align  16\n"
186         "%s::\t",
187 
188         "data4 ","",HEX_0X
189     },
190     {"aCC-parisc",
191         "\t.SPACE  $TEXT$\n"
192         "\t.SUBSPA $LIT$\n"
193         "%s\n"
194         "\t.EXPORT %s\n"
195         "\t.ALIGN  16\n",
196 
197         ".WORD ","",HEX_0X
198     },
199     { "masm",
200       "\tTITLE %s\n"
201       "; generated by genccode\n"
202       ".386\n"
203       ".model flat\n"
204       "\tPUBLIC _%s\n"
205       "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
206       "\tALIGN 16\n"
207       "_%s\tLABEL DWORD\n",
208       "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
209     }
210 };
211 
212 static int32_t assemblyHeaderIndex = -1;
213 static int32_t hexType = HEX_0X;
214 
215 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)216 checkAssemblyHeaderName(const char* optAssembly) {
217     int32_t idx;
218     assemblyHeaderIndex = -1;
219     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
220         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
221             assemblyHeaderIndex = idx;
222             hexType = assemblyHeader[idx].hexType; /* set the hex type */
223             return TRUE;
224         }
225     }
226 
227     return FALSE;
228 }
229 
230 
231 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)232 printAssemblyHeadersToStdErr(void) {
233     int32_t idx;
234     fprintf(stderr, "%s", assemblyHeader[0].name);
235     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
236         fprintf(stderr, ", %s", assemblyHeader[idx].name);
237     }
238     fprintf(stderr,
239         ")\n");
240 }
241 
242 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)243 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
244     uint32_t column = MAX_COLUMN;
245     char entry[64];
246     uint32_t buffer[1024];
247     char *bufferStr = (char *)buffer;
248     FileStream *in, *out;
249     size_t i, length;
250 
251     in=T_FileStream_open(filename, "rb");
252     if(in==NULL) {
253         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
254         exit(U_FILE_ACCESS_ERROR);
255     }
256 
257     getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
258     out=T_FileStream_open(bufferStr, "w");
259     if(out==NULL) {
260         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
261         exit(U_FILE_ACCESS_ERROR);
262     }
263 
264     if (outFilePath != NULL) {
265         uprv_strcpy(outFilePath, bufferStr);
266     }
267 
268 #ifdef WINDOWS_WITH_GNUC
269     /* Need to fix the file seperator character when using MinGW. */
270     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
271 #endif
272 
273     if(optEntryPoint != NULL) {
274         uprv_strcpy(entry, optEntryPoint);
275         uprv_strcat(entry, "_dat");
276     }
277 
278     /* turn dashes or dots in the entry name into underscores */
279     length=uprv_strlen(entry);
280     for(i=0; i<length; ++i) {
281         if(entry[i]=='-' || entry[i]=='.') {
282             entry[i]='_';
283         }
284     }
285 
286     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
287         entry, entry, entry, entry,
288         entry, entry, entry, entry);
289     T_FileStream_writeLine(out, bufferStr);
290     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
291 
292     for(;;) {
293         length=T_FileStream_read(in, buffer, sizeof(buffer));
294         if(length==0) {
295             break;
296         }
297         if (length != sizeof(buffer)) {
298             /* pad with extra 0's when at the end of the file */
299             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
300                 buffer[length+i] = 0;
301             }
302         }
303         for(i=0; i<(length/sizeof(buffer[0])); i++) {
304             column = write32(out, buffer[i], column);
305         }
306     }
307 
308     T_FileStream_writeLine(out, "\n");
309 
310     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
311         entry, entry, entry, entry,
312         entry, entry, entry, entry);
313     T_FileStream_writeLine(out, bufferStr);
314 
315     if(T_FileStream_error(in)) {
316         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
317         exit(U_FILE_ACCESS_ERROR);
318     }
319 
320     if(T_FileStream_error(out)) {
321         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
322         exit(U_FILE_ACCESS_ERROR);
323     }
324 
325     T_FileStream_close(out);
326     T_FileStream_close(in);
327 }
328 
329 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)330 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
331     uint32_t column = MAX_COLUMN;
332     char buffer[4096], entry[64];
333     FileStream *in, *out;
334     size_t i, length;
335 
336     in=T_FileStream_open(filename, "rb");
337     if(in==NULL) {
338         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
339         exit(U_FILE_ACCESS_ERROR);
340     }
341 
342     if(optName != NULL) { /* prepend  'icudt28_' */
343       strcpy(entry, optName);
344       strcat(entry, "_");
345     } else {
346       entry[0] = 0;
347     }
348 
349     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
350     if (outFilePath != NULL) {
351         uprv_strcpy(outFilePath, buffer);
352     }
353     out=T_FileStream_open(buffer, "w");
354     if(out==NULL) {
355         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
356         exit(U_FILE_ACCESS_ERROR);
357     }
358 
359     /* turn dashes or dots in the entry name into underscores */
360     length=uprv_strlen(entry);
361     for(i=0; i<length; ++i) {
362         if(entry[i]=='-' || entry[i]=='.') {
363             entry[i]='_';
364         }
365     }
366 
367 #if U_PLATFORM == U_PF_OS400
368     /*
369     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
370 
371     This is here because this platform can't currently put
372     const data into the read-only pages of an object or
373     shared library (service program). Only strings are allowed in read-only
374     pages, so we use char * strings to store the data.
375 
376     In order to prevent the beginning of the data from ever matching the
377     magic numbers we must still use the initial double.
378     [grhoten 4/24/2003]
379     */
380     sprintf(buffer,
381         "#ifndef IN_GENERATED_CCODE\n"
382         "#define IN_GENERATED_CCODE\n"
383         "#define U_DISABLE_RENAMING 1\n"
384         "#include \"unicode/umachine.h\"\n"
385         "#endif\n"
386         "U_CDECL_BEGIN\n"
387         "const struct {\n"
388         "    double bogus;\n"
389         "    const char *bytes; \n"
390         "} %s={ 0.0, \n",
391         entry);
392     T_FileStream_writeLine(out, buffer);
393 
394     for(;;) {
395         length=T_FileStream_read(in, buffer, sizeof(buffer));
396         if(length==0) {
397             break;
398         }
399         for(i=0; i<length; ++i) {
400             column = write8str(out, (uint8_t)buffer[i], column);
401         }
402     }
403 
404     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
405 #else
406     /* Function renaming shouldn't be done in data */
407     sprintf(buffer,
408         "#ifndef IN_GENERATED_CCODE\n"
409         "#define IN_GENERATED_CCODE\n"
410         "#define U_DISABLE_RENAMING 1\n"
411         "#include \"unicode/umachine.h\"\n"
412         "#endif\n"
413         "U_CDECL_BEGIN\n"
414         "const struct {\n"
415         "    double bogus;\n"
416         "    uint8_t bytes[%ld]; \n"
417         "} %s={ 0.0, {\n",
418         (long)T_FileStream_size(in), entry);
419     T_FileStream_writeLine(out, buffer);
420 
421     for(;;) {
422         length=T_FileStream_read(in, buffer, sizeof(buffer));
423         if(length==0) {
424             break;
425         }
426         for(i=0; i<length; ++i) {
427             column = write8(out, (uint8_t)buffer[i], column);
428         }
429     }
430 
431     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
432 #endif
433 
434     if(T_FileStream_error(in)) {
435         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
436         exit(U_FILE_ACCESS_ERROR);
437     }
438 
439     if(T_FileStream_error(out)) {
440         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
441         exit(U_FILE_ACCESS_ERROR);
442     }
443 
444     T_FileStream_close(out);
445     T_FileStream_close(in);
446 }
447 
448 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)449 write32(FileStream *out, uint32_t bitField, uint32_t column) {
450     int32_t i;
451     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
452     char *s = bitFieldStr;
453     uint8_t *ptrIdx = (uint8_t *)&bitField;
454     static const char hexToStr[16] = {
455         '0','1','2','3',
456         '4','5','6','7',
457         '8','9','A','B',
458         'C','D','E','F'
459     };
460 
461     /* write the value, possibly with comma and newline */
462     if(column==MAX_COLUMN) {
463         /* first byte */
464         column=1;
465     } else if(column<32) {
466         *(s++)=',';
467         ++column;
468     } else {
469         *(s++)='\n';
470         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
471         s+=uprv_strlen(s);
472         column=1;
473     }
474 
475     if (bitField < 10) {
476         /* It's a small number. Don't waste the space for 0x */
477         *(s++)=hexToStr[bitField];
478     }
479     else {
480         int seenNonZero = 0; /* This is used to remove leading zeros */
481 
482         if(hexType==HEX_0X) {
483          *(s++)='0';
484          *(s++)='x';
485         } else if(hexType==HEX_0H) {
486          *(s++)='0';
487         }
488 
489         /* This creates a 32-bit field */
490 #if U_IS_BIG_ENDIAN
491         for (i = 0; i < sizeof(uint32_t); i++)
492 #else
493         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
494 #endif
495         {
496             uint8_t value = ptrIdx[i];
497             if (value || seenNonZero) {
498                 *(s++)=hexToStr[value>>4];
499                 *(s++)=hexToStr[value&0xF];
500                 seenNonZero = 1;
501             }
502         }
503         if(hexType==HEX_0H) {
504          *(s++)='h';
505         }
506     }
507 
508     *(s++)=0;
509     T_FileStream_writeLine(out, bitFieldStr);
510     return column;
511 }
512 
513 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)514 write8(FileStream *out, uint8_t byte, uint32_t column) {
515     char s[4];
516     int i=0;
517 
518     /* convert the byte value to a string */
519     if(byte>=100) {
520         s[i++]=(char)('0'+byte/100);
521         byte%=100;
522     }
523     if(i>0 || byte>=10) {
524         s[i++]=(char)('0'+byte/10);
525         byte%=10;
526     }
527     s[i++]=(char)('0'+byte);
528     s[i]=0;
529 
530     /* write the value, possibly with comma and newline */
531     if(column==MAX_COLUMN) {
532         /* first byte */
533         column=1;
534     } else if(column<16) {
535         T_FileStream_writeLine(out, ",");
536         ++column;
537     } else {
538         T_FileStream_writeLine(out, ",\n");
539         column=1;
540     }
541     T_FileStream_writeLine(out, s);
542     return column;
543 }
544 
545 #if U_PLATFORM == U_PF_OS400
546 static uint32_t
write8str(FileStream * out,uint8_t byte,uint32_t column)547 write8str(FileStream *out, uint8_t byte, uint32_t column) {
548     char s[8];
549 
550     if (byte > 7)
551         sprintf(s, "\\x%X", byte);
552     else
553         sprintf(s, "\\%X", byte);
554 
555     /* write the value, possibly with comma and newline */
556     if(column==MAX_COLUMN) {
557         /* first byte */
558         column=1;
559         T_FileStream_writeLine(out, "\"");
560     } else if(column<24) {
561         ++column;
562     } else {
563         T_FileStream_writeLine(out, "\"\n\"");
564         column=1;
565     }
566     T_FileStream_writeLine(out, s);
567     return column;
568 }
569 #endif
570 
571 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)572 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
573     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
574 
575     /* copy path */
576     if(destdir!=NULL && *destdir!=0) {
577         do {
578             *outFilename++=*destdir++;
579         } while(*destdir!=0);
580         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
581             *outFilename++=U_FILE_SEP_CHAR;
582         }
583         inFilename=basename;
584     } else {
585         while(inFilename<basename) {
586             *outFilename++=*inFilename++;
587         }
588     }
589 
590     if(suffix==NULL) {
591         /* the filename does not have a suffix */
592         uprv_strcpy(entryName, inFilename);
593         if(optFilename != NULL) {
594           uprv_strcpy(outFilename, optFilename);
595         } else {
596           uprv_strcpy(outFilename, inFilename);
597         }
598         uprv_strcat(outFilename, newSuffix);
599     } else {
600         char *saveOutFilename = outFilename;
601         /* copy basename */
602         while(inFilename<suffix) {
603             if(*inFilename=='-') {
604                 /* iSeries cannot have '-' in the .o objects. */
605                 *outFilename++=*entryName++='_';
606                 inFilename++;
607             }
608             else {
609                 *outFilename++=*entryName++=*inFilename++;
610             }
611         }
612 
613         /* replace '.' by '_' */
614         *outFilename++=*entryName++='_';
615         ++inFilename;
616 
617         /* copy suffix */
618         while(*inFilename!=0) {
619             *outFilename++=*entryName++=*inFilename++;
620         }
621 
622         *entryName=0;
623 
624         if(optFilename != NULL) {
625             uprv_strcpy(saveOutFilename, optFilename);
626             uprv_strcat(saveOutFilename, newSuffix);
627         } else {
628             /* add ".c" */
629             uprv_strcpy(outFilename, newSuffix);
630         }
631     }
632 }
633 
634 #ifdef CAN_GENERATE_OBJECTS
635 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)636 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
637     union {
638         char        bytes[2048];
639 #ifdef U_ELF
640         Elf32_Ehdr  header32;
641         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
642 #elif U_PLATFORM_HAS_WIN32_API
643         IMAGE_FILE_HEADER header;
644 #endif
645     } buffer;
646 
647     const char *filename;
648     FileStream *in;
649     int32_t length;
650 
651 #ifdef U_ELF
652 
653 #elif U_PLATFORM_HAS_WIN32_API
654     const IMAGE_FILE_HEADER *pHeader;
655 #else
656 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
657 #endif
658 
659     if(optMatchArch != NULL) {
660         filename=optMatchArch;
661     } else {
662         /* set defaults */
663 #ifdef U_ELF
664         /* set EM_386 because elf.h does not provide better defaults */
665         *pCPU=EM_386;
666         *pBits=32;
667         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
668 #elif U_PLATFORM_HAS_WIN32_API
669 /* _M_IA64 should be defined in windows.h */
670 #   if defined(_M_IA64)
671         *pCPU=IMAGE_FILE_MACHINE_IA64;
672 #   elif defined(_M_AMD64)
673         *pCPU=IMAGE_FILE_MACHINE_AMD64;
674 #   else
675         *pCPU=IMAGE_FILE_MACHINE_I386;
676 #   endif
677         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
678         *pIsBigEndian=FALSE;
679 #else
680 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
681 #endif
682         return;
683     }
684 
685     in=T_FileStream_open(filename, "rb");
686     if(in==NULL) {
687         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
688         exit(U_FILE_ACCESS_ERROR);
689     }
690     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
691 
692 #ifdef U_ELF
693     if(length<sizeof(Elf32_Ehdr)) {
694         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
695         exit(U_UNSUPPORTED_ERROR);
696     }
697     if(
698         buffer.header32.e_ident[0]!=ELFMAG0 ||
699         buffer.header32.e_ident[1]!=ELFMAG1 ||
700         buffer.header32.e_ident[2]!=ELFMAG2 ||
701         buffer.header32.e_ident[3]!=ELFMAG3 ||
702         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
703     ) {
704         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
705         exit(U_UNSUPPORTED_ERROR);
706     }
707 
708     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
709 #ifdef U_ELF64
710     if(*pBits!=32 && *pBits!=64) {
711         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
712         exit(U_UNSUPPORTED_ERROR);
713     }
714 #else
715     if(*pBits!=32) {
716         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
717         exit(U_UNSUPPORTED_ERROR);
718     }
719 #endif
720 
721     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
722     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
723         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
724         exit(U_UNSUPPORTED_ERROR);
725     }
726     /* TODO: Support byte swapping */
727 
728     *pCPU=buffer.header32.e_machine;
729 #elif U_PLATFORM_HAS_WIN32_API
730     if(length<sizeof(IMAGE_FILE_HEADER)) {
731         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
732         exit(U_UNSUPPORTED_ERROR);
733     }
734     /* TODO: Use buffer.header.  Keep aliasing legal.  */
735     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
736     *pCPU=pHeader->Machine;
737     /*
738      * The number of bits is implicit with the Machine value.
739      * *pBits is ignored in the calling code, so this need not be precise.
740      */
741     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
742     /* Windows always runs on little-endian CPUs. */
743     *pIsBigEndian=FALSE;
744 #else
745 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
746 #endif
747 
748     T_FileStream_close(in);
749 }
750 
751 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)752 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
753     /* common variables */
754     char buffer[4096], entry[40]={ 0 };
755     FileStream *in, *out;
756     const char *newSuffix;
757     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
758 
759     uint16_t cpu, bits;
760     UBool makeBigEndian;
761 
762     /* platform-specific variables and initialization code */
763 #ifdef U_ELF
764     /* 32-bit Elf file header */
765     static Elf32_Ehdr header32={
766         {
767             /* e_ident[] */
768             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
769             ELFCLASS32,
770             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
771             EV_CURRENT /* EI_VERSION */
772         },
773         ET_REL,
774         EM_386,
775         EV_CURRENT, /* e_version */
776         0, /* e_entry */
777         0, /* e_phoff */
778         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
779         0, /* e_flags */
780         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
781         0, /* e_phentsize */
782         0, /* e_phnum */
783         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
784         5, /* e_shnum */
785         2 /* e_shstrndx */
786     };
787 
788     /* 32-bit Elf section header table */
789     static Elf32_Shdr sectionHeaders32[5]={
790         { /* SHN_UNDEF */
791             0
792         },
793         { /* .symtab */
794             1, /* sh_name */
795             SHT_SYMTAB,
796             0, /* sh_flags */
797             0, /* sh_addr */
798             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
799             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
800             3, /* sh_link=sect hdr index of .strtab */
801             1, /* sh_info=One greater than the symbol table index of the last
802                 * local symbol (with STB_LOCAL). */
803             4, /* sh_addralign */
804             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
805         },
806         { /* .shstrtab */
807             9, /* sh_name */
808             SHT_STRTAB,
809             0, /* sh_flags */
810             0, /* sh_addr */
811             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
812             40, /* sh_size */
813             0, /* sh_link */
814             0, /* sh_info */
815             1, /* sh_addralign */
816             0 /* sh_entsize */
817         },
818         { /* .strtab */
819             19, /* sh_name */
820             SHT_STRTAB,
821             0, /* sh_flags */
822             0, /* sh_addr */
823             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
824             (Elf32_Word)sizeof(entry), /* sh_size */
825             0, /* sh_link */
826             0, /* sh_info */
827             1, /* sh_addralign */
828             0 /* sh_entsize */
829         },
830         { /* .rodata */
831             27, /* sh_name */
832             SHT_PROGBITS,
833             SHF_ALLOC, /* sh_flags */
834             0, /* sh_addr */
835             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
836             0, /* sh_size */
837             0, /* sh_link */
838             0, /* sh_info */
839             16, /* sh_addralign */
840             0 /* sh_entsize */
841         }
842     };
843 
844     /* symbol table */
845     static Elf32_Sym symbols32[2]={
846         { /* STN_UNDEF */
847             0
848         },
849         { /* data entry point */
850             1, /* st_name */
851             0, /* st_value */
852             0, /* st_size */
853             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
854             0, /* st_other */
855             4 /* st_shndx=index of related section table entry */
856         }
857     };
858 
859     /* section header string table, with decimal string offsets */
860     static const char sectionStrings[40]=
861         /*  0 */ "\0"
862         /*  1 */ ".symtab\0"
863         /*  9 */ ".shstrtab\0"
864         /* 19 */ ".strtab\0"
865         /* 27 */ ".rodata\0"
866         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
867         /* 40: padded to multiple of 8 bytes */
868 
869     /*
870      * Use entry[] for the string table which will contain only the
871      * entry point name.
872      * entry[0] must be 0 (NUL)
873      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
874      */
875 
876     /* 16-align .rodata in the .o file, just in case */
877     static const char padding[16]={ 0 };
878     int32_t paddingSize;
879 
880 #ifdef U_ELF64
881     /* 64-bit Elf file header */
882     static Elf64_Ehdr header64={
883         {
884             /* e_ident[] */
885             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
886             ELFCLASS64,
887             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
888             EV_CURRENT /* EI_VERSION */
889         },
890         ET_REL,
891         EM_X86_64,
892         EV_CURRENT, /* e_version */
893         0, /* e_entry */
894         0, /* e_phoff */
895         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
896         0, /* e_flags */
897         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
898         0, /* e_phentsize */
899         0, /* e_phnum */
900         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
901         5, /* e_shnum */
902         2 /* e_shstrndx */
903     };
904 
905     /* 64-bit Elf section header table */
906     static Elf64_Shdr sectionHeaders64[5]={
907         { /* SHN_UNDEF */
908             0
909         },
910         { /* .symtab */
911             1, /* sh_name */
912             SHT_SYMTAB,
913             0, /* sh_flags */
914             0, /* sh_addr */
915             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
916             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
917             3, /* sh_link=sect hdr index of .strtab */
918             1, /* sh_info=One greater than the symbol table index of the last
919                 * local symbol (with STB_LOCAL). */
920             4, /* sh_addralign */
921             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
922         },
923         { /* .shstrtab */
924             9, /* sh_name */
925             SHT_STRTAB,
926             0, /* sh_flags */
927             0, /* sh_addr */
928             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
929             40, /* sh_size */
930             0, /* sh_link */
931             0, /* sh_info */
932             1, /* sh_addralign */
933             0 /* sh_entsize */
934         },
935         { /* .strtab */
936             19, /* sh_name */
937             SHT_STRTAB,
938             0, /* sh_flags */
939             0, /* sh_addr */
940             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
941             (Elf64_Xword)sizeof(entry), /* sh_size */
942             0, /* sh_link */
943             0, /* sh_info */
944             1, /* sh_addralign */
945             0 /* sh_entsize */
946         },
947         { /* .rodata */
948             27, /* sh_name */
949             SHT_PROGBITS,
950             SHF_ALLOC, /* sh_flags */
951             0, /* sh_addr */
952             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
953             0, /* sh_size */
954             0, /* sh_link */
955             0, /* sh_info */
956             16, /* sh_addralign */
957             0 /* sh_entsize */
958         }
959     };
960 
961     /*
962      * 64-bit symbol table
963      * careful: different order of items compared with Elf32_sym!
964      */
965     static Elf64_Sym symbols64[2]={
966         { /* STN_UNDEF */
967             0
968         },
969         { /* data entry point */
970             1, /* st_name */
971             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
972             0, /* st_other */
973             4, /* st_shndx=index of related section table entry */
974             0, /* st_value */
975             0 /* st_size */
976         }
977     };
978 
979 #endif /* U_ELF64 */
980 
981     /* entry[] has a leading NUL */
982     entryOffset=1;
983 
984     /* in the common code, count entryLength from after the NUL */
985     entryLengthOffset=1;
986 
987     newSuffix=".o";
988 
989 #elif U_PLATFORM_HAS_WIN32_API
990     struct {
991         IMAGE_FILE_HEADER fileHeader;
992         IMAGE_SECTION_HEADER sections[2];
993         char linkerOptions[100];
994     } objHeader;
995     IMAGE_SYMBOL symbols[1];
996     struct {
997         DWORD sizeofLongNames;
998         char longNames[100];
999     } symbolNames;
1000 
1001     /*
1002      * entry sometimes has a leading '_'
1003      * overwritten if entryOffset==0 depending on the target platform
1004      * see check for cpu below
1005      */
1006     entry[0]='_';
1007 
1008     newSuffix=".obj";
1009 #else
1010 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1011 #endif
1012 
1013     /* deal with options, files and the entry point name */
1014     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1015     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1016 #if U_PLATFORM_HAS_WIN32_API
1017     if(cpu==IMAGE_FILE_MACHINE_I386) {
1018         entryOffset=1;
1019     }
1020 #endif
1021 
1022     in=T_FileStream_open(filename, "rb");
1023     if(in==NULL) {
1024         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1025         exit(U_FILE_ACCESS_ERROR);
1026     }
1027     size=T_FileStream_size(in);
1028 
1029     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1030     if (outFilePath != NULL) {
1031         uprv_strcpy(outFilePath, buffer);
1032     }
1033 
1034     if(optEntryPoint != NULL) {
1035         uprv_strcpy(entry+entryOffset, optEntryPoint);
1036         uprv_strcat(entry+entryOffset, "_dat");
1037     }
1038     /* turn dashes in the entry name into underscores */
1039     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1040     for(i=0; i<entryLength; ++i) {
1041         if(entry[entryLengthOffset+i]=='-') {
1042             entry[entryLengthOffset+i]='_';
1043         }
1044     }
1045 
1046     /* open the output file */
1047     out=T_FileStream_open(buffer, "wb");
1048     if(out==NULL) {
1049         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1050         exit(U_FILE_ACCESS_ERROR);
1051     }
1052 
1053 #ifdef U_ELF
1054     if(bits==32) {
1055         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1056         header32.e_machine=cpu;
1057 
1058         /* 16-align .rodata in the .o file, just in case */
1059         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1060         if(paddingSize!=0) {
1061                 paddingSize=0x10-paddingSize;
1062                 sectionHeaders32[4].sh_offset+=paddingSize;
1063         }
1064 
1065         sectionHeaders32[4].sh_size=(Elf32_Word)size;
1066 
1067         symbols32[1].st_size=(Elf32_Word)size;
1068 
1069         /* write .o headers */
1070         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1071         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1072         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1073     } else /* bits==64 */ {
1074 #ifdef U_ELF64
1075         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1076         header64.e_machine=cpu;
1077 
1078         /* 16-align .rodata in the .o file, just in case */
1079         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1080         if(paddingSize!=0) {
1081                 paddingSize=0x10-paddingSize;
1082                 sectionHeaders64[4].sh_offset+=paddingSize;
1083         }
1084 
1085         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1086 
1087         symbols64[1].st_size=(Elf64_Xword)size;
1088 
1089         /* write .o headers */
1090         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1091         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1092         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1093 #endif
1094     }
1095 
1096     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1097     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1098     if(paddingSize!=0) {
1099         T_FileStream_write(out, padding, paddingSize);
1100     }
1101 #elif U_PLATFORM_HAS_WIN32_API
1102     /* populate the .obj headers */
1103     uprv_memset(&objHeader, 0, sizeof(objHeader));
1104     uprv_memset(&symbols, 0, sizeof(symbols));
1105     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1106 
1107     /* write the linker export directive */
1108     uprv_strcpy(objHeader.linkerOptions, "-export:");
1109     length=8;
1110     uprv_strcpy(objHeader.linkerOptions+length, entry);
1111     length+=entryLength;
1112     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1113     length+=6;
1114 
1115     /* set the file header */
1116     objHeader.fileHeader.Machine=cpu;
1117     objHeader.fileHeader.NumberOfSections=2;
1118     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1119     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1120     objHeader.fileHeader.NumberOfSymbols=1;
1121 
1122     /* set the section for the linker options */
1123     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1124     objHeader.sections[0].SizeOfRawData=length;
1125     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1126     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1127 
1128     /* set the data section */
1129     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1130     objHeader.sections[1].SizeOfRawData=size;
1131     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1132     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1133 
1134     /* set the symbol table */
1135     if(entryLength<=8) {
1136         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1137         symbolNames.sizeofLongNames=4;
1138     } else {
1139         symbols[0].N.Name.Short=0;
1140         symbols[0].N.Name.Long=4;
1141         symbolNames.sizeofLongNames=4+entryLength+1;
1142         uprv_strcpy(symbolNames.longNames, entry);
1143     }
1144     symbols[0].SectionNumber=2;
1145     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1146 
1147     /* write the file header and the linker options section */
1148     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1149 #else
1150 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1151 #endif
1152 
1153     /* copy the data file into section 2 */
1154     for(;;) {
1155         length=T_FileStream_read(in, buffer, sizeof(buffer));
1156         if(length==0) {
1157             break;
1158         }
1159         T_FileStream_write(out, buffer, (int32_t)length);
1160     }
1161 
1162 #if U_PLATFORM_HAS_WIN32_API
1163     /* write the symbol table */
1164     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1165     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1166 #endif
1167 
1168     if(T_FileStream_error(in)) {
1169         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1170         exit(U_FILE_ACCESS_ERROR);
1171     }
1172 
1173     if(T_FileStream_error(out)) {
1174         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1175         exit(U_FILE_ACCESS_ERROR);
1176     }
1177 
1178     T_FileStream_close(out);
1179     T_FileStream_close(in);
1180 }
1181 #endif
1182