• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /******************************************************************************
2  *   Copyright (C) 2009-2010, International Business Machines
3  *   Corporation and others.  All Rights Reserved.
4  *******************************************************************************
5  */
6 #include "unicode/utypes.h"
7 
8 #ifdef U_WINDOWS
9 #   define VC_EXTRALEAN
10 #   define WIN32_LEAN_AND_MEAN
11 #   define NOUSER
12 #   define NOSERVICE
13 #   define NOIME
14 #   define NOMCX
15 #include <windows.h>
16 #include <time.h>
17 #   ifdef __GNUC__
18 #       define WINDOWS_WITH_GNUC
19 #   endif
20 #endif
21 
22 #ifdef U_LINUX
23 #   define U_ELF
24 #endif
25 
26 #ifdef U_ELF
27 #   include <elf.h>
28 #   if defined(ELFCLASS64)
29 #       define U_ELF64
30 #   endif
31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32 #   ifndef EM_X86_64
33 #       define EM_X86_64 62
34 #   endif
35 #   define ICU_ENTRY_OFFSET 0
36 #endif
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "unicode/putil.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 #include "filestrm.h"
44 #include "toolutil.h"
45 #include "unicode/uclean.h"
46 #include "uoptions.h"
47 #include "pkg_genc.h"
48 
49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) /* sentinel column: nothing has been written yet, so the next value needs no separator */
50 
51 #define HEX_0X 0 /*  0x1234 */
52 #define HEX_0H 1 /*  01234h */
53 
54 #if defined(U_WINDOWS) || defined(U_ELF)
55 #define CAN_GENERATE_OBJECTS /* enables the object-file code (getArchitecture, writeObjectCode) below */
56 #endif
57 
58 /* prototypes --------------------------------------------------------------- */
59 static void
60 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
61 
62 static uint32_t
63 write8(FileStream *out, uint8_t byte, uint32_t column);
64 
65 static uint32_t
66 write32(FileStream *out, uint32_t byte, uint32_t column);
67 
68 #ifdef OS400
69 static uint32_t
70 write8str(FileStream *out, uint8_t byte, uint32_t column);
71 #endif
72 /* -------------------------------------------------------------------------- */
73 
74 /*
75 Creating Template Files for New Platforms
76 
77 Let the cc compiler help you get started.
78 Compile this program
79     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
80 with the -S option to produce assembly output.
81 
82 For example, this will generate array.s:
83 gcc -S array.c
84 
85 This will produce a .s file that may look like this:
86 
87     .file   "array.c"
88     .version        "01.01"
89 gcc2_compiled.:
90     .globl x
91     .section        .rodata
92     .align 4
93     .type    x,@object
94     .size    x,20
95 x:
96     .long   1
97     .long   2
98     .long   -559038737
99     .long   -1
100     .long   16
101     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
102 
103 which gives a starting point that will compile, and can be transformed
104 to become the template, generally with some consulting of as docs and
105 some experimentation.
106 
107 If you want ICU to automatically use this assembly, you should
108 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
109 where the name is the compiler or platform that you used in this
110 assemblyHeader data structure.
111 */
112 static const struct AssemblyType { /* output template for one assembler dialect */
113     const char *name;      /* dialect name compared against the option in checkAssemblyHeaderName() */
114     const char *header;    /* sprintf template written before the data; every %s receives the entry-point name */
115     const char *beginLine; /* data directive written at the start of every line of 32-bit values */
116     const char *footer;    /* sprintf template written after the data; every %s receives the entry-point name */
117     int8_t      hexType; /* HEX_0X or HEX_0H */
118 } assemblyHeader[] = {
119     {"gcc",
120         ".globl %s\n"
121         "\t.section .note.GNU-stack,\"\",%%progbits\n"
122         "\t.section .rodata\n"
123         "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
124         "\t.type %s,%%object\n"
125         "%s:\n\n",
126 
127         ".long ","",HEX_0X
128     },
129     {"gcc-darwin",
130         /*"\t.section __TEXT,__text,regular,pure_instructions\n"
131         "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
132         ".globl _%s\n"
133         "\t.data\n"
134         "\t.const\n"
135         "\t.align 4\n"  /* 1<<4 = 16 */
136         "_%s:\n\n",
137 
138         ".long ","",HEX_0X
139     },
140     {"gcc-cygwin",
141         ".globl _%s\n"
142         "\t.section .rodata\n"
143         "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
144         "_%s:\n\n",
145 
146         ".long ","",HEX_0X
147     },
148     {"sun",
149         "\t.section \".rodata\"\n"
150         "\t.align   8\n"
151         ".globl     %s\n"
152         "%s:\n",
153 
154         ".word ","",HEX_0X
155     },
156     {"sun-x86",
157         "Drodata.rodata:\n"
158         "\t.type   Drodata.rodata,@object\n"
159         "\t.size   Drodata.rodata,0\n"
160         "\t.globl  %s\n"
161         "\t.align  8\n"
162         "%s:\n",
163 
164         ".4byte ","",HEX_0X
165     },
166     {"xlc",
167         ".globl %s{RO}\n"
168         "\t.toc\n"
169         "%s:\n"
170         "\t.csect %s{RO}, 4\n",
171 
172         ".long ","",HEX_0X
173     },
174     {"aCC-ia64",
175         "\t.file   \"%s.s\"\n"
176         "\t.type   %s,@object\n"
177         "\t.global %s\n"
178         "\t.secalias .abe$0.rodata, \".rodata\"\n"
179         "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
180         "\t.align  16\n"
181         "%s::\t",
182 
183         "data4 ","",HEX_0X
184     },
185     {"aCC-parisc",
186         "\t.SPACE  $TEXT$\n"
187         "\t.SUBSPA $LIT$\n"
188         "%s\n"
189         "\t.EXPORT %s\n"
190         "\t.ALIGN  16\n",
191 
192         ".WORD ","",HEX_0X
193     },
194     { "masm",
195       "\tTITLE %s\n"
196       "; generated by genccode\n"
197       ".386\n"
198       ".model flat\n"
199       "\tPUBLIC _%s\n"
200       "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
201       "\tALIGN 16\n"
202       "_%s\tLABEL DWORD\n",
203       "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
204     }
205 };
206 
207 static int32_t assemblyHeaderIndex = -1; /* index into assemblyHeader[] set by checkAssemblyHeaderName(); -1 while no dialect is selected */
208 static int32_t hexType = HEX_0X; /* hex notation (HEX_0X or HEX_0H) of the selected dialect; read by write32() */
209 
210 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)211 checkAssemblyHeaderName(const char* optAssembly) {
212     int32_t idx;
213     assemblyHeaderIndex = -1;
214     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
215         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
216             assemblyHeaderIndex = idx;
217             hexType = assemblyHeader[idx].hexType; /* set the hex type */
218             return TRUE;
219         }
220     }
221 
222     return FALSE;
223 }
224 
225 
226 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)227 printAssemblyHeadersToStdErr(void) {
228     int32_t idx;
229     fprintf(stderr, "%s", assemblyHeader[0].name);
230     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
231         fprintf(stderr, ", %s", assemblyHeader[idx].name);
232     }
233     fprintf(stderr,
234         ")\n");
235 }
236 
237 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)238 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
239     uint32_t column = MAX_COLUMN;
240     char entry[64];
241     uint32_t buffer[1024];
242     char *bufferStr = (char *)buffer;
243     FileStream *in, *out;
244     size_t i, length;
245 
246     in=T_FileStream_open(filename, "rb");
247     if(in==NULL) {
248         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
249         exit(U_FILE_ACCESS_ERROR);
250     }
251 
252     getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
253     out=T_FileStream_open(bufferStr, "w");
254     if(out==NULL) {
255         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
256         exit(U_FILE_ACCESS_ERROR);
257     }
258 
259     if (outFilePath != NULL) {
260         uprv_strcpy(outFilePath, bufferStr);
261     }
262 
263 #ifdef WINDOWS_WITH_GNUC
264     /* Need to fix the file seperator character when using MinGW. */
265     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
266 #endif
267 
268     if(optEntryPoint != NULL) {
269         uprv_strcpy(entry, optEntryPoint);
270         uprv_strcat(entry, "_dat");
271     }
272 
273     /* turn dashes or dots in the entry name into underscores */
274     length=uprv_strlen(entry);
275     for(i=0; i<length; ++i) {
276         if(entry[i]=='-' || entry[i]=='.') {
277             entry[i]='_';
278         }
279     }
280 
281     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
282         entry, entry, entry, entry,
283         entry, entry, entry, entry);
284     T_FileStream_writeLine(out, bufferStr);
285     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
286 
287     for(;;) {
288         length=T_FileStream_read(in, buffer, sizeof(buffer));
289         if(length==0) {
290             break;
291         }
292         if (length != sizeof(buffer)) {
293             /* pad with extra 0's when at the end of the file */
294             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
295                 buffer[length+i] = 0;
296             }
297         }
298         for(i=0; i<(length/sizeof(buffer[0])); i++) {
299             column = write32(out, buffer[i], column);
300         }
301     }
302 
303     T_FileStream_writeLine(out, "\n");
304 
305     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
306         entry, entry, entry, entry,
307         entry, entry, entry, entry);
308     T_FileStream_writeLine(out, bufferStr);
309 
310     if(T_FileStream_error(in)) {
311         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
312         exit(U_FILE_ACCESS_ERROR);
313     }
314 
315     if(T_FileStream_error(out)) {
316         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
317         exit(U_FILE_ACCESS_ERROR);
318     }
319 
320     T_FileStream_close(out);
321     T_FileStream_close(in);
322 }
323 
324 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)325 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
326     uint32_t column = MAX_COLUMN;
327     char buffer[4096], entry[64];
328     FileStream *in, *out;
329     size_t i, length;
330 
331     in=T_FileStream_open(filename, "rb");
332     if(in==NULL) {
333         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
334         exit(U_FILE_ACCESS_ERROR);
335     }
336 
337     if(optName != NULL) { /* prepend  'icudt28_' */
338       strcpy(entry, optName);
339       strcat(entry, "_");
340     } else {
341       entry[0] = 0;
342     }
343 
344     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
345     if (outFilePath != NULL) {
346         uprv_strcpy(outFilePath, buffer);
347     }
348     out=T_FileStream_open(buffer, "w");
349     if(out==NULL) {
350         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
351         exit(U_FILE_ACCESS_ERROR);
352     }
353 
354     /* turn dashes or dots in the entry name into underscores */
355     length=uprv_strlen(entry);
356     for(i=0; i<length; ++i) {
357         if(entry[i]=='-' || entry[i]=='.') {
358             entry[i]='_';
359         }
360     }
361 
362 #ifdef OS400
363     /*
364     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
365 
366     This is here because this platform can't currently put
367     const data into the read-only pages of an object or
368     shared library (service program). Only strings are allowed in read-only
369     pages, so we use char * strings to store the data.
370 
371     In order to prevent the beginning of the data from ever matching the
372     magic numbers we must still use the initial double.
373     [grhoten 4/24/2003]
374     */
375     sprintf(buffer,
376         "#define U_DISABLE_RENAMING 1\n"
377         "#include \"unicode/umachine.h\"\n"
378         "U_CDECL_BEGIN\n"
379         "const struct {\n"
380         "    double bogus;\n"
381         "    const char *bytes; \n"
382         "} %s={ 0.0, \n",
383         entry);
384     T_FileStream_writeLine(out, buffer);
385 
386     for(;;) {
387         length=T_FileStream_read(in, buffer, sizeof(buffer));
388         if(length==0) {
389             break;
390         }
391         for(i=0; i<length; ++i) {
392             column = write8str(out, (uint8_t)buffer[i], column);
393         }
394     }
395 
396     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
397 #else
398     /* Function renaming shouldn't be done in data */
399     sprintf(buffer,
400         "#define U_DISABLE_RENAMING 1\n"
401         "#include \"unicode/umachine.h\"\n"
402         "U_CDECL_BEGIN\n"
403         "const struct {\n"
404         "    double bogus;\n"
405         "    uint8_t bytes[%ld]; \n"
406         "} %s={ 0.0, {\n",
407         (long)T_FileStream_size(in), entry);
408     T_FileStream_writeLine(out, buffer);
409 
410     for(;;) {
411         length=T_FileStream_read(in, buffer, sizeof(buffer));
412         if(length==0) {
413             break;
414         }
415         for(i=0; i<length; ++i) {
416             column = write8(out, (uint8_t)buffer[i], column);
417         }
418     }
419 
420     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
421 #endif
422 
423     if(T_FileStream_error(in)) {
424         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
425         exit(U_FILE_ACCESS_ERROR);
426     }
427 
428     if(T_FileStream_error(out)) {
429         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
430         exit(U_FILE_ACCESS_ERROR);
431     }
432 
433     T_FileStream_close(out);
434     T_FileStream_close(in);
435 }
436 
437 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)438 write32(FileStream *out, uint32_t bitField, uint32_t column) {
439     int32_t i;
440     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
441     char *s = bitFieldStr;
442     uint8_t *ptrIdx = (uint8_t *)&bitField;
443     static const char hexToStr[16] = {
444         '0','1','2','3',
445         '4','5','6','7',
446         '8','9','A','B',
447         'C','D','E','F'
448     };
449 
450     /* write the value, possibly with comma and newline */
451     if(column==MAX_COLUMN) {
452         /* first byte */
453         column=1;
454     } else if(column<32) {
455         *(s++)=',';
456         ++column;
457     } else {
458         *(s++)='\n';
459         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
460         s+=uprv_strlen(s);
461         column=1;
462     }
463 
464     if (bitField < 10) {
465         /* It's a small number. Don't waste the space for 0x */
466         *(s++)=hexToStr[bitField];
467     }
468     else {
469         int seenNonZero = 0; /* This is used to remove leading zeros */
470 
471         if(hexType==HEX_0X) {
472          *(s++)='0';
473          *(s++)='x';
474         } else if(hexType==HEX_0H) {
475          *(s++)='0';
476         }
477 
478         /* This creates a 32-bit field */
479 #if U_IS_BIG_ENDIAN
480         for (i = 0; i < sizeof(uint32_t); i++)
481 #else
482         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
483 #endif
484         {
485             uint8_t value = ptrIdx[i];
486             if (value || seenNonZero) {
487                 *(s++)=hexToStr[value>>4];
488                 *(s++)=hexToStr[value&0xF];
489                 seenNonZero = 1;
490             }
491         }
492         if(hexType==HEX_0H) {
493          *(s++)='h';
494         }
495     }
496 
497     *(s++)=0;
498     T_FileStream_writeLine(out, bitFieldStr);
499     return column;
500 }
501 
502 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)503 write8(FileStream *out, uint8_t byte, uint32_t column) {
504     char s[4];
505     int i=0;
506 
507     /* convert the byte value to a string */
508     if(byte>=100) {
509         s[i++]=(char)('0'+byte/100);
510         byte%=100;
511     }
512     if(i>0 || byte>=10) {
513         s[i++]=(char)('0'+byte/10);
514         byte%=10;
515     }
516     s[i++]=(char)('0'+byte);
517     s[i]=0;
518 
519     /* write the value, possibly with comma and newline */
520     if(column==MAX_COLUMN) {
521         /* first byte */
522         column=1;
523     } else if(column<16) {
524         T_FileStream_writeLine(out, ",");
525         ++column;
526     } else {
527         T_FileStream_writeLine(out, ",\n");
528         column=1;
529     }
530     T_FileStream_writeLine(out, s);
531     return column;
532 }
533 
#ifdef OS400
/**
 * Writes one byte as an escape sequence inside a C string literal.
 *
 * Values above 7 are written as "\x" plus upper-case hex digits, smaller
 * values as a backslash plus one digit.  The very first byte
 * (column==MAX_COLUMN) opens the string literal; after 24 escapes on a
 * line the literal is closed and a new one is opened on the next line.
 *
 * @param out    output stream
 * @param byte   value to write
 * @param column number of escapes on the current line, or MAX_COLUMN
 * @return the updated column count
 */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column) {
    char escape[8];

    if (byte <= 7) {
        sprintf(escape, "\\%X", byte);
    } else {
        sprintf(escape, "\\x%X", byte);
    }

    /* open or break the string literal where needed */
    if(column==MAX_COLUMN) {
        /* very first byte: open the literal */
        T_FileStream_writeLine(out, "\"");
        column=1;
    } else if(column>=24) {
        T_FileStream_writeLine(out, "\"\n\"");
        column=1;
    } else {
        ++column;
    }

    T_FileStream_writeLine(out, escape);
    return column;
}
#endif
559 
560 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)561 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
562     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
563 
564     /* copy path */
565     if(destdir!=NULL && *destdir!=0) {
566         do {
567             *outFilename++=*destdir++;
568         } while(*destdir!=0);
569         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
570             *outFilename++=U_FILE_SEP_CHAR;
571         }
572         inFilename=basename;
573     } else {
574         while(inFilename<basename) {
575             *outFilename++=*inFilename++;
576         }
577     }
578 
579     if(suffix==NULL) {
580         /* the filename does not have a suffix */
581         uprv_strcpy(entryName, inFilename);
582         if(optFilename != NULL) {
583           uprv_strcpy(outFilename, optFilename);
584         } else {
585           uprv_strcpy(outFilename, inFilename);
586         }
587         uprv_strcat(outFilename, newSuffix);
588     } else {
589         char *saveOutFilename = outFilename;
590         /* copy basename */
591         while(inFilename<suffix) {
592             if(*inFilename=='-') {
593                 /* iSeries cannot have '-' in the .o objects. */
594                 *outFilename++=*entryName++='_';
595                 inFilename++;
596             }
597             else {
598                 *outFilename++=*entryName++=*inFilename++;
599             }
600         }
601 
602         /* replace '.' by '_' */
603         *outFilename++=*entryName++='_';
604         ++inFilename;
605 
606         /* copy suffix */
607         while(*inFilename!=0) {
608             *outFilename++=*entryName++=*inFilename++;
609         }
610 
611         *entryName=0;
612 
613         if(optFilename != NULL) {
614             uprv_strcpy(saveOutFilename, optFilename);
615             uprv_strcat(saveOutFilename, newSuffix);
616         } else {
617             /* add ".c" */
618             uprv_strcpy(outFilename, newSuffix);
619         }
620     }
621 }
622 
623 #ifdef CAN_GENERATE_OBJECTS
624 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)625 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
626     union {
627         char        bytes[2048];
628 #ifdef U_ELF
629         Elf32_Ehdr  header32;
630         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
631 #elif defined(U_WINDOWS)
632         IMAGE_FILE_HEADER header;
633 #endif
634     } buffer;
635 
636     const char *filename;
637     FileStream *in;
638     int32_t length;
639 
640 #ifdef U_ELF
641 
642 #elif defined(U_WINDOWS)
643     const IMAGE_FILE_HEADER *pHeader;
644 #else
645 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
646 #endif
647 
648     if(optMatchArch != NULL) {
649         filename=optMatchArch;
650     } else {
651         /* set defaults */
652 #ifdef U_ELF
653         /* set EM_386 because elf.h does not provide better defaults */
654         *pCPU=EM_386;
655         *pBits=32;
656         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
657 #elif defined(U_WINDOWS)
658 /* _M_IA64 should be defined in windows.h */
659 #   if defined(_M_IA64)
660         *pCPU=IMAGE_FILE_MACHINE_IA64;
661 #   elif defined(_M_AMD64)
662         *pCPU=IMAGE_FILE_MACHINE_AMD64;
663 #   else
664         *pCPU=IMAGE_FILE_MACHINE_I386;
665 #   endif
666         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
667         *pIsBigEndian=FALSE;
668 #else
669 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
670 #endif
671         return;
672     }
673 
674     in=T_FileStream_open(filename, "rb");
675     if(in==NULL) {
676         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
677         exit(U_FILE_ACCESS_ERROR);
678     }
679     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
680 
681 #ifdef U_ELF
682     if(length<sizeof(Elf32_Ehdr)) {
683         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
684         exit(U_UNSUPPORTED_ERROR);
685     }
686     if(
687         buffer.header32.e_ident[0]!=ELFMAG0 ||
688         buffer.header32.e_ident[1]!=ELFMAG1 ||
689         buffer.header32.e_ident[2]!=ELFMAG2 ||
690         buffer.header32.e_ident[3]!=ELFMAG3 ||
691         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
692     ) {
693         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
694         exit(U_UNSUPPORTED_ERROR);
695     }
696 
697     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
698 #ifdef U_ELF64
699     if(*pBits!=32 && *pBits!=64) {
700         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
701         exit(U_UNSUPPORTED_ERROR);
702     }
703 #else
704     if(*pBits!=32) {
705         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
706         exit(U_UNSUPPORTED_ERROR);
707     }
708 #endif
709 
710     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
711     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
712         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
713         exit(U_UNSUPPORTED_ERROR);
714     }
715     /* TODO: Support byte swapping */
716 
717     *pCPU=buffer.header32.e_machine;
718 #elif defined(U_WINDOWS)
719     if(length<sizeof(IMAGE_FILE_HEADER)) {
720         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
721         exit(U_UNSUPPORTED_ERROR);
722     }
723     /* TODO: Use buffer.header.  Keep aliasing legal.  */
724     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
725     *pCPU=pHeader->Machine;
726     /*
727      * The number of bits is implicit with the Machine value.
728      * *pBits is ignored in the calling code, so this need not be precise.
729      */
730     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
731     /* Windows always runs on little-endian CPUs. */
732     *pIsBigEndian=FALSE;
733 #else
734 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
735 #endif
736 
737     T_FileStream_close(in);
738 }
739 
740 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)741 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
742     /* common variables */
743     char buffer[4096], entry[40]={ 0 };
744     FileStream *in, *out;
745     const char *newSuffix;
746     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
747 
748     uint16_t cpu, bits;
749     UBool makeBigEndian;
750 
751     /* platform-specific variables and initialization code */
752 #ifdef U_ELF
753     /* 32-bit Elf file header */
754     static Elf32_Ehdr header32={
755         {
756             /* e_ident[] */
757             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
758             ELFCLASS32,
759             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
760             EV_CURRENT /* EI_VERSION */
761         },
762         ET_REL,
763         EM_386,
764         EV_CURRENT, /* e_version */
765         0, /* e_entry */
766         0, /* e_phoff */
767         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
768         0, /* e_flags */
769         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
770         0, /* e_phentsize */
771         0, /* e_phnum */
772         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
773         5, /* e_shnum */
774         2 /* e_shstrndx */
775     };
776 
777     /* 32-bit Elf section header table */
778     static Elf32_Shdr sectionHeaders32[5]={
779         { /* SHN_UNDEF */
780             0
781         },
782         { /* .symtab */
783             1, /* sh_name */
784             SHT_SYMTAB,
785             0, /* sh_flags */
786             0, /* sh_addr */
787             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
788             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
789             3, /* sh_link=sect hdr index of .strtab */
790             1, /* sh_info=One greater than the symbol table index of the last
791                 * local symbol (with STB_LOCAL). */
792             4, /* sh_addralign */
793             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
794         },
795         { /* .shstrtab */
796             9, /* sh_name */
797             SHT_STRTAB,
798             0, /* sh_flags */
799             0, /* sh_addr */
800             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
801             40, /* sh_size */
802             0, /* sh_link */
803             0, /* sh_info */
804             1, /* sh_addralign */
805             0 /* sh_entsize */
806         },
807         { /* .strtab */
808             19, /* sh_name */
809             SHT_STRTAB,
810             0, /* sh_flags */
811             0, /* sh_addr */
812             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
813             (Elf32_Word)sizeof(entry), /* sh_size */
814             0, /* sh_link */
815             0, /* sh_info */
816             1, /* sh_addralign */
817             0 /* sh_entsize */
818         },
819         { /* .rodata */
820             27, /* sh_name */
821             SHT_PROGBITS,
822             SHF_ALLOC, /* sh_flags */
823             0, /* sh_addr */
824             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
825             0, /* sh_size */
826             0, /* sh_link */
827             0, /* sh_info */
828             16, /* sh_addralign */
829             0 /* sh_entsize */
830         }
831     };
832 
833     /* symbol table */
834     static Elf32_Sym symbols32[2]={
835         { /* STN_UNDEF */
836             0
837         },
838         { /* data entry point */
839             1, /* st_name */
840             0, /* st_value */
841             0, /* st_size */
842             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
843             0, /* st_other */
844             4 /* st_shndx=index of related section table entry */
845         }
846     };
847 
848     /* section header string table, with decimal string offsets */
849     static const char sectionStrings[40]=
850         /*  0 */ "\0"
851         /*  1 */ ".symtab\0"
852         /*  9 */ ".shstrtab\0"
853         /* 19 */ ".strtab\0"
854         /* 27 */ ".rodata\0"
855         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
856         /* 40: padded to multiple of 8 bytes */
857 
858     /*
859      * Use entry[] for the string table which will contain only the
860      * entry point name.
861      * entry[0] must be 0 (NUL)
862      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
863      */
864 
865     /* 16-align .rodata in the .o file, just in case */
866     static const char padding[16]={ 0 };
867     int32_t paddingSize;
868 
869 #ifdef U_ELF64
870     /* 64-bit Elf file header */
871     static Elf64_Ehdr header64={
872         {
873             /* e_ident[] */
874             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
875             ELFCLASS64,
876             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
877             EV_CURRENT /* EI_VERSION */
878         },
879         ET_REL,
880         EM_X86_64,
881         EV_CURRENT, /* e_version */
882         0, /* e_entry */
883         0, /* e_phoff */
884         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
885         0, /* e_flags */
886         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
887         0, /* e_phentsize */
888         0, /* e_phnum */
889         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
890         5, /* e_shnum */
891         2 /* e_shstrndx */
892     };
893 
894     /* 64-bit Elf section header table */
895     static Elf64_Shdr sectionHeaders64[5]={
896         { /* SHN_UNDEF */
897             0
898         },
899         { /* .symtab */
900             1, /* sh_name */
901             SHT_SYMTAB,
902             0, /* sh_flags */
903             0, /* sh_addr */
904             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
905             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
906             3, /* sh_link=sect hdr index of .strtab */
907             1, /* sh_info=One greater than the symbol table index of the last
908                 * local symbol (with STB_LOCAL). */
909             4, /* sh_addralign */
910             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
911         },
912         { /* .shstrtab */
913             9, /* sh_name */
914             SHT_STRTAB,
915             0, /* sh_flags */
916             0, /* sh_addr */
917             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
918             40, /* sh_size */
919             0, /* sh_link */
920             0, /* sh_info */
921             1, /* sh_addralign */
922             0 /* sh_entsize */
923         },
924         { /* .strtab */
925             19, /* sh_name */
926             SHT_STRTAB,
927             0, /* sh_flags */
928             0, /* sh_addr */
929             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
930             (Elf64_Xword)sizeof(entry), /* sh_size */
931             0, /* sh_link */
932             0, /* sh_info */
933             1, /* sh_addralign */
934             0 /* sh_entsize */
935         },
936         { /* .rodata */
937             27, /* sh_name */
938             SHT_PROGBITS,
939             SHF_ALLOC, /* sh_flags */
940             0, /* sh_addr */
941             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
942             0, /* sh_size */
943             0, /* sh_link */
944             0, /* sh_info */
945             16, /* sh_addralign */
946             0 /* sh_entsize */
947         }
948     };
949 
950     /*
951      * 64-bit symbol table
952      * careful: different order of items compared with Elf32_Sym!
953      */
954     static Elf64_Sym symbols64[2]={
955         { /* STN_UNDEF */
956             0
957         },
958         { /* data entry point */
959             1, /* st_name */
960             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
961             0, /* st_other */
962             4, /* st_shndx=index of related section table entry */
963             0, /* st_value */
964             0 /* st_size */
965         }
966     };
967 
968 #endif /* U_ELF64 */
969 
970     /* entry[] has a leading NUL */
971     entryOffset=1;
972 
973     /* in the common code, count entryLength from after the NUL */
974     entryLengthOffset=1;
975 
976     newSuffix=".o";
977 
978 #elif defined(U_WINDOWS)
979     struct {
980         IMAGE_FILE_HEADER fileHeader;
981         IMAGE_SECTION_HEADER sections[2];
982         char linkerOptions[100];
983     } objHeader;
984     IMAGE_SYMBOL symbols[1];
985     struct {
986         DWORD sizeofLongNames;
987         char longNames[100];
988     } symbolNames;
989 
990     /*
991      * entry sometimes has a leading '_'
992      * overwritten if entryOffset==0 depending on the target platform
993      * see check for cpu below
994      */
995     entry[0]='_';
996 
997     newSuffix=".obj";
998 #else
999 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1000 #endif
1001 
1002     /* deal with options, files and the entry point name */
1003     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1004     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%hu\n", cpu, bits, makeBigEndian);
1005 #ifdef U_WINDOWS
1006     if(cpu==IMAGE_FILE_MACHINE_I386) {
1007         entryOffset=1;
1008     }
1009 #endif
1010 
1011     in=T_FileStream_open(filename, "rb");
1012     if(in==NULL) {
1013         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1014         exit(U_FILE_ACCESS_ERROR);
1015     }
1016     size=T_FileStream_size(in);
1017 
1018     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1019     if (outFilePath != NULL) {
1020         uprv_strcpy(outFilePath, buffer);
1021     }
1022 
1023     if(optEntryPoint != NULL) {
1024         uprv_strcpy(entry+entryOffset, optEntryPoint);
1025         uprv_strcat(entry+entryOffset, "_dat");
1026     }
1027     /* turn dashes in the entry name into underscores */
1028     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1029     for(i=0; i<entryLength; ++i) {
1030         if(entry[entryLengthOffset+i]=='-') {
1031             entry[entryLengthOffset+i]='_';
1032         }
1033     }
1034 
1035     /* open the output file */
1036     out=T_FileStream_open(buffer, "wb");
1037     if(out==NULL) {
1038         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1039         exit(U_FILE_ACCESS_ERROR);
1040     }
1041 
1042 #ifdef U_ELF
1043     if(bits==32) {
1044         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1045         header32.e_machine=cpu;
1046 
1047         /* 16-align .rodata in the .o file, just in case */
1048         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1049         if(paddingSize!=0) {
1050                 paddingSize=0x10-paddingSize;
1051                 sectionHeaders32[4].sh_offset+=paddingSize;
1052         }
1053 
1054         sectionHeaders32[4].sh_size=(Elf32_Word)size;
1055 
1056         symbols32[1].st_size=(Elf32_Word)size;
1057 
1058         /* write .o headers */
1059         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1060         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1061         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1062     } else /* bits==64 */ {
1063 #ifdef U_ELF64
1064         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1065         header64.e_machine=cpu;
1066 
1067         /* 16-align .rodata in the .o file, just in case */
1068         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1069         if(paddingSize!=0) {
1070                 paddingSize=0x10-paddingSize;
1071                 sectionHeaders64[4].sh_offset+=paddingSize;
1072         }
1073 
1074         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1075 
1076         symbols64[1].st_size=(Elf64_Xword)size;
1077 
1078         /* write .o headers */
1079         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1080         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1081         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1082 #endif
1083     }
1084 
1085     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1086     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1087     if(paddingSize!=0) {
1088         T_FileStream_write(out, padding, paddingSize);
1089     }
1090 #elif defined(U_WINDOWS)
1091     /* populate the .obj headers */
1092     uprv_memset(&objHeader, 0, sizeof(objHeader));
1093     uprv_memset(&symbols, 0, sizeof(symbols));
1094     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1095 
1096     /* write the linker export directive */
1097     uprv_strcpy(objHeader.linkerOptions, "-export:");
1098     length=8;
1099     uprv_strcpy(objHeader.linkerOptions+length, entry);
1100     length+=entryLength;
1101     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1102     length+=6;
1103 
1104     /* set the file header */
1105     objHeader.fileHeader.Machine=cpu;
1106     objHeader.fileHeader.NumberOfSections=2;
1107     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1108     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1109     objHeader.fileHeader.NumberOfSymbols=1;
1110 
1111     /* set the section for the linker options */
1112     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1113     objHeader.sections[0].SizeOfRawData=length;
1114     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1115     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1116 
1117     /* set the data section */
1118     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1119     objHeader.sections[1].SizeOfRawData=size;
1120     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1121     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1122 
1123     /* set the symbol table */
1124     if(entryLength<=8) {
1125         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1126         symbolNames.sizeofLongNames=4;
1127     } else {
1128         symbols[0].N.Name.Short=0;
1129         symbols[0].N.Name.Long=4;
1130         symbolNames.sizeofLongNames=4+entryLength+1;
1131         uprv_strcpy(symbolNames.longNames, entry);
1132     }
1133     symbols[0].SectionNumber=2;
1134     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1135 
1136     /* write the file header and the linker options section */
1137     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1138 #else
1139 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1140 #endif
1141 
1142     /* copy the data file into section 2 */
1143     for(;;) {
1144         length=T_FileStream_read(in, buffer, sizeof(buffer));
1145         if(length==0) {
1146             break;
1147         }
1148         T_FileStream_write(out, buffer, (int32_t)length);
1149     }
1150 
1151 #ifdef U_WINDOWS
1152     /* write the symbol table */
1153     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1154     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1155 #endif
1156 
1157     if(T_FileStream_error(in)) {
1158         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1159         exit(U_FILE_ACCESS_ERROR);
1160     }
1161 
1162     if(T_FileStream_error(out)) {
1163         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1164         exit(U_FILE_ACCESS_ERROR);
1165     }
1166 
1167     T_FileStream_close(out);
1168     T_FileStream_close(in);
1169 }
1170 #endif
1171