1 /******************************************************************************
2 * Copyright (C) 2009-2012, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 *******************************************************************************
5 */
6 #include "unicode/utypes.h"
7
8 #if U_PLATFORM_HAS_WIN32_API
9 # define VC_EXTRALEAN
10 # define WIN32_LEAN_AND_MEAN
11 # define NOUSER
12 # define NOSERVICE
13 # define NOIME
14 # define NOMCX
15 #include <windows.h>
16 #include <time.h>
17 # ifdef __GNUC__
18 # define WINDOWS_WITH_GNUC
19 # endif
20 #endif
21
22 #if U_PLATFORM_IS_LINUX_BASED
23 # define U_ELF
24 #endif
25
26 #ifdef U_ELF
27 # include <elf.h>
28 # if defined(ELFCLASS64)
29 # define U_ELF64
30 # endif
31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32 # ifndef EM_X86_64
33 # define EM_X86_64 62
34 # endif
35 # define ICU_ENTRY_OFFSET 0
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "unicode/putil.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 #include "filestrm.h"
44 #include "toolutil.h"
45 #include "unicode/uclean.h"
46 #include "uoptions.h"
47 #include "pkg_genc.h"
48
/*
 * Sentinel column value handed to write8()/write32()/write8str() before the
 * first value has been written; those helpers test for it so that no
 * separator is emitted in front of the very first value.
 */
#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))

/* Notations for hexadecimal literals; stored in AssemblyType.hexType. */
#define HEX_0X 0 /* 0x1234 */
#define HEX_0H 1 /* 01234h */

/*
 * The following is needed by MinGW64
 * (provides a fallback label prefix when the compiler does not predefine one).
 */
#ifndef __USER_LABEL_PREFIX__
#define __USER_LABEL_PREFIX__ _
#endif
/*
 * Two-level stringification: the outer macro expands its argument first,
 * the inner one turns the expansion into a string literal.
 */
#define GCC_LABEL_PREFIX_INTERNAL(a) #a
#define GCC_LABEL_PREFIX(a) GCC_LABEL_PREFIX_INTERNAL(a)
62
63 /* prototypes --------------------------------------------------------------- */
/*
 * Derives the output file path (into outFilename) and the entry point name
 * (into entryName) from the input file name, destination directory, new
 * suffix and optional output base name.
 */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal number and returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/*
 * Writes one 32-bit value for the assembler output and returns the updated
 * column counter. (The parameter is a full 32-bit word despite its name here.)
 */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#if U_PLATFORM == U_PF_OS400
/* Writes one byte as an escape sequence inside a C string literal. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
77 /* -------------------------------------------------------------------------- */
78
79 /*
80 Creating Template Files for New Platforms
81
82 Let the cc compiler help you get started.
83 Compile this program
84 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
85 with the -S option to produce assembly output.
86
87 For example, this will generate array.s:
88 gcc -S array.c
89
90 This will produce a .s file that may look like this:
91
92 .file "array.c"
93 .version "01.01"
94 gcc2_compiled.:
95 .globl x
96 .section .rodata
97 .align 4
98 .type x,@object
99 .size x,20
100 x:
101 .long 1
102 .long 2
103 .long -559038737
104 .long -1
105 .long 16
106 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
107
108 which gives a starting point that will compile, and can be transformed
109 to become the template, generally with some consulting of as docs and
110 some experimentation.
111
112 If you want ICU to automatically use this assembly, you should
113 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
114 where the name is the compiler or platform that you used in this
115 assemblyHeader data structure.
116 */
/*
 * Table of supported assembler dialects.
 * checkAssemblyHeaderName() selects an entry by name; writeAssemblyCode() and
 * write32() then use the selected entry's templates.
 */
static const struct AssemblyType {
    /* dialect name, compared against the requested name in checkAssemblyHeaderName() */
    const char *name;
    /* printf-style template written before the data; each %s receives the entry point name */
    const char *header;
    /* directive text that begins every line of data values */
    const char *beginLine;
    /* printf-style template written after the data; each %s receives the entry point name */
    const char *footer;
    int8_t hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "\t.data\n"
        "\t.const\n"
        "\t.align 4\n" /* 1<<4 = 16 */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl "GCC_LABEL_PREFIX(__USER_LABEL_PREFIX__) "%s\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        GCC_LABEL_PREFIX(__USER_LABEL_PREFIX__) "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align 8\n"
        ".globl %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type Drodata.rodata,@object\n"
        "\t.size Drodata.rodata,0\n"
        "\t.globl %s\n"
        "\t.align 8\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file \"%s.s\"\n"
        "\t.type %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align 16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN 16\n",

        ".WORD ","",HEX_0X
    },
    { "masm",
        "\tTITLE %s\n"
        "; generated by genccode\n"
        ".386\n"
        ".model flat\n"
        "\tPUBLIC _%s\n"
        "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
        "\tALIGN 16\n"
        "_%s\tLABEL DWORD\n",
        "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};

/* Index into assemblyHeader[] of the selected dialect; -1 while none is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Hex notation (HEX_0X or HEX_0H) of the selected dialect; read by write32(). */
static int32_t hexType = HEX_0X;
214
215 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)216 checkAssemblyHeaderName(const char* optAssembly) {
217 int32_t idx;
218 assemblyHeaderIndex = -1;
219 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
220 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
221 assemblyHeaderIndex = idx;
222 hexType = assemblyHeader[idx].hexType; /* set the hex type */
223 return TRUE;
224 }
225 }
226
227 return FALSE;
228 }
229
230
231 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)232 printAssemblyHeadersToStdErr(void) {
233 int32_t idx;
234 fprintf(stderr, "%s", assemblyHeader[0].name);
235 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
236 fprintf(stderr, ", %s", assemblyHeader[idx].name);
237 }
238 fprintf(stderr,
239 ")\n");
240 }
241
242 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)243 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
244 uint32_t column = MAX_COLUMN;
245 char entry[64];
246 uint32_t buffer[1024];
247 char *bufferStr = (char *)buffer;
248 FileStream *in, *out;
249 size_t i, length;
250
251 in=T_FileStream_open(filename, "rb");
252 if(in==NULL) {
253 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
254 exit(U_FILE_ACCESS_ERROR);
255 }
256
257 getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
258 out=T_FileStream_open(bufferStr, "w");
259 if(out==NULL) {
260 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
261 exit(U_FILE_ACCESS_ERROR);
262 }
263
264 if (outFilePath != NULL) {
265 uprv_strcpy(outFilePath, bufferStr);
266 }
267
268 #ifdef WINDOWS_WITH_GNUC
269 /* Need to fix the file seperator character when using MinGW. */
270 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
271 #endif
272
273 if(optEntryPoint != NULL) {
274 uprv_strcpy(entry, optEntryPoint);
275 uprv_strcat(entry, "_dat");
276 }
277
278 /* turn dashes or dots in the entry name into underscores */
279 length=uprv_strlen(entry);
280 for(i=0; i<length; ++i) {
281 if(entry[i]=='-' || entry[i]=='.') {
282 entry[i]='_';
283 }
284 }
285
286 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
287 entry, entry, entry, entry,
288 entry, entry, entry, entry);
289 T_FileStream_writeLine(out, bufferStr);
290 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
291
292 for(;;) {
293 length=T_FileStream_read(in, buffer, sizeof(buffer));
294 if(length==0) {
295 break;
296 }
297 if (length != sizeof(buffer)) {
298 /* pad with extra 0's when at the end of the file */
299 for(i=0; i < (length % sizeof(uint32_t)); ++i) {
300 buffer[length+i] = 0;
301 }
302 }
303 for(i=0; i<(length/sizeof(buffer[0])); i++) {
304 column = write32(out, buffer[i], column);
305 }
306 }
307
308 T_FileStream_writeLine(out, "\n");
309
310 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
311 entry, entry, entry, entry,
312 entry, entry, entry, entry);
313 T_FileStream_writeLine(out, bufferStr);
314
315 if(T_FileStream_error(in)) {
316 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
317 exit(U_FILE_ACCESS_ERROR);
318 }
319
320 if(T_FileStream_error(out)) {
321 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
322 exit(U_FILE_ACCESS_ERROR);
323 }
324
325 T_FileStream_close(out);
326 T_FileStream_close(in);
327 }
328
329 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)330 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
331 uint32_t column = MAX_COLUMN;
332 char buffer[4096], entry[64];
333 FileStream *in, *out;
334 size_t i, length;
335
336 in=T_FileStream_open(filename, "rb");
337 if(in==NULL) {
338 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
339 exit(U_FILE_ACCESS_ERROR);
340 }
341
342 if(optName != NULL) { /* prepend 'icudt28_' */
343 strcpy(entry, optName);
344 strcat(entry, "_");
345 } else {
346 entry[0] = 0;
347 }
348
349 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
350 if (outFilePath != NULL) {
351 uprv_strcpy(outFilePath, buffer);
352 }
353 out=T_FileStream_open(buffer, "w");
354 if(out==NULL) {
355 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
356 exit(U_FILE_ACCESS_ERROR);
357 }
358
359 /* turn dashes or dots in the entry name into underscores */
360 length=uprv_strlen(entry);
361 for(i=0; i<length; ++i) {
362 if(entry[i]=='-' || entry[i]=='.') {
363 entry[i]='_';
364 }
365 }
366
367 #if U_PLATFORM == U_PF_OS400
368 /*
369 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
370
371 This is here because this platform can't currently put
372 const data into the read-only pages of an object or
373 shared library (service program). Only strings are allowed in read-only
374 pages, so we use char * strings to store the data.
375
376 In order to prevent the beginning of the data from ever matching the
377 magic numbers we must still use the initial double.
378 [grhoten 4/24/2003]
379 */
380 sprintf(buffer,
381 "#ifndef IN_GENERATED_CCODE\n"
382 "#define IN_GENERATED_CCODE\n"
383 "#define U_DISABLE_RENAMING 1\n"
384 "#include \"unicode/umachine.h\"\n"
385 "#endif\n"
386 "U_CDECL_BEGIN\n"
387 "const struct {\n"
388 " double bogus;\n"
389 " const char *bytes; \n"
390 "} %s={ 0.0, \n",
391 entry);
392 T_FileStream_writeLine(out, buffer);
393
394 for(;;) {
395 length=T_FileStream_read(in, buffer, sizeof(buffer));
396 if(length==0) {
397 break;
398 }
399 for(i=0; i<length; ++i) {
400 column = write8str(out, (uint8_t)buffer[i], column);
401 }
402 }
403
404 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
405 #else
406 /* Function renaming shouldn't be done in data */
407 sprintf(buffer,
408 "#ifndef IN_GENERATED_CCODE\n"
409 "#define IN_GENERATED_CCODE\n"
410 "#define U_DISABLE_RENAMING 1\n"
411 "#include \"unicode/umachine.h\"\n"
412 "#endif\n"
413 "U_CDECL_BEGIN\n"
414 "const struct {\n"
415 " double bogus;\n"
416 " uint8_t bytes[%ld]; \n"
417 "} %s={ 0.0, {\n",
418 (long)T_FileStream_size(in), entry);
419 T_FileStream_writeLine(out, buffer);
420
421 for(;;) {
422 length=T_FileStream_read(in, buffer, sizeof(buffer));
423 if(length==0) {
424 break;
425 }
426 for(i=0; i<length; ++i) {
427 column = write8(out, (uint8_t)buffer[i], column);
428 }
429 }
430
431 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
432 #endif
433
434 if(T_FileStream_error(in)) {
435 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
436 exit(U_FILE_ACCESS_ERROR);
437 }
438
439 if(T_FileStream_error(out)) {
440 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
441 exit(U_FILE_ACCESS_ERROR);
442 }
443
444 T_FileStream_close(out);
445 T_FileStream_close(in);
446 }
447
448 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)449 write32(FileStream *out, uint32_t bitField, uint32_t column) {
450 int32_t i;
451 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
452 char *s = bitFieldStr;
453 uint8_t *ptrIdx = (uint8_t *)&bitField;
454 static const char hexToStr[16] = {
455 '0','1','2','3',
456 '4','5','6','7',
457 '8','9','A','B',
458 'C','D','E','F'
459 };
460
461 /* write the value, possibly with comma and newline */
462 if(column==MAX_COLUMN) {
463 /* first byte */
464 column=1;
465 } else if(column<32) {
466 *(s++)=',';
467 ++column;
468 } else {
469 *(s++)='\n';
470 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
471 s+=uprv_strlen(s);
472 column=1;
473 }
474
475 if (bitField < 10) {
476 /* It's a small number. Don't waste the space for 0x */
477 *(s++)=hexToStr[bitField];
478 }
479 else {
480 int seenNonZero = 0; /* This is used to remove leading zeros */
481
482 if(hexType==HEX_0X) {
483 *(s++)='0';
484 *(s++)='x';
485 } else if(hexType==HEX_0H) {
486 *(s++)='0';
487 }
488
489 /* This creates a 32-bit field */
490 #if U_IS_BIG_ENDIAN
491 for (i = 0; i < sizeof(uint32_t); i++)
492 #else
493 for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
494 #endif
495 {
496 uint8_t value = ptrIdx[i];
497 if (value || seenNonZero) {
498 *(s++)=hexToStr[value>>4];
499 *(s++)=hexToStr[value&0xF];
500 seenNonZero = 1;
501 }
502 }
503 if(hexType==HEX_0H) {
504 *(s++)='h';
505 }
506 }
507
508 *(s++)=0;
509 T_FileStream_writeLine(out, bitFieldStr);
510 return column;
511 }
512
513 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)514 write8(FileStream *out, uint8_t byte, uint32_t column) {
515 char s[4];
516 int i=0;
517
518 /* convert the byte value to a string */
519 if(byte>=100) {
520 s[i++]=(char)('0'+byte/100);
521 byte%=100;
522 }
523 if(i>0 || byte>=10) {
524 s[i++]=(char)('0'+byte/10);
525 byte%=10;
526 }
527 s[i++]=(char)('0'+byte);
528 s[i]=0;
529
530 /* write the value, possibly with comma and newline */
531 if(column==MAX_COLUMN) {
532 /* first byte */
533 column=1;
534 } else if(column<16) {
535 T_FileStream_writeLine(out, ",");
536 ++column;
537 } else {
538 T_FileStream_writeLine(out, ",\n");
539 column=1;
540 }
541 T_FileStream_writeLine(out, s);
542 return column;
543 }
544
545 #if U_PLATFORM == U_PF_OS400
546 static uint32_t
write8str(FileStream * out,uint8_t byte,uint32_t column)547 write8str(FileStream *out, uint8_t byte, uint32_t column) {
548 char s[8];
549
550 if (byte > 7)
551 sprintf(s, "\\x%X", byte);
552 else
553 sprintf(s, "\\%X", byte);
554
555 /* write the value, possibly with comma and newline */
556 if(column==MAX_COLUMN) {
557 /* first byte */
558 column=1;
559 T_FileStream_writeLine(out, "\"");
560 } else if(column<24) {
561 ++column;
562 } else {
563 T_FileStream_writeLine(out, "\"\n\"");
564 column=1;
565 }
566 T_FileStream_writeLine(out, s);
567 return column;
568 }
569 #endif
570
571 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)572 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
573 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
574
575 /* copy path */
576 if(destdir!=NULL && *destdir!=0) {
577 do {
578 *outFilename++=*destdir++;
579 } while(*destdir!=0);
580 if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
581 *outFilename++=U_FILE_SEP_CHAR;
582 }
583 inFilename=basename;
584 } else {
585 while(inFilename<basename) {
586 *outFilename++=*inFilename++;
587 }
588 }
589
590 if(suffix==NULL) {
591 /* the filename does not have a suffix */
592 uprv_strcpy(entryName, inFilename);
593 if(optFilename != NULL) {
594 uprv_strcpy(outFilename, optFilename);
595 } else {
596 uprv_strcpy(outFilename, inFilename);
597 }
598 uprv_strcat(outFilename, newSuffix);
599 } else {
600 char *saveOutFilename = outFilename;
601 /* copy basename */
602 while(inFilename<suffix) {
603 if(*inFilename=='-') {
604 /* iSeries cannot have '-' in the .o objects. */
605 *outFilename++=*entryName++='_';
606 inFilename++;
607 }
608 else {
609 *outFilename++=*entryName++=*inFilename++;
610 }
611 }
612
613 /* replace '.' by '_' */
614 *outFilename++=*entryName++='_';
615 ++inFilename;
616
617 /* copy suffix */
618 while(*inFilename!=0) {
619 *outFilename++=*entryName++=*inFilename++;
620 }
621
622 *entryName=0;
623
624 if(optFilename != NULL) {
625 uprv_strcpy(saveOutFilename, optFilename);
626 uprv_strcat(saveOutFilename, newSuffix);
627 } else {
628 /* add ".c" */
629 uprv_strcpy(outFilename, newSuffix);
630 }
631 }
632 }
633
634 #ifdef CAN_GENERATE_OBJECTS
635 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)636 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
637 union {
638 char bytes[2048];
639 #ifdef U_ELF
640 Elf32_Ehdr header32;
641 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
642 #elif U_PLATFORM_HAS_WIN32_API
643 IMAGE_FILE_HEADER header;
644 #endif
645 } buffer;
646
647 const char *filename;
648 FileStream *in;
649 int32_t length;
650
651 #ifdef U_ELF
652
653 #elif U_PLATFORM_HAS_WIN32_API
654 const IMAGE_FILE_HEADER *pHeader;
655 #else
656 # error "Unknown platform for CAN_GENERATE_OBJECTS."
657 #endif
658
659 if(optMatchArch != NULL) {
660 filename=optMatchArch;
661 } else {
662 /* set defaults */
663 #ifdef U_ELF
664 /* set EM_386 because elf.h does not provide better defaults */
665 *pCPU=EM_386;
666 *pBits=32;
667 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
668 #elif U_PLATFORM_HAS_WIN32_API
669 /* _M_IA64 should be defined in windows.h */
670 # if defined(_M_IA64)
671 *pCPU=IMAGE_FILE_MACHINE_IA64;
672 # elif defined(_M_AMD64)
673 *pCPU=IMAGE_FILE_MACHINE_AMD64;
674 # else
675 *pCPU=IMAGE_FILE_MACHINE_I386;
676 # endif
677 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
678 *pIsBigEndian=FALSE;
679 #else
680 # error "Unknown platform for CAN_GENERATE_OBJECTS."
681 #endif
682 return;
683 }
684
685 in=T_FileStream_open(filename, "rb");
686 if(in==NULL) {
687 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
688 exit(U_FILE_ACCESS_ERROR);
689 }
690 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
691
692 #ifdef U_ELF
693 if(length<sizeof(Elf32_Ehdr)) {
694 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
695 exit(U_UNSUPPORTED_ERROR);
696 }
697 if(
698 buffer.header32.e_ident[0]!=ELFMAG0 ||
699 buffer.header32.e_ident[1]!=ELFMAG1 ||
700 buffer.header32.e_ident[2]!=ELFMAG2 ||
701 buffer.header32.e_ident[3]!=ELFMAG3 ||
702 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
703 ) {
704 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
705 exit(U_UNSUPPORTED_ERROR);
706 }
707
708 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
709 #ifdef U_ELF64
710 if(*pBits!=32 && *pBits!=64) {
711 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
712 exit(U_UNSUPPORTED_ERROR);
713 }
714 #else
715 if(*pBits!=32) {
716 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
717 exit(U_UNSUPPORTED_ERROR);
718 }
719 #endif
720
721 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
722 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
723 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
724 exit(U_UNSUPPORTED_ERROR);
725 }
726 /* TODO: Support byte swapping */
727
728 *pCPU=buffer.header32.e_machine;
729 #elif U_PLATFORM_HAS_WIN32_API
730 if(length<sizeof(IMAGE_FILE_HEADER)) {
731 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
732 exit(U_UNSUPPORTED_ERROR);
733 }
734 /* TODO: Use buffer.header. Keep aliasing legal. */
735 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
736 *pCPU=pHeader->Machine;
737 /*
738 * The number of bits is implicit with the Machine value.
739 * *pBits is ignored in the calling code, so this need not be precise.
740 */
741 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
742 /* Windows always runs on little-endian CPUs. */
743 *pIsBigEndian=FALSE;
744 #else
745 # error "Unknown platform for CAN_GENERATE_OBJECTS."
746 #endif
747
748 T_FileStream_close(in);
749 }
750
751 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)752 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
753 /* common variables */
754 char buffer[4096], entry[40]={ 0 };
755 FileStream *in, *out;
756 const char *newSuffix;
757 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
758
759 uint16_t cpu, bits;
760 UBool makeBigEndian;
761
762 /* platform-specific variables and initialization code */
763 #ifdef U_ELF
764 /* 32-bit Elf file header */
765 static Elf32_Ehdr header32={
766 {
767 /* e_ident[] */
768 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
769 ELFCLASS32,
770 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
771 EV_CURRENT /* EI_VERSION */
772 },
773 ET_REL,
774 EM_386,
775 EV_CURRENT, /* e_version */
776 0, /* e_entry */
777 0, /* e_phoff */
778 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
779 0, /* e_flags */
780 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
781 0, /* e_phentsize */
782 0, /* e_phnum */
783 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
784 5, /* e_shnum */
785 2 /* e_shstrndx */
786 };
787
788 /* 32-bit Elf section header table */
789 static Elf32_Shdr sectionHeaders32[5]={
790 { /* SHN_UNDEF */
791 0
792 },
793 { /* .symtab */
794 1, /* sh_name */
795 SHT_SYMTAB,
796 0, /* sh_flags */
797 0, /* sh_addr */
798 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
799 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
800 3, /* sh_link=sect hdr index of .strtab */
801 1, /* sh_info=One greater than the symbol table index of the last
802 * local symbol (with STB_LOCAL). */
803 4, /* sh_addralign */
804 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
805 },
806 { /* .shstrtab */
807 9, /* sh_name */
808 SHT_STRTAB,
809 0, /* sh_flags */
810 0, /* sh_addr */
811 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
812 40, /* sh_size */
813 0, /* sh_link */
814 0, /* sh_info */
815 1, /* sh_addralign */
816 0 /* sh_entsize */
817 },
818 { /* .strtab */
819 19, /* sh_name */
820 SHT_STRTAB,
821 0, /* sh_flags */
822 0, /* sh_addr */
823 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
824 (Elf32_Word)sizeof(entry), /* sh_size */
825 0, /* sh_link */
826 0, /* sh_info */
827 1, /* sh_addralign */
828 0 /* sh_entsize */
829 },
830 { /* .rodata */
831 27, /* sh_name */
832 SHT_PROGBITS,
833 SHF_ALLOC, /* sh_flags */
834 0, /* sh_addr */
835 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
836 0, /* sh_size */
837 0, /* sh_link */
838 0, /* sh_info */
839 16, /* sh_addralign */
840 0 /* sh_entsize */
841 }
842 };
843
844 /* symbol table */
845 static Elf32_Sym symbols32[2]={
846 { /* STN_UNDEF */
847 0
848 },
849 { /* data entry point */
850 1, /* st_name */
851 0, /* st_value */
852 0, /* st_size */
853 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
854 0, /* st_other */
855 4 /* st_shndx=index of related section table entry */
856 }
857 };
858
859 /* section header string table, with decimal string offsets */
860 static const char sectionStrings[40]=
861 /* 0 */ "\0"
862 /* 1 */ ".symtab\0"
863 /* 9 */ ".shstrtab\0"
864 /* 19 */ ".strtab\0"
865 /* 27 */ ".rodata\0"
866 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
867 /* 40: padded to multiple of 8 bytes */
868
869 /*
870 * Use entry[] for the string table which will contain only the
871 * entry point name.
872 * entry[0] must be 0 (NUL)
873 * The entry point name can be up to 38 characters long (sizeof(entry)-2).
874 */
875
876 /* 16-align .rodata in the .o file, just in case */
877 static const char padding[16]={ 0 };
878 int32_t paddingSize;
879
880 #ifdef U_ELF64
881 /* 64-bit Elf file header */
882 static Elf64_Ehdr header64={
883 {
884 /* e_ident[] */
885 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
886 ELFCLASS64,
887 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
888 EV_CURRENT /* EI_VERSION */
889 },
890 ET_REL,
891 EM_X86_64,
892 EV_CURRENT, /* e_version */
893 0, /* e_entry */
894 0, /* e_phoff */
895 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
896 0, /* e_flags */
897 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
898 0, /* e_phentsize */
899 0, /* e_phnum */
900 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
901 5, /* e_shnum */
902 2 /* e_shstrndx */
903 };
904
905 /* 64-bit Elf section header table */
906 static Elf64_Shdr sectionHeaders64[5]={
907 { /* SHN_UNDEF */
908 0
909 },
910 { /* .symtab */
911 1, /* sh_name */
912 SHT_SYMTAB,
913 0, /* sh_flags */
914 0, /* sh_addr */
915 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
916 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
917 3, /* sh_link=sect hdr index of .strtab */
918 1, /* sh_info=One greater than the symbol table index of the last
919 * local symbol (with STB_LOCAL). */
920 4, /* sh_addralign */
921 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
922 },
923 { /* .shstrtab */
924 9, /* sh_name */
925 SHT_STRTAB,
926 0, /* sh_flags */
927 0, /* sh_addr */
928 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
929 40, /* sh_size */
930 0, /* sh_link */
931 0, /* sh_info */
932 1, /* sh_addralign */
933 0 /* sh_entsize */
934 },
935 { /* .strtab */
936 19, /* sh_name */
937 SHT_STRTAB,
938 0, /* sh_flags */
939 0, /* sh_addr */
940 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
941 (Elf64_Xword)sizeof(entry), /* sh_size */
942 0, /* sh_link */
943 0, /* sh_info */
944 1, /* sh_addralign */
945 0 /* sh_entsize */
946 },
947 { /* .rodata */
948 27, /* sh_name */
949 SHT_PROGBITS,
950 SHF_ALLOC, /* sh_flags */
951 0, /* sh_addr */
952 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
953 0, /* sh_size */
954 0, /* sh_link */
955 0, /* sh_info */
956 16, /* sh_addralign */
957 0 /* sh_entsize */
958 }
959 };
960
961 /*
962 * 64-bit symbol table
963 * careful: different order of items compared with Elf32_sym!
964 */
965 static Elf64_Sym symbols64[2]={
966 { /* STN_UNDEF */
967 0
968 },
969 { /* data entry point */
970 1, /* st_name */
971 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
972 0, /* st_other */
973 4, /* st_shndx=index of related section table entry */
974 0, /* st_value */
975 0 /* st_size */
976 }
977 };
978
979 #endif /* U_ELF64 */
980
981 /* entry[] have a leading NUL */
982 entryOffset=1;
983
984 /* in the common code, count entryLength from after the NUL */
985 entryLengthOffset=1;
986
987 newSuffix=".o";
988
989 #elif U_PLATFORM_HAS_WIN32_API
990 struct {
991 IMAGE_FILE_HEADER fileHeader;
992 IMAGE_SECTION_HEADER sections[2];
993 char linkerOptions[100];
994 } objHeader;
995 IMAGE_SYMBOL symbols[1];
996 struct {
997 DWORD sizeofLongNames;
998 char longNames[100];
999 } symbolNames;
1000
1001 /*
1002 * entry sometimes have a leading '_'
1003 * overwritten if entryOffset==0 depending on the target platform
1004 * see check for cpu below
1005 */
1006 entry[0]='_';
1007
1008 newSuffix=".obj";
1009 #else
1010 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1011 #endif
1012
1013 /* deal with options, files and the entry point name */
1014 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1015 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1016 #if U_PLATFORM_HAS_WIN32_API
1017 if(cpu==IMAGE_FILE_MACHINE_I386) {
1018 entryOffset=1;
1019 }
1020 #endif
1021
1022 in=T_FileStream_open(filename, "rb");
1023 if(in==NULL) {
1024 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1025 exit(U_FILE_ACCESS_ERROR);
1026 }
1027 size=T_FileStream_size(in);
1028
1029 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1030 if (outFilePath != NULL) {
1031 uprv_strcpy(outFilePath, buffer);
1032 }
1033
1034 if(optEntryPoint != NULL) {
1035 uprv_strcpy(entry+entryOffset, optEntryPoint);
1036 uprv_strcat(entry+entryOffset, "_dat");
1037 }
1038 /* turn dashes in the entry name into underscores */
1039 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1040 for(i=0; i<entryLength; ++i) {
1041 if(entry[entryLengthOffset+i]=='-') {
1042 entry[entryLengthOffset+i]='_';
1043 }
1044 }
1045
1046 /* open the output file */
1047 out=T_FileStream_open(buffer, "wb");
1048 if(out==NULL) {
1049 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1050 exit(U_FILE_ACCESS_ERROR);
1051 }
1052
1053 #ifdef U_ELF
1054 if(bits==32) {
1055 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1056 header32.e_machine=cpu;
1057
1058 /* 16-align .rodata in the .o file, just in case */
1059 paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1060 if(paddingSize!=0) {
1061 paddingSize=0x10-paddingSize;
1062 sectionHeaders32[4].sh_offset+=paddingSize;
1063 }
1064
1065 sectionHeaders32[4].sh_size=(Elf32_Word)size;
1066
1067 symbols32[1].st_size=(Elf32_Word)size;
1068
1069 /* write .o headers */
1070 T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1071 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1072 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1073 } else /* bits==64 */ {
1074 #ifdef U_ELF64
1075 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1076 header64.e_machine=cpu;
1077
1078 /* 16-align .rodata in the .o file, just in case */
1079 paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1080 if(paddingSize!=0) {
1081 paddingSize=0x10-paddingSize;
1082 sectionHeaders64[4].sh_offset+=paddingSize;
1083 }
1084
1085 sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1086
1087 symbols64[1].st_size=(Elf64_Xword)size;
1088
1089 /* write .o headers */
1090 T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1091 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1092 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1093 #endif
1094 }
1095
1096 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1097 T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1098 if(paddingSize!=0) {
1099 T_FileStream_write(out, padding, paddingSize);
1100 }
1101 #elif U_PLATFORM_HAS_WIN32_API
1102 /* populate the .obj headers */
1103 uprv_memset(&objHeader, 0, sizeof(objHeader));
1104 uprv_memset(&symbols, 0, sizeof(symbols));
1105 uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1106
1107 /* write the linker export directive */
1108 uprv_strcpy(objHeader.linkerOptions, "-export:");
1109 length=8;
1110 uprv_strcpy(objHeader.linkerOptions+length, entry);
1111 length+=entryLength;
1112 uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1113 length+=6;
1114
1115 /* set the file header */
1116 objHeader.fileHeader.Machine=cpu;
1117 objHeader.fileHeader.NumberOfSections=2;
1118 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1119 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1120 objHeader.fileHeader.NumberOfSymbols=1;
1121
1122 /* set the section for the linker options */
1123 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1124 objHeader.sections[0].SizeOfRawData=length;
1125 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1126 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1127
1128 /* set the data section */
1129 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1130 objHeader.sections[1].SizeOfRawData=size;
1131 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1132 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1133
1134 /* set the symbol table */
1135 if(entryLength<=8) {
1136 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1137 symbolNames.sizeofLongNames=4;
1138 } else {
1139 symbols[0].N.Name.Short=0;
1140 symbols[0].N.Name.Long=4;
1141 symbolNames.sizeofLongNames=4+entryLength+1;
1142 uprv_strcpy(symbolNames.longNames, entry);
1143 }
1144 symbols[0].SectionNumber=2;
1145 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1146
1147 /* write the file header and the linker options section */
1148 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1149 #else
1150 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1151 #endif
1152
1153 /* copy the data file into section 2 */
1154 for(;;) {
1155 length=T_FileStream_read(in, buffer, sizeof(buffer));
1156 if(length==0) {
1157 break;
1158 }
1159 T_FileStream_write(out, buffer, (int32_t)length);
1160 }
1161
1162 #if U_PLATFORM_HAS_WIN32_API
1163 /* write the symbol table */
1164 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1165 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1166 #endif
1167
1168 if(T_FileStream_error(in)) {
1169 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1170 exit(U_FILE_ACCESS_ERROR);
1171 }
1172
1173 if(T_FileStream_error(out)) {
1174 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1175 exit(U_FILE_ACCESS_ERROR);
1176 }
1177
1178 T_FileStream_close(out);
1179 T_FileStream_close(in);
1180 }
1181 #endif
1182