1 /******************************************************************************
2 * Copyright (C) 2009-2010, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 *******************************************************************************
5 */
6 #include "unicode/utypes.h"
7
8 #ifdef U_WINDOWS
9 # define VC_EXTRALEAN
10 # define WIN32_LEAN_AND_MEAN
11 # define NOUSER
12 # define NOSERVICE
13 # define NOIME
14 # define NOMCX
15 #include <windows.h>
16 #include <time.h>
17 # ifdef __GNUC__
18 # define WINDOWS_WITH_GNUC
19 # endif
20 #endif
21
22 #ifdef U_LINUX
23 # define U_ELF
24 #endif
25
26 #ifdef U_ELF
27 # include <elf.h>
28 # if defined(ELFCLASS64)
29 # define U_ELF64
30 # endif
31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32 # ifndef EM_X86_64
33 # define EM_X86_64 62
34 # endif
35 # define ICU_ENTRY_OFFSET 0
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "unicode/putil.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 #include "filestrm.h"
44 #include "toolutil.h"
45 #include "unicode/uclean.h"
46 #include "uoptions.h"
47 #include "pkg_genc.h"
48
49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
50
51 #define HEX_0X 0 /* 0x1234 */
52 #define HEX_0H 1 /* 01234h */
53
54 #if defined(U_WINDOWS) || defined(U_ELF)
55 #define CAN_GENERATE_OBJECTS
56 #endif
57
58 /* prototypes --------------------------------------------------------------- */
/*
 * Derives the output file path (written to outFilename) and the entry point
 * name (written to entryName) from the input file name; defined further below.
 */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal number and returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/* Writes one 32-bit value in assembler notation and returns the updated column counter. */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#ifdef OS400
/* Writes one byte as an escape sequence of a C string literal and returns the updated column counter. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
72 /* -------------------------------------------------------------------------- */
73
74 /*
75 Creating Template Files for New Platforms
76
77 Let the cc compiler help you get started.
78 Compile this program
79 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
80 with the -S option to produce assembly output.
81
82 For example, this will generate array.s:
83 gcc -S array.c
84
85 This will produce a .s file that may look like this:
86
87 .file "array.c"
88 .version "01.01"
89 gcc2_compiled.:
90 .globl x
91 .section .rodata
92 .align 4
93 .type x,@object
94 .size x,20
95 x:
96 .long 1
97 .long 2
98 .long -559038737
99 .long -1
100 .long 16
101 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
102
103 which gives a starting point that will compile, and can be transformed
104 to become the template, generally with some consulting of as docs and
105 some experimentation.
106
107 If you want ICU to automatically use this assembly, you should
108 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
109 where the name is the compiler or platform that you used in this
110 assemblyHeader data structure.
111 */
/*
 * Table of assembly output templates, one entry per supported assembler.
 * An entry is selected by name through checkAssemblyHeaderName().
 */
static const struct AssemblyType {
    /* Name that checkAssemblyHeaderName() compares against the requested assembly type. */
    const char *name;
    /*
     * printf-style format written at the top of the output file.
     * writeAssemblyCode() passes the entry point name for every %s;
     * a literal '%' must therefore be written as "%%".
     */
    const char *header;
    /* Directive that starts each line of 32-bit data values. */
    const char *beginLine;
    /* printf-style format written after the data; %s is the entry point name. */
    const char *footer;
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "\t.data\n"
        "\t.const\n"
        "\t.align 4\n"  /* 1<<4 = 16 */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align 8\n"
        ".globl %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type Drodata.rodata,@object\n"
        "\t.size Drodata.rodata,0\n"
        "\t.globl %s\n"
        "\t.align 8\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file \"%s.s\"\n"
        "\t.type %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align 16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN 16\n",

        ".WORD ","",HEX_0X
    },
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};

/* Index into assemblyHeader[] of the selected assembly type; -1 while none is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Number notation used by write32(); set by checkAssemblyHeaderName(). */
static int32_t hexType = HEX_0X;
209
210 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)211 checkAssemblyHeaderName(const char* optAssembly) {
212 int32_t idx;
213 assemblyHeaderIndex = -1;
214 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
215 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
216 assemblyHeaderIndex = idx;
217 hexType = assemblyHeader[idx].hexType; /* set the hex type */
218 return TRUE;
219 }
220 }
221
222 return FALSE;
223 }
224
225
226 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)227 printAssemblyHeadersToStdErr(void) {
228 int32_t idx;
229 fprintf(stderr, "%s", assemblyHeader[0].name);
230 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
231 fprintf(stderr, ", %s", assemblyHeader[idx].name);
232 }
233 fprintf(stderr,
234 ")\n");
235 }
236
237 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)238 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
239 uint32_t column = MAX_COLUMN;
240 char entry[64];
241 uint32_t buffer[1024];
242 char *bufferStr = (char *)buffer;
243 FileStream *in, *out;
244 size_t i, length;
245
246 in=T_FileStream_open(filename, "rb");
247 if(in==NULL) {
248 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
249 exit(U_FILE_ACCESS_ERROR);
250 }
251
252 getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
253 out=T_FileStream_open(bufferStr, "w");
254 if(out==NULL) {
255 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
256 exit(U_FILE_ACCESS_ERROR);
257 }
258
259 if (outFilePath != NULL) {
260 uprv_strcpy(outFilePath, bufferStr);
261 }
262
263 #ifdef WINDOWS_WITH_GNUC
264 /* Need to fix the file seperator character when using MinGW. */
265 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
266 #endif
267
268 if(optEntryPoint != NULL) {
269 uprv_strcpy(entry, optEntryPoint);
270 uprv_strcat(entry, "_dat");
271 }
272
273 /* turn dashes or dots in the entry name into underscores */
274 length=uprv_strlen(entry);
275 for(i=0; i<length; ++i) {
276 if(entry[i]=='-' || entry[i]=='.') {
277 entry[i]='_';
278 }
279 }
280
281 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
282 entry, entry, entry, entry,
283 entry, entry, entry, entry);
284 T_FileStream_writeLine(out, bufferStr);
285 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
286
287 for(;;) {
288 length=T_FileStream_read(in, buffer, sizeof(buffer));
289 if(length==0) {
290 break;
291 }
292 if (length != sizeof(buffer)) {
293 /* pad with extra 0's when at the end of the file */
294 for(i=0; i < (length % sizeof(uint32_t)); ++i) {
295 buffer[length+i] = 0;
296 }
297 }
298 for(i=0; i<(length/sizeof(buffer[0])); i++) {
299 column = write32(out, buffer[i], column);
300 }
301 }
302
303 T_FileStream_writeLine(out, "\n");
304
305 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
306 entry, entry, entry, entry,
307 entry, entry, entry, entry);
308 T_FileStream_writeLine(out, bufferStr);
309
310 if(T_FileStream_error(in)) {
311 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
312 exit(U_FILE_ACCESS_ERROR);
313 }
314
315 if(T_FileStream_error(out)) {
316 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
317 exit(U_FILE_ACCESS_ERROR);
318 }
319
320 T_FileStream_close(out);
321 T_FileStream_close(in);
322 }
323
324 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)325 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
326 uint32_t column = MAX_COLUMN;
327 char buffer[4096], entry[64];
328 FileStream *in, *out;
329 size_t i, length;
330
331 in=T_FileStream_open(filename, "rb");
332 if(in==NULL) {
333 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
334 exit(U_FILE_ACCESS_ERROR);
335 }
336
337 if(optName != NULL) { /* prepend 'icudt28_' */
338 strcpy(entry, optName);
339 strcat(entry, "_");
340 } else {
341 entry[0] = 0;
342 }
343
344 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
345 if (outFilePath != NULL) {
346 uprv_strcpy(outFilePath, buffer);
347 }
348 out=T_FileStream_open(buffer, "w");
349 if(out==NULL) {
350 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
351 exit(U_FILE_ACCESS_ERROR);
352 }
353
354 /* turn dashes or dots in the entry name into underscores */
355 length=uprv_strlen(entry);
356 for(i=0; i<length; ++i) {
357 if(entry[i]=='-' || entry[i]=='.') {
358 entry[i]='_';
359 }
360 }
361
362 #ifdef OS400
363 /*
364 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
365
366 This is here because this platform can't currently put
367 const data into the read-only pages of an object or
368 shared library (service program). Only strings are allowed in read-only
369 pages, so we use char * strings to store the data.
370
371 In order to prevent the beginning of the data from ever matching the
372 magic numbers we must still use the initial double.
373 [grhoten 4/24/2003]
374 */
375 sprintf(buffer,
376 "#define U_DISABLE_RENAMING 1\n"
377 "#include \"unicode/umachine.h\"\n"
378 "U_CDECL_BEGIN\n"
379 "const struct {\n"
380 " double bogus;\n"
381 " const char *bytes; \n"
382 "} %s={ 0.0, \n",
383 entry);
384 T_FileStream_writeLine(out, buffer);
385
386 for(;;) {
387 length=T_FileStream_read(in, buffer, sizeof(buffer));
388 if(length==0) {
389 break;
390 }
391 for(i=0; i<length; ++i) {
392 column = write8str(out, (uint8_t)buffer[i], column);
393 }
394 }
395
396 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
397 #else
398 /* Function renaming shouldn't be done in data */
399 sprintf(buffer,
400 "#define U_DISABLE_RENAMING 1\n"
401 "#include \"unicode/umachine.h\"\n"
402 "U_CDECL_BEGIN\n"
403 "const struct {\n"
404 " double bogus;\n"
405 " uint8_t bytes[%ld]; \n"
406 "} %s={ 0.0, {\n",
407 (long)T_FileStream_size(in), entry);
408 T_FileStream_writeLine(out, buffer);
409
410 for(;;) {
411 length=T_FileStream_read(in, buffer, sizeof(buffer));
412 if(length==0) {
413 break;
414 }
415 for(i=0; i<length; ++i) {
416 column = write8(out, (uint8_t)buffer[i], column);
417 }
418 }
419
420 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
421 #endif
422
423 if(T_FileStream_error(in)) {
424 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
425 exit(U_FILE_ACCESS_ERROR);
426 }
427
428 if(T_FileStream_error(out)) {
429 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
430 exit(U_FILE_ACCESS_ERROR);
431 }
432
433 T_FileStream_close(out);
434 T_FileStream_close(in);
435 }
436
437 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)438 write32(FileStream *out, uint32_t bitField, uint32_t column) {
439 int32_t i;
440 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
441 char *s = bitFieldStr;
442 uint8_t *ptrIdx = (uint8_t *)&bitField;
443 static const char hexToStr[16] = {
444 '0','1','2','3',
445 '4','5','6','7',
446 '8','9','A','B',
447 'C','D','E','F'
448 };
449
450 /* write the value, possibly with comma and newline */
451 if(column==MAX_COLUMN) {
452 /* first byte */
453 column=1;
454 } else if(column<32) {
455 *(s++)=',';
456 ++column;
457 } else {
458 *(s++)='\n';
459 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
460 s+=uprv_strlen(s);
461 column=1;
462 }
463
464 if (bitField < 10) {
465 /* It's a small number. Don't waste the space for 0x */
466 *(s++)=hexToStr[bitField];
467 }
468 else {
469 int seenNonZero = 0; /* This is used to remove leading zeros */
470
471 if(hexType==HEX_0X) {
472 *(s++)='0';
473 *(s++)='x';
474 } else if(hexType==HEX_0H) {
475 *(s++)='0';
476 }
477
478 /* This creates a 32-bit field */
479 #if U_IS_BIG_ENDIAN
480 for (i = 0; i < sizeof(uint32_t); i++)
481 #else
482 for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
483 #endif
484 {
485 uint8_t value = ptrIdx[i];
486 if (value || seenNonZero) {
487 *(s++)=hexToStr[value>>4];
488 *(s++)=hexToStr[value&0xF];
489 seenNonZero = 1;
490 }
491 }
492 if(hexType==HEX_0H) {
493 *(s++)='h';
494 }
495 }
496
497 *(s++)=0;
498 T_FileStream_writeLine(out, bitFieldStr);
499 return column;
500 }
501
502 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)503 write8(FileStream *out, uint8_t byte, uint32_t column) {
504 char s[4];
505 int i=0;
506
507 /* convert the byte value to a string */
508 if(byte>=100) {
509 s[i++]=(char)('0'+byte/100);
510 byte%=100;
511 }
512 if(i>0 || byte>=10) {
513 s[i++]=(char)('0'+byte/10);
514 byte%=10;
515 }
516 s[i++]=(char)('0'+byte);
517 s[i]=0;
518
519 /* write the value, possibly with comma and newline */
520 if(column==MAX_COLUMN) {
521 /* first byte */
522 column=1;
523 } else if(column<16) {
524 T_FileStream_writeLine(out, ",");
525 ++column;
526 } else {
527 T_FileStream_writeLine(out, ",\n");
528 column=1;
529 }
530 T_FileStream_writeLine(out, s);
531 return column;
532 }
533
#ifdef OS400
/*
 * Writes one byte as an escape sequence inside a C string literal.
 *
 * Bytes up to 7 are written as a backslash followed by one digit, all other
 * bytes as a "\x" hexadecimal escape.
 *
 * column is MAX_COLUMN for the very first byte, in which case the opening
 * quote is written. After 24 escapes on a line the string literal is closed
 * and a new one is opened on the next line.
 *
 * Returns the updated column counter.
 */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column) {
    char escape[8];

    sprintf(escape, (byte > 7) ? "\\x%X" : "\\%X", byte);

    if (column == MAX_COLUMN) {
        /* first byte: open the string literal */
        column = 1;
        T_FileStream_writeLine(out, "\"");
    } else if (column >= 24) {
        /* close this literal and continue with a new one on the next line */
        T_FileStream_writeLine(out, "\"\n\"");
        column = 1;
    } else {
        ++column;
    }

    T_FileStream_writeLine(out, escape);
    return column;
}
#endif
559
560 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)561 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
562 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
563
564 /* copy path */
565 if(destdir!=NULL && *destdir!=0) {
566 do {
567 *outFilename++=*destdir++;
568 } while(*destdir!=0);
569 if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
570 *outFilename++=U_FILE_SEP_CHAR;
571 }
572 inFilename=basename;
573 } else {
574 while(inFilename<basename) {
575 *outFilename++=*inFilename++;
576 }
577 }
578
579 if(suffix==NULL) {
580 /* the filename does not have a suffix */
581 uprv_strcpy(entryName, inFilename);
582 if(optFilename != NULL) {
583 uprv_strcpy(outFilename, optFilename);
584 } else {
585 uprv_strcpy(outFilename, inFilename);
586 }
587 uprv_strcat(outFilename, newSuffix);
588 } else {
589 char *saveOutFilename = outFilename;
590 /* copy basename */
591 while(inFilename<suffix) {
592 if(*inFilename=='-') {
593 /* iSeries cannot have '-' in the .o objects. */
594 *outFilename++=*entryName++='_';
595 inFilename++;
596 }
597 else {
598 *outFilename++=*entryName++=*inFilename++;
599 }
600 }
601
602 /* replace '.' by '_' */
603 *outFilename++=*entryName++='_';
604 ++inFilename;
605
606 /* copy suffix */
607 while(*inFilename!=0) {
608 *outFilename++=*entryName++=*inFilename++;
609 }
610
611 *entryName=0;
612
613 if(optFilename != NULL) {
614 uprv_strcpy(saveOutFilename, optFilename);
615 uprv_strcat(saveOutFilename, newSuffix);
616 } else {
617 /* add ".c" */
618 uprv_strcpy(outFilename, newSuffix);
619 }
620 }
621 }
622
623 #ifdef CAN_GENERATE_OBJECTS
624 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)625 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
626 union {
627 char bytes[2048];
628 #ifdef U_ELF
629 Elf32_Ehdr header32;
630 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
631 #elif defined(U_WINDOWS)
632 IMAGE_FILE_HEADER header;
633 #endif
634 } buffer;
635
636 const char *filename;
637 FileStream *in;
638 int32_t length;
639
640 #ifdef U_ELF
641
642 #elif defined(U_WINDOWS)
643 const IMAGE_FILE_HEADER *pHeader;
644 #else
645 # error "Unknown platform for CAN_GENERATE_OBJECTS."
646 #endif
647
648 if(optMatchArch != NULL) {
649 filename=optMatchArch;
650 } else {
651 /* set defaults */
652 #ifdef U_ELF
653 /* set EM_386 because elf.h does not provide better defaults */
654 *pCPU=EM_386;
655 *pBits=32;
656 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
657 #elif defined(U_WINDOWS)
658 /* _M_IA64 should be defined in windows.h */
659 # if defined(_M_IA64)
660 *pCPU=IMAGE_FILE_MACHINE_IA64;
661 # elif defined(_M_AMD64)
662 *pCPU=IMAGE_FILE_MACHINE_AMD64;
663 # else
664 *pCPU=IMAGE_FILE_MACHINE_I386;
665 # endif
666 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
667 *pIsBigEndian=FALSE;
668 #else
669 # error "Unknown platform for CAN_GENERATE_OBJECTS."
670 #endif
671 return;
672 }
673
674 in=T_FileStream_open(filename, "rb");
675 if(in==NULL) {
676 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
677 exit(U_FILE_ACCESS_ERROR);
678 }
679 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
680
681 #ifdef U_ELF
682 if(length<sizeof(Elf32_Ehdr)) {
683 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
684 exit(U_UNSUPPORTED_ERROR);
685 }
686 if(
687 buffer.header32.e_ident[0]!=ELFMAG0 ||
688 buffer.header32.e_ident[1]!=ELFMAG1 ||
689 buffer.header32.e_ident[2]!=ELFMAG2 ||
690 buffer.header32.e_ident[3]!=ELFMAG3 ||
691 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
692 ) {
693 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
694 exit(U_UNSUPPORTED_ERROR);
695 }
696
697 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
698 #ifdef U_ELF64
699 if(*pBits!=32 && *pBits!=64) {
700 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
701 exit(U_UNSUPPORTED_ERROR);
702 }
703 #else
704 if(*pBits!=32) {
705 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
706 exit(U_UNSUPPORTED_ERROR);
707 }
708 #endif
709
710 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
711 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
712 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
713 exit(U_UNSUPPORTED_ERROR);
714 }
715 /* TODO: Support byte swapping */
716
717 *pCPU=buffer.header32.e_machine;
718 #elif defined(U_WINDOWS)
719 if(length<sizeof(IMAGE_FILE_HEADER)) {
720 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
721 exit(U_UNSUPPORTED_ERROR);
722 }
723 /* TODO: Use buffer.header. Keep aliasing legal. */
724 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
725 *pCPU=pHeader->Machine;
726 /*
727 * The number of bits is implicit with the Machine value.
728 * *pBits is ignored in the calling code, so this need not be precise.
729 */
730 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
731 /* Windows always runs on little-endian CPUs. */
732 *pIsBigEndian=FALSE;
733 #else
734 # error "Unknown platform for CAN_GENERATE_OBJECTS."
735 #endif
736
737 T_FileStream_close(in);
738 }
739
740 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)741 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
742 /* common variables */
743 char buffer[4096], entry[40]={ 0 };
744 FileStream *in, *out;
745 const char *newSuffix;
746 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
747
748 uint16_t cpu, bits;
749 UBool makeBigEndian;
750
751 /* platform-specific variables and initialization code */
752 #ifdef U_ELF
753 /* 32-bit Elf file header */
754 static Elf32_Ehdr header32={
755 {
756 /* e_ident[] */
757 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
758 ELFCLASS32,
759 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
760 EV_CURRENT /* EI_VERSION */
761 },
762 ET_REL,
763 EM_386,
764 EV_CURRENT, /* e_version */
765 0, /* e_entry */
766 0, /* e_phoff */
767 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
768 0, /* e_flags */
769 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
770 0, /* e_phentsize */
771 0, /* e_phnum */
772 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
773 5, /* e_shnum */
774 2 /* e_shstrndx */
775 };
776
777 /* 32-bit Elf section header table */
778 static Elf32_Shdr sectionHeaders32[5]={
779 { /* SHN_UNDEF */
780 0
781 },
782 { /* .symtab */
783 1, /* sh_name */
784 SHT_SYMTAB,
785 0, /* sh_flags */
786 0, /* sh_addr */
787 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
788 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
789 3, /* sh_link=sect hdr index of .strtab */
790 1, /* sh_info=One greater than the symbol table index of the last
791 * local symbol (with STB_LOCAL). */
792 4, /* sh_addralign */
793 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
794 },
795 { /* .shstrtab */
796 9, /* sh_name */
797 SHT_STRTAB,
798 0, /* sh_flags */
799 0, /* sh_addr */
800 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
801 40, /* sh_size */
802 0, /* sh_link */
803 0, /* sh_info */
804 1, /* sh_addralign */
805 0 /* sh_entsize */
806 },
807 { /* .strtab */
808 19, /* sh_name */
809 SHT_STRTAB,
810 0, /* sh_flags */
811 0, /* sh_addr */
812 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
813 (Elf32_Word)sizeof(entry), /* sh_size */
814 0, /* sh_link */
815 0, /* sh_info */
816 1, /* sh_addralign */
817 0 /* sh_entsize */
818 },
819 { /* .rodata */
820 27, /* sh_name */
821 SHT_PROGBITS,
822 SHF_ALLOC, /* sh_flags */
823 0, /* sh_addr */
824 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
825 0, /* sh_size */
826 0, /* sh_link */
827 0, /* sh_info */
828 16, /* sh_addralign */
829 0 /* sh_entsize */
830 }
831 };
832
833 /* symbol table */
834 static Elf32_Sym symbols32[2]={
835 { /* STN_UNDEF */
836 0
837 },
838 { /* data entry point */
839 1, /* st_name */
840 0, /* st_value */
841 0, /* st_size */
842 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
843 0, /* st_other */
844 4 /* st_shndx=index of related section table entry */
845 }
846 };
847
848 /* section header string table, with decimal string offsets */
849 static const char sectionStrings[40]=
850 /* 0 */ "\0"
851 /* 1 */ ".symtab\0"
852 /* 9 */ ".shstrtab\0"
853 /* 19 */ ".strtab\0"
854 /* 27 */ ".rodata\0"
855 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
856 /* 40: padded to multiple of 8 bytes */
857
858 /*
859 * Use entry[] for the string table which will contain only the
860 * entry point name.
861 * entry[0] must be 0 (NUL)
862 * The entry point name can be up to 38 characters long (sizeof(entry)-2).
863 */
864
865 /* 16-align .rodata in the .o file, just in case */
866 static const char padding[16]={ 0 };
867 int32_t paddingSize;
868
869 #ifdef U_ELF64
870 /* 64-bit Elf file header */
871 static Elf64_Ehdr header64={
872 {
873 /* e_ident[] */
874 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
875 ELFCLASS64,
876 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
877 EV_CURRENT /* EI_VERSION */
878 },
879 ET_REL,
880 EM_X86_64,
881 EV_CURRENT, /* e_version */
882 0, /* e_entry */
883 0, /* e_phoff */
884 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
885 0, /* e_flags */
886 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
887 0, /* e_phentsize */
888 0, /* e_phnum */
889 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
890 5, /* e_shnum */
891 2 /* e_shstrndx */
892 };
893
894 /* 64-bit Elf section header table */
895 static Elf64_Shdr sectionHeaders64[5]={
896 { /* SHN_UNDEF */
897 0
898 },
899 { /* .symtab */
900 1, /* sh_name */
901 SHT_SYMTAB,
902 0, /* sh_flags */
903 0, /* sh_addr */
904 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
905 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
906 3, /* sh_link=sect hdr index of .strtab */
907 1, /* sh_info=One greater than the symbol table index of the last
908 * local symbol (with STB_LOCAL). */
909 4, /* sh_addralign */
910 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
911 },
912 { /* .shstrtab */
913 9, /* sh_name */
914 SHT_STRTAB,
915 0, /* sh_flags */
916 0, /* sh_addr */
917 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
918 40, /* sh_size */
919 0, /* sh_link */
920 0, /* sh_info */
921 1, /* sh_addralign */
922 0 /* sh_entsize */
923 },
924 { /* .strtab */
925 19, /* sh_name */
926 SHT_STRTAB,
927 0, /* sh_flags */
928 0, /* sh_addr */
929 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
930 (Elf64_Xword)sizeof(entry), /* sh_size */
931 0, /* sh_link */
932 0, /* sh_info */
933 1, /* sh_addralign */
934 0 /* sh_entsize */
935 },
936 { /* .rodata */
937 27, /* sh_name */
938 SHT_PROGBITS,
939 SHF_ALLOC, /* sh_flags */
940 0, /* sh_addr */
941 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
942 0, /* sh_size */
943 0, /* sh_link */
944 0, /* sh_info */
945 16, /* sh_addralign */
946 0 /* sh_entsize */
947 }
948 };
949
950 /*
951 * 64-bit symbol table
952 * careful: different order of items compared with Elf32_sym!
953 */
954 static Elf64_Sym symbols64[2]={
955 { /* STN_UNDEF */
956 0
957 },
958 { /* data entry point */
959 1, /* st_name */
960 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
961 0, /* st_other */
962 4, /* st_shndx=index of related section table entry */
963 0, /* st_value */
964 0 /* st_size */
965 }
966 };
967
968 #endif /* U_ELF64 */
969
970 /* entry[] have a leading NUL */
971 entryOffset=1;
972
973 /* in the common code, count entryLength from after the NUL */
974 entryLengthOffset=1;
975
976 newSuffix=".o";
977
978 #elif defined(U_WINDOWS)
979 struct {
980 IMAGE_FILE_HEADER fileHeader;
981 IMAGE_SECTION_HEADER sections[2];
982 char linkerOptions[100];
983 } objHeader;
984 IMAGE_SYMBOL symbols[1];
985 struct {
986 DWORD sizeofLongNames;
987 char longNames[100];
988 } symbolNames;
989
990 /*
991 * entry sometimes have a leading '_'
992 * overwritten if entryOffset==0 depending on the target platform
993 * see check for cpu below
994 */
995 entry[0]='_';
996
997 newSuffix=".obj";
998 #else
999 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1000 #endif
1001
1002 /* deal with options, files and the entry point name */
1003 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1004 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%hu\n", cpu, bits, makeBigEndian);
1005 #ifdef U_WINDOWS
1006 if(cpu==IMAGE_FILE_MACHINE_I386) {
1007 entryOffset=1;
1008 }
1009 #endif
1010
1011 in=T_FileStream_open(filename, "rb");
1012 if(in==NULL) {
1013 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1014 exit(U_FILE_ACCESS_ERROR);
1015 }
1016 size=T_FileStream_size(in);
1017
1018 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1019 if (outFilePath != NULL) {
1020 uprv_strcpy(outFilePath, buffer);
1021 }
1022
1023 if(optEntryPoint != NULL) {
1024 uprv_strcpy(entry+entryOffset, optEntryPoint);
1025 uprv_strcat(entry+entryOffset, "_dat");
1026 }
1027 /* turn dashes in the entry name into underscores */
1028 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1029 for(i=0; i<entryLength; ++i) {
1030 if(entry[entryLengthOffset+i]=='-') {
1031 entry[entryLengthOffset+i]='_';
1032 }
1033 }
1034
1035 /* open the output file */
1036 out=T_FileStream_open(buffer, "wb");
1037 if(out==NULL) {
1038 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1039 exit(U_FILE_ACCESS_ERROR);
1040 }
1041
1042 #ifdef U_ELF
1043 if(bits==32) {
1044 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1045 header32.e_machine=cpu;
1046
1047 /* 16-align .rodata in the .o file, just in case */
1048 paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1049 if(paddingSize!=0) {
1050 paddingSize=0x10-paddingSize;
1051 sectionHeaders32[4].sh_offset+=paddingSize;
1052 }
1053
1054 sectionHeaders32[4].sh_size=(Elf32_Word)size;
1055
1056 symbols32[1].st_size=(Elf32_Word)size;
1057
1058 /* write .o headers */
1059 T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1060 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1061 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1062 } else /* bits==64 */ {
1063 #ifdef U_ELF64
1064 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1065 header64.e_machine=cpu;
1066
1067 /* 16-align .rodata in the .o file, just in case */
1068 paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1069 if(paddingSize!=0) {
1070 paddingSize=0x10-paddingSize;
1071 sectionHeaders64[4].sh_offset+=paddingSize;
1072 }
1073
1074 sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1075
1076 symbols64[1].st_size=(Elf64_Xword)size;
1077
1078 /* write .o headers */
1079 T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1080 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1081 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1082 #endif
1083 }
1084
1085 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1086 T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1087 if(paddingSize!=0) {
1088 T_FileStream_write(out, padding, paddingSize);
1089 }
1090 #elif defined(U_WINDOWS)
1091 /* populate the .obj headers */
1092 uprv_memset(&objHeader, 0, sizeof(objHeader));
1093 uprv_memset(&symbols, 0, sizeof(symbols));
1094 uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1095
1096 /* write the linker export directive */
1097 uprv_strcpy(objHeader.linkerOptions, "-export:");
1098 length=8;
1099 uprv_strcpy(objHeader.linkerOptions+length, entry);
1100 length+=entryLength;
1101 uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1102 length+=6;
1103
1104 /* set the file header */
1105 objHeader.fileHeader.Machine=cpu;
1106 objHeader.fileHeader.NumberOfSections=2;
1107 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1108 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1109 objHeader.fileHeader.NumberOfSymbols=1;
1110
1111 /* set the section for the linker options */
1112 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1113 objHeader.sections[0].SizeOfRawData=length;
1114 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1115 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1116
1117 /* set the data section */
1118 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1119 objHeader.sections[1].SizeOfRawData=size;
1120 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1121 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1122
1123 /* set the symbol table */
1124 if(entryLength<=8) {
1125 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1126 symbolNames.sizeofLongNames=4;
1127 } else {
1128 symbols[0].N.Name.Short=0;
1129 symbols[0].N.Name.Long=4;
1130 symbolNames.sizeofLongNames=4+entryLength+1;
1131 uprv_strcpy(symbolNames.longNames, entry);
1132 }
1133 symbols[0].SectionNumber=2;
1134 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1135
1136 /* write the file header and the linker options section */
1137 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1138 #else
1139 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1140 #endif
1141
1142 /* copy the data file into section 2 */
1143 for(;;) {
1144 length=T_FileStream_read(in, buffer, sizeof(buffer));
1145 if(length==0) {
1146 break;
1147 }
1148 T_FileStream_write(out, buffer, (int32_t)length);
1149 }
1150
1151 #ifdef U_WINDOWS
1152 /* write the symbol table */
1153 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1154 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1155 #endif
1156
1157 if(T_FileStream_error(in)) {
1158 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1159 exit(U_FILE_ACCESS_ERROR);
1160 }
1161
1162 if(T_FileStream_error(out)) {
1163 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1164 exit(U_FILE_ACCESS_ERROR);
1165 }
1166
1167 T_FileStream_close(out);
1168 T_FileStream_close(in);
1169 }
1170 #endif
1171