1 /******************************************************************************
2 * Copyright (C) 2009-2012, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 *******************************************************************************
5 */
6 #include "unicode/utypes.h"
7
8 #if U_PLATFORM_HAS_WIN32_API
9 # define VC_EXTRALEAN
10 # define WIN32_LEAN_AND_MEAN
11 # define NOUSER
12 # define NOSERVICE
13 # define NOIME
14 # define NOMCX
15 #include <windows.h>
16 #include <time.h>
17 # ifdef __GNUC__
18 # define WINDOWS_WITH_GNUC
19 # endif
20 #endif
21
22 #if U_PLATFORM_IS_LINUX_BASED
23 # define U_ELF
24 #endif
25
26 #ifdef U_ELF
27 # include <elf.h>
28 # if defined(ELFCLASS64)
29 # define U_ELF64
30 # endif
31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32 # ifndef EM_X86_64
33 # define EM_X86_64 62
34 # endif
35 # define ICU_ENTRY_OFFSET 0
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "unicode/putil.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 #include "filestrm.h"
44 #include "toolutil.h"
45 #include "unicode/uclean.h"
46 #include "uoptions.h"
47 #include "pkg_genc.h"
48
49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
50
51 #define HEX_0X 0 /* 0x1234 */
52 #define HEX_0H 1 /* 01234h */
53
54 /* prototypes --------------------------------------------------------------- */
55 static void
56 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
57
58 static uint32_t
59 write8(FileStream *out, uint8_t byte, uint32_t column);
60
61 static uint32_t
62 write32(FileStream *out, uint32_t byte, uint32_t column);
63
64 #if U_PLATFORM == U_PF_OS400
65 static uint32_t
66 write8str(FileStream *out, uint8_t byte, uint32_t column);
67 #endif
68 /* -------------------------------------------------------------------------- */
69
70 /*
71 Creating Template Files for New Platforms
72
73 Let the cc compiler help you get started.
74 Compile this program
75 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
76 with the -S option to produce assembly output.
77
78 For example, this will generate array.s:
79 gcc -S array.c
80
81 This will produce a .s file that may look like this:
82
83 .file "array.c"
84 .version "01.01"
85 gcc2_compiled.:
86 .globl x
87 .section .rodata
88 .align 4
89 .type x,@object
90 .size x,20
91 x:
92 .long 1
93 .long 2
94 .long -559038737
95 .long -1
96 .long 16
97 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
98
99 which gives a starting point that will compile, and can be transformed
100 to become the template, generally with some consulting of as docs and
101 some experimentation.
102
103 If you want ICU to automatically use this assembly, you should
104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
105 where the name is the compiler or platform that you used in this
106 assemblyHeader data structure.
107 */
108 static const struct AssemblyType {
109 const char *name;
110 const char *header;
111 const char *beginLine;
112 const char *footer;
113 int8_t hexType; /* HEX_0X or HEX_0h */
114 } assemblyHeader[] = {
115 {"gcc",
116 ".globl %s\n"
117 "\t.section .note.GNU-stack,\"\",%%progbits\n"
118 "\t.section .rodata\n"
119 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
120 "\t.type %s,%%object\n"
121 "%s:\n\n",
122
123 ".long ","",HEX_0X
124 },
125 {"gcc-darwin",
126 /*"\t.section __TEXT,__text,regular,pure_instructions\n"
127 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
128 ".globl _%s\n"
129 "\t.data\n"
130 "\t.const\n"
131 "\t.align 4\n" /* 1<<4 = 16 */
132 "_%s:\n\n",
133
134 ".long ","",HEX_0X
135 },
136 {"gcc-cygwin",
137 ".globl _%s\n"
138 "\t.section .rodata\n"
139 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
140 "_%s:\n\n",
141
142 ".long ","",HEX_0X
143 },
144 {"gcc-mingw64",
145 ".globl %s\n"
146 "\t.section .rodata\n"
147 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
148 "%s:\n\n",
149
150 ".long ","",HEX_0X
151 },
152 {"sun",
153 "\t.section \".rodata\"\n"
154 "\t.align 8\n"
155 ".globl %s\n"
156 "%s:\n",
157
158 ".word ","",HEX_0X
159 },
160 {"sun-x86",
161 "Drodata.rodata:\n"
162 "\t.type Drodata.rodata,@object\n"
163 "\t.size Drodata.rodata,0\n"
164 "\t.globl %s\n"
165 "\t.align 8\n"
166 "%s:\n",
167
168 ".4byte ","",HEX_0X
169 },
170 {"xlc",
171 ".globl %s{RO}\n"
172 "\t.toc\n"
173 "%s:\n"
174 "\t.csect %s{RO}, 4\n",
175
176 ".long ","",HEX_0X
177 },
178 {"aCC-ia64",
179 "\t.file \"%s.s\"\n"
180 "\t.type %s,@object\n"
181 "\t.global %s\n"
182 "\t.secalias .abe$0.rodata, \".rodata\"\n"
183 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
184 "\t.align 16\n"
185 "%s::\t",
186
187 "data4 ","",HEX_0X
188 },
189 {"aCC-parisc",
190 "\t.SPACE $TEXT$\n"
191 "\t.SUBSPA $LIT$\n"
192 "%s\n"
193 "\t.EXPORT %s\n"
194 "\t.ALIGN 16\n",
195
196 ".WORD ","",HEX_0X
197 },
198 { "masm",
199 "\tTITLE %s\n"
200 "; generated by genccode\n"
201 ".386\n"
202 ".model flat\n"
203 "\tPUBLIC _%s\n"
204 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
205 "\tALIGN 16\n"
206 "_%s\tLABEL DWORD\n",
207 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
208 }
209 };
210
211 static int32_t assemblyHeaderIndex = -1;
212 static int32_t hexType = HEX_0X;
213
214 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)215 checkAssemblyHeaderName(const char* optAssembly) {
216 int32_t idx;
217 assemblyHeaderIndex = -1;
218 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
219 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
220 assemblyHeaderIndex = idx;
221 hexType = assemblyHeader[idx].hexType; /* set the hex type */
222 return TRUE;
223 }
224 }
225
226 return FALSE;
227 }
228
229
230 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)231 printAssemblyHeadersToStdErr(void) {
232 int32_t idx;
233 fprintf(stderr, "%s", assemblyHeader[0].name);
234 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
235 fprintf(stderr, ", %s", assemblyHeader[idx].name);
236 }
237 fprintf(stderr,
238 ")\n");
239 }
240
241 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)242 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
243 uint32_t column = MAX_COLUMN;
244 char entry[64];
245 uint32_t buffer[1024];
246 char *bufferStr = (char *)buffer;
247 FileStream *in, *out;
248 size_t i, length;
249
250 in=T_FileStream_open(filename, "rb");
251 if(in==NULL) {
252 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
253 exit(U_FILE_ACCESS_ERROR);
254 }
255
256 getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
257 out=T_FileStream_open(bufferStr, "w");
258 if(out==NULL) {
259 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
260 exit(U_FILE_ACCESS_ERROR);
261 }
262
263 if (outFilePath != NULL) {
264 uprv_strcpy(outFilePath, bufferStr);
265 }
266
267 #ifdef WINDOWS_WITH_GNUC
268 /* Need to fix the file seperator character when using MinGW. */
269 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
270 #endif
271
272 if(optEntryPoint != NULL) {
273 uprv_strcpy(entry, optEntryPoint);
274 uprv_strcat(entry, "_dat");
275 }
276
277 /* turn dashes or dots in the entry name into underscores */
278 length=uprv_strlen(entry);
279 for(i=0; i<length; ++i) {
280 if(entry[i]=='-' || entry[i]=='.') {
281 entry[i]='_';
282 }
283 }
284
285 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
286 entry, entry, entry, entry,
287 entry, entry, entry, entry);
288 T_FileStream_writeLine(out, bufferStr);
289 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
290
291 for(;;) {
292 length=T_FileStream_read(in, buffer, sizeof(buffer));
293 if(length==0) {
294 break;
295 }
296 if (length != sizeof(buffer)) {
297 /* pad with extra 0's when at the end of the file */
298 for(i=0; i < (length % sizeof(uint32_t)); ++i) {
299 buffer[length+i] = 0;
300 }
301 }
302 for(i=0; i<(length/sizeof(buffer[0])); i++) {
303 column = write32(out, buffer[i], column);
304 }
305 }
306
307 T_FileStream_writeLine(out, "\n");
308
309 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
310 entry, entry, entry, entry,
311 entry, entry, entry, entry);
312 T_FileStream_writeLine(out, bufferStr);
313
314 if(T_FileStream_error(in)) {
315 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
316 exit(U_FILE_ACCESS_ERROR);
317 }
318
319 if(T_FileStream_error(out)) {
320 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
321 exit(U_FILE_ACCESS_ERROR);
322 }
323
324 T_FileStream_close(out);
325 T_FileStream_close(in);
326 }
327
328 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)329 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
330 uint32_t column = MAX_COLUMN;
331 char buffer[4096], entry[64];
332 FileStream *in, *out;
333 size_t i, length;
334
335 in=T_FileStream_open(filename, "rb");
336 if(in==NULL) {
337 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
338 exit(U_FILE_ACCESS_ERROR);
339 }
340
341 if(optName != NULL) { /* prepend 'icudt28_' */
342 strcpy(entry, optName);
343 strcat(entry, "_");
344 } else {
345 entry[0] = 0;
346 }
347
348 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
349 if (outFilePath != NULL) {
350 uprv_strcpy(outFilePath, buffer);
351 }
352 out=T_FileStream_open(buffer, "w");
353 if(out==NULL) {
354 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
355 exit(U_FILE_ACCESS_ERROR);
356 }
357
358 /* turn dashes or dots in the entry name into underscores */
359 length=uprv_strlen(entry);
360 for(i=0; i<length; ++i) {
361 if(entry[i]=='-' || entry[i]=='.') {
362 entry[i]='_';
363 }
364 }
365
366 #if U_PLATFORM == U_PF_OS400
367 /*
368 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
369
370 This is here because this platform can't currently put
371 const data into the read-only pages of an object or
372 shared library (service program). Only strings are allowed in read-only
373 pages, so we use char * strings to store the data.
374
375 In order to prevent the beginning of the data from ever matching the
376 magic numbers we must still use the initial double.
377 [grhoten 4/24/2003]
378 */
379 sprintf(buffer,
380 "#ifndef IN_GENERATED_CCODE\n"
381 "#define IN_GENERATED_CCODE\n"
382 "#define U_DISABLE_RENAMING 1\n"
383 "#include \"unicode/umachine.h\"\n"
384 "#endif\n"
385 "U_CDECL_BEGIN\n"
386 "const struct {\n"
387 " double bogus;\n"
388 " const char *bytes; \n"
389 "} %s={ 0.0, \n",
390 entry);
391 T_FileStream_writeLine(out, buffer);
392
393 for(;;) {
394 length=T_FileStream_read(in, buffer, sizeof(buffer));
395 if(length==0) {
396 break;
397 }
398 for(i=0; i<length; ++i) {
399 column = write8str(out, (uint8_t)buffer[i], column);
400 }
401 }
402
403 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
404 #else
405 /* Function renaming shouldn't be done in data */
406 sprintf(buffer,
407 "#ifndef IN_GENERATED_CCODE\n"
408 "#define IN_GENERATED_CCODE\n"
409 "#define U_DISABLE_RENAMING 1\n"
410 "#include \"unicode/umachine.h\"\n"
411 "#endif\n"
412 "U_CDECL_BEGIN\n"
413 "const struct {\n"
414 " double bogus;\n"
415 " uint8_t bytes[%ld]; \n"
416 "} %s={ 0.0, {\n",
417 (long)T_FileStream_size(in), entry);
418 T_FileStream_writeLine(out, buffer);
419
420 for(;;) {
421 length=T_FileStream_read(in, buffer, sizeof(buffer));
422 if(length==0) {
423 break;
424 }
425 for(i=0; i<length; ++i) {
426 column = write8(out, (uint8_t)buffer[i], column);
427 }
428 }
429
430 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
431 #endif
432
433 if(T_FileStream_error(in)) {
434 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
435 exit(U_FILE_ACCESS_ERROR);
436 }
437
438 if(T_FileStream_error(out)) {
439 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
440 exit(U_FILE_ACCESS_ERROR);
441 }
442
443 T_FileStream_close(out);
444 T_FileStream_close(in);
445 }
446
447 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)448 write32(FileStream *out, uint32_t bitField, uint32_t column) {
449 int32_t i;
450 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
451 char *s = bitFieldStr;
452 uint8_t *ptrIdx = (uint8_t *)&bitField;
453 static const char hexToStr[16] = {
454 '0','1','2','3',
455 '4','5','6','7',
456 '8','9','A','B',
457 'C','D','E','F'
458 };
459
460 /* write the value, possibly with comma and newline */
461 if(column==MAX_COLUMN) {
462 /* first byte */
463 column=1;
464 } else if(column<32) {
465 *(s++)=',';
466 ++column;
467 } else {
468 *(s++)='\n';
469 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
470 s+=uprv_strlen(s);
471 column=1;
472 }
473
474 if (bitField < 10) {
475 /* It's a small number. Don't waste the space for 0x */
476 *(s++)=hexToStr[bitField];
477 }
478 else {
479 int seenNonZero = 0; /* This is used to remove leading zeros */
480
481 if(hexType==HEX_0X) {
482 *(s++)='0';
483 *(s++)='x';
484 } else if(hexType==HEX_0H) {
485 *(s++)='0';
486 }
487
488 /* This creates a 32-bit field */
489 #if U_IS_BIG_ENDIAN
490 for (i = 0; i < sizeof(uint32_t); i++)
491 #else
492 for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
493 #endif
494 {
495 uint8_t value = ptrIdx[i];
496 if (value || seenNonZero) {
497 *(s++)=hexToStr[value>>4];
498 *(s++)=hexToStr[value&0xF];
499 seenNonZero = 1;
500 }
501 }
502 if(hexType==HEX_0H) {
503 *(s++)='h';
504 }
505 }
506
507 *(s++)=0;
508 T_FileStream_writeLine(out, bitFieldStr);
509 return column;
510 }
511
512 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)513 write8(FileStream *out, uint8_t byte, uint32_t column) {
514 char s[4];
515 int i=0;
516
517 /* convert the byte value to a string */
518 if(byte>=100) {
519 s[i++]=(char)('0'+byte/100);
520 byte%=100;
521 }
522 if(i>0 || byte>=10) {
523 s[i++]=(char)('0'+byte/10);
524 byte%=10;
525 }
526 s[i++]=(char)('0'+byte);
527 s[i]=0;
528
529 /* write the value, possibly with comma and newline */
530 if(column==MAX_COLUMN) {
531 /* first byte */
532 column=1;
533 } else if(column<16) {
534 T_FileStream_writeLine(out, ",");
535 ++column;
536 } else {
537 T_FileStream_writeLine(out, ",\n");
538 column=1;
539 }
540 T_FileStream_writeLine(out, s);
541 return column;
542 }
543
544 #if U_PLATFORM == U_PF_OS400
545 static uint32_t
write8str(FileStream * out,uint8_t byte,uint32_t column)546 write8str(FileStream *out, uint8_t byte, uint32_t column) {
547 char s[8];
548
549 if (byte > 7)
550 sprintf(s, "\\x%X", byte);
551 else
552 sprintf(s, "\\%X", byte);
553
554 /* write the value, possibly with comma and newline */
555 if(column==MAX_COLUMN) {
556 /* first byte */
557 column=1;
558 T_FileStream_writeLine(out, "\"");
559 } else if(column<24) {
560 ++column;
561 } else {
562 T_FileStream_writeLine(out, "\"\n\"");
563 column=1;
564 }
565 T_FileStream_writeLine(out, s);
566 return column;
567 }
568 #endif
569
570 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)571 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
572 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
573
574 /* copy path */
575 if(destdir!=NULL && *destdir!=0) {
576 do {
577 *outFilename++=*destdir++;
578 } while(*destdir!=0);
579 if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
580 *outFilename++=U_FILE_SEP_CHAR;
581 }
582 inFilename=basename;
583 } else {
584 while(inFilename<basename) {
585 *outFilename++=*inFilename++;
586 }
587 }
588
589 if(suffix==NULL) {
590 /* the filename does not have a suffix */
591 uprv_strcpy(entryName, inFilename);
592 if(optFilename != NULL) {
593 uprv_strcpy(outFilename, optFilename);
594 } else {
595 uprv_strcpy(outFilename, inFilename);
596 }
597 uprv_strcat(outFilename, newSuffix);
598 } else {
599 char *saveOutFilename = outFilename;
600 /* copy basename */
601 while(inFilename<suffix) {
602 if(*inFilename=='-') {
603 /* iSeries cannot have '-' in the .o objects. */
604 *outFilename++=*entryName++='_';
605 inFilename++;
606 }
607 else {
608 *outFilename++=*entryName++=*inFilename++;
609 }
610 }
611
612 /* replace '.' by '_' */
613 *outFilename++=*entryName++='_';
614 ++inFilename;
615
616 /* copy suffix */
617 while(*inFilename!=0) {
618 *outFilename++=*entryName++=*inFilename++;
619 }
620
621 *entryName=0;
622
623 if(optFilename != NULL) {
624 uprv_strcpy(saveOutFilename, optFilename);
625 uprv_strcat(saveOutFilename, newSuffix);
626 } else {
627 /* add ".c" */
628 uprv_strcpy(outFilename, newSuffix);
629 }
630 }
631 }
632
633 #ifdef CAN_GENERATE_OBJECTS
634 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)635 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
636 union {
637 char bytes[2048];
638 #ifdef U_ELF
639 Elf32_Ehdr header32;
640 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
641 #elif U_PLATFORM_HAS_WIN32_API
642 IMAGE_FILE_HEADER header;
643 #endif
644 } buffer;
645
646 const char *filename;
647 FileStream *in;
648 int32_t length;
649
650 #ifdef U_ELF
651
652 #elif U_PLATFORM_HAS_WIN32_API
653 const IMAGE_FILE_HEADER *pHeader;
654 #else
655 # error "Unknown platform for CAN_GENERATE_OBJECTS."
656 #endif
657
658 if(optMatchArch != NULL) {
659 filename=optMatchArch;
660 } else {
661 /* set defaults */
662 #ifdef U_ELF
663 /* set EM_386 because elf.h does not provide better defaults */
664 *pCPU=EM_386;
665 *pBits=32;
666 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
667 #elif U_PLATFORM_HAS_WIN32_API
668 /* _M_IA64 should be defined in windows.h */
669 # if defined(_M_IA64)
670 *pCPU=IMAGE_FILE_MACHINE_IA64;
671 # elif defined(_M_AMD64)
672 *pCPU=IMAGE_FILE_MACHINE_AMD64;
673 # else
674 *pCPU=IMAGE_FILE_MACHINE_I386;
675 # endif
676 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
677 *pIsBigEndian=FALSE;
678 #else
679 # error "Unknown platform for CAN_GENERATE_OBJECTS."
680 #endif
681 return;
682 }
683
684 in=T_FileStream_open(filename, "rb");
685 if(in==NULL) {
686 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
687 exit(U_FILE_ACCESS_ERROR);
688 }
689 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
690
691 #ifdef U_ELF
692 if(length<sizeof(Elf32_Ehdr)) {
693 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
694 exit(U_UNSUPPORTED_ERROR);
695 }
696 if(
697 buffer.header32.e_ident[0]!=ELFMAG0 ||
698 buffer.header32.e_ident[1]!=ELFMAG1 ||
699 buffer.header32.e_ident[2]!=ELFMAG2 ||
700 buffer.header32.e_ident[3]!=ELFMAG3 ||
701 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
702 ) {
703 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
704 exit(U_UNSUPPORTED_ERROR);
705 }
706
707 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
708 #ifdef U_ELF64
709 if(*pBits!=32 && *pBits!=64) {
710 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
711 exit(U_UNSUPPORTED_ERROR);
712 }
713 #else
714 if(*pBits!=32) {
715 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
716 exit(U_UNSUPPORTED_ERROR);
717 }
718 #endif
719
720 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
721 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
722 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
723 exit(U_UNSUPPORTED_ERROR);
724 }
725 /* TODO: Support byte swapping */
726
727 *pCPU=buffer.header32.e_machine;
728 #elif U_PLATFORM_HAS_WIN32_API
729 if(length<sizeof(IMAGE_FILE_HEADER)) {
730 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
731 exit(U_UNSUPPORTED_ERROR);
732 }
733 /* TODO: Use buffer.header. Keep aliasing legal. */
734 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
735 *pCPU=pHeader->Machine;
736 /*
737 * The number of bits is implicit with the Machine value.
738 * *pBits is ignored in the calling code, so this need not be precise.
739 */
740 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
741 /* Windows always runs on little-endian CPUs. */
742 *pIsBigEndian=FALSE;
743 #else
744 # error "Unknown platform for CAN_GENERATE_OBJECTS."
745 #endif
746
747 T_FileStream_close(in);
748 }
749
750 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)751 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
752 /* common variables */
753 char buffer[4096], entry[40]={ 0 };
754 FileStream *in, *out;
755 const char *newSuffix;
756 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
757
758 uint16_t cpu, bits;
759 UBool makeBigEndian;
760
761 /* platform-specific variables and initialization code */
762 #ifdef U_ELF
763 /* 32-bit Elf file header */
764 static Elf32_Ehdr header32={
765 {
766 /* e_ident[] */
767 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
768 ELFCLASS32,
769 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
770 EV_CURRENT /* EI_VERSION */
771 },
772 ET_REL,
773 EM_386,
774 EV_CURRENT, /* e_version */
775 0, /* e_entry */
776 0, /* e_phoff */
777 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
778 0, /* e_flags */
779 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
780 0, /* e_phentsize */
781 0, /* e_phnum */
782 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
783 5, /* e_shnum */
784 2 /* e_shstrndx */
785 };
786
787 /* 32-bit Elf section header table */
788 static Elf32_Shdr sectionHeaders32[5]={
789 { /* SHN_UNDEF */
790 0
791 },
792 { /* .symtab */
793 1, /* sh_name */
794 SHT_SYMTAB,
795 0, /* sh_flags */
796 0, /* sh_addr */
797 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
798 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
799 3, /* sh_link=sect hdr index of .strtab */
800 1, /* sh_info=One greater than the symbol table index of the last
801 * local symbol (with STB_LOCAL). */
802 4, /* sh_addralign */
803 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
804 },
805 { /* .shstrtab */
806 9, /* sh_name */
807 SHT_STRTAB,
808 0, /* sh_flags */
809 0, /* sh_addr */
810 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
811 40, /* sh_size */
812 0, /* sh_link */
813 0, /* sh_info */
814 1, /* sh_addralign */
815 0 /* sh_entsize */
816 },
817 { /* .strtab */
818 19, /* sh_name */
819 SHT_STRTAB,
820 0, /* sh_flags */
821 0, /* sh_addr */
822 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
823 (Elf32_Word)sizeof(entry), /* sh_size */
824 0, /* sh_link */
825 0, /* sh_info */
826 1, /* sh_addralign */
827 0 /* sh_entsize */
828 },
829 { /* .rodata */
830 27, /* sh_name */
831 SHT_PROGBITS,
832 SHF_ALLOC, /* sh_flags */
833 0, /* sh_addr */
834 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
835 0, /* sh_size */
836 0, /* sh_link */
837 0, /* sh_info */
838 16, /* sh_addralign */
839 0 /* sh_entsize */
840 }
841 };
842
843 /* symbol table */
844 static Elf32_Sym symbols32[2]={
845 { /* STN_UNDEF */
846 0
847 },
848 { /* data entry point */
849 1, /* st_name */
850 0, /* st_value */
851 0, /* st_size */
852 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
853 0, /* st_other */
854 4 /* st_shndx=index of related section table entry */
855 }
856 };
857
858 /* section header string table, with decimal string offsets */
859 static const char sectionStrings[40]=
860 /* 0 */ "\0"
861 /* 1 */ ".symtab\0"
862 /* 9 */ ".shstrtab\0"
863 /* 19 */ ".strtab\0"
864 /* 27 */ ".rodata\0"
865 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
866 /* 40: padded to multiple of 8 bytes */
867
868 /*
869 * Use entry[] for the string table which will contain only the
870 * entry point name.
871 * entry[0] must be 0 (NUL)
872 * The entry point name can be up to 38 characters long (sizeof(entry)-2).
873 */
874
875 /* 16-align .rodata in the .o file, just in case */
876 static const char padding[16]={ 0 };
877 int32_t paddingSize;
878
879 #ifdef U_ELF64
880 /* 64-bit Elf file header */
881 static Elf64_Ehdr header64={
882 {
883 /* e_ident[] */
884 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
885 ELFCLASS64,
886 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
887 EV_CURRENT /* EI_VERSION */
888 },
889 ET_REL,
890 EM_X86_64,
891 EV_CURRENT, /* e_version */
892 0, /* e_entry */
893 0, /* e_phoff */
894 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
895 0, /* e_flags */
896 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
897 0, /* e_phentsize */
898 0, /* e_phnum */
899 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
900 5, /* e_shnum */
901 2 /* e_shstrndx */
902 };
903
904 /* 64-bit Elf section header table */
905 static Elf64_Shdr sectionHeaders64[5]={
906 { /* SHN_UNDEF */
907 0
908 },
909 { /* .symtab */
910 1, /* sh_name */
911 SHT_SYMTAB,
912 0, /* sh_flags */
913 0, /* sh_addr */
914 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
915 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
916 3, /* sh_link=sect hdr index of .strtab */
917 1, /* sh_info=One greater than the symbol table index of the last
918 * local symbol (with STB_LOCAL). */
919 4, /* sh_addralign */
920 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
921 },
922 { /* .shstrtab */
923 9, /* sh_name */
924 SHT_STRTAB,
925 0, /* sh_flags */
926 0, /* sh_addr */
927 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
928 40, /* sh_size */
929 0, /* sh_link */
930 0, /* sh_info */
931 1, /* sh_addralign */
932 0 /* sh_entsize */
933 },
934 { /* .strtab */
935 19, /* sh_name */
936 SHT_STRTAB,
937 0, /* sh_flags */
938 0, /* sh_addr */
939 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
940 (Elf64_Xword)sizeof(entry), /* sh_size */
941 0, /* sh_link */
942 0, /* sh_info */
943 1, /* sh_addralign */
944 0 /* sh_entsize */
945 },
946 { /* .rodata */
947 27, /* sh_name */
948 SHT_PROGBITS,
949 SHF_ALLOC, /* sh_flags */
950 0, /* sh_addr */
951 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
952 0, /* sh_size */
953 0, /* sh_link */
954 0, /* sh_info */
955 16, /* sh_addralign */
956 0 /* sh_entsize */
957 }
958 };
959
960 /*
961 * 64-bit symbol table
962 * careful: different order of items compared with Elf32_sym!
963 */
964 static Elf64_Sym symbols64[2]={
965 { /* STN_UNDEF */
966 0
967 },
968 { /* data entry point */
969 1, /* st_name */
970 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
971 0, /* st_other */
972 4, /* st_shndx=index of related section table entry */
973 0, /* st_value */
974 0 /* st_size */
975 }
976 };
977
978 #endif /* U_ELF64 */
979
980 /* entry[] have a leading NUL */
981 entryOffset=1;
982
983 /* in the common code, count entryLength from after the NUL */
984 entryLengthOffset=1;
985
986 newSuffix=".o";
987
988 #elif U_PLATFORM_HAS_WIN32_API
989 struct {
990 IMAGE_FILE_HEADER fileHeader;
991 IMAGE_SECTION_HEADER sections[2];
992 char linkerOptions[100];
993 } objHeader;
994 IMAGE_SYMBOL symbols[1];
995 struct {
996 DWORD sizeofLongNames;
997 char longNames[100];
998 } symbolNames;
999
1000 /*
1001 * entry sometimes have a leading '_'
1002 * overwritten if entryOffset==0 depending on the target platform
1003 * see check for cpu below
1004 */
1005 entry[0]='_';
1006
1007 newSuffix=".obj";
1008 #else
1009 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1010 #endif
1011
1012 /* deal with options, files and the entry point name */
1013 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1014 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1015 #if U_PLATFORM_HAS_WIN32_API
1016 if(cpu==IMAGE_FILE_MACHINE_I386) {
1017 entryOffset=1;
1018 }
1019 #endif
1020
1021 in=T_FileStream_open(filename, "rb");
1022 if(in==NULL) {
1023 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1024 exit(U_FILE_ACCESS_ERROR);
1025 }
1026 size=T_FileStream_size(in);
1027
1028 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1029 if (outFilePath != NULL) {
1030 uprv_strcpy(outFilePath, buffer);
1031 }
1032
1033 if(optEntryPoint != NULL) {
1034 uprv_strcpy(entry+entryOffset, optEntryPoint);
1035 uprv_strcat(entry+entryOffset, "_dat");
1036 }
1037 /* turn dashes in the entry name into underscores */
1038 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1039 for(i=0; i<entryLength; ++i) {
1040 if(entry[entryLengthOffset+i]=='-') {
1041 entry[entryLengthOffset+i]='_';
1042 }
1043 }
1044
1045 /* open the output file */
1046 out=T_FileStream_open(buffer, "wb");
1047 if(out==NULL) {
1048 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1049 exit(U_FILE_ACCESS_ERROR);
1050 }
1051
1052 #ifdef U_ELF
1053 if(bits==32) {
1054 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1055 header32.e_machine=cpu;
1056
1057 /* 16-align .rodata in the .o file, just in case */
1058 paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1059 if(paddingSize!=0) {
1060 paddingSize=0x10-paddingSize;
1061 sectionHeaders32[4].sh_offset+=paddingSize;
1062 }
1063
1064 sectionHeaders32[4].sh_size=(Elf32_Word)size;
1065
1066 symbols32[1].st_size=(Elf32_Word)size;
1067
1068 /* write .o headers */
1069 T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1070 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1071 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1072 } else /* bits==64 */ {
1073 #ifdef U_ELF64
1074 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1075 header64.e_machine=cpu;
1076
1077 /* 16-align .rodata in the .o file, just in case */
1078 paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1079 if(paddingSize!=0) {
1080 paddingSize=0x10-paddingSize;
1081 sectionHeaders64[4].sh_offset+=paddingSize;
1082 }
1083
1084 sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1085
1086 symbols64[1].st_size=(Elf64_Xword)size;
1087
1088 /* write .o headers */
1089 T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1090 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1091 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1092 #endif
1093 }
1094
1095 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1096 T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1097 if(paddingSize!=0) {
1098 T_FileStream_write(out, padding, paddingSize);
1099 }
1100 #elif U_PLATFORM_HAS_WIN32_API
1101 /* populate the .obj headers */
1102 uprv_memset(&objHeader, 0, sizeof(objHeader));
1103 uprv_memset(&symbols, 0, sizeof(symbols));
1104 uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1105
1106 /* write the linker export directive */
1107 uprv_strcpy(objHeader.linkerOptions, "-export:");
1108 length=8;
1109 uprv_strcpy(objHeader.linkerOptions+length, entry);
1110 length+=entryLength;
1111 uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1112 length+=6;
1113
1114 /* set the file header */
1115 objHeader.fileHeader.Machine=cpu;
1116 objHeader.fileHeader.NumberOfSections=2;
1117 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1118 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1119 objHeader.fileHeader.NumberOfSymbols=1;
1120
1121 /* set the section for the linker options */
1122 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1123 objHeader.sections[0].SizeOfRawData=length;
1124 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1125 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1126
1127 /* set the data section */
1128 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1129 objHeader.sections[1].SizeOfRawData=size;
1130 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1131 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1132
1133 /* set the symbol table */
1134 if(entryLength<=8) {
1135 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1136 symbolNames.sizeofLongNames=4;
1137 } else {
1138 symbols[0].N.Name.Short=0;
1139 symbols[0].N.Name.Long=4;
1140 symbolNames.sizeofLongNames=4+entryLength+1;
1141 uprv_strcpy(symbolNames.longNames, entry);
1142 }
1143 symbols[0].SectionNumber=2;
1144 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1145
1146 /* write the file header and the linker options section */
1147 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1148 #else
1149 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1150 #endif
1151
1152 /* copy the data file into section 2 */
1153 for(;;) {
1154 length=T_FileStream_read(in, buffer, sizeof(buffer));
1155 if(length==0) {
1156 break;
1157 }
1158 T_FileStream_write(out, buffer, (int32_t)length);
1159 }
1160
1161 #if U_PLATFORM_HAS_WIN32_API
1162 /* write the symbol table */
1163 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1164 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1165 #endif
1166
1167 if(T_FileStream_error(in)) {
1168 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1169 exit(U_FILE_ACCESS_ERROR);
1170 }
1171
1172 if(T_FileStream_error(out)) {
1173 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1174 exit(U_FILE_ACCESS_ERROR);
1175 }
1176
1177 T_FileStream_close(out);
1178 T_FileStream_close(in);
1179 }
1180 #endif
1181