1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /******************************************************************************
4 * Copyright (C) 2009-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *******************************************************************************
7 */
8 #include "unicode/utypes.h"
9
10 #if U_PLATFORM_HAS_WIN32_API
11 # define VC_EXTRALEAN
12 # define WIN32_LEAN_AND_MEAN
13 # define NOUSER
14 # define NOSERVICE
15 # define NOIME
16 # define NOMCX
17 #include <windows.h>
18 #include <time.h>
19 # ifdef __GNUC__
20 # define WINDOWS_WITH_GNUC
21 # endif
22 #endif
23
24 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
25 # define U_ELF
26 #endif
27
28 #ifdef U_ELF
29 # include <elf.h>
30 # if defined(ELFCLASS64)
31 # define U_ELF64
32 # endif
33 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
34 # ifndef EM_X86_64
35 # define EM_X86_64 62
36 # endif
37 # define ICU_ENTRY_OFFSET 0
38 #endif
39
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include "unicode/putil.h"
43 #include "cmemory.h"
44 #include "cstring.h"
45 #include "filestrm.h"
46 #include "toolutil.h"
47 #include "unicode/uclean.h"
48 #include "uoptions.h"
49 #include "pkg_genc.h"
50
51 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
52
53 #define HEX_0X 0 /* 0x1234 */
54 #define HEX_0H 1 /* 01234h */
55
56 /* prototypes --------------------------------------------------------------- */
57 static void
58 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
59
60 static uint32_t
61 write8(FileStream *out, uint8_t byte, uint32_t column);
62
63 static uint32_t
64 write32(FileStream *out, uint32_t byte, uint32_t column);
65
66 #if U_PLATFORM == U_PF_OS400
67 static uint32_t
68 write8str(FileStream *out, uint8_t byte, uint32_t column);
69 #endif
70 /* -------------------------------------------------------------------------- */
71
72 /*
73 Creating Template Files for New Platforms
74
75 Let the cc compiler help you get started.
76 Compile this program
77 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
78 with the -S option to produce assembly output.
79
80 For example, this will generate array.s:
81 gcc -S array.c
82
83 This will produce a .s file that may look like this:
84
85 .file "array.c"
86 .version "01.01"
87 gcc2_compiled.:
88 .globl x
89 .section .rodata
90 .align 4
91 .type x,@object
92 .size x,20
93 x:
94 .long 1
95 .long 2
96 .long -559038737
97 .long -1
98 .long 16
99 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
100
101 which gives a starting point that will compile, and can be transformed
102 to become the template, generally with some consulting of as docs and
103 some experimentation.
104
105 If you want ICU to automatically use this assembly, you should
106 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
107 where the name is the compiler or platform that you used in this
108 assemblyHeader data structure.
109 */
/*
 * Table of assembly output templates, one entry per supported assembler
 * dialect. checkAssemblyHeaderName() selects an entry by name and
 * writeAssemblyCode()/write32() use the selected entry.
 *
 * name      - identifier compared against the requested assembly type.
 * header    - printf-style template written once before the data;
 *             every %s is filled with the entry point (symbol) name.
 * beginLine - directive written at the start of each line of 32-bit values.
 * footer    - printf-style template written once after the data (may be
 *             empty); every %s is filled with the entry point name.
 * hexType   - HEX_0X or HEX_0H: spelling of hexadecimal values in write32().
 */
static const struct AssemblyType {
    const char *name;
    const char *header;
    const char *beginLine;
    const char *footer;
    int8_t hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    /* For gcc assemblers, the meaning of .align changes depending on the */
    /* hardware, so we use .balign 16 which always means 16 bytes. */
    /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.hidden %s\n"
        "#endif\n"
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ",".size %s, .-%s\n",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.private_extern _%s\n"
        "#endif\n"
        "\t.data\n"
        "\t.const\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    /* 16 bytes alignment. */
    /* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align 16\n"
        ".globl %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
    /* 16 bytes alignment for sun-x86. */
    /* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type Drodata.rodata,@object\n"
        "\t.size Drodata.rodata,0\n"
        "\t.globl %s\n"
        "\t.align 16\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
    /* 1<<4 bit alignment for aix. */
    /* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file \"%s.s\"\n"
        "\t.type %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align 16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN 16\n",

        ".WORD ","",HEX_0X
    },
    /* align 16 bytes */
    /* http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */
    { "masm",
        "\tTITLE %s\n"
        "; generated by genccode\n"
        ".386\n"
        ".model flat\n"
        "\tPUBLIC _%s\n"
        "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
        "\tALIGN 16\n"
        "_%s\tLABEL DWORD\n",
        "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};

/* Index into assemblyHeader[] of the selected template; -1 until checkAssemblyHeaderName() finds a match. */
static int32_t assemblyHeaderIndex = -1;
/* Hexadecimal spelling (HEX_0X or HEX_0H) used by write32(); set together with assemblyHeaderIndex. */
static int32_t hexType = HEX_0X;
232
233 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)234 checkAssemblyHeaderName(const char* optAssembly) {
235 int32_t idx;
236 assemblyHeaderIndex = -1;
237 for (idx = 0; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
238 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
239 assemblyHeaderIndex = idx;
240 hexType = assemblyHeader[idx].hexType; /* set the hex type */
241 return TRUE;
242 }
243 }
244
245 return FALSE;
246 }
247
248
249 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)250 printAssemblyHeadersToStdErr(void) {
251 int32_t idx;
252 fprintf(stderr, "%s", assemblyHeader[0].name);
253 for (idx = 1; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
254 fprintf(stderr, ", %s", assemblyHeader[idx].name);
255 }
256 fprintf(stderr,
257 ")\n");
258 }
259
260 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)261 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
262 uint32_t column = MAX_COLUMN;
263 char entry[64];
264 uint32_t buffer[1024];
265 char *bufferStr = (char *)buffer;
266 FileStream *in, *out;
267 size_t i, length;
268
269 in=T_FileStream_open(filename, "rb");
270 if(in==NULL) {
271 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
272 exit(U_FILE_ACCESS_ERROR);
273 }
274
275 getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
276 out=T_FileStream_open(bufferStr, "w");
277 if(out==NULL) {
278 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
279 exit(U_FILE_ACCESS_ERROR);
280 }
281
282 if (outFilePath != NULL) {
283 uprv_strcpy(outFilePath, bufferStr);
284 }
285
286 #ifdef WINDOWS_WITH_GNUC
287 /* Need to fix the file seperator character when using MinGW. */
288 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
289 #endif
290
291 if(optEntryPoint != NULL) {
292 uprv_strcpy(entry, optEntryPoint);
293 uprv_strcat(entry, "_dat");
294 }
295
296 /* turn dashes or dots in the entry name into underscores */
297 length=uprv_strlen(entry);
298 for(i=0; i<length; ++i) {
299 if(entry[i]=='-' || entry[i]=='.') {
300 entry[i]='_';
301 }
302 }
303
304 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
305 entry, entry, entry, entry,
306 entry, entry, entry, entry);
307 T_FileStream_writeLine(out, bufferStr);
308 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
309
310 for(;;) {
311 length=T_FileStream_read(in, buffer, sizeof(buffer));
312 if(length==0) {
313 break;
314 }
315 if (length != sizeof(buffer)) {
316 /* pad with extra 0's when at the end of the file */
317 for(i=0; i < (length % sizeof(uint32_t)); ++i) {
318 buffer[length+i] = 0;
319 }
320 }
321 for(i=0; i<(length/sizeof(buffer[0])); i++) {
322 column = write32(out, buffer[i], column);
323 }
324 }
325
326 T_FileStream_writeLine(out, "\n");
327
328 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
329 entry, entry, entry, entry,
330 entry, entry, entry, entry);
331 T_FileStream_writeLine(out, bufferStr);
332
333 if(T_FileStream_error(in)) {
334 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
335 exit(U_FILE_ACCESS_ERROR);
336 }
337
338 if(T_FileStream_error(out)) {
339 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
340 exit(U_FILE_ACCESS_ERROR);
341 }
342
343 T_FileStream_close(out);
344 T_FileStream_close(in);
345 }
346
347 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)348 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
349 uint32_t column = MAX_COLUMN;
350 char buffer[4096], entry[64];
351 FileStream *in, *out;
352 size_t i, length;
353
354 in=T_FileStream_open(filename, "rb");
355 if(in==NULL) {
356 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
357 exit(U_FILE_ACCESS_ERROR);
358 }
359
360 if(optName != NULL) { /* prepend 'icudt28_' */
361 strcpy(entry, optName);
362 strcat(entry, "_");
363 } else {
364 entry[0] = 0;
365 }
366
367 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
368 if (outFilePath != NULL) {
369 uprv_strcpy(outFilePath, buffer);
370 }
371 out=T_FileStream_open(buffer, "w");
372 if(out==NULL) {
373 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
374 exit(U_FILE_ACCESS_ERROR);
375 }
376
377 /* turn dashes or dots in the entry name into underscores */
378 length=uprv_strlen(entry);
379 for(i=0; i<length; ++i) {
380 if(entry[i]=='-' || entry[i]=='.') {
381 entry[i]='_';
382 }
383 }
384
385 #if U_PLATFORM == U_PF_OS400
386 /*
387 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
388
389 This is here because this platform can't currently put
390 const data into the read-only pages of an object or
391 shared library (service program). Only strings are allowed in read-only
392 pages, so we use char * strings to store the data.
393
394 In order to prevent the beginning of the data from ever matching the
395 magic numbers we must still use the initial double.
396 [grhoten 4/24/2003]
397 */
398 sprintf(buffer,
399 "#ifndef IN_GENERATED_CCODE\n"
400 "#define IN_GENERATED_CCODE\n"
401 "#define U_DISABLE_RENAMING 1\n"
402 "#include \"unicode/umachine.h\"\n"
403 "#endif\n"
404 "U_CDECL_BEGIN\n"
405 "const struct {\n"
406 " double bogus;\n"
407 " const char *bytes; \n"
408 "} %s={ 0.0, \n",
409 entry);
410 T_FileStream_writeLine(out, buffer);
411
412 for(;;) {
413 length=T_FileStream_read(in, buffer, sizeof(buffer));
414 if(length==0) {
415 break;
416 }
417 for(i=0; i<length; ++i) {
418 column = write8str(out, (uint8_t)buffer[i], column);
419 }
420 }
421
422 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
423 #else
424 /* Function renaming shouldn't be done in data */
425 sprintf(buffer,
426 "#ifndef IN_GENERATED_CCODE\n"
427 "#define IN_GENERATED_CCODE\n"
428 "#define U_DISABLE_RENAMING 1\n"
429 "#include \"unicode/umachine.h\"\n"
430 "#endif\n"
431 "U_CDECL_BEGIN\n"
432 "const struct {\n"
433 " double bogus;\n"
434 " uint8_t bytes[%ld]; \n"
435 "} %s={ 0.0, {\n",
436 (long)T_FileStream_size(in), entry);
437 T_FileStream_writeLine(out, buffer);
438
439 for(;;) {
440 length=T_FileStream_read(in, buffer, sizeof(buffer));
441 if(length==0) {
442 break;
443 }
444 for(i=0; i<length; ++i) {
445 column = write8(out, (uint8_t)buffer[i], column);
446 }
447 }
448
449 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
450 #endif
451
452 if(T_FileStream_error(in)) {
453 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
454 exit(U_FILE_ACCESS_ERROR);
455 }
456
457 if(T_FileStream_error(out)) {
458 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
459 exit(U_FILE_ACCESS_ERROR);
460 }
461
462 T_FileStream_close(out);
463 T_FileStream_close(in);
464 }
465
466 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)467 write32(FileStream *out, uint32_t bitField, uint32_t column) {
468 int32_t i;
469 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
470 char *s = bitFieldStr;
471 uint8_t *ptrIdx = (uint8_t *)&bitField;
472 static const char hexToStr[16] = {
473 '0','1','2','3',
474 '4','5','6','7',
475 '8','9','A','B',
476 'C','D','E','F'
477 };
478
479 /* write the value, possibly with comma and newline */
480 if(column==MAX_COLUMN) {
481 /* first byte */
482 column=1;
483 } else if(column<32) {
484 *(s++)=',';
485 ++column;
486 } else {
487 *(s++)='\n';
488 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
489 s+=uprv_strlen(s);
490 column=1;
491 }
492
493 if (bitField < 10) {
494 /* It's a small number. Don't waste the space for 0x */
495 *(s++)=hexToStr[bitField];
496 }
497 else {
498 int seenNonZero = 0; /* This is used to remove leading zeros */
499
500 if(hexType==HEX_0X) {
501 *(s++)='0';
502 *(s++)='x';
503 } else if(hexType==HEX_0H) {
504 *(s++)='0';
505 }
506
507 /* This creates a 32-bit field */
508 #if U_IS_BIG_ENDIAN
509 for (i = 0; i < sizeof(uint32_t); i++)
510 #else
511 for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
512 #endif
513 {
514 uint8_t value = ptrIdx[i];
515 if (value || seenNonZero) {
516 *(s++)=hexToStr[value>>4];
517 *(s++)=hexToStr[value&0xF];
518 seenNonZero = 1;
519 }
520 }
521 if(hexType==HEX_0H) {
522 *(s++)='h';
523 }
524 }
525
526 *(s++)=0;
527 T_FileStream_writeLine(out, bitFieldStr);
528 return column;
529 }
530
531 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)532 write8(FileStream *out, uint8_t byte, uint32_t column) {
533 char s[4];
534 int i=0;
535
536 /* convert the byte value to a string */
537 if(byte>=100) {
538 s[i++]=(char)('0'+byte/100);
539 byte%=100;
540 }
541 if(i>0 || byte>=10) {
542 s[i++]=(char)('0'+byte/10);
543 byte%=10;
544 }
545 s[i++]=(char)('0'+byte);
546 s[i]=0;
547
548 /* write the value, possibly with comma and newline */
549 if(column==MAX_COLUMN) {
550 /* first byte */
551 column=1;
552 } else if(column<16) {
553 T_FileStream_writeLine(out, ",");
554 ++column;
555 } else {
556 T_FileStream_writeLine(out, ",\n");
557 column=1;
558 }
559 T_FileStream_writeLine(out, s);
560 return column;
561 }
562
563 #if U_PLATFORM == U_PF_OS400
564 static uint32_t
write8str(FileStream * out,uint8_t byte,uint32_t column)565 write8str(FileStream *out, uint8_t byte, uint32_t column) {
566 char s[8];
567
568 if (byte > 7)
569 sprintf(s, "\\x%X", byte);
570 else
571 sprintf(s, "\\%X", byte);
572
573 /* write the value, possibly with comma and newline */
574 if(column==MAX_COLUMN) {
575 /* first byte */
576 column=1;
577 T_FileStream_writeLine(out, "\"");
578 } else if(column<24) {
579 ++column;
580 } else {
581 T_FileStream_writeLine(out, "\"\n\"");
582 column=1;
583 }
584 T_FileStream_writeLine(out, s);
585 return column;
586 }
587 #endif
588
589 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)590 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
591 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
592
593 /* copy path */
594 if(destdir!=NULL && *destdir!=0) {
595 do {
596 *outFilename++=*destdir++;
597 } while(*destdir!=0);
598 if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
599 *outFilename++=U_FILE_SEP_CHAR;
600 }
601 inFilename=basename;
602 } else {
603 while(inFilename<basename) {
604 *outFilename++=*inFilename++;
605 }
606 }
607
608 if(suffix==NULL) {
609 /* the filename does not have a suffix */
610 uprv_strcpy(entryName, inFilename);
611 if(optFilename != NULL) {
612 uprv_strcpy(outFilename, optFilename);
613 } else {
614 uprv_strcpy(outFilename, inFilename);
615 }
616 uprv_strcat(outFilename, newSuffix);
617 } else {
618 char *saveOutFilename = outFilename;
619 /* copy basename */
620 while(inFilename<suffix) {
621 if(*inFilename=='-') {
622 /* iSeries cannot have '-' in the .o objects. */
623 *outFilename++=*entryName++='_';
624 inFilename++;
625 }
626 else {
627 *outFilename++=*entryName++=*inFilename++;
628 }
629 }
630
631 /* replace '.' by '_' */
632 *outFilename++=*entryName++='_';
633 ++inFilename;
634
635 /* copy suffix */
636 while(*inFilename!=0) {
637 *outFilename++=*entryName++=*inFilename++;
638 }
639
640 *entryName=0;
641
642 if(optFilename != NULL) {
643 uprv_strcpy(saveOutFilename, optFilename);
644 uprv_strcat(saveOutFilename, newSuffix);
645 } else {
646 /* add ".c" */
647 uprv_strcpy(outFilename, newSuffix);
648 }
649 }
650 }
651
652 #ifdef CAN_GENERATE_OBJECTS
653 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)654 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
655 union {
656 char bytes[2048];
657 #ifdef U_ELF
658 Elf32_Ehdr header32;
659 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
660 #elif U_PLATFORM_HAS_WIN32_API
661 IMAGE_FILE_HEADER header;
662 #endif
663 } buffer;
664
665 const char *filename;
666 FileStream *in;
667 int32_t length;
668
669 #ifdef U_ELF
670
671 #elif U_PLATFORM_HAS_WIN32_API
672 const IMAGE_FILE_HEADER *pHeader;
673 #else
674 # error "Unknown platform for CAN_GENERATE_OBJECTS."
675 #endif
676
677 if(optMatchArch != NULL) {
678 filename=optMatchArch;
679 } else {
680 /* set defaults */
681 #ifdef U_ELF
682 /* set EM_386 because elf.h does not provide better defaults */
683 *pCPU=EM_386;
684 *pBits=32;
685 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
686 #elif U_PLATFORM_HAS_WIN32_API
687 /* _M_IA64 should be defined in windows.h */
688 # if defined(_M_IA64)
689 *pCPU=IMAGE_FILE_MACHINE_IA64;
690 # elif defined(_M_AMD64)
691 *pCPU=IMAGE_FILE_MACHINE_AMD64;
692 # else
693 *pCPU=IMAGE_FILE_MACHINE_I386;
694 # endif
695 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
696 *pIsBigEndian=FALSE;
697 #else
698 # error "Unknown platform for CAN_GENERATE_OBJECTS."
699 #endif
700 return;
701 }
702
703 in=T_FileStream_open(filename, "rb");
704 if(in==NULL) {
705 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
706 exit(U_FILE_ACCESS_ERROR);
707 }
708 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
709
710 #ifdef U_ELF
711 if(length<sizeof(Elf32_Ehdr)) {
712 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
713 exit(U_UNSUPPORTED_ERROR);
714 }
715 if(
716 buffer.header32.e_ident[0]!=ELFMAG0 ||
717 buffer.header32.e_ident[1]!=ELFMAG1 ||
718 buffer.header32.e_ident[2]!=ELFMAG2 ||
719 buffer.header32.e_ident[3]!=ELFMAG3 ||
720 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
721 ) {
722 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
723 exit(U_UNSUPPORTED_ERROR);
724 }
725
726 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
727 #ifdef U_ELF64
728 if(*pBits!=32 && *pBits!=64) {
729 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
730 exit(U_UNSUPPORTED_ERROR);
731 }
732 #else
733 if(*pBits!=32) {
734 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
735 exit(U_UNSUPPORTED_ERROR);
736 }
737 #endif
738
739 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
740 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
741 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
742 exit(U_UNSUPPORTED_ERROR);
743 }
744 /* TODO: Support byte swapping */
745
746 *pCPU=buffer.header32.e_machine;
747 #elif U_PLATFORM_HAS_WIN32_API
748 if(length<sizeof(IMAGE_FILE_HEADER)) {
749 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
750 exit(U_UNSUPPORTED_ERROR);
751 }
752 /* TODO: Use buffer.header. Keep aliasing legal. */
753 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
754 *pCPU=pHeader->Machine;
755 /*
756 * The number of bits is implicit with the Machine value.
757 * *pBits is ignored in the calling code, so this need not be precise.
758 */
759 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
760 /* Windows always runs on little-endian CPUs. */
761 *pIsBigEndian=FALSE;
762 #else
763 # error "Unknown platform for CAN_GENERATE_OBJECTS."
764 #endif
765
766 T_FileStream_close(in);
767 }
768
769 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)770 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
771 /* common variables */
772 char buffer[4096], entry[96]={ 0 };
773 FileStream *in, *out;
774 const char *newSuffix;
775 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
776
777 uint16_t cpu, bits;
778 UBool makeBigEndian;
779
780 /* platform-specific variables and initialization code */
781 #ifdef U_ELF
782 /* 32-bit Elf file header */
783 static Elf32_Ehdr header32={
784 {
785 /* e_ident[] */
786 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
787 ELFCLASS32,
788 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
789 EV_CURRENT /* EI_VERSION */
790 },
791 ET_REL,
792 EM_386,
793 EV_CURRENT, /* e_version */
794 0, /* e_entry */
795 0, /* e_phoff */
796 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
797 0, /* e_flags */
798 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
799 0, /* e_phentsize */
800 0, /* e_phnum */
801 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
802 5, /* e_shnum */
803 2 /* e_shstrndx */
804 };
805
806 /* 32-bit Elf section header table */
807 static Elf32_Shdr sectionHeaders32[5]={
808 { /* SHN_UNDEF */
809 0
810 },
811 { /* .symtab */
812 1, /* sh_name */
813 SHT_SYMTAB,
814 0, /* sh_flags */
815 0, /* sh_addr */
816 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
817 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
818 3, /* sh_link=sect hdr index of .strtab */
819 1, /* sh_info=One greater than the symbol table index of the last
820 * local symbol (with STB_LOCAL). */
821 4, /* sh_addralign */
822 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
823 },
824 { /* .shstrtab */
825 9, /* sh_name */
826 SHT_STRTAB,
827 0, /* sh_flags */
828 0, /* sh_addr */
829 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
830 40, /* sh_size */
831 0, /* sh_link */
832 0, /* sh_info */
833 1, /* sh_addralign */
834 0 /* sh_entsize */
835 },
836 { /* .strtab */
837 19, /* sh_name */
838 SHT_STRTAB,
839 0, /* sh_flags */
840 0, /* sh_addr */
841 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
842 (Elf32_Word)sizeof(entry), /* sh_size */
843 0, /* sh_link */
844 0, /* sh_info */
845 1, /* sh_addralign */
846 0 /* sh_entsize */
847 },
848 { /* .rodata */
849 27, /* sh_name */
850 SHT_PROGBITS,
851 SHF_ALLOC, /* sh_flags */
852 0, /* sh_addr */
853 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
854 0, /* sh_size */
855 0, /* sh_link */
856 0, /* sh_info */
857 16, /* sh_addralign */
858 0 /* sh_entsize */
859 }
860 };
861
862 /* symbol table */
863 static Elf32_Sym symbols32[2]={
864 { /* STN_UNDEF */
865 0
866 },
867 { /* data entry point */
868 1, /* st_name */
869 0, /* st_value */
870 0, /* st_size */
871 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
872 0, /* st_other */
873 4 /* st_shndx=index of related section table entry */
874 }
875 };
876
877 /* section header string table, with decimal string offsets */
878 static const char sectionStrings[40]=
879 /* 0 */ "\0"
880 /* 1 */ ".symtab\0"
881 /* 9 */ ".shstrtab\0"
882 /* 19 */ ".strtab\0"
883 /* 27 */ ".rodata\0"
884 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
885 /* 40: padded to multiple of 8 bytes */
886
887 /*
888 * Use entry[] for the string table which will contain only the
889 * entry point name.
890 * entry[0] must be 0 (NUL)
891 * The entry point name can be up to 38 characters long (sizeof(entry)-2).
892 */
893
894 /* 16-align .rodata in the .o file, just in case */
895 static const char padding[16]={ 0 };
896 int32_t paddingSize;
897
898 #ifdef U_ELF64
899 /* 64-bit Elf file header */
900 static Elf64_Ehdr header64={
901 {
902 /* e_ident[] */
903 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
904 ELFCLASS64,
905 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
906 EV_CURRENT /* EI_VERSION */
907 },
908 ET_REL,
909 EM_X86_64,
910 EV_CURRENT, /* e_version */
911 0, /* e_entry */
912 0, /* e_phoff */
913 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
914 0, /* e_flags */
915 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
916 0, /* e_phentsize */
917 0, /* e_phnum */
918 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
919 5, /* e_shnum */
920 2 /* e_shstrndx */
921 };
922
923 /* 64-bit Elf section header table */
924 static Elf64_Shdr sectionHeaders64[5]={
925 { /* SHN_UNDEF */
926 0
927 },
928 { /* .symtab */
929 1, /* sh_name */
930 SHT_SYMTAB,
931 0, /* sh_flags */
932 0, /* sh_addr */
933 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
934 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
935 3, /* sh_link=sect hdr index of .strtab */
936 1, /* sh_info=One greater than the symbol table index of the last
937 * local symbol (with STB_LOCAL). */
938 4, /* sh_addralign */
939 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
940 },
941 { /* .shstrtab */
942 9, /* sh_name */
943 SHT_STRTAB,
944 0, /* sh_flags */
945 0, /* sh_addr */
946 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
947 40, /* sh_size */
948 0, /* sh_link */
949 0, /* sh_info */
950 1, /* sh_addralign */
951 0 /* sh_entsize */
952 },
953 { /* .strtab */
954 19, /* sh_name */
955 SHT_STRTAB,
956 0, /* sh_flags */
957 0, /* sh_addr */
958 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
959 (Elf64_Xword)sizeof(entry), /* sh_size */
960 0, /* sh_link */
961 0, /* sh_info */
962 1, /* sh_addralign */
963 0 /* sh_entsize */
964 },
965 { /* .rodata */
966 27, /* sh_name */
967 SHT_PROGBITS,
968 SHF_ALLOC, /* sh_flags */
969 0, /* sh_addr */
970 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
971 0, /* sh_size */
972 0, /* sh_link */
973 0, /* sh_info */
974 16, /* sh_addralign */
975 0 /* sh_entsize */
976 }
977 };
978
979 /*
980 * 64-bit symbol table
981 * careful: different order of items compared with Elf32_sym!
982 */
983 static Elf64_Sym symbols64[2]={
984 { /* STN_UNDEF */
985 0
986 },
987 { /* data entry point */
988 1, /* st_name */
989 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
990 0, /* st_other */
991 4, /* st_shndx=index of related section table entry */
992 0, /* st_value */
993 0 /* st_size */
994 }
995 };
996
997 #endif /* U_ELF64 */
998
999 /* entry[] have a leading NUL */
1000 entryOffset=1;
1001
1002 /* in the common code, count entryLength from after the NUL */
1003 entryLengthOffset=1;
1004
1005 newSuffix=".o";
1006
1007 #elif U_PLATFORM_HAS_WIN32_API
1008 struct {
1009 IMAGE_FILE_HEADER fileHeader;
1010 IMAGE_SECTION_HEADER sections[2];
1011 char linkerOptions[100];
1012 } objHeader;
1013 IMAGE_SYMBOL symbols[1];
1014 struct {
1015 DWORD sizeofLongNames;
1016 char longNames[100];
1017 } symbolNames;
1018
1019 /*
1020 * entry sometimes have a leading '_'
1021 * overwritten if entryOffset==0 depending on the target platform
1022 * see check for cpu below
1023 */
1024 entry[0]='_';
1025
1026 newSuffix=".obj";
1027 #else
1028 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1029 #endif
1030
1031 /* deal with options, files and the entry point name */
1032 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1033 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1034 #if U_PLATFORM_HAS_WIN32_API
1035 if(cpu==IMAGE_FILE_MACHINE_I386) {
1036 entryOffset=1;
1037 }
1038 #endif
1039
1040 in=T_FileStream_open(filename, "rb");
1041 if(in==NULL) {
1042 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1043 exit(U_FILE_ACCESS_ERROR);
1044 }
1045 size=T_FileStream_size(in);
1046
1047 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1048 if (outFilePath != NULL) {
1049 uprv_strcpy(outFilePath, buffer);
1050 }
1051
1052 if(optEntryPoint != NULL) {
1053 uprv_strcpy(entry+entryOffset, optEntryPoint);
1054 uprv_strcat(entry+entryOffset, "_dat");
1055 }
1056 /* turn dashes in the entry name into underscores */
1057 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1058 for(i=0; i<entryLength; ++i) {
1059 if(entry[entryLengthOffset+i]=='-') {
1060 entry[entryLengthOffset+i]='_';
1061 }
1062 }
1063
1064 /* open the output file */
1065 out=T_FileStream_open(buffer, "wb");
1066 if(out==NULL) {
1067 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1068 exit(U_FILE_ACCESS_ERROR);
1069 }
1070
1071 #ifdef U_ELF
1072 if(bits==32) {
1073 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1074 header32.e_machine=cpu;
1075
1076 /* 16-align .rodata in the .o file, just in case */
1077 paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1078 if(paddingSize!=0) {
1079 paddingSize=0x10-paddingSize;
1080 sectionHeaders32[4].sh_offset+=paddingSize;
1081 }
1082
1083 sectionHeaders32[4].sh_size=(Elf32_Word)size;
1084
1085 symbols32[1].st_size=(Elf32_Word)size;
1086
1087 /* write .o headers */
1088 T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1089 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1090 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1091 } else /* bits==64 */ {
1092 #ifdef U_ELF64
1093 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1094 header64.e_machine=cpu;
1095
1096 /* 16-align .rodata in the .o file, just in case */
1097 paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1098 if(paddingSize!=0) {
1099 paddingSize=0x10-paddingSize;
1100 sectionHeaders64[4].sh_offset+=paddingSize;
1101 }
1102
1103 sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1104
1105 symbols64[1].st_size=(Elf64_Xword)size;
1106
1107 /* write .o headers */
1108 T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1109 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1110 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1111 #endif
1112 }
1113
1114 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1115 T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1116 if(paddingSize!=0) {
1117 T_FileStream_write(out, padding, paddingSize);
1118 }
1119 #elif U_PLATFORM_HAS_WIN32_API
1120 /* populate the .obj headers */
1121 uprv_memset(&objHeader, 0, sizeof(objHeader));
1122 uprv_memset(&symbols, 0, sizeof(symbols));
1123 uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1124
1125 /* write the linker export directive */
1126 uprv_strcpy(objHeader.linkerOptions, "-export:");
1127 length=8;
1128 uprv_strcpy(objHeader.linkerOptions+length, entry);
1129 length+=entryLength;
1130 uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1131 length+=6;
1132
1133 /* set the file header */
1134 objHeader.fileHeader.Machine=cpu;
1135 objHeader.fileHeader.NumberOfSections=2;
1136 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1137 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1138 objHeader.fileHeader.NumberOfSymbols=1;
1139
1140 /* set the section for the linker options */
1141 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1142 objHeader.sections[0].SizeOfRawData=length;
1143 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1144 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1145
1146 /* set the data section */
1147 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1148 objHeader.sections[1].SizeOfRawData=size;
1149 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1150 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1151
1152 /* set the symbol table */
1153 if(entryLength<=8) {
1154 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1155 symbolNames.sizeofLongNames=4;
1156 } else {
1157 symbols[0].N.Name.Short=0;
1158 symbols[0].N.Name.Long=4;
1159 symbolNames.sizeofLongNames=4+entryLength+1;
1160 uprv_strcpy(symbolNames.longNames, entry);
1161 }
1162 symbols[0].SectionNumber=2;
1163 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1164
1165 /* write the file header and the linker options section */
1166 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1167 #else
1168 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1169 #endif
1170
1171 /* copy the data file into section 2 */
1172 for(;;) {
1173 length=T_FileStream_read(in, buffer, sizeof(buffer));
1174 if(length==0) {
1175 break;
1176 }
1177 T_FileStream_write(out, buffer, (int32_t)length);
1178 }
1179
1180 #if U_PLATFORM_HAS_WIN32_API
1181 /* write the symbol table */
1182 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1183 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1184 #endif
1185
1186 if(T_FileStream_error(in)) {
1187 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1188 exit(U_FILE_ACCESS_ERROR);
1189 }
1190
1191 if(T_FileStream_error(out)) {
1192 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1193 exit(U_FILE_ACCESS_ERROR);
1194 }
1195
1196 T_FileStream_close(out);
1197 T_FileStream_close(in);
1198 }
1199 #endif
1200