1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /******************************************************************************
4 * Copyright (C) 2009-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *******************************************************************************
7 */
8 #include "unicode/utypes.h"
9
10 #if U_PLATFORM_HAS_WIN32_API
11 # define VC_EXTRALEAN
12 # define WIN32_LEAN_AND_MEAN
13 # define NOUSER
14 # define NOSERVICE
15 # define NOIME
16 # define NOMCX
17 #include <windows.h>
18 #include <time.h>
19 # ifdef __GNUC__
20 # define WINDOWS_WITH_GNUC
21 # endif
22 #endif
23
24 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
25 # define U_ELF
26 #endif
27
28 #ifdef U_ELF
29 # include <elf.h>
30 # if defined(ELFCLASS64)
31 # define U_ELF64
32 # endif
33 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
34 # ifndef EM_X86_64
35 # define EM_X86_64 62
36 # endif
37 # define ICU_ENTRY_OFFSET 0
38 #endif
39
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include "unicode/putil.h"
43 #include "cmemory.h"
44 #include "cstring.h"
45 #include "filestrm.h"
46 #include "toolutil.h"
47 #include "unicode/uclean.h"
48 #include "uoptions.h"
49 #include "pkg_genc.h"
50 #include "filetools.h"
51
/* Sentinel column value: nothing has been written to the output yet. */
#define MAX_COLUMN ((uint32_t)0xFFFFFFFFU)

/* Notations for hexadecimal numbers in the generated assembly. */
#define HEX_0X 0 /* prefix notation: 0x1234 */
#define HEX_0H 1 /* suffix notation: 01234h */
56
57 /* prototypes --------------------------------------------------------------- */
58 static void
59 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
60
61 static uint32_t
62 write8(FileStream *out, uint8_t byte, uint32_t column);
63
64 static uint32_t
65 write32(FileStream *out, uint32_t byte, uint32_t column);
66
67 #if U_PLATFORM == U_PF_OS400
68 static uint32_t
69 write8str(FileStream *out, uint8_t byte, uint32_t column);
70 #endif
71 /* -------------------------------------------------------------------------- */
72
73 /*
74 Creating Template Files for New Platforms
75
76 Let the cc compiler help you get started.
77 Compile this program
78 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
79 with the -S option to produce assembly output.
80
81 For example, this will generate array.s:
82 gcc -S array.c
83
84 This will produce a .s file that may look like this:
85
86 .file "array.c"
87 .version "01.01"
88 gcc2_compiled.:
89 .globl x
90 .section .rodata
91 .align 4
92 .type x,@object
93 .size x,20
94 x:
95 .long 1
96 .long 2
97 .long -559038737
98 .long -1
99 .long 16
100 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
101
102 which gives a starting point that will compile, and can be transformed
103 to become the template, generally with some consulting of as docs and
104 some experimentation.
105
106 If you want ICU to automatically use this assembly, you should
107 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
108 where the name is the compiler or platform that you used in this
109 assemblyHeader data structure.
110 */
111 static const struct AssemblyType {
112 const char *name;
113 const char *header;
114 const char *beginLine;
115 const char *footer;
116 int8_t hexType; /* HEX_0X or HEX_0h */
117 } assemblyHeader[] = {
118 /* For gcc assemblers, the meaning of .align changes depending on the */
119 /* hardware, so we use .balign 16 which always means 16 bytes. */
120 /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */
121 {"gcc",
122 ".globl %s\n"
123 "\t.section .note.GNU-stack,\"\",%%progbits\n"
124 "\t.section .rodata\n"
125 "\t.balign 16\n"
126 "#ifdef U_HIDE_DATA_SYMBOL\n"
127 "\t.hidden %s\n"
128 "#endif\n"
129 "\t.type %s,%%object\n"
130 "%s:\n\n",
131
132 ".long ",".size %s, .-%s\n",HEX_0X
133 },
134 {"gcc-darwin",
135 /*"\t.section __TEXT,__text,regular,pure_instructions\n"
136 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
137 ".globl _%s\n"
138 "#ifdef U_HIDE_DATA_SYMBOL\n"
139 "\t.private_extern _%s\n"
140 "#endif\n"
141 "\t.data\n"
142 "\t.const\n"
143 "\t.balign 16\n"
144 "_%s:\n\n",
145
146 ".long ","",HEX_0X
147 },
148 {"gcc-cygwin",
149 ".globl _%s\n"
150 "\t.section .rodata\n"
151 "\t.balign 16\n"
152 "_%s:\n\n",
153
154 ".long ","",HEX_0X
155 },
156 {"gcc-mingw64",
157 ".globl %s\n"
158 "\t.section .rodata\n"
159 "\t.balign 16\n"
160 "%s:\n\n",
161
162 ".long ","",HEX_0X
163 },
164 /* 16 bytes alignment. */
165 /* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
166 {"sun",
167 "\t.section \".rodata\"\n"
168 "\t.align 16\n"
169 ".globl %s\n"
170 "%s:\n",
171
172 ".word ","",HEX_0X
173 },
174 /* 16 bytes alignment for sun-x86. */
175 /* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */
176 {"sun-x86",
177 "Drodata.rodata:\n"
178 "\t.type Drodata.rodata,@object\n"
179 "\t.size Drodata.rodata,0\n"
180 "\t.globl %s\n"
181 "\t.align 16\n"
182 "%s:\n",
183
184 ".4byte ","",HEX_0X
185 },
186 /* 1<<4 bit alignment for aix. */
187 /* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */
188 {"xlc",
189 ".globl %s{RO}\n"
190 "\t.toc\n"
191 "%s:\n"
192 "\t.csect %s{RO}, 4\n",
193
194 ".long ","",HEX_0X
195 },
196 {"aCC-ia64",
197 "\t.file \"%s.s\"\n"
198 "\t.type %s,@object\n"
199 "\t.global %s\n"
200 "\t.secalias .abe$0.rodata, \".rodata\"\n"
201 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
202 "\t.align 16\n"
203 "%s::\t",
204
205 "data4 ","",HEX_0X
206 },
207 {"aCC-parisc",
208 "\t.SPACE $TEXT$\n"
209 "\t.SUBSPA $LIT$\n"
210 "%s\n"
211 "\t.EXPORT %s\n"
212 "\t.ALIGN 16\n",
213
214 ".WORD ","",HEX_0X
215 },
216 /* align 16 bytes */
217 /* http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */
218 { "masm",
219 "\tTITLE %s\n"
220 "; generated by genccode\n"
221 ".386\n"
222 ".model flat\n"
223 "\tPUBLIC _%s\n"
224 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
225 "\tALIGN 16\n"
226 "_%s\tLABEL DWORD\n",
227 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
228 }
229 };
230
231 static int32_t assemblyHeaderIndex = -1;
232 static int32_t hexType = HEX_0X;
233
234 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)235 checkAssemblyHeaderName(const char* optAssembly) {
236 int32_t idx;
237 assemblyHeaderIndex = -1;
238 for (idx = 0; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
239 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
240 assemblyHeaderIndex = idx;
241 hexType = assemblyHeader[idx].hexType; /* set the hex type */
242 return TRUE;
243 }
244 }
245
246 return FALSE;
247 }
248
249
250 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)251 printAssemblyHeadersToStdErr(void) {
252 int32_t idx;
253 fprintf(stderr, "%s", assemblyHeader[0].name);
254 for (idx = 1; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
255 fprintf(stderr, ", %s", assemblyHeader[idx].name);
256 }
257 fprintf(stderr,
258 ")\n");
259 }
260
261 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)262 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
263 uint32_t column = MAX_COLUMN;
264 char entry[64];
265 uint32_t buffer[1024];
266 char *bufferStr = (char *)buffer;
267 FileStream *in, *out;
268 size_t i, length;
269
270 in=T_FileStream_open(filename, "rb");
271 if(in==NULL) {
272 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
273 exit(U_FILE_ACCESS_ERROR);
274 }
275
276 getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
277 out=T_FileStream_open(bufferStr, "w");
278 if(out==NULL) {
279 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
280 exit(U_FILE_ACCESS_ERROR);
281 }
282
283 if (outFilePath != NULL) {
284 uprv_strcpy(outFilePath, bufferStr);
285 }
286
287 #if defined (WINDOWS_WITH_GNUC) && U_PLATFORM != U_PF_CYGWIN
288 /* Need to fix the file separator character when using MinGW. */
289 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
290 #endif
291
292 if(optEntryPoint != NULL) {
293 uprv_strcpy(entry, optEntryPoint);
294 uprv_strcat(entry, "_dat");
295 }
296
297 /* turn dashes or dots in the entry name into underscores */
298 length=uprv_strlen(entry);
299 for(i=0; i<length; ++i) {
300 if(entry[i]=='-' || entry[i]=='.') {
301 entry[i]='_';
302 }
303 }
304
305 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
306 entry, entry, entry, entry,
307 entry, entry, entry, entry);
308 T_FileStream_writeLine(out, bufferStr);
309 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
310
311 for(;;) {
312 length=T_FileStream_read(in, buffer, sizeof(buffer));
313 if(length==0) {
314 break;
315 }
316 if (length != sizeof(buffer)) {
317 /* pad with extra 0's when at the end of the file */
318 for(i=0; i < (length % sizeof(uint32_t)); ++i) {
319 buffer[length+i] = 0;
320 }
321 }
322 for(i=0; i<(length/sizeof(buffer[0])); i++) {
323 column = write32(out, buffer[i], column);
324 }
325 }
326
327 T_FileStream_writeLine(out, "\n");
328
329 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
330 entry, entry, entry, entry,
331 entry, entry, entry, entry);
332 T_FileStream_writeLine(out, bufferStr);
333
334 if(T_FileStream_error(in)) {
335 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
336 exit(U_FILE_ACCESS_ERROR);
337 }
338
339 if(T_FileStream_error(out)) {
340 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
341 exit(U_FILE_ACCESS_ERROR);
342 }
343
344 T_FileStream_close(out);
345 T_FileStream_close(in);
346 }
347
348 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)349 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
350 uint32_t column = MAX_COLUMN;
351 char buffer[4096], entry[64];
352 FileStream *in, *out;
353 size_t i, length;
354
355 in=T_FileStream_open(filename, "rb");
356 if(in==NULL) {
357 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
358 exit(U_FILE_ACCESS_ERROR);
359 }
360
361 if(optName != NULL) { /* prepend 'icudt28_' */
362 strcpy(entry, optName);
363 strcat(entry, "_");
364 } else {
365 entry[0] = 0;
366 }
367
368 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
369 if (outFilePath != NULL) {
370 uprv_strcpy(outFilePath, buffer);
371 }
372 out=T_FileStream_open(buffer, "w");
373 if(out==NULL) {
374 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
375 exit(U_FILE_ACCESS_ERROR);
376 }
377
378 /* turn dashes or dots in the entry name into underscores */
379 length=uprv_strlen(entry);
380 for(i=0; i<length; ++i) {
381 if(entry[i]=='-' || entry[i]=='.') {
382 entry[i]='_';
383 }
384 }
385
386 #if U_PLATFORM == U_PF_OS400
387 /*
388 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
389
390 This is here because this platform can't currently put
391 const data into the read-only pages of an object or
392 shared library (service program). Only strings are allowed in read-only
393 pages, so we use char * strings to store the data.
394
395 In order to prevent the beginning of the data from ever matching the
396 magic numbers we must still use the initial double.
397 [grhoten 4/24/2003]
398 */
399 sprintf(buffer,
400 "#ifndef IN_GENERATED_CCODE\n"
401 "#define IN_GENERATED_CCODE\n"
402 "#define U_DISABLE_RENAMING 1\n"
403 "#include \"unicode/umachine.h\"\n"
404 "#endif\n"
405 "U_CDECL_BEGIN\n"
406 "const struct {\n"
407 " double bogus;\n"
408 " const char *bytes; \n"
409 "} %s={ 0.0, \n",
410 entry);
411 T_FileStream_writeLine(out, buffer);
412
413 for(;;) {
414 length=T_FileStream_read(in, buffer, sizeof(buffer));
415 if(length==0) {
416 break;
417 }
418 for(i=0; i<length; ++i) {
419 column = write8str(out, (uint8_t)buffer[i], column);
420 }
421 }
422
423 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
424 #else
425 /* Function renaming shouldn't be done in data */
426 sprintf(buffer,
427 "#ifndef IN_GENERATED_CCODE\n"
428 "#define IN_GENERATED_CCODE\n"
429 "#define U_DISABLE_RENAMING 1\n"
430 "#include \"unicode/umachine.h\"\n"
431 "#endif\n"
432 "U_CDECL_BEGIN\n"
433 "const struct {\n"
434 " double bogus;\n"
435 " uint8_t bytes[%ld]; \n"
436 "} %s={ 0.0, {\n",
437 (long)T_FileStream_size(in), entry);
438 T_FileStream_writeLine(out, buffer);
439
440 for(;;) {
441 length=T_FileStream_read(in, buffer, sizeof(buffer));
442 if(length==0) {
443 break;
444 }
445 for(i=0; i<length; ++i) {
446 column = write8(out, (uint8_t)buffer[i], column);
447 }
448 }
449
450 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
451 #endif
452
453 if(T_FileStream_error(in)) {
454 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
455 exit(U_FILE_ACCESS_ERROR);
456 }
457
458 if(T_FileStream_error(out)) {
459 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
460 exit(U_FILE_ACCESS_ERROR);
461 }
462
463 T_FileStream_close(out);
464 T_FileStream_close(in);
465 }
466
467 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)468 write32(FileStream *out, uint32_t bitField, uint32_t column) {
469 int32_t i;
470 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
471 char *s = bitFieldStr;
472 uint8_t *ptrIdx = (uint8_t *)&bitField;
473 static const char hexToStr[16] = {
474 '0','1','2','3',
475 '4','5','6','7',
476 '8','9','A','B',
477 'C','D','E','F'
478 };
479
480 /* write the value, possibly with comma and newline */
481 if(column==MAX_COLUMN) {
482 /* first byte */
483 column=1;
484 } else if(column<32) {
485 *(s++)=',';
486 ++column;
487 } else {
488 *(s++)='\n';
489 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
490 s+=uprv_strlen(s);
491 column=1;
492 }
493
494 if (bitField < 10) {
495 /* It's a small number. Don't waste the space for 0x */
496 *(s++)=hexToStr[bitField];
497 }
498 else {
499 int seenNonZero = 0; /* This is used to remove leading zeros */
500
501 if(hexType==HEX_0X) {
502 *(s++)='0';
503 *(s++)='x';
504 } else if(hexType==HEX_0H) {
505 *(s++)='0';
506 }
507
508 /* This creates a 32-bit field */
509 #if U_IS_BIG_ENDIAN
510 for (i = 0; i < sizeof(uint32_t); i++)
511 #else
512 for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
513 #endif
514 {
515 uint8_t value = ptrIdx[i];
516 if (value || seenNonZero) {
517 *(s++)=hexToStr[value>>4];
518 *(s++)=hexToStr[value&0xF];
519 seenNonZero = 1;
520 }
521 }
522 if(hexType==HEX_0H) {
523 *(s++)='h';
524 }
525 }
526
527 *(s++)=0;
528 T_FileStream_writeLine(out, bitFieldStr);
529 return column;
530 }
531
532 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)533 write8(FileStream *out, uint8_t byte, uint32_t column) {
534 char s[4];
535 int i=0;
536
537 /* convert the byte value to a string */
538 if(byte>=100) {
539 s[i++]=(char)('0'+byte/100);
540 byte%=100;
541 }
542 if(i>0 || byte>=10) {
543 s[i++]=(char)('0'+byte/10);
544 byte%=10;
545 }
546 s[i++]=(char)('0'+byte);
547 s[i]=0;
548
549 /* write the value, possibly with comma and newline */
550 if(column==MAX_COLUMN) {
551 /* first byte */
552 column=1;
553 } else if(column<16) {
554 T_FileStream_writeLine(out, ",");
555 ++column;
556 } else {
557 T_FileStream_writeLine(out, ",\n");
558 column=1;
559 }
560 T_FileStream_writeLine(out, s);
561 return column;
562 }
563
564 #if U_PLATFORM == U_PF_OS400
565 static uint32_t
write8str(FileStream * out,uint8_t byte,uint32_t column)566 write8str(FileStream *out, uint8_t byte, uint32_t column) {
567 char s[8];
568
569 if (byte > 7)
570 sprintf(s, "\\x%X", byte);
571 else
572 sprintf(s, "\\%X", byte);
573
574 /* write the value, possibly with comma and newline */
575 if(column==MAX_COLUMN) {
576 /* first byte */
577 column=1;
578 T_FileStream_writeLine(out, "\"");
579 } else if(column<24) {
580 ++column;
581 } else {
582 T_FileStream_writeLine(out, "\"\n\"");
583 column=1;
584 }
585 T_FileStream_writeLine(out, s);
586 return column;
587 }
588 #endif
589
590 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)591 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
592 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
593
594 /* copy path */
595 if(destdir!=NULL && *destdir!=0) {
596 do {
597 *outFilename++=*destdir++;
598 } while(*destdir!=0);
599 if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
600 *outFilename++=U_FILE_SEP_CHAR;
601 }
602 inFilename=basename;
603 } else {
604 while(inFilename<basename) {
605 *outFilename++=*inFilename++;
606 }
607 }
608
609 if(suffix==NULL) {
610 /* the filename does not have a suffix */
611 uprv_strcpy(entryName, inFilename);
612 if(optFilename != NULL) {
613 uprv_strcpy(outFilename, optFilename);
614 } else {
615 uprv_strcpy(outFilename, inFilename);
616 }
617 uprv_strcat(outFilename, newSuffix);
618 } else {
619 char *saveOutFilename = outFilename;
620 /* copy basename */
621 while(inFilename<suffix) {
622 if(*inFilename=='-') {
623 /* iSeries cannot have '-' in the .o objects. */
624 *outFilename++=*entryName++='_';
625 inFilename++;
626 }
627 else {
628 *outFilename++=*entryName++=*inFilename++;
629 }
630 }
631
632 /* replace '.' by '_' */
633 *outFilename++=*entryName++='_';
634 ++inFilename;
635
636 /* copy suffix */
637 while(*inFilename!=0) {
638 *outFilename++=*entryName++=*inFilename++;
639 }
640
641 *entryName=0;
642
643 if(optFilename != NULL) {
644 uprv_strcpy(saveOutFilename, optFilename);
645 uprv_strcat(saveOutFilename, newSuffix);
646 } else {
647 /* add ".c" */
648 uprv_strcpy(outFilename, newSuffix);
649 }
650 }
651 }
652
653 #ifdef CAN_GENERATE_OBJECTS
654 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)655 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
656 union {
657 char bytes[2048];
658 #ifdef U_ELF
659 Elf32_Ehdr header32;
660 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
661 #elif U_PLATFORM_HAS_WIN32_API
662 IMAGE_FILE_HEADER header;
663 #endif
664 } buffer;
665
666 const char *filename;
667 FileStream *in;
668 int32_t length;
669
670 #ifdef U_ELF
671
672 #elif U_PLATFORM_HAS_WIN32_API
673 const IMAGE_FILE_HEADER *pHeader;
674 #else
675 # error "Unknown platform for CAN_GENERATE_OBJECTS."
676 #endif
677
678 if(optMatchArch != NULL) {
679 filename=optMatchArch;
680 } else {
681 /* set defaults */
682 #ifdef U_ELF
683 /* set EM_386 because elf.h does not provide better defaults */
684 *pCPU=EM_386;
685 *pBits=32;
686 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
687 #elif U_PLATFORM_HAS_WIN32_API
688 /* _M_IA64 should be defined in windows.h */
689 # if defined(_M_IA64)
690 *pCPU=IMAGE_FILE_MACHINE_IA64;
691 *pBits = 64;
692 # elif defined(_M_AMD64)
693 // link.exe does not really care about the .obj machine type and this will
694 // allow us to build a dll for both ARM & x64 with an amd64 built tool
695 // ARM is same as x64 except for first 2 bytes of object file
696 *pCPU = IMAGE_FILE_MACHINE_UNKNOWN;
697 // *pCPU = IMAGE_FILE_MACHINE_ARMNT; // If we wanted to be explicit
698 // *pCPU = IMAGE_FILE_MACHINE_AMD64; // We would use one of these names
699 *pBits = 64; // Doesn't seem to be used for anything interesting?
700 # else
701 *pCPU=IMAGE_FILE_MACHINE_I386; // We would use one of these names
702 *pBits = 32;
703 # endif
704 *pIsBigEndian=FALSE;
705 #else
706 # error "Unknown platform for CAN_GENERATE_OBJECTS."
707 #endif
708 return;
709 }
710
711 in=T_FileStream_open(filename, "rb");
712 if(in==NULL) {
713 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
714 exit(U_FILE_ACCESS_ERROR);
715 }
716 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
717
718 #ifdef U_ELF
719 if(length<(int32_t)sizeof(Elf32_Ehdr)) {
720 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
721 exit(U_UNSUPPORTED_ERROR);
722 }
723 if(
724 buffer.header32.e_ident[0]!=ELFMAG0 ||
725 buffer.header32.e_ident[1]!=ELFMAG1 ||
726 buffer.header32.e_ident[2]!=ELFMAG2 ||
727 buffer.header32.e_ident[3]!=ELFMAG3 ||
728 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
729 ) {
730 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
731 exit(U_UNSUPPORTED_ERROR);
732 }
733
734 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
735 #ifdef U_ELF64
736 if(*pBits!=32 && *pBits!=64) {
737 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
738 exit(U_UNSUPPORTED_ERROR);
739 }
740 #else
741 if(*pBits!=32) {
742 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
743 exit(U_UNSUPPORTED_ERROR);
744 }
745 #endif
746
747 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
748 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
749 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
750 exit(U_UNSUPPORTED_ERROR);
751 }
752 /* TODO: Support byte swapping */
753
754 *pCPU=buffer.header32.e_machine;
755 #elif U_PLATFORM_HAS_WIN32_API
756 if(length<sizeof(IMAGE_FILE_HEADER)) {
757 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
758 exit(U_UNSUPPORTED_ERROR);
759 }
760 /* TODO: Use buffer.header. Keep aliasing legal. */
761 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
762 *pCPU=pHeader->Machine;
763 /*
764 * The number of bits is implicit with the Machine value.
765 * *pBits is ignored in the calling code, so this need not be precise.
766 */
767 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
768 /* Windows always runs on little-endian CPUs. */
769 *pIsBigEndian=FALSE;
770 #else
771 # error "Unknown platform for CAN_GENERATE_OBJECTS."
772 #endif
773
774 T_FileStream_close(in);
775 }
776
777 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)778 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
779 /* common variables */
780 char buffer[4096], entry[96]={ 0 };
781 FileStream *in, *out;
782 const char *newSuffix;
783 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
784
785 uint16_t cpu, bits;
786 UBool makeBigEndian;
787
788 /* platform-specific variables and initialization code */
789 #ifdef U_ELF
790 /* 32-bit Elf file header */
791 static Elf32_Ehdr header32={
792 {
793 /* e_ident[] */
794 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
795 ELFCLASS32,
796 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
797 EV_CURRENT /* EI_VERSION */
798 },
799 ET_REL,
800 EM_386,
801 EV_CURRENT, /* e_version */
802 0, /* e_entry */
803 0, /* e_phoff */
804 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
805 0, /* e_flags */
806 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
807 0, /* e_phentsize */
808 0, /* e_phnum */
809 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
810 5, /* e_shnum */
811 2 /* e_shstrndx */
812 };
813
814 /* 32-bit Elf section header table */
815 static Elf32_Shdr sectionHeaders32[5]={
816 { /* SHN_UNDEF */
817 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
818 },
819 { /* .symtab */
820 1, /* sh_name */
821 SHT_SYMTAB,
822 0, /* sh_flags */
823 0, /* sh_addr */
824 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
825 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
826 3, /* sh_link=sect hdr index of .strtab */
827 1, /* sh_info=One greater than the symbol table index of the last
828 * local symbol (with STB_LOCAL). */
829 4, /* sh_addralign */
830 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
831 },
832 { /* .shstrtab */
833 9, /* sh_name */
834 SHT_STRTAB,
835 0, /* sh_flags */
836 0, /* sh_addr */
837 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
838 40, /* sh_size */
839 0, /* sh_link */
840 0, /* sh_info */
841 1, /* sh_addralign */
842 0 /* sh_entsize */
843 },
844 { /* .strtab */
845 19, /* sh_name */
846 SHT_STRTAB,
847 0, /* sh_flags */
848 0, /* sh_addr */
849 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
850 (Elf32_Word)sizeof(entry), /* sh_size */
851 0, /* sh_link */
852 0, /* sh_info */
853 1, /* sh_addralign */
854 0 /* sh_entsize */
855 },
856 { /* .rodata */
857 27, /* sh_name */
858 SHT_PROGBITS,
859 SHF_ALLOC, /* sh_flags */
860 0, /* sh_addr */
861 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
862 0, /* sh_size */
863 0, /* sh_link */
864 0, /* sh_info */
865 16, /* sh_addralign */
866 0 /* sh_entsize */
867 }
868 };
869
870 /* symbol table */
871 static Elf32_Sym symbols32[2]={
872 { /* STN_UNDEF */
873 0, 0, 0, 0, 0, 0
874 },
875 { /* data entry point */
876 1, /* st_name */
877 0, /* st_value */
878 0, /* st_size */
879 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
880 0, /* st_other */
881 4 /* st_shndx=index of related section table entry */
882 }
883 };
884
885 /* section header string table, with decimal string offsets */
886 static const char sectionStrings[40]=
887 /* 0 */ "\0"
888 /* 1 */ ".symtab\0"
889 /* 9 */ ".shstrtab\0"
890 /* 19 */ ".strtab\0"
891 /* 27 */ ".rodata\0"
892 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
893 /* 40: padded to multiple of 8 bytes */
894
895 /*
896 * Use entry[] for the string table which will contain only the
897 * entry point name.
898 * entry[0] must be 0 (NUL)
899 * The entry point name can be up to 38 characters long (sizeof(entry)-2).
900 */
901
902 /* 16-align .rodata in the .o file, just in case */
903 static const char padding[16]={ 0 };
904 int32_t paddingSize;
905
906 #ifdef U_ELF64
907 /* 64-bit Elf file header */
908 static Elf64_Ehdr header64={
909 {
910 /* e_ident[] */
911 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
912 ELFCLASS64,
913 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
914 EV_CURRENT /* EI_VERSION */
915 },
916 ET_REL,
917 EM_X86_64,
918 EV_CURRENT, /* e_version */
919 0, /* e_entry */
920 0, /* e_phoff */
921 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
922 0, /* e_flags */
923 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
924 0, /* e_phentsize */
925 0, /* e_phnum */
926 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
927 5, /* e_shnum */
928 2 /* e_shstrndx */
929 };
930
931 /* 64-bit Elf section header table */
932 static Elf64_Shdr sectionHeaders64[5]={
933 { /* SHN_UNDEF */
934 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
935 },
936 { /* .symtab */
937 1, /* sh_name */
938 SHT_SYMTAB,
939 0, /* sh_flags */
940 0, /* sh_addr */
941 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
942 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
943 3, /* sh_link=sect hdr index of .strtab */
944 1, /* sh_info=One greater than the symbol table index of the last
945 * local symbol (with STB_LOCAL). */
946 4, /* sh_addralign */
947 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
948 },
949 { /* .shstrtab */
950 9, /* sh_name */
951 SHT_STRTAB,
952 0, /* sh_flags */
953 0, /* sh_addr */
954 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
955 40, /* sh_size */
956 0, /* sh_link */
957 0, /* sh_info */
958 1, /* sh_addralign */
959 0 /* sh_entsize */
960 },
961 { /* .strtab */
962 19, /* sh_name */
963 SHT_STRTAB,
964 0, /* sh_flags */
965 0, /* sh_addr */
966 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
967 (Elf64_Xword)sizeof(entry), /* sh_size */
968 0, /* sh_link */
969 0, /* sh_info */
970 1, /* sh_addralign */
971 0 /* sh_entsize */
972 },
973 { /* .rodata */
974 27, /* sh_name */
975 SHT_PROGBITS,
976 SHF_ALLOC, /* sh_flags */
977 0, /* sh_addr */
978 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
979 0, /* sh_size */
980 0, /* sh_link */
981 0, /* sh_info */
982 16, /* sh_addralign */
983 0 /* sh_entsize */
984 }
985 };
986
987 /*
988 * 64-bit symbol table
989 * careful: different order of items compared with Elf32_sym!
990 */
991 static Elf64_Sym symbols64[2]={
992 { /* STN_UNDEF */
993 0, 0, 0, 0, 0, 0
994 },
995 { /* data entry point */
996 1, /* st_name */
997 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
998 0, /* st_other */
999 4, /* st_shndx=index of related section table entry */
1000 0, /* st_value */
1001 0 /* st_size */
1002 }
1003 };
1004
1005 #endif /* U_ELF64 */
1006
1007 /* entry[] have a leading NUL */
1008 entryOffset=1;
1009
1010 /* in the common code, count entryLength from after the NUL */
1011 entryLengthOffset=1;
1012
1013 newSuffix=".o";
1014
1015 #elif U_PLATFORM_HAS_WIN32_API
1016 struct {
1017 IMAGE_FILE_HEADER fileHeader;
1018 IMAGE_SECTION_HEADER sections[2];
1019 char linkerOptions[100];
1020 } objHeader;
1021 IMAGE_SYMBOL symbols[1];
1022 struct {
1023 DWORD sizeofLongNames;
1024 char longNames[100];
1025 } symbolNames;
1026
1027 /*
1028 * entry sometimes have a leading '_'
1029 * overwritten if entryOffset==0 depending on the target platform
1030 * see check for cpu below
1031 */
1032 entry[0]='_';
1033
1034 newSuffix=".obj";
1035 #else
1036 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1037 #endif
1038
1039 /* deal with options, files and the entry point name */
1040 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1041 if (optMatchArch)
1042 {
1043 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1044 }
1045 else
1046 {
1047 printf("genccode: using architecture cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1048 }
1049 #if U_PLATFORM_HAS_WIN32_API
1050 if(cpu==IMAGE_FILE_MACHINE_I386) {
1051 entryOffset=1;
1052 }
1053 #endif
1054
1055 in=T_FileStream_open(filename, "rb");
1056 if(in==NULL) {
1057 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1058 exit(U_FILE_ACCESS_ERROR);
1059 }
1060 size=T_FileStream_size(in);
1061
1062 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1063 if (outFilePath != NULL) {
1064 uprv_strcpy(outFilePath, buffer);
1065 }
1066
1067 if(optEntryPoint != NULL) {
1068 uprv_strcpy(entry+entryOffset, optEntryPoint);
1069 uprv_strcat(entry+entryOffset, "_dat");
1070 }
1071 /* turn dashes in the entry name into underscores */
1072 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1073 for(i=0; i<entryLength; ++i) {
1074 if(entry[entryLengthOffset+i]=='-') {
1075 entry[entryLengthOffset+i]='_';
1076 }
1077 }
1078
1079 /* open the output file */
1080 out=T_FileStream_open(buffer, "wb");
1081 if(out==NULL) {
1082 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1083 exit(U_FILE_ACCESS_ERROR);
1084 }
1085
1086 #ifdef U_ELF
1087 if(bits==32) {
1088 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1089 header32.e_machine=cpu;
1090
1091 /* 16-align .rodata in the .o file, just in case */
1092 paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1093 if(paddingSize!=0) {
1094 paddingSize=0x10-paddingSize;
1095 sectionHeaders32[4].sh_offset+=paddingSize;
1096 }
1097
1098 sectionHeaders32[4].sh_size=(Elf32_Word)size;
1099
1100 symbols32[1].st_size=(Elf32_Word)size;
1101
1102 /* write .o headers */
1103 T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1104 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1105 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1106 } else /* bits==64 */ {
1107 #ifdef U_ELF64
1108 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1109 header64.e_machine=cpu;
1110
1111 /* 16-align .rodata in the .o file, just in case */
1112 paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1113 if(paddingSize!=0) {
1114 paddingSize=0x10-paddingSize;
1115 sectionHeaders64[4].sh_offset+=paddingSize;
1116 }
1117
1118 sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1119
1120 symbols64[1].st_size=(Elf64_Xword)size;
1121
1122 /* write .o headers */
1123 T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1124 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1125 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1126 #endif
1127 }
1128
1129 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1130 T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1131 if(paddingSize!=0) {
1132 T_FileStream_write(out, padding, paddingSize);
1133 }
1134 #elif U_PLATFORM_HAS_WIN32_API
1135 /* populate the .obj headers */
1136 uprv_memset(&objHeader, 0, sizeof(objHeader));
1137 uprv_memset(&symbols, 0, sizeof(symbols));
1138 uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1139
1140 /* write the linker export directive */
1141 uprv_strcpy(objHeader.linkerOptions, "-export:");
1142 length=8;
1143 uprv_strcpy(objHeader.linkerOptions+length, entry);
1144 length+=entryLength;
1145 uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1146 length+=6;
1147
1148 /* set the file header */
1149 objHeader.fileHeader.Machine=cpu;
1150 objHeader.fileHeader.NumberOfSections=2;
1151 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1152 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1153 objHeader.fileHeader.NumberOfSymbols=1;
1154
1155 /* set the section for the linker options */
1156 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1157 objHeader.sections[0].SizeOfRawData=length;
1158 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1159 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1160
1161 /* set the data section */
1162 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1163 objHeader.sections[1].SizeOfRawData=size;
1164 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1165 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1166
1167 /* set the symbol table */
1168 if(entryLength<=8) {
1169 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1170 symbolNames.sizeofLongNames=4;
1171 } else {
1172 symbols[0].N.Name.Short=0;
1173 symbols[0].N.Name.Long=4;
1174 symbolNames.sizeofLongNames=4+entryLength+1;
1175 uprv_strcpy(symbolNames.longNames, entry);
1176 }
1177 symbols[0].SectionNumber=2;
1178 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1179
1180 /* write the file header and the linker options section */
1181 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1182 #else
1183 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1184 #endif
1185
1186 /* copy the data file into section 2 */
1187 for(;;) {
1188 length=T_FileStream_read(in, buffer, sizeof(buffer));
1189 if(length==0) {
1190 break;
1191 }
1192 T_FileStream_write(out, buffer, (int32_t)length);
1193 }
1194
1195 #if U_PLATFORM_HAS_WIN32_API
1196 /* write the symbol table */
1197 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1198 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1199 #endif
1200
1201 if(T_FileStream_error(in)) {
1202 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1203 exit(U_FILE_ACCESS_ERROR);
1204 }
1205
1206 if(T_FileStream_error(out)) {
1207 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1208 exit(U_FILE_ACCESS_ERROR);
1209 }
1210
1211 T_FileStream_close(out);
1212 T_FileStream_close(in);
1213 }
1214 #endif
1215