1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2002-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File wrtxml.cpp
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 10/01/02 Ram Creation.
17 * 02/07/08 Spieth Correct XLIFF generation on EBCDIC platform
18 *
19 *******************************************************************************
20 */
21
22 // Safer use of UnicodeString.
23 #ifndef UNISTR_FROM_CHAR_EXPLICIT
24 # define UNISTR_FROM_CHAR_EXPLICIT explicit
25 #endif
26
27 // Less important, but still a good idea.
28 #ifndef UNISTR_FROM_STRING_EXPLICIT
29 # define UNISTR_FROM_STRING_EXPLICIT explicit
30 #endif
31
32 #include "reslist.h"
33 #include "unewdata.h"
34 #include "unicode/ures.h"
35 #include "errmsg.h"
36 #include "filestrm.h"
37 #include "cstring.h"
38 #include "unicode/ucnv.h"
39 #include "genrb.h"
40 #include "rle.h"
41 #include "uhash.h"
42 #include "uresimp.h"
43 #include "unicode/ustring.h"
44 #include "unicode/uchar.h"
45 #include "ustr.h"
46 #include "prscmnts.h"
47 #include "unicode/unistr.h"
48 #include "unicode/utf8.h"
49 #include "unicode/utf16.h"
50 #include <time.h>
51
52 U_NAMESPACE_USE
53
54 static int tabCount = 0;
55
56 static FileStream* out=NULL;
57 static struct SRBRoot* srBundle ;
58 static const char* outDir = NULL;
59 static const char* enc ="";
60 static UConverter* conv = NULL;
61
62 const char* const* ISOLanguages;
63 const char* const* ISOCountries;
64 const char* textExt = ".txt";
65 const char* xliffExt = ".xlf";
66
write_utf8_file(FileStream * fileStream,UnicodeString outString)67 static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString)
68 {
69 UErrorCode status = U_ZERO_ERROR;
70 int32_t len = 0;
71
72 // preflight to get the destination buffer size
73 u_strToUTF8(NULL,
74 0,
75 &len,
76 outString.getBuffer(),
77 outString.length(),
78 &status);
79
80 // allocate the buffer
81 char* dest = (char*)uprv_malloc(len);
82 status = U_ZERO_ERROR;
83
84 // convert the data
85 u_strToUTF8(dest,
86 len,
87 &len,
88 outString.getBuffer(),
89 outString.length(),
90 &status);
91
92 // write data to out file
93 int32_t ret = T_FileStream_write(fileStream, dest, len);
94 uprv_free(dest);
95 return (ret);
96 }
97
98 /*write indentation for formatting*/
write_tabs(FileStream * os)99 static void write_tabs(FileStream* os){
100 int i=0;
101 for(;i<=tabCount;i++){
102 write_utf8_file(os,UnicodeString(" "));
103 }
104 }
105
106 /*get ID for each element. ID is globally unique.*/
getID(const char * id,const char * curKey,char * result)107 static char* getID(const char* id, const char* curKey, char* result) {
108 if(curKey == NULL) {
109 result = (char *)uprv_malloc(sizeof(char)*uprv_strlen(id) + 1);
110 uprv_memset(result, 0, sizeof(char)*uprv_strlen(id) + 1);
111 uprv_strcpy(result, id);
112 } else {
113 result = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
114 uprv_memset(result, 0, sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
115 if(id[0]!='\0'){
116 uprv_strcpy(result, id);
117 uprv_strcat(result, "_");
118 }
119 uprv_strcat(result, curKey);
120 }
121 return result;
122 }
123
124 /*compute CRC for binary code*/
125 /* The code is from http://www.theorem.com/java/CRC32.java
126 * Calculates the CRC32 - 32 bit Cyclical Redundancy Check
127 * <P> This check is used in numerous systems to verify the integrity
128 * of information. It's also used as a hashing function. Unlike a regular
129 * checksum, it's sensitive to the order of the characters.
130 * It produces a 32 bit
131 *
132 * @author Michael Lecuyer (mjl@theorem.com)
133 * @version 1.1 August 11, 1998
134 */
135
/* ICU data is not endian-portable: data generated on a big-endian machine can
 * be used on other big-endian machines but not on little-endian machines, and
 * vice versa. The conversion is not portable across platforms with different
 * endianness.
 */
140
/*
 * Update a running CRC-32 (reflected polynomial 0xEDB88320) with `len` bytes
 * starting at `ptr`.
 *
 * ptr      bytes to fold into the checksum
 * len      number of bytes to process
 * lastcrc  checksum so far
 *
 * Returns the updated checksum. No initial or final inversion is applied
 * here, so the result of one call can be passed as `lastcrc` to the next.
 *
 * All arithmetic is done on uint32_t so that no value ever passes through a
 * signed type, and each input byte is reduced to 0..255 before it is used as
 * part of the table index.
 */
uint32_t computeCRC(const char *ptr, uint32_t len, uint32_t lastcrc){
    uint32_t table[256];
    uint32_t crc = lastcrc;
    uint32_t value;
    int bit;

#define CRC32_POLYNOMIAL 0xEDB88320

    /* Nothing to fold in: skip building the lookup table. */
    if (len == 0) {
        return crc;
    }

    /* Build the byte-wise lookup table. */
    for (value = 0; value < 256; value++) {
        uint32_t entry = value;
        for (bit = 0; bit < 8; bit++) {
            if ((entry & 1) == 1) {
                entry = (entry >> 1) ^ CRC32_POLYNOMIAL;
            } else {
                entry >>= 1;
            }
        }
        table[value] = entry;
    }

    while (len-- != 0) {
        crc = (crc >> 8) ^ table[(crc ^ (uint8_t)*ptr) & 0xFF];
        ptr++;
    }
    return crc;
}
175
/*
 * Replace every occurrence of the character `s` with `r` within the first
 * `srcLen` characters of `src`, in place. A srcLen of zero or less changes
 * nothing.
 */
static void strnrepchr(char* src, int32_t srcLen, char s, char r){
    int32_t remaining = srcLen;
    char* cursor = src;

    while(remaining-- > 0){
        if(*cursor == s){
            *cursor = r;
        }
        ++cursor;
    }
}
184 /* Parse the filename, and get its language information.
185 * If it fails to get the language information from the filename,
186 * use "en" as the default value for language
187 */
parseFilename(const char * id,char *)188 static char* parseFilename(const char* id, char* /*lang*/) {
189 int idLen = (int) uprv_strlen(id);
190 char* localeID = (char*) uprv_malloc(idLen);
191 int pos = 0;
192 int canonCapacity = 0;
193 char* canon = NULL;
194 int canonLen = 0;
195 /*int i;*/
196 UErrorCode status = U_ZERO_ERROR;
197 const char *ext = uprv_strchr(id, '.');
198
199 if(ext != NULL){
200 pos = (int) (ext - id);
201 } else {
202 pos = idLen;
203 }
204 uprv_memcpy(localeID, id, pos);
205 localeID[pos]=0; /* NUL terminate the string */
206
207 canonCapacity =pos*3;
208 canon = (char*) uprv_malloc(canonCapacity);
209 canonLen = uloc_canonicalize(localeID, canon, canonCapacity, &status);
210
211 if(U_FAILURE(status)){
212 fprintf(stderr, "Could not canonicalize the locale ID: %s. Error: %s\n", localeID, u_errorName(status));
213 exit(status);
214 }
215 strnrepchr(canon, canonLen, '_', '-');
216 return canon;
217 }
218
219 static const char* xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
220 #if 0
221 static const char* bundleStart = "<xliff version = \"1.2\" "
222 "xmlns='urn:oasis:names:tc:xliff:document:1.2' "
223 "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
224 "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2-transitional.xsd'>\n";
225 #else
226 static const char* bundleStart = "<xliff version = \"1.1\" "
227 "xmlns='urn:oasis:names:tc:xliff:document:1.1' "
228 "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
229 "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.1 http://www.oasis-open.org/committees/xliff/documents/xliff-core-1.1.xsd'>\n";
230 #endif
231 static const char* bundleEnd = "</xliff>\n";
232
233 void res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status);
234
convertAndEscape(char ** pDest,int32_t destCap,int32_t * destLength,const UChar * src,int32_t srcLen,UErrorCode * status)235 static char* convertAndEscape(char** pDest, int32_t destCap, int32_t* destLength,
236 const UChar* src, int32_t srcLen, UErrorCode* status){
237 int32_t srcIndex=0;
238 char* dest=NULL;
239 char* temp=NULL;
240 int32_t destLen=0;
241 UChar32 c = 0;
242
243 if(status==NULL || U_FAILURE(*status) || pDest==NULL || srcLen==0 || src == NULL){
244 return NULL;
245 }
246 dest =*pDest;
247 if(dest==NULL || destCap <=0){
248 destCap = srcLen * 8;
249 dest = (char*) uprv_malloc(sizeof(char) * destCap);
250 if(dest==NULL){
251 *status=U_MEMORY_ALLOCATION_ERROR;
252 return NULL;
253 }
254 }
255
256 dest[0]=0;
257
258 while(srcIndex<srcLen){
259 U16_NEXT(src, srcIndex, srcLen, c);
260
261 if (U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
262 *status = U_ILLEGAL_CHAR_FOUND;
263 fprintf(stderr, "Illegal Surrogate! \n");
264 uprv_free(dest);
265 return NULL;
266 }
267
268 if((destLen+U8_LENGTH(c)) < destCap){
269
270 /* ASCII Range */
271 if(c <=0x007F){
272 switch(c) {
273 case '\x26':
274 uprv_strcpy(dest+( destLen),"\x26\x61\x6d\x70\x3b"); /* &*/
275 destLen+=(int32_t)uprv_strlen("\x26\x61\x6d\x70\x3b");
276 break;
277 case '\x3c':
278 uprv_strcpy(dest+(destLen),"\x26\x6c\x74\x3b"); /* <*/
279 destLen+=(int32_t)uprv_strlen("\x26\x6c\x74\x3b");
280 break;
281 case '\x3e':
282 uprv_strcpy(dest+(destLen),"\x26\x67\x74\x3b"); /* >*/
283 destLen+=(int32_t)uprv_strlen("\x26\x67\x74\x3b");
284 break;
285 case '\x22':
286 uprv_strcpy(dest+(destLen),"\x26\x71\x75\x6f\x74\x3b"); /* "*/
287 destLen+=(int32_t)uprv_strlen("\x26\x71\x75\x6f\x74\x3b");
288 break;
289 case '\x27':
290 uprv_strcpy(dest+(destLen),"\x26\x61\x70\x6f\x73\x3b"); /* ' */
291 destLen+=(int32_t)uprv_strlen("\x26\x61\x70\x6f\x73\x3b");
292 break;
293
294 /* Disallow C0 controls except TAB, CR, LF*/
295 case 0x00:
296 case 0x01:
297 case 0x02:
298 case 0x03:
299 case 0x04:
300 case 0x05:
301 case 0x06:
302 case 0x07:
303 case 0x08:
304 /*case 0x09:*/
305 /*case 0x0A: */
306 case 0x0B:
307 case 0x0C:
308 /*case 0x0D:*/
309 case 0x0E:
310 case 0x0F:
311 case 0x10:
312 case 0x11:
313 case 0x12:
314 case 0x13:
315 case 0x14:
316 case 0x15:
317 case 0x16:
318 case 0x17:
319 case 0x18:
320 case 0x19:
321 case 0x1A:
322 case 0x1B:
323 case 0x1C:
324 case 0x1D:
325 case 0x1E:
326 case 0x1F:
327 *status = U_ILLEGAL_CHAR_FOUND;
328 fprintf(stderr, "Illegal Character \\u%04X!\n",(int)c);
329 uprv_free(dest);
330 return NULL;
331 default:
332 dest[destLen++]=(char)c;
333 }
334 }else{
335 UBool isError = FALSE;
336 U8_APPEND((unsigned char*)dest,destLen,destCap,c,isError);
337 if(isError){
338 *status = U_ILLEGAL_CHAR_FOUND;
339 fprintf(stderr, "Illegal Character \\U%08X!\n",(int)c);
340 uprv_free(dest);
341 return NULL;
342 }
343 }
344 }else{
345 destCap += destLen;
346
347 temp = (char*) uprv_malloc(sizeof(char)*destCap);
348 if(temp==NULL){
349 *status=U_MEMORY_ALLOCATION_ERROR;
350 uprv_free(dest);
351 return NULL;
352 }
353 uprv_memmove(temp,dest,destLen);
354 destLen=0;
355 uprv_free(dest);
356 dest=temp;
357 temp=NULL;
358 }
359
360 }
361 *destLength = destLen;
362 return dest;
363 }
364
/*
 * Code points used while cleaning up comment text.
 * Note: the names CR and LF are attached to each other's values here
 * (0x000A is LINE FEED, 0x000D is CARRIAGE RETURN). trim() treats both the
 * same way, so the swap makes no difference to it.
 */
#define ASTERISK 0x002A
#define SPACE 0x0020
#define CR 0x000A
#define LF 0x000D
#define AT_SIGN 0x0040

/*
 * Strip trailing asterisks, spaces and line breaks from a buffer.
 *
 * src  address of the buffer pointer; nothing happens when src or *src is NULL
 * len  in: number of characters in the buffer; out: number that remain
 *
 * Every stripped position is overwritten with 0.
 */
static void
trim(char **src, int32_t *len){
    char *text;
    int32_t last;

    if(src == NULL || *src == NULL){
        return;
    }

    text = *src;
    last = *len - 1;
    while(last >= 0 &&
          (text[last] == ASTERISK || text[last] == SPACE ||
           text[last] == CR || text[last] == LF)){
        text[last] = 0;
        --last;
    }
    *len = last + 1;
}
397
398 static void
print(UChar * src,int32_t srcLen,const char * tagStart,const char * tagEnd,UErrorCode * status)399 print(UChar* src, int32_t srcLen,const char *tagStart,const char *tagEnd, UErrorCode *status){
400 int32_t bufCapacity = srcLen*4;
401 char *buf = NULL;
402 int32_t bufLen = 0;
403
404 if(U_FAILURE(*status)){
405 return;
406 }
407
408 buf = (char*) (uprv_malloc(bufCapacity));
409 if(buf==0){
410 fprintf(stderr, "Could not allocate memory!!");
411 exit(U_MEMORY_ALLOCATION_ERROR);
412 }
413 buf = convertAndEscape(&buf, bufCapacity, &bufLen, src, srcLen,status);
414 if(U_SUCCESS(*status)){
415 trim(&buf,&bufLen);
416 write_utf8_file(out,UnicodeString(tagStart));
417 write_utf8_file(out,UnicodeString(buf, bufLen, "UTF-8"));
418 write_utf8_file(out,UnicodeString(tagEnd));
419 write_utf8_file(out,UnicodeString("\n"));
420
421 }
422 }
423 static void
printNoteElements(const UString * src,UErrorCode * status)424 printNoteElements(const UString *src, UErrorCode *status){
425
426 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
427
428 int32_t capacity = 0;
429 UChar* note = NULL;
430 int32_t noteLen = 0;
431 int32_t count = 0,i;
432
433 if(src == NULL){
434 return;
435 }
436
437 capacity = src->fLength;
438 note = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
439
440 count = getCount(src->fChars,src->fLength, UPC_NOTE, status);
441 if(U_FAILURE(*status)){
442 uprv_free(note);
443 return;
444 }
445 for(i=0; i < count; i++){
446 noteLen = getAt(src->fChars,src->fLength, ¬e, capacity, i, UPC_NOTE, status);
447 if(U_FAILURE(*status)){
448 uprv_free(note);
449 return;
450 }
451 if(noteLen > 0){
452 write_tabs(out);
453 print(note, noteLen,"<note>", "</note>", status);
454 }
455 }
456 uprv_free(note);
457 #else
458
459 fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
460
461 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
462
463 }
464
printAttribute(const char * name,const char * value,int32_t)465 static void printAttribute(const char *name, const char *value, int32_t /*len*/)
466 {
467 write_utf8_file(out, UnicodeString(" "));
468 write_utf8_file(out, UnicodeString(name));
469 write_utf8_file(out, UnicodeString(" = \""));
470 write_utf8_file(out, UnicodeString(value));
471 write_utf8_file(out, UnicodeString("\""));
472 }
473
printAttribute(const char * name,const UnicodeString value,int32_t)474 static void printAttribute(const char *name, const UnicodeString value, int32_t /*len*/)
475 {
476 write_utf8_file(out, UnicodeString(" "));
477 write_utf8_file(out, UnicodeString(name));
478 write_utf8_file(out, UnicodeString(" = \""));
479 write_utf8_file(out, value);
480 write_utf8_file(out, UnicodeString("\""));
481 }
482
483 static void
printComments(struct UString * src,const char * resName,UBool printTranslate,UErrorCode * status)484 printComments(struct UString *src, const char *resName, UBool printTranslate, UErrorCode *status){
485
486 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
487
488 if(status==NULL || U_FAILURE(*status)){
489 return;
490 }
491
492 int32_t capacity = src->fLength + 1;
493 char* buf = NULL;
494 int32_t bufLen = 0;
495 UChar* desc = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
496 UChar* trans = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
497
498 int32_t descLen = 0, transLen=0;
499 if(desc==NULL || trans==NULL){
500 *status = U_MEMORY_ALLOCATION_ERROR;
501 uprv_free(desc);
502 uprv_free(trans);
503 return;
504 }
505 // TODO: make src const, stop modifying it in-place, make printContainer() take const resource, etc.
506 src->fLength = removeCmtText(src->fChars, src->fLength, status);
507 descLen = getDescription(src->fChars,src->fLength, &desc, capacity, status);
508 transLen = getTranslate(src->fChars,src->fLength, &trans, capacity, status);
509
510 /* first print translate attribute */
511 if(transLen > 0){
512 if(printTranslate){
513 /* print translate attribute */
514 buf = convertAndEscape(&buf, 0, &bufLen, trans, transLen, status);
515 if(U_SUCCESS(*status)){
516 printAttribute("translate", UnicodeString(buf, bufLen, "UTF-8"), bufLen);
517 write_utf8_file(out,UnicodeString(">\n"));
518 }
519 }else if(getShowWarning()){
520 fprintf(stderr, "Warning: Tranlate attribute for resource %s cannot be set. XLIFF prohibits it.\n", resName);
521 /* no translate attribute .. just close the tag */
522 write_utf8_file(out,UnicodeString(">\n"));
523 }
524 }else{
525 /* no translate attribute .. just close the tag */
526 write_utf8_file(out,UnicodeString(">\n"));
527 }
528
529 if(descLen > 0){
530 write_tabs(out);
531 print(desc, descLen, "<!--", "-->", status);
532 }
533
534 uprv_free(desc);
535 uprv_free(trans);
536 #else
537
538 fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
539
540 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
541
542 }
543
544 /*
545 * Print out a containing element, like:
546 * <trans-unit id = "blah" resname = "blah" restype = "x-id-alias" translate = "no">
547 * <group id "calendar_gregorian" resname = "gregorian" restype = "x-icu-array">
548 */
printContainer(SResource * res,const char * container,const char * restype,const char * mimetype,const char * id,UErrorCode * status)549 static char *printContainer(SResource *res, const char *container, const char *restype, const char *mimetype, const char *id, UErrorCode *status)
550 {
551 const char *resname = NULL;
552 char *sid = NULL;
553
554 write_tabs(out);
555
556 resname = res->getKeyString(srBundle);
557 if (resname != NULL && *resname != 0) {
558 sid = getID(id, resname, sid);
559 } else {
560 sid = getID(id, NULL, sid);
561 }
562
563 write_utf8_file(out, UnicodeString("<"));
564 write_utf8_file(out, UnicodeString(container));
565 printAttribute("id", sid, (int32_t) uprv_strlen(sid));
566
567 if (resname != NULL) {
568 printAttribute("resname", resname, (int32_t) uprv_strlen(resname));
569 }
570
571 if (mimetype != NULL) {
572 printAttribute("mime-type", mimetype, (int32_t) uprv_strlen(mimetype));
573 }
574
575 if (restype != NULL) {
576 printAttribute("restype", restype, (int32_t) uprv_strlen(restype));
577 }
578
579 tabCount += 1;
580 if (res->fComment.fLength > 0) {
581 /* printComments will print the closing ">\n" */
582 printComments(&res->fComment, resname, TRUE, status);
583 } else {
584 write_utf8_file(out, UnicodeString(">\n"));
585 }
586
587 return sid;
588 }
589
/* Writing Functions */

/* Element names (passed to printContainer()) and literal open/close tags. */
static const char *trans_unit           = "trans-unit";
static const char *close_trans_unit     = "</trans-unit>\n";
static const char *source               = "<source>";
static const char *close_source         = "</source>\n";
static const char *group                = "group";
static const char *close_group          = "</group>\n";

/* Tags used for binary resources. */
static const char *bin_unit             = "bin-unit";
static const char *close_bin_unit       = "</bin-unit>\n";
static const char *bin_source           = "<bin-source>\n";
static const char *close_bin_source     = "</bin-source>\n";
static const char *external_file        = "<external-file";
/*static const char *close_external_file = "</external-file>\n";*/
static const char *internal_file        = "<internal-file";
static const char *close_internal_file  = "</internal-file>\n";

static const char *application_mimetype = "application"; /* add "/octet-stream"? */

/* Values of the restype attribute, one per resource kind. */
static const char *alias_restype        = "x-icu-alias";
static const char *array_restype        = "x-icu-array";
static const char *binary_restype       = "x-icu-binary";
static const char *integer_restype      = "x-icu-integer";
static const char *intvector_restype    = "x-icu-intvector";
static const char *table_restype        = "x-icu-table";
616
617 static void
string_write_xml(StringResource * res,const char * id,const char *,UErrorCode * status)618 string_write_xml(StringResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
619
620 char *sid = NULL;
621 char* buf = NULL;
622 int32_t bufLen = 0;
623
624 if(status==NULL || U_FAILURE(*status)){
625 return;
626 }
627
628 sid = printContainer(res, trans_unit, NULL, NULL, id, status);
629
630 write_tabs(out);
631
632 write_utf8_file(out, UnicodeString(source));
633
634 buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
635
636 if (U_FAILURE(*status)) {
637 return;
638 }
639
640 write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
641 write_utf8_file(out, UnicodeString(close_source));
642
643 printNoteElements(&res->fComment, status);
644
645 tabCount -= 1;
646 write_tabs(out);
647
648 write_utf8_file(out, UnicodeString(close_trans_unit));
649
650 uprv_free(buf);
651 uprv_free(sid);
652 }
653
654 static void
alias_write_xml(AliasResource * res,const char * id,const char *,UErrorCode * status)655 alias_write_xml(AliasResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
656 char *sid = NULL;
657 char* buf = NULL;
658 int32_t bufLen=0;
659
660 sid = printContainer(res, trans_unit, alias_restype, NULL, id, status);
661
662 write_tabs(out);
663
664 write_utf8_file(out, UnicodeString(source));
665
666 buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
667
668 if(U_FAILURE(*status)){
669 return;
670 }
671 write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
672 write_utf8_file(out, UnicodeString(close_source));
673
674 printNoteElements(&res->fComment, status);
675
676 tabCount -= 1;
677 write_tabs(out);
678
679 write_utf8_file(out, UnicodeString(close_trans_unit));
680
681 uprv_free(buf);
682 uprv_free(sid);
683 }
684
685 static void
array_write_xml(ArrayResource * res,const char * id,const char * language,UErrorCode * status)686 array_write_xml(ArrayResource *res, const char* id, const char* language, UErrorCode *status) {
687 char* sid = NULL;
688 int index = 0;
689
690 struct SResource *current = NULL;
691
692 sid = printContainer(res, group, array_restype, NULL, id, status);
693
694 current = res->fFirst;
695
696 while (current != NULL) {
697 char c[256] = {0};
698 char* subId = NULL;
699
700 itostr(c, index, 10, 0);
701 index += 1;
702 subId = getID(sid, c, subId);
703
704 res_write_xml(current, subId, language, FALSE, status);
705 uprv_free(subId);
706 subId = NULL;
707
708 if(U_FAILURE(*status)){
709 return;
710 }
711
712 current = current->fNext;
713 }
714
715 tabCount -= 1;
716 write_tabs(out);
717 write_utf8_file(out, UnicodeString(close_group));
718
719 uprv_free(sid);
720 }
721
722 static void
intvector_write_xml(IntVectorResource * res,const char * id,const char *,UErrorCode * status)723 intvector_write_xml(IntVectorResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
724 char* sid = NULL;
725 char* ivd = NULL;
726 uint32_t i=0;
727 uint32_t len=0;
728 char buf[256] = {'0'};
729
730 sid = printContainer(res, group, intvector_restype, NULL, id, status);
731
732 for(i = 0; i < res->fCount; i += 1) {
733 char c[256] = {0};
734
735 itostr(c, i, 10, 0);
736 ivd = getID(sid, c, ivd);
737 len = itostr(buf, res->fArray[i], 10, 0);
738
739 write_tabs(out);
740 write_utf8_file(out, UnicodeString("<"));
741 write_utf8_file(out, UnicodeString(trans_unit));
742
743 printAttribute("id", ivd, (int32_t)uprv_strlen(ivd));
744 printAttribute("restype", integer_restype, (int32_t) strlen(integer_restype));
745
746 write_utf8_file(out, UnicodeString(">\n"));
747
748 tabCount += 1;
749 write_tabs(out);
750 write_utf8_file(out, UnicodeString(source));
751
752 write_utf8_file(out, UnicodeString(buf, len));
753
754 write_utf8_file(out, UnicodeString(close_source));
755 tabCount -= 1;
756 write_tabs(out);
757 write_utf8_file(out, UnicodeString(close_trans_unit));
758
759 uprv_free(ivd);
760 ivd = NULL;
761 }
762
763 tabCount -= 1;
764 write_tabs(out);
765
766 write_utf8_file(out, UnicodeString(close_group));
767 uprv_free(sid);
768 sid = NULL;
769 }
770
771 static void
int_write_xml(IntResource * res,const char * id,const char *,UErrorCode * status)772 int_write_xml(IntResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
773 char* sid = NULL;
774 char buf[256] = {0};
775 uint32_t len = 0;
776
777 sid = printContainer(res, trans_unit, integer_restype, NULL, id, status);
778
779 write_tabs(out);
780
781 write_utf8_file(out, UnicodeString(source));
782
783 len = itostr(buf, res->fValue, 10, 0);
784 write_utf8_file(out, UnicodeString(buf, len));
785
786 write_utf8_file(out, UnicodeString(close_source));
787
788 printNoteElements(&res->fComment, status);
789
790 tabCount -= 1;
791 write_tabs(out);
792
793 write_utf8_file(out, UnicodeString(close_trans_unit));
794
795 uprv_free(sid);
796 sid = NULL;
797 }
798
799 static void
bin_write_xml(BinaryResource * res,const char * id,const char *,UErrorCode * status)800 bin_write_xml(BinaryResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
801 const char* m_type = application_mimetype;
802 char* sid = NULL;
803 uint32_t crc = 0xFFFFFFFF;
804
805 char fileName[1024] ={0};
806 int32_t tLen = ( outDir == NULL) ? 0 :(int32_t)uprv_strlen(outDir);
807 char* fn = (char*) uprv_malloc(sizeof(char) * (tLen+1024 +
808 (res->fFileName !=NULL ?
809 uprv_strlen(res->fFileName) :0)));
810 const char* ext = NULL;
811
812 char* f = NULL;
813
814 fn[0]=0;
815
816 if(res->fFileName != NULL){
817 uprv_strcpy(fileName, res->fFileName);
818 f = uprv_strrchr(fileName, '\\');
819
820 if (f != NULL) {
821 f++;
822 } else {
823 f = fileName;
824 }
825
826 ext = uprv_strrchr(fileName, '.');
827
828 if (ext == NULL) {
829 fprintf(stderr, "Error: %s is an unknown binary filename type.\n", fileName);
830 exit(U_ILLEGAL_ARGUMENT_ERROR);
831 }
832
833 if(uprv_strcmp(ext, ".jpg")==0 || uprv_strcmp(ext, ".jpeg")==0 || uprv_strcmp(ext, ".gif")==0 ){
834 m_type = "image";
835 } else if(uprv_strcmp(ext, ".wav")==0 || uprv_strcmp(ext, ".au")==0 ){
836 m_type = "audio";
837 } else if(uprv_strcmp(ext, ".avi")==0 || uprv_strcmp(ext, ".mpg")==0 || uprv_strcmp(ext, ".mpeg")==0){
838 m_type = "video";
839 } else if(uprv_strcmp(ext, ".txt")==0 || uprv_strcmp(ext, ".text")==0){
840 m_type = "text";
841 }
842
843 sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
844
845 write_tabs(out);
846
847 write_utf8_file(out, UnicodeString(bin_source));
848
849 tabCount+= 1;
850 write_tabs(out);
851
852 write_utf8_file(out, UnicodeString(external_file));
853 printAttribute("href", f, (int32_t)uprv_strlen(f));
854 write_utf8_file(out, UnicodeString("/>\n"));
855 tabCount -= 1;
856 write_tabs(out);
857
858 write_utf8_file(out, UnicodeString(close_bin_source));
859
860 printNoteElements(&res->fComment, status);
861 tabCount -= 1;
862 write_tabs(out);
863 write_utf8_file(out, UnicodeString(close_bin_unit));
864 } else {
865 char temp[256] = {0};
866 uint32_t i = 0;
867 int32_t len=0;
868
869 sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
870
871 write_tabs(out);
872 write_utf8_file(out, UnicodeString(bin_source));
873
874 tabCount += 1;
875 write_tabs(out);
876
877 write_utf8_file(out, UnicodeString(internal_file));
878 printAttribute("form", application_mimetype, (int32_t) uprv_strlen(application_mimetype));
879
880 while(i <res->fLength){
881 len = itostr(temp, res->fData[i], 16, 2);
882 crc = computeCRC(temp, len, crc);
883 i++;
884 }
885
886 len = itostr(temp, crc, 10, 0);
887 printAttribute("crc", temp, len);
888
889 write_utf8_file(out, UnicodeString(">"));
890
891 i = 0;
892 while(i <res->fLength){
893 len = itostr(temp, res->fData[i], 16, 2);
894 write_utf8_file(out, UnicodeString(temp));
895 i += 1;
896 }
897
898 write_utf8_file(out, UnicodeString(close_internal_file));
899
900 tabCount -= 2;
901 write_tabs(out);
902
903 write_utf8_file(out, UnicodeString(close_bin_source));
904 printNoteElements(&res->fComment, status);
905
906 tabCount -= 1;
907 write_tabs(out);
908 write_utf8_file(out, UnicodeString(close_bin_unit));
909
910 uprv_free(sid);
911 sid = NULL;
912 }
913
914 uprv_free(fn);
915 }
916
917
918
919 static void
table_write_xml(TableResource * res,const char * id,const char * language,UBool isTopLevel,UErrorCode * status)920 table_write_xml(TableResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
921
922 uint32_t i = 0;
923
924 struct SResource *current = NULL;
925 char* sid = NULL;
926
927 if (U_FAILURE(*status)) {
928 return ;
929 }
930
931 sid = printContainer(res, group, table_restype, NULL, id, status);
932
933 if(isTopLevel) {
934 sid[0] = '\0';
935 }
936
937 current = res->fFirst;
938 i = 0;
939
940 while (current != NULL) {
941 res_write_xml(current, sid, language, FALSE, status);
942
943 if(U_FAILURE(*status)){
944 return;
945 }
946
947 i += 1;
948 current = current->fNext;
949 }
950
951 tabCount -= 1;
952 write_tabs(out);
953
954 write_utf8_file(out, UnicodeString(close_group));
955
956 uprv_free(sid);
957 sid = NULL;
958 }
959
960 void
res_write_xml(struct SResource * res,const char * id,const char * language,UBool isTopLevel,UErrorCode * status)961 res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
962
963 if (U_FAILURE(*status)) {
964 return ;
965 }
966
967 if (res != NULL) {
968 switch (res->fType) {
969 case URES_STRING:
970 string_write_xml (static_cast<StringResource *>(res), id, language, status);
971 return;
972
973 case URES_ALIAS:
974 alias_write_xml (static_cast<AliasResource *>(res), id, language, status);
975 return;
976
977 case URES_INT_VECTOR:
978 intvector_write_xml (static_cast<IntVectorResource *>(res), id, language, status);
979 return;
980
981 case URES_BINARY:
982 bin_write_xml (static_cast<BinaryResource *>(res), id, language, status);
983 return;
984
985 case URES_INT:
986 int_write_xml (static_cast<IntResource *>(res), id, language, status);
987 return;
988
989 case URES_ARRAY:
990 array_write_xml (static_cast<ArrayResource *>(res), id, language, status);
991 return;
992
993 case URES_TABLE:
994 table_write_xml (static_cast<TableResource *>(res), id, language, isTopLevel, status);
995 return;
996
997 default:
998 break;
999 }
1000 }
1001
1002 *status = U_INTERNAL_PROGRAM_ERROR;
1003 }
1004
1005 void
bundle_write_xml(struct SRBRoot * bundle,const char * outputDir,const char * outputEnc,const char * filename,char * writtenFilename,int writtenFilenameLen,const char * language,const char * outFileName,UErrorCode * status)1006 bundle_write_xml(struct SRBRoot *bundle, const char *outputDir,const char* outputEnc, const char* filename,
1007 char *writtenFilename, int writtenFilenameLen,
1008 const char* language, const char* outFileName, UErrorCode *status) {
1009
1010 char* xmlfileName = NULL;
1011 char* outputFileName = NULL;
1012 char* originalFileName = NULL;
1013 const char* fileStart = "<file xml:space = \"preserve\" source-language = \"";
1014 const char* file1 = "\" datatype = \"x-icu-resource-bundle\" ";
1015 const char* file2 = "original = \"";
1016 const char* file4 = "\" date = \"";
1017 const char* fileEnd = "</file>\n";
1018 const char* headerStart = "<header>\n";
1019 const char* headerEnd = "</header>\n";
1020 const char* bodyStart = "<body>\n";
1021 const char* bodyEnd = "</body>\n";
1022
1023 const char *tool_start = "<tool";
1024 const char *tool_id = "genrb-" GENRB_VERSION "-icu-" U_ICU_VERSION;
1025 const char *tool_name = "genrb";
1026
1027 char* temp = NULL;
1028 char* lang = NULL;
1029 const char* pos = NULL;
1030 int32_t first, index;
1031 time_t currTime;
1032 char timeBuf[128];
1033
1034 outDir = outputDir;
1035
1036 srBundle = bundle;
1037
1038 pos = uprv_strrchr(filename, '\\');
1039 if(pos != NULL) {
1040 first = (int32_t)(pos - filename + 1);
1041 } else {
1042 first = 0;
1043 }
1044 index = (int32_t)(uprv_strlen(filename) - uprv_strlen(textExt) - first);
1045 originalFileName = (char *)uprv_malloc(sizeof(char)*index+1);
1046 uprv_memset(originalFileName, 0, sizeof(char)*index+1);
1047 uprv_strncpy(originalFileName, filename + first, index);
1048
1049 if(uprv_strcmp(originalFileName, srBundle->fLocale) != 0) {
1050 fprintf(stdout, "Warning: The file name is not same as the resource name!\n");
1051 }
1052
1053 temp = originalFileName;
1054 originalFileName = (char *)uprv_malloc(sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1055 uprv_memset(originalFileName, 0, sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1056 uprv_strcat(originalFileName, temp);
1057 uprv_strcat(originalFileName, textExt);
1058 uprv_free(temp);
1059 temp = NULL;
1060
1061
1062 if (language == NULL) {
1063 /* lang = parseFilename(filename, lang);
1064 if (lang == NULL) {*/
1065 /* now check if locale name is valid or not
1066 * this is to cater for situation where
1067 * pegasusServer.txt contains
1068 *
1069 * en{
1070 * ..
1071 * }
1072 */
1073 lang = parseFilename(srBundle->fLocale, lang);
/*
 * If neither the file name nor the table name inside the
 * txt file contains valid language and country codes,
 * report an error and exit. For example,
 * pegasusServer.txt contains
 *
 * testelements{
 * ....
 * }
 */
1084 if(lang==NULL){
1085 fprintf(stderr, "Error: The file name and table name do not contain a valid language code. Please use -l option to specify it.\n");
1086 exit(U_ILLEGAL_ARGUMENT_ERROR);
1087 }
1088 /* }*/
1089 } else {
1090 lang = (char *)uprv_malloc(sizeof(char)*uprv_strlen(language) +1);
1091 uprv_memset(lang, 0, sizeof(char)*uprv_strlen(language) +1);
1092 uprv_strcpy(lang, language);
1093 }
1094
1095 if(outFileName) {
1096 outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(outFileName) + 1);
1097 uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(outFileName) + 1);
1098 uprv_strcpy(outputFileName,outFileName);
1099 } else {
1100 outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1101 uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1102 uprv_strcpy(outputFileName,srBundle->fLocale);
1103 }
1104
1105 if(outputDir) {
1106 xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputDir) + uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1107 uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputDir)+ uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1108 } else {
1109 xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1110 uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1111 }
1112
1113 if(outputDir){
1114 uprv_strcpy(xmlfileName, outputDir);
1115 if(outputDir[uprv_strlen(outputDir)-1] !=U_FILE_SEP_CHAR){
1116 uprv_strcat(xmlfileName,U_FILE_SEP_STRING);
1117 }
1118 }
1119 uprv_strcat(xmlfileName,outputFileName);
1120 uprv_strcat(xmlfileName,xliffExt);
1121
1122 if (writtenFilename) {
1123 uprv_strncpy(writtenFilename, xmlfileName, writtenFilenameLen);
1124 }
1125
1126 if (U_FAILURE(*status)) {
1127 goto cleanup_bundle_write_xml;
1128 }
1129
1130 out= T_FileStream_open(xmlfileName,"w");
1131
1132 if(out==NULL){
1133 *status = U_FILE_ACCESS_ERROR;
1134 goto cleanup_bundle_write_xml;
1135 }
1136 write_utf8_file(out, UnicodeString(xmlHeader));
1137
1138 if(outputEnc && *outputEnc!='\0'){
1139 /* store the output encoding */
1140 enc = outputEnc;
1141 conv=ucnv_open(enc,status);
1142 if(U_FAILURE(*status)){
1143 goto cleanup_bundle_write_xml;
1144 }
1145 }
1146 write_utf8_file(out, UnicodeString(bundleStart));
1147 write_tabs(out);
1148 write_utf8_file(out, UnicodeString(fileStart));
/* if a language was explicitly specified, warn when it differs from the resource's top-level tag (the bundle locale) */
1150 if(language != NULL && uprv_strcmp(lang, srBundle->fLocale)!=0){
1151 fprintf(stderr,"Warning: The top level tag in the resource and language specified are not the same. Please check the input.\n");
1152 }
1153 write_utf8_file(out, UnicodeString(lang));
1154 write_utf8_file(out, UnicodeString(file1));
1155 write_utf8_file(out, UnicodeString(file2));
1156 write_utf8_file(out, UnicodeString(originalFileName));
1157 write_utf8_file(out, UnicodeString(file4));
1158
1159 time(&currTime);
1160 strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%dT%H:%M:%SZ", gmtime(&currTime));
1161 write_utf8_file(out, UnicodeString(timeBuf));
1162 write_utf8_file(out, UnicodeString("\">\n"));
1163
1164 tabCount += 1;
1165 write_tabs(out);
1166 write_utf8_file(out, UnicodeString(headerStart));
1167
1168 tabCount += 1;
1169 write_tabs(out);
1170
1171 write_utf8_file(out, UnicodeString(tool_start));
1172 printAttribute("tool-id", tool_id, (int32_t) uprv_strlen(tool_id));
1173 printAttribute("tool-name", tool_name, (int32_t) uprv_strlen(tool_name));
1174 write_utf8_file(out, UnicodeString("/>\n"));
1175
1176 tabCount -= 1;
1177 write_tabs(out);
1178
1179 write_utf8_file(out, UnicodeString(headerEnd));
1180
1181 write_tabs(out);
1182 tabCount += 1;
1183
1184 write_utf8_file(out, UnicodeString(bodyStart));
1185
1186
1187 res_write_xml(bundle->fRoot, bundle->fLocale, lang, TRUE, status);
1188
1189 tabCount -= 1;
1190 write_tabs(out);
1191
1192 write_utf8_file(out, UnicodeString(bodyEnd));
1193 tabCount--;
1194 write_tabs(out);
1195 write_utf8_file(out, UnicodeString(fileEnd));
1196 tabCount--;
1197 write_tabs(out);
1198 write_utf8_file(out, UnicodeString(bundleEnd));
1199 T_FileStream_close(out);
1200
1201 ucnv_close(conv);
1202
1203 cleanup_bundle_write_xml:
1204 uprv_free(originalFileName);
1205 uprv_free(lang);
1206 if(xmlfileName != NULL) {
1207 uprv_free(xmlfileName);
1208 }
1209 if(outputFileName != NULL){
1210 uprv_free(outputFileName);
1211 }
1212 }
1213