1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File ucbuf.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/10/01 Ram Creation.
15 *******************************************************************************
16 */
17
18 #include "unicode/utypes.h"
19 #include "unicode/putil.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "filestrm.h"
23 #include "cstring.h"
24 #include "cmemory.h"
25 #include "ustrfmt.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "ucbuf.h"
29 #include <stdio.h>
30
31 #if !UCONFIG_NO_CONVERSION
32
33
/* Number of raw input bytes read from the file per refill in buffered mode. */
#define MAX_IN_BUF 1000
/* Capacity, in UChars, of the converted-text buffer in buffered mode. */
#define MAX_U_BUF 1500
/* Maximum number of characters of context shown in diagnostic messages. */
#define CONTEXT_LEN 20
37
38 struct UCHARBUF {
39 UChar* buffer;
40 UChar* currentPos;
41 UChar* bufLimit;
42 int32_t bufCapacity;
43 int32_t remaining;
44 int32_t signatureLength;
45 FileStream* in;
46 UConverter* conv;
47 UBool showWarning; /* makes this API not produce any errors */
48 UBool isBuffered;
49 };
50
51 U_CAPI UBool U_EXPORT2
ucbuf_autodetect_fs(FileStream * in,const char ** cp,UConverter ** conv,int32_t * signatureLength,UErrorCode * error)52 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
53 char start[8];
54 int32_t numRead;
55
56 UChar target[1]={ 0 };
57 UChar* pTarget;
58 const char* pStart;
59
60 /* read a few bytes */
61 numRead=T_FileStream_read(in, start, sizeof(start));
62
63 *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error);
64
65 /* unread the bytes beyond what was consumed for U+FEFF */
66 T_FileStream_rewind(in);
67 if (*signatureLength > 0) {
68 numRead = T_FileStream_read(in, start, *signatureLength);
69 }
70
71 if(*cp==NULL){
72 *conv =NULL;
73 return FALSE;
74 }
75
76 /* open the converter for the detected Unicode charset */
77 *conv = ucnv_open(*cp,error);
78
79 /* convert and ignore initial U+FEFF, and the buffer overflow */
80 pTarget = target;
81 pStart = start;
82 ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error);
83 *signatureLength = (int32_t)(pStart - start);
84 if(*error==U_BUFFER_OVERFLOW_ERROR) {
85 *error=U_ZERO_ERROR;
86 }
87
88 /* verify that we successfully read exactly U+FEFF */
89 if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) {
90 *error=U_INTERNAL_PROGRAM_ERROR;
91 }
92
93
94 return TRUE;
95 }
ucbuf_isCPKnown(const char * cp)96 static UBool ucbuf_isCPKnown(const char* cp){
97 if(ucnv_compareNames("UTF-8",cp)==0){
98 return TRUE;
99 }
100 if(ucnv_compareNames("UTF-16BE",cp)==0){
101 return TRUE;
102 }
103 if(ucnv_compareNames("UTF-16LE",cp)==0){
104 return TRUE;
105 }
106 if(ucnv_compareNames("UTF-16",cp)==0){
107 return TRUE;
108 }
109 if(ucnv_compareNames("UTF-32",cp)==0){
110 return TRUE;
111 }
112 if(ucnv_compareNames("UTF-32BE",cp)==0){
113 return TRUE;
114 }
115 if(ucnv_compareNames("UTF-32LE",cp)==0){
116 return TRUE;
117 }
118 if(ucnv_compareNames("SCSU",cp)==0){
119 return TRUE;
120 }
121 if(ucnv_compareNames("BOCU-1",cp)==0){
122 return TRUE;
123 }
124 if(ucnv_compareNames("UTF-7",cp)==0){
125 return TRUE;
126 }
127 return FALSE;
128 }
129
130 U_CAPI FileStream * U_EXPORT2
ucbuf_autodetect(const char * fileName,const char ** cp,UConverter ** conv,int32_t * signatureLength,UErrorCode * error)131 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
132 FileStream* in=NULL;
133 if(error==NULL || U_FAILURE(*error)){
134 return NULL;
135 }
136 if(conv==NULL || cp==NULL || fileName==NULL){
137 *error = U_ILLEGAL_ARGUMENT_ERROR;
138 return NULL;
139 }
140 /* open the file */
141 in= T_FileStream_open(fileName,"rb");
142
143 if(in == NULL){
144 *error=U_FILE_ACCESS_ERROR;
145 return NULL;
146 }
147
148 if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
149 return in;
150 } else {
151 ucnv_close(*conv);
152 *conv=NULL;
153 T_FileStream_close(in);
154 return NULL;
155 }
156 }
157
158 /* fill the uchar buffer */
159 static UCHARBUF*
ucbuf_fillucbuf(UCHARBUF * buf,UErrorCode * error)160 ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
161 UChar* pTarget=NULL;
162 UChar* target=NULL;
163 const char* source=NULL;
164 char carr[MAX_IN_BUF] = {'\0'};
165 char* cbuf = carr;
166 int32_t inputRead=0;
167 int32_t outputWritten=0;
168 int32_t offset=0;
169 const char* sourceLimit =NULL;
170 int32_t cbufSize=0;
171 pTarget = buf->buffer;
172 /* check if we arrived here without exhausting the buffer*/
173 if(buf->currentPos<buf->bufLimit){
174 offset = (int32_t)(buf->bufLimit-buf->currentPos);
175 memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
176 }
177
178 #if DEBUG
179 memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
180 #endif
181 if(buf->isBuffered){
182 cbufSize = MAX_IN_BUF;
183 /* read the file */
184 inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
185 buf->remaining-=inputRead;
186
187 }else{
188 cbufSize = T_FileStream_size(buf->in);
189 cbuf = (char*)uprv_malloc(cbufSize);
190 if (cbuf == NULL) {
191 *error = U_MEMORY_ALLOCATION_ERROR;
192 return NULL;
193 }
194 inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
195 buf->remaining-=inputRead;
196 }
197
198 /* just to be sure...*/
199 if ( 0 == inputRead )
200 buf->remaining = 0;
201
202 target=pTarget;
203 /* convert the bytes */
204 if(buf->conv){
205 /* set the callback to stop */
206 UConverterToUCallback toUOldAction ;
207 void* toUOldContext;
208 void* toUNewContext=NULL;
209 ucnv_setToUCallBack(buf->conv,
210 UCNV_TO_U_CALLBACK_STOP,
211 toUNewContext,
212 &toUOldAction,
213 (const void**)&toUOldContext,
214 error);
215 /* since state is saved in the converter we add offset to source*/
216 target = pTarget+offset;
217 source = cbuf;
218 sourceLimit = source + inputRead;
219 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
220 &source,sourceLimit,NULL,
221 (UBool)(buf->remaining==0),error);
222
223 if(U_FAILURE(*error)){
224 char context[CONTEXT_LEN+1];
225 char preContext[CONTEXT_LEN+1];
226 char postContext[CONTEXT_LEN+1];
227 int8_t len = CONTEXT_LEN;
228 int32_t start=0;
229 int32_t stop =0;
230 int32_t pos =0;
231 /* use erro1 to preserve the error code */
232 UErrorCode error1 =U_ZERO_ERROR;
233
234 if( buf->showWarning==TRUE){
235 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
236 " converting input stream to target encoding: %s\n",
237 u_errorName(*error));
238 }
239
240
241 /* now get the context chars */
242 ucnv_getInvalidChars(buf->conv,context,&len,&error1);
243 context[len]= 0 ; /* null terminate the buffer */
244
245 pos = (int32_t)(source - cbuf - len);
246
247 /* for pre-context */
248 start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
249 stop = pos-len;
250
251 memcpy(preContext,cbuf+start,stop-start);
252 /* null terminate the buffer */
253 preContext[stop-start] = 0;
254
255 /* for post-context */
256 start = pos+len;
257 stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
258
259 memcpy(postContext,source,stop-start);
260 /* null terminate the buffer */
261 postContext[stop-start] = 0;
262
263 if(buf->showWarning ==TRUE){
264 /* print out the context */
265 fprintf(stderr,"\tPre-context: %s\n",preContext);
266 fprintf(stderr,"\tContext: %s\n",context);
267 fprintf(stderr,"\tPost-context: %s\n", postContext);
268 }
269
270 /* reset the converter */
271 ucnv_reset(buf->conv);
272
273 /* set the call back to substitute
274 * and restart conversion
275 */
276 ucnv_setToUCallBack(buf->conv,
277 UCNV_TO_U_CALLBACK_SUBSTITUTE,
278 toUNewContext,
279 &toUOldAction,
280 (const void**)&toUOldContext,
281 &error1);
282
283 /* reset source and target start positions */
284 target = pTarget+offset;
285 source = cbuf;
286
287 /* re convert */
288 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
289 &source,sourceLimit,NULL,
290 (UBool)(buf->remaining==0),&error1);
291
292 }
293 outputWritten = (int32_t)(target - pTarget);
294
295
296 #if DEBUG
297 {
298 int i;
299 target = pTarget;
300 for(i=0;i<numRead;i++){
301 /* printf("%c", (char)(*target++));*/
302 }
303 }
304 #endif
305
306 }else{
307 u_charsToUChars(cbuf,target+offset,inputRead);
308 outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
309 }
310 buf->currentPos = pTarget;
311 buf->bufLimit=pTarget+outputWritten;
312 *buf->bufLimit=0; /*NUL terminate*/
313 if(cbuf!=carr){
314 uprv_free(cbuf);
315 }
316 return buf;
317 }
318
319
320
321 /* get a UChar from the stream*/
322 U_CAPI int32_t U_EXPORT2
ucbuf_getc(UCHARBUF * buf,UErrorCode * error)323 ucbuf_getc(UCHARBUF* buf,UErrorCode* error){
324 if(error==NULL || U_FAILURE(*error)){
325 return FALSE;
326 }
327 if(buf->currentPos>=buf->bufLimit){
328 if(buf->remaining==0){
329 return U_EOF;
330 }
331 buf=ucbuf_fillucbuf(buf,error);
332 if(U_FAILURE(*error)){
333 return U_EOF;
334 }
335 }
336
337 return *(buf->currentPos++);
338 }
339
340 /* get a UChar32 from the stream*/
341 U_CAPI int32_t U_EXPORT2
ucbuf_getc32(UCHARBUF * buf,UErrorCode * error)342 ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
343 int32_t retVal = (int32_t)U_EOF;
344 if(error==NULL || U_FAILURE(*error)){
345 return FALSE;
346 }
347 if(buf->currentPos+1>=buf->bufLimit){
348 if(buf->remaining==0){
349 return U_EOF;
350 }
351 buf=ucbuf_fillucbuf(buf,error);
352 if(U_FAILURE(*error)){
353 return U_EOF;
354 }
355 }
356 if(UTF_IS_LEAD(*(buf->currentPos))){
357 retVal=UTF16_GET_PAIR_VALUE(buf->currentPos[0],buf->currentPos[1]);
358 buf->currentPos+=2;
359 }else{
360 retVal = *(buf->currentPos++);
361 }
362 return retVal;
363 }
364
365 /* u_unescapeAt() callback to return a UChar*/
366 static UChar U_CALLCONV
_charAt(int32_t offset,void * context)367 _charAt(int32_t offset, void *context) {
368 return ((UCHARBUF*) context)->currentPos[offset];
369 }
370
371 /* getc and escape it */
372 U_CAPI int32_t U_EXPORT2
ucbuf_getcx32(UCHARBUF * buf,UErrorCode * error)373 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
374 int32_t length;
375 int32_t offset;
376 UChar32 c32,c1,c2;
377 if(error==NULL || U_FAILURE(*error)){
378 return FALSE;
379 }
380 /* Fill the buffer if it is empty */
381 if (buf->currentPos >=buf->bufLimit-2) {
382 ucbuf_fillucbuf(buf,error);
383 }
384
385 /* Get the next character in the buffer */
386 if (buf->currentPos < buf->bufLimit) {
387 c1 = *(buf->currentPos)++;
388 } else {
389 c1 = U_EOF;
390 }
391
392 c2 = *(buf->currentPos);
393
394 /* If it isn't a backslash, return it */
395 if (c1 != 0x005C) {
396 return c1;
397 }
398
399 /* Determine the amount of data in the buffer */
400 length = (int32_t)(buf->bufLimit - buf->currentPos);
401
402 /* The longest escape sequence is \Uhhhhhhhh; make sure
403 we have at least that many characters */
404 if (length < 10) {
405
406 /* fill the buffer */
407 ucbuf_fillucbuf(buf,error);
408 length = (int32_t)(buf->bufLimit - buf->buffer);
409 }
410
411 /* Process the escape */
412 offset = 0;
413 c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
414
415 /* check if u_unescapeAt unescaped and converted
416 * to c32 or not
417 */
418 if(c32==0xFFFFFFFF){
419 if(buf->showWarning) {
420 char context[CONTEXT_LEN+1];
421 int32_t len = CONTEXT_LEN;
422 if(length < len) {
423 len = length;
424 }
425 context[len]= 0 ; /* null terminate the buffer */
426 u_UCharsToChars( buf->currentPos, context, len);
427 fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
428 }
429 *error= U_ILLEGAL_ESCAPE_SEQUENCE;
430 return c1;
431 }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
432 /* Update the current buffer position */
433 buf->currentPos += offset;
434 }else{
435 /* unescaping failed so we just return
436 * c1 and not consume the buffer
437 * this is useful for rules with escapes
438 * in resouce bundles
439 * eg: \' \\ \"
440 */
441 return c1;
442 }
443
444 return c32;
445 }
446
447 U_CAPI UCHARBUF* U_EXPORT2
ucbuf_open(const char * fileName,const char ** cp,UBool showWarning,UBool buffered,UErrorCode * error)448 ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
449
450 FileStream* in = NULL;
451 int32_t fileSize=0;
452 const char* knownCp;
453 if(error==NULL || U_FAILURE(*error)){
454 return NULL;
455 }
456 if(cp==NULL || fileName==NULL){
457 *error = U_ILLEGAL_ARGUMENT_ERROR;
458 return FALSE;
459 }
460 if (!uprv_strcmp(fileName, "-")) {
461 in = T_FileStream_stdin();
462 }else{
463 in = T_FileStream_open(fileName, "rb");
464 }
465
466 if(in!=NULL){
467 UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
468 fileSize = T_FileStream_size(in);
469 if(buf == NULL){
470 *error = U_MEMORY_ALLOCATION_ERROR;
471 T_FileStream_close(in);
472 return NULL;
473 }
474 buf->in=in;
475 buf->conv=NULL;
476 buf->showWarning = showWarning;
477 buf->isBuffered = buffered;
478 buf->signatureLength=0;
479 if(*cp==NULL || **cp=='\0'){
480 /* don't have code page name... try to autodetect */
481 ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
482 }else if(ucbuf_isCPKnown(*cp)){
483 /* discard BOM */
484 ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
485 }
486 if(U_SUCCESS(*error) && buf->conv==NULL) {
487 buf->conv=ucnv_open(*cp,error);
488 }
489 if(U_FAILURE(*error)){
490 ucnv_close(buf->conv);
491 uprv_free(buf);
492 T_FileStream_close(in);
493 return NULL;
494 }
495
496 if((buf->conv==NULL) && (buf->showWarning==TRUE)){
497 fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
498 }
499 buf->remaining=fileSize-buf->signatureLength;
500 if(buf->isBuffered){
501 buf->bufCapacity=MAX_U_BUF;
502 }else{
503 buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;
504 }
505 buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
506 if (buf->buffer == NULL) {
507 *error = U_MEMORY_ALLOCATION_ERROR;
508 ucbuf_close(buf);
509 return NULL;
510 }
511 buf->currentPos=buf->buffer;
512 buf->bufLimit=buf->buffer;
513 if(U_FAILURE(*error)){
514 fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error));
515 ucbuf_close(buf);
516 return NULL;
517 }
518 ucbuf_fillucbuf(buf,error);
519 if(U_FAILURE(*error)){
520 ucbuf_close(buf);
521 return NULL;
522 }
523 return buf;
524 }
525 *error =U_FILE_ACCESS_ERROR;
526 return NULL;
527 }
528
529
530
531 /* TODO: this method will fail if at the
532 * begining of buffer and the uchar to unget
533 * is from the previous buffer. Need to implement
534 * system to take care of that situation.
535 */
536 U_CAPI void U_EXPORT2
ucbuf_ungetc(int32_t c,UCHARBUF * buf)537 ucbuf_ungetc(int32_t c,UCHARBUF* buf){
538 /* decrement currentPos pointer
539 * if not at the begining of buffer
540 */
541 if(buf->currentPos!=buf->buffer){
542 if(*(buf->currentPos-1)==c){
543 buf->currentPos--;
544 } else {
545 /* ungetc failed - did not match. */
546 }
547 } else {
548 /* ungetc failed - beginning of buffer. */
549 }
550 }
551
552 /* frees the resources of UChar* buffer */
553 static void
ucbuf_closebuf(UCHARBUF * buf)554 ucbuf_closebuf(UCHARBUF* buf){
555 uprv_free(buf->buffer);
556 buf->buffer = NULL;
557 }
558
559 /* close the buf and release resources*/
560 U_CAPI void U_EXPORT2
ucbuf_close(UCHARBUF * buf)561 ucbuf_close(UCHARBUF* buf){
562 if(buf!=NULL){
563 if(buf->conv){
564 ucnv_close(buf->conv);
565 }
566 T_FileStream_close(buf->in);
567 ucbuf_closebuf(buf);
568 uprv_free(buf);
569 }
570 }
571
572 /* rewind the buf and file stream */
573 U_CAPI void U_EXPORT2
ucbuf_rewind(UCHARBUF * buf,UErrorCode * error)574 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
575 if(error==NULL || U_FAILURE(*error)){
576 return;
577 }
578 if(buf){
579 buf->currentPos=buf->buffer;
580 buf->bufLimit=buf->buffer;
581 T_FileStream_rewind(buf->in);
582 buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
583
584 ucnv_resetToUnicode(buf->conv);
585 if(buf->signatureLength>0) {
586 UChar target[1]={ 0 };
587 UChar* pTarget;
588 char start[8];
589 const char* pStart;
590 int32_t numRead;
591
592 /* read the signature bytes */
593 numRead=T_FileStream_read(buf->in, start, buf->signatureLength);
594
595 /* convert and ignore initial U+FEFF, and the buffer overflow */
596 pTarget = target;
597 pStart = start;
598 ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error);
599 if(*error==U_BUFFER_OVERFLOW_ERROR) {
600 *error=U_ZERO_ERROR;
601 }
602
603 /* verify that we successfully read exactly U+FEFF */
604 if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) {
605 *error=U_INTERNAL_PROGRAM_ERROR;
606 }
607 }
608 }
609 }
610
611
612 U_CAPI int32_t U_EXPORT2
ucbuf_size(UCHARBUF * buf)613 ucbuf_size(UCHARBUF* buf){
614 if(buf){
615 if(buf->isBuffered){
616 return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
617 }else{
618 return (int32_t)(buf->bufLimit - buf->buffer);
619 }
620 }
621 return 0;
622 }
623
624 U_CAPI const UChar* U_EXPORT2
ucbuf_getBuffer(UCHARBUF * buf,int32_t * len,UErrorCode * error)625 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
626 if(error==NULL || U_FAILURE(*error)){
627 return NULL;
628 }
629 if(buf==NULL || len==NULL){
630 *error = U_ILLEGAL_ARGUMENT_ERROR;
631 return NULL;
632 }
633 *len = (int32_t)(buf->bufLimit - buf->buffer);
634 return buf->buffer;
635 }
636
637 U_CAPI const char* U_EXPORT2
ucbuf_resolveFileName(const char * inputDir,const char * fileName,char * target,int32_t * len,UErrorCode * status)638 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
639 int32_t requiredLen = 0;
640 int32_t dirlen = 0;
641 int32_t filelen = 0;
642 if(status==NULL || U_FAILURE(*status)){
643 return NULL;
644 }
645
646 if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){
647 *status = U_ILLEGAL_ARGUMENT_ERROR;
648 return NULL;
649 }
650
651
652 dirlen = (int32_t)uprv_strlen(inputDir);
653 filelen = (int32_t)uprv_strlen(fileName);
654 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
655 requiredLen = dirlen + filelen + 2;
656 if((*len < requiredLen) || target==NULL){
657 *len = requiredLen;
658 *status = U_BUFFER_OVERFLOW_ERROR;
659 return NULL;
660 }
661
662 target[0] = '\0';
663 /*
664 * append the input dir to openFileName if the first char in
665 * filename is not file seperation char and the last char input directory is not '.'.
666 * This is to support :
667 * genrb -s. /home/icu/data
668 * genrb -s. icu/data
669 * The user cannot mix notations like
670 * genrb -s. /icu/data --- the absolute path specified. -s redundant
671 * user should use
672 * genrb -s. icu/data --- start from CWD and look in icu/data dir
673 */
674 if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
675 uprv_strcpy(target, inputDir);
676 target[dirlen] = U_FILE_SEP_CHAR;
677 }
678 target[dirlen + 1] = '\0';
679 } else {
680 requiredLen = dirlen + filelen + 1;
681 if((*len < requiredLen) || target==NULL){
682 *len = requiredLen;
683 *status = U_BUFFER_OVERFLOW_ERROR;
684 return NULL;
685 }
686
687 uprv_strcpy(target, inputDir);
688 }
689
690 uprv_strcat(target, fileName);
691 return target;
692 }
693 /*
694 * Unicode TR 13 says any of the below chars is
695 * a new line char in a readline function in addition
696 * to CR+LF combination which needs to be
697 * handled seperately
698 */
ucbuf_isCharNewLine(UChar c)699 static UBool ucbuf_isCharNewLine(UChar c){
700 switch(c){
701 case 0x000A: /* LF */
702 case 0x000D: /* CR */
703 case 0x000C: /* FF */
704 case 0x0085: /* NEL */
705 case 0x2028: /* LS */
706 case 0x2029: /* PS */
707 return TRUE;
708 default:
709 return FALSE;
710 }
711 }
712
713 U_CAPI const UChar* U_EXPORT2
ucbuf_readline(UCHARBUF * buf,int32_t * len,UErrorCode * err)714 ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
715 UChar* temp = buf->currentPos;
716 UChar* savePos =NULL;
717 UChar c=0x0000;
718 if(buf->isBuffered){
719 /* The input is buffered we have to do more
720 * for returning a pointer U_TRUNCATED_CHAR_FOUND
721 */
722 for(;;){
723 c = *temp++;
724 if(buf->remaining==0){
725 return NULL; /* end of file is reached return NULL */
726 }
727 if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
728 *err= U_TRUNCATED_CHAR_FOUND;
729 return NULL;
730 }else{
731 ucbuf_fillucbuf(buf,err);
732 if(U_FAILURE(*err)){
733 return NULL;
734 }
735 }
736 /*
737 * Accoding to TR 13 readLine functions must interpret
738 * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators
739 */
740 /* Windows CR LF */
741 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
742 *len = (int32_t)(temp++ - buf->currentPos);
743 savePos = buf->currentPos;
744 buf->currentPos = temp;
745 return savePos;
746 }
747 /* else */
748
749 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */
750 *len = (int32_t)(temp - buf->currentPos);
751 savePos = buf->currentPos;
752 buf->currentPos = temp;
753 return savePos;
754 }
755 }
756 }else{
757 /* we know that all input is read into the internal
758 * buffer so we can safely return pointers
759 */
760 for(;;){
761 c = *temp++;
762
763 if(buf->currentPos==buf->bufLimit){
764 return NULL; /* end of file is reached return NULL */
765 }
766 /* Windows CR LF */
767 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
768 *len = (int32_t)(temp++ - buf->currentPos);
769 savePos = buf->currentPos;
770 buf->currentPos = temp;
771 return savePos;
772 }
773 /* else */
774 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */
775 *len = (int32_t)(temp - buf->currentPos);
776 savePos = buf->currentPos;
777 buf->currentPos = temp;
778 return savePos;
779 }
780 }
781 }
782 /* not reached */
783 /* A compiler warning will appear if all paths don't contain a return statement. */
784 /* return NULL;*/
785 }
786 #endif
787