1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File ucbuf.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/10/01 Ram Creation.
15 *******************************************************************************
16 */
17
18 #include "unicode/utypes.h"
19 #include "unicode/putil.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "filestrm.h"
23 #include "cstring.h"
24 #include "cmemory.h"
25 #include "ustrfmt.h"
26 #include "unicode/ustring.h"
27 #include "unicode/uchar.h"
28 #include "ucbuf.h"
29 #include <stdio.h>
30
31 #if !UCONFIG_NO_CONVERSION
32
33
/* Size, in bytes, of the stack array that stages one chunk read from the file
 * when the reader works in buffered mode. */
#define MAX_IN_BUF   1000

/* Capacity, in UChars, of the conversion buffer allocated in buffered mode. */
#define MAX_U_BUF    1500

/* Number of bytes reserved for each context snippet (invalid bytes, bytes
 * before, bytes after) printed when a conversion error is reported. */
#define CONTEXT_LEN  15
37
38 struct UCHARBUF {
39 UChar* buffer;
40 UChar* currentPos;
41 UChar* bufLimit;
42 int32_t bufCapacity;
43 int32_t remaining;
44 int32_t signatureLength;
45 FileStream* in;
46 UConverter* conv;
47 UBool showWarning; /* makes this API not produce any errors */
48 UBool isBuffered;
49 };
50
51 U_CAPI UBool U_EXPORT2
ucbuf_autodetect_fs(FileStream * in,const char ** cp,UConverter ** conv,int32_t * signatureLength,UErrorCode * error)52 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
53 char start[8];
54 int32_t numRead;
55
56 UChar target[1]={ 0 };
57 UChar* pTarget;
58 const char* pStart;
59
60 /* read a few bytes */
61 numRead=T_FileStream_read(in, start, sizeof(start));
62
63 *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error);
64
65 /* unread the bytes beyond what was consumed for U+FEFF */
66 T_FileStream_rewind(in);
67 if (*signatureLength > 0) {
68 numRead = T_FileStream_read(in, start, *signatureLength);
69 }
70
71 if(*cp==NULL){
72 *conv =NULL;
73 return FALSE;
74 }
75
76 /* open the converter for the detected Unicode charset */
77 *conv = ucnv_open(*cp,error);
78
79 /* convert and ignore initial U+FEFF, and the buffer overflow */
80 pTarget = target;
81 pStart = start;
82 ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error);
83 *signatureLength = (int32_t)(pStart - start);
84 if(*error==U_BUFFER_OVERFLOW_ERROR) {
85 *error=U_ZERO_ERROR;
86 }
87
88 /* verify that we successfully read exactly U+FEFF */
89 if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) {
90 *error=U_INTERNAL_PROGRAM_ERROR;
91 }
92
93
94 return TRUE;
95 }
ucbuf_isCPKnown(const char * cp)96 static UBool ucbuf_isCPKnown(const char* cp){
97 if(ucnv_compareNames("UTF-8",cp)==0){
98 return TRUE;
99 }
100 if(ucnv_compareNames("UTF-16BE",cp)==0){
101 return TRUE;
102 }
103 if(ucnv_compareNames("UTF-16LE",cp)==0){
104 return TRUE;
105 }
106 if(ucnv_compareNames("UTF-16",cp)==0){
107 return TRUE;
108 }
109 if(ucnv_compareNames("UTF-32",cp)==0){
110 return TRUE;
111 }
112 if(ucnv_compareNames("UTF-32BE",cp)==0){
113 return TRUE;
114 }
115 if(ucnv_compareNames("UTF-32LE",cp)==0){
116 return TRUE;
117 }
118 if(ucnv_compareNames("SCSU",cp)==0){
119 return TRUE;
120 }
121 if(ucnv_compareNames("BOCU-1",cp)==0){
122 return TRUE;
123 }
124 if(ucnv_compareNames("UTF-7",cp)==0){
125 return TRUE;
126 }
127 return FALSE;
128 }
129
130 U_CAPI FileStream * U_EXPORT2
ucbuf_autodetect(const char * fileName,const char ** cp,UConverter ** conv,int32_t * signatureLength,UErrorCode * error)131 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
132 FileStream* in=NULL;
133 if(error==NULL || U_FAILURE(*error)){
134 return NULL;
135 }
136 if(conv==NULL || cp==NULL || fileName==NULL){
137 *error = U_ILLEGAL_ARGUMENT_ERROR;
138 return NULL;
139 }
140 /* open the file */
141 in= T_FileStream_open(fileName,"rb");
142
143 if(in == NULL){
144 *error=U_FILE_ACCESS_ERROR;
145 return NULL;
146 }
147
148 if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
149 return in;
150 } else {
151 ucnv_close(*conv);
152 *conv=NULL;
153 T_FileStream_close(in);
154 return NULL;
155 }
156 }
157
158 /* fill the uchar buffer */
159 static UCHARBUF*
ucbuf_fillucbuf(UCHARBUF * buf,UErrorCode * error)160 ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
161 UChar* pTarget=NULL;
162 UChar* target=NULL;
163 const char* source=NULL;
164 char carr[MAX_IN_BUF] = {'\0'};
165 char* cbuf = carr;
166 int32_t inputRead=0;
167 int32_t outputWritten=0;
168 int32_t offset=0;
169 const char* sourceLimit =NULL;
170 int32_t cbufSize=0;
171 pTarget = buf->buffer;
172 /* check if we arrived here without exhausting the buffer*/
173 if(buf->currentPos<buf->bufLimit){
174 offset = (int32_t)(buf->bufLimit-buf->currentPos);
175 memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
176 }
177
178 #if DEBUG
179 memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
180 #endif
181 if(buf->isBuffered){
182 cbufSize = MAX_IN_BUF;
183 /* read the file */
184 inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
185 buf->remaining-=inputRead;
186
187 }else{
188 cbufSize = T_FileStream_size(buf->in);
189 cbuf = (char*)uprv_malloc(cbufSize);
190 inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
191 buf->remaining-=inputRead;
192 }
193
194 /* just to be sure...*/
195 if ( 0 == inputRead )
196 buf->remaining = 0;
197
198 target=pTarget;
199 /* convert the bytes */
200 if(buf->conv){
201 /* set the callback to stop */
202 UConverterToUCallback toUOldAction ;
203 void* toUOldContext;
204 void* toUNewContext=NULL;
205 ucnv_setToUCallBack(buf->conv,
206 UCNV_TO_U_CALLBACK_STOP,
207 toUNewContext,
208 &toUOldAction,
209 (const void**)&toUOldContext,
210 error);
211 /* since state is saved in the converter we add offset to source*/
212 target = pTarget+offset;
213 source = cbuf;
214 sourceLimit = source + inputRead;
215 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
216 &source,sourceLimit,NULL,
217 (UBool)(buf->remaining==0),error);
218
219 if(U_FAILURE(*error)){
220 char context[CONTEXT_LEN];
221 char preContext[CONTEXT_LEN];
222 char postContext[CONTEXT_LEN];
223 int8_t len = CONTEXT_LEN;
224 int32_t start=0;
225 int32_t stop =0;
226 int32_t pos =0;
227 /* use erro1 to preserve the error code */
228 UErrorCode error1 =U_ZERO_ERROR;
229
230 if( buf->showWarning==TRUE){
231 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
232 " converting input stream to target encoding: %s\n",
233 u_errorName(*error));
234 }
235
236
237 /* now get the context chars */
238 ucnv_getInvalidChars(buf->conv,context,&len,&error1);
239 context[len]= 0 ; /* null terminate the buffer */
240
241 pos = (int32_t)(source - cbuf - len);
242
243 /* for pre-context */
244 start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
245 stop = pos-len;
246
247 memcpy(preContext,cbuf+start,stop-start);
248 /* null terminate the buffer */
249 preContext[stop-start] = 0;
250
251 /* for post-context */
252 start = pos+len;
253 stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
254
255 memcpy(postContext,source,stop-start);
256 /* null terminate the buffer */
257 postContext[stop-start] = 0;
258
259 if(buf->showWarning ==TRUE){
260 /* print out the context */
261 fprintf(stderr,"\tPre-context: %s\n",preContext);
262 fprintf(stderr,"\tContext: %s\n",context);
263 fprintf(stderr,"\tPost-context: %s\n", postContext);
264 }
265
266 /* reset the converter */
267 ucnv_reset(buf->conv);
268
269 /* set the call back to substitute
270 * and restart conversion
271 */
272 ucnv_setToUCallBack(buf->conv,
273 UCNV_TO_U_CALLBACK_SUBSTITUTE,
274 toUNewContext,
275 &toUOldAction,
276 (const void**)&toUOldContext,
277 &error1);
278
279 /* reset source and target start positions */
280 target = pTarget+offset;
281 source = cbuf;
282
283 /* re convert */
284 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
285 &source,sourceLimit,NULL,
286 (UBool)(buf->remaining==0),&error1);
287
288 }
289 outputWritten = (int32_t)(target - pTarget);
290
291
292 #if DEBUG
293 {
294 int i;
295 target = pTarget;
296 for(i=0;i<numRead;i++){
297 /* printf("%c", (char)(*target++));*/
298 }
299 }
300 #endif
301
302 }else{
303 u_charsToUChars(cbuf,target+offset,inputRead);
304 outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
305 }
306 buf->currentPos = pTarget;
307 buf->bufLimit=pTarget+outputWritten;
308 *buf->bufLimit=0; /*NUL terminate*/
309 if(cbuf!=carr){
310 uprv_free(cbuf);
311 }
312 return buf;
313 }
314
315
316
317 /* get a UChar from the stream*/
318 U_CAPI int32_t U_EXPORT2
ucbuf_getc(UCHARBUF * buf,UErrorCode * error)319 ucbuf_getc(UCHARBUF* buf,UErrorCode* error){
320 if(error==NULL || U_FAILURE(*error)){
321 return FALSE;
322 }
323 if(buf->currentPos>=buf->bufLimit){
324 if(buf->remaining==0){
325 return U_EOF;
326 }
327 buf=ucbuf_fillucbuf(buf,error);
328 if(U_FAILURE(*error)){
329 return U_EOF;
330 }
331 }
332
333 return *(buf->currentPos++);
334 }
335
336 /* get a UChar32 from the stream*/
337 U_CAPI int32_t U_EXPORT2
ucbuf_getc32(UCHARBUF * buf,UErrorCode * error)338 ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
339 int32_t retVal = (int32_t)U_EOF;
340 if(error==NULL || U_FAILURE(*error)){
341 return FALSE;
342 }
343 if(buf->currentPos+1>=buf->bufLimit){
344 if(buf->remaining==0){
345 return U_EOF;
346 }
347 buf=ucbuf_fillucbuf(buf,error);
348 if(U_FAILURE(*error)){
349 return U_EOF;
350 }
351 }
352 if(UTF_IS_LEAD(*(buf->currentPos))){
353 retVal=UTF16_GET_PAIR_VALUE(buf->currentPos[0],buf->currentPos[1]);
354 buf->currentPos+=2;
355 }else{
356 retVal = *(buf->currentPos++);
357 }
358 return retVal;
359 }
360
361 /* u_unescapeAt() callback to return a UChar*/
362 static UChar U_CALLCONV
_charAt(int32_t offset,void * context)363 _charAt(int32_t offset, void *context) {
364 return ((UCHARBUF*) context)->currentPos[offset];
365 }
366
367 /* getc and escape it */
368 U_CAPI int32_t U_EXPORT2
ucbuf_getcx32(UCHARBUF * buf,UErrorCode * error)369 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
370 int32_t length;
371 int32_t offset;
372 UChar32 c32,c1,c2;
373 if(error==NULL || U_FAILURE(*error)){
374 return FALSE;
375 }
376 /* Fill the buffer if it is empty */
377 if (buf->currentPos >=buf->bufLimit-2) {
378 ucbuf_fillucbuf(buf,error);
379 }
380
381 /* Get the next character in the buffer */
382 if (buf->currentPos < buf->bufLimit) {
383 c1 = *(buf->currentPos)++;
384 } else {
385 c1 = U_EOF;
386 }
387
388 c2 = *(buf->currentPos);
389
390 /* If it isn't a backslash, return it */
391 if (c1 != 0x005C) {
392 return c1;
393 }
394
395 /* Determine the amount of data in the buffer */
396 length = (int32_t)(buf->bufLimit - buf->currentPos);
397
398 /* The longest escape sequence is \Uhhhhhhhh; make sure
399 we have at least that many characters */
400 if (length < 10) {
401
402 /* fill the buffer */
403 ucbuf_fillucbuf(buf,error);
404 length = (int32_t)(buf->bufLimit - buf->buffer);
405 }
406
407 /* Process the escape */
408 offset = 0;
409 c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
410
411 /* check if u_unescapeAt unescaped and converted
412 * to c32 or not
413 */
414 if(c32==0xFFFFFFFF){
415 if(buf->showWarning) {
416 char context[20];
417 int32_t len = 20;
418 if(length < len) {
419 len = length;
420 }
421 context[len]= 0 ; /* null terminate the buffer */
422 u_UCharsToChars( buf->currentPos, context, len);
423 fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
424 }
425 *error= U_ILLEGAL_ESCAPE_SEQUENCE;
426 return c1;
427 }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
428 /* Update the current buffer position */
429 buf->currentPos += offset;
430 }else{
431 /* unescaping failed so we just return
432 * c1 and not consume the buffer
433 * this is useful for rules with escapes
434 * in resouce bundles
435 * eg: \' \\ \"
436 */
437 return c1;
438 }
439
440 return c32;
441 }
442
443 U_CAPI UCHARBUF* U_EXPORT2
ucbuf_open(const char * fileName,const char ** cp,UBool showWarning,UBool buffered,UErrorCode * error)444 ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
445
446 FileStream* in = NULL;
447 int32_t fileSize=0;
448 const char* knownCp;
449 if(error==NULL || U_FAILURE(*error)){
450 return NULL;
451 }
452 if(cp==NULL || fileName==NULL){
453 *error = U_ILLEGAL_ARGUMENT_ERROR;
454 return FALSE;
455 }
456 if (!uprv_strcmp(fileName, "-")) {
457 in = T_FileStream_stdin();
458 }else{
459 in = T_FileStream_open(fileName, "rb");
460 }
461
462 if(in!=NULL){
463 UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
464 fileSize = T_FileStream_size(in);
465 if(buf == NULL){
466 *error = U_MEMORY_ALLOCATION_ERROR;
467 T_FileStream_close(in);
468 return NULL;
469 }
470 buf->in=in;
471 buf->conv=NULL;
472 buf->showWarning = showWarning;
473 buf->isBuffered = buffered;
474 buf->signatureLength=0;
475 if(*cp==NULL || **cp=='\0'){
476 /* don't have code page name... try to autodetect */
477 ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
478 }else if(ucbuf_isCPKnown(*cp)){
479 /* discard BOM */
480 ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
481 }
482 if(U_SUCCESS(*error) && buf->conv==NULL) {
483 buf->conv=ucnv_open(*cp,error);
484 }
485 if(U_FAILURE(*error)){
486 ucnv_close(buf->conv);
487 uprv_free(buf);
488 T_FileStream_close(in);
489 return NULL;
490 }
491
492 if((buf->conv==NULL) && (buf->showWarning==TRUE)){
493 fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
494 }
495 buf->remaining=fileSize-buf->signatureLength;
496 if(buf->isBuffered){
497 buf->bufCapacity=MAX_U_BUF;
498 }else{
499 buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;
500 }
501 buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
502 if (buf->buffer == NULL) {
503 *error = U_MEMORY_ALLOCATION_ERROR;
504 ucnv_close(buf->conv);
505 uprv_free(buf);
506 T_FileStream_close(in);
507 return NULL;
508 }
509 buf->currentPos=buf->buffer;
510 buf->bufLimit=buf->buffer;
511 if(U_FAILURE(*error)){
512 fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error));
513 ucnv_close(buf->conv);
514 uprv_free(buf);
515 T_FileStream_close(in);
516 return NULL;
517 }
518 buf=ucbuf_fillucbuf(buf,error);
519 return buf;
520 }
521 *error =U_FILE_ACCESS_ERROR;
522 return NULL;
523 }
524
525
526
527 /* TODO: this method will fail if at the
528 * begining of buffer and the uchar to unget
529 * is from the previous buffer. Need to implement
530 * system to take care of that situation.
531 */
532 U_CAPI void U_EXPORT2
ucbuf_ungetc(int32_t c,UCHARBUF * buf)533 ucbuf_ungetc(int32_t c,UCHARBUF* buf){
534 /* decrement currentPos pointer
535 * if not at the begining of buffer
536 */
537 UChar escaped[8] ={'\0'};
538 int32_t len =0;
539 if(c > 0xFFFF){
540 len = uprv_itou(escaped,8,c,16,8);
541 }else{
542 len=uprv_itou(escaped,8,c,16,4);
543 }
544 if(buf->currentPos!=buf->buffer){
545 if(*(buf->currentPos-1)==c){
546 buf->currentPos--;
547 }else if(u_strncmp(buf->currentPos-len,escaped,len) == 0){
548 while(--len>0){
549 buf->currentPos--;
550 }
551 }
552 }
553 }
554
555 /* frees the resources of UChar* buffer */
556 static void
ucbuf_closebuf(UCHARBUF * buf)557 ucbuf_closebuf(UCHARBUF* buf){
558 uprv_free(buf->buffer);
559 buf->buffer = NULL;
560 }
561
562 /* close the buf and release resources*/
563 U_CAPI void U_EXPORT2
ucbuf_close(UCHARBUF * buf)564 ucbuf_close(UCHARBUF* buf){
565 if(buf!=NULL){
566 if(buf->conv){
567 ucnv_close(buf->conv);
568 }
569 T_FileStream_close(buf->in);
570 ucbuf_closebuf(buf);
571 uprv_free(buf);
572 }
573 }
574
575 /* rewind the buf and file stream */
576 U_CAPI void U_EXPORT2
ucbuf_rewind(UCHARBUF * buf,UErrorCode * error)577 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
578 if(error==NULL || U_FAILURE(*error)){
579 return;
580 }
581 if(buf){
582 buf->currentPos=buf->buffer;
583 buf->bufLimit=buf->buffer;
584 T_FileStream_rewind(buf->in);
585 buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
586
587 ucnv_resetToUnicode(buf->conv);
588 if(buf->signatureLength>0) {
589 UChar target[1]={ 0 };
590 UChar* pTarget;
591 char start[8];
592 const char* pStart;
593 int32_t numRead;
594
595 /* read the signature bytes */
596 numRead=T_FileStream_read(buf->in, start, buf->signatureLength);
597
598 /* convert and ignore initial U+FEFF, and the buffer overflow */
599 pTarget = target;
600 pStart = start;
601 ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error);
602 if(*error==U_BUFFER_OVERFLOW_ERROR) {
603 *error=U_ZERO_ERROR;
604 }
605
606 /* verify that we successfully read exactly U+FEFF */
607 if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) {
608 *error=U_INTERNAL_PROGRAM_ERROR;
609 }
610 }
611 }
612 }
613
614
615 U_CAPI int32_t U_EXPORT2
ucbuf_size(UCHARBUF * buf)616 ucbuf_size(UCHARBUF* buf){
617 if(buf){
618 if(buf->isBuffered){
619 return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
620 }else{
621 return (int32_t)(buf->bufLimit - buf->buffer);
622 }
623 }
624 return 0;
625 }
626
627 U_CAPI const UChar* U_EXPORT2
ucbuf_getBuffer(UCHARBUF * buf,int32_t * len,UErrorCode * error)628 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
629 if(error==NULL || U_FAILURE(*error)){
630 return NULL;
631 }
632 if(buf==NULL || len==NULL){
633 *error = U_ILLEGAL_ARGUMENT_ERROR;
634 return NULL;
635 }
636 *len = (int32_t)(buf->bufLimit - buf->buffer);
637 return buf->buffer;
638 }
639
640 U_CAPI const char* U_EXPORT2
ucbuf_resolveFileName(const char * inputDir,const char * fileName,char * target,int32_t * len,UErrorCode * status)641 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
642 int32_t requiredLen = 0;
643 int32_t dirlen = 0;
644 int32_t filelen = 0;
645 if(status==NULL || U_FAILURE(*status)){
646 return NULL;
647 }
648
649 if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){
650 *status = U_ILLEGAL_ARGUMENT_ERROR;
651 return NULL;
652 }
653
654
655 dirlen = (int32_t)uprv_strlen(inputDir);
656 filelen = (int32_t)uprv_strlen(fileName);
657 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
658 requiredLen = dirlen + filelen + 2;
659 if((*len < requiredLen) || target==NULL){
660 *len = requiredLen;
661 *status = U_BUFFER_OVERFLOW_ERROR;
662 return NULL;
663 }
664
665 target[0] = '\0';
666 /*
667 * append the input dir to openFileName if the first char in
668 * filename is not file seperation char and the last char input directory is not '.'.
669 * This is to support :
670 * genrb -s. /home/icu/data
671 * genrb -s. icu/data
672 * The user cannot mix notations like
673 * genrb -s. /icu/data --- the absolute path specified. -s redundant
674 * user should use
675 * genrb -s. icu/data --- start from CWD and look in icu/data dir
676 */
677 if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
678 uprv_strcpy(target, inputDir);
679 target[dirlen] = U_FILE_SEP_CHAR;
680 }
681 target[dirlen + 1] = '\0';
682 } else {
683 requiredLen = dirlen + filelen + 1;
684 if((*len < requiredLen) || target==NULL){
685 *len = requiredLen;
686 *status = U_BUFFER_OVERFLOW_ERROR;
687 return NULL;
688 }
689
690 uprv_strcpy(target, inputDir);
691 }
692
693 uprv_strcat(target, fileName);
694 return target;
695 }
696 /*
697 * Unicode TR 13 says any of the below chars is
698 * a new line char in a readline function in addition
699 * to CR+LF combination which needs to be
700 * handled seperately
701 */
ucbuf_isCharNewLine(UChar c)702 static UBool ucbuf_isCharNewLine(UChar c){
703 switch(c){
704 case 0x000A: /* LF */
705 case 0x000D: /* CR */
706 case 0x000C: /* FF */
707 case 0x0085: /* NEL */
708 case 0x2028: /* LS */
709 case 0x2029: /* PS */
710 return TRUE;
711 default:
712 return FALSE;
713 }
714 }
715
716 U_CAPI const UChar* U_EXPORT2
ucbuf_readline(UCHARBUF * buf,int32_t * len,UErrorCode * err)717 ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
718 UChar* temp = buf->currentPos;
719 UChar* savePos =NULL;
720 UChar c=0x0000;
721 if(buf->isBuffered){
722 /* The input is buffered we have to do more
723 * for returning a pointer U_TRUNCATED_CHAR_FOUND
724 */
725 for(;;){
726 c = *temp++;
727 if(buf->remaining==0){
728 return NULL; /* end of file is reached return NULL */
729 }
730 if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
731 *err= U_TRUNCATED_CHAR_FOUND;
732 return NULL;
733 }else{
734 ucbuf_fillucbuf(buf,err);
735 if(U_FAILURE(*err)){
736 return NULL;
737 }
738 }
739 /*
740 * Accoding to TR 13 readLine functions must interpret
741 * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators
742 */
743 /* Windows CR LF */
744 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
745 *len = (int32_t)(temp++ - buf->currentPos);
746 savePos = buf->currentPos;
747 buf->currentPos = temp;
748 return savePos;
749 }
750 /* else */
751
752 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */
753 *len = (int32_t)(temp - buf->currentPos);
754 savePos = buf->currentPos;
755 buf->currentPos = temp;
756 return savePos;
757 }
758 }
759 }else{
760 /* we know that all input is read into the internal
761 * buffer so we can safely return pointers
762 */
763 for(;;){
764 c = *temp++;
765
766 if(buf->currentPos==buf->bufLimit){
767 return NULL; /* end of file is reached return NULL */
768 }
769 /* Windows CR LF */
770 if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
771 *len = (int32_t)(temp++ - buf->currentPos);
772 savePos = buf->currentPos;
773 buf->currentPos = temp;
774 return savePos;
775 }
776 /* else */
777 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */
778 *len = (int32_t)(temp - buf->currentPos);
779 savePos = buf->currentPos;
780 buf->currentPos = temp;
781 return savePos;
782 }
783 }
784 }
785 /* not reached */
786 /* A compiler warning will appear if all paths don't contain a return statement. */
787 /* return NULL;*/
788 }
789 #endif
790