• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
*******************************************************************************
*
*   Copyright (C) 1998-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File ucbuf.c
*
* Modification History:
*
*   Date        Name        Description
*   05/10/01    Ram         Creation.
*******************************************************************************
*/
17 
18 #include "unicode/utypes.h"
19 #include "unicode/putil.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ustring.h"
24 #include "unicode/utf16.h"
25 #include "filestrm.h"
26 #include "cstring.h"
27 #include "cmemory.h"
28 #include "ustrfmt.h"
29 #include "ucbuf.h"
30 #include <stdio.h>
31 
32 #if !UCONFIG_NO_CONVERSION
33 
34 
35 #define MAX_IN_BUF 1000
36 #define MAX_U_BUF 1500
37 #define CONTEXT_LEN 20
38 
39 struct UCHARBUF {
40     UChar* buffer;
41     UChar* currentPos;
42     UChar* bufLimit;
43     int32_t bufCapacity;
44     int32_t remaining;
45     int32_t signatureLength;
46     FileStream* in;
47     UConverter* conv;
48     UBool showWarning; /* makes this API not produce any errors */
49     UBool isBuffered;
50 };
51 
52 U_CAPI UBool U_EXPORT2
ucbuf_autodetect_fs(FileStream * in,const char ** cp,UConverter ** conv,int32_t * signatureLength,UErrorCode * error)53 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
54     char start[8];
55     int32_t numRead;
56 
57     UChar target[1]={ 0 };
58     UChar* pTarget;
59     const char* pStart;
60 
61     /* read a few bytes */
62     numRead=T_FileStream_read(in, start, sizeof(start));
63 
64     *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error);
65 
66     /* unread the bytes beyond what was consumed for U+FEFF */
67     T_FileStream_rewind(in);
68     if (*signatureLength > 0) {
69         T_FileStream_read(in, start, *signatureLength);
70     }
71 
72     if(*cp==NULL){
73         *conv =NULL;
74         return FALSE;
75     }
76 
77     /* open the converter for the detected Unicode charset */
78     *conv = ucnv_open(*cp,error);
79 
80     /* convert and ignore initial U+FEFF, and the buffer overflow */
81     pTarget = target;
82     pStart = start;
83     ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error);
84     *signatureLength = (int32_t)(pStart - start);
85     if(*error==U_BUFFER_OVERFLOW_ERROR) {
86         *error=U_ZERO_ERROR;
87     }
88 
89     /* verify that we successfully read exactly U+FEFF */
90     if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) {
91         *error=U_INTERNAL_PROGRAM_ERROR;
92     }
93 
94 
95     return TRUE;
96 }
ucbuf_isCPKnown(const char * cp)97 static UBool ucbuf_isCPKnown(const char* cp){
98     if(ucnv_compareNames("UTF-8",cp)==0){
99         return TRUE;
100     }
101     if(ucnv_compareNames("UTF-16BE",cp)==0){
102         return TRUE;
103     }
104     if(ucnv_compareNames("UTF-16LE",cp)==0){
105         return TRUE;
106     }
107     if(ucnv_compareNames("UTF-16",cp)==0){
108         return TRUE;
109     }
110     if(ucnv_compareNames("UTF-32",cp)==0){
111         return TRUE;
112     }
113     if(ucnv_compareNames("UTF-32BE",cp)==0){
114         return TRUE;
115     }
116     if(ucnv_compareNames("UTF-32LE",cp)==0){
117         return TRUE;
118     }
119     if(ucnv_compareNames("SCSU",cp)==0){
120         return TRUE;
121     }
122     if(ucnv_compareNames("BOCU-1",cp)==0){
123         return TRUE;
124     }
125     if(ucnv_compareNames("UTF-7",cp)==0){
126         return TRUE;
127     }
128     return FALSE;
129 }
130 
131 U_CAPI FileStream * U_EXPORT2
ucbuf_autodetect(const char * fileName,const char ** cp,UConverter ** conv,int32_t * signatureLength,UErrorCode * error)132 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
133     FileStream* in=NULL;
134     if(error==NULL || U_FAILURE(*error)){
135         return NULL;
136     }
137     if(conv==NULL || cp==NULL || fileName==NULL){
138         *error = U_ILLEGAL_ARGUMENT_ERROR;
139         return NULL;
140     }
141     /* open the file */
142     in= T_FileStream_open(fileName,"rb");
143 
144     if(in == NULL){
145         *error=U_FILE_ACCESS_ERROR;
146         return NULL;
147     }
148 
149     if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
150         return in;
151     } else {
152         ucnv_close(*conv);
153         *conv=NULL;
154         T_FileStream_close(in);
155         return NULL;
156     }
157 }
158 
159 /* fill the uchar buffer */
160 static UCHARBUF*
ucbuf_fillucbuf(UCHARBUF * buf,UErrorCode * error)161 ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
162     UChar* pTarget=NULL;
163     UChar* target=NULL;
164     const char* source=NULL;
165     char  carr[MAX_IN_BUF] = {'\0'};
166     char* cbuf =  carr;
167     int32_t inputRead=0;
168     int32_t outputWritten=0;
169     int32_t offset=0;
170     const char* sourceLimit =NULL;
171     int32_t cbufSize=0;
172     pTarget = buf->buffer;
173     /* check if we arrived here without exhausting the buffer*/
174     if(buf->currentPos<buf->bufLimit){
175         offset = (int32_t)(buf->bufLimit-buf->currentPos);
176         memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
177     }
178 
179 #if DEBUG
180     memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
181 #endif
182     if(buf->isBuffered){
183         cbufSize = MAX_IN_BUF;
184         /* read the file */
185         inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
186         buf->remaining-=inputRead;
187 
188     }else{
189         cbufSize = T_FileStream_size(buf->in);
190         cbuf = (char*)uprv_malloc(cbufSize);
191         if (cbuf == NULL) {
192         	*error = U_MEMORY_ALLOCATION_ERROR;
193         	return NULL;
194         }
195         inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
196         buf->remaining-=inputRead;
197     }
198 
199     /* just to be sure...*/
200     if ( 0 == inputRead )
201        buf->remaining = 0;
202 
203     target=pTarget;
204     /* convert the bytes */
205     if(buf->conv){
206         /* set the callback to stop */
207         UConverterToUCallback toUOldAction ;
208         void* toUOldContext;
209         void* toUNewContext=NULL;
210         ucnv_setToUCallBack(buf->conv,
211            UCNV_TO_U_CALLBACK_STOP,
212            toUNewContext,
213            &toUOldAction,
214            (const void**)&toUOldContext,
215            error);
216         /* since state is saved in the converter we add offset to source*/
217         target = pTarget+offset;
218         source = cbuf;
219         sourceLimit = source + inputRead;
220         ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
221                         &source,sourceLimit,NULL,
222                         (UBool)(buf->remaining==0),error);
223 
224         if(U_FAILURE(*error)){
225             char context[CONTEXT_LEN+1];
226             char preContext[CONTEXT_LEN+1];
227             char postContext[CONTEXT_LEN+1];
228             int8_t len = CONTEXT_LEN;
229             int32_t start=0;
230             int32_t stop =0;
231             int32_t pos =0;
232             /* use erro1 to preserve the error code */
233             UErrorCode error1 =U_ZERO_ERROR;
234 
235             if( buf->showWarning==TRUE){
236                 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
237                                " converting input stream to target encoding: %s\n",
238                                u_errorName(*error));
239             }
240 
241 
242             /* now get the context chars */
243             ucnv_getInvalidChars(buf->conv,context,&len,&error1);
244             context[len]= 0 ; /* null terminate the buffer */
245 
246             pos = (int32_t)(source - cbuf - len);
247 
248             /* for pre-context */
249             start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
250             stop  = pos-len;
251 
252             memcpy(preContext,cbuf+start,stop-start);
253             /* null terminate the buffer */
254             preContext[stop-start] = 0;
255 
256             /* for post-context */
257             start = pos+len;
258             stop  = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
259 
260             memcpy(postContext,source,stop-start);
261             /* null terminate the buffer */
262             postContext[stop-start] = 0;
263 
264             if(buf->showWarning ==TRUE){
265                 /* print out the context */
266                 fprintf(stderr,"\tPre-context: %s\n",preContext);
267                 fprintf(stderr,"\tContext: %s\n",context);
268                 fprintf(stderr,"\tPost-context: %s\n", postContext);
269             }
270 
271             /* reset the converter */
272             ucnv_reset(buf->conv);
273 
274             /* set the call back to substitute
275              * and restart conversion
276              */
277             ucnv_setToUCallBack(buf->conv,
278                UCNV_TO_U_CALLBACK_SUBSTITUTE,
279                toUNewContext,
280                &toUOldAction,
281                (const void**)&toUOldContext,
282                &error1);
283 
284             /* reset source and target start positions */
285             target = pTarget+offset;
286             source = cbuf;
287 
288             /* re convert */
289             ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
290                             &source,sourceLimit,NULL,
291                             (UBool)(buf->remaining==0),&error1);
292 
293         }
294         outputWritten = (int32_t)(target - pTarget);
295 
296 
297 #if DEBUG
298         {
299             int i;
300             target = pTarget;
301             for(i=0;i<numRead;i++){
302               /*  printf("%c", (char)(*target++));*/
303             }
304         }
305 #endif
306 
307     }else{
308         u_charsToUChars(cbuf,target+offset,inputRead);
309         outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
310     }
311     buf->currentPos = pTarget;
312     buf->bufLimit=pTarget+outputWritten;
313     *buf->bufLimit=0; /*NUL terminate*/
314     if(cbuf!=carr){
315         uprv_free(cbuf);
316     }
317     return buf;
318 }
319 
320 
321 
322 /* get a UChar from the stream*/
323 U_CAPI int32_t U_EXPORT2
ucbuf_getc(UCHARBUF * buf,UErrorCode * error)324 ucbuf_getc(UCHARBUF* buf,UErrorCode* error){
325     if(error==NULL || U_FAILURE(*error)){
326         return FALSE;
327     }
328     if(buf->currentPos>=buf->bufLimit){
329         if(buf->remaining==0){
330             return U_EOF;
331         }
332         buf=ucbuf_fillucbuf(buf,error);
333         if(U_FAILURE(*error)){
334             return U_EOF;
335         }
336     }
337 
338     return *(buf->currentPos++);
339 }
340 
341 /* get a UChar32 from the stream*/
342 U_CAPI int32_t U_EXPORT2
ucbuf_getc32(UCHARBUF * buf,UErrorCode * error)343 ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
344     int32_t retVal = (int32_t)U_EOF;
345     if(error==NULL || U_FAILURE(*error)){
346         return FALSE;
347     }
348     if(buf->currentPos+1>=buf->bufLimit){
349         if(buf->remaining==0){
350             return U_EOF;
351         }
352         buf=ucbuf_fillucbuf(buf,error);
353         if(U_FAILURE(*error)){
354             return U_EOF;
355         }
356     }
357     if(U16_IS_LEAD(*(buf->currentPos))){
358         retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]);
359         buf->currentPos+=2;
360     }else{
361         retVal = *(buf->currentPos++);
362     }
363     return retVal;
364 }
365 
366 /* u_unescapeAt() callback to return a UChar*/
367 static UChar U_CALLCONV
_charAt(int32_t offset,void * context)368 _charAt(int32_t offset, void *context) {
369     return ((UCHARBUF*) context)->currentPos[offset];
370 }
371 
372 /* getc and escape it */
373 U_CAPI int32_t U_EXPORT2
ucbuf_getcx32(UCHARBUF * buf,UErrorCode * error)374 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
375     int32_t length;
376     int32_t offset;
377     UChar32 c32,c1,c2;
378     if(error==NULL || U_FAILURE(*error)){
379         return FALSE;
380     }
381     /* Fill the buffer if it is empty */
382     if (buf->currentPos >=buf->bufLimit-2) {
383         ucbuf_fillucbuf(buf,error);
384     }
385 
386     /* Get the next character in the buffer */
387     if (buf->currentPos < buf->bufLimit) {
388         c1 = *(buf->currentPos)++;
389     } else {
390         c1 = U_EOF;
391     }
392 
393     c2 = *(buf->currentPos);
394 
395     /* If it isn't a backslash, return it */
396     if (c1 != 0x005C) {
397         return c1;
398     }
399 
400     /* Determine the amount of data in the buffer */
401     length = (int32_t)(buf->bufLimit - buf->currentPos);
402 
403     /* The longest escape sequence is \Uhhhhhhhh; make sure
404        we have at least that many characters */
405     if (length < 10) {
406 
407         /* fill the buffer */
408         ucbuf_fillucbuf(buf,error);
409         length = (int32_t)(buf->bufLimit - buf->buffer);
410     }
411 
412     /* Process the escape */
413     offset = 0;
414     c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
415 
416     /* check if u_unescapeAt unescaped and converted
417      * to c32 or not
418      */
419     if(c32==0xFFFFFFFF){
420         if(buf->showWarning) {
421             char context[CONTEXT_LEN+1];
422             int32_t len = CONTEXT_LEN;
423             if(length < len) {
424                 len = length;
425             }
426             context[len]= 0 ; /* null terminate the buffer */
427             u_UCharsToChars( buf->currentPos, context, len);
428             fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
429         }
430         *error= U_ILLEGAL_ESCAPE_SEQUENCE;
431         return c1;
432     }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
433         /* Update the current buffer position */
434         buf->currentPos += offset;
435     }else{
436         /* unescaping failed so we just return
437          * c1 and not consume the buffer
438          * this is useful for rules with escapes
439          * in resouce bundles
440          * eg: \' \\ \"
441          */
442         return c1;
443     }
444 
445     return c32;
446 }
447 
448 U_CAPI UCHARBUF* U_EXPORT2
ucbuf_open(const char * fileName,const char ** cp,UBool showWarning,UBool buffered,UErrorCode * error)449 ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
450 
451     FileStream* in = NULL;
452     int32_t fileSize=0;
453     const char* knownCp;
454     if(error==NULL || U_FAILURE(*error)){
455         return NULL;
456     }
457     if(cp==NULL || fileName==NULL){
458         *error = U_ILLEGAL_ARGUMENT_ERROR;
459         return FALSE;
460     }
461     if (!uprv_strcmp(fileName, "-")) {
462         in = T_FileStream_stdin();
463     }else{
464         in = T_FileStream_open(fileName, "rb");
465     }
466 
467     if(in!=NULL){
468         UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
469         fileSize = T_FileStream_size(in);
470         if(buf == NULL){
471             *error = U_MEMORY_ALLOCATION_ERROR;
472             T_FileStream_close(in);
473             return NULL;
474         }
475         buf->in=in;
476         buf->conv=NULL;
477         buf->showWarning = showWarning;
478         buf->isBuffered = buffered;
479         buf->signatureLength=0;
480         if(*cp==NULL || **cp=='\0'){
481             /* don't have code page name... try to autodetect */
482             ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
483         }else if(ucbuf_isCPKnown(*cp)){
484             /* discard BOM */
485             ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
486         }
487         if(U_SUCCESS(*error) && buf->conv==NULL) {
488             buf->conv=ucnv_open(*cp,error);
489         }
490         if(U_FAILURE(*error)){
491             ucnv_close(buf->conv);
492             uprv_free(buf);
493             T_FileStream_close(in);
494             return NULL;
495         }
496 
497         if((buf->conv==NULL) && (buf->showWarning==TRUE)){
498             fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
499         }
500         buf->remaining=fileSize-buf->signatureLength;
501         if(buf->isBuffered){
502             buf->bufCapacity=MAX_U_BUF;
503         }else{
504             buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;
505         }
506         buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
507         if (buf->buffer == NULL) {
508             *error = U_MEMORY_ALLOCATION_ERROR;
509             ucbuf_close(buf);
510             return NULL;
511         }
512         buf->currentPos=buf->buffer;
513         buf->bufLimit=buf->buffer;
514         if(U_FAILURE(*error)){
515             fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error));
516             ucbuf_close(buf);
517             return NULL;
518         }
519         ucbuf_fillucbuf(buf,error);
520         if(U_FAILURE(*error)){
521             ucbuf_close(buf);
522             return NULL;
523         }
524         return buf;
525     }
526     *error =U_FILE_ACCESS_ERROR;
527     return NULL;
528 }
529 
530 
531 
532 /* TODO: this method will fail if at the
533  * begining of buffer and the uchar to unget
534  * is from the previous buffer. Need to implement
535  * system to take care of that situation.
536  */
537 U_CAPI void U_EXPORT2
ucbuf_ungetc(int32_t c,UCHARBUF * buf)538 ucbuf_ungetc(int32_t c,UCHARBUF* buf){
539     /* decrement currentPos pointer
540      * if not at the begining of buffer
541      */
542     if(buf->currentPos!=buf->buffer){
543         if(*(buf->currentPos-1)==c){
544             buf->currentPos--;
545         } else {
546             /* ungetc failed - did not match. */
547         }
548     } else {
549        /* ungetc failed - beginning of buffer. */
550     }
551 }
552 
553 /* frees the resources of UChar* buffer */
554 static void
ucbuf_closebuf(UCHARBUF * buf)555 ucbuf_closebuf(UCHARBUF* buf){
556     uprv_free(buf->buffer);
557     buf->buffer = NULL;
558 }
559 
560 /* close the buf and release resources*/
561 U_CAPI void U_EXPORT2
ucbuf_close(UCHARBUF * buf)562 ucbuf_close(UCHARBUF* buf){
563     if(buf!=NULL){
564         if(buf->conv){
565             ucnv_close(buf->conv);
566         }
567         T_FileStream_close(buf->in);
568         ucbuf_closebuf(buf);
569         uprv_free(buf);
570     }
571 }
572 
573 /* rewind the buf and file stream */
574 U_CAPI void U_EXPORT2
ucbuf_rewind(UCHARBUF * buf,UErrorCode * error)575 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
576     if(error==NULL || U_FAILURE(*error)){
577         return;
578     }
579     if(buf){
580         buf->currentPos=buf->buffer;
581         buf->bufLimit=buf->buffer;
582         T_FileStream_rewind(buf->in);
583         buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
584 
585         ucnv_resetToUnicode(buf->conv);
586         if(buf->signatureLength>0) {
587             UChar target[1]={ 0 };
588             UChar* pTarget;
589             char start[8];
590             const char* pStart;
591             int32_t numRead;
592 
593             /* read the signature bytes */
594             numRead=T_FileStream_read(buf->in, start, buf->signatureLength);
595 
596             /* convert and ignore initial U+FEFF, and the buffer overflow */
597             pTarget = target;
598             pStart = start;
599             ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error);
600             if(*error==U_BUFFER_OVERFLOW_ERROR) {
601                 *error=U_ZERO_ERROR;
602             }
603 
604             /* verify that we successfully read exactly U+FEFF */
605             if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) {
606                 *error=U_INTERNAL_PROGRAM_ERROR;
607             }
608         }
609     }
610 }
611 
612 
613 U_CAPI int32_t U_EXPORT2
ucbuf_size(UCHARBUF * buf)614 ucbuf_size(UCHARBUF* buf){
615     if(buf){
616         if(buf->isBuffered){
617             return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
618         }else{
619             return (int32_t)(buf->bufLimit - buf->buffer);
620         }
621     }
622     return 0;
623 }
624 
625 U_CAPI const UChar* U_EXPORT2
ucbuf_getBuffer(UCHARBUF * buf,int32_t * len,UErrorCode * error)626 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
627     if(error==NULL || U_FAILURE(*error)){
628         return NULL;
629     }
630     if(buf==NULL || len==NULL){
631         *error = U_ILLEGAL_ARGUMENT_ERROR;
632         return NULL;
633     }
634     *len = (int32_t)(buf->bufLimit - buf->buffer);
635     return buf->buffer;
636 }
637 
638 U_CAPI const char* U_EXPORT2
ucbuf_resolveFileName(const char * inputDir,const char * fileName,char * target,int32_t * len,UErrorCode * status)639 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
640     int32_t requiredLen = 0;
641     int32_t dirlen =  0;
642     int32_t filelen = 0;
643     if(status==NULL || U_FAILURE(*status)){
644         return NULL;
645     }
646 
647     if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){
648         *status = U_ILLEGAL_ARGUMENT_ERROR;
649         return NULL;
650     }
651 
652 
653     dirlen  = (int32_t)uprv_strlen(inputDir);
654     filelen = (int32_t)uprv_strlen(fileName);
655     if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
656         requiredLen = dirlen + filelen + 2;
657         if((*len < requiredLen) || target==NULL){
658             *len = requiredLen;
659             *status = U_BUFFER_OVERFLOW_ERROR;
660             return NULL;
661         }
662 
663         target[0] = '\0';
664         /*
665          * append the input dir to openFileName if the first char in
666          * filename is not file seperation char and the last char input directory is  not '.'.
667          * This is to support :
668          * genrb -s. /home/icu/data
669          * genrb -s. icu/data
670          * The user cannot mix notations like
671          * genrb -s. /icu/data --- the absolute path specified. -s redundant
672          * user should use
673          * genrb -s. icu/data  --- start from CWD and look in icu/data dir
674          */
675         if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
676             uprv_strcpy(target, inputDir);
677             target[dirlen]     = U_FILE_SEP_CHAR;
678         }
679         target[dirlen + 1] = '\0';
680     } else {
681         requiredLen = dirlen + filelen + 1;
682         if((*len < requiredLen) || target==NULL){
683             *len = requiredLen;
684             *status = U_BUFFER_OVERFLOW_ERROR;
685             return NULL;
686         }
687 
688         uprv_strcpy(target, inputDir);
689     }
690 
691     uprv_strcat(target, fileName);
692     return target;
693 }
694 /*
695  * Unicode TR 13 says any of the below chars is
696  * a new line char in a readline function in addition
697  * to CR+LF combination which needs to be
698  * handled seperately
699  */
ucbuf_isCharNewLine(UChar c)700 static UBool ucbuf_isCharNewLine(UChar c){
701     switch(c){
702     case 0x000A: /* LF  */
703     case 0x000D: /* CR  */
704     case 0x000C: /* FF  */
705     case 0x0085: /* NEL */
706     case 0x2028: /* LS  */
707     case 0x2029: /* PS  */
708         return TRUE;
709     default:
710         return FALSE;
711     }
712 }
713 
714 U_CAPI const UChar* U_EXPORT2
ucbuf_readline(UCHARBUF * buf,int32_t * len,UErrorCode * err)715 ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
716     UChar* temp = buf->currentPos;
717     UChar* savePos =NULL;
718     UChar c=0x0000;
719     if(buf->isBuffered){
720         /* The input is buffered we have to do more
721         * for returning a pointer U_TRUNCATED_CHAR_FOUND
722         */
723         for(;;){
724             c = *temp++;
725             if(buf->remaining==0){
726                 return NULL; /* end of file is reached return NULL */
727             }
728             if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
729                 *err= U_TRUNCATED_CHAR_FOUND;
730                 return NULL;
731             }else{
732                 ucbuf_fillucbuf(buf,err);
733                 if(U_FAILURE(*err)){
734                     return NULL;
735                 }
736             }
737             /*
738              * Accoding to TR 13 readLine functions must interpret
739              * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators
740              */
741             /* Windows CR LF */
742             if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
743                 *len = (int32_t)(temp++ - buf->currentPos);
744                 savePos = buf->currentPos;
745                 buf->currentPos = temp;
746                 return savePos;
747             }
748             /* else */
749 
750             if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){  /* Unipad inserts 2028 line separators! */
751                 *len = (int32_t)(temp - buf->currentPos);
752                 savePos = buf->currentPos;
753                 buf->currentPos = temp;
754                 return savePos;
755             }
756         }
757     }else{
758     /* we know that all input is read into the internal
759     * buffer so we can safely return pointers
760         */
761         for(;;){
762             c = *temp++;
763 
764             if(buf->currentPos==buf->bufLimit){
765                 return NULL; /* end of file is reached return NULL */
766             }
767             /* Windows CR LF */
768             if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
769                 *len = (int32_t)(temp++ - buf->currentPos);
770                 savePos = buf->currentPos;
771                 buf->currentPos = temp;
772                 return savePos;
773             }
774             /* else */
775             if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) {  /* Unipad inserts 2028 line separators! */
776                 *len = (int32_t)(temp - buf->currentPos);
777                 savePos = buf->currentPos;
778                 buf->currentPos = temp;
779                 return savePos;
780             }
781         }
782     }
783     /* not reached */
784     /* A compiler warning will appear if all paths don't contain a return statement. */
785 /*    return NULL;*/
786 }
787 #endif
788