1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 1998-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File ucbuf.cpp
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 05/10/01 Ram Creation.
17 *******************************************************************************
18 */
19
20 #include "unicode/utypes.h"
21 #include "unicode/putil.h"
22 #include "unicode/uchar.h"
23 #include "unicode/ucnv.h"
24 #include "unicode/ucnv_err.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utf16.h"
27 #include "filestrm.h"
28 #include "cstring.h"
29 #include "cmemory.h"
30 #include "ustrfmt.h"
31 #include "ucbuf.h"
32 #include <stdio.h>
33
34 #if !UCONFIG_NO_CONVERSION
35
36
/* Size, in bytes, of the stack array that receives one chunk of file data in buffered mode. */
#define MAX_IN_BUF  1000
/* Capacity, in UChars, of the text buffer allocated in buffered mode. */
#define MAX_U_BUF   1500
/* Maximum number of characters stored in each context string printed with a warning. */
#define CONTEXT_LEN 20
40
41 struct UCHARBUF {
42 UChar* buffer;
43 UChar* currentPos;
44 UChar* bufLimit;
45 int32_t bufCapacity;
46 int32_t remaining;
47 int32_t signatureLength;
48 FileStream* in;
49 UConverter* conv;
50 UBool showWarning; /* makes this API not produce any errors */
51 UBool isBuffered;
52 };
53
54 U_CAPI UBool U_EXPORT2
ucbuf_autodetect_fs(FileStream * in,const char ** cp,UConverter ** conv,int32_t * signatureLength,UErrorCode * error)55 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
56 char start[8];
57 int32_t numRead;
58
59 UChar target[1]={ 0 };
60 UChar* pTarget;
61 const char* pStart;
62
63 /* read a few bytes */
64 numRead=T_FileStream_read(in, start, sizeof(start));
65
66 *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error);
67
68 /* unread the bytes beyond what was consumed for U+FEFF */
69 T_FileStream_rewind(in);
70 if (*signatureLength > 0) {
71 T_FileStream_read(in, start, *signatureLength);
72 }
73
74 if(*cp==NULL){
75 *conv =NULL;
76 return FALSE;
77 }
78
79 /* open the converter for the detected Unicode charset */
80 *conv = ucnv_open(*cp,error);
81
82 /* convert and ignore initial U+FEFF, and the buffer overflow */
83 pTarget = target;
84 pStart = start;
85 ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error);
86 *signatureLength = (int32_t)(pStart - start);
87 if(*error==U_BUFFER_OVERFLOW_ERROR) {
88 *error=U_ZERO_ERROR;
89 }
90
91 /* verify that we successfully read exactly U+FEFF */
92 if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) {
93 *error=U_INTERNAL_PROGRAM_ERROR;
94 }
95
96
97 return TRUE;
98 }
ucbuf_isCPKnown(const char * cp)99 static UBool ucbuf_isCPKnown(const char* cp){
100 if(ucnv_compareNames("UTF-8",cp)==0){
101 return TRUE;
102 }
103 if(ucnv_compareNames("UTF-16BE",cp)==0){
104 return TRUE;
105 }
106 if(ucnv_compareNames("UTF-16LE",cp)==0){
107 return TRUE;
108 }
109 if(ucnv_compareNames("UTF-16",cp)==0){
110 return TRUE;
111 }
112 if(ucnv_compareNames("UTF-32",cp)==0){
113 return TRUE;
114 }
115 if(ucnv_compareNames("UTF-32BE",cp)==0){
116 return TRUE;
117 }
118 if(ucnv_compareNames("UTF-32LE",cp)==0){
119 return TRUE;
120 }
121 if(ucnv_compareNames("SCSU",cp)==0){
122 return TRUE;
123 }
124 if(ucnv_compareNames("BOCU-1",cp)==0){
125 return TRUE;
126 }
127 if(ucnv_compareNames("UTF-7",cp)==0){
128 return TRUE;
129 }
130 return FALSE;
131 }
132
133 U_CAPI FileStream * U_EXPORT2
ucbuf_autodetect(const char * fileName,const char ** cp,UConverter ** conv,int32_t * signatureLength,UErrorCode * error)134 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
135 FileStream* in=NULL;
136 if(error==NULL || U_FAILURE(*error)){
137 return NULL;
138 }
139 if(conv==NULL || cp==NULL || fileName==NULL){
140 *error = U_ILLEGAL_ARGUMENT_ERROR;
141 return NULL;
142 }
143 /* open the file */
144 in= T_FileStream_open(fileName,"rb");
145
146 if(in == NULL){
147 *error=U_FILE_ACCESS_ERROR;
148 return NULL;
149 }
150
151 if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
152 return in;
153 } else {
154 ucnv_close(*conv);
155 *conv=NULL;
156 T_FileStream_close(in);
157 return NULL;
158 }
159 }
160
161 /* fill the uchar buffer */
162 static UCHARBUF*
ucbuf_fillucbuf(UCHARBUF * buf,UErrorCode * error)163 ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
164 UChar* pTarget=NULL;
165 UChar* target=NULL;
166 const char* source=NULL;
167 char carr[MAX_IN_BUF] = {'\0'};
168 char* cbuf = carr;
169 int32_t inputRead=0;
170 int32_t outputWritten=0;
171 int32_t offset=0;
172 const char* sourceLimit =NULL;
173 int32_t cbufSize=0;
174 pTarget = buf->buffer;
175 /* check if we arrived here without exhausting the buffer*/
176 if(buf->currentPos<buf->bufLimit){
177 offset = (int32_t)(buf->bufLimit-buf->currentPos);
178 memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
179 }
180
181 #if UCBUF_DEBUG
182 memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
183 #endif
184 if(buf->isBuffered){
185 cbufSize = MAX_IN_BUF;
186 /* read the file */
187 inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
188 buf->remaining-=inputRead;
189
190 }else{
191 cbufSize = T_FileStream_size(buf->in);
192 cbuf = (char*)uprv_malloc(cbufSize);
193 if (cbuf == NULL) {
194 *error = U_MEMORY_ALLOCATION_ERROR;
195 return NULL;
196 }
197 inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
198 buf->remaining-=inputRead;
199 }
200
201 /* just to be sure...*/
202 if ( 0 == inputRead )
203 buf->remaining = 0;
204
205 target=pTarget;
206 /* convert the bytes */
207 if(buf->conv){
208 /* set the callback to stop */
209 UConverterToUCallback toUOldAction ;
210 void* toUOldContext;
211 void* toUNewContext=NULL;
212 ucnv_setToUCallBack(buf->conv,
213 UCNV_TO_U_CALLBACK_STOP,
214 toUNewContext,
215 &toUOldAction,
216 (const void**)&toUOldContext,
217 error);
218 /* since state is saved in the converter we add offset to source*/
219 target = pTarget+offset;
220 source = cbuf;
221 sourceLimit = source + inputRead;
222 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
223 &source,sourceLimit,NULL,
224 (UBool)(buf->remaining==0),error);
225
226 if(U_FAILURE(*error)){
227 char context[CONTEXT_LEN+1];
228 char preContext[CONTEXT_LEN+1];
229 char postContext[CONTEXT_LEN+1];
230 int8_t len = CONTEXT_LEN;
231 int32_t start=0;
232 int32_t stop =0;
233 int32_t pos =0;
234 /* use erro1 to preserve the error code */
235 UErrorCode error1 =U_ZERO_ERROR;
236
237 if( buf->showWarning==TRUE){
238 fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
239 " converting input stream to target encoding: %s\n",
240 u_errorName(*error));
241 }
242
243
244 /* now get the context chars */
245 ucnv_getInvalidChars(buf->conv,context,&len,&error1);
246 context[len]= 0 ; /* null terminate the buffer */
247
248 pos = (int32_t)(source - cbuf - len);
249
250 /* for pre-context */
251 start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
252 stop = pos-len;
253
254 memcpy(preContext,cbuf+start,stop-start);
255 /* null terminate the buffer */
256 preContext[stop-start] = 0;
257
258 /* for post-context */
259 start = pos+len;
260 stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
261
262 memcpy(postContext,source,stop-start);
263 /* null terminate the buffer */
264 postContext[stop-start] = 0;
265
266 if(buf->showWarning ==TRUE){
267 /* print out the context */
268 fprintf(stderr,"\tPre-context: %s\n",preContext);
269 fprintf(stderr,"\tContext: %s\n",context);
270 fprintf(stderr,"\tPost-context: %s\n", postContext);
271 }
272
273 /* reset the converter */
274 ucnv_reset(buf->conv);
275
276 /* set the call back to substitute
277 * and restart conversion
278 */
279 ucnv_setToUCallBack(buf->conv,
280 UCNV_TO_U_CALLBACK_SUBSTITUTE,
281 toUNewContext,
282 &toUOldAction,
283 (const void**)&toUOldContext,
284 &error1);
285
286 /* reset source and target start positions */
287 target = pTarget+offset;
288 source = cbuf;
289
290 /* re convert */
291 ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
292 &source,sourceLimit,NULL,
293 (UBool)(buf->remaining==0),&error1);
294
295 }
296 outputWritten = (int32_t)(target - pTarget);
297
298 #if UCBUF_DEBUG
299 {
300 int i;
301 target = pTarget;
302 for(i=0;i<numRead;i++){
303 /* printf("%c", (char)(*target++));*/
304 }
305 }
306 #endif
307
308 }else{
309 u_charsToUChars(cbuf,target+offset,inputRead);
310 outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
311 }
312 buf->currentPos = pTarget;
313 buf->bufLimit=pTarget+outputWritten;
314 *buf->bufLimit=0; /*NUL terminate*/
315 if(cbuf!=carr){
316 uprv_free(cbuf);
317 }
318 return buf;
319 }
320
321
322
323 /* get a UChar from the stream*/
324 U_CAPI int32_t U_EXPORT2
ucbuf_getc(UCHARBUF * buf,UErrorCode * error)325 ucbuf_getc(UCHARBUF* buf,UErrorCode* error){
326 if(error==NULL || U_FAILURE(*error)){
327 return FALSE;
328 }
329 if(buf->currentPos>=buf->bufLimit){
330 if(buf->remaining==0){
331 return U_EOF;
332 }
333 buf=ucbuf_fillucbuf(buf,error);
334 if(U_FAILURE(*error)){
335 return U_EOF;
336 }
337 }
338
339 return *(buf->currentPos++);
340 }
341
342 /* get a UChar32 from the stream*/
343 U_CAPI int32_t U_EXPORT2
ucbuf_getc32(UCHARBUF * buf,UErrorCode * error)344 ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
345 int32_t retVal = (int32_t)U_EOF;
346 if(error==NULL || U_FAILURE(*error)){
347 return FALSE;
348 }
349 if(buf->currentPos+1>=buf->bufLimit){
350 if(buf->remaining==0){
351 return U_EOF;
352 }
353 buf=ucbuf_fillucbuf(buf,error);
354 if(U_FAILURE(*error)){
355 return U_EOF;
356 }
357 }
358 if(U16_IS_LEAD(*(buf->currentPos))){
359 retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]);
360 buf->currentPos+=2;
361 }else{
362 retVal = *(buf->currentPos++);
363 }
364 return retVal;
365 }
366
367 /* u_unescapeAt() callback to return a UChar*/
368 static UChar U_CALLCONV
_charAt(int32_t offset,void * context)369 _charAt(int32_t offset, void *context) {
370 return ((UCHARBUF*) context)->currentPos[offset];
371 }
372
373 /* getc and escape it */
374 U_CAPI int32_t U_EXPORT2
ucbuf_getcx32(UCHARBUF * buf,UErrorCode * error)375 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
376 int32_t length;
377 int32_t offset;
378 UChar32 c32,c1,c2;
379 if(error==NULL || U_FAILURE(*error)){
380 return FALSE;
381 }
382 /* Fill the buffer if it is empty */
383 if (buf->currentPos >=buf->bufLimit-2) {
384 ucbuf_fillucbuf(buf,error);
385 }
386
387 /* Get the next character in the buffer */
388 if (buf->currentPos < buf->bufLimit) {
389 c1 = *(buf->currentPos)++;
390 } else {
391 c1 = U_EOF;
392 }
393
394 c2 = *(buf->currentPos);
395
396 /* If it isn't a backslash, return it */
397 if (c1 != 0x005C) {
398 return c1;
399 }
400
401 /* Determine the amount of data in the buffer */
402 length = (int32_t)(buf->bufLimit - buf->currentPos);
403
404 /* The longest escape sequence is \Uhhhhhhhh; make sure
405 we have at least that many characters */
406 if (length < 10) {
407
408 /* fill the buffer */
409 ucbuf_fillucbuf(buf,error);
410 length = (int32_t)(buf->bufLimit - buf->buffer);
411 }
412
413 /* Process the escape */
414 offset = 0;
415 c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
416
417 /* check if u_unescapeAt unescaped and converted
418 * to c32 or not
419 */
420 if(c32==(UChar32)0xFFFFFFFF){
421 if(buf->showWarning) {
422 char context[CONTEXT_LEN+1];
423 int32_t len = CONTEXT_LEN;
424 if(length < len) {
425 len = length;
426 }
427 context[len]= 0 ; /* null terminate the buffer */
428 u_UCharsToChars( buf->currentPos, context, len);
429 fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
430 }
431 *error= U_ILLEGAL_ESCAPE_SEQUENCE;
432 return c1;
433 }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
434 /* Update the current buffer position */
435 buf->currentPos += offset;
436 }else{
437 /* unescaping failed so we just return
438 * c1 and not consume the buffer
439 * this is useful for rules with escapes
440 * in resouce bundles
441 * eg: \' \\ \"
442 */
443 return c1;
444 }
445
446 return c32;
447 }
448
449 U_CAPI UCHARBUF* U_EXPORT2
ucbuf_open(const char * fileName,const char ** cp,UBool showWarning,UBool buffered,UErrorCode * error)450 ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
451
452 FileStream* in = NULL;
453 int32_t fileSize=0;
454 const char* knownCp;
455 if(error==NULL || U_FAILURE(*error)){
456 return NULL;
457 }
458 if(cp==NULL || fileName==NULL){
459 *error = U_ILLEGAL_ARGUMENT_ERROR;
460 return FALSE;
461 }
462 if (!uprv_strcmp(fileName, "-")) {
463 in = T_FileStream_stdin();
464 }else{
465 in = T_FileStream_open(fileName, "rb");
466 }
467
468 if(in!=NULL){
469 UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
470 fileSize = T_FileStream_size(in);
471 if(buf == NULL){
472 *error = U_MEMORY_ALLOCATION_ERROR;
473 T_FileStream_close(in);
474 return NULL;
475 }
476 buf->in=in;
477 buf->conv=NULL;
478 buf->showWarning = showWarning;
479 buf->isBuffered = buffered;
480 buf->signatureLength=0;
481 if(*cp==NULL || **cp=='\0'){
482 /* don't have code page name... try to autodetect */
483 ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
484 }else if(ucbuf_isCPKnown(*cp)){
485 /* discard BOM */
486 ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
487 }
488 if(U_SUCCESS(*error) && buf->conv==NULL) {
489 buf->conv=ucnv_open(*cp,error);
490 }
491 if(U_FAILURE(*error)){
492 ucnv_close(buf->conv);
493 uprv_free(buf);
494 T_FileStream_close(in);
495 return NULL;
496 }
497
498 if((buf->conv==NULL) && (buf->showWarning==TRUE)){
499 fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
500 }
501 buf->remaining=fileSize-buf->signatureLength;
502 if(buf->isBuffered){
503 buf->bufCapacity=MAX_U_BUF;
504 }else{
505 buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;
506 }
507 buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
508 if (buf->buffer == NULL) {
509 *error = U_MEMORY_ALLOCATION_ERROR;
510 ucbuf_close(buf);
511 return NULL;
512 }
513 buf->currentPos=buf->buffer;
514 buf->bufLimit=buf->buffer;
515 if(U_FAILURE(*error)){
516 fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error));
517 ucbuf_close(buf);
518 return NULL;
519 }
520 ucbuf_fillucbuf(buf,error);
521 if(U_FAILURE(*error)){
522 ucbuf_close(buf);
523 return NULL;
524 }
525 return buf;
526 }
527 *error =U_FILE_ACCESS_ERROR;
528 return NULL;
529 }
530
531
532
533 /* TODO: this method will fail if at the
534 * begining of buffer and the uchar to unget
535 * is from the previous buffer. Need to implement
536 * system to take care of that situation.
537 */
538 U_CAPI void U_EXPORT2
ucbuf_ungetc(int32_t c,UCHARBUF * buf)539 ucbuf_ungetc(int32_t c,UCHARBUF* buf){
540 /* decrement currentPos pointer
541 * if not at the begining of buffer
542 */
543 if(buf->currentPos!=buf->buffer){
544 if(*(buf->currentPos-1)==c){
545 buf->currentPos--;
546 } else {
547 /* ungetc failed - did not match. */
548 }
549 } else {
550 /* ungetc failed - beginning of buffer. */
551 }
552 }
553
554 /* frees the resources of UChar* buffer */
555 static void
ucbuf_closebuf(UCHARBUF * buf)556 ucbuf_closebuf(UCHARBUF* buf){
557 uprv_free(buf->buffer);
558 buf->buffer = NULL;
559 }
560
561 /* close the buf and release resources*/
562 U_CAPI void U_EXPORT2
ucbuf_close(UCHARBUF * buf)563 ucbuf_close(UCHARBUF* buf){
564 if(buf!=NULL){
565 if(buf->conv){
566 ucnv_close(buf->conv);
567 }
568 T_FileStream_close(buf->in);
569 ucbuf_closebuf(buf);
570 uprv_free(buf);
571 }
572 }
573
574 /* rewind the buf and file stream */
575 U_CAPI void U_EXPORT2
ucbuf_rewind(UCHARBUF * buf,UErrorCode * error)576 ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
577 if(error==NULL || U_FAILURE(*error)){
578 return;
579 }
580 if(buf){
581 buf->currentPos=buf->buffer;
582 buf->bufLimit=buf->buffer;
583 T_FileStream_rewind(buf->in);
584 buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
585
586 ucnv_resetToUnicode(buf->conv);
587 if(buf->signatureLength>0) {
588 UChar target[1]={ 0 };
589 UChar* pTarget;
590 char start[8];
591 const char* pStart;
592 int32_t numRead;
593
594 /* read the signature bytes */
595 numRead=T_FileStream_read(buf->in, start, buf->signatureLength);
596
597 /* convert and ignore initial U+FEFF, and the buffer overflow */
598 pTarget = target;
599 pStart = start;
600 ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error);
601 if(*error==U_BUFFER_OVERFLOW_ERROR) {
602 *error=U_ZERO_ERROR;
603 }
604
605 /* verify that we successfully read exactly U+FEFF */
606 if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) {
607 *error=U_INTERNAL_PROGRAM_ERROR;
608 }
609 }
610 }
611 }
612
613
614 U_CAPI int32_t U_EXPORT2
ucbuf_size(UCHARBUF * buf)615 ucbuf_size(UCHARBUF* buf){
616 if(buf){
617 if(buf->isBuffered){
618 return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
619 }else{
620 return (int32_t)(buf->bufLimit - buf->buffer);
621 }
622 }
623 return 0;
624 }
625
626 U_CAPI const UChar* U_EXPORT2
ucbuf_getBuffer(UCHARBUF * buf,int32_t * len,UErrorCode * error)627 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
628 if(error==NULL || U_FAILURE(*error)){
629 return NULL;
630 }
631 if(buf==NULL || len==NULL){
632 *error = U_ILLEGAL_ARGUMENT_ERROR;
633 return NULL;
634 }
635 *len = (int32_t)(buf->bufLimit - buf->buffer);
636 return buf->buffer;
637 }
638
639 U_CAPI const char* U_EXPORT2
ucbuf_resolveFileName(const char * inputDir,const char * fileName,char * target,int32_t * len,UErrorCode * status)640 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
641 int32_t requiredLen = 0;
642 int32_t dirlen = 0;
643 int32_t filelen = 0;
644 if(status==NULL || U_FAILURE(*status)){
645 return NULL;
646 }
647
648 if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){
649 *status = U_ILLEGAL_ARGUMENT_ERROR;
650 return NULL;
651 }
652
653
654 dirlen = (int32_t)uprv_strlen(inputDir);
655 filelen = (int32_t)uprv_strlen(fileName);
656 if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
657 requiredLen = dirlen + filelen + 2;
658 if((*len < requiredLen) || target==NULL){
659 *len = requiredLen;
660 *status = U_BUFFER_OVERFLOW_ERROR;
661 return NULL;
662 }
663
664 target[0] = '\0';
665 /*
666 * append the input dir to openFileName if the first char in
667 * filename is not file seperation char and the last char input directory is not '.'.
668 * This is to support :
669 * genrb -s. /home/icu/data
670 * genrb -s. icu/data
671 * The user cannot mix notations like
672 * genrb -s. /icu/data --- the absolute path specified. -s redundant
673 * user should use
674 * genrb -s. icu/data --- start from CWD and look in icu/data dir
675 */
676 if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
677 uprv_strcpy(target, inputDir);
678 target[dirlen] = U_FILE_SEP_CHAR;
679 }
680 target[dirlen + 1] = '\0';
681 } else {
682 requiredLen = dirlen + filelen + 1;
683 if((*len < requiredLen) || target==NULL){
684 *len = requiredLen;
685 *status = U_BUFFER_OVERFLOW_ERROR;
686 return NULL;
687 }
688
689 uprv_strcpy(target, inputDir);
690 }
691
692 uprv_strcat(target, fileName);
693 return target;
694 }
695 /*
696 * Unicode TR 13 says any of the below chars is
697 * a new line char in a readline function in addition
698 * to CR+LF combination which needs to be
699 * handled seperately
700 */
ucbuf_isCharNewLine(UChar c)701 static UBool ucbuf_isCharNewLine(UChar c){
702 switch(c){
703 case 0x000A: /* LF */
704 case 0x000D: /* CR */
705 case 0x000C: /* FF */
706 case 0x0085: /* NEL */
707 case 0x2028: /* LS */
708 case 0x2029: /* PS */
709 return TRUE;
710 default:
711 return FALSE;
712 }
713 }
714
715 U_CAPI const UChar* U_EXPORT2
ucbuf_readline(UCHARBUF * buf,int32_t * len,UErrorCode * err)716 ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
717 UChar* temp = buf->currentPos;
718 UChar* savePos =NULL;
719 UChar c=0x0000;
720 if(buf->isBuffered){
721 /* The input is buffered we have to do more
722 * for returning a pointer U_TRUNCATED_CHAR_FOUND
723 */
724 for(;;){
725 c = *temp++;
726 if(buf->remaining==0){
727 return NULL; /* end of file is reached return NULL */
728 }
729 if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
730 *err= U_TRUNCATED_CHAR_FOUND;
731 return NULL;
732 }else{
733 ucbuf_fillucbuf(buf,err);
734 if(U_FAILURE(*err)){
735 return NULL;
736 }
737 }
738 /*
739 * Accoding to TR 13 readLine functions must interpret
740 * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators
741 */
742 /* Windows CR LF */
743 if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){
744 *len = (int32_t)(temp++ - buf->currentPos);
745 savePos = buf->currentPos;
746 buf->currentPos = temp;
747 return savePos;
748 }
749 /* else */
750
751 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */
752 *len = (int32_t)(temp - buf->currentPos);
753 savePos = buf->currentPos;
754 buf->currentPos = temp;
755 return savePos;
756 }
757 }
758 }else{
759 /* we know that all input is read into the internal
760 * buffer so we can safely return pointers
761 */
762 for(;;){
763 c = *temp++;
764
765 if(buf->currentPos==buf->bufLimit){
766 return NULL; /* end of file is reached return NULL */
767 }
768 /* Windows CR LF */
769 if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){
770 *len = (int32_t)(temp++ - buf->currentPos);
771 savePos = buf->currentPos;
772 buf->currentPos = temp;
773 return savePos;
774 }
775 /* else */
776 if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */
777 *len = (int32_t)(temp - buf->currentPos);
778 savePos = buf->currentPos;
779 buf->currentPos = temp;
780 return savePos;
781 }
782 }
783 }
784 /* not reached */
785 /* A compiler warning will appear if all paths don't contain a return statement. */
786 /* return NULL;*/
787 }
788 #endif
789