• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  *
4  *   Copyright (C) 1998-2010, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  ******************************************************************************
8  *
9  * File ustdio.c
10  *
11  * Modification History:
12  *
13  *   Date        Name        Description
14  *   11/18/98    stephen     Creation.
15  *   03/12/99    stephen     Modified for new C API.
16  *   07/19/99    stephen     Fixed read() and gets()
17  ******************************************************************************
18  */
19 
20 #include "unicode/ustdio.h"
21 #include "unicode/putil.h"
22 #include "cmemory.h"
23 #include "cstring.h"
24 #include "ufile.h"
25 #include "ufmt_cmn.h"
26 #include "unicode/ucnv.h"
27 #include "unicode/ustring.h"
28 
29 #include <string.h>
30 
31 #define DELIM_LF 0x000A
32 #define DELIM_VT 0x000B
33 #define DELIM_FF 0x000C
34 #define DELIM_CR 0x000D
35 #define DELIM_NEL 0x0085
36 #define DELIM_LS 0x2028
37 #define DELIM_PS 0x2029
38 
39 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
40 #ifdef U_WINDOWS
41 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
42 static const uint32_t DELIMITERS_LEN = 2;
43 /* TODO: Default newline writing should be detected based upon the converter being used. */
44 #else
45 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
46 static const uint32_t DELIMITERS_LEN = 1;
47 #endif
48 
49 #define IS_FIRST_STRING_DELIMITER(c1) \
50  (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
51         || (c1) == DELIM_NEL \
52         || (c1) == DELIM_LS \
53         || (c1) == DELIM_PS)
54 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
55 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \
56  (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
57 
58 
59 #if !UCONFIG_NO_TRANSLITERATION
60 
61 U_CAPI UTransliterator* U_EXPORT2
u_fsettransliterator(UFILE * file,UFileDirection direction,UTransliterator * adopt,UErrorCode * status)62 u_fsettransliterator(UFILE *file, UFileDirection direction,
63                      UTransliterator *adopt, UErrorCode *status)
64 {
65     UTransliterator *old = NULL;
66 
67     if(U_FAILURE(*status))
68     {
69         return adopt;
70     }
71 
72     if(!file)
73     {
74         *status = U_ILLEGAL_ARGUMENT_ERROR;
75         return adopt;
76     }
77 
78     if(direction & U_READ)
79     {
80         /** TODO: implement */
81         *status = U_UNSUPPORTED_ERROR;
82         return adopt;
83     }
84 
85     if(adopt == NULL) /* they are clearing it */
86     {
87         if(file->fTranslit != NULL)
88         {
89             /* TODO: Check side */
90             old = file->fTranslit->translit;
91             uprv_free(file->fTranslit->buffer);
92             file->fTranslit->buffer=NULL;
93             uprv_free(file->fTranslit);
94             file->fTranslit=NULL;
95         }
96     }
97     else
98     {
99         if(file->fTranslit == NULL)
100         {
101             file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
102             if(!file->fTranslit)
103             {
104                 *status = U_MEMORY_ALLOCATION_ERROR;
105                 return adopt;
106             }
107             file->fTranslit->capacity = 0;
108             file->fTranslit->length = 0;
109             file->fTranslit->pos = 0;
110             file->fTranslit->buffer = NULL;
111         }
112         else
113         {
114             old = file->fTranslit->translit;
115             ufile_flush_translit(file);
116         }
117 
118         file->fTranslit->translit = adopt;
119     }
120 
121     return old;
122 }
123 
/*
 * Run `*count` UChars from `src` through the file's transliterator.
 *
 * f      the UFILE; if it, its fTranslit, or the transliterator is NULL,
 *        `src` is returned unchanged and `*count` is not touched.
 * src    text to append to the internal transliteration buffer.
 * count  in: number of UChars in `src`; out: number of UChars available at
 *        the returned pointer. May be NULL, in which case a count of 0 is used.
 * flush  FALSE: transliterate incrementally, keeping unfinished text buffered
 *        for the next call; TRUE: transliterate everything that is buffered.
 *
 * Returns a pointer to the internal buffer holding the transliterated text.
 * If the buffer cannot be grown, NULL is returned, `*count` is set to 0 and
 * the previously allocated buffer is left intact.
 * The UErrorCode produced by the utrans_* calls is not reported.
 */
static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
{
    int32_t newlen;
    int32_t junkCount = 0;
    int32_t textLength;
    int32_t textLimit;
    UTransPosition pos;
    UErrorCode status = U_ZERO_ERROR;

    if(count == NULL)
    {
        count = &junkCount;
    }

    if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
    {
        /* fast path */
        return src;
    }

    /* First: slide the not-yet-consumed tail down to the start of the buffer */
    if(f->fTranslit->length > f->fTranslit->pos)
    {
        memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
            (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar));
    }
    f->fTranslit->length -= f->fTranslit->pos; /* always */
    f->fTranslit->pos = 0;

    /* Calculate new buffer size needed (4x leaves room for expansion) */
    newlen = (*count + f->fTranslit->length) * 4;

    if(newlen > f->fTranslit->capacity)
    {
        UChar *grown;

        if(f->fTranslit->buffer == NULL)
        {
            grown = (UChar*)uprv_malloc(newlen * sizeof(UChar));
        }
        else
        {
            grown = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar));
        }
        /*
         * On failure keep the old buffer and capacity: overwriting the pointer
         * with NULL would leak the old block and leave capacity out of sync.
         */
        if (grown == NULL) {
            *count = 0;
            return NULL;
        }
        f->fTranslit->buffer = grown;
        f->fTranslit->capacity = newlen;
    }

    /* Now, copy any data over */
    u_strncpy(f->fTranslit->buffer + f->fTranslit->length,
        src,
        *count);
    f->fTranslit->length += *count;

    /* Now, translit in place as much as we can  */
    if(flush == FALSE)
    {
        textLength = f->fTranslit->length;
        pos.contextStart = 0;
        pos.contextLimit = textLength;
        pos.start        = 0;
        pos.limit        = textLength;

        utrans_transIncrementalUChars(f->fTranslit->translit,
            f->fTranslit->buffer, /* because we shifted */
            &textLength,
            f->fTranslit->capacity,
            &pos,
            &status);

        /* now: start/limit point to the transliterated text */
        /* Transliterated is [buffer..pos.start) */
        *count            = pos.start;
        f->fTranslit->pos = pos.start;
        f->fTranslit->length = pos.limit;

        return f->fTranslit->buffer;
    }
    else
    {
        textLength = f->fTranslit->length;
        textLimit = f->fTranslit->length;

        utrans_transUChars(f->fTranslit->translit,
            f->fTranslit->buffer,
            &textLength,
            f->fTranslit->capacity,
            0,
            &textLimit,
            &status);

        /* out: converted len */
        *count = textLimit;

        /* Everything was handed out: the buffer is logically empty again */
        f->fTranslit->pos = 0;
        f->fTranslit->length = 0;

        return f->fTranslit->buffer;
    }
}
226 
227 #endif
228 
229 void
ufile_flush_translit(UFILE * f)230 ufile_flush_translit(UFILE *f)
231 {
232 #if !UCONFIG_NO_TRANSLITERATION
233     if((!f)||(!f->fTranslit))
234         return;
235 #endif
236 
237     u_file_write_flush(NULL, 0, f, FALSE, TRUE);
238 }
239 
240 
241 void
ufile_flush_io(UFILE * f)242 ufile_flush_io(UFILE *f)
243 {
244   if((!f) || (!f->fFile)) {
245     return; /* skip if no file */
246   }
247 
248   u_file_write_flush(NULL, 0, f, TRUE, FALSE);
249 }
250 
251 
252 void
ufile_close_translit(UFILE * f)253 ufile_close_translit(UFILE *f)
254 {
255 #if !UCONFIG_NO_TRANSLITERATION
256     if((!f)||(!f->fTranslit))
257         return;
258 #endif
259 
260     ufile_flush_translit(f);
261 
262 #if !UCONFIG_NO_TRANSLITERATION
263     if(f->fTranslit->translit)
264         utrans_close(f->fTranslit->translit);
265 
266     if(f->fTranslit->buffer)
267     {
268         uprv_free(f->fTranslit->buffer);
269     }
270 
271     uprv_free(f->fTranslit);
272     f->fTranslit = NULL;
273 #endif
274 }
275 
276 
277 /* Input/output */
278 
279 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputs(const UChar * s,UFILE * f)280 u_fputs(const UChar    *s,
281         UFILE        *f)
282 {
283     int32_t count = u_file_write(s, u_strlen(s), f);
284     count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
285     return count;
286 }
287 
288 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputc(UChar32 uc,UFILE * f)289 u_fputc(UChar32      uc,
290         UFILE        *f)
291 {
292     UChar buf[2];
293     int32_t idx = 0;
294     UBool isError = FALSE;
295 
296     U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
297     if (isError) {
298         return U_EOF;
299     }
300     return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
301 }
302 
303 
304 U_CFUNC int32_t U_EXPORT2
u_file_write_flush(const UChar * chars,int32_t count,UFILE * f,UBool flushIO,UBool flushTranslit)305 u_file_write_flush(const UChar *chars,
306                    int32_t     count,
307                    UFILE       *f,
308                    UBool       flushIO,
309                    UBool       flushTranslit)
310 {
311     /* Set up conversion parameters */
312     UErrorCode  status       = U_ZERO_ERROR;
313     const UChar *mySource    = chars;
314     const UChar *mySourceBegin;
315     const UChar *mySourceEnd;
316     char        charBuffer[UFILE_CHARBUFFER_SIZE];
317     char        *myTarget   = charBuffer;
318     int32_t     written      = 0;
319     int32_t     numConverted = 0;
320 
321     if (count < 0) {
322         count = u_strlen(chars);
323     }
324 
325 #if !UCONFIG_NO_TRANSLITERATION
326     if((f->fTranslit) && (f->fTranslit->translit))
327     {
328         /* Do the transliteration */
329         mySource = u_file_translit(f, chars, &count, flushTranslit);
330     }
331 #endif
332 
333     /* Write to a string. */
334     if (!f->fFile) {
335         int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
336         if (flushIO && charsLeft > count) {
337             count++;
338         }
339         written = ufmt_min(count, charsLeft);
340         u_strncpy(f->str.fPos, mySource, written);
341         f->str.fPos += written;
342         return written;
343     }
344 
345     mySourceEnd = mySource + count;
346 
347     /* Perform the conversion in a loop */
348     do {
349         mySourceBegin = mySource; /* beginning location for this loop */
350         status     = U_ZERO_ERROR;
351         if(f->fConverter != NULL) { /* We have a valid converter */
352             ucnv_fromUnicode(f->fConverter,
353                 &myTarget,
354                 charBuffer + UFILE_CHARBUFFER_SIZE,
355                 &mySource,
356                 mySourceEnd,
357                 NULL,
358                 flushIO,
359                 &status);
360         } else { /*weiv: do the invariant conversion */
361             int32_t convertChars = (int32_t) (mySourceEnd - mySource);
362             if (convertChars > UFILE_CHARBUFFER_SIZE) {
363                 convertChars = UFILE_CHARBUFFER_SIZE;
364                 status = U_BUFFER_OVERFLOW_ERROR;
365             }
366             u_UCharsToChars(mySource, myTarget, convertChars);
367             mySource += convertChars;
368             myTarget += convertChars;
369         }
370         numConverted = (int32_t)(myTarget - charBuffer);
371 
372         if (numConverted > 0) {
373             /* write the converted bytes */
374             fwrite(charBuffer,
375                 sizeof(char),
376                 numConverted,
377                 f->fFile);
378 
379             written     += (int32_t) (mySource - mySourceBegin);
380         }
381         myTarget     = charBuffer;
382     }
383     while(status == U_BUFFER_OVERFLOW_ERROR);
384 
385     /* return # of chars written */
386     return written;
387 }
388 
389 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_write(const UChar * chars,int32_t count,UFILE * f)390 u_file_write(    const UChar     *chars,
391              int32_t        count,
392              UFILE         *f)
393 {
394     return u_file_write_flush(chars,count,f,FALSE,FALSE);
395 }
396 
397 
398 /* private function used for buffering input */
399 void
ufile_fill_uchar_buffer(UFILE * f)400 ufile_fill_uchar_buffer(UFILE *f)
401 {
402     UErrorCode  status;
403     const char  *mySource;
404     const char  *mySourceEnd;
405     UChar       *myTarget;
406     int32_t     bufferSize;
407     int32_t     maxCPBytes;
408     int32_t     bytesRead;
409     int32_t     availLength;
410     int32_t     dataSize;
411     char        charBuffer[UFILE_CHARBUFFER_SIZE];
412     u_localized_string *str;
413 
414     if (f->fFile == NULL) {
415         /* There is nothing to do. It's a string. */
416         return;
417     }
418 
419     str = &f->str;
420     dataSize = (int32_t)(str->fLimit - str->fPos);
421     if (f->fFileno == 0 && dataSize > 0) {
422         /* Don't read from stdin too many times. There is still some data. */
423         return;
424     }
425 
426     /* shift the buffer if it isn't empty */
427     if(dataSize != 0) {
428         uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar));
429     }
430 
431 
432     /* record how much buffer space is available */
433     availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
434 
435     /* Determine the # of codepage bytes needed to fill our UChar buffer */
436     /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
437     maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
438 
439     /* Read in the data to convert */
440     if (f->fFileno == 0) {
441         /* Special case. Read from stdin one line at a time. */
442         char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
443         bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
444     }
445     else {
446         /* A normal file */
447         bytesRead = (int32_t)fread(charBuffer,
448             sizeof(char),
449             ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
450             f->fFile);
451     }
452 
453     /* Set up conversion parameters */
454     status      = U_ZERO_ERROR;
455     mySource    = charBuffer;
456     mySourceEnd = charBuffer + bytesRead;
457     myTarget    = f->fUCBuffer + dataSize;
458     bufferSize  = UFILE_UCHARBUFFER_SIZE;
459 
460     if(f->fConverter != NULL) { /* We have a valid converter */
461         /* Perform the conversion */
462         ucnv_toUnicode(f->fConverter,
463             &myTarget,
464             f->fUCBuffer + bufferSize,
465             &mySource,
466             mySourceEnd,
467             NULL,
468             (UBool)(feof(f->fFile) != 0),
469             &status);
470 
471     } else { /*weiv: do the invariant conversion */
472         u_charsToUChars(mySource, myTarget, bytesRead);
473         myTarget += bytesRead;
474     }
475 
476     /* update the pointers into our array */
477     str->fPos    = str->fBuffer;
478     str->fLimit  = myTarget;
479 }
480 
481 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgets(UChar * s,int32_t n,UFILE * f)482 u_fgets(UChar        *s,
483         int32_t       n,
484         UFILE        *f)
485 {
486     int32_t dataSize;
487     int32_t count;
488     UChar *alias;
489     const UChar *limit;
490     UChar *sItr;
491     UChar currDelim = 0;
492     u_localized_string *str;
493 
494     if (n <= 0) {
495         /* Caller screwed up. We need to write the null terminatior. */
496         return NULL;
497     }
498 
499     /* fill the buffer if needed */
500     str = &f->str;
501     if (str->fPos >= str->fLimit) {
502         ufile_fill_uchar_buffer(f);
503     }
504 
505     /* subtract 1 from n to compensate for the terminator */
506     --n;
507 
508     /* determine the amount of data in the buffer */
509     dataSize = (int32_t)(str->fLimit - str->fPos);
510 
511     /* if 0 characters were left, return 0 */
512     if (dataSize == 0)
513         return NULL;
514 
515     /* otherwise, iteratively fill the buffer and copy */
516     count = 0;
517     sItr = s;
518     currDelim = 0;
519     while (dataSize > 0 && count < n) {
520         alias = str->fPos;
521 
522         /* Find how much to copy */
523         if (dataSize < (n - count)) {
524             limit = str->fLimit;
525         }
526         else {
527             limit = alias + (n - count);
528         }
529 
530         if (!currDelim) {
531             /* Copy UChars until we find the first occurrence of a delimiter character */
532             while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
533                 count++;
534                 *(sItr++) = *(alias++);
535             }
536             /* Preserve the newline */
537             if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
538                 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
539                     currDelim = *alias;
540                 }
541                 else {
542                     currDelim = 1;  /* This isn't a newline, but it's used to say
543                                     that we should break later. We've checked all
544                                     possible newline combinations even across buffer
545                                     boundaries. */
546                 }
547                 count++;
548                 *(sItr++) = *(alias++);
549             }
550         }
551         /* If we have a CRLF combination, preserve that too. */
552         if (alias < limit) {
553             if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
554                 count++;
555                 *(sItr++) = *(alias++);
556             }
557             currDelim = 1;  /* This isn't a newline, but it's used to say
558                             that we should break later. We've checked all
559                             possible newline combinations even across buffer
560                             boundaries. */
561         }
562 
563         /* update the current buffer position */
564         str->fPos = alias;
565 
566         /* if we found a delimiter */
567         if (currDelim == 1) {
568             /* break out */
569             break;
570         }
571 
572         /* refill the buffer */
573         ufile_fill_uchar_buffer(f);
574 
575         /* determine the amount of data in the buffer */
576         dataSize = (int32_t)(str->fLimit - str->fPos);
577     }
578 
579     /* add the terminator and return s */
580     *sItr = 0x0000;
581     return s;
582 }
583 
584 U_CFUNC UBool U_EXPORT2
ufile_getch(UFILE * f,UChar * ch)585 ufile_getch(UFILE *f, UChar *ch)
586 {
587     UBool isValidChar = FALSE;
588 
589     *ch = U_EOF;
590     /* if we have an available character in the buffer, return it */
591     if(f->str.fPos < f->str.fLimit){
592         *ch = *(f->str.fPos)++;
593         isValidChar = TRUE;
594     }
595     else {
596         /* otherwise, fill the buffer and return the next character */
597         if(f->str.fPos >= f->str.fLimit) {
598             ufile_fill_uchar_buffer(f);
599         }
600         if(f->str.fPos < f->str.fLimit) {
601             *ch = *(f->str.fPos)++;
602             isValidChar = TRUE;
603         }
604     }
605     return isValidChar;
606 }
607 
608 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetc(UFILE * f)609 u_fgetc(UFILE        *f)
610 {
611     UChar ch;
612     ufile_getch(f, &ch);
613     return ch;
614 }
615 
616 U_CFUNC UBool U_EXPORT2
ufile_getch32(UFILE * f,UChar32 * c32)617 ufile_getch32(UFILE *f, UChar32 *c32)
618 {
619     UBool isValidChar = FALSE;
620     u_localized_string *str;
621 
622     *c32 = U_EOF;
623 
624     /* Fill the buffer if it is empty */
625     str = &f->str;
626     if (f && str->fPos + 1 >= str->fLimit) {
627         ufile_fill_uchar_buffer(f);
628     }
629 
630     /* Get the next character in the buffer */
631     if (str->fPos < str->fLimit) {
632         *c32 = *(str->fPos)++;
633         if (U_IS_LEAD(*c32)) {
634             if (str->fPos < str->fLimit) {
635                 UChar c16 = *(str->fPos)++;
636                 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
637                 isValidChar = TRUE;
638             }
639             else {
640                 *c32 = U_EOF;
641             }
642         }
643         else {
644             isValidChar = TRUE;
645         }
646     }
647 
648     return isValidChar;
649 }
650 
651 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetcx(UFILE * f)652 u_fgetcx(UFILE        *f)
653 {
654     UChar32 ch;
655     ufile_getch32(f, &ch);
656     return ch;
657 }
658 
659 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fungetc(UChar32 ch,UFILE * f)660 u_fungetc(UChar32        ch,
661     UFILE        *f)
662 {
663     u_localized_string *str;
664 
665     str = &f->str;
666 
667     /* if we're at the beginning of the buffer, sorry! */
668     if (str->fPos == str->fBuffer
669         || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
670     {
671         ch = U_EOF;
672     }
673     else {
674         /* otherwise, put the character back */
675         /* Remember, read them back on in the reverse order. */
676         if (U_IS_LEAD(ch)) {
677             if (*--(str->fPos) != U16_TRAIL(ch)
678                 || *--(str->fPos) != U16_LEAD(ch))
679             {
680                 ch = U_EOF;
681             }
682         }
683         else if (*--(str->fPos) != ch) {
684             ch = U_EOF;
685         }
686     }
687     return ch;
688 }
689 
690 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_read(UChar * chars,int32_t count,UFILE * f)691 u_file_read(    UChar        *chars,
692     int32_t        count,
693     UFILE         *f)
694 {
695     int32_t dataSize;
696     int32_t read = 0;
697     u_localized_string *str = &f->str;
698 
699     do {
700 
701         /* determine the amount of data in the buffer */
702         dataSize = (int32_t)(str->fLimit - str->fPos);
703         if (dataSize <= 0) {
704             /* fill the buffer */
705             ufile_fill_uchar_buffer(f);
706             dataSize = (int32_t)(str->fLimit - str->fPos);
707         }
708 
709         /* Make sure that we don't read too much */
710         if (dataSize > (count - read)) {
711             dataSize = count - read;
712         }
713 
714         /* copy the current data in the buffer */
715         memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
716 
717         /* update number of items read */
718         read += dataSize;
719 
720         /* update the current buffer position */
721         str->fPos += dataSize;
722     }
723     while (dataSize != 0 && read < count);
724 
725     return read;
726 }
727