1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1998-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * File ustdio.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 11/18/98 stephen Creation.
15 * 03/12/99 stephen Modified for new C API.
16 * 07/19/99 stephen Fixed read() and gets()
17 ******************************************************************************
18 */
19
20 #include "unicode/ustdio.h"
21 #include "unicode/putil.h"
22 #include "cmemory.h"
23 #include "cstring.h"
24 #include "ufile.h"
25 #include "ufmt_cmn.h"
26 #include "unicode/ucnv.h"
27 #include "unicode/ustring.h"
28
29 #include <string.h>
30
31 #define DELIM_LF 0x000A
32 #define DELIM_VT 0x000B
33 #define DELIM_FF 0x000C
34 #define DELIM_CR 0x000D
35 #define DELIM_NEL 0x0085
36 #define DELIM_LS 0x2028
37 #define DELIM_PS 0x2029
38
39 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
40 #ifdef U_WINDOWS
41 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
42 static const uint32_t DELIMITERS_LEN = 2;
43 /* TODO: Default newline writing should be detected based upon the converter being used. */
44 #else
45 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
46 static const uint32_t DELIMITERS_LEN = 1;
47 #endif
48
49 #define IS_FIRST_STRING_DELIMITER(c1) \
50 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
51 || (c1) == DELIM_NEL \
52 || (c1) == DELIM_LS \
53 || (c1) == DELIM_PS)
54 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
55 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \
56 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
57
58
59 #if !UCONFIG_NO_TRANSLITERATION
60
61 U_CAPI UTransliterator* U_EXPORT2
u_fsettransliterator(UFILE * file,UFileDirection direction,UTransliterator * adopt,UErrorCode * status)62 u_fsettransliterator(UFILE *file, UFileDirection direction,
63 UTransliterator *adopt, UErrorCode *status)
64 {
65 UTransliterator *old = NULL;
66
67 if(U_FAILURE(*status))
68 {
69 return adopt;
70 }
71
72 if(!file)
73 {
74 *status = U_ILLEGAL_ARGUMENT_ERROR;
75 return adopt;
76 }
77
78 if(direction & U_READ)
79 {
80 /** TODO: implement */
81 *status = U_UNSUPPORTED_ERROR;
82 return adopt;
83 }
84
85 if(adopt == NULL) /* they are clearing it */
86 {
87 if(file->fTranslit != NULL)
88 {
89 /* TODO: Check side */
90 old = file->fTranslit->translit;
91 uprv_free(file->fTranslit->buffer);
92 file->fTranslit->buffer=NULL;
93 uprv_free(file->fTranslit);
94 file->fTranslit=NULL;
95 }
96 }
97 else
98 {
99 if(file->fTranslit == NULL)
100 {
101 file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
102 if(!file->fTranslit)
103 {
104 *status = U_MEMORY_ALLOCATION_ERROR;
105 return adopt;
106 }
107 file->fTranslit->capacity = 0;
108 file->fTranslit->length = 0;
109 file->fTranslit->pos = 0;
110 file->fTranslit->buffer = NULL;
111 }
112 else
113 {
114 old = file->fTranslit->translit;
115 ufile_flush_translit(file);
116 }
117
118 file->fTranslit->translit = adopt;
119 }
120
121 return old;
122 }
123
/**
 * Append `*count` UChars from `src` to the file's transliteration buffer and
 * transliterate the buffer in place.
 *
 * @param f     The file; when it has no transliterator, `src` is returned
 *              untouched and `*count` is not modified.
 * @param src   Text to append (may be NULL when `*count` is 0).
 * @param count In: number of UChars at `src`. Out: number of UChars at the
 *              returned pointer that are ready to be written. May be NULL,
 *              in which case an input length of 0 is assumed.
 * @param flush FALSE for incremental transliteration (text that is not yet
 *              final stays buffered); TRUE to transliterate everything and
 *              empty the buffer.
 * @return The transliteration buffer, or `src` on the fast path. On
 *         allocation failure (or an unrepresentable size) returns NULL with
 *         `*count` set to 0; text buffered before the call is preserved.
 */
static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
{
    int32_t newlen;
    int32_t junkCount = 0;
    int32_t textLength;
    int32_t textLimit;
    UTransPosition pos;
    UFILETranslitBuffer *tb;
    UErrorCode status = U_ZERO_ERROR;

    if(count == NULL)
    {
        count = &junkCount;
    }

    if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
    {
        /* fast path */
        return src;
    }

    tb = f->fTranslit;

    /* First: slide the not-yet-consumed text to the front of the buffer */
    if(tb->length > tb->pos)
    {
        memmove(tb->buffer, tb->buffer + tb->pos,
            (tb->length - tb->pos)*sizeof(UChar));
    }
    tb->length -= tb->pos; /* always */
    tb->pos = 0;

    /* Reject sizes whose 4x growth factor would overflow int32_t. */
    if(*count < 0 || *count > (INT32_MAX / 4) - tb->length)
    {
        *count = 0;
        return NULL;
    }

    /* Calculate new buffer size needed */
    newlen = (*count + tb->length) * 4;

    if(newlen > tb->capacity)
    {
        UChar *grown;

        if(tb->buffer == NULL)
        {
            grown = (UChar*)uprv_malloc(newlen * sizeof(UChar));
        }
        else
        {
            grown = (UChar*)uprv_realloc(tb->buffer, newlen * sizeof(UChar));
        }
        /*
         * On failure keep the old buffer (and its capacity/length) intact so
         * nothing leaks, and report zero usable UChars so the caller does
         * not read through the NULL result.
         */
        if (grown == NULL) {
            *count = 0;
            return NULL;
        }
        tb->buffer = grown;
        tb->capacity = newlen;
    }

    /*
     * Now, copy any data over. A length-based copy is used so that an
     * embedded U+0000 in the input does not cut the copy short while
     * `length` still advances by the full count.
     */
    if(src != NULL && *count > 0)
    {
        memcpy(tb->buffer + tb->length, src, (size_t)*count * sizeof(UChar));
        tb->length += *count;
    }

    /* Now, translit in place as much as we can */
    if(flush == FALSE)
    {
        textLength = tb->length;
        pos.contextStart = 0;
        pos.contextLimit = textLength;
        pos.start = 0;
        pos.limit = textLength;

        /* NOTE(review): status is not inspected after the call. */
        utrans_transIncrementalUChars(tb->translit,
            tb->buffer, /* because we shifted */
            &textLength,
            tb->capacity,
            &pos,
            &status);

        /* now: start/limit point to the transliterated text */
        /* Transliterated is [buffer..pos.start) */
        *count = pos.start;
        tb->pos = pos.start;
        tb->length = pos.limit;

        return tb->buffer;
    }
    else
    {
        textLength = tb->length;
        textLimit = tb->length;

        /* NOTE(review): status is not inspected after the call. */
        utrans_transUChars(tb->translit,
            tb->buffer,
            &textLength,
            tb->capacity,
            0,
            &textLimit,
            &status);

        /* out: converted len */
        *count = textLimit;

        /* Everything has been handed out: the buffer is logically empty. */
        tb->pos = 0;
        tb->length = 0;

        return tb->buffer;
    }
}
226
227 #endif
228
229 void
ufile_flush_translit(UFILE * f)230 ufile_flush_translit(UFILE *f)
231 {
232 #if !UCONFIG_NO_TRANSLITERATION
233 if((!f)||(!f->fTranslit))
234 return;
235 #endif
236
237 u_file_write_flush(NULL, 0, f, FALSE, TRUE);
238 }
239
240
241 void
ufile_flush_io(UFILE * f)242 ufile_flush_io(UFILE *f)
243 {
244 if((!f) || (!f->fFile)) {
245 return; /* skip if no file */
246 }
247
248 u_file_write_flush(NULL, 0, f, TRUE, FALSE);
249 }
250
251
252 void
ufile_close_translit(UFILE * f)253 ufile_close_translit(UFILE *f)
254 {
255 #if !UCONFIG_NO_TRANSLITERATION
256 if((!f)||(!f->fTranslit))
257 return;
258 #endif
259
260 ufile_flush_translit(f);
261
262 #if !UCONFIG_NO_TRANSLITERATION
263 if(f->fTranslit->translit)
264 utrans_close(f->fTranslit->translit);
265
266 if(f->fTranslit->buffer)
267 {
268 uprv_free(f->fTranslit->buffer);
269 }
270
271 uprv_free(f->fTranslit);
272 f->fTranslit = NULL;
273 #endif
274 }
275
276
277 /* Input/output */
278
279 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputs(const UChar * s,UFILE * f)280 u_fputs(const UChar *s,
281 UFILE *f)
282 {
283 int32_t count = u_file_write(s, u_strlen(s), f);
284 count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
285 return count;
286 }
287
288 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fputc(UChar32 uc,UFILE * f)289 u_fputc(UChar32 uc,
290 UFILE *f)
291 {
292 UChar buf[2];
293 int32_t idx = 0;
294 UBool isError = FALSE;
295
296 U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
297 if (isError) {
298 return U_EOF;
299 }
300 return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
301 }
302
303
304 U_CFUNC int32_t U_EXPORT2
u_file_write_flush(const UChar * chars,int32_t count,UFILE * f,UBool flushIO,UBool flushTranslit)305 u_file_write_flush(const UChar *chars,
306 int32_t count,
307 UFILE *f,
308 UBool flushIO,
309 UBool flushTranslit)
310 {
311 /* Set up conversion parameters */
312 UErrorCode status = U_ZERO_ERROR;
313 const UChar *mySource = chars;
314 const UChar *mySourceBegin;
315 const UChar *mySourceEnd;
316 char charBuffer[UFILE_CHARBUFFER_SIZE];
317 char *myTarget = charBuffer;
318 int32_t written = 0;
319 int32_t numConverted = 0;
320
321 if (count < 0) {
322 count = u_strlen(chars);
323 }
324
325 #if !UCONFIG_NO_TRANSLITERATION
326 if((f->fTranslit) && (f->fTranslit->translit))
327 {
328 /* Do the transliteration */
329 mySource = u_file_translit(f, chars, &count, flushTranslit);
330 }
331 #endif
332
333 /* Write to a string. */
334 if (!f->fFile) {
335 int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
336 if (flushIO && charsLeft > count) {
337 count++;
338 }
339 written = ufmt_min(count, charsLeft);
340 u_strncpy(f->str.fPos, mySource, written);
341 f->str.fPos += written;
342 return written;
343 }
344
345 mySourceEnd = mySource + count;
346
347 /* Perform the conversion in a loop */
348 do {
349 mySourceBegin = mySource; /* beginning location for this loop */
350 status = U_ZERO_ERROR;
351 if(f->fConverter != NULL) { /* We have a valid converter */
352 ucnv_fromUnicode(f->fConverter,
353 &myTarget,
354 charBuffer + UFILE_CHARBUFFER_SIZE,
355 &mySource,
356 mySourceEnd,
357 NULL,
358 flushIO,
359 &status);
360 } else { /*weiv: do the invariant conversion */
361 int32_t convertChars = (int32_t) (mySourceEnd - mySource);
362 if (convertChars > UFILE_CHARBUFFER_SIZE) {
363 convertChars = UFILE_CHARBUFFER_SIZE;
364 status = U_BUFFER_OVERFLOW_ERROR;
365 }
366 u_UCharsToChars(mySource, myTarget, convertChars);
367 mySource += convertChars;
368 myTarget += convertChars;
369 }
370 numConverted = (int32_t)(myTarget - charBuffer);
371
372 if (numConverted > 0) {
373 /* write the converted bytes */
374 fwrite(charBuffer,
375 sizeof(char),
376 numConverted,
377 f->fFile);
378
379 written += (int32_t) (mySource - mySourceBegin);
380 }
381 myTarget = charBuffer;
382 }
383 while(status == U_BUFFER_OVERFLOW_ERROR);
384
385 /* return # of chars written */
386 return written;
387 }
388
389 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_write(const UChar * chars,int32_t count,UFILE * f)390 u_file_write( const UChar *chars,
391 int32_t count,
392 UFILE *f)
393 {
394 return u_file_write_flush(chars,count,f,FALSE,FALSE);
395 }
396
397
398 /* private function used for buffering input */
399 void
ufile_fill_uchar_buffer(UFILE * f)400 ufile_fill_uchar_buffer(UFILE *f)
401 {
402 UErrorCode status;
403 const char *mySource;
404 const char *mySourceEnd;
405 UChar *myTarget;
406 int32_t bufferSize;
407 int32_t maxCPBytes;
408 int32_t bytesRead;
409 int32_t availLength;
410 int32_t dataSize;
411 char charBuffer[UFILE_CHARBUFFER_SIZE];
412 u_localized_string *str;
413
414 if (f->fFile == NULL) {
415 /* There is nothing to do. It's a string. */
416 return;
417 }
418
419 str = &f->str;
420 dataSize = (int32_t)(str->fLimit - str->fPos);
421 if (f->fFileno == 0 && dataSize > 0) {
422 /* Don't read from stdin too many times. There is still some data. */
423 return;
424 }
425
426 /* shift the buffer if it isn't empty */
427 if(dataSize != 0) {
428 uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar));
429 }
430
431
432 /* record how much buffer space is available */
433 availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
434
435 /* Determine the # of codepage bytes needed to fill our UChar buffer */
436 /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
437 maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
438
439 /* Read in the data to convert */
440 if (f->fFileno == 0) {
441 /* Special case. Read from stdin one line at a time. */
442 char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
443 bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
444 }
445 else {
446 /* A normal file */
447 bytesRead = (int32_t)fread(charBuffer,
448 sizeof(char),
449 ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
450 f->fFile);
451 }
452
453 /* Set up conversion parameters */
454 status = U_ZERO_ERROR;
455 mySource = charBuffer;
456 mySourceEnd = charBuffer + bytesRead;
457 myTarget = f->fUCBuffer + dataSize;
458 bufferSize = UFILE_UCHARBUFFER_SIZE;
459
460 if(f->fConverter != NULL) { /* We have a valid converter */
461 /* Perform the conversion */
462 ucnv_toUnicode(f->fConverter,
463 &myTarget,
464 f->fUCBuffer + bufferSize,
465 &mySource,
466 mySourceEnd,
467 NULL,
468 (UBool)(feof(f->fFile) != 0),
469 &status);
470
471 } else { /*weiv: do the invariant conversion */
472 u_charsToUChars(mySource, myTarget, bytesRead);
473 myTarget += bytesRead;
474 }
475
476 /* update the pointers into our array */
477 str->fPos = str->fBuffer;
478 str->fLimit = myTarget;
479 }
480
481 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgets(UChar * s,int32_t n,UFILE * f)482 u_fgets(UChar *s,
483 int32_t n,
484 UFILE *f)
485 {
486 int32_t dataSize;
487 int32_t count;
488 UChar *alias;
489 const UChar *limit;
490 UChar *sItr;
491 UChar currDelim = 0;
492 u_localized_string *str;
493
494 if (n <= 0) {
495 /* Caller screwed up. We need to write the null terminatior. */
496 return NULL;
497 }
498
499 /* fill the buffer if needed */
500 str = &f->str;
501 if (str->fPos >= str->fLimit) {
502 ufile_fill_uchar_buffer(f);
503 }
504
505 /* subtract 1 from n to compensate for the terminator */
506 --n;
507
508 /* determine the amount of data in the buffer */
509 dataSize = (int32_t)(str->fLimit - str->fPos);
510
511 /* if 0 characters were left, return 0 */
512 if (dataSize == 0)
513 return NULL;
514
515 /* otherwise, iteratively fill the buffer and copy */
516 count = 0;
517 sItr = s;
518 currDelim = 0;
519 while (dataSize > 0 && count < n) {
520 alias = str->fPos;
521
522 /* Find how much to copy */
523 if (dataSize < (n - count)) {
524 limit = str->fLimit;
525 }
526 else {
527 limit = alias + (n - count);
528 }
529
530 if (!currDelim) {
531 /* Copy UChars until we find the first occurrence of a delimiter character */
532 while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
533 count++;
534 *(sItr++) = *(alias++);
535 }
536 /* Preserve the newline */
537 if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
538 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
539 currDelim = *alias;
540 }
541 else {
542 currDelim = 1; /* This isn't a newline, but it's used to say
543 that we should break later. We've checked all
544 possible newline combinations even across buffer
545 boundaries. */
546 }
547 count++;
548 *(sItr++) = *(alias++);
549 }
550 }
551 /* If we have a CRLF combination, preserve that too. */
552 if (alias < limit) {
553 if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
554 count++;
555 *(sItr++) = *(alias++);
556 }
557 currDelim = 1; /* This isn't a newline, but it's used to say
558 that we should break later. We've checked all
559 possible newline combinations even across buffer
560 boundaries. */
561 }
562
563 /* update the current buffer position */
564 str->fPos = alias;
565
566 /* if we found a delimiter */
567 if (currDelim == 1) {
568 /* break out */
569 break;
570 }
571
572 /* refill the buffer */
573 ufile_fill_uchar_buffer(f);
574
575 /* determine the amount of data in the buffer */
576 dataSize = (int32_t)(str->fLimit - str->fPos);
577 }
578
579 /* add the terminator and return s */
580 *sItr = 0x0000;
581 return s;
582 }
583
584 U_CFUNC UBool U_EXPORT2
ufile_getch(UFILE * f,UChar * ch)585 ufile_getch(UFILE *f, UChar *ch)
586 {
587 UBool isValidChar = FALSE;
588
589 *ch = U_EOF;
590 /* if we have an available character in the buffer, return it */
591 if(f->str.fPos < f->str.fLimit){
592 *ch = *(f->str.fPos)++;
593 isValidChar = TRUE;
594 }
595 else {
596 /* otherwise, fill the buffer and return the next character */
597 if(f->str.fPos >= f->str.fLimit) {
598 ufile_fill_uchar_buffer(f);
599 }
600 if(f->str.fPos < f->str.fLimit) {
601 *ch = *(f->str.fPos)++;
602 isValidChar = TRUE;
603 }
604 }
605 return isValidChar;
606 }
607
608 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetc(UFILE * f)609 u_fgetc(UFILE *f)
610 {
611 UChar ch;
612 ufile_getch(f, &ch);
613 return ch;
614 }
615
616 U_CFUNC UBool U_EXPORT2
ufile_getch32(UFILE * f,UChar32 * c32)617 ufile_getch32(UFILE *f, UChar32 *c32)
618 {
619 UBool isValidChar = FALSE;
620 u_localized_string *str;
621
622 *c32 = U_EOF;
623
624 /* Fill the buffer if it is empty */
625 str = &f->str;
626 if (f && str->fPos + 1 >= str->fLimit) {
627 ufile_fill_uchar_buffer(f);
628 }
629
630 /* Get the next character in the buffer */
631 if (str->fPos < str->fLimit) {
632 *c32 = *(str->fPos)++;
633 if (U_IS_LEAD(*c32)) {
634 if (str->fPos < str->fLimit) {
635 UChar c16 = *(str->fPos)++;
636 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
637 isValidChar = TRUE;
638 }
639 else {
640 *c32 = U_EOF;
641 }
642 }
643 else {
644 isValidChar = TRUE;
645 }
646 }
647
648 return isValidChar;
649 }
650
651 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fgetcx(UFILE * f)652 u_fgetcx(UFILE *f)
653 {
654 UChar32 ch;
655 ufile_getch32(f, &ch);
656 return ch;
657 }
658
659 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_fungetc(UChar32 ch,UFILE * f)660 u_fungetc(UChar32 ch,
661 UFILE *f)
662 {
663 u_localized_string *str;
664
665 str = &f->str;
666
667 /* if we're at the beginning of the buffer, sorry! */
668 if (str->fPos == str->fBuffer
669 || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
670 {
671 ch = U_EOF;
672 }
673 else {
674 /* otherwise, put the character back */
675 /* Remember, read them back on in the reverse order. */
676 if (U_IS_LEAD(ch)) {
677 if (*--(str->fPos) != U16_TRAIL(ch)
678 || *--(str->fPos) != U16_LEAD(ch))
679 {
680 ch = U_EOF;
681 }
682 }
683 else if (*--(str->fPos) != ch) {
684 ch = U_EOF;
685 }
686 }
687 return ch;
688 }
689
690 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
u_file_read(UChar * chars,int32_t count,UFILE * f)691 u_file_read( UChar *chars,
692 int32_t count,
693 UFILE *f)
694 {
695 int32_t dataSize;
696 int32_t read = 0;
697 u_localized_string *str = &f->str;
698
699 do {
700
701 /* determine the amount of data in the buffer */
702 dataSize = (int32_t)(str->fLimit - str->fPos);
703 if (dataSize <= 0) {
704 /* fill the buffer */
705 ufile_fill_uchar_buffer(f);
706 dataSize = (int32_t)(str->fLimit - str->fPos);
707 }
708
709 /* Make sure that we don't read too much */
710 if (dataSize > (count - read)) {
711 dataSize = count - read;
712 }
713
714 /* copy the current data in the buffer */
715 memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
716
717 /* update number of items read */
718 read += dataSize;
719
720 /* update the current buffer position */
721 str->fPos += dataSize;
722 }
723 while (dataSize != 0 && read < count);
724
725 return read;
726 }
727