1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (c) 2004,2011 International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Author: Alan Liu
9 * Created: March 19 2004
10 * Since: ICU 3.0
11 **********************************************************************
12 */
13 #include "textfile.h"
14 #include "cmemory.h"
15 #include "cstring.h"
16 #include "intltest.h"
17 #include "util.h"
18
19 // If the symbol CCP is defined, then the 'name' and 'encoding'
20 // constructor parameters are copied. Otherwise they are aliased.
21 // #define CCP
22
TextFile(const char * _name,const char * _encoding,UErrorCode & ec)23 TextFile::TextFile(const char* _name, const char* _encoding, UErrorCode& ec) :
24 file(0),
25 name(0), encoding(0),
26 buffer(0),
27 capacity(0),
28 lineNo(0)
29 {
30 if (U_FAILURE(ec) || _name == 0 || _encoding == 0) {
31 if (U_SUCCESS(ec)) {
32 ec = U_ILLEGAL_ARGUMENT_ERROR;
33 }
34 return;
35 }
36
37 #ifdef CCP
38 name = uprv_malloc(uprv_strlen(_name) + 1);
39 encoding = uprv_malloc(uprv_strlen(_encoding) + 1);
40 if (name == 0 || encoding == 0) {
41 ec = U_MEMORY_ALLOCATION_ERROR;
42 return;
43 }
44 uprv_strcpy(name, _name);
45 uprv_strcpy(encoding, _encoding);
46 #else
47 name = (char*) _name;
48 encoding = (char*) _encoding;
49 #endif
50
51 const char* testDir = IntlTest::getSourceTestData(ec);
52 if (U_FAILURE(ec)) {
53 return;
54 }
55 if (!ensureCapacity((int32_t)(uprv_strlen(testDir) + uprv_strlen(name) + 1))) {
56 ec = U_MEMORY_ALLOCATION_ERROR;
57 return;
58 }
59 uprv_strcpy(buffer, testDir);
60 uprv_strcat(buffer, name);
61
62 file = T_FileStream_open(buffer, "rb");
63 if (file == 0) {
64 ec = U_ILLEGAL_ARGUMENT_ERROR;
65 return;
66 }
67 }
68
~TextFile()69 TextFile::~TextFile() {
70 if (file != 0) T_FileStream_close(file);
71 if (buffer != 0) uprv_free(buffer);
72 #ifdef CCP
73 uprv_free(name);
74 uprv_free(encoding);
75 #endif
76 }
77
readLine(UnicodeString & line,UErrorCode & ec)78 UBool TextFile::readLine(UnicodeString& line, UErrorCode& ec) {
79 if (T_FileStream_eof(file)) {
80 return false;
81 }
82 // Note: 'buffer' may change after ensureCapacity() is called,
83 // so don't use
84 // p=buffer; *p++=c;
85 // but rather
86 // i=; buffer[i++]=c;
87 int32_t n = 0;
88 for (;;) {
89 int c = T_FileStream_getc(file); // sic: int, not int32_t
90 if (c < 0 || c == 0xD || c == 0xA) {
91 // consume 0xA following 0xD
92 if (c == 0xD) {
93 c = T_FileStream_getc(file);
94 if (c != 0xA && c >= 0) {
95 T_FileStream_ungetc(c, file);
96 }
97 }
98 break;
99 }
100 if (!setBuffer(n++, c, ec)) return false;
101 }
102 if (!setBuffer(n++, 0, ec)) return false;
103 UnicodeString str(buffer, encoding);
104 // Remove BOM in first line, if present
105 if (lineNo == 0 && str[0] == 0xFEFF) {
106 str.remove(0, 1);
107 }
108 ++lineNo;
109 line = str.unescape();
110 return true;
111 }
112
readLineSkippingComments(UnicodeString & line,UErrorCode & ec,UBool trim)113 UBool TextFile::readLineSkippingComments(UnicodeString& line, UErrorCode& ec,
114 UBool trim) {
115 for (;;) {
116 if (!readLine(line, ec)) return false;
117 // Skip over white space
118 int32_t pos = 0;
119 ICU_Utility::skipWhitespace(line, pos, true);
120 // Ignore blank lines and comment lines
121 if (pos == line.length() || line.charAt(pos) == 0x23/*'#'*/) {
122 continue;
123 }
124 // Process line
125 if (trim) line.remove(0, pos);
126 return true;
127 }
128 }
129
130 /**
131 * Set buffer[index] to c, growing buffer if necessary. Return true if
132 * successful.
133 */
setBuffer(int32_t index,char c,UErrorCode & ec)134 UBool TextFile::setBuffer(int32_t index, char c, UErrorCode& ec) {
135 if (capacity <= index) {
136 if (!ensureCapacity(index+1)) {
137 ec = U_MEMORY_ALLOCATION_ERROR;
138 return false;
139 }
140 }
141 buffer[index] = c;
142 return true;
143 }
144
145 /**
146 * Make sure that 'buffer' has at least 'mincapacity' bytes.
147 * Return true upon success. Upon return, 'buffer' may change
148 * value. In any case, previous contents are preserved.
149 */
150 #define LOWEST_MIN_CAPACITY 64
ensureCapacity(int32_t mincapacity)151 UBool TextFile::ensureCapacity(int32_t mincapacity) {
152 if (capacity >= mincapacity) {
153 return true;
154 }
155
156 // Grow by factor of 2 to prevent frequent allocation
157 // Note: 'capacity' may be 0
158 int32_t i = (capacity < LOWEST_MIN_CAPACITY)? LOWEST_MIN_CAPACITY: capacity;
159 while (i < mincapacity) {
160 i <<= 1;
161 if (i < 0) {
162 i = 0x7FFFFFFF;
163 break;
164 }
165 }
166 mincapacity = i;
167
168 // Simple realloc() no good; contents not preserved
169 // Note: 'buffer' may be 0
170 char* newbuffer = (char*) uprv_malloc(mincapacity);
171 if (newbuffer == 0) {
172 return false;
173 }
174 if (buffer != 0) {
175 uprv_strncpy(newbuffer, buffer, capacity);
176 uprv_free(buffer);
177 }
178 buffer = newbuffer;
179 capacity = mincapacity;
180 return true;
181 }
182
183