1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 1999-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: toolutil.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 1999nov19
16 * created by: Markus W. Scherer
17 *
18 * 6/25/08 - Added Cygwin specific code in uprv_mkdir - Brian Rower
19 *
20 * This file contains utility functions for ICU tools like genccode.
21 */
22
23 #include "unicode/platform.h"
24 #if U_PLATFORM == U_PF_MINGW
25 // *cough* - for struct stat
26 #ifdef __STRICT_ANSI__
27 #undef __STRICT_ANSI__
28 #endif
29 #endif
30
31 #include <stdio.h>
32 #include <sys/stat.h>
33 #include <fstream>
34 #include <time.h>
35 #include "unicode/utypes.h"
36
37 #ifndef U_TOOLUTIL_IMPLEMENTATION
38 #error U_TOOLUTIL_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu
39 #endif
40
41 #if U_PLATFORM_USES_ONLY_WIN32_API
42 # define VC_EXTRALEAN
43 # define WIN32_LEAN_AND_MEAN
44 # define NOUSER
45 # define NOSERVICE
46 # define NOIME
47 # define NOMCX
48 # if U_PLATFORM == U_PF_MINGW
49 # define __NO_MINGW_LFS /* gets around missing 'off64_t' */
50 # endif
51 # include <windows.h>
52 # include <direct.h>
53 #else
54 # include <sys/stat.h>
55 # include <sys/types.h>
56 #endif
57
58 /* In MinGW environment, io.h needs to be included for _mkdir() */
59 #if U_PLATFORM == U_PF_MINGW
60 #include <io.h>
61 #endif
62
63 #include <errno.h>
64
65 #include <cstddef>
66
67 #include "unicode/errorcode.h"
68 #include "unicode/putil.h"
69 #include "unicode/uchar.h"
70 #include "unicode/umutablecptrie.h"
71 #include "unicode/ucptrie.h"
72 #include "cmemory.h"
73 #include "cstring.h"
74 #include "toolutil.h"
75 #include "uassert.h"
76
77 U_NAMESPACE_BEGIN
78
~IcuToolErrorCode()79 IcuToolErrorCode::~IcuToolErrorCode() {
80 // Safe because our handleFailure() does not throw exceptions.
81 if(isFailure()) { handleFailure(); }
82 }
83
handleFailure() const84 void IcuToolErrorCode::handleFailure() const {
85 fprintf(stderr, "error at %s: %s\n", location, errorName());
86 exit(errorCode);
87 }
88
89 namespace toolutil {
90
setCPTrieBit(UMutableCPTrie * mutableCPTrie,UChar32 start,UChar32 end,int32_t shift,bool on,UErrorCode & errorCode)91 void setCPTrieBit(UMutableCPTrie *mutableCPTrie,
92 UChar32 start, UChar32 end, int32_t shift, bool on, UErrorCode &errorCode) {
93 uint32_t mask = U_MASK(shift);
94 uint32_t value = on ? mask : 0;
95 setCPTrieBits(mutableCPTrie, start, end, mask, value, errorCode);
96 }
97
setCPTrieBits(UMutableCPTrie * mutableCPTrie,UChar32 start,UChar32 end,uint32_t mask,uint32_t value,UErrorCode & errorCode)98 void setCPTrieBits(UMutableCPTrie *mutableCPTrie,
99 UChar32 start, UChar32 end, uint32_t mask, uint32_t value,
100 UErrorCode &errorCode) {
101 if (U_FAILURE(errorCode)) { return; }
102 // The value must not have any bits set outside of the mask.
103 if ((value & ~mask) != 0) {
104 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
105 return;
106 }
107
108 if (start == end) {
109 uint32_t oldValue = umutablecptrie_get(mutableCPTrie, start);
110 uint32_t newValue = (oldValue & ~mask) | value;
111 if (newValue != oldValue) {
112 umutablecptrie_set(mutableCPTrie, start, newValue, &errorCode);
113 }
114 return;
115 }
116 while (start <= end && U_SUCCESS(errorCode)) {
117 uint32_t oldValue;
118 UChar32 rangeEnd = umutablecptrie_getRange(
119 mutableCPTrie, start, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &oldValue);
120 if (rangeEnd > end) {
121 rangeEnd = end;
122 }
123 uint32_t newValue = (oldValue & ~mask) | value;
124 if (newValue != oldValue) {
125 umutablecptrie_setRange(mutableCPTrie, start, rangeEnd, newValue, &errorCode);
126 }
127 start = rangeEnd + 1;
128 }
129 }
130
getCPTrieSize(UMutableCPTrie * mt,UCPTrieType type,UCPTrieValueWidth valueWidth)131 int32_t getCPTrieSize(UMutableCPTrie *mt, UCPTrieType type, UCPTrieValueWidth valueWidth) {
132 UErrorCode errorCode = U_ZERO_ERROR;
133 UCPTrie *cpTrie = umutablecptrie_buildImmutable(mt, type, valueWidth, &errorCode);
134 if (U_FAILURE(errorCode)) {
135 fprintf(stderr,
136 "toolutil/getCPTrieSize error: umutablecptrie_buildImmutable() failed: %s\n",
137 u_errorName(errorCode));
138 return -1;
139 }
140 uint8_t block[100000];
141 int32_t size = ucptrie_toBinary(cpTrie, block, sizeof(block), &errorCode);
142 ucptrie_close(cpTrie);
143 if (U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
144 fprintf(stderr,
145 "toolutil/getCPTrieSize error: ucptrie_toBinary() failed: %s (length %ld)\n",
146 u_errorName(errorCode), static_cast<long>(size));
147 return -1;
148 }
149 U_ASSERT((size & 3) == 0); // multiple of 4 bytes
150 return size;
151 }
152
153 } // toolutil
154
155 U_NAMESPACE_END
156
157 static int32_t currentYear = -1;
158
getCurrentYear()159 U_CAPI int32_t U_EXPORT2 getCurrentYear() {
160 if(currentYear == -1) {
161 time_t now = time(nullptr);
162 tm *fields = gmtime(&now);
163 currentYear = 1900 + fields->tm_year;
164 }
165 return currentYear;
166 }
167
168
169 U_CAPI const char * U_EXPORT2
getLongPathname(const char * pathname)170 getLongPathname(const char *pathname) {
171 #if U_PLATFORM_USES_ONLY_WIN32_API
172 /* anticipate problems with "short" pathnames */
173 static WIN32_FIND_DATAA info;
174 HANDLE file=FindFirstFileA(pathname, &info);
175 if(file!=INVALID_HANDLE_VALUE) {
176 if(info.cAlternateFileName[0]!=0) {
177 /* this file has a short name, get and use the long one */
178 const char *basename=findBasename(pathname);
179 if(basename!=pathname) {
180 /* prepend the long filename with the original path */
181 uprv_memmove(info.cFileName+(basename-pathname), info.cFileName, uprv_strlen(info.cFileName)+1);
182 uprv_memcpy(info.cFileName, pathname, basename-pathname);
183 }
184 pathname=info.cFileName;
185 }
186 FindClose(file);
187 }
188 #endif
189 return pathname;
190 }
191
192 U_CAPI const char * U_EXPORT2
findDirname(const char * path,char * buffer,int32_t bufLen,UErrorCode * status)193 findDirname(const char *path, char *buffer, int32_t bufLen, UErrorCode* status) {
194 if(U_FAILURE(*status)) return nullptr;
195 const char *resultPtr = nullptr;
196 int32_t resultLen = 0;
197
198 const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR);
199 #if U_FILE_ALT_SEP_CHAR!=U_FILE_SEP_CHAR
200 const char *basenameAlt=uprv_strrchr(path, U_FILE_ALT_SEP_CHAR);
201 if(basenameAlt && (!basename || basename<basenameAlt)) {
202 basename = basenameAlt;
203 }
204 #endif
205 if(!basename) {
206 /* no basename - return ''. */
207 resultPtr = "";
208 resultLen = 0;
209 } else {
210 resultPtr = path;
211 resultLen = static_cast<int32_t>(basename - path);
212 if(resultLen<1) {
213 resultLen = 1; /* '/' or '/a' -> '/' */
214 }
215 }
216
217 if((resultLen+1) <= bufLen) {
218 uprv_strncpy(buffer, resultPtr, resultLen);
219 buffer[resultLen]=0;
220 return buffer;
221 } else {
222 *status = U_BUFFER_OVERFLOW_ERROR;
223 return nullptr;
224 }
225 }
226
227 U_CAPI const char * U_EXPORT2
findBasename(const char * filename)228 findBasename(const char *filename) {
229 const char *basename=uprv_strrchr(filename, U_FILE_SEP_CHAR);
230
231 #if U_FILE_ALT_SEP_CHAR!=U_FILE_SEP_CHAR
232 //be lenient about pathname separators on Windows, like official implementation of C++17 std::filesystem in MSVC
233 //would be convenient to merge this loop with the one above, but alas, there is no such solution in the standard library
234 const char *alt_basename=uprv_strrchr(filename, U_FILE_ALT_SEP_CHAR);
235 if(alt_basename>basename) {
236 basename=alt_basename;
237 }
238 #endif
239
240 if(basename!=nullptr) {
241 return basename+1;
242 } else {
243 return filename;
244 }
245 }
246
247 U_CAPI void U_EXPORT2
uprv_mkdir(const char * pathname,UErrorCode * status)248 uprv_mkdir(const char *pathname, UErrorCode *status) {
249
250 int retVal = 0;
251 #if U_PLATFORM_USES_ONLY_WIN32_API
252 retVal = _mkdir(pathname);
253 #else
254 retVal = mkdir(pathname, S_IRWXU | (S_IROTH | S_IXOTH) | (S_IROTH | S_IXOTH));
255 #endif
256 if (retVal && errno != EEXIST) {
257 #if U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
258 /*if using Cygwin and the mkdir says it failed...check if the directory already exists..*/
259 /* if it does...don't give the error, if it does not...give the error - Brian Rower - 6/25/08 */
260 struct stat st;
261
262 if(stat(pathname,&st) != 0)
263 {
264 *status = U_FILE_ACCESS_ERROR;
265 }
266 #else
267 *status = U_FILE_ACCESS_ERROR;
268 #endif
269 }
270 }
271
272 #if !UCONFIG_NO_FILE_IO
273 U_CAPI UBool U_EXPORT2
uprv_fileExists(const char * file)274 uprv_fileExists(const char *file) {
275 struct stat stat_buf;
276 if (stat(file, &stat_buf) == 0) {
277 return true;
278 } else {
279 return false;
280 }
281 }
282 #endif
283
284 U_CAPI int32_t U_EXPORT2
uprv_compareGoldenFiles(const char * buffer,int32_t bufferLen,const char * goldenFilePath,bool overwrite)285 uprv_compareGoldenFiles(
286 const char* buffer, int32_t bufferLen,
287 const char* goldenFilePath,
288 bool overwrite) {
289
290 if (overwrite) {
291 std::ofstream ofs;
292 ofs.open(goldenFilePath);
293 ofs.write(buffer, bufferLen);
294 ofs.close();
295 return -1;
296 }
297
298 std::ifstream ifs(goldenFilePath, std::ifstream::in);
299 int32_t pos = 0;
300 char c;
301 while (ifs.get(c) && pos < bufferLen) {
302 if (c != buffer[pos]) {
303 // Files differ at this position
304 break;
305 }
306 pos++;
307 }
308 if (pos == bufferLen && ifs.eof()) {
309 // Files are same lengths
310 pos = -1;
311 }
312 ifs.close();
313 return pos;
314 }
315
316 /*U_CAPI UDate U_EXPORT2
317 uprv_getModificationDate(const char *pathname, UErrorCode *status)
318 {
319 if(U_FAILURE(*status)) {
320 return;
321 }
322 // TODO: handle case where stat is not available
323 struct stat st;
324
325 if(stat(pathname,&st) != 0)
326 {
327 *status = U_FILE_ACCESS_ERROR;
328 } else {
329 return st.st_mtime;
330 }
331 }
332 */
333
334 /* tool memory helper ------------------------------------------------------- */
335
/**
 * Growable array of fixed-size items, used through the utm_*() functions.
 * The struct is allocated together with room for the initial items, which
 * start at staticArray.
 */
struct UToolMemory {
    /* label used in error messages */
    char name[64];
    /* number of items the current array can hold */
    int32_t capacity;
    /* upper limit for capacity */
    int32_t maxCapacity;
    /* size of one item in bytes */
    int32_t size;
    /* number of items handed out so far */
    int32_t idx;
    /* current item storage: either staticArray or a separately allocated block */
    void *array;
    /* start of the inline item storage that follows the struct header */
    alignas(std::max_align_t) char staticArray[1];
};
342
343 U_CAPI UToolMemory * U_EXPORT2
utm_open(const char * name,int32_t initialCapacity,int32_t maxCapacity,int32_t size)344 utm_open(const char *name, int32_t initialCapacity, int32_t maxCapacity, int32_t size) {
345 UToolMemory *mem;
346
347 if(maxCapacity<initialCapacity) {
348 maxCapacity=initialCapacity;
349 }
350
351 mem=(UToolMemory *)uprv_malloc(sizeof(UToolMemory)+initialCapacity*size);
352 if(mem==nullptr) {
353 fprintf(stderr, "error: %s - out of memory\n", name);
354 exit(U_MEMORY_ALLOCATION_ERROR);
355 }
356 mem->array=mem->staticArray;
357
358 uprv_strcpy(mem->name, name);
359 mem->capacity=initialCapacity;
360 mem->maxCapacity=maxCapacity;
361 mem->size=size;
362 mem->idx=0;
363 return mem;
364 }
365
366 U_CAPI void U_EXPORT2
utm_close(UToolMemory * mem)367 utm_close(UToolMemory *mem) {
368 if(mem!=nullptr) {
369 if(mem->array!=mem->staticArray) {
370 uprv_free(mem->array);
371 }
372 uprv_free(mem);
373 }
374 }
375
376
377 U_CAPI void * U_EXPORT2
utm_getStart(UToolMemory * mem)378 utm_getStart(UToolMemory *mem) {
379 return (char *)mem->array;
380 }
381
382 U_CAPI int32_t U_EXPORT2
utm_countItems(UToolMemory * mem)383 utm_countItems(UToolMemory *mem) {
384 return mem->idx;
385 }
386
387
388 static UBool
utm_hasCapacity(UToolMemory * mem,int32_t capacity)389 utm_hasCapacity(UToolMemory *mem, int32_t capacity) {
390 if(mem->capacity<capacity) {
391 int32_t newCapacity;
392
393 if(mem->maxCapacity<capacity) {
394 fprintf(stderr, "error: %s - trying to use more than maxCapacity=%ld units\n",
395 mem->name, static_cast<long>(mem->maxCapacity));
396 exit(U_MEMORY_ALLOCATION_ERROR);
397 }
398
399 /* try to allocate a larger array */
400 if(capacity>=2*mem->capacity) {
401 newCapacity=capacity;
402 } else if(mem->capacity<=mem->maxCapacity/3) {
403 newCapacity=2*mem->capacity;
404 } else {
405 newCapacity=mem->maxCapacity;
406 }
407
408 if(mem->array==mem->staticArray) {
409 mem->array=uprv_malloc(newCapacity*mem->size);
410 if(mem->array!=nullptr) {
411 uprv_memcpy(mem->array, mem->staticArray, (size_t)mem->idx*mem->size);
412 }
413 } else {
414 mem->array=uprv_realloc(mem->array, newCapacity*mem->size);
415 }
416
417 if(mem->array==nullptr) {
418 fprintf(stderr, "error: %s - out of memory\n", mem->name);
419 exit(U_MEMORY_ALLOCATION_ERROR);
420 }
421 mem->capacity=newCapacity;
422 }
423
424 return true;
425 }
426
427 U_CAPI void * U_EXPORT2
utm_alloc(UToolMemory * mem)428 utm_alloc(UToolMemory *mem) {
429 char *p=nullptr;
430 int32_t oldIndex=mem->idx;
431 int32_t newIndex=oldIndex+1;
432 if(utm_hasCapacity(mem, newIndex)) {
433 p=(char *)mem->array+oldIndex*mem->size;
434 mem->idx=newIndex;
435 uprv_memset(p, 0, mem->size);
436 }
437 return p;
438 }
439
440 U_CAPI void * U_EXPORT2
utm_allocN(UToolMemory * mem,int32_t n)441 utm_allocN(UToolMemory *mem, int32_t n) {
442 char *p=nullptr;
443 int32_t oldIndex=mem->idx;
444 int32_t newIndex=oldIndex+n;
445 if(utm_hasCapacity(mem, newIndex)) {
446 p=(char *)mem->array+oldIndex*mem->size;
447 mem->idx=newIndex;
448 uprv_memset(p, 0, n*mem->size);
449 }
450 return p;
451 }
452