1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 1999-2013, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************/ 10 11 12 /*---------------------------------------------------------------------------- 13 * 14 * Memory mapped file wrappers for use by the ICU Data Implementation 15 * All of the platform-specific implementation for mapping data files 16 * is here. The rest of the ICU Data implementation uses only the 17 * wrapper functions. 18 * 19 *----------------------------------------------------------------------------*/ 20 /* Defines _XOPEN_SOURCE for access to POSIX functions. 21 * Must be before any other #includes. */ 22 #include "uposixdefs.h" 23 24 #include "unicode/putil.h" 25 #include "unicode/ustring.h" 26 #include "udatamem.h" 27 #include "umapfile.h" 28 29 /* memory-mapping base definitions ------------------------------------------ */ 30 31 #if MAP_IMPLEMENTATION==MAP_WIN32 32 #ifndef WIN32_LEAN_AND_MEAN 33 # define WIN32_LEAN_AND_MEAN 34 #endif 35 # define VC_EXTRALEAN 36 # define NOUSER 37 # define NOSERVICE 38 # define NOIME 39 # define NOMCX 40 41 # if U_PLATFORM_HAS_WINUWP_API == 1 42 // Some previous versions of the Windows 10 SDK don't expose various APIs for UWP applications 43 // to use, even though UWP apps are allowed to call and use them. Temporarily change the 44 // WINAPI family partition below to Desktop, so that function declarations are visible for UWP. 45 # include <winapifamily.h> 46 # if !(WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_SYSTEM)) 47 # pragma push_macro("WINAPI_PARTITION_DESKTOP") 48 # undef WINAPI_PARTITION_DESKTOP 49 # define WINAPI_PARTITION_DESKTOP 1 50 # define CHANGED_WINAPI_PARTITION_DESKTOP_VALUE 51 # endif 52 # endif 53 54 # include <windows.h> 55 56 # if U_PLATFORM_HAS_WINUWP_API == 1 && defined(CHANGED_WINAPI_PARTITION_DESKTOP_VALUE) 57 # pragma pop_macro("WINAPI_PARTITION_DESKTOP") 58 # endif 59 60 # include "cmemory.h" 61 62 typedef HANDLE MemoryMap; 63 64 # define IS_MAP(map) ((map)!=nullptr) 65 66 #elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL 67 typedef size_t MemoryMap; 68 69 # define IS_MAP(map) ((map)!=0) 70 71 # include <unistd.h> 72 # include <sys/mman.h> 73 # include <sys/stat.h> 74 # include <fcntl.h> 75 76 # ifndef MAP_FAILED 77 # define MAP_FAILED ((void*)-1) 78 # endif 79 80 # if MAP_IMPLEMENTATION==MAP_390DLL 81 /* No memory mapping for 390 batch mode. Fake it using dll loading. */ 82 # include <dll.h> 83 # include "cstring.h" 84 # include "cmemory.h" 85 # include "unicode/udata.h" 86 # define LIB_PREFIX "lib" 87 # define LIB_SUFFIX ".dll" 88 /* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */ 89 # define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat" 90 # endif 91 #elif MAP_IMPLEMENTATION==MAP_STDIO 92 # include <stdio.h> 93 # include "cmemory.h" 94 95 typedef void *MemoryMap; 96 97 # define IS_MAP(map) ((map)!=nullptr) 98 #endif 99 100 /*----------------------------------------------------------------------------* 101 * * 102 * Memory Mapped File support. Platform dependent implementation of * 103 * functions used by the rest of the implementation.* 104 * * 105 *----------------------------------------------------------------------------*/ 106 #if MAP_IMPLEMENTATION==MAP_NONE 107 U_CFUNC UBool uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)108 uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { 109 if (U_FAILURE(*status)) { 110 return FALSE; 111 } 112 UDataMemory_init(pData); /* Clear the output struct. */ 113 return FALSE; /* no file access */ 114 } 115 uprv_unmapFile(UDataMemory * pData)116 U_CFUNC void uprv_unmapFile(UDataMemory *pData) { 117 /* nothing to do */ 118 } 119 #elif MAP_IMPLEMENTATION==MAP_WIN32 120 U_CFUNC UBool uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)121 uprv_mapFile( 122 UDataMemory *pData, /* Fill in with info on the result doing the mapping. */ 123 /* Output only; any original contents are cleared. */ 124 const char *path, /* File path to be opened/mapped. */ 125 UErrorCode *status /* Error status, used to report out-of-memory errors. */ 126 ) 127 { 128 if (U_FAILURE(*status)) { 129 return FALSE; 130 } 131 132 HANDLE map = nullptr; 133 HANDLE file = INVALID_HANDLE_VALUE; 134 135 UDataMemory_init(pData); /* Clear the output struct. */ 136 137 /* open the input file */ 138 #if U_PLATFORM_HAS_WINUWP_API == 0 139 // Note: In the non-UWP code-path (ie: Win32), the value of the path variable might have come from 140 // the CRT 'getenv' function, and would be therefore be encoded in the default ANSI code page. 141 // This means that we can't call the *W version of API below, whereas in the UWP code-path 142 // there is no 'getenv' call, and thus the string will be only UTF-8/Invariant characters. 143 file=CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr, 144 OPEN_EXISTING, 145 FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, nullptr); 146 #else 147 // Convert from UTF-8 string to UTF-16 string. 148 wchar_t utf16Path[MAX_PATH]; 149 int32_t pathUtf16Len = 0; 150 u_strFromUTF8(reinterpret_cast<UChar*>(utf16Path), static_cast<int32_t>(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status); 151 152 if (U_FAILURE(*status)) { 153 return FALSE; 154 } 155 if (*status == U_STRING_NOT_TERMINATED_WARNING) { 156 // Report back an error instead of a warning. 157 *status = U_BUFFER_OVERFLOW_ERROR; 158 return FALSE; 159 } 160 161 file = CreateFileW(utf16Path, GENERIC_READ, FILE_SHARE_READ, nullptr, 162 OPEN_EXISTING, 163 FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, nullptr); 164 #endif 165 if (file == INVALID_HANDLE_VALUE) { 166 // If we failed to open the file due to an out-of-memory error, then we want 167 // to report that error back to the caller. 168 if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { 169 *status = U_MEMORY_ALLOCATION_ERROR; 170 } 171 return FALSE; 172 } 173 174 // Note: We use NULL/nullptr for lpAttributes parameter below. 175 // This means our handle cannot be inherited and we will get the default security descriptor. 176 /* create an unnamed Windows file-mapping object for the specified file */ 177 map = CreateFileMappingW(file, nullptr, PAGE_READONLY, 0, 0, nullptr); 178 179 CloseHandle(file); 180 if (map == nullptr) { 181 // If we failed to create the mapping due to an out-of-memory error, then 182 // we want to report that error back to the caller. 183 if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { 184 *status = U_MEMORY_ALLOCATION_ERROR; 185 } 186 return FALSE; 187 } 188 189 /* map a view of the file into our address space */ 190 pData->pHeader = reinterpret_cast<const DataHeader *>(MapViewOfFile(map, FILE_MAP_READ, 0, 0, 0)); 191 if (pData->pHeader == nullptr) { 192 CloseHandle(map); 193 return FALSE; 194 } 195 pData->map = map; 196 return TRUE; 197 } 198 199 U_CFUNC void uprv_unmapFile(UDataMemory * pData)200 uprv_unmapFile(UDataMemory *pData) { 201 if (pData != nullptr && pData->map != nullptr) { 202 UnmapViewOfFile(pData->pHeader); 203 CloseHandle(pData->map); 204 pData->pHeader = nullptr; 205 pData->map = nullptr; 206 } 207 } 208 209 210 211 #elif MAP_IMPLEMENTATION==MAP_POSIX 212 U_CFUNC UBool uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)213 uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { 214 int fd; 215 int length; 216 struct stat mystat; 217 void *data; 218 219 if (U_FAILURE(*status)) { 220 return FALSE; 221 } 222 223 UDataMemory_init(pData); /* Clear the output struct. */ 224 225 /* determine the length of the file */ 226 if(stat(path, &mystat)!=0 || mystat.st_size<=0) { 227 return FALSE; 228 } 229 length=mystat.st_size; 230 231 /* open the file */ 232 fd=open(path, O_RDONLY); 233 if(fd==-1) { 234 return FALSE; 235 } 236 237 /* get a view of the mapping */ 238 #if U_PLATFORM != U_PF_HPUX 239 data=mmap(0, length, PROT_READ, MAP_SHARED, fd, 0); 240 #else 241 data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); 242 #endif 243 close(fd); /* no longer needed */ 244 if(data==MAP_FAILED) { 245 // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? 246 return FALSE; 247 } 248 249 pData->map = (char *)data + length; 250 pData->pHeader=(const DataHeader *)data; 251 pData->mapAddr = data; 252 #if U_PLATFORM == U_PF_IPHONE 253 posix_madvise(data, length, POSIX_MADV_RANDOM); 254 #endif 255 return TRUE; 256 } 257 258 U_CFUNC void uprv_unmapFile(UDataMemory * pData)259 uprv_unmapFile(UDataMemory *pData) { 260 if(pData!=nullptr && pData->map!=nullptr) { 261 size_t dataLen = (char *)pData->map - (char *)pData->mapAddr; 262 if(munmap(pData->mapAddr, dataLen)==-1) { 263 } 264 pData->pHeader=nullptr; 265 pData->map=0; 266 pData->mapAddr=nullptr; 267 } 268 } 269 270 271 272 #elif MAP_IMPLEMENTATION==MAP_STDIO 273 /* copy of the filestrm.c/T_FileStream_size() implementation */ 274 static int32_t umap_fsize(FILE * f)275 umap_fsize(FILE *f) { 276 int32_t savedPos = ftell(f); 277 int32_t size = 0; 278 279 /*Changes by Bertrand A. D. doesn't affect the current position 280 goes to the end of the file before ftell*/ 281 fseek(f, 0, SEEK_END); 282 size = (int32_t)ftell(f); 283 fseek(f, savedPos, SEEK_SET); 284 return size; 285 } 286 287 U_CFUNC UBool uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)288 uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { 289 FILE *file; 290 int32_t fileLength; 291 void *p; 292 293 if (U_FAILURE(*status)) { 294 return FALSE; 295 } 296 297 UDataMemory_init(pData); /* Clear the output struct. */ 298 /* open the input file */ 299 file=fopen(path, "rb"); 300 if(file==nullptr) { 301 return FALSE; 302 } 303 304 /* get the file length */ 305 fileLength=umap_fsize(file); 306 if(ferror(file) || fileLength<=20) { 307 fclose(file); 308 return FALSE; 309 } 310 311 /* allocate the memory to hold the file data */ 312 p=uprv_malloc(fileLength); 313 if(p==nullptr) { 314 fclose(file); 315 *status = U_MEMORY_ALLOCATION_ERROR; 316 return FALSE; 317 } 318 319 /* read the file */ 320 if(fileLength!=fread(p, 1, fileLength, file)) { 321 uprv_free(p); 322 fclose(file); 323 return FALSE; 324 } 325 326 fclose(file); 327 pData->map=p; 328 pData->pHeader=(const DataHeader *)p; 329 pData->mapAddr=p; 330 return TRUE; 331 } 332 333 U_CFUNC void uprv_unmapFile(UDataMemory * pData)334 uprv_unmapFile(UDataMemory *pData) { 335 if(pData!=nullptr && pData->map!=nullptr) { 336 uprv_free(pData->map); 337 pData->map = nullptr; 338 pData->mapAddr = nullptr; 339 pData->pHeader = nullptr; 340 } 341 } 342 343 344 #elif MAP_IMPLEMENTATION==MAP_390DLL 345 /* 390 specific Library Loading. 346 * This is the only platform left that dynamically loads an ICU Data Library. 347 * All other platforms use .data files when dynamic loading is required, but 348 * this turn out to be awkward to support in 390 batch mode. 349 * 350 * The idea here is to hide the fact that 390 is using dll loading from the 351 * rest of ICU, and make it look like there is file loading happening. 352 * 353 */ 354 strcpy_returnEnd(char * dest,const char * src)355 static char *strcpy_returnEnd(char *dest, const char *src) 356 { 357 while((*dest=*src)!=0) { 358 ++dest; 359 ++src; 360 } 361 return dest; 362 } 363 364 /*------------------------------------------------------------------------------ 365 * 366 * computeDirPath given a user-supplied path of an item to be opened, 367 * compute and return 368 * - the full directory path to be used 369 * when opening the file. 370 * - Pointer to null at end of above returned path 371 * 372 * Parameters: 373 * path: input path. Buffer is not altered. 374 * pathBuffer: Output buffer. Any contents are overwritten. 375 * 376 * Returns: 377 * Pointer to null termination in returned pathBuffer. 378 * 379 * TODO: This works the way ICU historically has, but the 380 * whole data fallback search path is so complicated that 381 * probably almost no one will ever really understand it, 382 * the potential for confusion is large. (It's not just 383 * this one function, but the whole scheme.) 384 * 385 *------------------------------------------------------------------------------*/ uprv_computeDirPath(const char * path,char * pathBuffer)386 static char *uprv_computeDirPath(const char *path, char *pathBuffer) 387 { 388 char *finalSlash; /* Ptr to last dir separator in input path, or null if none. */ 389 int32_t pathLen; /* Length of the returned directory path */ 390 391 finalSlash = 0; 392 if (path != 0) { 393 finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR); 394 } 395 396 *pathBuffer = 0; 397 if (finalSlash == 0) { 398 /* No user-supplied path. 399 * Copy the ICU_DATA path to the path buffer and return that*/ 400 const char *icuDataDir; 401 icuDataDir=u_getDataDirectory(); 402 if(icuDataDir!=nullptr && *icuDataDir!=0) { 403 return strcpy_returnEnd(pathBuffer, icuDataDir); 404 } else { 405 /* there is no icuDataDir either. Just return the empty pathBuffer. */ 406 return pathBuffer; 407 } 408 } 409 410 /* User supplied path did contain a directory portion. 411 * Copy it to the output path buffer */ 412 pathLen = (int32_t)(finalSlash - path + 1); 413 uprv_memcpy(pathBuffer, path, pathLen); 414 *(pathBuffer+pathLen) = 0; 415 return pathBuffer+pathLen; 416 } 417 418 419 # define DATA_TYPE "dat" 420 uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)421 U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { 422 const char *inBasename; 423 char *basename; 424 char pathBuffer[1024]; 425 const DataHeader *pHeader; 426 dllhandle *handle; 427 void *val=0; 428 429 if (U_FAILURE(*status)) { 430 return FALSE; 431 } 432 433 inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR); 434 if(inBasename==nullptr) { 435 inBasename = path; 436 } else { 437 inBasename++; 438 } 439 basename=uprv_computeDirPath(path, pathBuffer); 440 if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat") != 0) { 441 /* must mmap file... for build */ 442 int fd; 443 int length; 444 struct stat mystat; 445 void *data; 446 UDataMemory_init(pData); /* Clear the output struct. */ 447 448 /* determine the length of the file */ 449 if(stat(path, &mystat)!=0 || mystat.st_size<=0) { 450 return FALSE; 451 } 452 length=mystat.st_size; 453 454 /* open the file */ 455 fd=open(path, O_RDONLY); 456 if(fd==-1) { 457 return FALSE; 458 } 459 460 /* get a view of the mapping */ 461 data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); 462 close(fd); /* no longer needed */ 463 if(data==MAP_FAILED) { 464 // Possibly check the errorno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? 465 return FALSE; 466 } 467 pData->map = (char *)data + length; 468 pData->pHeader=(const DataHeader *)data; 469 pData->mapAddr = data; 470 return TRUE; 471 } 472 473 # ifdef OS390BATCH 474 /* ### hack: we still need to get u_getDataDirectory() fixed 475 for OS/390 (batch mode - always return "//"? ) 476 and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!) 477 This is probably due to the strange file system on OS/390. It's more like 478 a database with short entry names than a typical file system. */ 479 /* U_ICUDATA_NAME should always have the correct name */ 480 /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */ 481 /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */ 482 /* PROJECT!!!!! */ 483 uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA"); 484 # else 485 # ifdef U_ICU_USE_OLD_DATA 486 /* set up the library name */ 487 uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME "68" LIB_SUFFIX); 488 # else 489 /* set up the library name */ 490 uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX); 491 # endif 492 # endif 493 494 # ifdef UDATA_DEBUG 495 fprintf(stderr, "dllload: %s ", pathBuffer); 496 # endif 497 498 handle=dllload(pathBuffer); 499 500 # ifdef UDATA_DEBUG 501 fprintf(stderr, " -> %08X\n", handle ); 502 # endif 503 504 if(handle != nullptr) { 505 /* we have a data DLL - what kind of lookup do we need here? */ 506 /* try to find the Table of Contents */ 507 UDataMemory_init(pData); /* Clear the output struct. */ 508 val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME); 509 if(val == 0) { 510 /* failed... so keep looking */ 511 return FALSE; 512 } 513 # ifdef UDATA_DEBUG 514 fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n", handle, U_ICUDATA_ENTRY_NAME, val); 515 # endif 516 517 pData->pHeader=(const DataHeader *)val; 518 return TRUE; 519 } else { 520 return FALSE; /* no handle */ 521 } 522 } 523 uprv_unmapFile(UDataMemory * pData)524 U_CFUNC void uprv_unmapFile(UDataMemory *pData) { 525 if(pData!=nullptr && pData->map!=nullptr) { 526 uprv_free(pData->map); 527 pData->map = nullptr; 528 pData->mapAddr = nullptr; 529 pData->pHeader = nullptr; 530 } 531 } 532 533 #else 534 # error MAP_IMPLEMENTATION is set incorrectly 535 #endif 536