1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ****************************************************************************** 5 * 6 * Copyright (C) 1999-2013, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************/ 10 11 12 /*---------------------------------------------------------------------------- 13 * 14 * Memory mapped file wrappers for use by the ICU Data Implementation 15 * All of the platform-specific implementation for mapping data files 16 * is here. The rest of the ICU Data implementation uses only the 17 * wrapper functions. 18 * 19 *----------------------------------------------------------------------------*/ 20 /* Defines _XOPEN_SOURCE for access to POSIX functions. 21 * Must be before any other #includes. */ 22 #include "uposixdefs.h" 23 24 #include "unicode/putil.h" 25 #include "unicode/ustring.h" 26 #include "udatamem.h" 27 #include "umapfile.h" 28 29 /* memory-mapping base definitions ------------------------------------------ */ 30 31 #if MAP_IMPLEMENTATION==MAP_WIN32 32 #ifndef WIN32_LEAN_AND_MEAN 33 # define WIN32_LEAN_AND_MEAN 34 #endif 35 # define VC_EXTRALEAN 36 # define NOUSER 37 # define NOSERVICE 38 # define NOIME 39 # define NOMCX 40 41 # if U_PLATFORM_HAS_WINUWP_API == 1 42 // Some previous versions of the Windows 10 SDK don't expose various APIs for UWP applications 43 // to use, even though UWP apps are allowed to call and use them. Temporarily change the 44 // WINAPI family partition below to Desktop, so that function declarations are visible for UWP. 45 # include <winapifamily.h> 46 # if !(WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_SYSTEM)) 47 # pragma push_macro("WINAPI_PARTITION_DESKTOP") 48 # undef WINAPI_PARTITION_DESKTOP 49 # define WINAPI_PARTITION_DESKTOP 1 50 # define CHANGED_WINAPI_PARTITION_DESKTOP_VALUE 51 # endif 52 # endif 53 54 # include <windows.h> 55 56 # if U_PLATFORM_HAS_WINUWP_API == 1 && defined(CHANGED_WINAPI_PARTITION_DESKTOP_VALUE) 57 # pragma pop_macro("WINAPI_PARTITION_DESKTOP") 58 # endif 59 60 # include "cmemory.h" 61 62 typedef HANDLE MemoryMap; 63 64 # define IS_MAP(map) ((map)!=nullptr) 65 66 #elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL 67 typedef size_t MemoryMap; 68 69 # define IS_MAP(map) ((map)!=0) 70 71 # include <unistd.h> 72 # include <sys/mman.h> 73 # include <sys/stat.h> 74 # include <fcntl.h> 75 76 # ifndef MAP_FAILED 77 # define MAP_FAILED ((void*)-1) 78 # endif 79 80 # if MAP_IMPLEMENTATION==MAP_390DLL 81 /* No memory mapping for 390 batch mode. Fake it using dll loading. */ 82 # include <dll.h> 83 # include "cstring.h" 84 # include "cmemory.h" 85 # include "unicode/udata.h" 86 # define LIB_PREFIX "lib" 87 # define LIB_SUFFIX ".dll" 88 /* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */ 89 # define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat" 90 # endif 91 #elif MAP_IMPLEMENTATION==MAP_STDIO 92 # include <stdio.h> 93 # include "cmemory.h" 94 95 typedef void *MemoryMap; 96 97 # define IS_MAP(map) ((map)!=nullptr) 98 #endif 99 100 /*----------------------------------------------------------------------------* 101 * * 102 * Memory Mapped File support. Platform dependent implementation of * 103 * functions used by the rest of the implementation.* 104 * * 105 *----------------------------------------------------------------------------*/ 106 #if MAP_IMPLEMENTATION==MAP_NONE 107 U_CFUNC UBool uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)108 uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { 109 if (U_FAILURE(*status)) { 110 return false; 111 } 112 UDataMemory_init(pData); /* Clear the output struct. */ 113 return false; /* no file access */ 114 } 115 uprv_unmapFile(UDataMemory * pData)116 U_CFUNC void uprv_unmapFile(UDataMemory *pData) { 117 /* nothing to do */ 118 } 119 #elif MAP_IMPLEMENTATION==MAP_WIN32 120 U_CFUNC UBool uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)121 uprv_mapFile( 122 UDataMemory *pData, /* Fill in with info on the result doing the mapping. */ 123 /* Output only; any original contents are cleared. */ 124 const char *path, /* File path to be opened/mapped. */ 125 UErrorCode *status /* Error status, used to report out-of-memory errors. */ 126 ) 127 { 128 if (U_FAILURE(*status)) { 129 return false; 130 } 131 132 HANDLE map = nullptr; 133 HANDLE file = INVALID_HANDLE_VALUE; 134 135 UDataMemory_init(pData); /* Clear the output struct. */ 136 137 /* open the input file */ 138 #if U_PLATFORM_HAS_WINUWP_API == 0 139 // Note: In the non-UWP code-path (ie: Win32), the value of the path variable might have come from 140 // the CRT 'getenv' function, and would be therefore be encoded in the default ANSI code page. 141 // This means that we can't call the *W version of API below, whereas in the UWP code-path 142 // there is no 'getenv' call, and thus the string will be only UTF-8/Invariant characters. 143 file=CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr, 144 OPEN_EXISTING, 145 FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, nullptr); 146 #else 147 // Convert from UTF-8 string to UTF-16 string. 148 wchar_t utf16Path[MAX_PATH]; 149 int32_t pathUtf16Len = 0; 150 u_strFromUTF8(reinterpret_cast<char16_t*>(utf16Path), static_cast<int32_t>(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status); 151 152 if (U_FAILURE(*status)) { 153 return false; 154 } 155 if (*status == U_STRING_NOT_TERMINATED_WARNING) { 156 // Report back an error instead of a warning. 157 *status = U_BUFFER_OVERFLOW_ERROR; 158 return false; 159 } 160 161 file = CreateFileW(utf16Path, GENERIC_READ, FILE_SHARE_READ, nullptr, 162 OPEN_EXISTING, 163 FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, nullptr); 164 #endif 165 if (file == INVALID_HANDLE_VALUE) { 166 // If we failed to open the file due to an out-of-memory error, then we want 167 // to report that error back to the caller. 168 if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { 169 *status = U_MEMORY_ALLOCATION_ERROR; 170 } 171 return false; 172 } 173 174 // Note: We use nullptr/nullptr for lpAttributes parameter below. 175 // This means our handle cannot be inherited and we will get the default security descriptor. 176 /* create an unnamed Windows file-mapping object for the specified file */ 177 map = CreateFileMappingW(file, nullptr, PAGE_READONLY, 0, 0, nullptr); 178 179 CloseHandle(file); 180 if (map == nullptr) { 181 // If we failed to create the mapping due to an out-of-memory error, then 182 // we want to report that error back to the caller. 183 if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { 184 *status = U_MEMORY_ALLOCATION_ERROR; 185 } 186 return false; 187 } 188 189 /* map a view of the file into our address space */ 190 pData->pHeader = reinterpret_cast<const DataHeader *>(MapViewOfFile(map, FILE_MAP_READ, 0, 0, 0)); 191 if (pData->pHeader == nullptr) { 192 CloseHandle(map); 193 return false; 194 } 195 pData->map = map; 196 return true; 197 } 198 199 U_CFUNC void uprv_unmapFile(UDataMemory * pData)200 uprv_unmapFile(UDataMemory *pData) { 201 if (pData != nullptr && pData->map != nullptr) { 202 UnmapViewOfFile(pData->pHeader); 203 CloseHandle(pData->map); 204 pData->pHeader = nullptr; 205 pData->map = nullptr; 206 } 207 } 208 209 210 211 #elif MAP_IMPLEMENTATION==MAP_POSIX 212 U_CFUNC UBool uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)213 uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { 214 int fd; 215 int length; 216 struct stat mystat; 217 void *data; 218 219 if (U_FAILURE(*status)) { 220 return false; 221 } 222 223 UDataMemory_init(pData); /* Clear the output struct. */ 224 225 /* determine the length of the file */ 226 if(stat(path, &mystat)!=0 || mystat.st_size<=0) { 227 return false; 228 } 229 length=mystat.st_size; 230 231 /* open the file */ 232 fd=open(path, O_RDONLY); 233 if(fd==-1) { 234 return false; 235 } 236 237 /* get a view of the mapping */ 238 #if U_PLATFORM != U_PF_HPUX 239 data=mmap(nullptr, length, PROT_READ, MAP_SHARED, fd, 0); 240 #else 241 data=mmap(nullptr, length, PROT_READ, MAP_PRIVATE, fd, 0); 242 #endif 243 close(fd); /* no longer needed */ 244 if(data==MAP_FAILED) { 245 // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? 246 return false; 247 } 248 249 pData->map = (char *)data + length; 250 pData->pHeader=(const DataHeader *)data; 251 pData->mapAddr = data; 252 // Android-changed: madvise on Android for performance reason. 253 #if U_PLATFORM == U_PF_IPHONE || U_PLATFORM == U_PF_ANDROID 254 posix_madvise(data, length, POSIX_MADV_RANDOM); 255 #endif 256 return true; 257 } 258 259 U_CFUNC void uprv_unmapFile(UDataMemory * pData)260 uprv_unmapFile(UDataMemory *pData) { 261 if(pData!=nullptr && pData->map!=nullptr) { 262 size_t dataLen = (char *)pData->map - (char *)pData->mapAddr; 263 if(munmap(pData->mapAddr, dataLen)==-1) { 264 } 265 pData->pHeader=nullptr; 266 pData->map=nullptr; 267 pData->mapAddr=nullptr; 268 } 269 } 270 271 272 273 #elif MAP_IMPLEMENTATION==MAP_STDIO 274 /* copy of the filestrm.c/T_FileStream_size() implementation */ 275 static int32_t umap_fsize(FILE * f)276 umap_fsize(FILE *f) { 277 int32_t savedPos = ftell(f); 278 int32_t size = 0; 279 280 /*Changes by Bertrand A. D. doesn't affect the current position 281 goes to the end of the file before ftell*/ 282 fseek(f, 0, SEEK_END); 283 size = (int32_t)ftell(f); 284 fseek(f, savedPos, SEEK_SET); 285 return size; 286 } 287 288 U_CFUNC UBool uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)289 uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { 290 FILE *file; 291 int32_t fileLength; 292 void *p; 293 294 if (U_FAILURE(*status)) { 295 return false; 296 } 297 298 UDataMemory_init(pData); /* Clear the output struct. */ 299 /* open the input file */ 300 file=fopen(path, "rb"); 301 if(file==nullptr) { 302 return false; 303 } 304 305 /* get the file length */ 306 fileLength=umap_fsize(file); 307 if(ferror(file) || fileLength<=20) { 308 fclose(file); 309 return false; 310 } 311 312 /* allocate the memory to hold the file data */ 313 p=uprv_malloc(fileLength); 314 if(p==nullptr) { 315 fclose(file); 316 *status = U_MEMORY_ALLOCATION_ERROR; 317 return false; 318 } 319 320 /* read the file */ 321 if(fileLength!=fread(p, 1, fileLength, file)) { 322 uprv_free(p); 323 fclose(file); 324 return false; 325 } 326 327 fclose(file); 328 pData->map=p; 329 pData->pHeader=(const DataHeader *)p; 330 pData->mapAddr=p; 331 return true; 332 } 333 334 U_CFUNC void uprv_unmapFile(UDataMemory * pData)335 uprv_unmapFile(UDataMemory *pData) { 336 if(pData!=nullptr && pData->map!=nullptr) { 337 uprv_free(pData->map); 338 pData->map = nullptr; 339 pData->mapAddr = nullptr; 340 pData->pHeader = nullptr; 341 } 342 } 343 344 345 #elif MAP_IMPLEMENTATION==MAP_390DLL 346 /* 390 specific Library Loading. 347 * This is the only platform left that dynamically loads an ICU Data Library. 348 * All other platforms use .data files when dynamic loading is required, but 349 * this turn out to be awkward to support in 390 batch mode. 350 * 351 * The idea here is to hide the fact that 390 is using dll loading from the 352 * rest of ICU, and make it look like there is file loading happening. 353 * 354 */ 355 strcpy_returnEnd(char * dest,const char * src)356 static char *strcpy_returnEnd(char *dest, const char *src) 357 { 358 while((*dest=*src)!=0) { 359 ++dest; 360 ++src; 361 } 362 return dest; 363 } 364 365 /*------------------------------------------------------------------------------ 366 * 367 * computeDirPath given a user-supplied path of an item to be opened, 368 * compute and return 369 * - the full directory path to be used 370 * when opening the file. 371 * - Pointer to null at end of above returned path 372 * 373 * Parameters: 374 * path: input path. Buffer is not altered. 375 * pathBuffer: Output buffer. Any contents are overwritten. 376 * 377 * Returns: 378 * Pointer to null termination in returned pathBuffer. 379 * 380 * TODO: This works the way ICU historically has, but the 381 * whole data fallback search path is so complicated that 382 * probably almost no one will ever really understand it, 383 * the potential for confusion is large. (It's not just 384 * this one function, but the whole scheme.) 385 * 386 *------------------------------------------------------------------------------*/ uprv_computeDirPath(const char * path,char * pathBuffer)387 static char *uprv_computeDirPath(const char *path, char *pathBuffer) 388 { 389 char *finalSlash; /* Ptr to last dir separator in input path, or null if none. */ 390 int32_t pathLen; /* Length of the returned directory path */ 391 392 finalSlash = 0; 393 if (path != 0) { 394 finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR); 395 } 396 397 *pathBuffer = 0; 398 if (finalSlash == 0) { 399 /* No user-supplied path. 400 * Copy the ICU_DATA path to the path buffer and return that*/ 401 const char *icuDataDir; 402 icuDataDir=u_getDataDirectory(); 403 if(icuDataDir!=nullptr && *icuDataDir!=0) { 404 return strcpy_returnEnd(pathBuffer, icuDataDir); 405 } else { 406 /* there is no icuDataDir either. Just return the empty pathBuffer. */ 407 return pathBuffer; 408 } 409 } 410 411 /* User supplied path did contain a directory portion. 412 * Copy it to the output path buffer */ 413 pathLen = (int32_t)(finalSlash - path + 1); 414 uprv_memcpy(pathBuffer, path, pathLen); 415 *(pathBuffer+pathLen) = 0; 416 return pathBuffer+pathLen; 417 } 418 419 420 # define DATA_TYPE "dat" 421 uprv_mapFile(UDataMemory * pData,const char * path,UErrorCode * status)422 U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { 423 const char *inBasename; 424 char *basename; 425 char pathBuffer[1024]; 426 const DataHeader *pHeader; 427 dllhandle *handle; 428 void *val=0; 429 430 if (U_FAILURE(*status)) { 431 return false; 432 } 433 434 inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR); 435 if(inBasename==nullptr) { 436 inBasename = path; 437 } else { 438 inBasename++; 439 } 440 basename=uprv_computeDirPath(path, pathBuffer); 441 if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat") != 0) { 442 /* must mmap file... for build */ 443 int fd; 444 int length; 445 struct stat mystat; 446 void *data; 447 UDataMemory_init(pData); /* Clear the output struct. */ 448 449 /* determine the length of the file */ 450 if(stat(path, &mystat)!=0 || mystat.st_size<=0) { 451 return false; 452 } 453 length=mystat.st_size; 454 455 /* open the file */ 456 fd=open(path, O_RDONLY); 457 if(fd==-1) { 458 return false; 459 } 460 461 /* get a view of the mapping */ 462 data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); 463 close(fd); /* no longer needed */ 464 if(data==MAP_FAILED) { 465 // Possibly check the errorno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? 466 return false; 467 } 468 pData->map = (char *)data + length; 469 pData->pHeader=(const DataHeader *)data; 470 pData->mapAddr = data; 471 return true; 472 } 473 474 # ifdef OS390BATCH 475 /* ### hack: we still need to get u_getDataDirectory() fixed 476 for OS/390 (batch mode - always return "//"? ) 477 and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!) 478 This is probably due to the strange file system on OS/390. It's more like 479 a database with short entry names than a typical file system. */ 480 /* U_ICUDATA_NAME should always have the correct name */ 481 /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */ 482 /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */ 483 /* PROJECT!!!!! */ 484 uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA"); 485 # else 486 /* set up the library name */ 487 uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX); 488 # endif 489 490 # ifdef UDATA_DEBUG 491 fprintf(stderr, "dllload: %s ", pathBuffer); 492 # endif 493 494 handle=dllload(pathBuffer); 495 496 # ifdef UDATA_DEBUG 497 fprintf(stderr, " -> %08X\n", handle ); 498 # endif 499 500 if(handle != nullptr) { 501 /* we have a data DLL - what kind of lookup do we need here? */ 502 /* try to find the Table of Contents */ 503 UDataMemory_init(pData); /* Clear the output struct. */ 504 val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME); 505 if(val == 0) { 506 /* failed... so keep looking */ 507 return false; 508 } 509 # ifdef UDATA_DEBUG 510 fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n", handle, U_ICUDATA_ENTRY_NAME, val); 511 # endif 512 513 pData->pHeader=(const DataHeader *)val; 514 return true; 515 } else { 516 return false; /* no handle */ 517 } 518 } 519 uprv_unmapFile(UDataMemory * pData)520 U_CFUNC void uprv_unmapFile(UDataMemory *pData) { 521 if(pData!=nullptr && pData->map!=nullptr) { 522 uprv_free(pData->map); 523 pData->map = nullptr; 524 pData->mapAddr = nullptr; 525 pData->pHeader = nullptr; 526 } 527 } 528 529 #else 530 # error MAP_IMPLEMENTATION is set incorrectly 531 #endif 532