• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 
8 // This file is included in `src/prim/prim.c`
9 
10 #include "mimalloc.h"
11 #include "mimalloc/internal.h"
12 #include "mimalloc/atomic.h"
13 #include "mimalloc/prim.h"
14 #include <stdio.h>   // fputs, stderr
15 
16 
17 //---------------------------------------------
18 // Dynamically bind Windows API points for portability
19 //---------------------------------------------
20 
21 // We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016.
22 // So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility)
// NtAllocateVirtualMemoryEx is used for huge OS page allocation (1GiB)
24 // We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's.
// Locally declared extended-parameter type tags (so the file compiles with older SDK's).
// The values are consecutive starting at 0; they are spelled out explicitly here.
typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E {
  MiMemExtendedParameterInvalidType         = 0,
  MiMemExtendedParameterAddressRequirements = 1,
  MiMemExtendedParameterNumaNode            = 2,
  MiMemExtendedParameterPartitionHandle     = 3,
  MiMemExtendedParameterUserPhysicalHandle  = 4,
  MiMemExtendedParameterAttributeFlags      = 5,
  MiMemExtendedParameterMax                 = 6
} MI_MEM_EXTENDED_PARAMETER_TYPE;
34 
// Locally declared extended allocation parameter (8-byte aligned):
// an 8-bit type tag packed in a 64-bit bitfield, followed by the argument union.
typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S {
  struct {
    DWORD64 Type     : 8;    // one of the `MiMemExtendedParameter...` tags
    DWORD64 Reserved : 56;
  } Type;
  union {
    DWORD64 ULong64;
    PVOID   Pointer;
    SIZE_T  Size;
    HANDLE  Handle;
    DWORD   ULong;
  } Arg;
} MI_MEM_EXTENDED_PARAMETER;
39 
40 typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S {
41   PVOID  LowestStartingAddress;
42   PVOID  HighestEndingAddress;
43   SIZE_T Alignment;
44 } MI_MEM_ADDRESS_REQUIREMENTS;
45 
46 #define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE   0x00000010
47 
48 #include <winternl.h>
49 typedef PVOID    (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG);
50 typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG);
51 static PVirtualAlloc2 pVirtualAlloc2 = NULL;
52 static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL;
53 
// Similarly, GetNumaProcessorNodeEx is only supported since Windows 7
55 typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER;
56 
57 typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber);
58 typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber);
59 typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask);
60 typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber);
61 static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL;
62 static PGetNumaProcessorNodeEx      pGetNumaProcessorNodeEx = NULL;
63 static PGetNumaNodeProcessorMaskEx  pGetNumaNodeProcessorMaskEx = NULL;
64 static PGetNumaProcessorNode        pGetNumaProcessorNode = NULL;
65 
66 //---------------------------------------------
67 // Enable large page support dynamically (if possible)
68 //---------------------------------------------
69 
win_enable_large_os_pages(size_t * large_page_size)70 static bool win_enable_large_os_pages(size_t* large_page_size)
71 {
72   static bool large_initialized = false;
73   if (large_initialized) return (_mi_os_large_page_size() > 0);
74   large_initialized = true;
75 
76   // Try to see if large OS pages are supported
77   // To use large pages on Windows, we first need access permission
78   // Set "Lock pages in memory" permission in the group policy editor
79   // <https://devblogs.microsoft.com/oldnewthing/20110128-00/?p=11643>
80   unsigned long err = 0;
81   HANDLE token = NULL;
82   BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
83   if (ok) {
84     TOKEN_PRIVILEGES tp;
85     ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid);
86     if (ok) {
87       tp.PrivilegeCount = 1;
88       tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
89       ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
90       if (ok) {
91         err = GetLastError();
92         ok = (err == ERROR_SUCCESS);
93         if (ok && large_page_size != NULL) {
94           *large_page_size = GetLargePageMinimum();
95         }
96       }
97     }
98     CloseHandle(token);
99   }
100   if (!ok) {
101     if (err == 0) err = GetLastError();
102     _mi_warning_message("cannot enable large OS page support, error %lu\n", err);
103   }
104   return (ok!=0);
105 }
106 
107 
108 //---------------------------------------------
109 // Initialize
110 //---------------------------------------------
111 
// Initialize the OS memory configuration and bind the optional Windows API entry points.
// Fills in the fixed capability flags of `config`, the page size and allocation granularity
// (when the system reports non-zero values), and tries to enable large OS pages if the
// `allow_large_os_pages` or `reserve_huge_os_pages` option is enabled.
void _mi_prim_mem_init( mi_os_mem_config_t* config )
{
  config->has_overcommit = false;
  config->must_free_whole = true;
  config->has_virtual_reserve = true;

  // page size and allocation granularity
  SYSTEM_INFO sysinfo;
  GetSystemInfo(&sysinfo);
  if (sysinfo.dwPageSize > 0) {
    config->page_size = sysinfo.dwPageSize;
  }
  if (sysinfo.dwAllocationGranularity > 0) {
    config->alloc_granularity = sysinfo.dwAllocationGranularity;
  }

  // VirtualAlloc2: prefer VirtualAlloc2FromApp as it is available to Windows store apps
  HINSTANCE kernelbase = LoadLibrary(TEXT("kernelbase.dll"));
  if (kernelbase != NULL) {
    FARPROC valloc2 = GetProcAddress(kernelbase, "VirtualAlloc2FromApp");
    if (valloc2 == NULL) {
      valloc2 = GetProcAddress(kernelbase, "VirtualAlloc2");
    }
    pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))valloc2;
    FreeLibrary(kernelbase);
  }

  // NtAllocateVirtualMemoryEx is used for huge page allocation
  HINSTANCE ntdll = LoadLibrary(TEXT("ntdll.dll"));
  if (ntdll != NULL) {
    pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(ntdll, "NtAllocateVirtualMemoryEx");
    FreeLibrary(ntdll);
  }

  // Try to use Win7+ numa API
  HINSTANCE kernel32 = LoadLibrary(TEXT("kernel32.dll"));
  if (kernel32 != NULL) {
    pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(kernel32, "GetCurrentProcessorNumberEx");
    pGetNumaProcessorNodeEx      = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(kernel32, "GetNumaProcessorNodeEx");
    pGetNumaNodeProcessorMaskEx  = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(kernel32, "GetNumaNodeProcessorMaskEx");
    pGetNumaProcessorNode        = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(kernel32, "GetNumaProcessorNode");
    FreeLibrary(kernel32);
  }

  const bool want_large = (mi_option_is_enabled(mi_option_allow_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages));
  if (want_large) {
    win_enable_large_os_pages(&config->large_page_size);
  }
}
150 
151 
152 //---------------------------------------------
153 // Free
154 //---------------------------------------------
155 
_mi_prim_free(void * addr,size_t size)156 int _mi_prim_free(void* addr, size_t size ) {
157   MI_UNUSED(size);
158   DWORD errcode = 0;
159   bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0);
160   if (err) { errcode = GetLastError(); }
161   if (errcode == ERROR_INVALID_ADDRESS) {
162     // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside
163     // the memory region returned by VirtualAlloc; in that case we need to free using
164     // the start of the region.
165     MEMORY_BASIC_INFORMATION info = { 0 };
166     VirtualQuery(addr, &info, sizeof(info));
167     if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) {
168       errcode = 0;
169       err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0);
170       if (err) { errcode = GetLastError(); }
171     }
172   }
173   return (int)errcode;
174 }
175 
176 
177 //---------------------------------------------
178 // VirtualAlloc
179 //---------------------------------------------
180 
// Allocate `size` bytes with `VirtualAlloc`/`VirtualAlloc2` using `flags` and read/write protection.
//   addr:          requested address, or NULL to let the OS (or an aligned hint) decide.
//   try_alignment: desired alignment; it is a best effort only.
// Tries in order: (64-bit only, `addr == NULL`) a hinted address from `_mi_os_get_aligned_hint`,
// then `VirtualAlloc2` with an alignment requirement (when bound and `try_alignment` is a
// multiple of the OS page size), and finally a plain `VirtualAlloc`.
// Returns the allocated address, or NULL on failure.
static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignment, DWORD flags) {
  #if (MI_INTPTR_SIZE >= 8)
  // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations
  if (addr == NULL) {
    void* hint = _mi_os_get_aligned_hint(try_alignment,size);
    if (hint != NULL) {
      void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE);
      if (p != NULL) return p;
      // note: `DWORD` is an `unsigned long`; cast so the arguments match the `%x` conversions.
      _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, (unsigned)GetLastError(), hint, try_alignment, (unsigned)flags);
      // fall through on error
    }
  }
  #endif
  // on modern Windows try use VirtualAlloc2 for aligned allocation
  if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
    MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 };
    reqs.Alignment = try_alignment;
    MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} };
    param.Type.Type = MiMemExtendedParameterAddressRequirements;
    param.Arg.Pointer = &reqs;
    void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, &param, 1);
    if (p != NULL) return p;
    _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, (unsigned)GetLastError(), addr, try_alignment, (unsigned)flags);
    // fall through on error
  }
  // last resort
  return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
}
209 
// Allocate OS memory, preferring large OS pages (2MiB) when allowed or required.
//   large_only:  only try large pages; the result of that attempt is returned as-is.
//   allow_large: large pages may be used (must be true if `large_only` is set).
//   is_large:    set to whether the returned memory was requested with large pages.
// Large pages are only attempted when `flags` both reserves and commits.
// Returns the allocated address or NULL.
static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) {
  mi_assert_internal(!(large_only && !allow_large));
  static _Atomic(size_t) large_page_try_ok; // = 0;
  void* p = NULL;
  const bool commits  = ((flags & MEM_COMMIT) != 0);
  const bool reserves = ((flags & MEM_RESERVE) != 0);
  if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large && commits && reserves) {
    size_t skip_count = mi_atomic_load_acquire(&large_page_try_ok);
    const bool backing_off = (!large_only && skip_count > 0);
    if (backing_off) {
      // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive.
      // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times.
      mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &skip_count, skip_count - 1);
    }
    else {
      // large OS pages must always reserve and commit.
      *is_large = true;
      p = win_virtual_alloc_prim(addr, size, try_alignment, flags | MEM_LARGE_PAGES);
      if (large_only) {
        return p;
      }
      if (p == NULL) {
        // on error, don't try again for the next N allocations
        // and fall back to non-large page allocation below.
        mi_atomic_store_release(&large_page_try_ok,10UL);
      }
    }
  }
  if (p != NULL) {
    return p;
  }
  // regular page allocation
  *is_large = ((flags & MEM_LARGE_PAGES) != 0);
  return win_virtual_alloc_prim(addr, size, try_alignment, flags);
}
242 
_mi_prim_alloc(size_t size,size_t try_alignment,bool commit,bool allow_large,bool * is_large,bool * is_zero,void ** addr)243 int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) {
244   mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0);
245   mi_assert_internal(commit || !allow_large);
246   mi_assert_internal(try_alignment > 0);
247   *is_zero = true;
248   int flags = MEM_RESERVE;
249   if (commit) { flags |= MEM_COMMIT; }
250   *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large);
251   return (*addr != NULL ? 0 : (int)GetLastError());
252 }
253 
254 
255 //---------------------------------------------
256 // Commit/Reset/Protect
257 //---------------------------------------------
258 #ifdef _MSC_VER
259 #pragma warning(disable:6250)   // suppress warning calling VirtualFree without MEM_RELEASE (for decommit)
260 #endif
261 
_mi_prim_commit(void * addr,size_t size,bool * is_zero)262 int _mi_prim_commit(void* addr, size_t size, bool* is_zero) {
263   *is_zero = false;
264   /*
265   // zero'ing only happens on an initial commit... but checking upfront seems expensive..
266   _MEMORY_BASIC_INFORMATION meminfo; _mi_memzero_var(meminfo);
267   if (VirtualQuery(addr, &meminfo, size) > 0) {
268     if ((meminfo.State & MEM_COMMIT) == 0) {
269       *is_zero = true;
270     }
271   }
272   */
273   // commit
274   void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE);
275   if (p == NULL) return (int)GetLastError();
276   return 0;
277 }
278 
_mi_prim_decommit(void * addr,size_t size,bool * needs_recommit)279 int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) {
280   BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT);
281   *needs_recommit = true;  // for safety, assume always decommitted even in the case of an error.
282   return (ok ? 0 : (int)GetLastError());
283 }
284 
_mi_prim_reset(void * addr,size_t size)285 int _mi_prim_reset(void* addr, size_t size) {
286   void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
287   mi_assert_internal(p == addr);
288   #if 0
289   if (p != NULL) {
290     VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory directly from the working set
291   }
292   #endif
293   return (p != NULL ? 0 : (int)GetLastError());
294 }
295 
_mi_prim_protect(void * addr,size_t size,bool protect)296 int _mi_prim_protect(void* addr, size_t size, bool protect) {
297   DWORD oldprotect = 0;
298   BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect);
299   return (ok ? 0 : (int)GetLastError());
300 }
301 
302 
303 //---------------------------------------------
304 // Huge page allocation
305 //---------------------------------------------
306 
_mi_prim_alloc_huge_os_pagesx(void * hint_addr,size_t size,int numa_node)307 static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node)
308 {
309   const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE;
310 
311   win_enable_large_os_pages(NULL);
312 
313   MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} };
314   // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages
315   static bool mi_huge_pages_available = true;
316   if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) {
317     params[0].Type.Type = MiMemExtendedParameterAttributeFlags;
318     params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE;
319     ULONG param_count = 1;
320     if (numa_node >= 0) {
321       param_count++;
322       params[1].Type.Type = MiMemExtendedParameterNumaNode;
323       params[1].Arg.ULong = (unsigned)numa_node;
324     }
325     SIZE_T psize = size;
326     void* base = hint_addr;
327     NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count);
328     if (err == 0 && base != NULL) {
329       return base;
330     }
331     else {
332       // fall back to regular large pages
333       mi_huge_pages_available = false; // don't try further huge pages
334       _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err);
335     }
336   }
337   // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation
338   if (pVirtualAlloc2 != NULL && numa_node >= 0) {
339     params[0].Type.Type = MiMemExtendedParameterNumaNode;
340     params[0].Arg.ULong = (unsigned)numa_node;
341     return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1);
342   }
343 
344   // otherwise use regular virtual alloc on older windows
345   return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE);
346 }
347 
// Allocate huge OS pages through `_mi_prim_alloc_huge_os_pagesx`.
// Always sets `*is_zero` to true and stores the result in `*addr`.
// Returns 0 on success, or the Windows last-error code if `*addr` is NULL.
int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) {
  *is_zero = true;
  void* const p = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node);
  *addr = p;
  if (p == NULL) {
    return (int)GetLastError();
  }
  return 0;
}
353 
354 
355 //---------------------------------------------
356 // Numa nodes
357 //---------------------------------------------
358 
_mi_prim_numa_node(void)359 size_t _mi_prim_numa_node(void) {
360   USHORT numa_node = 0;
361   if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) {
362     // Extended API is supported
363     MI_PROCESSOR_NUMBER pnum;
364     (*pGetCurrentProcessorNumberEx)(&pnum);
365     USHORT nnode = 0;
366     BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode);
367     if (ok) { numa_node = nnode; }
368   }
369   else if (pGetNumaProcessorNode != NULL) {
370     // Vista or earlier, use older API that is limited to 64 processors. Issue #277
371     DWORD pnum = GetCurrentProcessorNumber();
372     UCHAR nnode = 0;
373     BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode);
374     if (ok) { numa_node = nnode; }
375   }
376   return numa_node;
377 }
378 
_mi_prim_numa_node_count(void)379 size_t _mi_prim_numa_node_count(void) {
380   ULONG numa_max = 0;
381   GetNumaHighestNodeNumber(&numa_max);
382   // find the highest node number that has actual processors assigned to it. Issue #282
383   while(numa_max > 0) {
384     if (pGetNumaNodeProcessorMaskEx != NULL) {
385       // Extended API is supported
386       GROUP_AFFINITY affinity;
387       if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) {
388         if (affinity.Mask != 0) break;  // found the maximum non-empty node
389       }
390     }
391     else {
392       // Vista or earlier, use older API that is limited to 64 processors.
393       ULONGLONG mask;
394       if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) {
395         if (mask != 0) break; // found the maximum non-empty node
396       };
397     }
398     // max node was invalid or had no processor assigned, try again
399     numa_max--;
400   }
401   return ((size_t)numa_max + 1);
402 }
403 
404 
405 //----------------------------------------------------------------
406 // Clock
407 //----------------------------------------------------------------
408 
mi_to_msecs(LARGE_INTEGER t)409 static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) {
410   static LARGE_INTEGER mfreq; // = 0
411   if (mfreq.QuadPart == 0LL) {
412     LARGE_INTEGER f;
413     QueryPerformanceFrequency(&f);
414     mfreq.QuadPart = f.QuadPart/1000LL;
415     if (mfreq.QuadPart == 0) mfreq.QuadPart = 1;
416   }
417   return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart);
418 }
419 
_mi_prim_clock_now(void)420 mi_msecs_t _mi_prim_clock_now(void) {
421   LARGE_INTEGER t;
422   QueryPerformanceCounter(&t);
423   return mi_to_msecs(t);
424 }
425 
426 
427 //----------------------------------------------------------------
428 // Process Info
429 //----------------------------------------------------------------
430 
431 #include <windows.h>
432 #include <psapi.h>
433 
filetime_msecs(const FILETIME * ftime)434 static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
435   ULARGE_INTEGER i;
436   i.LowPart = ftime->dwLowDateTime;
437   i.HighPart = ftime->dwHighDateTime;
438   mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds
439   return msecs;
440 }
441 
442 typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD);
443 static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL;
444 
_mi_prim_process_info(mi_process_info_t * pinfo)445 void _mi_prim_process_info(mi_process_info_t* pinfo)
446 {
447   FILETIME ct;
448   FILETIME ut;
449   FILETIME st;
450   FILETIME et;
451   GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut);
452   pinfo->utime = filetime_msecs(&ut);
453   pinfo->stime = filetime_msecs(&st);
454 
455   // load psapi on demand
456   if (pGetProcessMemoryInfo == NULL) {
457     HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll"));
458     if (hDll != NULL) {
459       pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo");
460     }
461   }
462 
463   // get process info
464   PROCESS_MEMORY_COUNTERS info;
465   memset(&info, 0, sizeof(info));
466   if (pGetProcessMemoryInfo != NULL) {
467     pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
468   }
469   pinfo->current_rss    = (size_t)info.WorkingSetSize;
470   pinfo->peak_rss       = (size_t)info.PeakWorkingSetSize;
471   pinfo->current_commit = (size_t)info.PagefileUsage;
472   pinfo->peak_commit    = (size_t)info.PeakPagefileUsage;
473   pinfo->page_faults    = (size_t)info.PageFaultCount;
474 }
475 
476 //----------------------------------------------------------------
477 // Output
478 //----------------------------------------------------------------
479 
_mi_prim_out_stderr(const char * msg)480 void _mi_prim_out_stderr( const char* msg )
481 {
482   // on windows with redirection, the C runtime cannot handle locale dependent output
483   // after the main thread closes so we use direct console output.
484   if (!_mi_preloading()) {
485     // _cputs(msg);  // _cputs cannot be used at is aborts if it fails to lock the console
486     static HANDLE hcon = INVALID_HANDLE_VALUE;
487     static bool hconIsConsole;
488     if (hcon == INVALID_HANDLE_VALUE) {
489       CONSOLE_SCREEN_BUFFER_INFO sbi;
490       hcon = GetStdHandle(STD_ERROR_HANDLE);
491       hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi));
492     }
493     const size_t len = _mi_strlen(msg);
494     if (len > 0 && len < UINT32_MAX) {
495       DWORD written = 0;
496       if (hconIsConsole) {
497         WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL);
498       }
499       else if (hcon != INVALID_HANDLE_VALUE) {
500         // use direct write if stderr was redirected
501         WriteFile(hcon, msg, (DWORD)len, &written, NULL);
502       }
503       else {
504         // finally fall back to fputs after all
505         fputs(msg, stderr);
506       }
507     }
508   }
509 }
510 
511 
512 //----------------------------------------------------------------
513 // Environment
514 //----------------------------------------------------------------
515 
516 // On Windows use GetEnvironmentVariable instead of getenv to work
517 // reliably even when this is invoked before the C runtime is initialized.
518 // i.e. when `_mi_preloading() == true`.
519 // Note: on windows, environment names are not case sensitive.
_mi_prim_getenv(const char * name,char * result,size_t result_size)520 bool _mi_prim_getenv(const char* name, char* result, size_t result_size) {
521   result[0] = 0;
522   size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size);
523   return (len > 0 && len < result_size);
524 }
525 
526 
527 
528 //----------------------------------------------------------------
529 // Random
530 //----------------------------------------------------------------
531 
532 #if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus)
533 // We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using
534 // dynamic overriding, we observed it can raise an exception when compiled with C++, and
535 // sometimes deadlocks when also running under the VS debugger.
536 // In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom.
537 // To be continued..
538 #pragma comment (lib,"advapi32.lib")
539 #define RtlGenRandom  SystemFunction036
540 mi_decl_externc BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength);
541 
_mi_prim_random_buf(void * buf,size_t buf_len)542 bool _mi_prim_random_buf(void* buf, size_t buf_len) {
543   return (RtlGenRandom(buf, (ULONG)buf_len) != 0);
544 }
545 
546 #else
547 
548 #ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG
549 #define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002
550 #endif
551 
552 typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG);
553 static  PBCryptGenRandom pBCryptGenRandom = NULL;
554 
_mi_prim_random_buf(void * buf,size_t buf_len)555 bool _mi_prim_random_buf(void* buf, size_t buf_len) {
556   if (pBCryptGenRandom == NULL) {
557     HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll"));
558     if (hDll != NULL) {
559       pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom");
560     }
561     if (pBCryptGenRandom == NULL) return false;
562   }
563   return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
564 }
565 
566 #endif  // MI_USE_RTLGENRANDOM
567 
568 //----------------------------------------------------------------
569 // Thread init/done
570 //----------------------------------------------------------------
571 
572 #if !defined(MI_SHARED_LIB)
573 
574 // use thread local storage keys to detect thread ending
575 #include <fibersapi.h>
576 #if (_WIN32_WINNT < 0x600)  // before Windows Vista
577 WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback );
578 WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex );
579 WINBASEAPI BOOL  WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData );
580 WINBASEAPI BOOL  WINAPI FlsFree(_In_ DWORD dwFlsIndex);
581 #endif
582 
583 static DWORD mi_fls_key = (DWORD)(-1);
584 
mi_fls_done(PVOID value)585 static void NTAPI mi_fls_done(PVOID value) {
586   mi_heap_t* heap = (mi_heap_t*)value;
587   if (heap != NULL) {
588     _mi_thread_done(heap);
589     FlsSetValue(mi_fls_key, NULL);  // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672
590   }
591 }
592 
_mi_prim_thread_init_auto_done(void)593 void _mi_prim_thread_init_auto_done(void) {
594   mi_fls_key = FlsAlloc(&mi_fls_done);
595 }
596 
_mi_prim_thread_done_auto_done(void)597 void _mi_prim_thread_done_auto_done(void) {
598   // call thread-done on all threads (except the main thread) to prevent
599   // dangling callback pointer if statically linked with a DLL; Issue #208
600   FlsFree(mi_fls_key);
601 }
602 
// Store `heap` under the fiber-local-storage key, where `mi_fls_done` receives it as its argument.
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
  const DWORD unallocated = (DWORD)(-1);
  mi_assert_internal(mi_fls_key != unallocated);
  FlsSetValue(mi_fls_key, heap);
}
607 
608 #else
609 
610 // Dll; nothing to do as in that case thread_done is handled through the DLL_THREAD_DETACH event.
611 
// Dll build: no setup needed, thread_done is handled through the DLL_THREAD_DETACH event.
void _mi_prim_thread_init_auto_done(void) {
  // nothing to do
}
614 
// Dll build: no teardown needed, thread_done is handled through the DLL_THREAD_DETACH event.
void _mi_prim_thread_done_auto_done(void) {
  // nothing to do
}
617 
// Dll build: the heap is not recorded, thread_done is handled through the DLL_THREAD_DETACH event.
void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) {
  MI_UNUSED(heap);  // intentionally unused
}
621 
622 #endif
623