1 /****************************************************************************
2 * Copyright (C) 2014-2017 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ****************************************************************************/
23
24 #ifndef __SWR_OS_H__
25 #define __SWR_OS_H__
26
#include <cstddef>
#include <string>
#include "core/knobs.h"
29
30 #if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX)
31
32 #define SWR_API __cdecl
33 #define SWR_VISIBLE __declspec(dllexport)
34
35 #ifndef NOMINMAX
36 #undef UNICODE
37 #define NOMINMAX
38 #include <windows.h>
39 #undef NOMINMAX
40 #define UNICODE
41 #else
42 #include <windows.h>
43 #endif
44 #include <intrin.h>
45 #include <cstdint>
46
#if defined(MemoryFence)
// Windows.h defines MemoryFence as _mm_mfence, but this conflicts with llvm::sys::MemoryFence
#undef MemoryFence
#endif

// OSALIGN(RWORD, WIDTH) attaches a WIDTH-byte alignment request to the
// declaration RWORD. MSVC takes the specifier in front of the declaration,
// GNU-compatible compilers take it as a trailing attribute.
// NOTE(review): no definition is provided if neither _MSC_VER nor __GNUC__ is set.
#if defined(_MSC_VER)
#define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD
#elif defined(__GNUC__)
#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
#endif
57
#if defined(_DEBUG)
// We compile Debug builds with inline function expansion enabled.  This allows
// functions compiled with __forceinline to be inlined even in Debug builds.
// The inline_depth(0) pragma below will disable inline function expansion for
// normal INLINE / inline functions, but not for __forceinline functions.
// Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in
// Debug builds.
#define INLINE inline
#pragma inline_depth(0)
#else
// Use of __forceinline increases compile time dramatically in release builds
// and provides almost 0 measurable benefit.  Disable until we have a compelling
// use-case
// #define INLINE __forceinline
#define INLINE inline
#endif
// Only supply FORCEINLINE when nothing included earlier has defined it already.
#ifndef FORCEINLINE
#define FORCEINLINE __forceinline
#endif

// Expands to the compiler's debugger-break intrinsic.
#define DEBUGBREAK __debugbreak()

// Pushes the current warning state and disables the warning numbers passed as
// arguments; pair every use with PRAGMA_WARNING_POP().
// Note that the expansion ends each __pragma with a ';'.
#define PRAGMA_WARNING_PUSH_DISABLE(...) \
    __pragma(warning(push));             \
    __pragma(warning(disable : __VA_ARGS__));

// Restores the warning state saved by PRAGMA_WARNING_PUSH_DISABLE.
#define PRAGMA_WARNING_POP() __pragma(warning(pop))
85
AlignedMalloc(size_t _Size,size_t _Alignment)86 static inline void* AlignedMalloc(size_t _Size, size_t _Alignment)
87 {
88 return _aligned_malloc(_Size, _Alignment);
89 }
90
AlignedFree(void * p)91 static inline void AlignedFree(void* p)
92 {
93 return _aligned_free(p);
94 }
95
// "SizeT" flavours of the bit-scan / popcount helpers: they resolve to the
// 64-bit variants when _WIN64 is defined and to the 32-bit variants otherwise.
#if defined(_WIN64)
#define BitScanReverseSizeT BitScanReverse64
#define BitScanForwardSizeT BitScanForward64
#define _mm_popcount_sizeT _mm_popcnt_u64
#else
#define BitScanReverseSizeT BitScanReverse
#define BitScanForwardSizeT BitScanForward
#define _mm_popcount_sizeT _mm_popcnt_u32
#endif
105
106 #if !defined(_WIN64)
/// Fallback 64-bit forward bit scan for builds where _WIN64 is not defined.
/// Finds the position of the least significant set bit in Mask.
/// @param Index - receives the bit position (0..63); written as 0 when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 if Mask contains a set bit, 0 if Mask is zero
inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask)
{
    *Index = 0;
    if (Mask == 0)
    {
        // Nothing to find; also keeps zero away from __builtin_ctzll, whose
        // result is undefined for a zero argument.
        return 0;
    }

#ifdef __GNUC__
    *Index = __builtin_ctzll(Mask);
#else
    for (int i = 0; i < 64; ++i)
    {
        if ((1ULL << i) & Mask)
        {
            *Index = i;
            break; // first hit from the bottom is the lowest set bit
        }
    }
#endif
    return 1;
}
119
/// Fallback 64-bit reverse bit scan for builds where _WIN64 is not defined.
/// Finds the position of the most significant set bit in Mask.
/// @param Index - receives the bit position (0..63); written as 0 when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 if Mask contains a set bit, 0 if Mask is zero
inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask)
{
    *Index = 0;
    if (Mask == 0)
    {
        // Nothing to find; also keeps zero away from __builtin_clzll, whose
        // result is undefined for a zero argument.
        return 0;
    }

#ifdef __GNUC__
    *Index = 63 - __builtin_clzll(Mask);
#else
    for (int i = 63; i >= 0; --i)
    {
        if ((1ULL << i) & Mask)
        {
            *Index = i;
            break; // first hit from the top is the highest set bit
        }
    }
#endif
    return 1;
}
132 #endif
133
134 #elif defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
135
136 #define SWR_API
137 #define SWR_VISIBLE __attribute__((visibility("default")))
138
139 #include <stdlib.h>
140 #include <string.h>
141 #include <x86intrin.h>
142 #include <stdint.h>
143 #include <sys/types.h>
144 #include <unistd.h>
145 #include <sys/stat.h>
146 #include <stdio.h>
147 #include <limits.h>
148
// Win32-style type names, provided here so code written against them also
// builds on this (non-Windows) branch.
typedef void         VOID;
typedef void*        LPVOID;
typedef int          INT;
typedef unsigned int UINT;
typedef void*        HANDLE;
typedef int          LONG;
typedef unsigned int DWORD;

// Force FALSE / TRUE to 0 / 1 regardless of any earlier definition.
#undef FALSE
#define FALSE 0

#undef TRUE
#define TRUE 1

// MAX_PATH maps onto PATH_MAX from <limits.h>.
#define MAX_PATH PATH_MAX

// OSALIGN(RWORD, WIDTH) attaches a WIDTH-byte alignment attribute to the declaration RWORD.
#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
#ifndef INLINE
#define INLINE __inline
#endif
// On this branch FORCEINLINE is just INLINE; no forced inlining is requested.
#ifndef FORCEINLINE
#define FORCEINLINE INLINE
#endif
// Emits the x86 "int $3" breakpoint instruction.
#define DEBUGBREAK asm("int $3")
173
#if !defined(__CYGWIN__)

// Calling-convention keywords expand to nothing here unless already defined.
#ifndef __cdecl
#define __cdecl
#endif
#ifndef __stdcall
#define __stdcall
#endif

// Emulate __declspec(x): with GCC-compatible compilers (other than the Intel
// compiler) each supported argument is token-pasted onto "__declspec_" and
// mapped to a GNU attribute or to nothing. Other compilers drop __declspec
// entirely.
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
#define __declspec(x) __declspec_##x
#define __declspec_align(y) __attribute__((aligned(y)))
#define __declspec_deprecated __attribute__((deprecated))
#define __declspec_dllexport
#define __declspec_dllimport
#define __declspec_noinline __attribute__((__noinline__))
#define __declspec_nothrow __attribute__((nothrow))
#define __declspec_novtable
#define __declspec_thread __thread
#else
#define __declspec(X)
#endif

#endif

// Compiler version folded into one comparable integer:
// major * 10000 + minor * 100 + patchlevel (e.g. 4.9.2 -> 40902).
#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
200
201 #if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500)
/// Read the processor time-stamp counter via the RDTSC instruction.
/// @returns the 64-bit counter value assembled from EDX (high) and EAX (low)
inline uint64_t __rdtsc()
{
    // RDTSC delivers the counter as two 32-bit halves. They must be unsigned:
    // with a signed 32-bit type, a low half with bit 31 set would sign-extend
    // when widened and overwrite the high half of the result.
    uint32_t low;
    uint32_t high;
    asm volatile("rdtsc" : "=a"(low), "=d"(high));
    return (static_cast<uint64_t>(high) << 32) | low;
}
208 #endif
209
210 #if !defined(__clang__) && !defined(__INTEL_COMPILER)
211 // Intrinsic not defined in gcc < 10
212 #if (__GNUC__) && (GCC_VERSION < 100000)
_mm256_storeu2_m128i(__m128i * hi,__m128i * lo,__m256i a)213 static INLINE void _mm256_storeu2_m128i(__m128i* hi, __m128i* lo, __m256i a)
214 {
215 _mm_storeu_si128((__m128i*)lo, _mm256_castsi256_si128(a));
216 _mm_storeu_si128((__m128i*)hi, _mm256_extractf128_si256(a, 0x1));
217 }
218 #endif
219
// gcc prior to 4.9 doesn't have _mm*_undefined_*
// Substitute the zero-initializing intrinsics there, so callers get a
// zeroed register in place of an unspecified one.
#if (__GNUC__) && (GCC_VERSION < 40900)
#define _mm_undefined_si128 _mm_setzero_si128
#define _mm256_undefined_ps _mm256_setzero_ps
#endif
225 #endif
226
/// Find the position of the least significant set bit in a 64-bit mask.
/// @param Index - receives the bit position (0..63); written as 0 when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 if Mask contains a set bit, 0 if Mask is zero
inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask)
{
    if (Mask == 0)
    {
        // __builtin_ctzll has an undefined result for 0, so never call it
        // with an empty mask.
        *Index = 0;
        return 0;
    }
    *Index = __builtin_ctzll(Mask);
    return 1;
}
232
/// Find the position of the least significant set bit in a 32-bit mask.
/// @param Index - receives the bit position (0..31); written as 0 when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 if Mask contains a set bit, 0 if Mask is zero
inline unsigned char _BitScanForward(unsigned long* Index, uint32_t Mask)
{
    if (Mask == 0)
    {
        // __builtin_ctz has an undefined result for 0, so never call it
        // with an empty mask.
        *Index = 0;
        return 0;
    }
    *Index = __builtin_ctz(Mask);
    return 1;
}
238
/// Find the position of the most significant set bit in a 64-bit mask.
/// @param Index - receives the bit position (0..63); written as 0 when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 if Mask contains a set bit, 0 if Mask is zero
inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask)
{
    if (Mask == 0)
    {
        // __builtin_clzll has an undefined result for 0, so never call it
        // with an empty mask.
        *Index = 0;
        return 0;
    }
    *Index = 63 - __builtin_clzll(Mask);
    return 1;
}
244
/// Find the position of the most significant set bit in a 32-bit mask.
/// @param Index - receives the bit position (0..31); written as 0 when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 if Mask contains a set bit, 0 if Mask is zero
inline unsigned char _BitScanReverse(unsigned long* Index, uint32_t Mask)
{
    if (Mask == 0)
    {
        // __builtin_clz has an undefined result for 0, so never call it
        // with an empty mask.
        *Index = 0;
        return 0;
    }
    *Index = 31 - __builtin_clz(Mask);
    return 1;
}
250
/// Allocate an aligned block of memory using posix_memalign.
/// @param size      - number of bytes requested
/// @param alignment - required alignment in bytes; expected to be a power of two.
///                    Power-of-two values smaller than sizeof(void*) are raised to
///                    sizeof(void*), which still satisfies the requested alignment.
/// @returns pointer to the block, or nullptr if posix_memalign reports an error
///          (including an alignment that is not a power of two)
inline void* AlignedMalloc(size_t size, size_t alignment)
{
    // posix_memalign only accepts alignments that are a power of two AND a
    // multiple of sizeof(void*). Small power-of-two requests (1, 2, 4 on a
    // 64-bit target) would therefore fail with EINVAL, so raise them to the
    // minimum it accepts. Invalid alignments are left untouched so that they
    // are still rejected.
    const bool isPowerOfTwo = (alignment != 0) && ((alignment & (alignment - 1)) == 0);
    if (isPowerOfTwo && alignment < sizeof(void*))
    {
        alignment = sizeof(void*);
    }

    void* ret = nullptr;
    if (posix_memalign(&ret, alignment, size) != 0)
    {
        return nullptr;
    }
    return ret;
}
260
/// Release a block through the C runtime's free() (counterpart of the
/// posix_memalign based AlignedMalloc).
/// @param p - pointer handed straight to free()
static inline void AlignedFree(void* p)
{
    ::free(p);
}
265
// Element count of an array; only meaningful for true arrays (applied to a
// pointer it silently yields sizeof(pointer) / sizeof(element)).
#define _countof(a) (sizeof(a) / sizeof(*(a)))

// Map Windows-style names onto their closest libc / POSIX counterparts.
// NOTE(review): sprintf_s becomes plain sprintf, so no bounds checking is done
// and a call that passes an explicit buffer size will not translate correctly.
// NOTE(review): strcpy_s becomes strncpy, which does not NUL-terminate dst when
// src is at least `size` characters long - confirm callers tolerate this.
#define sprintf_s sprintf
#define strcpy_s(dst, size, src) strncpy(dst, src, size)
#define GetCurrentProcessId getpid

// Interlocked* operations expressed with the __sync atomic builtins.
// CompareExchange and ExchangeAdd yield the value held before the operation;
// Increment / Decrement / Add yield the value after it.
// Note the argument order swap between InterlockedCompareExchange
// (Exchange, Comparand) and __sync_val_compare_and_swap (Comparand, Exchange).
#define InterlockedCompareExchange(Dest, Exchange, Comparand) \
    __sync_val_compare_and_swap(Dest, Comparand, Exchange)
#define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value)
#define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1)
#define InterlockedDecrement64(Append) __sync_sub_and_fetch(Append, 1)
#define InterlockedIncrement(Append) __sync_add_and_fetch(Append, 1)
#define InterlockedAdd(Addend, Value) __sync_add_and_fetch(Addend, Value)
#define InterlockedAdd64(Addend, Value) __sync_add_and_fetch(Addend, Value)
// Compiler-level barrier only: the empty asm with a "memory" clobber emits no instruction.
#define _ReadWriteBarrier() asm volatile("" ::: "memory")

// The warning push/disable/pop helpers expand to nothing on this branch.
#define PRAGMA_WARNING_PUSH_DISABLE(...)
#define PRAGMA_WARNING_POP()

#define ZeroMemory(dst, size) memset(dst, 0, size)
286 #else
287
288 #error Unsupported OS/system.
289
290 #endif
291
// Storage-class specifier for per-thread variables.
#define THREAD thread_local

// Universal types
// Nested byte arrays whose sizeof is 1 KiB, 1 MiB and 1 GiB respectively.
typedef uint8_t  KILOBYTE[1024];
typedef KILOBYTE MEGABYTE[1024];
typedef MEGABYTE GIGABYTE[1024];

// Alignment shorthands built on OSALIGN: 64 bytes (presumably one cache line),
// and the SIMD / SIMD16 widths configured through the KNOB_* values.
#define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)
#define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES)
#define OSALIGNSIMD16(RWORD) OSALIGN(RWORD, KNOB_SIMD16_BYTES)

#include "common/swr_assert.h"

// Marks a declaration as intentionally unused on GNU-compatible compilers;
// expands to nothing elsewhere.
#ifdef __GNUC__
#define ATTR_UNUSED __attribute__((unused))
#else
#define ATTR_UNUSED
#endif

// Declares an API entry point twice over: a function-pointer typedef named
// PFN<_funcName> and a prototype for _funcName itself, both using the SWR_API
// calling convention and the given argument list.
#define SWR_FUNC(_retType, _funcName, /* args */...)        \
    typedef _retType(SWR_API* PFN##_funcName)(__VA_ARGS__); \
    _retType SWR_API _funcName(__VA_ARGS__);
314
// Defined in os.cpp
/// Presumably assigns pThreadName as the name of the calling thread
/// (implementation not visible here - confirm in os.cpp).
void SWR_API SetCurrentThreadName(const char* pThreadName);
/// Presumably creates the directory hierarchy named by path
/// (implementation not visible here - confirm in os.cpp).
void SWR_API CreateDirectoryPath(const std::string& path);

/// Execute Command (block until finished)
/// @returns process exit value
int SWR_API
    ExecCmd(const std::string& cmd,                     ///< (In) Command line string
            const char*        pOptEnvStrings = nullptr, ///< (Optional In) Environment block for new process
            std::string*       pOptStdOut     = nullptr, ///< (Optional Out) Standard Output text
            std::string*       pOptStdErr     = nullptr, ///< (Optional Out) Standard Error text
            const std::string* pOptStdIn      = nullptr); ///< (Optional In) Standard Input text
327
328
329 /// Helper for setting up FP state
330 /// @returns old csr state
SetOptimalVectorCSR()331 static INLINE uint32_t SetOptimalVectorCSR()
332 {
333 uint32_t oldCSR = _mm_getcsr();
334
335 uint32_t newCSR = (oldCSR & ~(_MM_ROUND_MASK | _MM_DENORMALS_ZERO_MASK | _MM_FLUSH_ZERO_MASK));
336 newCSR |= (_MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);
337 _mm_setcsr(newCSR);
338
339 return oldCSR;
340 }
341
342 /// Set Vector CSR state.
343 /// @param csrState - should be value returned from SetOptimalVectorCSR()
RestoreVectorCSR(uint32_t csrState)344 static INLINE void RestoreVectorCSR(uint32_t csrState)
345 {
346 _mm_setcsr(csrState);
347 }
348
349 #endif //__SWR_OS_H__
350