1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2023 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 /**
19 * @brief Platform-specific function implementations.
20 *
21 * This module contains functions with strongly OS-dependent implementations:
22 *
23 * * CPU count queries
24 * * Threading
25 * * Time
26 *
27 * In addition to the basic thread abstraction (which is native pthreads on
28 * all platforms, except Windows where it is an emulation of pthreads), a
29 * utility function to create N threads and wait for them to complete a batch
30 * task has also been provided.
31 */
32
33 #include "astcenccli_internal.h"
34
35 /* ============================================================================
36 Platform code for Windows using the Win32 APIs.
37 ============================================================================ */
38 #if defined(_WIN32) && !defined(__CYGWIN__)
39
40 #define WIN32_LEAN_AND_MEAN
41 #include <windows.h>
42
43 /** @brief Alias pthread_t to one of the internal Windows types. */
44 typedef HANDLE pthread_t;
45
46 /** @brief Alias pthread_attr_t to one of the internal Windows types. */
47 typedef int pthread_attr_t;
48
49 /**
50 * @brief Proxy Windows @c CreateThread underneath a pthreads-like wrapper.
51 */
pthread_create(pthread_t * thread,const pthread_attr_t * attribs,void * (* threadfunc)(void *),void * thread_arg)52 static int pthread_create(
53 pthread_t* thread,
54 const pthread_attr_t* attribs,
55 void* (*threadfunc)(void*),
56 void* thread_arg
57 ) {
58 static_cast<void>(attribs);
59 LPTHREAD_START_ROUTINE func = reinterpret_cast<LPTHREAD_START_ROUTINE>(threadfunc);
60 *thread = CreateThread(nullptr, 0, func, thread_arg, 0, nullptr);
61
62 // Ensure we return 0 on success, non-zero on error
63 if (*thread == NULL)
64 {
65 return 1;
66 }
67
68 return 0;
69 }
70
71 /**
72 * @brief Manually set CPU group and thread affinity.
73 *
74 * This is needed on Windows 10 or older to allow benefit from large core count
75 * systems with more than 64 logical CPUs. The assignment is skipped on systems
76 * with a single processor group, as it is not necessary.
77 */
set_group_affinity(pthread_t thread,int thread_index)78 static void set_group_affinity(
79 pthread_t thread,
80 int thread_index
81 ) {
82 // Skip thread assignment for hardware with a single CPU group
83 int group_count = GetActiveProcessorGroupCount();
84 if (group_count == 1)
85 {
86 return;
87 }
88
89 // Ensure we have a valid assign if user creates more threads than cores
90 int assign_index = thread_index % get_cpu_count();
91 int assign_group { 0 };
92 int assign_group_cpu_count { 0 };
93
94 // Determine which core group and core in the group to use for this thread
95 int group_cpu_count_sum { 0 };
96 for (int group = 0; group < group_count; group++)
97 {
98 int group_cpu_count = static_cast<int>(GetMaximumProcessorCount(group));
99 group_cpu_count_sum += group_cpu_count;
100
101 if (assign_index < group_cpu_count_sum)
102 {
103 assign_group = group;
104 assign_group_cpu_count = group_cpu_count;
105 break;
106 }
107 }
108
109 // Set the affinity to the assigned group, and all supported cores
110 GROUP_AFFINITY affinity {};
111 affinity.Mask = (1 << assign_group_cpu_count) - 1;
112 affinity.Group = assign_group;
113 SetThreadGroupAffinity(thread, &affinity, nullptr);
114 }
115
116 /**
117 * @brief Proxy Windows @c WaitForSingleObject underneath a pthreads-like wrapper.
118 */
pthread_join(pthread_t thread,void ** value)119 static int pthread_join(
120 pthread_t thread,
121 void** value
122 ) {
123 static_cast<void>(value);
124 WaitForSingleObject(thread, INFINITE);
125 return 0;
126 }
127
128 /* See header for documentation */
get_cpu_count()129 int get_cpu_count()
130 {
131 DWORD cpu_count = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
132 return static_cast<int>(cpu_count);
133 }
134
135 /* See header for documentation */
get_time()136 double get_time()
137 {
138 FILETIME tv;
139 GetSystemTimePreciseAsFileTime(&tv);
140 unsigned long long ticks = tv.dwHighDateTime;
141 ticks = (ticks << 32) | tv.dwLowDateTime;
142 return static_cast<double>(ticks) / 1.0e7;
143 }
144
145 /* ============================================================================
Platform code for a platform using POSIX APIs.
147 ============================================================================ */
148 #else
149
150 #include <pthread.h>
151 #include <sys/time.h>
152 #include <unistd.h>
153
/* See header for documentation */
int get_cpu_count()
{
    // sysconf reports -1 on error; always report at least one CPU so that
    // callers never see a zero or negative core count
    long online_count = sysconf(_SC_NPROCESSORS_ONLN);
    if (online_count < 1)
    {
        return 1;
    }

    return static_cast<int>(online_count);
}
159
/* See header for documentation */
double get_time()
{
    timeval now;
    gettimeofday(&now, nullptr);

    // Combine the whole seconds with the microsecond remainder
    double whole_seconds = static_cast<double>(now.tv_sec);
    double micro_seconds = static_cast<double>(now.tv_usec);
    return whole_seconds + micro_seconds * 1.0e-6;
}
167
168 #endif
169
/**
 * @brief Per-thread launch record used by launch_threads.
 *
 * One record is filled in for each worker thread and passed to the thread
 * entry point as its opaque argument.
 */
struct launch_desc
{
    /** @brief Handle of the native thread created for this record. */
    pthread_t thread_handle;

    /** @brief Thread count passed as the first argument of @c func. */
    int thread_count;

    /** @brief Index of this thread, passed as the second argument of @c func. */
    int thread_id;

    /** @brief User workload function run by the thread. */
    void (*func)(int, int, void*);

    /** @brief User data passed as the final argument of @c func. */
    void* payload;
};
186
187 /**
188 * @brief Helper function to translate thread entry points.
189 *
190 * Convert a (void*) thread entry to an (int, void*) thread entry, where the
191 * integer contains the thread ID in the thread pool.
192 *
193 * @param p The thread launch helper payload.
194 */
launch_threads_helper(void * p)195 static void* launch_threads_helper(
196 void *p
197 ) {
198 launch_desc* ltd = reinterpret_cast<launch_desc*>(p);
199 ltd->func(ltd->thread_count, ltd->thread_id, ltd->payload);
200 return nullptr;
201 }
202
203 /* See header for documentation */
launch_threads(const char * operation,int thread_count,void (* func)(int,int,void *),void * payload)204 void launch_threads(
205 const char* operation,
206 int thread_count,
207 void (*func)(int, int, void*),
208 void *payload
209 ) {
210 // Directly execute single threaded workloads on this thread
211 if (thread_count <= 1)
212 {
213 func(1, 0, payload);
214 return;
215 }
216
217 // Otherwise spawn worker threads
218 launch_desc *thread_descs = new launch_desc[thread_count];
219 int actual_thread_count { 0 };
220
221 for (int i = 0; i < thread_count; i++)
222 {
223 thread_descs[actual_thread_count].thread_count = thread_count;
224 thread_descs[actual_thread_count].thread_id = actual_thread_count;
225 thread_descs[actual_thread_count].payload = payload;
226 thread_descs[actual_thread_count].func = func;
227
228 // Handle pthread_create failing by simply using fewer threads
229 int error = pthread_create(
230 &(thread_descs[actual_thread_count].thread_handle),
231 nullptr,
232 launch_threads_helper,
233 reinterpret_cast<void*>(thread_descs + actual_thread_count));
234
235 // Track how many threads we actually created
236 if (!error)
237 {
238 // Windows needs explicit thread assignment to handle large core count systems
239 #if defined(_WIN32) && !defined(__CYGWIN__)
240 set_group_affinity(
241 thread_descs[actual_thread_count].thread_handle,
242 actual_thread_count);
243 #endif
244
245 actual_thread_count++;
246 }
247 }
248
249 // If we did not create thread_count threads then emit a warning
250 if (actual_thread_count != thread_count)
251 {
252 int log_count = actual_thread_count == 0 ? 1 : actual_thread_count;
253 const char* log_s = log_count == 1 ? "" : "s";
254 printf("WARNING: %s using %d thread%s due to thread creation error\n\n",
255 operation, log_count, log_s);
256 }
257
258 // If we managed to spawn any threads wait for them to complete
259 if (actual_thread_count != 0)
260 {
261 for (int i = 0; i < actual_thread_count; i++)
262 {
263 pthread_join(thread_descs[i].thread_handle, nullptr);
264 }
265 }
266 // Else fall back to using this thread
267 else
268 {
269 func(1, 0, payload);
270 }
271
272 delete[] thread_descs;
273 }
274