• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2023 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Platform-specific function implementations.
20  *
21  * This module contains functions with strongly OS-dependent implementations:
22  *
23  *  * CPU count queries
24  *  * Threading
25  *  * Time
26  *
27  * In addition to the basic thread abstraction (which is native pthreads on
28  * all platforms, except Windows where it is an emulation of pthreads), a
29  * utility function to create N threads and wait for them to complete a batch
30  * task has also been provided.
31  */
32 
33 #include "astcenccli_internal.h"
34 
35 /* ============================================================================
36    Platform code for Windows using the Win32 APIs.
37 ============================================================================ */
38 #if defined(_WIN32) && !defined(__CYGWIN__)
39 
40 #define WIN32_LEAN_AND_MEAN
41 #include <windows.h>
42 
43 /** @brief Alias pthread_t to one of the internal Windows types. */
44 typedef HANDLE pthread_t;
45 
46 /** @brief Alias pthread_attr_t to one of the internal Windows types. */
47 typedef int pthread_attr_t;
48 
49 /**
50  * @brief Proxy Windows @c CreateThread underneath a pthreads-like wrapper.
51  */
pthread_create(pthread_t * thread,const pthread_attr_t * attribs,void * (* threadfunc)(void *),void * thread_arg)52 static int pthread_create(
53 	pthread_t* thread,
54 	const pthread_attr_t* attribs,
55 	void* (*threadfunc)(void*),
56 	void* thread_arg
57 ) {
58 	static_cast<void>(attribs);
59 	LPTHREAD_START_ROUTINE func = reinterpret_cast<LPTHREAD_START_ROUTINE>(threadfunc);
60 	*thread = CreateThread(nullptr, 0, func, thread_arg, 0, nullptr);
61 
62 	// Ensure we return 0 on success, non-zero on error
63 	if (*thread == NULL)
64 	{
65 		return 1;
66 	}
67 
68 	return 0;
69 }
70 
71 /**
72  * @brief Manually set CPU group and thread affinity.
73  *
74  * This is needed on Windows 10 or older to allow benefit from large core count
75  * systems with more than 64 logical CPUs. The assignment is skipped on systems
76  * with a single processor group, as it is not necessary.
77  */
set_group_affinity(pthread_t thread,int thread_index)78 static void set_group_affinity(
79 	pthread_t thread,
80 	int thread_index
81 ) {
82 	// Skip thread assignment for hardware with a single CPU group
83 	int group_count = GetActiveProcessorGroupCount();
84 	if (group_count == 1)
85 	{
86 		return;
87 	}
88 
89 	// Ensure we have a valid assign if user creates more threads than cores
90 	int assign_index = thread_index % get_cpu_count();
91 	int assign_group { 0 };
92 	int assign_group_cpu_count { 0 };
93 
94 	// Determine which core group and core in the group to use for this thread
95 	int group_cpu_count_sum { 0 };
96 	for (int group = 0; group < group_count; group++)
97 	{
98 		int group_cpu_count = static_cast<int>(GetMaximumProcessorCount(group));
99 		group_cpu_count_sum += group_cpu_count;
100 
101 		if (assign_index < group_cpu_count_sum)
102 		{
103 			assign_group = group;
104 			assign_group_cpu_count = group_cpu_count;
105 			break;
106 		}
107 	}
108 
109 	// Set the affinity to the assigned group, and all supported cores
110 	GROUP_AFFINITY affinity {};
111 	affinity.Mask = (1 << assign_group_cpu_count) - 1;
112 	affinity.Group = assign_group;
113 	SetThreadGroupAffinity(thread, &affinity, nullptr);
114 }
115 
116 /**
117  * @brief Proxy Windows @c WaitForSingleObject underneath a pthreads-like wrapper.
118  */
pthread_join(pthread_t thread,void ** value)119 static int pthread_join(
120 	pthread_t thread,
121 	void** value
122 ) {
123 	static_cast<void>(value);
124 	WaitForSingleObject(thread, INFINITE);
125 	return 0;
126 }
127 
128 /* See header for documentation */
get_cpu_count()129 int get_cpu_count()
130 {
131 	DWORD cpu_count = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
132 	return static_cast<int>(cpu_count);
133 }
134 
135 /* See header for documentation */
get_time()136 double get_time()
137 {
138 	FILETIME tv;
139 	GetSystemTimePreciseAsFileTime(&tv);
140 	unsigned long long ticks = tv.dwHighDateTime;
141 	ticks = (ticks << 32) | tv.dwLowDateTime;
142 	return static_cast<double>(ticks) / 1.0e7;
143 }
144 
145 /* ============================================================================
146    Platform code for a platform using POSIX APIs.
147 ============================================================================ */
148 #else
149 
150 #include <pthread.h>
151 #include <sys/time.h>
152 #include <unistd.h>
153 
/* See header for documentation */
int get_cpu_count()
{
	// sysconf() returns -1 if the query is unsupported or fails. Report a
	// single CPU in that case so callers never see a zero or negative count.
	long online_count = sysconf(_SC_NPROCESSORS_ONLN);
	if (online_count < 1)
	{
		return 1;
	}

	return static_cast<int>(online_count);
}
159 
/* See header for documentation */
double get_time()
{
	timeval now;
	gettimeofday(&now, nullptr);

	// Combine whole seconds with the microsecond remainder
	const double whole_seconds = static_cast<double>(now.tv_sec);
	const double fraction = static_cast<double>(now.tv_usec) * 1.0e-6;
	return whole_seconds + fraction;
}
167 
168 #endif
169 
/**
 * @brief Per-thread launch record used by launch_threads.
 *
 * One record exists for each worker; a pointer to it is handed to the native
 * thread entry point, which unpacks it and calls the user function.
 */
struct launch_desc
{
	/** @brief Native handle of the worker thread. */
	pthread_t thread_handle;

	/** @brief Total number of threads in the thread pool. */
	int thread_count;

	/** @brief Index of this thread in the thread pool. */
	int thread_id;

	/** @brief User function to run, called as (thread_count, thread_id, payload). */
	void (*func)(int, int, void*);

	/** @brief Opaque user payload forwarded to @c func. */
	void* payload;
};
186 
187 /**
188  * @brief Helper function to translate thread entry points.
189  *
190  * Convert a (void*) thread entry to an (int, void*) thread entry, where the
191  * integer contains the thread ID in the thread pool.
192  *
193  * @param p The thread launch helper payload.
194  */
launch_threads_helper(void * p)195 static void* launch_threads_helper(
196 	void *p
197 ) {
198 	launch_desc* ltd = reinterpret_cast<launch_desc*>(p);
199 	ltd->func(ltd->thread_count, ltd->thread_id, ltd->payload);
200 	return nullptr;
201 }
202 
203 /* See header for documentation */
launch_threads(const char * operation,int thread_count,void (* func)(int,int,void *),void * payload)204 void launch_threads(
205 	const char* operation,
206 	int thread_count,
207 	void (*func)(int, int, void*),
208 	void *payload
209 ) {
210 	// Directly execute single threaded workloads on this thread
211 	if (thread_count <= 1)
212 	{
213 		func(1, 0, payload);
214 		return;
215 	}
216 
217 	// Otherwise spawn worker threads
218 	launch_desc *thread_descs = new launch_desc[thread_count];
219 	int actual_thread_count { 0 };
220 
221 	for (int i = 0; i < thread_count; i++)
222 	{
223 		thread_descs[actual_thread_count].thread_count = thread_count;
224 		thread_descs[actual_thread_count].thread_id = actual_thread_count;
225 		thread_descs[actual_thread_count].payload = payload;
226 		thread_descs[actual_thread_count].func = func;
227 
228 		// Handle pthread_create failing by simply using fewer threads
229 		int error = pthread_create(
230 			&(thread_descs[actual_thread_count].thread_handle),
231 			nullptr,
232 			launch_threads_helper,
233 			reinterpret_cast<void*>(thread_descs + actual_thread_count));
234 
235 		// Track how many threads we actually created
236 		if (!error)
237 		{
238 			// Windows needs explicit thread assignment to handle large core count systems
239 			#if defined(_WIN32) && !defined(__CYGWIN__)
240 				set_group_affinity(
241 					thread_descs[actual_thread_count].thread_handle,
242 					actual_thread_count);
243 			#endif
244 
245 			actual_thread_count++;
246 		}
247 	}
248 
249 	// If we did not create thread_count threads then emit a warning
250 	if (actual_thread_count != thread_count)
251 	{
252 		int log_count = actual_thread_count == 0 ? 1 : actual_thread_count;
253 		const char* log_s = log_count == 1 ? "" : "s";
254 		printf("WARNING: %s using %d thread%s due to thread creation error\n\n",
255 		       operation, log_count, log_s);
256 	}
257 
258 	// If we managed to spawn any threads wait for them to complete
259 	if (actual_thread_count != 0)
260 	{
261 		for (int i = 0; i < actual_thread_count; i++)
262 		{
263 			pthread_join(thread_descs[i].thread_handle, nullptr);
264 		}
265 	}
266 	// Else fall back to using this thread
267 	else
268 	{
269 		func(1, 0, payload);
270 	}
271 
272 	delete[] thread_descs;
273 }
274