• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Standard C headers */
2 #include <stddef.h>
3 
4 /* Dependencies */
5 #include <fxdiv.h>
6 
7 /* Library header */
8 #include <pthreadpool.h>
9 
10 
/*
 * Integer division rounded towards positive infinity.
 *
 * Returns dividend / divisor, plus one when the division leaves a remainder.
 * The divisor must be non-zero: it is used directly in / and %.
 */
static inline size_t divide_round_up(size_t dividend, size_t divisor) {
	const size_t quotient = dividend / divisor;
	const size_t remainder = dividend % divisor;
	/* A partial trailing chunk still counts as a whole one */
	return remainder != 0 ? quotient + 1 : quotient;
}
18 
/* Returns the smaller of two size_t values. */
static inline size_t min(size_t a, size_t b) {
	if (a < b) {
		return a;
	}
	return b;
}
22 
/*
 * Runs function over a 1D range by forwarding to pthreadpool_parallelize_1d
 * with no flags set.
 *
 * threadpool, argument and range are passed through unchanged.
 */
void pthreadpool_compute_1d(
	pthreadpool_t threadpool,
	pthreadpool_function_1d_t function,
	void* argument,
	size_t range)
{
	/* The callback is handed over under the task type the callee expects */
	const pthreadpool_task_1d_t task = (pthreadpool_task_1d_t) function;
	pthreadpool_parallelize_1d(threadpool, task, argument, range, 0 /* flags */);
}
33 
/*
 * Runs function over a tiled 1D range by forwarding to
 * pthreadpool_parallelize_1d_tile_1d with no flags set.
 *
 * threadpool, argument, range and tile are passed through unchanged.
 */
void pthreadpool_compute_1d_tiled(
	pthreadpool_t threadpool,
	pthreadpool_function_1d_tiled_t function,
	void* argument,
	size_t range,
	size_t tile)
{
	/* The callback is handed over under the task type the callee expects */
	const pthreadpool_task_1d_tile_1d_t task = (pthreadpool_task_1d_tile_1d_t) function;
	pthreadpool_parallelize_1d_tile_1d(threadpool, task, argument, range, tile, 0 /* flags */);
}
45 
/*
 * Runs function over a 2D range by forwarding to pthreadpool_parallelize_2d
 * with no flags set.
 *
 * threadpool, argument, range_i and range_j are passed through unchanged.
 */
void pthreadpool_compute_2d(
	pthreadpool_t threadpool,
	pthreadpool_function_2d_t function,
	void* argument,
	size_t range_i,
	size_t range_j)
{
	/* The callback is handed over under the task type the callee expects */
	const pthreadpool_task_2d_t task = (pthreadpool_task_2d_t) function;
	pthreadpool_parallelize_2d(threadpool, task, argument, range_i, range_j, 0 /* flags */);
}
57 
/*
 * Runs function over a tiled 2D range by forwarding to
 * pthreadpool_parallelize_2d_tile_2d with no flags set.
 *
 * threadpool, argument, the two ranges and the two tile sizes are passed
 * through unchanged.
 */
void pthreadpool_compute_2d_tiled(
	pthreadpool_t threadpool,
	pthreadpool_function_2d_tiled_t function,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t tile_i,
	size_t tile_j)
{
	/* The callback is handed over under the task type the callee expects */
	const pthreadpool_task_2d_tile_2d_t task = (pthreadpool_task_2d_tile_2d_t) function;
	pthreadpool_parallelize_2d_tile_2d(threadpool, task, argument,
		range_i, range_j, tile_i, tile_j, 0 /* flags */);
}
71 
72 struct compute_3d_tiled_context {
73 	pthreadpool_function_3d_tiled_t function;
74 	void* argument;
75 	struct fxdiv_divisor_size_t tile_range_j;
76 	struct fxdiv_divisor_size_t tile_range_k;
77 	size_t range_i;
78 	size_t range_j;
79 	size_t range_k;
80 	size_t tile_i;
81 	size_t tile_j;
82 	size_t tile_k;
83 };
84 
compute_3d_tiled(const struct compute_3d_tiled_context * context,size_t linear_index)85 static void compute_3d_tiled(const struct compute_3d_tiled_context* context, size_t linear_index) {
86 	const struct fxdiv_divisor_size_t tile_range_k = context->tile_range_k;
87 	const struct fxdiv_result_size_t tile_index_ij_k = fxdiv_divide_size_t(linear_index, tile_range_k);
88 	const struct fxdiv_divisor_size_t tile_range_j = context->tile_range_j;
89 	const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_k.quotient, tile_range_j);
90 	const size_t max_tile_i = context->tile_i;
91 	const size_t max_tile_j = context->tile_j;
92 	const size_t max_tile_k = context->tile_k;
93 	const size_t index_i = tile_index_i_j.quotient * max_tile_i;
94 	const size_t index_j = tile_index_i_j.remainder * max_tile_j;
95 	const size_t index_k = tile_index_ij_k.remainder * max_tile_k;
96 	const size_t tile_i = min(max_tile_i, context->range_i - index_i);
97 	const size_t tile_j = min(max_tile_j, context->range_j - index_j);
98 	const size_t tile_k = min(max_tile_k, context->range_k - index_k);
99 	context->function(context->argument, index_i, index_j, index_k, tile_i, tile_j, tile_k);
100 }
101 
/*
 * Invokes function once per tile of a 3D iteration space.
 *
 * Each call receives argument, the offsets (i, j, k) of the tile's first
 * element, and the tile extents, which are clipped at the end of each range.
 *
 * When the pool reports at most one thread, the tiles are processed by nested
 * loops on the calling thread. Otherwise the tile grid is flattened into a
 * single index and dispatched through pthreadpool_parallelize_1d.
 *
 * If any range is zero there are no tiles and the function returns without
 * invoking the callback.
 *
 * Precondition: tile_i, tile_j and tile_k must be non-zero. They are used as
 * loop steps and as divisors.
 */
void pthreadpool_compute_3d_tiled(
	pthreadpool_t threadpool,
	pthreadpool_function_3d_tiled_t function,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t tile_i,
	size_t tile_j,
	size_t tile_k)
{
	/*
	 * Empty iteration space: nothing to run. Returning here keeps both paths
	 * consistent and avoids initializing an fxdiv divisor from a zero tile
	 * count on the parallel path.
	 */
	if (range_i == 0 || range_j == 0 || range_k == 0) {
		return;
	}

	if (pthreadpool_get_threads_count(threadpool) <= 1) {
		/* No thread pool used: execute function sequentially on the calling thread */
		for (size_t i = 0; i < range_i; i += tile_i) {
			for (size_t j = 0; j < range_j; j += tile_j) {
				for (size_t k = 0; k < range_k; k += tile_k) {
					function(argument, i, j, k, min(range_i - i, tile_i), min(range_j - j, tile_j), min(range_k - k, tile_k));
				}
			}
		}
	} else {
		/* Execute in parallel on the thread pool using linearized index */
		const size_t tile_range_i = divide_round_up(range_i, tile_i);
		const size_t tile_range_j = divide_round_up(range_j, tile_j);
		const size_t tile_range_k = divide_round_up(range_k, tile_k);
		/* The context lives on this stack frame and is passed by address to the dispatch call below */
		struct compute_3d_tiled_context context = {
			.function = function,
			.argument = argument,
			.tile_range_j = fxdiv_init_size_t(tile_range_j),
			.tile_range_k = fxdiv_init_size_t(tile_range_k),
			.range_i = range_i,
			.range_j = range_j,
			.range_k = range_k,
			.tile_i = tile_i,
			.tile_j = tile_j,
			.tile_k = tile_k
		};
		pthreadpool_parallelize_1d(threadpool,
			(pthreadpool_task_1d_t) compute_3d_tiled, &context,
			tile_range_i * tile_range_j * tile_range_k,
			0 /* flags */);
	}
}
145 
146 struct compute_4d_tiled_context {
147 	pthreadpool_function_4d_tiled_t function;
148 	void* argument;
149 	struct fxdiv_divisor_size_t tile_range_kl;
150 	struct fxdiv_divisor_size_t tile_range_j;
151 	struct fxdiv_divisor_size_t tile_range_l;
152 	size_t range_i;
153 	size_t range_j;
154 	size_t range_k;
155 	size_t range_l;
156 	size_t tile_i;
157 	size_t tile_j;
158 	size_t tile_k;
159 	size_t tile_l;
160 };
161 
compute_4d_tiled(const struct compute_4d_tiled_context * context,size_t linear_index)162 static void compute_4d_tiled(const struct compute_4d_tiled_context* context, size_t linear_index) {
163 	const struct fxdiv_divisor_size_t tile_range_kl = context->tile_range_kl;
164 	const struct fxdiv_result_size_t tile_index_ij_kl = fxdiv_divide_size_t(linear_index, tile_range_kl);
165 	const struct fxdiv_divisor_size_t tile_range_j = context->tile_range_j;
166 	const struct fxdiv_result_size_t tile_index_i_j = fxdiv_divide_size_t(tile_index_ij_kl.quotient, tile_range_j);
167 	const struct fxdiv_divisor_size_t tile_range_l = context->tile_range_l;
168 	const struct fxdiv_result_size_t tile_index_k_l = fxdiv_divide_size_t(tile_index_ij_kl.remainder, tile_range_l);
169 	const size_t max_tile_i = context->tile_i;
170 	const size_t max_tile_j = context->tile_j;
171 	const size_t max_tile_k = context->tile_k;
172 	const size_t max_tile_l = context->tile_l;
173 	const size_t index_i = tile_index_i_j.quotient * max_tile_i;
174 	const size_t index_j = tile_index_i_j.remainder * max_tile_j;
175 	const size_t index_k = tile_index_k_l.quotient * max_tile_k;
176 	const size_t index_l = tile_index_k_l.remainder * max_tile_l;
177 	const size_t tile_i = min(max_tile_i, context->range_i - index_i);
178 	const size_t tile_j = min(max_tile_j, context->range_j - index_j);
179 	const size_t tile_k = min(max_tile_k, context->range_k - index_k);
180 	const size_t tile_l = min(max_tile_l, context->range_l - index_l);
181 	context->function(context->argument, index_i, index_j, index_k, index_l, tile_i, tile_j, tile_k, tile_l);
182 }
183 
/*
 * Invokes function once per tile of a 4D iteration space.
 *
 * Each call receives argument, the offsets (i, j, k, l) of the tile's first
 * element, and the tile extents, which are clipped at the end of each range.
 *
 * When the pool reports at most one thread, the tiles are processed by nested
 * loops on the calling thread. Otherwise the tile grid is flattened into a
 * single index and dispatched through pthreadpool_parallelize_1d.
 *
 * If any range is zero there are no tiles and the function returns without
 * invoking the callback.
 *
 * Precondition: tile_i, tile_j, tile_k and tile_l must be non-zero. They are
 * used as loop steps and as divisors.
 */
void pthreadpool_compute_4d_tiled(
	pthreadpool_t threadpool,
	pthreadpool_function_4d_tiled_t function,
	void* argument,
	size_t range_i,
	size_t range_j,
	size_t range_k,
	size_t range_l,
	size_t tile_i,
	size_t tile_j,
	size_t tile_k,
	size_t tile_l)
{
	/*
	 * Empty iteration space: nothing to run. Returning here keeps both paths
	 * consistent and avoids initializing an fxdiv divisor from a zero tile
	 * count on the parallel path.
	 */
	if (range_i == 0 || range_j == 0 || range_k == 0 || range_l == 0) {
		return;
	}

	if (pthreadpool_get_threads_count(threadpool) <= 1) {
		/* No thread pool used: execute function sequentially on the calling thread */
		for (size_t i = 0; i < range_i; i += tile_i) {
			for (size_t j = 0; j < range_j; j += tile_j) {
				for (size_t k = 0; k < range_k; k += tile_k) {
					for (size_t l = 0; l < range_l; l += tile_l) {
						function(argument, i, j, k, l,
							min(range_i - i, tile_i), min(range_j - j, tile_j), min(range_k - k, tile_k), min(range_l - l, tile_l));
					}
				}
			}
		}
	} else {
		/* Execute in parallel on the thread pool using linearized index */
		const size_t tile_range_i = divide_round_up(range_i, tile_i);
		const size_t tile_range_j = divide_round_up(range_j, tile_j);
		const size_t tile_range_k = divide_round_up(range_k, tile_k);
		const size_t tile_range_l = divide_round_up(range_l, tile_l);
		/* The context lives on this stack frame and is passed by address to the dispatch call below */
		struct compute_4d_tiled_context context = {
			.function = function,
			.argument = argument,
			.tile_range_kl = fxdiv_init_size_t(tile_range_k * tile_range_l),
			.tile_range_j = fxdiv_init_size_t(tile_range_j),
			.tile_range_l = fxdiv_init_size_t(tile_range_l),
			.range_i = range_i,
			.range_j = range_j,
			.range_k = range_k,
			.range_l = range_l,
			.tile_i = tile_i,
			.tile_j = tile_j,
			.tile_k = tile_k,
			.tile_l = tile_l
		};
		pthreadpool_parallelize_1d(threadpool,
			(pthreadpool_task_1d_t) compute_4d_tiled, &context,
			tile_range_i * tile_range_j * tile_range_k * tile_range_l,
			0 /* flags */);
	}
}
236