/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */

//
//
//

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

//
//
//

#include "context.h"
#include "block.h"
#include "grid.h"
#include "common/cl/assert_cl.h"
#include "config_cl.h"
#include "runtime_cl_12.h"
#include "export_cl_12.h"

//
//
//
static
void
skc_block_pool_create(struct skc_runtime * const runtime, cl_command_queue cq)
{
  // cache a pointer to the block pool config
  runtime->block_pool.size = &runtime->config->block_pool;

  // create block extent
  skc_extent_pdrw_alloc(runtime,
                        &runtime->block_pool.blocks,
                        runtime->block_pool.size->pool_size *
                        runtime->config->block.bytes);

  // allocate block pool ids -- the ring is sized to a power of two,
  // presumably so indices can wrap with a simple mask
  skc_extent_pdrw_alloc(runtime,
                        &runtime->block_pool.ids,
                        runtime->block_pool.size->ring_pow2 * sizeof(skc_uint));

  // allocate block pool atomics
  skc_extent_phr_pdrw_alloc(runtime,
                            &runtime->block_pool.atomics,
                            sizeof(union skc_block_pool_atomic));

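  //
  // NOTE: the init kernel below is launched with a grid of exactly 2
  // work-items -- one per counter -- and the debug reporter in this file
  // only touches .reads/.writes, so a sketch of the assumed layout of
  // the atomics union is:
  //
  //   union skc_block_pool_atomic
  //   {
  //     skc_uint2 rw;      // vector alias -- an assumption
  //
  //     struct {
  //       skc_uint reads;  // block ids dequeued from the pool ring
  //       skc_uint writes; // block ids enqueued to the pool ring
  //     };
  //   };
  //
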
  // acquire pool id and atomic initialization kernels
  cl_kernel k0 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS);
  cl_kernel k1 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS);

  // init ids
  cl(SetKernelArg(k0,0,sizeof(runtime->block_pool.ids.drw),&runtime->block_pool.ids.drw));
  cl(SetKernelArg(k0,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));

  // the kernel grid is shaped by the target device
  skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS,
                            cq,k0,runtime->block_pool.size->pool_size,
                            0,NULL,NULL);

  // init atomics
  cl(SetKernelArg(k1,0,sizeof(runtime->block_pool.atomics.drw),&runtime->block_pool.atomics.drw));
  cl(SetKernelArg(k1,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));

  // the atomics kernel grid is always 2 -- one work-item per counter
  skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS,
                            cq,k1,2,
                            0,NULL,NULL);

  // kickstart kernel execution without blocking -- the matching
  // clFinish() is issued by skc_runtime_cl_12_create()
  cl(Flush(cq));

  // release kernels
  cl(ReleaseKernel(k0));
  cl(ReleaseKernel(k1));
}

static
void
skc_block_pool_dispose(struct skc_runtime * const runtime)
{
  skc_extent_phr_pdrw_free(runtime,&runtime->block_pool.atomics);
  skc_extent_pdrw_free    (runtime,&runtime->block_pool.ids);
  skc_extent_pdrw_free    (runtime,&runtime->block_pool.blocks);
}

//
//
//

static
bool
skc_runtime_yield(struct skc_runtime * const runtime)
{
  return skc_scheduler_yield(runtime->scheduler);
}

static
void
skc_runtime_wait(struct skc_runtime * const runtime)
{
  skc_scheduler_wait(runtime->scheduler);
}
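
//
// NOTE: these two wrappers expose the scheduler to the
// platform-independent context as pfns -- yield() polls the scheduler
// once and (presumably) reports whether runnable work remains, while
// wait() blocks the caller until the scheduler can make progress.
//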

//
//
//

skc_err
skc_runtime_cl_12_create(struct skc_context * const context,
                         cl_context           context_cl,
                         cl_device_id         device_id_cl)
{
  // allocate the runtime
  struct skc_runtime * const runtime = malloc(sizeof(*runtime));

  // save off CL objects
  runtime->cl.context   = context_cl;
  runtime->cl.device_id = device_id_cl;

  // query device alignment
  cl_uint align_bits;

  cl(GetDeviceInfo(device_id_cl,
                   CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                   sizeof(align_bits),
                   &align_bits,
                   NULL));

  runtime->cl.align_bytes = align_bits / 8;
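  // e.g. CL_DEVICE_MEM_BASE_ADDR_ALIGN is reported in bits -- a device
  // returning the spec's 1024-bit minimum yields a 128 byte alignment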

  // create the device
  skc_device_create(runtime);

  // create the host and device allocators
  skc_allocator_host_create(runtime);
  skc_allocator_device_create(runtime);

  // create the scheduler -- the config determines how many slots
  runtime->scheduler = skc_scheduler_create(runtime,runtime->config->scheduler.size);

  // allocate deps structure
  runtime->deps = skc_grid_deps_create(runtime,
                                       runtime->scheduler,
                                       runtime->config->block_pool.pool_size);

  // initialize cq pool
  skc_cq_pool_create(runtime,
                     &runtime->cq_pool,
                     runtime->config->cq_pool.cq_props,
                     runtime->config->cq_pool.size);

  // acquire in-order cq
  cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);

  // initialize block pool
  skc_block_pool_create(runtime,cq);

  // initialize handle pool
  skc_handle_pool_create(runtime,
                         &runtime->handle_pool,
                         runtime->config->handle_pool.size,
                         runtime->config->handle_pool.width,
                         runtime->config->handle_pool.recs);

  //
  // initialize pfns
  //
  // FIXME -- at this point we will have identified which device we've
  // targeted and will load a DLL (or select from a built-in library)
  // that contains all the pfns.
  //
  context->runtime = runtime;

  context->yield = skc_runtime_yield;
  context->wait  = skc_runtime_wait;

  context->path_builder = skc_path_builder_cl_12_create;
  context->path_retain  = skc_runtime_path_host_retain;
  context->path_release = skc_runtime_path_host_release;
  context->path_flush   = skc_runtime_path_host_flush;

  context->raster_builder = skc_raster_builder_cl_12_create;
  context->raster_retain  = skc_runtime_raster_host_retain;
  context->raster_release = skc_runtime_raster_host_release;
  context->raster_flush   = skc_runtime_raster_host_flush;

  context->composition = skc_composition_cl_12_create;
  context->styling     = skc_styling_cl_12_create;

  context->surface = skc_surface_cl_12_create;

  // block until the block pool init kernels have completed
  cl(Finish(cq));

  // release the in-order cq
  skc_runtime_release_cq_in_order(runtime,cq);

  return SKC_ERR_SUCCESS;
}
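
//
// A minimal usage sketch, assuming the application has already created
// the cl_context and selected a cl_device_id -- in the real library the
// platform-independent context layer drives create/dispose, so the
// variable names below are illustrative only:
//
//   struct skc_context * context = /* allocated by the context layer */;
//
//   skc_err err = skc_runtime_cl_12_create(context,cl_ctx,cl_dev);
//
//   if (err == SKC_ERR_SUCCESS)
//     {
//       // build paths and rasters, compose and render via the pfns
//       // installed on the context ...
//
//       err = skc_runtime_cl_12_dispose(context);
//     }
//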

//
//
//

skc_err
skc_runtime_cl_12_dispose(struct skc_context * const context)
{
  //
  // FIXME -- incomplete
  //
  fprintf(stderr,"%s incomplete!\n",__func__);

  struct skc_runtime * runtime = context->runtime;

  skc_allocator_device_dispose(runtime);
  skc_allocator_host_dispose(runtime);

  skc_scheduler_dispose(runtime,runtime->scheduler);

  skc_grid_deps_dispose(runtime->deps);

  skc_cq_pool_dispose(runtime,&runtime->cq_pool);

  skc_block_pool_dispose(runtime);

  // skc_handle_pool_dispose(context->runtime);

  return SKC_ERR_SUCCESS;
}

//
// REPORT BLOCK POOL ALLOCATION
//

void
skc_runtime_cl_12_debug(struct skc_context * const context)
{
  struct skc_runtime * const runtime = context->runtime;

  // acquire an in-order cq
  cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);

  // copy atomics to host
  skc_extent_phr_pdrw_read(&runtime->block_pool.atomics,cq,NULL);

  // block until complete
  cl(Finish(cq));

  // release the in-order cq
  skc_runtime_release_cq_in_order(runtime,cq);

  union skc_block_pool_atomic const * const bp_atomic = runtime->block_pool.atomics.hr;

  skc_uint const available = bp_atomic->writes - bp_atomic->reads;
  skc_uint const inuse     = runtime->config->block_pool.pool_size - available;
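
  // every block id enqueued to the pool ring bumps .writes and every id
  // dequeued bumps .reads, so their difference is the free-block count
  // (presumably modulo 32-bit wraparound) -- e.g. 81920 available
  // 128-byte blocks report as exactly 10.00 MB below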

  fprintf(stderr,
          "writes/reads/avail/alloc: %9u / %9u / %9u = %6.2f MB / %9u = %6.2f MB\n",
          bp_atomic->writes,
          bp_atomic->reads,
          available,
          (available * runtime->config->block.bytes) / (1024.0*1024.0),
          inuse,
          (inuse     * runtime->config->block.bytes) / (1024.0*1024.0));
}

//
//
//