/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */

//
//
//

#include <assert.h>
#include <memory.h>

#include "runtime_cl_12.h"
#include "scheduler.h"

//
//
//

#ifndef NDEBUG

#include <stdio.h>

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)    \
  fprintf(stderr,                                                  \
          "suballocator %s : [ %4u ] : alloc( %9u ) @ %4u = %u\n", \
          suballocator->name,                                      \
          suballocator->rem.avail,                                 \
          (skc_uint)ss,                                            \
          subbuf_id,                                               \
          (skc_uint)suballocator->total);

#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)     \
  fprintf(stderr,                                                  \
          "suballocator %s : [ %4u ] : free ( %9u ) @ %4u = %u\n", \
          suballocator->name,                                      \
          suballocator->rem.avail,                                 \
          (skc_uint)ss,                                            \
          subbuf_id,                                               \
          (skc_uint)suballocator->total);

#else

#define SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,subbuf_id,ss)
#define SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,ss)

#endif
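
//
// Example of a trace line emitted by the debug macros above, for a
// hypothetical suballocator named "extent" that allocates 1024 bytes
// from subbuf id 2 while 3 subbuffers are marked available and 4096
// bytes are outstanding after the alloc:
//
//   suballocator extent : [    3 ] : alloc(      1024 ) @    2 = 4096
//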
51
52 //
53 //
54 //
55
56 void
skc_suballocator_create(struct skc_runtime * const runtime,struct skc_suballocator * const suballocator,char const * const name,skc_uint const subbufs,size_t const align,size_t const size)57 skc_suballocator_create(struct skc_runtime * const runtime,
58 struct skc_suballocator * const suballocator,
59 char const * const name,
60 skc_uint const subbufs,
61 size_t const align,
62 size_t const size)
63 {
64 size_t const subbufs_size = sizeof(*suballocator->subbufs) * subbufs;
65
66 // allocate array of subbuf records
67 suballocator->subbufs = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,subbufs_size);
68
69 // zero subbufs
70 memset(suballocator->subbufs,0,subbufs_size);
71
72 // initialize starting subbuf
73 suballocator->subbufs[0].size = (skc_subbuf_size_t)size;
74
75 // allocate array of ids
76 suballocator->ids = skc_runtime_host_perm_alloc(runtime,
77 SKC_MEM_FLAGS_READ_WRITE,
78 sizeof(*suballocator->ids) * subbufs);
79 for (skc_uint ii=0; ii<subbufs; ii++)
80 suballocator->ids[ii] = ii;
81
82 suballocator->rem.avail = 1;
83 suballocator->rem.spare = subbufs - 1;
84
85 suballocator->align = (skc_uint)align;
86 suballocator->count = subbufs;
87
88 suballocator->size = (skc_subbuf_size_t)size;
89 suballocator->total = 0;
90
91 suballocator->name = name;
92 }
93
94 void
skc_suballocator_dispose(struct skc_runtime * const runtime,struct skc_suballocator * const suballocator)95 skc_suballocator_dispose(struct skc_runtime * const runtime,
96 struct skc_suballocator * const suballocator)
97 {
98 skc_runtime_host_perm_free(runtime,suballocator->ids);
99 skc_runtime_host_perm_free(runtime,suballocator->subbufs);
100 }
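
//
// A minimal usage sketch of the lifecycle above -- not part of the
// runtime.  The extent size, alignment, subbuffer count and the
// "temp" name are hypothetical:
//
#if 0

static
void
skc_suballocator_example(struct skc_runtime   * const runtime,
                         struct skc_scheduler * const scheduler)
{
  struct skc_suballocator suballocator;

  // 16 subbuffer records carving up a 64 KB extent at 256-byte alignment
  skc_suballocator_create(runtime,&suballocator,"temp",16,256,65536);

  // acquire a subbuffer -- blocks on the scheduler until space is available
  skc_subbuf_id_t subbuf_id;
  size_t          subbuf_size;

  size_t const origin = skc_suballocator_subbuf_alloc(&suballocator,
                                                      scheduler,
                                                      1000, // rounds up to 1024
                                                      &subbuf_id,
                                                      &subbuf_size);
  (void)origin;

  // ... use [origin,origin+subbuf_size) as an offset into the extent ...

  // release the subbuffer
  skc_suballocator_subbuf_free(&suballocator,subbuf_id);

  skc_suballocator_dispose(runtime,&suballocator);
}

#endif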

//
// Sets id and returns origin
//

size_t
skc_suballocator_subbuf_alloc(struct skc_suballocator * const suballocator,
                              struct skc_scheduler    * const scheduler,
                              size_t                    const size,
                              skc_subbuf_id_t         * const subbuf_id,
                              size_t                  * const subbuf_size)
{
  //
  // Note that we can't deadlock here because everything allocated is
  // expected to be freed within msecs.  Worst case, we wait for
  // resources to become available while a fully utilized GPU is
  // making forward progress on kernels.
  //
  // This behavior should guide the sizing of the suballocator's
  // number of subbuffers and extent.
  //
  // We want to allocate a large enough extent and enough subbuffer
  // records so that the CPU/GPU is never starved.
  //

  // round up the size
  skc_subbuf_size_t const size_ru = (skc_subbuf_size_t)SKC_ROUND_UP_POW2(size,suballocator->align);

  // save it
  if (subbuf_size != NULL)
    *subbuf_size = size_ru;

  //
  // We precheck to see that at least one region of memory is
  // available but do not check to see if there is a spare.  Instead,
  // we simply keep looking for an exact fit.
  //
  skc_subbuf_id_t * const ids = suballocator->ids;

  while (true)
    {
      skc_uint avail_rem = suballocator->rem.avail;
      skc_uint spare_rem = suballocator->rem.spare;

      for (skc_uint avail_idx=0; avail_idx<avail_rem; avail_idx++)
        {
          skc_subbuf_id_t     const avail_id = ids[avail_idx];
          struct skc_subbuf * const avail    = suballocator->subbufs + avail_id;

          assert(avail->inuse == 0);

          if (avail->size == size_ru) // size matches exactly
            {
              suballocator->total += size_ru;

              // return this id
              *subbuf_id = avail_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,avail_id,size_ru);

              // mark the subbuffer as in use
              avail->inuse += 1;

              assert(avail->inuse == 1);

              // update rem avail count
              suballocator->rem.avail = --avail_rem;

              // replace now inuse id with last avail id
              if ((avail_rem > 0) && (avail_idx != avail_rem))
                {
                  skc_subbuf_id_t     const last_id = ids[avail_rem];
                  struct skc_subbuf * const last    = suballocator->subbufs + last_id;

                  ids[avail_idx] = last_id;   // move id
                  last->idx      = avail_idx; // update idx[]
                }

              assert(suballocator->rem.avail > 0);

              // return origin
              return avail->origin;
            }
          else if ((avail->size > size_ru) && (spare_rem > 0)) // requested is less than available so split it
            {
              suballocator->total += size_ru;

              skc_uint                  spare_idx = suballocator->count - spare_rem;
              skc_subbuf_id_t     const spare_id  = ids[spare_idx];
              struct skc_subbuf * const spare     = suballocator->subbufs + spare_id;

              assert(spare->inuse == 0);

              // simple -- we're popping the top-of-stack of spares
              suballocator->rem.spare -= 1;

              // return id
              *subbuf_id = spare_id;

              SKC_SUBALLOCATOR_DEBUG_ALLOC(suballocator,spare_id,size_ru);

              // get prev
              struct skc_subbuf * const prev = avail->prev;

              if (prev != NULL)
                prev->next = spare;

              // init spare
              spare->prev    = prev;
              spare->next    = avail;
              spare->size    = size_ru;
              spare->origin  = avail->origin;
              spare->idx     = SKC_UINT_MAX; // defensive
              spare->inuse  += 1;

              // update curr
              avail->prev    = spare;
              avail->size   -= size_ru;
              avail->origin += size_ru;

              assert(suballocator->rem.avail > 0);

              return spare->origin;
            }
        }

      // uh oh... couldn't find enough memory
      skc_scheduler_wait(scheduler);
    }
}
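
//
// Worked example of the two paths above, assuming align = 256:
//
//   * alloc(1000) rounds up to size_ru = 1024 via
//     SKC_ROUND_UP_POW2(1000,256).
//
//   * If some available subbuffer spans exactly 1024 bytes, it is
//     handed out directly and its id leaves the avail array.
//
//   * Otherwise a larger one, say [0,8192), is split: a spare record
//     takes [0,1024) and its origin is returned, while the available
//     record shrinks to [1024,8192).
//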

//
// FIXME -- simplify this with a merge-with-prev() primitive
//

void
skc_suballocator_subbuf_free(struct skc_suballocator * const suballocator,
                             skc_subbuf_id_t           subbuf_id)
{
  // get subbuf for id
  struct skc_subbuf * const subbuf = suballocator->subbufs + subbuf_id;

  assert(subbuf->inuse == 1);

  suballocator->total -= subbuf->size;

  SKC_SUBALLOCATOR_DEBUG_FREE(suballocator,subbuf_id,subbuf->size);

  //
  // try to merge subbuf with left and maybe right and then dispose
  //
  struct skc_subbuf * prev;
  struct skc_subbuf * next;

  if (((prev = subbuf->prev) != NULL) && !prev->inuse)
    {
      next = subbuf->next;

      if ((next != NULL) && !next->inuse)
        {
          subbuf->inuse -= 1;

          assert(next->inuse == 0);

          // increment size
          prev->size += (subbuf->size + next->size);

          struct skc_subbuf * const nextnext = next->next;

          // update next link
          prev->next = nextnext;

          // update prev link
          if (nextnext != NULL)
            nextnext->prev = prev;

          //
          // both subbuf and next are now spare, which means we need
          // to move the final available subbuffer into next's old
          // position unless they're the same
          //
          skc_uint const last_idx = --suballocator->rem.avail;
          skc_uint const next_idx = next->idx;

          assert(suballocator->rem.avail > 0);

          if (last_idx != next_idx)
            {
              skc_subbuf_id_t     const last_id = suballocator->ids[last_idx];
              struct skc_subbuf * const last    = suballocator->subbufs + last_id;

              suballocator->ids[next_idx] = last_id;
              last->idx                   = next_idx;
            }

          skc_subbuf_id_t const next_id = (skc_subbuf_id_t)(next - suballocator->subbufs);

          skc_uint const spare_rem = suballocator->rem.spare + 2;
          skc_uint const spare_idx = suballocator->count - spare_rem;

          suballocator->rem.spare          = spare_rem;
          suballocator->ids[spare_idx + 0] = subbuf_id;
          suballocator->ids[spare_idx + 1] = next_id;
        }
      else
        {
          prev->size += subbuf->size;
          prev->next  = next;

          if (next != NULL)
            next->prev = prev;

          subbuf->inuse -= 1;

          assert(subbuf->inuse == 0);
          assert(suballocator->rem.avail > 0);

          suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
        }
    }
  //
  // try to merge with right
  //
  else if (((next = subbuf->next) != NULL) && !next->inuse)
    {
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      next->prev   = prev;
      next->origin = subbuf->origin;
      next->size  += subbuf->size;

      if (prev != NULL)
        prev->next = next;

      // subbuf is now spare
      suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
    }
  else // couldn't merge with a neighbor
    {
      skc_uint avail_idx = suballocator->rem.avail++;

      // subbuf is now available
      subbuf->idx    = avail_idx;
      subbuf->inuse -= 1;

      assert(subbuf->inuse == 0);
      assert(suballocator->rem.avail > 0);

      suballocator->ids[avail_idx] = subbuf_id;
    }
}
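
//
// One possible shape for the merge-with-prev() primitive suggested by
// the FIXME above -- a sketch only, covering the case where subbuf is
// the record being freed and prev is an unused left neighbor.  It is
// not wired into the free path and the function name is hypothetical:
//
#if 0

static
void
skc_suballocator_merge_with_prev(struct skc_suballocator * const suballocator,
                                 struct skc_subbuf       * const subbuf)
{
  struct skc_subbuf * const prev = subbuf->prev;

  assert(prev          != NULL);
  assert(prev->inuse   == 0);
  assert(subbuf->inuse == 0);

  // grow prev to cover subbuf
  prev->size += subbuf->size;
  prev->next  = subbuf->next;

  if (subbuf->next != NULL)
    subbuf->next->prev = prev;

  // subbuf's record returns to the spare stack
  skc_subbuf_id_t const subbuf_id = (skc_subbuf_id_t)(subbuf - suballocator->subbufs);

  suballocator->ids[suballocator->count - ++suballocator->rem.spare] = subbuf_id;
}

#endif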

//
//
//

#if 0

//
// At some point there might be a reason to sort the available
// subbuffers into some useful order -- presumably to binary search
// for the closest match or to chip away at the largest available
// subbuffer
//

static
void
skc_suballocator_optimize(struct skc_suballocator * const suballocator)
{
  ;
}

#endif
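
//
// A sketch of one such ordering -- an insertion sort of the available
// ids by ascending size so a best-fit scan could stop early.  The
// function name is hypothetical and nothing calls it:
//
#if 0

static
void
skc_suballocator_sort_avail(struct skc_suballocator * const suballocator)
{
  skc_subbuf_id_t   * const ids     = suballocator->ids;
  struct skc_subbuf * const subbufs = suballocator->subbufs;

  for (skc_uint ii=1; ii<suballocator->rem.avail; ii++)
    {
      skc_subbuf_id_t   const id   = ids[ii];
      skc_subbuf_size_t const size = subbufs[id].size;

      skc_uint jj = ii;

      // shift larger entries right, patching their idx back-pointers
      while ((jj > 0) && (subbufs[ids[jj-1]].size > size))
        {
          ids[jj]              = ids[jj-1];
          subbufs[ids[jj]].idx = jj;
          jj -= 1;
        }

      ids[jj]         = id;
      subbufs[id].idx = jj;
    }
}

#endif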

//
//
//