• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GStreamer
2  * Copyright (C) <2018-2019> Seungha Yang <seungha.yang@navercorp.com>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
17  * Boston, MA 02110-1301, USA.
18  */
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 
24 #include "gstcudamemory.h"
25 #include "gstcudautils.h"
26 
27 #include <string.h>
28 
29 GST_DEBUG_CATEGORY_STATIC (cudaallocator_debug);
30 #define GST_CAT_DEFAULT cudaallocator_debug
31 GST_DEBUG_CATEGORY_STATIC (GST_CAT_MEMORY);
32 
33 #define gst_cuda_allocator_parent_class parent_class
34 G_DEFINE_TYPE (GstCudaAllocator, gst_cuda_allocator, GST_TYPE_ALLOCATOR);
35 
36 static void gst_cuda_allocator_dispose (GObject * object);
37 static void gst_cuda_allocator_free (GstAllocator * allocator,
38     GstMemory * memory);
39 
40 static gpointer cuda_mem_map (GstCudaMemory * mem, gsize maxsize,
41     GstMapFlags flags);
42 static void cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info);
43 static GstMemory *cuda_mem_copy (GstMemory * mem, gssize offset, gssize size);
44 
/* Allocation through the generic GstAllocator vfunc is not supported:
 * callers must use gst_cuda_allocator_alloc() so CUDA-specific
 * allocation parameters (GstCudaAllocationParams) can be provided.
 * Reaching this function is a programming error. */
static GstMemory *
gst_cuda_allocator_dummy_alloc (GstAllocator * allocator, gsize size,
    GstAllocationParams * params)
{
  /* logs a critical warning and returns NULL */
  g_return_val_if_reached (NULL);
}
51 
52 static void
gst_cuda_allocator_class_init(GstCudaAllocatorClass * klass)53 gst_cuda_allocator_class_init (GstCudaAllocatorClass * klass)
54 {
55   GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
56   GstAllocatorClass *allocator_class = GST_ALLOCATOR_CLASS (klass);
57 
58   gobject_class->dispose = gst_cuda_allocator_dispose;
59 
60   allocator_class->alloc = GST_DEBUG_FUNCPTR (gst_cuda_allocator_dummy_alloc);
61   allocator_class->free = GST_DEBUG_FUNCPTR (gst_cuda_allocator_free);
62 
63   GST_DEBUG_CATEGORY_INIT (cudaallocator_debug, "cudaallocator", 0,
64       "CUDA Allocator");
65   GST_DEBUG_CATEGORY_GET (GST_CAT_MEMORY, "GST_MEMORY");
66 }
67 
68 static void
gst_cuda_allocator_init(GstCudaAllocator * allocator)69 gst_cuda_allocator_init (GstCudaAllocator * allocator)
70 {
71   GstAllocator *alloc = GST_ALLOCATOR_CAST (allocator);
72 
73   GST_DEBUG_OBJECT (allocator, "init");
74 
75   alloc->mem_type = GST_CUDA_MEMORY_TYPE_NAME;
76 
77   alloc->mem_map = (GstMemoryMapFunction) cuda_mem_map;
78   alloc->mem_unmap_full = (GstMemoryUnmapFullFunction) cuda_mem_unmap_full;
79   alloc->mem_copy = (GstMemoryCopyFunction) cuda_mem_copy;
80 
81   GST_OBJECT_FLAG_SET (allocator, GST_ALLOCATOR_FLAG_CUSTOM_ALLOC);
82 }
83 
84 static void
gst_cuda_allocator_dispose(GObject * object)85 gst_cuda_allocator_dispose (GObject * object)
86 {
87   GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (object);
88 
89   GST_DEBUG_OBJECT (self, "dispose");
90 
91   gst_clear_object (&self->context);
92   G_OBJECT_CLASS (parent_class)->dispose (object);
93 }
94 
95 GstMemory *
gst_cuda_allocator_alloc(GstAllocator * allocator,gsize size,GstCudaAllocationParams * params)96 gst_cuda_allocator_alloc (GstAllocator * allocator, gsize size,
97     GstCudaAllocationParams * params)
98 {
99   GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
100   gsize maxsize = size + params->parent.prefix + params->parent.padding;
101   gsize align = params->parent.align;
102   gsize offset = params->parent.prefix;
103   GstMemoryFlags flags = params->parent.flags;
104   CUdeviceptr data;
105   gboolean ret = FALSE;
106   GstCudaMemory *mem;
107   GstVideoInfo *info = &params->info;
108   gint i;
109   guint width, height;
110   gsize stride, plane_offset;
111 
112   if (!gst_cuda_context_push (self->context))
113     return NULL;
114 
115   /* ensure configured alignment */
116   align |= gst_memory_alignment;
117   /* allocate more to compensate for alignment */
118   maxsize += align;
119 
120   GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, self, "allocate new cuda memory");
121 
122   width = GST_VIDEO_INFO_COMP_WIDTH (info, 0) *
123       GST_VIDEO_INFO_COMP_PSTRIDE (info, 0);
124   height = 0;
125   for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++)
126     height += GST_VIDEO_INFO_COMP_HEIGHT (info, i);
127 
128   ret = gst_cuda_result (CuMemAllocPitch (&data, &stride, width, height, 16));
129   gst_cuda_context_pop (NULL);
130 
131   if (G_UNLIKELY (!ret)) {
132     GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY, self, "CUDA allocation failure");
133     return NULL;
134   }
135 
136   mem = g_new0 (GstCudaMemory, 1);
137   g_mutex_init (&mem->lock);
138   mem->data = data;
139   mem->alloc_params = *params;
140   mem->stride = stride;
141 
142   plane_offset = 0;
143   for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
144     mem->offset[i] = plane_offset;
145     plane_offset += stride * GST_VIDEO_INFO_COMP_HEIGHT (info, i);
146   }
147 
148   mem->context = gst_object_ref (self->context);
149 
150   gst_memory_init (GST_MEMORY_CAST (mem),
151       flags, GST_ALLOCATOR_CAST (self), NULL, maxsize, align, offset, size);
152 
153   return GST_MEMORY_CAST (mem);
154 }
155 
156 static void
gst_cuda_allocator_free(GstAllocator * allocator,GstMemory * memory)157 gst_cuda_allocator_free (GstAllocator * allocator, GstMemory * memory)
158 {
159   GstCudaAllocator *self = GST_CUDA_ALLOCATOR_CAST (allocator);
160   GstCudaMemory *mem = GST_CUDA_MEMORY_CAST (memory);
161 
162   GST_CAT_DEBUG_OBJECT (GST_CAT_MEMORY, allocator, "free cuda memory");
163 
164   g_mutex_clear (&mem->lock);
165 
166   gst_cuda_context_push (self->context);
167   if (mem->data)
168     gst_cuda_result (CuMemFree (mem->data));
169 
170   if (mem->map_alloc_data)
171     gst_cuda_result (CuMemFreeHost (mem->map_alloc_data));
172 
173   gst_cuda_context_pop (NULL);
174   gst_object_unref (mem->context);
175 
176   g_free (mem);
177 }
178 
179 /* called with lock */
180 static gboolean
gst_cuda_memory_upload_transfer(GstCudaMemory * mem)181 gst_cuda_memory_upload_transfer (GstCudaMemory * mem)
182 {
183   gint i;
184   GstVideoInfo *info = &mem->alloc_params.info;
185   gboolean ret = TRUE;
186 
187   if (!mem->map_data) {
188     GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload");
189     return FALSE;
190   }
191 
192   for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
193     CUDA_MEMCPY2D param = { 0, };
194 
195     param.srcMemoryType = CU_MEMORYTYPE_HOST;
196     param.srcHost =
197         (guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i);
198     param.srcPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i);
199 
200     param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
201     param.dstDevice = mem->data + mem->offset[i];
202     param.dstPitch = mem->stride;
203     param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
204         GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
205     param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
206 
207     if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
208       GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i);
209       ret = FALSE;
210       break;
211     }
212   }
213   gst_cuda_result (CuStreamSynchronize (NULL));
214 
215   return ret;
216 }
217 
218 /* called with lock */
219 static gboolean
gst_cuda_memory_download_transfer(GstCudaMemory * mem)220 gst_cuda_memory_download_transfer (GstCudaMemory * mem)
221 {
222   gint i;
223   GstVideoInfo *info = &mem->alloc_params.info;
224 
225   if (!mem->map_data) {
226     GST_CAT_ERROR (GST_CAT_MEMORY, "no staging memory to upload");
227     return FALSE;
228   }
229 
230   for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
231     CUDA_MEMCPY2D param = { 0, };
232 
233     param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
234     param.srcDevice = mem->data + mem->offset[i];
235     param.srcPitch = mem->stride;
236 
237     param.dstMemoryType = CU_MEMORYTYPE_HOST;
238     param.dstHost =
239         (guint8 *) mem->map_data + GST_VIDEO_INFO_PLANE_OFFSET (info, i);
240     param.dstPitch = GST_VIDEO_INFO_PLANE_STRIDE (info, i);
241     param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
242         GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
243     param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
244 
245     if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
246       GST_CAT_ERROR (GST_CAT_MEMORY, "Failed to copy %dth plane", i);
247       CuMemFreeHost (mem->map_alloc_data);
248       mem->map_alloc_data = mem->map_data = mem->align_data = NULL;
249       break;
250     }
251   }
252   gst_cuda_result (CuStreamSynchronize (NULL));
253 
254   return ! !mem->map_data;
255 }
256 
257 static gpointer
gst_cuda_memory_device_memory_map(GstCudaMemory * mem)258 gst_cuda_memory_device_memory_map (GstCudaMemory * mem)
259 {
260   GstMemory *memory = GST_MEMORY_CAST (mem);
261   gpointer data;
262   gsize aoffset;
263   gsize align = memory->align;
264 
265   if (mem->map_data) {
266     return mem->map_data;
267   }
268 
269   GST_CAT_DEBUG (GST_CAT_MEMORY, "alloc host memory for map");
270 
271   if (!mem->map_alloc_data) {
272     gsize maxsize;
273     guint8 *align_data;
274 
275     maxsize = memory->maxsize + align;
276     if (!gst_cuda_context_push (mem->context)) {
277       GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
278 
279       return NULL;
280     }
281 
282     if (!gst_cuda_result (CuMemAllocHost (&data, maxsize))) {
283       GST_CAT_ERROR (GST_CAT_MEMORY, "cannot alloc host memory");
284       gst_cuda_context_pop (NULL);
285 
286       return NULL;
287     }
288 
289     if (!gst_cuda_context_pop (NULL)) {
290       GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
291     }
292 
293     mem->map_alloc_data = data;
294     align_data = data;
295 
296     /* do align */
297     if ((aoffset = ((guintptr) align_data & align))) {
298       aoffset = (align + 1) - aoffset;
299       align_data += aoffset;
300     }
301     mem->align_data = align_data;
302 
303     /* first memory, always need download to staging */
304     GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
305   }
306 
307   mem->map_data = mem->align_data;
308 
309   if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD)) {
310     if (!gst_cuda_context_push (mem->context)) {
311       GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
312 
313       return NULL;
314     }
315 
316     gst_cuda_memory_download_transfer (mem);
317 
318     if (!gst_cuda_context_pop (NULL)) {
319       GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
320     }
321   }
322 
323   return mem->map_data;
324 }
325 
326 static gpointer
cuda_mem_map(GstCudaMemory * mem,gsize maxsize,GstMapFlags flags)327 cuda_mem_map (GstCudaMemory * mem, gsize maxsize, GstMapFlags flags)
328 {
329   gpointer ret = NULL;
330 
331   g_mutex_lock (&mem->lock);
332   mem->map_count++;
333 
334   if ((flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
335     /* upload from staging to device memory if necessary */
336     if (GST_MEMORY_FLAG_IS_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD)) {
337       if (!gst_cuda_context_push (mem->context)) {
338         GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
339         g_mutex_unlock (&mem->lock);
340 
341         return NULL;
342       }
343 
344       if (!gst_cuda_memory_upload_transfer (mem)) {
345         g_mutex_unlock (&mem->lock);
346         return NULL;
347       }
348 
349       gst_cuda_context_pop (NULL);
350     }
351 
352     GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
353 
354     if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
355       GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
356 
357     g_mutex_unlock (&mem->lock);
358     return (gpointer) mem->data;
359   }
360 
361   ret = gst_cuda_memory_device_memory_map (mem);
362   if (ret == NULL) {
363     mem->map_count--;
364     g_mutex_unlock (&mem->lock);
365     return NULL;
366   }
367 
368   if ((flags & GST_MAP_WRITE) == GST_MAP_WRITE)
369     GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
370 
371   GST_MEMORY_FLAG_UNSET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
372 
373   g_mutex_unlock (&mem->lock);
374 
375   return ret;
376 }
377 
378 static void
cuda_mem_unmap_full(GstCudaMemory * mem,GstMapInfo * info)379 cuda_mem_unmap_full (GstCudaMemory * mem, GstMapInfo * info)
380 {
381   g_mutex_lock (&mem->lock);
382   mem->map_count--;
383   GST_CAT_TRACE (GST_CAT_MEMORY,
384       "unmap CUDA memory %p, map count %d, have map_data %s",
385       mem, mem->map_count, mem->map_data ? "true" : "false");
386 
387   if ((info->flags & GST_MAP_CUDA) == GST_MAP_CUDA) {
388     if ((info->flags & GST_MAP_WRITE) == GST_MAP_WRITE)
389       GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_DOWNLOAD);
390 
391     g_mutex_unlock (&mem->lock);
392     return;
393   }
394 
395   if ((info->flags & GST_MAP_WRITE))
396     GST_MINI_OBJECT_FLAG_SET (mem, GST_CUDA_MEMORY_TRANSFER_NEED_UPLOAD);
397 
398   if (mem->map_count > 0 || !mem->map_data) {
399     g_mutex_unlock (&mem->lock);
400     return;
401   }
402 
403   mem->map_data = NULL;
404   g_mutex_unlock (&mem->lock);
405 
406   return;
407 }
408 
409 static GstMemory *
cuda_mem_copy(GstMemory * mem,gssize offset,gssize size)410 cuda_mem_copy (GstMemory * mem, gssize offset, gssize size)
411 {
412   GstMemory *copy;
413   GstCudaMemory *src_mem = GST_CUDA_MEMORY_CAST (mem);
414   GstCudaMemory *dst_mem;
415   GstCudaContext *ctx = GST_CUDA_ALLOCATOR_CAST (mem->allocator)->context;
416   gint i;
417   GstVideoInfo *info;
418 
419   /* offset and size are ignored */
420   copy = gst_cuda_allocator_alloc (mem->allocator, mem->size,
421       &src_mem->alloc_params);
422 
423   dst_mem = GST_CUDA_MEMORY_CAST (copy);
424 
425   info = &src_mem->alloc_params.info;
426 
427   if (!gst_cuda_context_push (ctx)) {
428     GST_CAT_ERROR (GST_CAT_MEMORY, "cannot push cuda context");
429     gst_cuda_allocator_free (mem->allocator, copy);
430 
431     return NULL;
432   }
433 
434   for (i = 0; i < GST_VIDEO_INFO_N_PLANES (info); i++) {
435     CUDA_MEMCPY2D param = { 0, };
436 
437     param.srcMemoryType = CU_MEMORYTYPE_DEVICE;
438     param.srcDevice = src_mem->data + src_mem->offset[i];
439     param.srcPitch = src_mem->stride;
440 
441     param.dstMemoryType = CU_MEMORYTYPE_DEVICE;
442     param.dstDevice = dst_mem->data + dst_mem->offset[i];
443     param.dstPitch = dst_mem->stride;
444     param.WidthInBytes = GST_VIDEO_INFO_COMP_WIDTH (info, i) *
445         GST_VIDEO_INFO_COMP_PSTRIDE (info, i);
446     param.Height = GST_VIDEO_INFO_COMP_HEIGHT (info, i);
447 
448     if (!gst_cuda_result (CuMemcpy2DAsync (&param, NULL))) {
449       GST_CAT_ERROR_OBJECT (GST_CAT_MEMORY,
450           mem->allocator, "Failed to copy %dth plane", i);
451       gst_cuda_context_pop (NULL);
452       gst_cuda_allocator_free (mem->allocator, copy);
453 
454       return NULL;
455     }
456   }
457 
458   gst_cuda_result (CuStreamSynchronize (NULL));
459 
460   if (!gst_cuda_context_pop (NULL)) {
461     GST_CAT_WARNING (GST_CAT_MEMORY, "cannot pop cuda context");
462   }
463 
464   return copy;
465 }
466 
467 GstAllocator *
gst_cuda_allocator_new(GstCudaContext * context)468 gst_cuda_allocator_new (GstCudaContext * context)
469 {
470   GstCudaAllocator *allocator;
471 
472   g_return_val_if_fail (GST_IS_CUDA_CONTEXT (context), NULL);
473 
474   allocator = g_object_new (GST_TYPE_CUDA_ALLOCATOR, NULL);
475   allocator->context = gst_object_ref (context);
476 
477   return GST_ALLOCATOR_CAST (allocator);
478 }
479 
480 gboolean
gst_is_cuda_memory(GstMemory * mem)481 gst_is_cuda_memory (GstMemory * mem)
482 {
483   return mem != NULL && mem->allocator != NULL &&
484       GST_IS_CUDA_ALLOCATOR (mem->allocator);
485 }
486