1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 % %
4 % %
5 % %
6 % AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE %
7 % A A C C E L E R R A A T E %
8 % AAAAA C C EEE L EEE RRRR AAAAA T EEE %
9 % A A C C E L E R R A A T E %
10 % A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE %
11 % %
12 % %
13 % MagickCore Acceleration Methods %
14 % %
15 % Software Design %
16 % Cristy %
17 % SiuChi Chan %
18 % Guansong Zhang %
19 % January 2010 %
20 % Dirk Lemstra %
21 % April 2016 %
22 % %
23 % %
24 % Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization %
25 % dedicated to making software imaging solutions freely available. %
26 % %
27 % You may not use this file except in compliance with the License. You may %
28 % obtain a copy of the License at %
29 % %
30 % http://www.imagemagick.org/script/license.php %
31 % %
32 % Unless required by applicable law or agreed to in writing, software %
33 % distributed under the License is distributed on an "AS IS" BASIS, %
34 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
35 % See the License for the specific language governing permissions and %
36 % limitations under the License. %
37 % %
38 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39 */
40
41 /*
42 Include declarations.
43 */
44 #include "MagickCore/studio.h"
45 #include "MagickCore/accelerate-private.h"
46 #include "MagickCore/accelerate-kernels-private.h"
47 #include "MagickCore/artifact.h"
48 #include "MagickCore/cache.h"
49 #include "MagickCore/cache-private.h"
50 #include "MagickCore/cache-view.h"
51 #include "MagickCore/color-private.h"
52 #include "MagickCore/delegate-private.h"
53 #include "MagickCore/enhance.h"
54 #include "MagickCore/exception.h"
55 #include "MagickCore/exception-private.h"
56 #include "MagickCore/gem.h"
57 #include "MagickCore/image.h"
58 #include "MagickCore/image-private.h"
59 #include "MagickCore/linked-list.h"
60 #include "MagickCore/list.h"
61 #include "MagickCore/memory_.h"
62 #include "MagickCore/monitor-private.h"
63 #include "MagickCore/opencl.h"
64 #include "MagickCore/opencl-private.h"
65 #include "MagickCore/option.h"
66 #include "MagickCore/pixel-accessor.h"
67 #include "MagickCore/pixel-private.h"
68 #include "MagickCore/prepress.h"
69 #include "MagickCore/quantize.h"
70 #include "MagickCore/quantum-private.h"
71 #include "MagickCore/random_.h"
72 #include "MagickCore/random-private.h"
73 #include "MagickCore/registry.h"
74 #include "MagickCore/resize.h"
75 #include "MagickCore/resize-private.h"
76 #include "MagickCore/semaphore.h"
77 #include "MagickCore/splay-tree.h"
78 #include "MagickCore/statistic.h"
79 #include "MagickCore/string_.h"
80 #include "MagickCore/string-private.h"
81 #include "MagickCore/token.h"
82
83 #define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
84 #define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
85
86 #if defined(MAGICKCORE_OPENCL_SUPPORT)
87
88 /*
89 Define declarations.
90 */
91 #define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
92
93 /*
94 Static declarations.
95 */
96 static const ResizeWeightingFunctionType supportedResizeWeighting[] =
97 {
98 BoxWeightingFunction,
99 TriangleWeightingFunction,
100 HannWeightingFunction,
101 HammingWeightingFunction,
102 BlackmanWeightingFunction,
103 CubicBCWeightingFunction,
104 SincWeightingFunction,
105 SincFastWeightingFunction,
106 LastWeightingFunction
107 };
108
109 /*
110 Helper functions.
111 */
checkAccelerateCondition(const Image * image)112 static MagickBooleanType checkAccelerateCondition(const Image* image)
113 {
114 /* check if the image's colorspace is supported */
115 if (image->colorspace != RGBColorspace &&
116 image->colorspace != sRGBColorspace &&
117 image->colorspace != GRAYColorspace)
118 return(MagickFalse);
119
120 /* check if the virtual pixel method is compatible with the OpenCL implementation */
121 if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
122 (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
123 return(MagickFalse);
124
125 /* check if the image has read / write mask */
126 if (image->read_mask != MagickFalse || image->write_mask != MagickFalse)
127 return(MagickFalse);
128
129 if (image->number_channels > 4)
130 return(MagickFalse);
131
132 /* check if pixel order is R */
133 if (GetPixelChannelOffset(image,RedPixelChannel) != 0)
134 return(MagickFalse);
135
136 if (image->number_channels == 1)
137 return(MagickTrue);
138
139 /* check if pixel order is RA */
140 if ((image->number_channels == 2) &&
141 (GetPixelChannelOffset(image,AlphaPixelChannel) == 1))
142 return(MagickTrue);
143
144 if (image->number_channels == 2)
145 return(MagickFalse);
146
147 /* check if pixel order is RGB */
148 if ((GetPixelChannelOffset(image,GreenPixelChannel) != 1) ||
149 (GetPixelChannelOffset(image,BluePixelChannel) != 2))
150 return(MagickFalse);
151
152 if (image->number_channels == 3)
153 return(MagickTrue);
154
155 /* check if pixel order is RGBA */
156 if (GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
157 return(MagickFalse);
158
159 return(MagickTrue);
160 }
161
checkAccelerateConditionRGBA(const Image * image)162 static MagickBooleanType checkAccelerateConditionRGBA(const Image* image)
163 {
164 if (checkAccelerateCondition(image) == MagickFalse)
165 return(MagickFalse);
166
167 /* the order will be RGBA if the image has 4 channels */
168 if (image->number_channels != 4)
169 return(MagickFalse);
170
171 if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
172 (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
173 (GetPixelBlueTraits(image) == UndefinedPixelTrait) ||
174 (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
175 return(MagickFalse);
176
177 return(MagickTrue);
178 }
179
checkPixelIntensity(const Image * image,const PixelIntensityMethod method)180 static MagickBooleanType checkPixelIntensity(const Image *image,
181 const PixelIntensityMethod method)
182 {
183 /* EncodePixelGamma and DecodePixelGamma are not supported */
184 if ((method == Rec601LumaPixelIntensityMethod) ||
185 (method == Rec709LumaPixelIntensityMethod))
186 {
187 if (image->colorspace == RGBColorspace)
188 return(MagickFalse);
189 }
190
191 if ((method == Rec601LuminancePixelIntensityMethod) ||
192 (method == Rec709LuminancePixelIntensityMethod))
193 {
194 if (image->colorspace == sRGBColorspace)
195 return(MagickFalse);
196 }
197
198 return(MagickTrue);
199 }
200
checkHistogramCondition(const Image * image,const PixelIntensityMethod method)201 static MagickBooleanType checkHistogramCondition(const Image *image,
202 const PixelIntensityMethod method)
203 {
204 /* ensure this is the only pass get in for now. */
205 if ((image->channel_mask & SyncChannels) == 0)
206 return MagickFalse;
207
208 return(checkPixelIntensity(image,method));
209 }
210
getOpenCLEnvironment(ExceptionInfo * exception)211 static MagickCLEnv getOpenCLEnvironment(ExceptionInfo* exception)
212 {
213 MagickCLEnv
214 clEnv;
215
216 clEnv=GetCurrentOpenCLEnv();
217 if (clEnv == (MagickCLEnv) NULL)
218 return((MagickCLEnv) NULL);
219
220 if (clEnv->enabled == MagickFalse)
221 return((MagickCLEnv) NULL);
222
223 if (InitializeOpenCL(clEnv,exception) == MagickFalse)
224 return((MagickCLEnv) NULL);
225
226 return(clEnv);
227 }
228
cloneImage(const Image * image,ExceptionInfo * exception)229 static Image *cloneImage(const Image* image,ExceptionInfo *exception)
230 {
231 Image
232 *clone;
233
234 if (((image->channel_mask & RedChannel) != 0) &&
235 ((image->channel_mask & GreenChannel) != 0) &&
236 ((image->channel_mask & BlueChannel) != 0) &&
237 ((image->channel_mask & AlphaChannel) != 0))
238 clone=CloneImage(image,image->columns,image->rows,MagickTrue,exception);
239 else
240 {
241 clone=CloneImage(image,0,0,MagickTrue,exception);
242 if (clone != (Image *) NULL)
243 SyncImagePixelCache(clone,exception);
244 }
245 return(clone);
246 }
247
248 /* pad the global workgroup size to the next multiple of
249 the local workgroup size */
padGlobalWorkgroupSizeToLocalWorkgroupSize(const unsigned int orgGlobalSize,const unsigned int localGroupSize)250 inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
251 const unsigned int orgGlobalSize,const unsigned int localGroupSize)
252 {
253 return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
254 }
255
createKernelInfo(MagickCLDevice device,const double radius,const double sigma,cl_uint * width,ExceptionInfo * exception)256 static cl_mem createKernelInfo(MagickCLDevice device,const double radius,
257 const double sigma,cl_uint *width,ExceptionInfo *exception)
258 {
259 char
260 geometry[MagickPathExtent];
261
262 cl_int
263 status;
264
265 cl_mem
266 imageKernelBuffer;
267
268 float
269 *kernelBufferPtr;
270
271 KernelInfo
272 *kernel;
273
274 ssize_t
275 i;
276
277 (void) FormatLocaleString(geometry,MagickPathExtent,
278 "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
279 kernel=AcquireKernelInfo(geometry,exception);
280 if (kernel == (KernelInfo *) NULL)
281 {
282 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
283 ResourceLimitWarning,"AcquireKernelInfo failed.",".");
284 return((cl_mem) NULL);
285 }
286 kernelBufferPtr=(float *)AcquireMagickMemory(kernel->width*
287 sizeof(*kernelBufferPtr));
288 for (i = 0; i < (ssize_t) kernel->width; i++)
289 kernelBufferPtr[i] = (float)kernel->values[i];
290 imageKernelBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
291 CL_MEM_READ_ONLY,kernel->width*sizeof(*kernelBufferPtr),kernelBufferPtr);
292 *width=kernel->width;
293 kernelBufferPtr=(float *) RelinquishMagickMemory(kernelBufferPtr);
294 kernel=DestroyKernelInfo(kernel);
295 if (imageKernelBuffer == (cl_mem) NULL)
296 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
297 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
298 return(imageKernelBuffer);
299 }
300
LaunchHistogramKernel(MagickCLEnv clEnv,MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,Image * image,const ChannelType channel,ExceptionInfo * exception)301 static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
302 MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,
303 cl_mem histogramBuffer,Image *image,const ChannelType channel,
304 ExceptionInfo *exception)
305 {
306 MagickBooleanType
307 outputReady;
308
309 cl_int
310 clStatus;
311
312 cl_kernel
313 histogramKernel;
314
315 cl_event
316 event;
317
318 cl_uint
319 colorspace,
320 method;
321
322 register ssize_t
323 i;
324
325 size_t
326 global_work_size[2];
327
328 histogramKernel = NULL;
329
330 outputReady = MagickFalse;
331 colorspace = image->colorspace;
332 method = image->intensity;
333
334 /* get the OpenCL kernel */
335 histogramKernel = AcquireOpenCLKernel(device,"Histogram");
336 if (histogramKernel == NULL)
337 {
338 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
339 goto cleanup;
340 }
341
342 /* set the kernel arguments */
343 i = 0;
344 clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
345 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
346 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&colorspace);
347 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&method);
348 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
349 if (clStatus != CL_SUCCESS)
350 {
351 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
352 goto cleanup;
353 }
354
355 /* launch the kernel */
356 global_work_size[0] = image->columns;
357 global_work_size[1] = image->rows;
358
359 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
360
361 if (clStatus != CL_SUCCESS)
362 {
363 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
364 goto cleanup;
365 }
366 RecordProfileData(device,histogramKernel,event);
367
368 outputReady = MagickTrue;
369
370 cleanup:
371
372 if (histogramKernel!=NULL)
373 ReleaseOpenCLKernel(histogramKernel);
374
375 return(outputReady);
376 }
377
378 /*
379 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
380 % %
381 % %
382 % %
383 % A c c e l e r a t e A d d N o i s e I m a g e %
384 % %
385 % %
386 % %
387 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
388 */
389
ComputeAddNoiseImage(const Image * image,MagickCLEnv clEnv,const NoiseType noise_type,ExceptionInfo * exception)390 static Image *ComputeAddNoiseImage(const Image *image,MagickCLEnv clEnv,
391 const NoiseType noise_type,ExceptionInfo *exception)
392 {
393 cl_command_queue
394 queue;
395
396 cl_float
397 attenuate;
398
399 cl_int
400 status;
401
402 cl_kernel
403 addNoiseKernel;
404
405 cl_mem
406 filteredImageBuffer,
407 imageBuffer;
408
409 cl_uint
410 bufferLength,
411 inputPixelCount,
412 number_channels,
413 numRandomNumberPerPixel,
414 pixelsPerWorkitem,
415 seed0,
416 seed1,
417 workItemCount;
418
419 const char
420 *option;
421
422 const unsigned long
423 *s;
424
425 MagickBooleanType
426 outputReady;
427
428 MagickCLDevice
429 device;
430
431 Image
432 *filteredImage;
433
434 RandomInfo
435 *randomInfo;
436
437 size_t
438 gsize[1],
439 i,
440 lsize[1],
441 numRandPerChannel;
442
443 filteredImage=NULL;
444 addNoiseKernel=NULL;
445 outputReady=MagickFalse;
446
447 device=RequestOpenCLDevice(clEnv);
448 queue=AcquireOpenCLCommandQueue(device);
449 if (queue == (cl_command_queue) NULL)
450 goto cleanup;
451 filteredImage=cloneImage(image,exception);
452 if (filteredImage == (Image *) NULL)
453 goto cleanup;
454 if (filteredImage->number_channels != image->number_channels)
455 goto cleanup;
456 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
457 if (imageBuffer == (cl_mem) NULL)
458 goto cleanup;
459 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
460 if (filteredImageBuffer == (cl_mem) NULL)
461 goto cleanup;
462
463 /* find out how many random numbers needed by pixel */
464 numRandPerChannel=0;
465 numRandomNumberPerPixel=0;
466 switch (noise_type)
467 {
468 case UniformNoise:
469 case ImpulseNoise:
470 case LaplacianNoise:
471 case RandomNoise:
472 default:
473 numRandPerChannel=1;
474 break;
475 case GaussianNoise:
476 case MultiplicativeGaussianNoise:
477 case PoissonNoise:
478 numRandPerChannel=2;
479 break;
480 };
481 if (GetPixelRedTraits(image) != UndefinedPixelTrait)
482 numRandomNumberPerPixel+=numRandPerChannel;
483 if (GetPixelGreenTraits(image) != UndefinedPixelTrait)
484 numRandomNumberPerPixel+=numRandPerChannel;
485 if (GetPixelBlueTraits(image) != UndefinedPixelTrait)
486 numRandomNumberPerPixel+=numRandPerChannel;
487 if (GetPixelAlphaTraits(image) != UndefinedPixelTrait)
488 numRandomNumberPerPixel+=numRandPerChannel;
489
490 addNoiseKernel=AcquireOpenCLKernel(device,"AddNoise");
491 if (addNoiseKernel == (cl_kernel) NULL)
492 {
493 (void)OpenCLThrowMagickException(device,exception,GetMagickModule(),
494 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
495 goto cleanup;
496 }
497
498 /* 256 work items per group, 2 groups per CU */
499 workItemCount=device->max_compute_units*2*256;
500 inputPixelCount=(cl_int) (image->columns*image->rows);
501 pixelsPerWorkitem=(inputPixelCount+workItemCount-1)/workItemCount;
502 pixelsPerWorkitem=((pixelsPerWorkitem+3)/4)*4;
503 lsize[0]=256;
504 gsize[0]=workItemCount;
505
506 randomInfo=AcquireRandomInfo();
507 s=GetRandomInfoSeed(randomInfo);
508 seed0=s[0];
509 (void) GetPseudoRandomValue(randomInfo);
510 seed1=s[0];
511 randomInfo=DestroyRandomInfo(randomInfo);
512
513 number_channels=(cl_uint) image->number_channels;
514 bufferLength=(cl_uint) (image->columns*image->rows*image->number_channels);
515 attenuate=1.0f;
516 option=GetImageArtifact(image,"attenuate");
517 if (option != (char *) NULL)
518 attenuate=(float)StringToDouble(option,(char **) NULL);
519
520 i=0;
521 status =SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
522 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
523 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
524 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&bufferLength);
525 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
526 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(NoiseType),(void *)&noise_type);
527 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_float),(void *)&attenuate);
528 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed0);
529 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed1);
530 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&numRandomNumberPerPixel);
531 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
532 if (status != CL_SUCCESS)
533 {
534 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
535 ResourceLimitWarning,"clSetKernelArg failed.",".");
536 goto cleanup;
537 }
538
539 outputReady=EnqueueOpenCLKernel(queue,addNoiseKernel,1,(const size_t *) NULL,gsize,
540 lsize,image,filteredImage,exception);
541
542 cleanup:
543
544 if (addNoiseKernel != (cl_kernel) NULL)
545 ReleaseOpenCLKernel(addNoiseKernel);
546 if (queue != (cl_command_queue) NULL)
547 ReleaseOpenCLCommandQueue(device,queue);
548 if (device != (MagickCLDevice) NULL)
549 ReleaseOpenCLDevice(device);
550 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
551 filteredImage=DestroyImage(filteredImage);
552
553 return(filteredImage);
554 }
555
AccelerateAddNoiseImage(const Image * image,const NoiseType noise_type,ExceptionInfo * exception)556 MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
557 const NoiseType noise_type,ExceptionInfo *exception)
558 {
559 Image
560 *filteredImage;
561
562 MagickCLEnv
563 clEnv;
564
565 assert(image != NULL);
566 assert(exception != (ExceptionInfo *) NULL);
567
568 if (checkAccelerateCondition(image) == MagickFalse)
569 return((Image *) NULL);
570
571 clEnv=getOpenCLEnvironment(exception);
572 if (clEnv == (MagickCLEnv) NULL)
573 return((Image *) NULL);
574
575 filteredImage=ComputeAddNoiseImage(image,clEnv,noise_type,exception);
576 return(filteredImage);
577 }
578
579 /*
580 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
581 % %
582 % %
583 % %
584 % A c c e l e r a t e B l u r I m a g e %
585 % %
586 % %
587 % %
588 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
589 */
590
ComputeBlurImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,ExceptionInfo * exception)591 static Image *ComputeBlurImage(const Image* image,MagickCLEnv clEnv,
592 const double radius,const double sigma,ExceptionInfo *exception)
593 {
594 cl_command_queue
595 queue;
596
597 cl_int
598 status;
599
600 cl_kernel
601 blurColumnKernel,
602 blurRowKernel;
603
604 cl_mem
605 filteredImageBuffer,
606 imageBuffer,
607 imageKernelBuffer,
608 tempImageBuffer;
609
610 cl_uint
611 imageColumns,
612 imageRows,
613 kernelWidth,
614 number_channels;
615
616 Image
617 *filteredImage;
618
619 MagickBooleanType
620 outputReady;
621
622 MagickCLDevice
623 device;
624
625 MagickSizeType
626 length;
627
628 size_t
629 chunkSize=256,
630 gsize[2],
631 i,
632 lsize[2];
633
634 filteredImage=NULL;
635 tempImageBuffer=NULL;
636 imageKernelBuffer=NULL;
637 blurRowKernel=NULL;
638 blurColumnKernel=NULL;
639 outputReady=MagickFalse;
640
641 device=RequestOpenCLDevice(clEnv);
642 queue=AcquireOpenCLCommandQueue(device);
643 filteredImage=cloneImage(image,exception);
644 if (filteredImage == (Image *) NULL)
645 goto cleanup;
646 if (filteredImage->number_channels != image->number_channels)
647 goto cleanup;
648 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
649 if (imageBuffer == (cl_mem) NULL)
650 goto cleanup;
651 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
652 if (filteredImageBuffer == (cl_mem) NULL)
653 goto cleanup;
654
655 imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
656 exception);
657 if (imageKernelBuffer == (cl_mem) NULL)
658 goto cleanup;
659
660 length=image->columns*image->rows;
661 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
662 sizeof(cl_float4),(void *) NULL);
663 if (tempImageBuffer == (cl_mem) NULL)
664 goto cleanup;
665
666 blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
667 if (blurRowKernel == (cl_kernel) NULL)
668 {
669 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
670 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
671 goto cleanup;
672 }
673
674 number_channels=(cl_uint) image->number_channels;
675 imageColumns=(cl_uint) image->columns;
676 imageRows=(cl_uint) image->rows;
677
678 i=0;
679 status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
680 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
681 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
682 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
683 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
684 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
685 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
686 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
687 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
688 if (status != CL_SUCCESS)
689 {
690 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
691 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
692 goto cleanup;
693 }
694
695 gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
696 gsize[1]=image->rows;
697 lsize[0]=chunkSize;
698 lsize[1]=1;
699
700 outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,(size_t *) NULL,gsize,
701 lsize,image,filteredImage,exception);
702 if (outputReady == MagickFalse)
703 goto cleanup;
704
705 blurColumnKernel=AcquireOpenCLKernel(device,"BlurColumn");
706 if (blurColumnKernel == (cl_kernel) NULL)
707 {
708 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
709 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
710 goto cleanup;
711 }
712
713 i=0;
714 status =SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
715 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels);
716 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
717 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
718 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
719 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
720 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
721 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
722 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
723 if (status != CL_SUCCESS)
724 {
725 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
726 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
727 goto cleanup;
728 }
729
730 gsize[0]=image->columns;
731 gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
732 lsize[0]=1;
733 lsize[1]=chunkSize;
734
735 outputReady=EnqueueOpenCLKernel(queue,blurColumnKernel,2,(size_t *) NULL,gsize,
736 lsize,image,filteredImage,exception);
737
738 cleanup:
739
740 if (tempImageBuffer != (cl_mem) NULL)
741 ReleaseOpenCLMemObject(tempImageBuffer);
742 if (imageKernelBuffer != (cl_mem) NULL)
743 ReleaseOpenCLMemObject(imageKernelBuffer);
744 if (blurRowKernel != (cl_kernel) NULL)
745 ReleaseOpenCLKernel(blurRowKernel);
746 if (blurColumnKernel != (cl_kernel) NULL)
747 ReleaseOpenCLKernel(blurColumnKernel);
748 if (queue != (cl_command_queue) NULL)
749 ReleaseOpenCLCommandQueue(device,queue);
750 if (device != (MagickCLDevice) NULL)
751 ReleaseOpenCLDevice(device);
752 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
753 filteredImage=DestroyImage(filteredImage);
754
755 return(filteredImage);
756 }
757
AccelerateBlurImage(const Image * image,const double radius,const double sigma,ExceptionInfo * exception)758 MagickPrivate Image* AccelerateBlurImage(const Image *image,
759 const double radius,const double sigma,ExceptionInfo *exception)
760 {
761 Image
762 *filteredImage;
763
764 MagickCLEnv
765 clEnv;
766
767 assert(image != NULL);
768 assert(exception != (ExceptionInfo *) NULL);
769
770 if (checkAccelerateCondition(image) == MagickFalse)
771 return((Image *) NULL);
772
773 clEnv=getOpenCLEnvironment(exception);
774 if (clEnv == (MagickCLEnv) NULL)
775 return((Image *) NULL);
776
777 filteredImage=ComputeBlurImage(image,clEnv,radius,sigma,exception);
778 return(filteredImage);
779 }
780
781 /*
782 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
783 % %
784 % %
785 % %
786 % A c c e l e r a t e C o n t r a s t I m a g e %
787 % %
788 % %
789 % %
790 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
791 */
792
ComputeContrastImage(Image * image,MagickCLEnv clEnv,const MagickBooleanType sharpen,ExceptionInfo * exception)793 static MagickBooleanType ComputeContrastImage(Image *image,MagickCLEnv clEnv,
794 const MagickBooleanType sharpen,ExceptionInfo *exception)
795 {
796 CacheView
797 *image_view;
798
799 cl_command_queue
800 queue;
801
802 cl_int
803 clStatus;
804
805 cl_kernel
806 filterKernel;
807
808 cl_event
809 event;
810
811 cl_mem
812 imageBuffer;
813
814 cl_mem_flags
815 mem_flags;
816
817 MagickBooleanType
818 outputReady;
819
820 MagickCLDevice
821 device;
822
823 MagickSizeType
824 length;
825
826 size_t
827 global_work_size[2];
828
829 unsigned int
830 i,
831 uSharpen;
832
833 void
834 *inputPixels;
835
836 outputReady = MagickFalse;
837 inputPixels = NULL;
838 imageBuffer = NULL;
839 filterKernel = NULL;
840 queue = NULL;
841
842 device = RequestOpenCLDevice(clEnv);
843
844 /* Create and initialize OpenCL buffers. */
845 image_view=AcquireAuthenticCacheView(image,exception);
846 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
847 if (inputPixels == (void *) NULL)
848 {
849 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
850 goto cleanup;
851 }
852
853 /* If the host pointer is aligned to the size of CLPixelPacket,
854 then use the host buffer directly from the GPU; otherwise,
855 create a buffer on the GPU and copy the data over */
856 if (ALIGNED(inputPixels,CLPixelPacket))
857 {
858 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
859 }
860 else
861 {
862 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
863 }
864 /* create a CL buffer from image pixel buffer */
865 length = image->columns * image->rows;
866 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
867 if (clStatus != CL_SUCCESS)
868 {
869 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
870 goto cleanup;
871 }
872
873 filterKernel = AcquireOpenCLKernel(device,"Contrast");
874 if (filterKernel == NULL)
875 {
876 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
877 goto cleanup;
878 }
879
880 i = 0;
881 clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
882
883 uSharpen = (sharpen == MagickFalse)?0:1;
884 clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
885 if (clStatus != CL_SUCCESS)
886 {
887 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
888 goto cleanup;
889 }
890
891 global_work_size[0] = image->columns;
892 global_work_size[1] = image->rows;
893 /* launch the kernel */
894 queue = AcquireOpenCLCommandQueue(device);
895 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
896 if (clStatus != CL_SUCCESS)
897 {
898 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
899 goto cleanup;
900 }
901 RecordProfileData(device,filterKernel,event);
902
903 if (ALIGNED(inputPixels,CLPixelPacket))
904 {
905 length = image->columns * image->rows;
906 clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
907 }
908 else
909 {
910 length = image->columns * image->rows;
911 clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
912 }
913 if (clStatus != CL_SUCCESS)
914 {
915 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
916 goto cleanup;
917 }
918 outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
919
920 cleanup:
921
922 image_view=DestroyCacheView(image_view);
923
924 if (imageBuffer!=NULL)
925 clEnv->library->clReleaseMemObject(imageBuffer);
926 if (filterKernel!=NULL)
927 ReleaseOpenCLKernel(filterKernel);
928 if (queue != NULL)
929 ReleaseOpenCLCommandQueue(device,queue);
930 if (device != NULL)
931 ReleaseOpenCLDevice(device);
932
933 return(outputReady);
934 }
935
AccelerateContrastImage(Image * image,const MagickBooleanType sharpen,ExceptionInfo * exception)936 MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
937 const MagickBooleanType sharpen,ExceptionInfo *exception)
938 {
939 MagickBooleanType
940 status;
941
942 MagickCLEnv
943 clEnv;
944
945 assert(image != NULL);
946 assert(exception != (ExceptionInfo *) NULL);
947
948 if (checkAccelerateConditionRGBA(image) == MagickFalse)
949 return(MagickFalse);
950
951 clEnv=getOpenCLEnvironment(exception);
952 if (clEnv == (MagickCLEnv) NULL)
953 return(MagickFalse);
954
955 status=ComputeContrastImage(image,clEnv,sharpen,exception);
956 return(status);
957 }
958
959 /*
960 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
961 % %
962 % %
963 % %
964 % A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e %
965 % %
966 % %
967 % %
968 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
969 */
970
ComputeContrastStretchImage(Image * image,MagickCLEnv clEnv,const double black_point,const double white_point,ExceptionInfo * exception)971 static MagickBooleanType ComputeContrastStretchImage(Image *image,
972 MagickCLEnv clEnv,const double black_point,const double white_point,
973 ExceptionInfo *exception)
974 {
975 #define ContrastStretchImageTag "ContrastStretch/Image"
976 #define MaxRange(color) ((MagickRealType) ScaleQuantumToMap((Quantum) (color)))
977
978 CacheView
979 *image_view;
980
981 cl_command_queue
982 queue;
983
984 cl_int
985 clStatus;
986
987 cl_mem_flags
988 mem_flags;
989
990 cl_mem
991 histogramBuffer,
992 imageBuffer,
993 stretchMapBuffer;
994
995 cl_kernel
996 histogramKernel,
997 stretchKernel;
998
999 cl_event
1000 event;
1001
1002 cl_uint4
1003 *histogram;
1004
1005 double
1006 intensity;
1007
1008 FloatPixelPacket
1009 black,
1010 white;
1011
1012 MagickBooleanType
1013 outputReady,
1014 status;
1015
1016 MagickCLDevice
1017 device;
1018
1019 MagickSizeType
1020 length;
1021
1022 PixelPacket
1023 *stretch_map;
1024
1025 register ssize_t
1026 i;
1027
1028 size_t
1029 global_work_size[2];
1030
1031 void
1032 *hostPtr,
1033 *inputPixels;
1034
1035 histogram=NULL;
1036 stretch_map=NULL;
1037 inputPixels = NULL;
1038 imageBuffer = NULL;
1039 histogramBuffer = NULL;
1040 stretchMapBuffer = NULL;
1041 histogramKernel = NULL;
1042 stretchKernel = NULL;
1043 queue = NULL;
1044 outputReady = MagickFalse;
1045
1046
1047 assert(image != (Image *) NULL);
1048 assert(image->signature == MagickCoreSignature);
1049 if (image->debug != MagickFalse)
1050 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1051
1052 //exception=(&image->exception);
1053
1054 /*
1055 * initialize opencl env
1056 */
1057 device = RequestOpenCLDevice(clEnv);
1058 queue = AcquireOpenCLCommandQueue(device);
1059
1060 /*
1061 Allocate and initialize histogram arrays.
1062 */
1063 histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
1064
1065 if (histogram == (cl_uint4 *) NULL)
1066 ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
1067
1068 /* reset histogram */
1069 (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
1070
1071 /*
1072 if (IsGrayImage(image,exception) != MagickFalse)
1073 (void) SetImageColorspace(image,GRAYColorspace);
1074 */
1075
1076 status=MagickTrue;
1077
1078
1079 /*
1080 Form histogram.
1081 */
1082 /* Create and initialize OpenCL buffers. */
1083 /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
1084 /* assume this will get a writable image */
1085 image_view=AcquireAuthenticCacheView(image,exception);
1086 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1087
1088 if (inputPixels == (void *) NULL)
1089 {
1090 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1091 goto cleanup;
1092 }
1093 /* If the host pointer is aligned to the size of CLPixelPacket,
1094 then use the host buffer directly from the GPU; otherwise,
1095 create a buffer on the GPU and copy the data over */
1096 if (ALIGNED(inputPixels,CLPixelPacket))
1097 {
1098 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1099 }
1100 else
1101 {
1102 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1103 }
1104 /* create a CL buffer from image pixel buffer */
1105 length = image->columns * image->rows;
1106 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1107 if (clStatus != CL_SUCCESS)
1108 {
1109 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1110 goto cleanup;
1111 }
1112
1113 /* If the host pointer is aligned to the size of cl_uint,
1114 then use the host buffer directly from the GPU; otherwise,
1115 create a buffer on the GPU and copy the data over */
1116 if (ALIGNED(histogram,cl_uint4))
1117 {
1118 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1119 hostPtr = histogram;
1120 }
1121 else
1122 {
1123 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1124 hostPtr = histogram;
1125 }
1126 /* create a CL buffer for histogram */
1127 length = (MaxMap+1);
1128 histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
1129 if (clStatus != CL_SUCCESS)
1130 {
1131 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1132 goto cleanup;
1133 }
1134
1135 status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask,exception);
1136 if (status == MagickFalse)
1137 goto cleanup;
1138
1139 /* read from the kenel output */
1140 if (ALIGNED(histogram,cl_uint4))
1141 {
1142 length = (MaxMap+1);
1143 clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
1144 }
1145 else
1146 {
1147 length = (MaxMap+1);
1148 clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
1149 }
1150 if (clStatus != CL_SUCCESS)
1151 {
1152 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1153 goto cleanup;
1154 }
1155
1156 /* unmap, don't block gpu to use this buffer again. */
1157 if (ALIGNED(histogram,cl_uint4))
1158 {
1159 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
1160 if (clStatus != CL_SUCCESS)
1161 {
1162 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1163 goto cleanup;
1164 }
1165 }
1166
1167 /* recreate input buffer later, in case image updated */
1168 #ifdef RECREATEBUFFER
1169 if (imageBuffer!=NULL)
1170 clEnv->library->clReleaseMemObject(imageBuffer);
1171 #endif
1172
1173 /* CPU stuff */
1174 /*
1175 Find the histogram boundaries by locating the black/white levels.
1176 */
1177 black.red=0.0;
1178 white.red=MaxRange(QuantumRange);
1179 if ((image->channel_mask & RedChannel) != 0)
1180 {
1181 intensity=0.0;
1182 for (i=0; i <= (ssize_t) MaxMap; i++)
1183 {
1184 intensity+=histogram[i].s[2];
1185 if (intensity > black_point)
1186 break;
1187 }
1188 black.red=(MagickRealType) i;
1189 intensity=0.0;
1190 for (i=(ssize_t) MaxMap; i != 0; i--)
1191 {
1192 intensity+=histogram[i].s[2];
1193 if (intensity > ((double) image->columns*image->rows-white_point))
1194 break;
1195 }
1196 white.red=(MagickRealType) i;
1197 }
1198 black.green=0.0;
1199 white.green=MaxRange(QuantumRange);
1200 if ((image->channel_mask & GreenChannel) != 0)
1201 {
1202 intensity=0.0;
1203 for (i=0; i <= (ssize_t) MaxMap; i++)
1204 {
1205 intensity+=histogram[i].s[2];
1206 if (intensity > black_point)
1207 break;
1208 }
1209 black.green=(MagickRealType) i;
1210 intensity=0.0;
1211 for (i=(ssize_t) MaxMap; i != 0; i--)
1212 {
1213 intensity+=histogram[i].s[2];
1214 if (intensity > ((double) image->columns*image->rows-white_point))
1215 break;
1216 }
1217 white.green=(MagickRealType) i;
1218 }
1219 black.blue=0.0;
1220 white.blue=MaxRange(QuantumRange);
1221 if ((image->channel_mask & BlueChannel) != 0)
1222 {
1223 intensity=0.0;
1224 for (i=0; i <= (ssize_t) MaxMap; i++)
1225 {
1226 intensity+=histogram[i].s[2];
1227 if (intensity > black_point)
1228 break;
1229 }
1230 black.blue=(MagickRealType) i;
1231 intensity=0.0;
1232 for (i=(ssize_t) MaxMap; i != 0; i--)
1233 {
1234 intensity+=histogram[i].s[2];
1235 if (intensity > ((double) image->columns*image->rows-white_point))
1236 break;
1237 }
1238 white.blue=(MagickRealType) i;
1239 }
1240 black.alpha=0.0;
1241 white.alpha=MaxRange(QuantumRange);
1242 if ((image->channel_mask & AlphaChannel) != 0)
1243 {
1244 intensity=0.0;
1245 for (i=0; i <= (ssize_t) MaxMap; i++)
1246 {
1247 intensity+=histogram[i].s[2];
1248 if (intensity > black_point)
1249 break;
1250 }
1251 black.alpha=(MagickRealType) i;
1252 intensity=0.0;
1253 for (i=(ssize_t) MaxMap; i != 0; i--)
1254 {
1255 intensity+=histogram[i].s[2];
1256 if (intensity > ((double) image->columns*image->rows-white_point))
1257 break;
1258 }
1259 white.alpha=(MagickRealType) i;
1260 }
1261 /*
1262 black.index=0.0;
1263 white.index=MaxRange(QuantumRange);
1264 if (((channel & IndexChannel) != 0) && (image->colorspace == CMYKColorspace))
1265 {
1266 intensity=0.0;
1267 for (i=0; i <= (ssize_t) MaxMap; i++)
1268 {
1269 intensity+=histogram[i].index;
1270 if (intensity > black_point)
1271 break;
1272 }
1273 black.index=(MagickRealType) i;
1274 intensity=0.0;
1275 for (i=(ssize_t) MaxMap; i != 0; i--)
1276 {
1277 intensity+=histogram[i].index;
1278 if (intensity > ((double) image->columns*image->rows-white_point))
1279 break;
1280 }
1281 white.index=(MagickRealType) i;
1282 }
1283 */
1284
1285
1286 stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
1287 sizeof(*stretch_map));
1288
1289 if (stretch_map == (PixelPacket *) NULL)
1290 ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
1291 image->filename);
1292
1293 /*
1294 Stretch the histogram to create the stretched image mapping.
1295 */
1296 (void) ResetMagickMemory(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
1297 for (i=0; i <= (ssize_t) MaxMap; i++)
1298 {
1299 if ((image->channel_mask & RedChannel) != 0)
1300 {
1301 if (i < (ssize_t) black.red)
1302 stretch_map[i].red=(Quantum) 0;
1303 else
1304 if (i > (ssize_t) white.red)
1305 stretch_map[i].red=QuantumRange;
1306 else
1307 if (black.red != white.red)
1308 stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
1309 (i-black.red)/(white.red-black.red)));
1310 }
1311 if ((image->channel_mask & GreenChannel) != 0)
1312 {
1313 if (i < (ssize_t) black.green)
1314 stretch_map[i].green=0;
1315 else
1316 if (i > (ssize_t) white.green)
1317 stretch_map[i].green=QuantumRange;
1318 else
1319 if (black.green != white.green)
1320 stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
1321 (i-black.green)/(white.green-black.green)));
1322 }
1323 if ((image->channel_mask & BlueChannel) != 0)
1324 {
1325 if (i < (ssize_t) black.blue)
1326 stretch_map[i].blue=0;
1327 else
1328 if (i > (ssize_t) white.blue)
1329 stretch_map[i].blue= QuantumRange;
1330 else
1331 if (black.blue != white.blue)
1332 stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
1333 (i-black.blue)/(white.blue-black.blue)));
1334 }
1335 if ((image->channel_mask & AlphaChannel) != 0)
1336 {
1337 if (i < (ssize_t) black.alpha)
1338 stretch_map[i].alpha=0;
1339 else
1340 if (i > (ssize_t) white.alpha)
1341 stretch_map[i].alpha=QuantumRange;
1342 else
1343 if (black.alpha != white.alpha)
1344 stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
1345 (i-black.alpha)/(white.alpha-black.alpha)));
1346 }
1347 /*
1348 if (((channel & IndexChannel) != 0) &&
1349 (image->colorspace == CMYKColorspace))
1350 {
1351 if (i < (ssize_t) black.index)
1352 stretch_map[i].index=0;
1353 else
1354 if (i > (ssize_t) white.index)
1355 stretch_map[i].index=QuantumRange;
1356 else
1357 if (black.index != white.index)
1358 stretch_map[i].index=ScaleMapToQuantum((MagickRealType) (MaxMap*
1359 (i-black.index)/(white.index-black.index)));
1360 }
1361 */
1362 }
1363
1364 /*
1365 Stretch the image.
1366 */
1367 if (((image->channel_mask & AlphaChannel) != 0) || (((image->channel_mask & IndexChannel) != 0) &&
1368 (image->colorspace == CMYKColorspace)))
1369 image->storage_class=DirectClass;
1370 if (image->storage_class == PseudoClass)
1371 {
1372 /*
1373 Stretch colormap.
1374 */
1375 for (i=0; i < (ssize_t) image->colors; i++)
1376 {
1377 if ((image->channel_mask & RedChannel) != 0)
1378 {
1379 if (black.red != white.red)
1380 image->colormap[i].red=stretch_map[
1381 ScaleQuantumToMap(image->colormap[i].red)].red;
1382 }
1383 if ((image->channel_mask & GreenChannel) != 0)
1384 {
1385 if (black.green != white.green)
1386 image->colormap[i].green=stretch_map[
1387 ScaleQuantumToMap(image->colormap[i].green)].green;
1388 }
1389 if ((image->channel_mask & BlueChannel) != 0)
1390 {
1391 if (black.blue != white.blue)
1392 image->colormap[i].blue=stretch_map[
1393 ScaleQuantumToMap(image->colormap[i].blue)].blue;
1394 }
1395 if ((image->channel_mask & AlphaChannel) != 0)
1396 {
1397 if (black.alpha != white.alpha)
1398 image->colormap[i].alpha=stretch_map[
1399 ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
1400 }
1401 }
1402 }
1403
1404 /*
1405 Stretch image.
1406 */
1407
1408
1409 /* GPU can work on this again, image and equalize map as input
1410 image: uchar4 (CLPixelPacket)
1411 stretch_map: uchar4 (PixelPacket)
1412 black, white: float4 (FloatPixelPacket) */
1413
1414 #ifdef RECREATEBUFFER
1415 /* If the host pointer is aligned to the size of CLPixelPacket,
1416 then use the host buffer directly from the GPU; otherwise,
1417 create a buffer on the GPU and copy the data over */
1418 if (ALIGNED(inputPixels,CLPixelPacket))
1419 {
1420 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1421 }
1422 else
1423 {
1424 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1425 }
1426 /* create a CL buffer from image pixel buffer */
1427 length = image->columns * image->rows;
1428 imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1429 if (clStatus != CL_SUCCESS)
1430 {
1431 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1432 goto cleanup;
1433 }
1434 #endif
1435
1436 /* Create and initialize OpenCL buffers. */
1437 if (ALIGNED(stretch_map, PixelPacket))
1438 {
1439 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1440 hostPtr = stretch_map;
1441 }
1442 else
1443 {
1444 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1445 hostPtr = stretch_map;
1446 }
1447 /* create a CL buffer for stretch_map */
1448 length = (MaxMap+1);
1449 stretchMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
1450 if (clStatus != CL_SUCCESS)
1451 {
1452 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1453 goto cleanup;
1454 }
1455
1456 /* get the OpenCL kernel */
1457 stretchKernel = AcquireOpenCLKernel(device,"ContrastStretch");
1458 if (stretchKernel == NULL)
1459 {
1460 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1461 goto cleanup;
1462 }
1463
1464 /* set the kernel arguments */
1465 i = 0;
1466 clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1467 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&image->channel_mask);
1468 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
1469 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&white);
1470 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&black);
1471 if (clStatus != CL_SUCCESS)
1472 {
1473 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1474 goto cleanup;
1475 }
1476
1477 /* launch the kernel */
1478 global_work_size[0] = image->columns;
1479 global_work_size[1] = image->rows;
1480
1481 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
1482
1483 if (clStatus != CL_SUCCESS)
1484 {
1485 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1486 goto cleanup;
1487 }
1488 RecordProfileData(device,stretchKernel,event);
1489
1490 /* read the data back */
1491 if (ALIGNED(inputPixels,CLPixelPacket))
1492 {
1493 length = image->columns * image->rows;
1494 clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1495 }
1496 else
1497 {
1498 length = image->columns * image->rows;
1499 clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
1500 }
1501 if (clStatus != CL_SUCCESS)
1502 {
1503 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1504 goto cleanup;
1505 }
1506
1507 outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
1508
1509 cleanup:
1510
1511 image_view=DestroyCacheView(image_view);
1512
1513 if (imageBuffer!=NULL)
1514 clEnv->library->clReleaseMemObject(imageBuffer);
1515
1516 if (stretchMapBuffer!=NULL)
1517 clEnv->library->clReleaseMemObject(stretchMapBuffer);
1518 if (stretch_map!=NULL)
1519 stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
1520 if (histogramBuffer!=NULL)
1521 clEnv->library->clReleaseMemObject(histogramBuffer);
1522 if (histogram!=NULL)
1523 histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
1524 if (histogramKernel!=NULL)
1525 ReleaseOpenCLKernel(histogramKernel);
1526 if (stretchKernel!=NULL)
1527 ReleaseOpenCLKernel(stretchKernel);
1528 if (queue != NULL)
1529 ReleaseOpenCLCommandQueue(device,queue);
1530 if (device != NULL)
1531 ReleaseOpenCLDevice(device);
1532
1533 return(outputReady);
1534 }
1535
AccelerateContrastStretchImage(Image * image,const double black_point,const double white_point,ExceptionInfo * exception)1536 MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
1537 Image *image,const double black_point,const double white_point,
1538 ExceptionInfo *exception)
1539 {
1540 MagickBooleanType
1541 status;
1542
1543 MagickCLEnv
1544 clEnv;
1545
1546 assert(image != NULL);
1547 assert(exception != (ExceptionInfo *) NULL);
1548
1549 if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1550 (checkHistogramCondition(image,image->intensity) == MagickFalse))
1551 return(MagickFalse);
1552
1553 clEnv=getOpenCLEnvironment(exception);
1554 if (clEnv == (MagickCLEnv) NULL)
1555 return(MagickFalse);
1556
1557 status=ComputeContrastStretchImage(image,clEnv,black_point,white_point,
1558 exception);
1559 return(status);
1560 }
1561
1562 /*
1563 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1564 % %
1565 % %
1566 % %
1567 % A c c e l e r a t e C o n v o l v e I m a g e %
1568 % %
1569 % %
1570 % %
1571 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1572 */
1573
ComputeConvolveImage(const Image * image,MagickCLEnv clEnv,const KernelInfo * kernel,ExceptionInfo * exception)1574 static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
1575 const KernelInfo *kernel,ExceptionInfo *exception)
1576 {
1577 CacheView
1578 *filteredImage_view,
1579 *image_view;
1580
1581 cl_command_queue
1582 queue;
1583
1584 cl_event
1585 event;
1586
1587 cl_kernel
1588 clkernel;
1589
1590 cl_int
1591 clStatus;
1592
1593 cl_mem
1594 convolutionKernel,
1595 filteredImageBuffer,
1596 imageBuffer;
1597
1598 cl_mem_flags
1599 mem_flags;
1600
1601 const void
1602 *inputPixels;
1603
1604 float
1605 *kernelBufferPtr;
1606
1607 Image
1608 *filteredImage;
1609
1610 MagickBooleanType
1611 outputReady;
1612
1613 MagickCLDevice
1614 device;
1615
1616 MagickSizeType
1617 length;
1618
1619 size_t
1620 global_work_size[3],
1621 localGroupSize[3],
1622 localMemoryRequirement;
1623
1624 unsigned
1625 kernelSize;
1626
1627 unsigned int
1628 filterHeight,
1629 filterWidth,
1630 i,
1631 imageHeight,
1632 imageWidth,
1633 matte;
1634
1635 void
1636 *filteredPixels,
1637 *hostPtr;
1638
1639 /* intialize all CL objects to NULL */
1640 imageBuffer = NULL;
1641 filteredImageBuffer = NULL;
1642 convolutionKernel = NULL;
1643 clkernel = NULL;
1644 queue = NULL;
1645
1646 filteredImage = NULL;
1647 filteredImage_view = NULL;
1648 outputReady = MagickFalse;
1649
1650 device = RequestOpenCLDevice(clEnv);
1651
1652 image_view=AcquireAuthenticCacheView(image,exception);
1653 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1654 if (inputPixels == (const void *) NULL)
1655 {
1656 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1657 goto cleanup;
1658 }
1659
1660 /* Create and initialize OpenCL buffers. */
1661
1662 /* If the host pointer is aligned to the size of CLPixelPacket,
1663 then use the host buffer directly from the GPU; otherwise,
1664 create a buffer on the GPU and copy the data over */
1665 if (ALIGNED(inputPixels,CLPixelPacket))
1666 {
1667 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1668 }
1669 else
1670 {
1671 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1672 }
1673 /* create a CL buffer from image pixel buffer */
1674 length = image->columns * image->rows;
1675 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1676 if (clStatus != CL_SUCCESS)
1677 {
1678 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1679 goto cleanup;
1680 }
1681
1682 filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
1683 assert(filteredImage != NULL);
1684 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1685 {
1686 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1687 goto cleanup;
1688 }
1689 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1690 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1691 if (filteredPixels == (void *) NULL)
1692 {
1693 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1694 goto cleanup;
1695 }
1696
1697 if (ALIGNED(filteredPixels,CLPixelPacket))
1698 {
1699 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1700 hostPtr = filteredPixels;
1701 }
1702 else
1703 {
1704 mem_flags = CL_MEM_WRITE_ONLY;
1705 hostPtr = NULL;
1706 }
1707 /* create a CL buffer from image pixel buffer */
1708 length = image->columns * image->rows;
1709 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1710 if (clStatus != CL_SUCCESS)
1711 {
1712 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1713 goto cleanup;
1714 }
1715
1716 kernelSize = (unsigned int) (kernel->width * kernel->height);
1717 convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
1718 if (clStatus != CL_SUCCESS)
1719 {
1720 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1721 goto cleanup;
1722 }
1723
1724 queue = AcquireOpenCLCommandQueue(device);
1725
1726 kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
1727 , 0, NULL, NULL, &clStatus);
1728 if (clStatus != CL_SUCCESS)
1729 {
1730 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1731 goto cleanup;
1732 }
1733 for (i = 0; i < kernelSize; i++)
1734 {
1735 kernelBufferPtr[i] = (float) kernel->values[i];
1736 }
1737 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
1738 if (clStatus != CL_SUCCESS)
1739 {
1740 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1741 goto cleanup;
1742 }
1743
1744 /* Compute the local memory requirement for a 16x16 workgroup.
1745 If it's larger than 16k, reduce the workgroup size to 8x8 */
1746 localGroupSize[0] = 16;
1747 localGroupSize[1] = 16;
1748 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1749 + kernel->width*kernel->height*sizeof(float);
1750
1751 if (localMemoryRequirement > device->local_memory_size)
1752 {
1753 localGroupSize[0] = 8;
1754 localGroupSize[1] = 8;
1755 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1756 + kernel->width*kernel->height*sizeof(float);
1757 }
1758 if (localMemoryRequirement <= device->local_memory_size)
1759 {
1760 /* get the OpenCL kernel */
1761 clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
1762 if (clkernel == NULL)
1763 {
1764 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1765 goto cleanup;
1766 }
1767
1768 /* set the kernel arguments */
1769 i = 0;
1770 clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1771 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1772 imageWidth = (unsigned int) image->columns;
1773 imageHeight = (unsigned int) image->rows;
1774 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1775 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1776 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1777 filterWidth = (unsigned int) kernel->width;
1778 filterHeight = (unsigned int) kernel->height;
1779 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1780 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1781 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1782 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1783 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1784 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
1785 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
1786 if (clStatus != CL_SUCCESS)
1787 {
1788 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1789 goto cleanup;
1790 }
1791
1792 /* pad the global size to a multiple of the local work size dimension */
1793 global_work_size[0] = ((image->columns + localGroupSize[0] - 1)/localGroupSize[0] ) * localGroupSize[0] ;
1794 global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
1795
1796 /* launch the kernel */
1797 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1798 if (clStatus != CL_SUCCESS)
1799 {
1800 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1801 goto cleanup;
1802 }
1803 RecordProfileData(device,clkernel,event);
1804 }
1805 else
1806 {
1807 /* get the OpenCL kernel */
1808 clkernel = AcquireOpenCLKernel(device,"Convolve");
1809 if (clkernel == NULL)
1810 {
1811 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1812 goto cleanup;
1813 }
1814
1815 /* set the kernel arguments */
1816 i = 0;
1817 clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1818 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1819 imageWidth = (unsigned int) image->columns;
1820 imageHeight = (unsigned int) image->rows;
1821 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1822 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1823 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1824 filterWidth = (unsigned int) kernel->width;
1825 filterHeight = (unsigned int) kernel->height;
1826 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1827 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1828 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1829 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1830 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1831 if (clStatus != CL_SUCCESS)
1832 {
1833 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1834 goto cleanup;
1835 }
1836
1837 localGroupSize[0] = 8;
1838 localGroupSize[1] = 8;
1839 global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
1840 global_work_size[1] = (image->rows + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
1841 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1842
1843 if (clStatus != CL_SUCCESS)
1844 {
1845 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1846 goto cleanup;
1847 }
1848 }
1849 RecordProfileData(device,clkernel,event);
1850
1851 if (ALIGNED(filteredPixels,CLPixelPacket))
1852 {
1853 length = image->columns * image->rows;
1854 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1855 }
1856 else
1857 {
1858 length = image->columns * image->rows;
1859 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1860 }
1861 if (clStatus != CL_SUCCESS)
1862 {
1863 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1864 goto cleanup;
1865 }
1866
1867 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1868
1869 cleanup:
1870
1871 image_view=DestroyCacheView(image_view);
1872 if (filteredImage_view != NULL)
1873 filteredImage_view=DestroyCacheView(filteredImage_view);
1874 if (imageBuffer != NULL)
1875 clEnv->library->clReleaseMemObject(imageBuffer);
1876 if (filteredImageBuffer != NULL)
1877 clEnv->library->clReleaseMemObject(filteredImageBuffer);
1878 if (convolutionKernel != NULL)
1879 clEnv->library->clReleaseMemObject(convolutionKernel);
1880 if (clkernel != NULL)
1881 ReleaseOpenCLKernel(clkernel);
1882 if (queue != NULL)
1883 ReleaseOpenCLCommandQueue(device,queue);
1884 if (device != NULL)
1885 ReleaseOpenCLDevice(device);
1886 if (outputReady == MagickFalse)
1887 {
1888 if (filteredImage != NULL)
1889 {
1890 DestroyImage(filteredImage);
1891 filteredImage = NULL;
1892 }
1893 }
1894
1895 return(filteredImage);
1896 }
1897
AccelerateConvolveImage(const Image * image,const KernelInfo * kernel,ExceptionInfo * exception)1898 MagickPrivate Image *AccelerateConvolveImage(const Image *image,
1899 const KernelInfo *kernel,ExceptionInfo *exception)
1900 {
1901 /* Temporary disabled due to access violation
1902
1903 Image
1904 *filteredImage;
1905
1906 assert(image != NULL);
1907 assert(kernel != (KernelInfo *) NULL);
1908 assert(exception != (ExceptionInfo *) NULL);
1909 if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1910 (checkOpenCLEnvironment(exception) == MagickFalse))
1911 return((Image *) NULL);
1912
1913 filteredImage=ComputeConvolveImage(image,kernel,exception);
1914 return(filteredImage);
1915 */
1916 magick_unreferenced(image);
1917 magick_unreferenced(kernel);
1918 magick_unreferenced(exception);
1919 return((Image *)NULL);
1920 }
1921
1922 /*
1923 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1924 % %
1925 % %
1926 % %
1927 % A c c e l e r a t e D e s p e c k l e I m a g e %
1928 % %
1929 % %
1930 % %
1931 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1932 */
1933
ComputeDespeckleImage(const Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)1934 static Image *ComputeDespeckleImage(const Image *image,MagickCLEnv clEnv,
1935 ExceptionInfo*exception)
1936 {
1937 static const int
1938 X[4] = {0, 1, 1,-1},
1939 Y[4] = {1, 0, 1, 1};
1940
1941 CacheView
1942 *filteredImage_view,
1943 *image_view;
1944
1945 cl_command_queue
1946 queue;
1947
1948 cl_int
1949 clStatus;
1950
1951 cl_kernel
1952 hullPass1,
1953 hullPass2;
1954
1955 cl_event
1956 event;
1957
1958 cl_mem_flags
1959 mem_flags;
1960
1961 cl_mem
1962 filteredImageBuffer,
1963 imageBuffer,
1964 tempImageBuffer[2];
1965
1966 const void
1967 *inputPixels;
1968
1969 Image
1970 *filteredImage;
1971
1972 int
1973 k,
1974 matte;
1975
1976 MagickBooleanType
1977 outputReady;
1978
1979 MagickCLDevice
1980 device;
1981
1982 MagickSizeType
1983 length;
1984
1985 size_t
1986 global_work_size[2];
1987
1988 unsigned int
1989 imageHeight,
1990 imageWidth;
1991
1992 void
1993 *filteredPixels,
1994 *hostPtr;
1995
1996 outputReady = MagickFalse;
1997 inputPixels = NULL;
1998 filteredImage = NULL;
1999 filteredImage_view = NULL;
2000 filteredPixels = NULL;
2001 imageBuffer = NULL;
2002 filteredImageBuffer = NULL;
2003 hullPass1 = NULL;
2004 hullPass2 = NULL;
2005 queue = NULL;
2006 tempImageBuffer[0] = tempImageBuffer[1] = NULL;
2007
2008 device = RequestOpenCLDevice(clEnv);
2009 queue = AcquireOpenCLCommandQueue(device);
2010
2011 image_view=AcquireAuthenticCacheView(image,exception);
2012 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2013 if (inputPixels == (void *) NULL)
2014 {
2015 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2016 goto cleanup;
2017 }
2018
2019 if (ALIGNED(inputPixels,CLPixelPacket))
2020 {
2021 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2022 }
2023 else
2024 {
2025 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2026 }
2027 /* create a CL buffer from image pixel buffer */
2028 length = image->columns * image->rows;
2029 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2030 if (clStatus != CL_SUCCESS)
2031 {
2032 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2033 goto cleanup;
2034 }
2035
2036 mem_flags = CL_MEM_READ_WRITE;
2037 length = image->columns * image->rows;
2038 for (k = 0; k < 2; k++)
2039 {
2040 tempImageBuffer[k] = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
2041 if (clStatus != CL_SUCCESS)
2042 {
2043 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2044 goto cleanup;
2045 }
2046 }
2047
2048 filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
2049 assert(filteredImage != NULL);
2050 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
2051 {
2052 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
2053 goto cleanup;
2054 }
2055 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
2056 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
2057 if (filteredPixels == (void *) NULL)
2058 {
2059 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2060 goto cleanup;
2061 }
2062
2063 if (ALIGNED(filteredPixels,CLPixelPacket))
2064 {
2065 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2066 hostPtr = filteredPixels;
2067 }
2068 else
2069 {
2070 mem_flags = CL_MEM_WRITE_ONLY;
2071 hostPtr = NULL;
2072 }
2073 /* create a CL buffer from image pixel buffer */
2074 length = image->columns * image->rows;
2075 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2076 if (clStatus != CL_SUCCESS)
2077 {
2078 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2079 goto cleanup;
2080 }
2081
2082 hullPass1 = AcquireOpenCLKernel(device,"HullPass1");
2083 hullPass2 = AcquireOpenCLKernel(device,"HullPass2");
2084
2085 clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
2086 clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2087 imageWidth = (unsigned int) image->columns;
2088 clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
2089 imageHeight = (unsigned int) image->rows;
2090 clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
2091 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2092 clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
2093 if (clStatus != CL_SUCCESS)
2094 {
2095 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2096 goto cleanup;
2097 }
2098
2099 clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2100 clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
2101 imageWidth = (unsigned int) image->columns;
2102 clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
2103 imageHeight = (unsigned int) image->rows;
2104 clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
2105 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2106 clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
2107 if (clStatus != CL_SUCCESS)
2108 {
2109 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2110 goto cleanup;
2111 }
2112
2113
2114 global_work_size[0] = image->columns;
2115 global_work_size[1] = image->rows;
2116
2117
2118 for (k = 0; k < 4; k++)
2119 {
2120 cl_int2 offset;
2121 int polarity;
2122
2123
2124 offset.s[0] = X[k];
2125 offset.s[1] = Y[k];
2126 polarity = 1;
2127 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2128 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2129 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2130 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2131 if (clStatus != CL_SUCCESS)
2132 {
2133 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2134 goto cleanup;
2135 }
2136 /* launch the kernel */
2137 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2138 if (clStatus != CL_SUCCESS)
2139 {
2140 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2141 goto cleanup;
2142 }
2143 RecordProfileData(device,hullPass1,event);
2144
2145 /* launch the kernel */
2146 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2147 if (clStatus != CL_SUCCESS)
2148 {
2149 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2150 goto cleanup;
2151 }
2152 RecordProfileData(device,hullPass2,event);
2153
2154 if (k == 0)
2155 clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
2156 offset.s[0] = -X[k];
2157 offset.s[1] = -Y[k];
2158 polarity = 1;
2159 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2160 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2161 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2162 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2163 if (clStatus != CL_SUCCESS)
2164 {
2165 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2166 goto cleanup;
2167 }
2168 /* launch the kernel */
2169 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2170 if (clStatus != CL_SUCCESS)
2171 {
2172 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2173 goto cleanup;
2174 }
2175 RecordProfileData(device,hullPass1,event);
2176
2177 /* launch the kernel */
2178 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2179 if (clStatus != CL_SUCCESS)
2180 {
2181 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2182 goto cleanup;
2183 }
2184 RecordProfileData(device,hullPass2,event);
2185
2186 offset.s[0] = -X[k];
2187 offset.s[1] = -Y[k];
2188 polarity = -1;
2189 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2190 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2191 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2192 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2193 if (clStatus != CL_SUCCESS)
2194 {
2195 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2196 goto cleanup;
2197 }
2198 /* launch the kernel */
2199 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2200 if (clStatus != CL_SUCCESS)
2201 {
2202 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2203 goto cleanup;
2204 }
2205 RecordProfileData(device,hullPass1,event);
2206
2207 /* launch the kernel */
2208 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2209 if (clStatus != CL_SUCCESS)
2210 {
2211 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2212 goto cleanup;
2213 }
2214 RecordProfileData(device,hullPass2,event);
2215
2216 offset.s[0] = X[k];
2217 offset.s[1] = Y[k];
2218 polarity = -1;
2219 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2220 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2221 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2222 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2223
2224 if (k == 3)
2225 clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
2226
2227 if (clStatus != CL_SUCCESS)
2228 {
2229 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2230 goto cleanup;
2231 }
2232 /* launch the kernel */
2233 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2234 if (clStatus != CL_SUCCESS)
2235 {
2236 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2237 goto cleanup;
2238 }
2239 RecordProfileData(device,hullPass1,event);
2240
2241 /* launch the kernel */
2242 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2243 if (clStatus != CL_SUCCESS)
2244 {
2245 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2246 goto cleanup;
2247 }
2248 RecordProfileData(device,hullPass2,event);
2249 }
2250
2251 if (ALIGNED(filteredPixels,CLPixelPacket))
2252 {
2253 length = image->columns * image->rows;
2254 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2255 }
2256 else
2257 {
2258 length = image->columns * image->rows;
2259 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2260 }
2261 if (clStatus != CL_SUCCESS)
2262 {
2263 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2264 goto cleanup;
2265 }
2266
2267 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2268
2269 cleanup:
2270
2271 image_view=DestroyCacheView(image_view);
2272 if (filteredImage_view != NULL)
2273 filteredImage_view=DestroyCacheView(filteredImage_view);
2274
2275 if (queue != NULL)
2276 ReleaseOpenCLCommandQueue(device,queue);
2277 if (device != NULL)
2278 ReleaseOpenCLDevice(device);
2279 if (imageBuffer!=NULL)
2280 clEnv->library->clReleaseMemObject(imageBuffer);
2281 for (k = 0; k < 2; k++)
2282 {
2283 if (tempImageBuffer[k]!=NULL)
2284 clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
2285 }
2286 if (filteredImageBuffer!=NULL)
2287 clEnv->library->clReleaseMemObject(filteredImageBuffer);
2288 if (hullPass1!=NULL)
2289 ReleaseOpenCLKernel(hullPass1);
2290 if (hullPass2!=NULL)
2291 ReleaseOpenCLKernel(hullPass2);
2292 if (outputReady == MagickFalse && filteredImage != NULL)
2293 filteredImage=DestroyImage(filteredImage);
2294
2295 return(filteredImage);
2296 }
2297
AccelerateDespeckleImage(const Image * image,ExceptionInfo * exception)2298 MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
2299 ExceptionInfo* exception)
2300 {
2301 Image
2302 *filteredImage;
2303
2304 MagickCLEnv
2305 clEnv;
2306
2307 assert(image != NULL);
2308 assert(exception != (ExceptionInfo *) NULL);
2309
2310 if (checkAccelerateConditionRGBA(image) == MagickFalse)
2311 return((Image *) NULL);
2312
2313 clEnv=getOpenCLEnvironment(exception);
2314 if (clEnv == (MagickCLEnv) NULL)
2315 return((Image *) NULL);
2316
2317 filteredImage=ComputeDespeckleImage(image,clEnv,exception);
2318 return(filteredImage);
2319 }
2320
2321 /*
2322 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2323 % %
2324 % %
2325 % %
2326 % A c c e l e r a t e E q u a l i z e I m a g e %
2327 % %
2328 % %
2329 % %
2330 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2331 */
2332
ComputeEqualizeImage(Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)2333 static MagickBooleanType ComputeEqualizeImage(Image *image,MagickCLEnv clEnv,
2334 ExceptionInfo *exception)
2335 {
2336 #define EqualizeImageTag "Equalize/Image"
2337
2338 CacheView
2339 *image_view;
2340
2341 cl_command_queue
2342 queue;
2343
2344 cl_int
2345 clStatus;
2346
2347 cl_mem_flags
2348 mem_flags;
2349
2350 cl_mem
2351 equalizeMapBuffer,
2352 histogramBuffer,
2353 imageBuffer;
2354
2355 cl_kernel
2356 equalizeKernel,
2357 histogramKernel;
2358
2359 cl_event
2360 event;
2361
2362 cl_uint4
2363 *histogram;
2364
2365 FloatPixelPacket
2366 white,
2367 black,
2368 intensity,
2369 *map;
2370
2371 MagickBooleanType
2372 outputReady,
2373 status;
2374
2375 MagickCLDevice
2376 device;
2377
2378 MagickSizeType
2379 length;
2380
2381 PixelPacket
2382 *equalize_map;
2383
2384 register ssize_t
2385 i;
2386
2387 size_t
2388 global_work_size[2];
2389
2390 void
2391 *hostPtr,
2392 *inputPixels;
2393
2394 map=NULL;
2395 histogram=NULL;
2396 equalize_map=NULL;
2397 inputPixels = NULL;
2398 imageBuffer = NULL;
2399 histogramBuffer = NULL;
2400 equalizeMapBuffer = NULL;
2401 histogramKernel = NULL;
2402 equalizeKernel = NULL;
2403 queue = NULL;
2404 outputReady = MagickFalse;
2405
2406 assert(image != (Image *) NULL);
2407 assert(image->signature == MagickCoreSignature);
2408 if (image->debug != MagickFalse)
2409 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2410
2411 /*
2412 * initialize opencl env
2413 */
2414 device = RequestOpenCLDevice(clEnv);
2415 queue = AcquireOpenCLCommandQueue(device);
2416
2417 /*
2418 Allocate and initialize histogram arrays.
2419 */
2420 histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
2421 if (histogram == (cl_uint4 *) NULL)
2422 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2423
2424 /* reset histogram */
2425 (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
2426
2427 /* Create and initialize OpenCL buffers. */
2428 /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
2429 /* assume this will get a writable image */
2430 image_view=AcquireAuthenticCacheView(image,exception);
2431 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2432
2433 if (inputPixels == (void *) NULL)
2434 {
2435 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2436 goto cleanup;
2437 }
2438 /* If the host pointer is aligned to the size of CLPixelPacket,
2439 then use the host buffer directly from the GPU; otherwise,
2440 create a buffer on the GPU and copy the data over */
2441 if (ALIGNED(inputPixels,CLPixelPacket))
2442 {
2443 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2444 }
2445 else
2446 {
2447 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2448 }
2449 /* create a CL buffer from image pixel buffer */
2450 length = image->columns * image->rows;
2451 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2452 if (clStatus != CL_SUCCESS)
2453 {
2454 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2455 goto cleanup;
2456 }
2457
2458 /* If the host pointer is aligned to the size of cl_uint,
2459 then use the host buffer directly from the GPU; otherwise,
2460 create a buffer on the GPU and copy the data over */
2461 if (ALIGNED(histogram,cl_uint4))
2462 {
2463 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2464 hostPtr = histogram;
2465 }
2466 else
2467 {
2468 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2469 hostPtr = histogram;
2470 }
2471 /* create a CL buffer for histogram */
2472 length = (MaxMap+1);
2473 histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
2474 if (clStatus != CL_SUCCESS)
2475 {
2476 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2477 goto cleanup;
2478 }
2479
2480 status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask, exception);
2481 if (status == MagickFalse)
2482 goto cleanup;
2483
2484 /* read from the kenel output */
2485 if (ALIGNED(histogram,cl_uint4))
2486 {
2487 length = (MaxMap+1);
2488 clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
2489 }
2490 else
2491 {
2492 length = (MaxMap+1);
2493 clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
2494 }
2495 if (clStatus != CL_SUCCESS)
2496 {
2497 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2498 goto cleanup;
2499 }
2500
2501 /* unmap, don't block gpu to use this buffer again. */
2502 if (ALIGNED(histogram,cl_uint4))
2503 {
2504 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
2505 if (clStatus != CL_SUCCESS)
2506 {
2507 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
2508 goto cleanup;
2509 }
2510 }
2511
2512 /* recreate input buffer later, in case image updated */
2513 #ifdef RECREATEBUFFER
2514 if (imageBuffer!=NULL)
2515 clEnv->library->clReleaseMemObject(imageBuffer);
2516 #endif
2517
2518 /* CPU stuff */
2519 equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
2520 if (equalize_map == (PixelPacket *) NULL)
2521 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2522
2523 map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
2524 if (map == (FloatPixelPacket *) NULL)
2525 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2526
2527 /*
2528 Integrate the histogram to get the equalization map.
2529 */
2530 (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
2531 for (i=0; i <= (ssize_t) MaxMap; i++)
2532 {
2533 if ((image->channel_mask & SyncChannels) != 0)
2534 {
2535 intensity.red+=histogram[i].s[2];
2536 map[i]=intensity;
2537 continue;
2538 }
2539 if ((image->channel_mask & RedChannel) != 0)
2540 intensity.red+=histogram[i].s[2];
2541 if ((image->channel_mask & GreenChannel) != 0)
2542 intensity.green+=histogram[i].s[1];
2543 if ((image->channel_mask & BlueChannel) != 0)
2544 intensity.blue+=histogram[i].s[0];
2545 if ((image->channel_mask & AlphaChannel) != 0)
2546 intensity.alpha+=histogram[i].s[3];
2547 /*
2548 if (((channel & IndexChannel) != 0) &&
2549 (image->colorspace == CMYKColorspace))
2550 {
2551 intensity.index+=histogram[i].index;
2552 }
2553 */
2554 map[i]=intensity;
2555 }
2556 black=map[0];
2557 white=map[(int) MaxMap];
2558 (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
2559 for (i=0; i <= (ssize_t) MaxMap; i++)
2560 {
2561 if ((image->channel_mask & SyncChannels) != 0)
2562 {
2563 if (white.red != black.red)
2564 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2565 (map[i].red-black.red))/(white.red-black.red)));
2566 continue;
2567 }
2568 if (((image->channel_mask & RedChannel) != 0) && (white.red != black.red))
2569 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2570 (map[i].red-black.red))/(white.red-black.red)));
2571 if (((image->channel_mask & GreenChannel) != 0) && (white.green != black.green))
2572 equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2573 (map[i].green-black.green))/(white.green-black.green)));
2574 if (((image->channel_mask & BlueChannel) != 0) && (white.blue != black.blue))
2575 equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2576 (map[i].blue-black.blue))/(white.blue-black.blue)));
2577 if (((image->channel_mask & AlphaChannel) != 0) && (white.alpha != black.alpha))
2578 equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2579 (map[i].alpha-black.alpha))/(white.alpha-black.alpha)));
2580 /*
2581 if ((((channel & IndexChannel) != 0) &&
2582 (image->colorspace == CMYKColorspace)) &&
2583 (white.index != black.index))
2584 equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2585 (map[i].index-black.index))/(white.index-black.index)));
2586 */
2587 }
2588
2589 if (image->storage_class == PseudoClass)
2590 {
2591 /*
2592 Equalize colormap.
2593 */
2594 for (i=0; i < (ssize_t) image->colors; i++)
2595 {
2596 if ((image->channel_mask & SyncChannels) != 0)
2597 {
2598 if (white.red != black.red)
2599 {
2600 image->colormap[i].red=equalize_map[
2601 ScaleQuantumToMap(image->colormap[i].red)].red;
2602 image->colormap[i].green=equalize_map[
2603 ScaleQuantumToMap(image->colormap[i].green)].red;
2604 image->colormap[i].blue=equalize_map[
2605 ScaleQuantumToMap(image->colormap[i].blue)].red;
2606 image->colormap[i].alpha=equalize_map[
2607 ScaleQuantumToMap(image->colormap[i].alpha)].red;
2608 }
2609 continue;
2610 }
2611 if (((image->channel_mask & RedChannel) != 0) && (white.red != black.red))
2612 image->colormap[i].red=equalize_map[
2613 ScaleQuantumToMap(image->colormap[i].red)].red;
2614 if (((image->channel_mask & GreenChannel) != 0) && (white.green != black.green))
2615 image->colormap[i].green=equalize_map[
2616 ScaleQuantumToMap(image->colormap[i].green)].green;
2617 if (((image->channel_mask & BlueChannel) != 0) && (white.blue != black.blue))
2618 image->colormap[i].blue=equalize_map[
2619 ScaleQuantumToMap(image->colormap[i].blue)].blue;
2620 if (((image->channel_mask & AlphaChannel) != 0) &&
2621 (white.alpha != black.alpha))
2622 image->colormap[i].alpha=equalize_map[
2623 ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
2624 }
2625 }
2626
2627 /*
2628 Equalize image.
2629 */
2630
2631 /* GPU can work on this again, image and equalize map as input
2632 image: uchar4 (CLPixelPacket)
2633 equalize_map: uchar4 (PixelPacket)
2634 black, white: float4 (FloatPixelPacket) */
2635
2636 #ifdef RECREATEBUFFER
2637 /* If the host pointer is aligned to the size of CLPixelPacket,
2638 then use the host buffer directly from the GPU; otherwise,
2639 create a buffer on the GPU and copy the data over */
2640 if (ALIGNED(inputPixels,CLPixelPacket))
2641 {
2642 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2643 }
2644 else
2645 {
2646 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2647 }
2648 /* create a CL buffer from image pixel buffer */
2649 length = image->columns * image->rows;
2650 imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2651 if (clStatus != CL_SUCCESS)
2652 {
2653 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2654 goto cleanup;
2655 }
2656 #endif
2657
2658 /* Create and initialize OpenCL buffers. */
2659 if (ALIGNED(equalize_map, PixelPacket))
2660 {
2661 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2662 hostPtr = equalize_map;
2663 }
2664 else
2665 {
2666 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2667 hostPtr = equalize_map;
2668 }
2669 /* create a CL buffer for eqaulize_map */
2670 length = (MaxMap+1);
2671 equalizeMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
2672 if (clStatus != CL_SUCCESS)
2673 {
2674 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2675 goto cleanup;
2676 }
2677
2678 /* get the OpenCL kernel */
2679 equalizeKernel = AcquireOpenCLKernel(device,"Equalize");
2680 if (equalizeKernel == NULL)
2681 {
2682 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
2683 goto cleanup;
2684 }
2685
2686 /* set the kernel arguments */
2687 i = 0;
2688 clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2689 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&image->channel_mask);
2690 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
2691 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
2692 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
2693 if (clStatus != CL_SUCCESS)
2694 {
2695 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2696 goto cleanup;
2697 }
2698
2699 /* launch the kernel */
2700 global_work_size[0] = image->columns;
2701 global_work_size[1] = image->rows;
2702
2703 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2704
2705 if (clStatus != CL_SUCCESS)
2706 {
2707 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2708 goto cleanup;
2709 }
2710 RecordProfileData(device,equalizeKernel,event);
2711
2712 /* read the data back */
2713 if (ALIGNED(inputPixels,CLPixelPacket))
2714 {
2715 length = image->columns * image->rows;
2716 clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2717 }
2718 else
2719 {
2720 length = image->columns * image->rows;
2721 clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
2722 }
2723 if (clStatus != CL_SUCCESS)
2724 {
2725 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2726 goto cleanup;
2727 }
2728
2729 outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
2730
2731 cleanup:
2732
2733 image_view=DestroyCacheView(image_view);
2734
2735 if (imageBuffer!=NULL)
2736 clEnv->library->clReleaseMemObject(imageBuffer);
2737 if (map!=NULL)
2738 map=(FloatPixelPacket *) RelinquishMagickMemory(map);
2739 if (equalizeMapBuffer!=NULL)
2740 clEnv->library->clReleaseMemObject(equalizeMapBuffer);
2741 if (equalize_map!=NULL)
2742 equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
2743 if (histogramBuffer!=NULL)
2744 clEnv->library->clReleaseMemObject(histogramBuffer);
2745 if (histogram!=NULL)
2746 histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
2747 if (histogramKernel!=NULL)
2748 ReleaseOpenCLKernel(histogramKernel);
2749 if (equalizeKernel!=NULL)
2750 ReleaseOpenCLKernel(equalizeKernel);
2751 if (queue != NULL)
2752 ReleaseOpenCLCommandQueue(device, queue);
2753 if (device != NULL)
2754 ReleaseOpenCLDevice(device);
2755
2756 return(outputReady);
2757 }
2758
AccelerateEqualizeImage(Image * image,ExceptionInfo * exception)2759 MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
2760 ExceptionInfo *exception)
2761 {
2762 MagickBooleanType
2763 status;
2764
2765 MagickCLEnv
2766 clEnv;
2767
2768 assert(image != NULL);
2769 assert(exception != (ExceptionInfo *) NULL);
2770
2771 if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
2772 (checkHistogramCondition(image,image->intensity) == MagickFalse))
2773 return(MagickFalse);
2774
2775 clEnv=getOpenCLEnvironment(exception);
2776 if (clEnv == (MagickCLEnv) NULL)
2777 return(MagickFalse);
2778
2779 status=ComputeEqualizeImage(image,clEnv,exception);
2780 return(status);
2781 }
2782
2783 /*
2784 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2785 % %
2786 % %
2787 % %
2788 % A c c e l e r a t e F u n c t i o n I m a g e %
2789 % %
2790 % %
2791 % %
2792 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2793 */
2794
ComputeFunctionImage(Image * image,MagickCLEnv clEnv,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2795 static MagickBooleanType ComputeFunctionImage(Image *image,MagickCLEnv clEnv,
2796 const MagickFunction function,const size_t number_parameters,
2797 const double *parameters,ExceptionInfo *exception)
2798 {
2799 cl_command_queue
2800 queue;
2801
2802 cl_int
2803 status;
2804
2805 cl_kernel
2806 functionKernel;
2807
2808 cl_mem
2809 imageBuffer,
2810 parametersBuffer;
2811
2812 cl_uint
2813 number_params,
2814 number_channels;
2815
2816 float
2817 *parametersBufferPtr;
2818
2819 MagickBooleanType
2820 outputReady;
2821
2822 MagickCLDevice
2823 device;
2824
2825 size_t
2826 gsize[2],
2827 i;
2828
2829 outputReady=MagickFalse;
2830
2831 functionKernel=NULL;
2832 parametersBuffer=NULL;
2833
2834 device=RequestOpenCLDevice(clEnv);
2835 queue=AcquireOpenCLCommandQueue(device);
2836 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2837 if (imageBuffer == (cl_mem) NULL)
2838 goto cleanup;
2839
2840 parametersBufferPtr=(float *) AcquireQuantumMemory(number_parameters,
2841 sizeof(float));
2842 if (parametersBufferPtr == (float *) NULL)
2843 goto cleanup;
2844 for (i=0; i<number_parameters; i++)
2845 parametersBufferPtr[i]=(float) parameters[i];
2846 parametersBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
2847 CL_MEM_COPY_HOST_PTR,number_parameters*sizeof(*parametersBufferPtr),
2848 parametersBufferPtr);
2849 parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
2850 if (parametersBuffer == (cl_mem) NULL)
2851 {
2852 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2853 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
2854 goto cleanup;
2855 }
2856
2857 functionKernel=AcquireOpenCLKernel(device,"ComputeFunction");
2858 if (functionKernel == (cl_kernel) NULL)
2859 {
2860 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2861 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2862 goto cleanup;
2863 }
2864
2865 number_channels=(cl_uint) image->number_channels;
2866 number_params=(cl_uint) number_parameters;
2867
2868 i=0;
2869 status =SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2870 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_channels);
2871 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
2872 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(MagickFunction),(void *)&function);
2873 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_params);
2874 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)¶metersBuffer);
2875 if (status != CL_SUCCESS)
2876 {
2877 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2878 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2879 goto cleanup;
2880 }
2881
2882 gsize[0]=image->columns;
2883 gsize[1]=image->rows;
2884 outputReady=EnqueueOpenCLKernel(queue,functionKernel,2,(const size_t *) NULL,
2885 gsize,(const size_t *) NULL,image,(const Image *) NULL,exception);
2886
2887 cleanup:
2888
2889 if (parametersBuffer != (cl_mem) NULL)
2890 ReleaseOpenCLMemObject(parametersBuffer);
2891 if (functionKernel != (cl_kernel) NULL)
2892 ReleaseOpenCLKernel(functionKernel);
2893 if (queue != (cl_command_queue) NULL)
2894 ReleaseOpenCLCommandQueue(device,queue);
2895 if (device != (MagickCLDevice) NULL)
2896 ReleaseOpenCLDevice(device);
2897 return(outputReady);
2898 }
2899
AccelerateFunctionImage(Image * image,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2900 MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
2901 const MagickFunction function,const size_t number_parameters,
2902 const double *parameters,ExceptionInfo *exception)
2903 {
2904 MagickBooleanType
2905 status;
2906
2907 MagickCLEnv
2908 clEnv;
2909
2910 assert(image != NULL);
2911 assert(exception != (ExceptionInfo *) NULL);
2912
2913 if (checkAccelerateCondition(image) == MagickFalse)
2914 return(MagickFalse);
2915
2916 clEnv=getOpenCLEnvironment(exception);
2917 if (clEnv == (MagickCLEnv) NULL)
2918 return(MagickFalse);
2919
2920 status=ComputeFunctionImage(image,clEnv,function,number_parameters,
2921 parameters,exception);
2922 return(status);
2923 }
2924
2925 /*
2926 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2927 % %
2928 % %
2929 % %
2930 % A c c e l e r a t e G r a y s c a l e I m a g e %
2931 % %
2932 % %
2933 % %
2934 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2935 */
2936
ComputeGrayscaleImage(Image * image,MagickCLEnv clEnv,const PixelIntensityMethod method,ExceptionInfo * exception)2937 static MagickBooleanType ComputeGrayscaleImage(Image *image,MagickCLEnv clEnv,
2938 const PixelIntensityMethod method,ExceptionInfo *exception)
2939 {
2940 cl_command_queue
2941 queue;
2942
2943 cl_int
2944 status;
2945
2946 cl_kernel
2947 grayscaleKernel;
2948
2949 cl_mem
2950 imageBuffer;
2951
2952 cl_uint
2953 number_channels,
2954 colorspace,
2955 intensityMethod;
2956
2957 MagickBooleanType
2958 outputReady;
2959
2960 MagickCLDevice
2961 device;
2962
2963 size_t
2964 gsize[2],
2965 i;
2966
2967 outputReady=MagickFalse;
2968 grayscaleKernel=NULL;
2969
2970 assert(image != (Image *) NULL);
2971 assert(image->signature == MagickCoreSignature);
2972 device=RequestOpenCLDevice(clEnv);
2973 queue=AcquireOpenCLCommandQueue(device);
2974 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2975 if (imageBuffer == (cl_mem) NULL)
2976 goto cleanup;
2977
2978 grayscaleKernel=AcquireOpenCLKernel(device,"Grayscale");
2979 if (grayscaleKernel == (cl_kernel) NULL)
2980 {
2981 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2982 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2983 goto cleanup;
2984 }
2985
2986 number_channels=(cl_uint) image->number_channels;
2987 intensityMethod=(cl_uint) method;
2988 colorspace=(cl_uint) image->colorspace;
2989
2990 i=0;
2991 status =SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2992 status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&number_channels);
2993 status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&colorspace);
2994 status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&intensityMethod);
2995 if (status != CL_SUCCESS)
2996 {
2997 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2998 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2999 goto cleanup;
3000 }
3001
3002 gsize[0]=image->columns;
3003 gsize[1]=image->rows;
3004 outputReady=EnqueueOpenCLKernel(queue,grayscaleKernel,2,
3005 (const size_t *) NULL,gsize,(const size_t *) NULL,image,(Image *) NULL,
3006 exception);
3007
3008 cleanup:
3009
3010 if (grayscaleKernel != (cl_kernel) NULL)
3011 ReleaseOpenCLKernel(grayscaleKernel);
3012 if (queue != (cl_command_queue) NULL)
3013 ReleaseOpenCLCommandQueue(device,queue);
3014 if (device != (MagickCLDevice) NULL)
3015 ReleaseOpenCLDevice(device);
3016
3017 return(outputReady);
3018 }
3019
AccelerateGrayscaleImage(Image * image,const PixelIntensityMethod method,ExceptionInfo * exception)3020 MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
3021 const PixelIntensityMethod method,ExceptionInfo *exception)
3022 {
3023 MagickBooleanType
3024 status;
3025
3026 MagickCLEnv
3027 clEnv;
3028
3029 assert(image != NULL);
3030 assert(exception != (ExceptionInfo *) NULL);
3031
3032 if ((checkAccelerateCondition(image) == MagickFalse) ||
3033 (checkPixelIntensity(image,method) == MagickFalse))
3034 return(MagickFalse);
3035
3036 if (image->number_channels < 3)
3037 return(MagickFalse);
3038
3039 if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
3040 (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
3041 (GetPixelBlueTraits(image) == UndefinedPixelTrait))
3042 return(MagickFalse);
3043
3044 clEnv=getOpenCLEnvironment(exception);
3045 if (clEnv == (MagickCLEnv) NULL)
3046 return(MagickFalse);
3047
3048 status=ComputeGrayscaleImage(image,clEnv,method,exception);
3049 return(status);
3050 }
3051
3052 /*
3053 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3054 % %
3055 % %
3056 % %
3057 % A c c e l e r a t e L o c a l C o n t r a s t I m a g e %
3058 % %
3059 % %
3060 % %
3061 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3062 */
3063
ComputeLocalContrastImage(const Image * image,MagickCLEnv clEnv,const double radius,const double strength,ExceptionInfo * exception)3064 static Image *ComputeLocalContrastImage(const Image *image,MagickCLEnv clEnv,
3065 const double radius,const double strength,ExceptionInfo *exception)
3066 {
3067 CacheView
3068 *filteredImage_view,
3069 *image_view;
3070
3071 cl_command_queue
3072 queue;
3073
3074 cl_int
3075 clStatus,
3076 iRadius;
3077
3078 cl_kernel
3079 blurRowKernel,
3080 blurColumnKernel;
3081
3082 cl_event
3083 event;
3084
3085 cl_mem
3086 filteredImageBuffer,
3087 imageBuffer,
3088 imageKernelBuffer,
3089 tempImageBuffer;
3090
3091 cl_mem_flags
3092 mem_flags;
3093
3094 const void
3095 *inputPixels;
3096
3097 Image
3098 *filteredImage;
3099
3100 MagickBooleanType
3101 outputReady;
3102
3103 MagickCLDevice
3104 device;
3105
3106 MagickSizeType
3107 length;
3108
3109 void
3110 *filteredPixels,
3111 *hostPtr;
3112
3113 unsigned int
3114 i,
3115 imageColumns,
3116 imageRows,
3117 passes;
3118
3119 filteredImage = NULL;
3120 filteredImage_view = NULL;
3121 imageBuffer = NULL;
3122 filteredImageBuffer = NULL;
3123 tempImageBuffer = NULL;
3124 imageKernelBuffer = NULL;
3125 blurRowKernel = NULL;
3126 blurColumnKernel = NULL;
3127 queue = NULL;
3128 outputReady = MagickFalse;
3129
3130 device = RequestOpenCLDevice(clEnv);
3131 queue = AcquireOpenCLCommandQueue(device);
3132
3133 /* Create and initialize OpenCL buffers. */
3134 {
3135 image_view=AcquireAuthenticCacheView(image,exception);
3136 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3137 if (inputPixels == (const void *) NULL)
3138 {
3139 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3140 goto cleanup;
3141 }
3142
3143 /* If the host pointer is aligned to the size of CLPixelPacket,
3144 then use the host buffer directly from the GPU; otherwise,
3145 create a buffer on the GPU and copy the data over */
3146 if (ALIGNED(inputPixels,CLPixelPacket))
3147 {
3148 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3149 }
3150 else
3151 {
3152 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3153 }
3154 /* create a CL buffer from image pixel buffer */
3155 length = image->columns * image->rows;
3156 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3157 if (clStatus != CL_SUCCESS)
3158 {
3159 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3160 goto cleanup;
3161 }
3162 }
3163
3164 /* create output */
3165 {
3166 filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
3167 assert(filteredImage != NULL);
3168 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3169 {
3170 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
3171 goto cleanup;
3172 }
3173 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3174 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3175 if (filteredPixels == (void *) NULL)
3176 {
3177 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3178 goto cleanup;
3179 }
3180
3181 if (ALIGNED(filteredPixels,CLPixelPacket))
3182 {
3183 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3184 hostPtr = filteredPixels;
3185 }
3186 else
3187 {
3188 mem_flags = CL_MEM_WRITE_ONLY;
3189 hostPtr = NULL;
3190 }
3191
3192 /* create a CL buffer from image pixel buffer */
3193 length = image->columns * image->rows;
3194 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3195 if (clStatus != CL_SUCCESS)
3196 {
3197 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3198 goto cleanup;
3199 }
3200 }
3201
3202 {
3203 /* create temp buffer */
3204 {
3205 length = image->columns * image->rows;
3206 tempImageBuffer = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
3207 if (clStatus != CL_SUCCESS)
3208 {
3209 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3210 goto cleanup;
3211 }
3212 }
3213
3214 /* get the opencl kernel */
3215 {
3216 blurRowKernel = AcquireOpenCLKernel(device,"LocalContrastBlurRow");
3217 if (blurRowKernel == NULL)
3218 {
3219 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3220 goto cleanup;
3221 };
3222
3223 blurColumnKernel = AcquireOpenCLKernel(device,"LocalContrastBlurApplyColumn");
3224 if (blurColumnKernel == NULL)
3225 {
3226 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3227 goto cleanup;
3228 };
3229 }
3230
3231 {
3232 imageColumns = (unsigned int) image->columns;
3233 imageRows = (unsigned int) image->rows;
3234 iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius); // Normalized radius, 100% gives blur radius of 20% of the largest dimension
3235
3236 passes = (((1.0f * imageColumns) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
3237 passes = (passes < 1) ? 1: passes;
3238
3239 /* set the kernel arguments */
3240 i = 0;
3241 clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3242 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3243 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3244 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
3245 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3246 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3247
3248 if (clStatus != CL_SUCCESS)
3249 {
3250 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3251 goto cleanup;
3252 }
3253 }
3254
3255 /* launch the kernel */
3256 {
3257 int x;
3258 for (x = 0; x < passes; ++x) {
3259 size_t gsize[2];
3260 size_t wsize[2];
3261 size_t goffset[2];
3262
3263 gsize[0] = 256;
3264 gsize[1] = image->rows / passes;
3265 wsize[0] = 256;
3266 wsize[1] = 1;
3267 goffset[0] = 0;
3268 goffset[1] = x * gsize[1];
3269
3270 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3271 if (clStatus != CL_SUCCESS)
3272 {
3273 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3274 goto cleanup;
3275 }
3276 clEnv->library->clFlush(queue);
3277 RecordProfileData(device,blurRowKernel,event);
3278 }
3279 }
3280
3281 {
3282 cl_float FStrength = strength;
3283 i = 0;
3284 clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3285 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3286 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3287 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
3288 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
3289 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3290 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3291
3292 if (clStatus != CL_SUCCESS)
3293 {
3294 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3295 goto cleanup;
3296 }
3297 }
3298
3299 /* launch the kernel */
3300 {
3301 int x;
3302 for (x = 0; x < passes; ++x) {
3303 size_t gsize[2];
3304 size_t wsize[2];
3305 size_t goffset[2];
3306
3307 gsize[0] = ((image->columns + 3) / 4) * 4;
3308 gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
3309 wsize[0] = 4;
3310 wsize[1] = 64;
3311 goffset[0] = 0;
3312 goffset[1] = x * gsize[1];
3313
3314 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3315 if (clStatus != CL_SUCCESS)
3316 {
3317 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3318 goto cleanup;
3319 }
3320 clEnv->library->clFlush(queue);
3321 RecordProfileData(device,blurColumnKernel,event);
3322 }
3323 }
3324 }
3325
3326 /* get result */
3327 if (ALIGNED(filteredPixels,CLPixelPacket))
3328 {
3329 length = image->columns * image->rows;
3330 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3331 }
3332 else
3333 {
3334 length = image->columns * image->rows;
3335 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3336 }
3337 if (clStatus != CL_SUCCESS)
3338 {
3339 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3340 goto cleanup;
3341 }
3342
3343 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3344
3345 cleanup:
3346
3347 image_view=DestroyCacheView(image_view);
3348 if (filteredImage_view != NULL)
3349 filteredImage_view=DestroyCacheView(filteredImage_view);
3350
3351 if (imageBuffer!=NULL)
3352 clEnv->library->clReleaseMemObject(imageBuffer);
3353 if (filteredImageBuffer!=NULL)
3354 clEnv->library->clReleaseMemObject(filteredImageBuffer);
3355 if (tempImageBuffer!=NULL)
3356 clEnv->library->clReleaseMemObject(tempImageBuffer);
3357 if (imageKernelBuffer!=NULL)
3358 clEnv->library->clReleaseMemObject(imageKernelBuffer);
3359 if (blurRowKernel!=NULL)
3360 ReleaseOpenCLKernel(blurRowKernel);
3361 if (blurColumnKernel!=NULL)
3362 ReleaseOpenCLKernel(blurColumnKernel);
3363 if (queue != NULL)
3364 ReleaseOpenCLCommandQueue(device, queue);
3365 if (device != NULL)
3366 ReleaseOpenCLDevice(device);
3367 if (outputReady == MagickFalse)
3368 {
3369 if (filteredImage != NULL)
3370 {
3371 DestroyImage(filteredImage);
3372 filteredImage = NULL;
3373 }
3374 }
3375
3376 return(filteredImage);
3377 }
3378
AccelerateLocalContrastImage(const Image * image,const double radius,const double strength,ExceptionInfo * exception)3379 MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
3380 const double radius,const double strength,ExceptionInfo *exception)
3381 {
3382 Image
3383 *filteredImage;
3384
3385 MagickCLEnv
3386 clEnv;
3387
3388 assert(image != NULL);
3389 assert(exception != (ExceptionInfo *) NULL);
3390
3391 if (checkAccelerateConditionRGBA(image) == MagickFalse)
3392 return((Image *) NULL);
3393
3394 clEnv=getOpenCLEnvironment(exception);
3395 if (clEnv == (MagickCLEnv) NULL)
3396 return((Image *) NULL);
3397
3398 filteredImage=ComputeLocalContrastImage(image,clEnv,radius,strength,
3399 exception);
3400 return(filteredImage);
3401 }
3402
3403 /*
3404 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3405 % %
3406 % %
3407 % %
3408 % A c c e l e r a t e M o d u l a t e I m a g e %
3409 % %
3410 % %
3411 % %
3412 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3413 */
3414
ComputeModulateImage(Image * image,MagickCLEnv clEnv,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3415 static MagickBooleanType ComputeModulateImage(Image *image,MagickCLEnv clEnv,
3416 const double percent_brightness,const double percent_hue,
3417 const double percent_saturation,const ColorspaceType colorspace,
3418 ExceptionInfo *exception)
3419 {
3420 CacheView
3421 *image_view;
3422
3423 cl_float
3424 bright,
3425 hue,
3426 saturation;
3427
3428 cl_command_queue
3429 queue;
3430
3431 cl_int
3432 color,
3433 clStatus;
3434
3435 cl_kernel
3436 modulateKernel;
3437
3438 cl_event
3439 event;
3440
3441 cl_mem
3442 imageBuffer;
3443
3444 cl_mem_flags
3445 mem_flags;
3446
3447 MagickBooleanType
3448 outputReady;
3449
3450 MagickCLDevice
3451 device;
3452
3453 MagickSizeType
3454 length;
3455
3456 register ssize_t
3457 i;
3458
3459 void
3460 *inputPixels;
3461
3462 inputPixels = NULL;
3463 imageBuffer = NULL;
3464 modulateKernel = NULL;
3465
3466 assert(image != (Image *) NULL);
3467 assert(image->signature == MagickCoreSignature);
3468 if (image->debug != MagickFalse)
3469 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3470
3471 /*
3472 * initialize opencl env
3473 */
3474 device = RequestOpenCLDevice(clEnv);
3475 queue = AcquireOpenCLCommandQueue(device);
3476
3477 outputReady = MagickFalse;
3478
3479 /* Create and initialize OpenCL buffers.
3480 inputPixels = AcquirePixelCachePixels(image, &length, exception);
3481 assume this will get a writable image
3482 */
3483 image_view=AcquireAuthenticCacheView(image,exception);
3484 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3485 if (inputPixels == (void *) NULL)
3486 {
3487 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3488 goto cleanup;
3489 }
3490
3491 /* If the host pointer is aligned to the size of CLPixelPacket,
3492 then use the host buffer directly from the GPU; otherwise,
3493 create a buffer on the GPU and copy the data over
3494 */
3495 if (ALIGNED(inputPixels,CLPixelPacket))
3496 {
3497 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3498 }
3499 else
3500 {
3501 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3502 }
3503 /* create a CL buffer from image pixel buffer */
3504 length = image->columns * image->rows;
3505 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3506 if (clStatus != CL_SUCCESS)
3507 {
3508 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3509 goto cleanup;
3510 }
3511
3512 modulateKernel = AcquireOpenCLKernel(device, "Modulate");
3513 if (modulateKernel == NULL)
3514 {
3515 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3516 goto cleanup;
3517 }
3518
3519 bright=percent_brightness;
3520 hue=percent_hue;
3521 saturation=percent_saturation;
3522 color=colorspace;
3523
3524 i = 0;
3525 clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3526 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3527 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3528 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3529 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3530 if (clStatus != CL_SUCCESS)
3531 {
3532 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3533 goto cleanup;
3534 }
3535
3536 {
3537 size_t global_work_size[2];
3538 global_work_size[0] = image->columns;
3539 global_work_size[1] = image->rows;
3540 /* launch the kernel */
3541 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3542 if (clStatus != CL_SUCCESS)
3543 {
3544 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3545 goto cleanup;
3546 }
3547 RecordProfileData(device,modulateKernel,event);
3548 }
3549
3550 if (ALIGNED(inputPixels,CLPixelPacket))
3551 {
3552 length = image->columns * image->rows;
3553 clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3554 }
3555 else
3556 {
3557 length = image->columns * image->rows;
3558 clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3559 }
3560 if (clStatus != CL_SUCCESS)
3561 {
3562 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3563 goto cleanup;
3564 }
3565
3566 outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
3567
3568 cleanup:
3569
3570 image_view=DestroyCacheView(image_view);
3571
3572 if (imageBuffer!=NULL)
3573 clEnv->library->clReleaseMemObject(imageBuffer);
3574 if (modulateKernel!=NULL)
3575 ReleaseOpenCLKernel(modulateKernel);
3576 if (queue != NULL)
3577 ReleaseOpenCLCommandQueue(device,queue);
3578 if (device != NULL)
3579 ReleaseOpenCLDevice(device);
3580
3581 return outputReady;
3582
3583 }
3584
AccelerateModulateImage(Image * image,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3585 MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
3586 const double percent_brightness,const double percent_hue,
3587 const double percent_saturation,const ColorspaceType colorspace,
3588 ExceptionInfo *exception)
3589 {
3590 MagickBooleanType
3591 status;
3592
3593 MagickCLEnv
3594 clEnv;
3595
3596 assert(image != NULL);
3597 assert(exception != (ExceptionInfo *) NULL);
3598
3599 if (checkAccelerateConditionRGBA(image) == MagickFalse)
3600 return(MagickFalse);
3601
3602 if ((colorspace != HSLColorspace) && (colorspace != UndefinedColorspace))
3603 return(MagickFalse);
3604
3605 clEnv=getOpenCLEnvironment(exception);
3606 if (clEnv == (MagickCLEnv) NULL)
3607 return(MagickFalse);
3608
3609 status=ComputeModulateImage(image,clEnv,percent_brightness,percent_hue,
3610 percent_saturation,colorspace,exception);
3611 return(status);
3612 }
3613
3614 /*
3615 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3616 % %
3617 % %
3618 % %
3619 % A c c e l e r a t e M o t i o n B l u r I m a g e %
3620 % %
3621 % %
3622 % %
3623 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3624 */
3625
ComputeMotionBlurImage(const Image * image,MagickCLEnv clEnv,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3626 static Image* ComputeMotionBlurImage(const Image *image,MagickCLEnv clEnv,
3627 const double *kernel,const size_t width,const OffsetInfo *offset,
3628 ExceptionInfo *exception)
3629 {
3630 CacheView
3631 *filteredImage_view,
3632 *image_view;
3633
3634 cl_command_queue
3635 queue;
3636
3637 cl_float4
3638 biasPixel;
3639
3640 cl_int
3641 clStatus;
3642
3643 cl_kernel
3644 motionBlurKernel;
3645
3646 cl_event
3647 event;
3648
3649 cl_mem
3650 filteredImageBuffer,
3651 imageBuffer,
3652 imageKernelBuffer,
3653 offsetBuffer;
3654
3655 cl_mem_flags
3656 mem_flags;
3657
3658 const void
3659 *inputPixels;
3660
3661 float
3662 *kernelBufferPtr;
3663
3664 Image
3665 *filteredImage;
3666
3667 int
3668 *offsetBufferPtr;
3669
3670 MagickBooleanType
3671 outputReady;
3672
3673 MagickCLDevice
3674 device;
3675
3676 PixelInfo
3677 bias;
3678
3679 MagickSizeType
3680 length;
3681
3682 size_t
3683 global_work_size[2],
3684 local_work_size[2];
3685
3686 unsigned int
3687 i,
3688 imageHeight,
3689 imageWidth,
3690 matte;
3691
3692 void
3693 *filteredPixels,
3694 *hostPtr;
3695
3696 outputReady = MagickFalse;
3697 filteredImage = NULL;
3698 filteredImage_view = NULL;
3699 imageBuffer = NULL;
3700 filteredImageBuffer = NULL;
3701 imageKernelBuffer = NULL;
3702 motionBlurKernel = NULL;
3703 queue = NULL;
3704
3705 device = RequestOpenCLDevice(clEnv);
3706
3707 /* Create and initialize OpenCL buffers. */
3708
3709 image_view=AcquireAuthenticCacheView(image,exception);
3710 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3711 if (inputPixels == (const void *) NULL)
3712 {
3713 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3714 "UnableToReadPixelCache.","`%s'",image->filename);
3715 goto cleanup;
3716 }
3717
3718 // If the host pointer is aligned to the size of CLPixelPacket,
3719 // then use the host buffer directly from the GPU; otherwise,
3720 // create a buffer on the GPU and copy the data over
3721 if (ALIGNED(inputPixels,CLPixelPacket))
3722 {
3723 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3724 }
3725 else
3726 {
3727 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3728 }
3729 // create a CL buffer from image pixel buffer
3730 length = image->columns * image->rows;
3731 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3732 length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3733 if (clStatus != CL_SUCCESS)
3734 {
3735 (void) ThrowMagickException(exception, GetMagickModule(),
3736 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3737 goto cleanup;
3738 }
3739
3740
3741 filteredImage = CloneImage(image,image->columns,image->rows,
3742 MagickTrue,exception);
3743 assert(filteredImage != NULL);
3744 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3745 {
3746 (void) ThrowMagickException(exception, GetMagickModule(),
3747 ResourceLimitError, "CloneImage failed.", ".");
3748 goto cleanup;
3749 }
3750 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3751 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3752 if (filteredPixels == (void *) NULL)
3753 {
3754 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3755 "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3756 goto cleanup;
3757 }
3758
3759 if (ALIGNED(filteredPixels,CLPixelPacket))
3760 {
3761 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3762 hostPtr = filteredPixels;
3763 }
3764 else
3765 {
3766 mem_flags = CL_MEM_WRITE_ONLY;
3767 hostPtr = NULL;
3768 }
3769 // create a CL buffer from image pixel buffer
3770 length = image->columns * image->rows;
3771 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3772 length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3773 if (clStatus != CL_SUCCESS)
3774 {
3775 (void) ThrowMagickException(exception, GetMagickModule(),
3776 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3777 goto cleanup;
3778 }
3779
3780
3781 imageKernelBuffer = clEnv->library->clCreateBuffer(device->context,
3782 CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
3783 &clStatus);
3784 if (clStatus != CL_SUCCESS)
3785 {
3786 (void) ThrowMagickException(exception, GetMagickModule(),
3787 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3788 goto cleanup;
3789 }
3790
3791 queue = AcquireOpenCLCommandQueue(device);
3792 kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
3793 CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
3794 if (clStatus != CL_SUCCESS)
3795 {
3796 (void) ThrowMagickException(exception, GetMagickModule(),
3797 ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3798 goto cleanup;
3799 }
3800 for (i = 0; i < width; i++)
3801 {
3802 kernelBufferPtr[i] = (float) kernel[i];
3803 }
3804 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
3805 0, NULL, NULL);
3806 if (clStatus != CL_SUCCESS)
3807 {
3808 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3809 "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3810 goto cleanup;
3811 }
3812
3813 offsetBuffer = clEnv->library->clCreateBuffer(device->context,
3814 CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
3815 &clStatus);
3816 if (clStatus != CL_SUCCESS)
3817 {
3818 (void) ThrowMagickException(exception, GetMagickModule(),
3819 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3820 goto cleanup;
3821 }
3822
3823 offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
3824 CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
3825 if (clStatus != CL_SUCCESS)
3826 {
3827 (void) ThrowMagickException(exception, GetMagickModule(),
3828 ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3829 goto cleanup;
3830 }
3831 for (i = 0; i < width; i++)
3832 {
3833 offsetBufferPtr[2*i] = (int)offset[i].x;
3834 offsetBufferPtr[2*i+1] = (int)offset[i].y;
3835 }
3836 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
3837 NULL, NULL);
3838 if (clStatus != CL_SUCCESS)
3839 {
3840 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3841 "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3842 goto cleanup;
3843 }
3844
3845
3846 // get the OpenCL kernel
3847 motionBlurKernel = AcquireOpenCLKernel(device,"MotionBlur");
3848 if (motionBlurKernel == NULL)
3849 {
3850 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3851 "AcquireOpenCLKernel failed.", ".");
3852 goto cleanup;
3853 }
3854
3855 // set the kernel arguments
3856 i = 0;
3857 clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3858 (void *)&imageBuffer);
3859 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3860 (void *)&filteredImageBuffer);
3861 imageWidth = (unsigned int) image->columns;
3862 imageHeight = (unsigned int) image->rows;
3863 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3864 &imageWidth);
3865 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3866 &imageHeight);
3867 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3868 (void *)&imageKernelBuffer);
3869 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3870 &width);
3871 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3872 (void *)&offsetBuffer);
3873
3874 GetPixelInfo(image,&bias);
3875 biasPixel.s[0] = bias.red;
3876 biasPixel.s[1] = bias.green;
3877 biasPixel.s[2] = bias.blue;
3878 biasPixel.s[3] = bias.alpha;
3879 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
3880
3881 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
3882 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
3883 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
3884 if (clStatus != CL_SUCCESS)
3885 {
3886 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3887 "clEnv->library->clSetKernelArg failed.", ".");
3888 goto cleanup;
3889 }
3890
3891 // launch the kernel
3892 local_work_size[0] = 16;
3893 local_work_size[1] = 16;
3894 global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3895 (unsigned int) image->columns,(unsigned int) local_work_size[0]);
3896 global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3897 (unsigned int) image->rows,(unsigned int) local_work_size[1]);
3898 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
3899 global_work_size, local_work_size, 0, NULL, &event);
3900
3901 if (clStatus != CL_SUCCESS)
3902 {
3903 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3904 "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3905 goto cleanup;
3906 }
3907 RecordProfileData(device,motionBlurKernel,event);
3908
3909 if (ALIGNED(filteredPixels,CLPixelPacket))
3910 {
3911 length = image->columns * image->rows;
3912 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
3913 CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
3914 NULL, &clStatus);
3915 }
3916 else
3917 {
3918 length = image->columns * image->rows;
3919 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
3920 length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3921 }
3922 if (clStatus != CL_SUCCESS)
3923 {
3924 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3925 "Reading output image from CL buffer failed.", ".");
3926 goto cleanup;
3927 }
3928 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3929
3930 cleanup:
3931
3932 image_view=DestroyCacheView(image_view);
3933 if (filteredImage_view != NULL)
3934 filteredImage_view=DestroyCacheView(filteredImage_view);
3935
3936 if (filteredImageBuffer!=NULL)
3937 clEnv->library->clReleaseMemObject(filteredImageBuffer);
3938 if (imageBuffer!=NULL)
3939 clEnv->library->clReleaseMemObject(imageBuffer);
3940 if (imageKernelBuffer!=NULL)
3941 clEnv->library->clReleaseMemObject(imageKernelBuffer);
3942 if (motionBlurKernel!=NULL)
3943 ReleaseOpenCLKernel(motionBlurKernel);
3944 if (queue != NULL)
3945 ReleaseOpenCLCommandQueue(device,queue);
3946 if (device != NULL)
3947 ReleaseOpenCLDevice(device);
3948 if (outputReady == MagickFalse && filteredImage != NULL)
3949 filteredImage=DestroyImage(filteredImage);
3950
3951 return(filteredImage);
3952 }
3953
AccelerateMotionBlurImage(const Image * image,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3954 MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
3955 const double* kernel,const size_t width,const OffsetInfo *offset,
3956 ExceptionInfo *exception)
3957 {
3958 Image
3959 *filteredImage;
3960
3961 MagickCLEnv
3962 clEnv;
3963
3964 assert(image != NULL);
3965 assert(kernel != (double *) NULL);
3966 assert(offset != (OffsetInfo *) NULL);
3967 assert(exception != (ExceptionInfo *) NULL);
3968
3969 if (checkAccelerateConditionRGBA(image) == MagickFalse)
3970 return((Image *) NULL);
3971
3972 clEnv=getOpenCLEnvironment(exception);
3973 if (clEnv == (MagickCLEnv) NULL)
3974 return((Image *) NULL);
3975
3976 filteredImage=ComputeMotionBlurImage(image,clEnv,kernel,width,offset,
3977 exception);
3978 return(filteredImage);
3979 }
3980
3981 /*
3982 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3983 % %
3984 % %
3985 % %
3986 % A c c e l e r a t e R e s i z e I m a g e %
3987 % %
3988 % %
3989 % %
3990 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3991 */
3992
resizeHorizontalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float xFactor,ExceptionInfo * exception)3993 static MagickBooleanType resizeHorizontalFilter(MagickCLDevice device,
3994 cl_command_queue queue,const Image *image,Image *filteredImage,
3995 cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
3996 cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
3997 const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
3998 const float xFactor,ExceptionInfo *exception)
3999 {
4000 cl_kernel
4001 horizontalKernel;
4002
4003 cl_int
4004 status;
4005
4006 const unsigned int
4007 workgroupSize = 256;
4008
4009 float
4010 resizeFilterScale,
4011 resizeFilterSupport,
4012 resizeFilterWindowSupport,
4013 resizeFilterBlur,
4014 scale,
4015 support;
4016
4017 int
4018 cacheRangeStart,
4019 cacheRangeEnd,
4020 numCachedPixels,
4021 resizeFilterType,
4022 resizeWindowType;
4023
4024 MagickBooleanType
4025 outputReady;
4026
4027 size_t
4028 gammaAccumulatorLocalMemorySize,
4029 gsize[2],
4030 i,
4031 imageCacheLocalMemorySize,
4032 pixelAccumulatorLocalMemorySize,
4033 lsize[2],
4034 totalLocalMemorySize,
4035 weightAccumulatorLocalMemorySize;
4036
4037 unsigned int
4038 chunkSize,
4039 pixelPerWorkgroup;
4040
4041 horizontalKernel=NULL;
4042 outputReady=MagickFalse;
4043
4044 /*
4045 Apply filter to resize vertically from image to resize image.
4046 */
4047 scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
4048 support=scale*GetResizeFilterSupport(resizeFilter);
4049 if (support < 0.5)
4050 {
4051 /*
4052 Support too small even for nearest neighbour: Reduce to point
4053 sampling.
4054 */
4055 support=(MagickRealType) 0.5;
4056 scale=1.0;
4057 }
4058 scale=PerceptibleReciprocal(scale);
4059
4060 if (resizedColumns < workgroupSize)
4061 {
4062 chunkSize=32;
4063 pixelPerWorkgroup=32;
4064 }
4065 else
4066 {
4067 chunkSize=workgroupSize;
4068 pixelPerWorkgroup=workgroupSize;
4069 }
4070
4071 DisableMSCWarning(4127)
4072 while(1)
4073 RestoreMSCWarning
4074 {
4075 /* calculate the local memory size needed per workgroup */
4076 cacheRangeStart=(int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
4077 cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+
4078 MagickEpsilon)+support+0.5);
4079 numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4080 imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4081 number_channels;
4082 totalLocalMemorySize=imageCacheLocalMemorySize;
4083
4084 /* local size for the pixel accumulator */
4085 pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4086 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4087
4088 /* local memory size for the weight accumulator */
4089 weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4090 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4091
4092 /* local memory size for the gamma accumulator */
4093 if ((number_channels == 4) || (number_channels == 2))
4094 gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4095 else
4096 gammaAccumulatorLocalMemorySize=sizeof(float);
4097 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4098
4099 if (totalLocalMemorySize <= device->local_memory_size)
4100 break;
4101 else
4102 {
4103 pixelPerWorkgroup=pixelPerWorkgroup/2;
4104 chunkSize=chunkSize/2;
4105 if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4106 {
4107 /* quit, fallback to CPU */
4108 goto cleanup;
4109 }
4110 }
4111 }
4112
4113 resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4114 resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4115
4116 horizontalKernel=AcquireOpenCLKernel(device,"ResizeHorizontalFilter");
4117 if (horizontalKernel == (cl_kernel) NULL)
4118 {
4119 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4120 ResourceLimitWarning,"AcquireOpenCLKernel failed.", ".");
4121 goto cleanup;
4122 }
4123
4124 resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4125 resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4126 resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4127 resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4128
4129 i=0;
4130 status =SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4131 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4132 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&columns);
4133 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&rows);
4134 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4135 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4136 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4137 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&xFactor);
4138 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4139 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4140 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4141 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4142 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4143 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4144 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4145 status|=SetOpenCLKernelArg(horizontalKernel,i++,imageCacheLocalMemorySize,NULL);
4146 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),&numCachedPixels);
4147 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&pixelPerWorkgroup);
4148 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&chunkSize);
4149 status|=SetOpenCLKernelArg(horizontalKernel,i++,pixelAccumulatorLocalMemorySize,NULL);
4150 status|=SetOpenCLKernelArg(horizontalKernel,i++,weightAccumulatorLocalMemorySize,NULL);
4151 status|=SetOpenCLKernelArg(horizontalKernel,i++,gammaAccumulatorLocalMemorySize,NULL);
4152
4153 if (status != CL_SUCCESS)
4154 {
4155 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4156 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4157 goto cleanup;
4158 }
4159
4160 gsize[0]=(resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4161 workgroupSize;
4162 gsize[1]=resizedRows;
4163 lsize[0]=workgroupSize;
4164 lsize[1]=1;
4165 outputReady=EnqueueOpenCLKernel(queue,horizontalKernel,2,
4166 (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4167 cleanup:
4168
4169 if (horizontalKernel != (cl_kernel) NULL)
4170 ReleaseOpenCLKernel(horizontalKernel);
4171
4172 return(outputReady);
4173 }
4174
resizeVerticalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float yFactor,ExceptionInfo * exception)4175 static MagickBooleanType resizeVerticalFilter(MagickCLDevice device,
4176 cl_command_queue queue,const Image *image,Image * filteredImage,
4177 cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
4178 cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
4179 const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
4180 const float yFactor,ExceptionInfo *exception)
4181 {
4182 cl_kernel
4183 verticalKernel;
4184
4185 cl_int
4186 status;
4187
4188 const unsigned int
4189 workgroupSize = 256;
4190
4191 float
4192 resizeFilterScale,
4193 resizeFilterSupport,
4194 resizeFilterWindowSupport,
4195 resizeFilterBlur,
4196 scale,
4197 support;
4198
4199 int
4200 cacheRangeStart,
4201 cacheRangeEnd,
4202 numCachedPixels,
4203 resizeFilterType,
4204 resizeWindowType;
4205
4206 MagickBooleanType
4207 outputReady;
4208
4209 size_t
4210 gammaAccumulatorLocalMemorySize,
4211 gsize[2],
4212 i,
4213 imageCacheLocalMemorySize,
4214 pixelAccumulatorLocalMemorySize,
4215 lsize[2],
4216 totalLocalMemorySize,
4217 weightAccumulatorLocalMemorySize;
4218
4219 unsigned int
4220 chunkSize,
4221 pixelPerWorkgroup;
4222
4223 verticalKernel=NULL;
4224 outputReady=MagickFalse;
4225
4226 /*
4227 Apply filter to resize vertically from image to resize image.
4228 */
4229 scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
4230 support=scale*GetResizeFilterSupport(resizeFilter);
4231 if (support < 0.5)
4232 {
4233 /*
4234 Support too small even for nearest neighbour: Reduce to point
4235 sampling.
4236 */
4237 support=(MagickRealType) 0.5;
4238 scale=1.0;
4239 }
4240 scale=PerceptibleReciprocal(scale);
4241
4242 if (resizedRows < workgroupSize)
4243 {
4244 chunkSize=32;
4245 pixelPerWorkgroup=32;
4246 }
4247 else
4248 {
4249 chunkSize=workgroupSize;
4250 pixelPerWorkgroup=workgroupSize;
4251 }
4252
4253 DisableMSCWarning(4127)
4254 while(1)
4255 RestoreMSCWarning
4256 {
4257 /* calculate the local memory size needed per workgroup */
4258 cacheRangeStart=(int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
4259 cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+
4260 MagickEpsilon)+support+0.5);
4261 numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4262 imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4263 number_channels;
4264 totalLocalMemorySize=imageCacheLocalMemorySize;
4265
4266 /* local size for the pixel accumulator */
4267 pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4268 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4269
4270 /* local memory size for the weight accumulator */
4271 weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4272 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4273
4274 /* local memory size for the gamma accumulator */
4275 if ((number_channels == 4) || (number_channels == 2))
4276 gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4277 else
4278 gammaAccumulatorLocalMemorySize=sizeof(float);
4279 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4280
4281 if (totalLocalMemorySize <= device->local_memory_size)
4282 break;
4283 else
4284 {
4285 pixelPerWorkgroup=pixelPerWorkgroup/2;
4286 chunkSize=chunkSize/2;
4287 if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4288 {
4289 /* quit, fallback to CPU */
4290 goto cleanup;
4291 }
4292 }
4293 }
4294
4295 resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4296 resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4297
4298 verticalKernel=AcquireOpenCLKernel(device,"ResizeVerticalFilter");
4299 if (verticalKernel == (cl_kernel) NULL)
4300 {
4301 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4302 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4303 goto cleanup;
4304 }
4305
4306 resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4307 resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4308 resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4309 resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4310
4311 i=0;
4312 status =SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4313 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4314 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&columns);
4315 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&rows);
4316 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4317 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4318 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4319 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&yFactor);
4320 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4321 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4322 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4323 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4324 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4325 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4326 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4327 status|=SetOpenCLKernelArg(verticalKernel,i++,imageCacheLocalMemorySize, NULL);
4328 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int), &numCachedPixels);
4329 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &pixelPerWorkgroup);
4330 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &chunkSize);
4331 status|=SetOpenCLKernelArg(verticalKernel,i++,pixelAccumulatorLocalMemorySize, NULL);
4332 status|=SetOpenCLKernelArg(verticalKernel,i++,weightAccumulatorLocalMemorySize, NULL);
4333 status|=SetOpenCLKernelArg(verticalKernel,i++,gammaAccumulatorLocalMemorySize, NULL);
4334
4335 if (status != CL_SUCCESS)
4336 {
4337 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4338 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4339 goto cleanup;
4340 }
4341
4342 gsize[0]=resizedColumns;
4343 gsize[1]=(resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4344 workgroupSize;
4345 lsize[0]=1;
4346 lsize[1]=workgroupSize;
4347 outputReady=EnqueueOpenCLKernel(queue,verticalKernel,2,(const size_t *) NULL,
4348 gsize,lsize,image,filteredImage,exception);
4349
4350 cleanup:
4351
4352 if (verticalKernel != (cl_kernel) NULL)
4353 ReleaseOpenCLKernel(verticalKernel);
4354
4355 return(outputReady);
4356 }
4357
ComputeResizeImage(const Image * image,MagickCLEnv clEnv,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4358 static Image *ComputeResizeImage(const Image* image,MagickCLEnv clEnv,
4359 const size_t resizedColumns,const size_t resizedRows,
4360 const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4361 {
4362 cl_command_queue
4363 queue;
4364
4365 cl_mem
4366 cubicCoefficientsBuffer,
4367 filteredImageBuffer,
4368 imageBuffer,
4369 tempImageBuffer;
4370
4371 cl_uint
4372 number_channels;
4373
4374 const double
4375 *resizeFilterCoefficient;
4376
4377 float
4378 coefficientBuffer[7],
4379 xFactor,
4380 yFactor;
4381
4382 MagickBooleanType
4383 outputReady;
4384
4385 MagickCLDevice
4386 device;
4387
4388 MagickSizeType
4389 length;
4390
4391 Image
4392 *filteredImage;
4393
4394 size_t
4395 i;
4396
4397 filteredImage=NULL;
4398 tempImageBuffer=NULL;
4399 cubicCoefficientsBuffer=NULL;
4400 outputReady=MagickFalse;
4401
4402 device=RequestOpenCLDevice(clEnv);
4403 queue=AcquireOpenCLCommandQueue(device);
4404 filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,
4405 exception);
4406 if (filteredImage == (Image *) NULL)
4407 goto cleanup;
4408 if (filteredImage->number_channels != image->number_channels)
4409 goto cleanup;
4410 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4411 if (imageBuffer == (cl_mem) NULL)
4412 goto cleanup;
4413 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4414 if (filteredImageBuffer == (cl_mem) NULL)
4415 goto cleanup;
4416
4417 resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
4418 for (i = 0; i < 7; i++)
4419 coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
4420 cubicCoefficientsBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
4421 CL_MEM_READ_ONLY,7*sizeof(*resizeFilterCoefficient),&coefficientBuffer);
4422 if (cubicCoefficientsBuffer == (cl_mem) NULL)
4423 {
4424 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4425 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4426 goto cleanup;
4427 }
4428
4429 number_channels=(cl_uint) image->number_channels;
4430 xFactor=(float) resizedColumns/(float) image->columns;
4431 yFactor=(float) resizedRows/(float) image->rows;
4432 if (xFactor > yFactor)
4433 {
4434 length=resizedColumns*image->rows*number_channels;
4435 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4436 sizeof(CLQuantum),(void *) NULL);
4437 if (tempImageBuffer == (cl_mem) NULL)
4438 {
4439 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4440 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4441 goto cleanup;
4442 }
4443
4444 outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4445 imageBuffer,number_channels,(cl_uint) image->columns,
4446 (cl_uint) image->rows,tempImageBuffer,(cl_uint) resizedColumns,
4447 (cl_uint) image->rows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4448 exception);
4449 if (outputReady == MagickFalse)
4450 goto cleanup;
4451
4452 outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4453 tempImageBuffer,number_channels,(cl_uint) resizedColumns,
4454 (cl_uint) image->rows,filteredImageBuffer,(cl_uint) resizedColumns,
4455 (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4456 exception);
4457 if (outputReady == MagickFalse)
4458 goto cleanup;
4459 }
4460 else
4461 {
4462 length=image->columns*resizedRows*number_channels;
4463 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4464 sizeof(CLQuantum),(void *) NULL);
4465 if (tempImageBuffer == (cl_mem) NULL)
4466 {
4467 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4468 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4469 goto cleanup;
4470 }
4471
4472 outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4473 imageBuffer,number_channels,(cl_uint) image->columns,
4474 (cl_int) image->rows,tempImageBuffer,(cl_uint) image->columns,
4475 (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4476 exception);
4477 if (outputReady == MagickFalse)
4478 goto cleanup;
4479
4480 outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4481 tempImageBuffer,number_channels,(cl_uint) image->columns,
4482 (cl_uint) resizedRows,filteredImageBuffer,(cl_uint) resizedColumns,
4483 (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4484 exception);
4485 if (outputReady == MagickFalse)
4486 goto cleanup;
4487 }
4488
4489 cleanup:
4490
4491 if (tempImageBuffer != (cl_mem) NULL)
4492 ReleaseOpenCLMemObject(tempImageBuffer);
4493 if (cubicCoefficientsBuffer != (cl_mem) NULL)
4494 ReleaseOpenCLMemObject(cubicCoefficientsBuffer);
4495 if (queue != (cl_command_queue) NULL)
4496 ReleaseOpenCLCommandQueue(device,queue);
4497 if (device != (MagickCLDevice) NULL)
4498 ReleaseOpenCLDevice(device);
4499 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4500 filteredImage=DestroyImage(filteredImage);
4501
4502 return(filteredImage);
4503 }
4504
gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)4505 static MagickBooleanType gpuSupportedResizeWeighting(
4506 ResizeWeightingFunctionType f)
4507 {
4508 unsigned int
4509 i;
4510
4511 for (i = 0; ;i++)
4512 {
4513 if (supportedResizeWeighting[i] == LastWeightingFunction)
4514 break;
4515 if (supportedResizeWeighting[i] == f)
4516 return(MagickTrue);
4517 }
4518 return(MagickFalse);
4519 }
4520
AccelerateResizeImage(const Image * image,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4521 MagickPrivate Image *AccelerateResizeImage(const Image *image,
4522 const size_t resizedColumns,const size_t resizedRows,
4523 const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4524 {
4525 Image
4526 *filteredImage;
4527
4528 MagickCLEnv
4529 clEnv;
4530
4531 assert(image != NULL);
4532 assert(exception != (ExceptionInfo *) NULL);
4533
4534 if (checkAccelerateCondition(image) == MagickFalse)
4535 return((Image *) NULL);
4536
4537 if ((gpuSupportedResizeWeighting(GetResizeFilterWeightingType(
4538 resizeFilter)) == MagickFalse) ||
4539 (gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(
4540 resizeFilter)) == MagickFalse))
4541 return((Image *) NULL);
4542
4543 clEnv=getOpenCLEnvironment(exception);
4544 if (clEnv == (MagickCLEnv) NULL)
4545 return((Image *) NULL);
4546
4547 filteredImage=ComputeResizeImage(image,clEnv,resizedColumns,resizedRows,
4548 resizeFilter,exception);
4549 return(filteredImage);
4550 }
4551
4552 /*
4553 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4554 % %
4555 % %
4556 % %
4557 % A c c e l e r a t e R o t a t i o n a l B l u r I m a g e %
4558 % %
4559 % %
4560 % %
4561 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4562 */
4563
ComputeRotationalBlurImage(const Image * image,MagickCLEnv clEnv,const double angle,ExceptionInfo * exception)4564 static Image* ComputeRotationalBlurImage(const Image *image,MagickCLEnv clEnv,
4565 const double angle,ExceptionInfo *exception)
4566 {
4567 cl_command_queue
4568 queue;
4569
4570 cl_float2
4571 blurCenter;
4572
4573 cl_int
4574 status;
4575
4576 cl_mem
4577 cosThetaBuffer,
4578 filteredImageBuffer,
4579 imageBuffer,
4580 sinThetaBuffer;
4581
4582 cl_kernel
4583 rotationalBlurKernel;
4584
4585 cl_uint
4586 cossin_theta_size,
4587 number_channels;
4588
4589 float
4590 blurRadius,
4591 *cosThetaPtr,
4592 offset,
4593 *sinThetaPtr,
4594 theta;
4595
4596 Image
4597 *filteredImage;
4598
4599 MagickBooleanType
4600 outputReady;
4601
4602 MagickCLDevice
4603 device;
4604
4605 size_t
4606 gsize[2],
4607 i;
4608
4609 filteredImage=NULL;
4610 sinThetaBuffer=NULL;
4611 cosThetaBuffer=NULL;
4612 rotationalBlurKernel=NULL;
4613 outputReady=MagickFalse;
4614
4615 device=RequestOpenCLDevice(clEnv);
4616 queue=AcquireOpenCLCommandQueue(device);
4617 filteredImage=cloneImage(image,exception);
4618 if (filteredImage == (Image *) NULL)
4619 goto cleanup;
4620 if (filteredImage->number_channels != image->number_channels)
4621 goto cleanup;
4622 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4623 if (imageBuffer == (cl_mem) NULL)
4624 goto cleanup;
4625 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4626 if (filteredImageBuffer == (cl_mem) NULL)
4627 goto cleanup;
4628
4629 blurCenter.x=(float) (image->columns-1)/2.0;
4630 blurCenter.y=(float) (image->rows-1)/2.0;
4631 blurRadius=hypot(blurCenter.x,blurCenter.y);
4632 cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt(
4633 (double) blurRadius)+2UL);
4634
4635 cosThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4636 if (cosThetaPtr == (float *) NULL)
4637 goto cleanup;
4638 sinThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4639 if (sinThetaPtr == (float *) NULL)
4640 {
4641 cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4642 goto cleanup;
4643 }
4644
4645 theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
4646 offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
4647 for (i=0; i < (ssize_t) cossin_theta_size; i++)
4648 {
4649 cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
4650 sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
4651 }
4652
4653 sinThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4654 CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),sinThetaPtr);
4655 sinThetaPtr=RelinquishMagickMemory(sinThetaPtr);
4656 cosThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4657 CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),cosThetaPtr);
4658 cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4659 if ((sinThetaBuffer == (cl_mem) NULL) || (cosThetaBuffer == (cl_mem) NULL))
4660 {
4661 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4662 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4663 goto cleanup;
4664 }
4665
4666 rotationalBlurKernel=AcquireOpenCLKernel(device,"RotationalBlur");
4667 if (rotationalBlurKernel == (cl_kernel) NULL)
4668 {
4669 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4670 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4671 goto cleanup;
4672 }
4673
4674 number_channels=(cl_uint) image->number_channels;
4675
4676 i=0;
4677 status =SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4678 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint),&number_channels);
4679 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
4680 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
4681 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
4682 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
4683 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint), &cossin_theta_size);
4684 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4685 if (status != CL_SUCCESS)
4686 {
4687 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4688 ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4689 goto cleanup;
4690 }
4691
4692 gsize[0]=image->columns;
4693 gsize[1]=image->rows;
4694 outputReady=EnqueueOpenCLKernel(queue,rotationalBlurKernel,2,
4695 (const size_t *) NULL,gsize,(const size_t *) NULL,image,filteredImage,
4696 exception);
4697
4698 cleanup:
4699
4700 if (sinThetaBuffer != (cl_mem) NULL)
4701 ReleaseOpenCLMemObject(sinThetaBuffer);
4702 if (cosThetaBuffer != (cl_mem) NULL)
4703 ReleaseOpenCLMemObject(cosThetaBuffer);
4704 if (rotationalBlurKernel != (cl_kernel) NULL)
4705 ReleaseOpenCLKernel(rotationalBlurKernel);
4706 if (queue != (cl_command_queue) NULL)
4707 ReleaseOpenCLCommandQueue(device,queue);
4708 if (device != (MagickCLDevice) NULL)
4709 ReleaseOpenCLDevice(device);
4710 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4711 filteredImage=DestroyImage(filteredImage);
4712
4713 return(filteredImage);
4714 }
4715
AccelerateRotationalBlurImage(const Image * image,const double angle,ExceptionInfo * exception)4716 MagickPrivate Image* AccelerateRotationalBlurImage(const Image *image,
4717 const double angle,ExceptionInfo *exception)
4718 {
4719 Image
4720 *filteredImage;
4721
4722 MagickCLEnv
4723 clEnv;
4724
4725 assert(image != NULL);
4726 assert(exception != (ExceptionInfo *) NULL);
4727
4728 if (checkAccelerateCondition(image) == MagickFalse)
4729 return((Image *) NULL);
4730
4731 clEnv=getOpenCLEnvironment(exception);
4732 if (clEnv == (MagickCLEnv) NULL)
4733 return((Image *) NULL);
4734
4735 filteredImage=ComputeRotationalBlurImage(image,clEnv,angle,exception);
4736 return filteredImage;
4737 }
4738
4739 /*
4740 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4741 % %
4742 % %
4743 % %
4744 % A c c e l e r a t e U n s h a r p M a s k I m a g e %
4745 % %
4746 % %
4747 % %
4748 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4749 */
4750
ComputeUnsharpMaskImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4751 static Image *ComputeUnsharpMaskImage(const Image *image,MagickCLEnv clEnv,
4752 const double radius,const double sigma,const double gain,
4753 const double threshold,ExceptionInfo *exception)
4754 {
4755 cl_command_queue
4756 queue;
4757
4758 cl_int
4759 status;
4760
4761 cl_kernel
4762 blurRowKernel,
4763 unsharpMaskBlurColumnKernel;
4764
4765 cl_mem
4766 filteredImageBuffer,
4767 imageBuffer,
4768 imageKernelBuffer,
4769 tempImageBuffer;
4770
4771 cl_uint
4772 imageColumns,
4773 imageRows,
4774 kernelWidth,
4775 number_channels;
4776
4777 float
4778 fGain,
4779 fThreshold;
4780
4781 Image
4782 *filteredImage;
4783
4784 int
4785 chunkSize;
4786
4787 MagickBooleanType
4788 outputReady;
4789
4790 MagickCLDevice
4791 device;
4792
4793 MagickSizeType
4794 length;
4795
4796 size_t
4797 gsize[2],
4798 i,
4799 lsize[2];
4800
4801 filteredImage=NULL;
4802 tempImageBuffer=NULL;
4803 imageKernelBuffer=NULL;
4804 blurRowKernel=NULL;
4805 unsharpMaskBlurColumnKernel=NULL;
4806 outputReady=MagickFalse;
4807
4808 device=RequestOpenCLDevice(clEnv);
4809 queue=AcquireOpenCLCommandQueue(device);
4810 filteredImage=cloneImage(image,exception);
4811 if (filteredImage == (Image *) NULL)
4812 goto cleanup;
4813 if (filteredImage->number_channels != image->number_channels)
4814 goto cleanup;
4815 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4816 if (imageBuffer == (cl_mem) NULL)
4817 goto cleanup;
4818 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4819 if (filteredImageBuffer == (cl_mem) NULL)
4820 goto cleanup;
4821
4822 imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4823 exception);
4824
4825 length=image->columns*image->rows;
4826 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4827 sizeof(cl_float4),NULL);
4828 if (tempImageBuffer == (cl_mem) NULL)
4829 {
4830 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4831 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4832 goto cleanup;
4833 }
4834
4835 blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
4836 if (blurRowKernel == (cl_kernel) NULL)
4837 {
4838 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4839 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4840 goto cleanup;
4841 }
4842
4843 unsharpMaskBlurColumnKernel=AcquireOpenCLKernel(device,
4844 "UnsharpMaskBlurColumn");
4845 if (unsharpMaskBlurColumnKernel == (cl_kernel) NULL)
4846 {
4847 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4848 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4849 goto cleanup;
4850 }
4851
4852 number_channels=(cl_uint) image->number_channels;
4853 imageColumns=(cl_uint) image->columns;
4854 imageRows=(cl_uint) image->rows;
4855
4856 chunkSize = 256;
4857
4858 i=0;
4859 status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4860 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
4861 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
4862 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4863 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4864 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4865 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4866 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
4867 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4868 if (status != CL_SUCCESS)
4869 {
4870 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4871 ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4872 goto cleanup;
4873 }
4874
4875 gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
4876 gsize[1]=image->rows;
4877 lsize[0]=chunkSize;
4878 lsize[1]=1;
4879 outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,
4880 (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4881
4882 chunkSize=256;
4883 fGain=(float) gain;
4884 fThreshold=(float) threshold;
4885
4886 i=0;
4887 status =SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4888 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4889 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
4890 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
4891 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4892 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4893 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
4894 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
4895 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4896 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4897 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
4898 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
4899 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4900 if (status != CL_SUCCESS)
4901 {
4902 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4903 ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4904 goto cleanup;
4905 }
4906
4907 gsize[0]=image->columns;
4908 gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
4909 lsize[0]=1;
4910 lsize[1]=chunkSize;
4911 outputReady=EnqueueOpenCLKernel(queue,unsharpMaskBlurColumnKernel,2,
4912 (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4913
4914 cleanup:
4915
4916 if (tempImageBuffer != (cl_mem) NULL)
4917 ReleaseOpenCLMemObject(tempImageBuffer);
4918 if (imageKernelBuffer != (cl_mem) NULL)
4919 ReleaseOpenCLMemObject(imageKernelBuffer);
4920 if (blurRowKernel != (cl_kernel) NULL)
4921 ReleaseOpenCLKernel(blurRowKernel);
4922 if (unsharpMaskBlurColumnKernel != (cl_kernel) NULL)
4923 ReleaseOpenCLKernel(unsharpMaskBlurColumnKernel);
4924 if (queue != (cl_command_queue) NULL)
4925 ReleaseOpenCLCommandQueue(device,queue);
4926 if (device != (MagickCLDevice) NULL)
4927 ReleaseOpenCLDevice(device);
4928 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4929 filteredImage=DestroyImage(filteredImage);
4930
4931 return(filteredImage);
4932 }
4933
ComputeUnsharpMaskImageSingle(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4934 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
4935 MagickCLEnv clEnv,const double radius,const double sigma,const double gain,
4936 const double threshold,ExceptionInfo *exception)
4937 {
4938 cl_command_queue
4939 queue;
4940
4941 cl_int
4942 status;
4943
4944 cl_kernel
4945 unsharpMaskKernel;
4946
4947 cl_mem
4948 filteredImageBuffer,
4949 imageBuffer,
4950 imageKernelBuffer;
4951
4952 cl_uint
4953 imageColumns,
4954 imageRows,
4955 kernelWidth,
4956 number_channels;
4957
4958 float
4959 fGain,
4960 fThreshold;
4961
4962 Image
4963 *filteredImage;
4964
4965 MagickBooleanType
4966 outputReady;
4967
4968 MagickCLDevice
4969 device;
4970
4971 size_t
4972 gsize[2],
4973 i,
4974 lsize[2];
4975
4976 filteredImage=NULL;
4977 imageKernelBuffer=NULL;
4978 unsharpMaskKernel=NULL;
4979 outputReady=MagickFalse;
4980
4981 device=RequestOpenCLDevice(clEnv);
4982 queue=AcquireOpenCLCommandQueue(device);
4983 filteredImage=cloneImage(image,exception);
4984 if (filteredImage == (Image *) NULL)
4985 goto cleanup;
4986 if (filteredImage->number_channels != image->number_channels)
4987 goto cleanup;
4988 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4989 if (imageBuffer == (cl_mem) NULL)
4990 goto cleanup;
4991 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4992 if (filteredImageBuffer == (cl_mem) NULL)
4993 goto cleanup;
4994
4995 imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4996 exception);
4997
4998 unsharpMaskKernel=AcquireOpenCLKernel(device,"UnsharpMask");
4999 if (unsharpMaskKernel == NULL)
5000 {
5001 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5002 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5003 goto cleanup;
5004 }
5005
5006 imageColumns=(cl_uint) image->columns;
5007 imageRows=(cl_uint) image->rows;
5008 number_channels=(cl_uint) image->number_channels;
5009 fGain=(float) gain;
5010 fThreshold=(float) threshold;
5011
5012 i=0;
5013 status =SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5014 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5015 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
5016 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
5017 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
5018 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
5019 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows);
5020 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL);
5021 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
5022 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
5023 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5024 if (status != CL_SUCCESS)
5025 {
5026 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5027 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5028 goto cleanup;
5029 }
5030
5031 gsize[0]=((image->columns + 7) / 8)*8;
5032 gsize[1]=((image->rows + 31) / 32)*32;
5033 lsize[0]=8;
5034 lsize[1]=32;
5035 outputReady=EnqueueOpenCLKernel(queue,unsharpMaskKernel,2,(const size_t *) NULL,
5036 gsize,lsize,image,filteredImage,exception);
5037
5038 cleanup:
5039
5040 if (imageKernelBuffer != (cl_mem) NULL)
5041 ReleaseOpenCLMemObject(imageKernelBuffer);
5042 if (unsharpMaskKernel != (cl_kernel) NULL)
5043 ReleaseOpenCLKernel(unsharpMaskKernel);
5044 if (queue != (cl_command_queue) NULL)
5045 ReleaseOpenCLCommandQueue(device,queue);
5046 if (device != (MagickCLDevice) NULL)
5047 ReleaseOpenCLDevice(device);
5048 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5049 filteredImage=DestroyImage(filteredImage);
5050
5051 return(filteredImage);
5052 }
5053
AccelerateUnsharpMaskImage(const Image * image,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)5054 MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
5055 const double radius,const double sigma,const double gain,
5056 const double threshold,ExceptionInfo *exception)
5057 {
5058 Image
5059 *filteredImage;
5060
5061 MagickCLEnv
5062 clEnv;
5063
5064 assert(image != NULL);
5065 assert(exception != (ExceptionInfo *) NULL);
5066
5067 if (checkAccelerateCondition(image) == MagickFalse)
5068 return((Image *) NULL);
5069
5070 clEnv=getOpenCLEnvironment(exception);
5071 if (clEnv == (MagickCLEnv) NULL)
5072 return((Image *) NULL);
5073
5074 if (radius < 12.1)
5075 filteredImage=ComputeUnsharpMaskImageSingle(image,clEnv,radius,sigma,gain,
5076 threshold,exception);
5077 else
5078 filteredImage=ComputeUnsharpMaskImage(image,clEnv,radius,sigma,gain,
5079 threshold,exception);
5080 return(filteredImage);
5081 }
5082
ComputeWaveletDenoiseImage(const Image * image,MagickCLEnv clEnv,const double threshold,ExceptionInfo * exception)5083 static Image *ComputeWaveletDenoiseImage(const Image *image,MagickCLEnv clEnv,
5084 const double threshold,ExceptionInfo *exception)
5085 {
5086 cl_command_queue
5087 queue;
5088
5089 const cl_int
5090 PASSES=5;
5091
5092 const int
5093 TILESIZE=64,
5094 PAD=1<<(PASSES-1),
5095 SIZE=TILESIZE-2*PAD;
5096
5097 cl_float
5098 thresh;
5099
5100 cl_int
5101 status;
5102
5103 cl_kernel
5104 denoiseKernel;
5105
5106 cl_mem
5107 filteredImageBuffer,
5108 imageBuffer;
5109
5110 cl_uint
5111 number_channels,
5112 width,
5113 height,
5114 max_channels;
5115
5116 Image
5117 *filteredImage;
5118
5119 MagickBooleanType
5120 outputReady;
5121
5122 MagickCLDevice
5123 device;
5124
5125 size_t
5126 gsize[2],
5127 i,
5128 lsize[2];
5129
5130 filteredImage=NULL;
5131 denoiseKernel=NULL;
5132 outputReady=MagickFalse;
5133
5134 device=RequestOpenCLDevice(clEnv);
5135 queue=AcquireOpenCLCommandQueue(device);
5136 filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,
5137 exception);
5138 if (filteredImage == (Image *) NULL)
5139 goto cleanup;
5140 if (filteredImage->number_channels != image->number_channels)
5141 goto cleanup;
5142 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
5143 if (imageBuffer == (cl_mem) NULL)
5144 goto cleanup;
5145 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
5146 if (filteredImageBuffer == (cl_mem) NULL)
5147 goto cleanup;
5148
5149 denoiseKernel=AcquireOpenCLKernel(device,"WaveletDenoise");
5150 if (denoiseKernel == (cl_kernel) NULL)
5151 {
5152 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5153 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5154 goto cleanup;
5155 }
5156
5157 number_channels=(cl_uint)image->number_channels;
5158 width=(cl_uint)image->columns;
5159 height=(cl_uint)image->rows;
5160 max_channels=number_channels;
5161 if ((max_channels == 4) || (max_channels == 2))
5162 max_channels=max_channels-1;
5163 thresh=threshold;
5164
5165 i=0;
5166 status =SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5167 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5168 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5169 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&max_channels);
5170 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_float),(void *)&thresh);
5171 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_int),(void *)&PASSES);
5172 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&width);
5173 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&height);
5174 if (status != CL_SUCCESS)
5175 {
5176 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5177 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5178 goto cleanup;
5179 }
5180
5181 gsize[0]=((width+(SIZE-1))/SIZE)*TILESIZE;
5182 gsize[1]=((height+(SIZE-1))/SIZE)*4;
5183 lsize[0]=TILESIZE;
5184 lsize[1]=4;
5185 outputReady=EnqueueOpenCLKernel(queue,denoiseKernel,2,(const size_t *) NULL,
5186 gsize,lsize,image,filteredImage,exception);
5187
5188 cleanup:
5189
5190 if (denoiseKernel != (cl_kernel) NULL)
5191 ReleaseOpenCLKernel(denoiseKernel);
5192 if (queue != (cl_command_queue) NULL)
5193 ReleaseOpenCLCommandQueue(device,queue);
5194 if (device != (MagickCLDevice) NULL)
5195 ReleaseOpenCLDevice(device);
5196 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5197 filteredImage=DestroyImage(filteredImage);
5198
5199 return(filteredImage);
5200 }
5201
AccelerateWaveletDenoiseImage(const Image * image,const double threshold,ExceptionInfo * exception)5202 MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
5203 const double threshold,ExceptionInfo *exception)
5204 {
5205 Image
5206 *filteredImage;
5207
5208 MagickCLEnv
5209 clEnv;
5210
5211 assert(image != NULL);
5212 assert(exception != (ExceptionInfo *)NULL);
5213
5214 if (checkAccelerateCondition(image) == MagickFalse)
5215 return((Image *) NULL);
5216
5217 clEnv=getOpenCLEnvironment(exception);
5218 if (clEnv == (MagickCLEnv) NULL)
5219 return((Image *) NULL);
5220
5221 filteredImage=ComputeWaveletDenoiseImage(image,clEnv,threshold,exception);
5222
5223 return(filteredImage);
5224 }
5225 #endif /* MAGICKCORE_OPENCL_SUPPORT */
5226