1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 % %
4 % %
5 % %
6 % AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE %
7 % A A C C E L E R R A A T E %
8 % AAAAA C C EEE L EEE RRRR AAAAA T EEE %
9 % A A C C E L E R R A A T E %
10 % A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE %
11 % %
12 % %
13 % MagickCore Acceleration Methods %
14 % %
15 % Software Design %
16 % Cristy %
17 % SiuChi Chan %
18 % Guansong Zhang %
19 % January 2010 %
20 % Dirk Lemstra %
21 % April 2016 %
22 % %
23 % %
24 % Copyright 1999-2021 ImageMagick Studio LLC, a non-profit organization %
25 % dedicated to making software imaging solutions freely available. %
26 % %
27 % You may not use this file except in compliance with the License. You may %
28 % obtain a copy of the License at %
29 % %
30 % https://imagemagick.org/script/license.php %
31 % %
32 % Unless required by applicable law or agreed to in writing, software %
33 % distributed under the License is distributed on an "AS IS" BASIS, %
34 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
35 % See the License for the specific language governing permissions and %
36 % limitations under the License. %
37 % %
38 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39 */
40
41 /*
42 Include declarations.
43 */
44 #include "MagickCore/studio.h"
45 #include "MagickCore/accelerate-private.h"
46 #include "MagickCore/accelerate-kernels-private.h"
47 #include "MagickCore/artifact.h"
48 #include "MagickCore/cache.h"
49 #include "MagickCore/cache-private.h"
50 #include "MagickCore/cache-view.h"
51 #include "MagickCore/color-private.h"
52 #include "MagickCore/delegate-private.h"
53 #include "MagickCore/enhance.h"
54 #include "MagickCore/exception.h"
55 #include "MagickCore/exception-private.h"
56 #include "MagickCore/gem.h"
57 #include "MagickCore/image.h"
58 #include "MagickCore/image-private.h"
59 #include "MagickCore/linked-list.h"
60 #include "MagickCore/list.h"
61 #include "MagickCore/memory_.h"
62 #include "MagickCore/monitor-private.h"
63 #include "MagickCore/opencl.h"
64 #include "MagickCore/opencl-private.h"
65 #include "MagickCore/option.h"
66 #include "MagickCore/pixel-accessor.h"
67 #include "MagickCore/pixel-private.h"
68 #include "MagickCore/prepress.h"
69 #include "MagickCore/quantize.h"
70 #include "MagickCore/quantum-private.h"
71 #include "MagickCore/random_.h"
72 #include "MagickCore/random-private.h"
73 #include "MagickCore/registry.h"
74 #include "MagickCore/resize.h"
75 #include "MagickCore/resize-private.h"
76 #include "MagickCore/semaphore.h"
77 #include "MagickCore/splay-tree.h"
78 #include "MagickCore/statistic.h"
79 #include "MagickCore/string_.h"
80 #include "MagickCore/string-private.h"
81 #include "MagickCore/token.h"
82
83 #define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
84 #define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
85
86 #if defined(MAGICKCORE_OPENCL_SUPPORT)
87
88 /*
89 Define declarations.
90 */
91 #define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
92
93 /*
94 Static declarations.
95 */
96 static const ResizeWeightingFunctionType supportedResizeWeighting[] =
97 {
98 BoxWeightingFunction,
99 TriangleWeightingFunction,
100 HannWeightingFunction,
101 HammingWeightingFunction,
102 BlackmanWeightingFunction,
103 CubicBCWeightingFunction,
104 SincWeightingFunction,
105 SincFastWeightingFunction,
106 LastWeightingFunction
107 };
108
109 /*
110 Helper functions.
111 */
checkAccelerateCondition(const Image * image)112 static MagickBooleanType checkAccelerateCondition(const Image* image)
113 {
114 /* only direct class images are supported */
115 if (image->storage_class != DirectClass)
116 return(MagickFalse);
117
118 /* check if the image's colorspace is supported */
119 if (image->colorspace != RGBColorspace &&
120 image->colorspace != sRGBColorspace &&
121 image->colorspace != LinearGRAYColorspace &&
122 image->colorspace != GRAYColorspace)
123 return(MagickFalse);
124
125 /* check if the virtual pixel method is compatible with the OpenCL implementation */
126 if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
127 (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
128 return(MagickFalse);
129
130 /* check if the image has mask */
131 if (((image->channels & ReadMaskChannel) != 0) ||
132 ((image->channels & WriteMaskChannel) != 0) ||
133 ((image->channels & CompositeMaskChannel) != 0))
134 return(MagickFalse);
135
136 if (image->number_channels > 4)
137 return(MagickFalse);
138
139 /* check if pixel order is R */
140 if (GetPixelChannelOffset(image,RedPixelChannel) != 0)
141 return(MagickFalse);
142
143 if (image->number_channels == 1)
144 return(MagickTrue);
145
146 /* check if pixel order is RA */
147 if ((image->number_channels == 2) &&
148 (GetPixelChannelOffset(image,AlphaPixelChannel) == 1))
149 return(MagickTrue);
150
151 if (image->number_channels == 2)
152 return(MagickFalse);
153
154 /* check if pixel order is RGB */
155 if ((GetPixelChannelOffset(image,GreenPixelChannel) != 1) ||
156 (GetPixelChannelOffset(image,BluePixelChannel) != 2))
157 return(MagickFalse);
158
159 if (image->number_channels == 3)
160 return(MagickTrue);
161
162 /* check if pixel order is RGBA */
163 if (GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
164 return(MagickFalse);
165
166 return(MagickTrue);
167 }
168
checkAccelerateConditionRGBA(const Image * image)169 static MagickBooleanType checkAccelerateConditionRGBA(const Image* image)
170 {
171 if (checkAccelerateCondition(image) == MagickFalse)
172 return(MagickFalse);
173
174 /* the order will be RGBA if the image has 4 channels */
175 if (image->number_channels != 4)
176 return(MagickFalse);
177
178 if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
179 (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
180 (GetPixelBlueTraits(image) == UndefinedPixelTrait) ||
181 (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
182 return(MagickFalse);
183
184 return(MagickTrue);
185 }
186
checkPixelIntensity(const Image * image,const PixelIntensityMethod method)187 static MagickBooleanType checkPixelIntensity(const Image *image,
188 const PixelIntensityMethod method)
189 {
190 /* EncodePixelGamma and DecodePixelGamma are not supported */
191 if ((method == Rec601LumaPixelIntensityMethod) ||
192 (method == Rec709LumaPixelIntensityMethod))
193 {
194 if (image->colorspace == RGBColorspace)
195 return(MagickFalse);
196 }
197
198 if ((method == Rec601LuminancePixelIntensityMethod) ||
199 (method == Rec709LuminancePixelIntensityMethod))
200 {
201 if (image->colorspace == sRGBColorspace)
202 return(MagickFalse);
203 }
204
205 return(MagickTrue);
206 }
207
checkHistogramCondition(const Image * image,const PixelIntensityMethod method)208 static MagickBooleanType checkHistogramCondition(const Image *image,
209 const PixelIntensityMethod method)
210 {
211 /* ensure this is the only pass get in for now. */
212 if ((image->channel_mask & SyncChannels) == 0)
213 return MagickFalse;
214
215 return(checkPixelIntensity(image,method));
216 }
217
getOpenCLEnvironment(ExceptionInfo * exception)218 static MagickCLEnv getOpenCLEnvironment(ExceptionInfo* exception)
219 {
220 MagickCLEnv
221 clEnv;
222
223 clEnv=GetCurrentOpenCLEnv();
224 if (clEnv == (MagickCLEnv) NULL)
225 return((MagickCLEnv) NULL);
226
227 if (clEnv->enabled == MagickFalse)
228 return((MagickCLEnv) NULL);
229
230 if (InitializeOpenCL(clEnv,exception) == MagickFalse)
231 return((MagickCLEnv) NULL);
232
233 return(clEnv);
234 }
235
cloneImage(const Image * image,ExceptionInfo * exception)236 static Image *cloneImage(const Image* image,ExceptionInfo *exception)
237 {
238 Image
239 *clone;
240
241 if (((image->channel_mask & RedChannel) != 0) &&
242 ((image->channel_mask & GreenChannel) != 0) &&
243 ((image->channel_mask & BlueChannel) != 0) &&
244 ((image->channel_mask & AlphaChannel) != 0))
245 clone=CloneImage(image,0,0,MagickTrue,exception);
246 else
247 {
248 clone=CloneImage(image,0,0,MagickTrue,exception);
249 if (clone != (Image *) NULL)
250 SyncImagePixelCache(clone,exception);
251 }
252 return(clone);
253 }
254
255 /* pad the global workgroup size to the next multiple of
256 the local workgroup size */
padGlobalWorkgroupSizeToLocalWorkgroupSize(const unsigned int orgGlobalSize,const unsigned int localGroupSize)257 inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
258 const unsigned int orgGlobalSize,const unsigned int localGroupSize)
259 {
260 return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
261 }
262
createKernelInfo(MagickCLDevice device,const double radius,const double sigma,cl_uint * width,ExceptionInfo * exception)263 static cl_mem createKernelInfo(MagickCLDevice device,const double radius,
264 const double sigma,cl_uint *width,ExceptionInfo *exception)
265 {
266 char
267 geometry[MagickPathExtent];
268
269 cl_mem
270 imageKernelBuffer;
271
272 float
273 *kernelBufferPtr;
274
275 KernelInfo
276 *kernel;
277
278 ssize_t
279 i;
280
281 (void) FormatLocaleString(geometry,MagickPathExtent,
282 "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
283 kernel=AcquireKernelInfo(geometry,exception);
284 if (kernel == (KernelInfo *) NULL)
285 {
286 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
287 ResourceLimitWarning,"AcquireKernelInfo failed.",".");
288 return((cl_mem) NULL);
289 }
290 kernelBufferPtr=(float *) AcquireMagickMemory(kernel->width*
291 sizeof(*kernelBufferPtr));
292 if (kernelBufferPtr == (float *) NULL)
293 {
294 kernel=DestroyKernelInfo(kernel);
295 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
296 ResourceLimitWarning,"MemoryAllocationFailed.",".");
297 return((cl_mem) NULL);
298 }
299 for (i = 0; i < (ssize_t) kernel->width; i++)
300 kernelBufferPtr[i]=(float) kernel->values[i];
301 imageKernelBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
302 CL_MEM_READ_ONLY,kernel->width*sizeof(*kernelBufferPtr),kernelBufferPtr);
303 *width=(cl_uint) kernel->width;
304 kernelBufferPtr=(float *) RelinquishMagickMemory(kernelBufferPtr);
305 kernel=DestroyKernelInfo(kernel);
306 if (imageKernelBuffer == (cl_mem) NULL)
307 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
308 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
309 return(imageKernelBuffer);
310 }
311
LaunchHistogramKernel(MagickCLEnv clEnv,MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,Image * image,const ChannelType channel,ExceptionInfo * exception)312 static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
313 MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,
314 cl_mem histogramBuffer,Image *image,const ChannelType channel,
315 ExceptionInfo *exception)
316 {
317 MagickBooleanType
318 outputReady;
319
320 cl_int
321 clStatus;
322
323 cl_kernel
324 histogramKernel;
325
326 cl_event
327 event;
328
329 cl_uint
330 colorspace,
331 method;
332
333 ssize_t
334 i;
335
336 size_t
337 global_work_size[2];
338
339 histogramKernel = NULL;
340
341 outputReady = MagickFalse;
342 colorspace = image->colorspace;
343 method = image->intensity;
344
345 /* get the OpenCL kernel */
346 histogramKernel = AcquireOpenCLKernel(device,"Histogram");
347 if (histogramKernel == NULL)
348 {
349 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
350 goto cleanup;
351 }
352
353 /* set the kernel arguments */
354 i = 0;
355 clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
356 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
357 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&colorspace);
358 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&method);
359 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
360 if (clStatus != CL_SUCCESS)
361 {
362 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
363 goto cleanup;
364 }
365
366 /* launch the kernel */
367 global_work_size[0] = image->columns;
368 global_work_size[1] = image->rows;
369
370 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
371
372 if (clStatus != CL_SUCCESS)
373 {
374 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
375 goto cleanup;
376 }
377 RecordProfileData(device,histogramKernel,event);
378
379 outputReady = MagickTrue;
380
381 cleanup:
382
383 if (histogramKernel!=NULL)
384 ReleaseOpenCLKernel(histogramKernel);
385
386 return(outputReady);
387 }
388
389 /*
390 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
391 % %
392 % %
393 % %
394 % A c c e l e r a t e A d d N o i s e I m a g e %
395 % %
396 % %
397 % %
398 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
399 */
400
ComputeAddNoiseImage(const Image * image,MagickCLEnv clEnv,const NoiseType noise_type,const double attenuate,ExceptionInfo * exception)401 static Image *ComputeAddNoiseImage(const Image *image,MagickCLEnv clEnv,
402 const NoiseType noise_type,const double attenuate,ExceptionInfo *exception)
403 {
404 cl_command_queue
405 queue;
406
407 cl_float
408 cl_attenuate;
409
410 cl_int
411 status;
412
413 cl_kernel
414 addNoiseKernel;
415
416 cl_mem
417 filteredImageBuffer,
418 imageBuffer;
419
420 cl_uint
421 bufferLength,
422 inputPixelCount,
423 number_channels,
424 numRandomNumberPerPixel,
425 pixelsPerWorkitem,
426 seed0,
427 seed1,
428 workItemCount;
429
430 const unsigned long
431 *s;
432
433 MagickBooleanType
434 outputReady;
435
436 MagickCLDevice
437 device;
438
439 Image
440 *filteredImage;
441
442 RandomInfo
443 *randomInfo;
444
445 size_t
446 gsize[1],
447 i,
448 lsize[1],
449 numRandPerChannel;
450
451 filteredImage=NULL;
452 imageBuffer=NULL;
453 filteredImageBuffer=NULL;
454 addNoiseKernel=NULL;
455 outputReady=MagickFalse;
456
457 device=RequestOpenCLDevice(clEnv);
458 queue=AcquireOpenCLCommandQueue(device);
459 if (queue == (cl_command_queue) NULL)
460 goto cleanup;
461 filteredImage=cloneImage(image,exception);
462 if (filteredImage == (Image *) NULL)
463 goto cleanup;
464 if (filteredImage->number_channels != image->number_channels)
465 goto cleanup;
466 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
467 if (imageBuffer == (cl_mem) NULL)
468 goto cleanup;
469 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
470 if (filteredImageBuffer == (cl_mem) NULL)
471 goto cleanup;
472
473 /* find out how many random numbers needed by pixel */
474 numRandPerChannel=0;
475 numRandomNumberPerPixel=0;
476 switch (noise_type)
477 {
478 case UniformNoise:
479 case ImpulseNoise:
480 case LaplacianNoise:
481 case RandomNoise:
482 default:
483 numRandPerChannel=1;
484 break;
485 case GaussianNoise:
486 case MultiplicativeGaussianNoise:
487 case PoissonNoise:
488 numRandPerChannel=2;
489 break;
490 };
491 if (GetPixelRedTraits(image) != UndefinedPixelTrait)
492 numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
493 if (GetPixelGreenTraits(image) != UndefinedPixelTrait)
494 numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
495 if (GetPixelBlueTraits(image) != UndefinedPixelTrait)
496 numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
497 if (GetPixelAlphaTraits(image) != UndefinedPixelTrait)
498 numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
499
500 addNoiseKernel=AcquireOpenCLKernel(device,"AddNoise");
501 if (addNoiseKernel == (cl_kernel) NULL)
502 {
503 (void)OpenCLThrowMagickException(device,exception,GetMagickModule(),
504 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
505 goto cleanup;
506 }
507
508 /* 256 work items per group, 2 groups per CU */
509 workItemCount=device->max_compute_units*2*256;
510 inputPixelCount=(cl_int) (image->columns*image->rows);
511 pixelsPerWorkitem=(inputPixelCount+workItemCount-1)/workItemCount;
512 pixelsPerWorkitem=((pixelsPerWorkitem+3)/4)*4;
513 lsize[0]=256;
514 gsize[0]=workItemCount;
515
516 randomInfo=AcquireRandomInfo();
517 s=GetRandomInfoSeed(randomInfo);
518 seed0=s[0];
519 (void) GetPseudoRandomValue(randomInfo);
520 seed1=s[0];
521 randomInfo=DestroyRandomInfo(randomInfo);
522
523 number_channels=(cl_uint) image->number_channels;
524 bufferLength=(cl_uint) (image->columns*image->rows*image->number_channels);
525 cl_attenuate=(cl_float) attenuate;
526
527 i=0;
528 status =SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
529 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
530 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
531 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&bufferLength);
532 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
533 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(NoiseType),(void *)&noise_type);
534 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_float),(void *)&cl_attenuate);
535 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed0);
536 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed1);
537 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&numRandomNumberPerPixel);
538 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
539 if (status != CL_SUCCESS)
540 {
541 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
542 ResourceLimitWarning,"clSetKernelArg failed.",".");
543 goto cleanup;
544 }
545
546 outputReady=EnqueueOpenCLKernel(queue,addNoiseKernel,1,(const size_t *) NULL,gsize,
547 lsize,image,filteredImage,MagickFalse,exception);
548
549 cleanup:
550
551 if (imageBuffer != (cl_mem) NULL)
552 ReleaseOpenCLMemObject(imageBuffer);
553 if (filteredImageBuffer != (cl_mem) NULL)
554 ReleaseOpenCLMemObject(filteredImageBuffer);
555 if (addNoiseKernel != (cl_kernel) NULL)
556 ReleaseOpenCLKernel(addNoiseKernel);
557 if (queue != (cl_command_queue) NULL)
558 ReleaseOpenCLCommandQueue(device,queue);
559 if (device != (MagickCLDevice) NULL)
560 ReleaseOpenCLDevice(device);
561 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
562 filteredImage=DestroyImage(filteredImage);
563
564 return(filteredImage);
565 }
566
AccelerateAddNoiseImage(const Image * image,const NoiseType noise_type,const double attenuate,ExceptionInfo * exception)567 MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
568 const NoiseType noise_type,const double attenuate,ExceptionInfo *exception)
569 {
570 /* Temporary disabled because of repetition.
571
572 Image
573 *filteredImage;
574
575 MagickCLEnv
576 clEnv;
577
578 assert(image != NULL);
579 assert(exception != (ExceptionInfo *) NULL);
580
581 if (checkAccelerateCondition(image) == MagickFalse)
582 return((Image *) NULL);
583
584 clEnv=getOpenCLEnvironment(exception);
585 if (clEnv == (MagickCLEnv) NULL)
586 return((Image *) NULL);
587
588 filteredImage=ComputeAddNoiseImage(image,clEnv,noise_type,attenuate,
589 exception);
590 return(filteredImage);
591 */
592 magick_unreferenced(image);
593 magick_unreferenced(noise_type);
594 magick_unreferenced(attenuate);
595 magick_unreferenced(exception);
596 return((Image *)NULL);
597 }
598
599 /*
600 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
601 % %
602 % %
603 % %
604 % A c c e l e r a t e B l u r I m a g e %
605 % %
606 % %
607 % %
608 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
609 */
610
ComputeBlurImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,ExceptionInfo * exception)611 static Image *ComputeBlurImage(const Image* image,MagickCLEnv clEnv,
612 const double radius,const double sigma,ExceptionInfo *exception)
613 {
614 cl_command_queue
615 queue;
616
617 cl_int
618 status;
619
620 cl_kernel
621 blurColumnKernel,
622 blurRowKernel;
623
624 cl_mem
625 filteredImageBuffer,
626 imageBuffer,
627 imageKernelBuffer,
628 tempImageBuffer;
629
630 cl_uint
631 imageColumns,
632 imageRows,
633 kernelWidth,
634 number_channels;
635
636 Image
637 *filteredImage;
638
639 MagickBooleanType
640 outputReady;
641
642 MagickCLDevice
643 device;
644
645 MagickSizeType
646 length;
647
648 size_t
649 chunkSize=256,
650 gsize[2],
651 i,
652 lsize[2];
653
654 filteredImage=NULL;
655 imageBuffer=NULL;
656 filteredImageBuffer=NULL;
657 tempImageBuffer=NULL;
658 imageKernelBuffer=NULL;
659 blurRowKernel=NULL;
660 blurColumnKernel=NULL;
661 outputReady=MagickFalse;
662
663 assert(image != (Image *) NULL);
664 assert(image->signature == MagickCoreSignature);
665 if (image->debug != MagickFalse)
666 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
667
668 device=RequestOpenCLDevice(clEnv);
669 queue=AcquireOpenCLCommandQueue(device);
670 filteredImage=cloneImage(image,exception);
671 if (filteredImage == (Image *) NULL)
672 goto cleanup;
673 if (filteredImage->number_channels != image->number_channels)
674 goto cleanup;
675 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
676 if (imageBuffer == (cl_mem) NULL)
677 goto cleanup;
678 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
679 if (filteredImageBuffer == (cl_mem) NULL)
680 goto cleanup;
681
682 imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
683 exception);
684 if (imageKernelBuffer == (cl_mem) NULL)
685 goto cleanup;
686
687 length=image->columns*image->rows;
688 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
689 sizeof(cl_float4),(void *) NULL);
690 if (tempImageBuffer == (cl_mem) NULL)
691 goto cleanup;
692
693 blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
694 if (blurRowKernel == (cl_kernel) NULL)
695 {
696 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
697 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
698 goto cleanup;
699 }
700
701 number_channels=(cl_uint) image->number_channels;
702 imageColumns=(cl_uint) image->columns;
703 imageRows=(cl_uint) image->rows;
704
705 i=0;
706 status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
707 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
708 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
709 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
710 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
711 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
712 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
713 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
714 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
715 if (status != CL_SUCCESS)
716 {
717 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
718 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
719 goto cleanup;
720 }
721
722 gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
723 gsize[1]=image->rows;
724 lsize[0]=chunkSize;
725 lsize[1]=1;
726
727 outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,(size_t *) NULL,gsize,
728 lsize,image,filteredImage,MagickFalse,exception);
729 if (outputReady == MagickFalse)
730 goto cleanup;
731
732 blurColumnKernel=AcquireOpenCLKernel(device,"BlurColumn");
733 if (blurColumnKernel == (cl_kernel) NULL)
734 {
735 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
736 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
737 goto cleanup;
738 }
739
740 i=0;
741 status =SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
742 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels);
743 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
744 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
745 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
746 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
747 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
748 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
749 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
750 if (status != CL_SUCCESS)
751 {
752 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
753 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
754 goto cleanup;
755 }
756
757 gsize[0]=image->columns;
758 gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
759 lsize[0]=1;
760 lsize[1]=chunkSize;
761
762 outputReady=EnqueueOpenCLKernel(queue,blurColumnKernel,2,(size_t *) NULL,gsize,
763 lsize,image,filteredImage,MagickFalse,exception);
764
765 cleanup:
766
767 if (imageBuffer != (cl_mem) NULL)
768 ReleaseOpenCLMemObject(imageBuffer);
769 if (filteredImageBuffer != (cl_mem) NULL)
770 ReleaseOpenCLMemObject(filteredImageBuffer);
771 if (tempImageBuffer != (cl_mem) NULL)
772 ReleaseOpenCLMemObject(tempImageBuffer);
773 if (imageKernelBuffer != (cl_mem) NULL)
774 ReleaseOpenCLMemObject(imageKernelBuffer);
775 if (blurRowKernel != (cl_kernel) NULL)
776 ReleaseOpenCLKernel(blurRowKernel);
777 if (blurColumnKernel != (cl_kernel) NULL)
778 ReleaseOpenCLKernel(blurColumnKernel);
779 if (queue != (cl_command_queue) NULL)
780 ReleaseOpenCLCommandQueue(device,queue);
781 if (device != (MagickCLDevice) NULL)
782 ReleaseOpenCLDevice(device);
783 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
784 filteredImage=DestroyImage(filteredImage);
785
786 return(filteredImage);
787 }
788
AccelerateBlurImage(const Image * image,const double radius,const double sigma,ExceptionInfo * exception)789 MagickPrivate Image* AccelerateBlurImage(const Image *image,
790 const double radius,const double sigma,ExceptionInfo *exception)
791 {
792 Image
793 *filteredImage;
794
795 MagickCLEnv
796 clEnv;
797
798 assert(image != NULL);
799 assert(exception != (ExceptionInfo *) NULL);
800 if (image->debug != MagickFalse)
801 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
802
803 if (checkAccelerateCondition(image) == MagickFalse)
804 return((Image *) NULL);
805
806 clEnv=getOpenCLEnvironment(exception);
807 if (clEnv == (MagickCLEnv) NULL)
808 return((Image *) NULL);
809
810 filteredImage=ComputeBlurImage(image,clEnv,radius,sigma,exception);
811 return(filteredImage);
812 }
813
814 /*
815 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
816 % %
817 % %
818 % %
819 % A c c e l e r a t e C o n t r a s t I m a g e %
820 % %
821 % %
822 % %
823 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
824 */
825
ComputeContrastImage(Image * image,MagickCLEnv clEnv,const MagickBooleanType sharpen,ExceptionInfo * exception)826 static MagickBooleanType ComputeContrastImage(Image *image,MagickCLEnv clEnv,
827 const MagickBooleanType sharpen,ExceptionInfo *exception)
828 {
829 cl_command_queue
830 queue;
831
832 cl_int
833 status,
834 sign;
835
836 cl_kernel
837 contrastKernel;
838
839 cl_mem
840 imageBuffer;
841
842 cl_uint
843 number_channels;
844
845 MagickBooleanType
846 outputReady;
847
848 MagickCLDevice
849 device;
850
851 size_t
852 gsize[2],
853 i;
854
855 assert(image != (Image *) NULL);
856 assert(image->signature == MagickCoreSignature);
857 if (image->debug != MagickFalse)
858 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
859
860 contrastKernel=NULL;
861 imageBuffer=NULL;
862 outputReady=MagickFalse;
863
864 device=RequestOpenCLDevice(clEnv);
865 queue=AcquireOpenCLCommandQueue(device);
866 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
867 if (imageBuffer == (cl_mem) NULL)
868 goto cleanup;
869
870 contrastKernel=AcquireOpenCLKernel(device,"Contrast");
871 if (contrastKernel == (cl_kernel) NULL)
872 {
873 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
874 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
875 goto cleanup;
876 }
877
878 number_channels=(cl_uint) image->number_channels;
879 sign=sharpen != MagickFalse ? 1 : -1;
880
881 i=0;
882 status =SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
883 status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_uint),&number_channels);
884 status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_int),&sign);
885 if (status != CL_SUCCESS)
886 {
887 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
888 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
889 goto cleanup;
890 }
891
892 gsize[0]=image->columns;
893 gsize[1]=image->rows;
894
895 outputReady=EnqueueOpenCLKernel(queue,contrastKernel,2,(const size_t *) NULL,
896 gsize,(const size_t *) NULL,image,(Image *) NULL,MagickFalse,exception);
897
898 cleanup:
899
900 if (imageBuffer != (cl_mem) NULL)
901 ReleaseOpenCLMemObject(imageBuffer);
902 if (contrastKernel != (cl_kernel) NULL)
903 ReleaseOpenCLKernel(contrastKernel);
904 if (queue != (cl_command_queue) NULL)
905 ReleaseOpenCLCommandQueue(device,queue);
906 if (device != (MagickCLDevice) NULL)
907 ReleaseOpenCLDevice(device);
908
909 return(outputReady);
910 }
911
AccelerateContrastImage(Image * image,const MagickBooleanType sharpen,ExceptionInfo * exception)912 MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
913 const MagickBooleanType sharpen,ExceptionInfo *exception)
914 {
915 MagickBooleanType
916 status;
917
918 MagickCLEnv
919 clEnv;
920
921 assert(image != NULL);
922 assert(exception != (ExceptionInfo *) NULL);
923 if (image->debug != MagickFalse)
924 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
925
926 if (checkAccelerateCondition(image) == MagickFalse)
927 return(MagickFalse);
928
929 clEnv=getOpenCLEnvironment(exception);
930 if (clEnv == (MagickCLEnv) NULL)
931 return(MagickFalse);
932
933 status=ComputeContrastImage(image,clEnv,sharpen,exception);
934 return(status);
935 }
936
937 /*
938 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
939 % %
940 % %
941 % %
942 % A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e %
943 % %
944 % %
945 % %
946 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
947 */
948
ComputeContrastStretchImage(Image * image,MagickCLEnv clEnv,const double black_point,const double white_point,ExceptionInfo * exception)949 static MagickBooleanType ComputeContrastStretchImage(Image *image,
950 MagickCLEnv clEnv,const double black_point,const double white_point,
951 ExceptionInfo *exception)
952 {
953 #define ContrastStretchImageTag "ContrastStretch/Image"
954 #define MaxRange(color) ((cl_float) ScaleQuantumToMap((Quantum) (color)))
955
956 CacheView
957 *image_view;
958
959 cl_command_queue
960 queue;
961
962 cl_int
963 clStatus;
964
965 cl_mem_flags
966 mem_flags;
967
968 cl_mem
969 histogramBuffer,
970 imageBuffer,
971 stretchMapBuffer;
972
973 cl_kernel
974 histogramKernel,
975 stretchKernel;
976
977 cl_event
978 event;
979
980 cl_uint4
981 *histogram;
982
983 double
984 intensity;
985
986 cl_float4
987 black,
988 white;
989
990 MagickBooleanType
991 outputReady,
992 status;
993
994 MagickCLDevice
995 device;
996
997 MagickSizeType
998 length;
999
1000 PixelPacket
1001 *stretch_map;
1002
1003 ssize_t
1004 i;
1005
1006 size_t
1007 global_work_size[2];
1008
1009 void
1010 *hostPtr,
1011 *inputPixels;
1012
1013 assert(image != (Image *) NULL);
1014 assert(image->signature == MagickCoreSignature);
1015 if (image->debug != MagickFalse)
1016 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1017
1018 histogram=NULL;
1019 stretch_map=NULL;
1020 inputPixels = NULL;
1021 imageBuffer = NULL;
1022 histogramBuffer = NULL;
1023 stretchMapBuffer = NULL;
1024 histogramKernel = NULL;
1025 stretchKernel = NULL;
1026 queue = NULL;
1027 outputReady = MagickFalse;
1028
1029 /* exception=(&image->exception); */
1030
1031 /*
1032 Initialize opencl environment.
1033 */
1034 device = RequestOpenCLDevice(clEnv);
1035 queue = AcquireOpenCLCommandQueue(device);
1036
1037 /*
1038 Allocate and initialize histogram arrays.
1039 */
1040 histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
1041
1042 if (histogram == (cl_uint4 *) NULL)
1043 ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
1044
1045 /* reset histogram */
1046 (void) memset(histogram,0,(MaxMap+1)*sizeof(*histogram));
1047
1048 /*
1049 if (IsGrayImage(image,exception) != MagickFalse)
1050 (void) SetImageColorspace(image,GRAYColorspace);
1051 */
1052
1053 status=MagickTrue;
1054
1055
1056 /*
1057 Form histogram.
1058 */
1059 /* Create and initialize OpenCL buffers. */
1060 /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
1061 /* assume this will get a writable image */
1062 image_view=AcquireAuthenticCacheView(image,exception);
1063 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1064
1065 if (inputPixels == (void *) NULL)
1066 {
1067 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1068 goto cleanup;
1069 }
1070 /* If the host pointer is aligned to the size of CLPixelPacket,
1071 then use the host buffer directly from the GPU; otherwise,
1072 create a buffer on the GPU and copy the data over */
1073 if (ALIGNED(inputPixels,CLPixelPacket))
1074 {
1075 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1076 }
1077 else
1078 {
1079 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1080 }
1081 /* create a CL buffer from image pixel buffer */
1082 length = image->columns * image->rows;
1083 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1084 if (clStatus != CL_SUCCESS)
1085 {
1086 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1087 goto cleanup;
1088 }
1089
1090 /* If the host pointer is aligned to the size of cl_uint,
1091 then use the host buffer directly from the GPU; otherwise,
1092 create a buffer on the GPU and copy the data over */
1093 if (ALIGNED(histogram,cl_uint4))
1094 {
1095 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1096 hostPtr = histogram;
1097 }
1098 else
1099 {
1100 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1101 hostPtr = histogram;
1102 }
1103 /* create a CL buffer for histogram */
1104 length = (MaxMap+1);
1105 histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
1106 if (clStatus != CL_SUCCESS)
1107 {
1108 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1109 goto cleanup;
1110 }
1111
1112 status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask,exception);
1113 if (status == MagickFalse)
1114 goto cleanup;
1115
1116 /* read from the kenel output */
1117 if (ALIGNED(histogram,cl_uint4))
1118 {
1119 length = (MaxMap+1);
1120 clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
1121 }
1122 else
1123 {
1124 length = (MaxMap+1);
1125 clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
1126 }
1127 if (clStatus != CL_SUCCESS)
1128 {
1129 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1130 goto cleanup;
1131 }
1132
1133 /* unmap, don't block gpu to use this buffer again. */
1134 if (ALIGNED(histogram,cl_uint4))
1135 {
1136 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
1137 if (clStatus != CL_SUCCESS)
1138 {
1139 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1140 goto cleanup;
1141 }
1142 }
1143
1144 /* recreate input buffer later, in case image updated */
1145 #ifdef RECREATEBUFFER
1146 if (imageBuffer!=NULL)
1147 clEnv->library->clReleaseMemObject(imageBuffer);
1148 #endif
1149
1150 /* CPU stuff */
1151 /*
1152 Find the histogram boundaries by locating the black/white levels.
1153 */
1154 black.x=0.0;
1155 white.x=MaxRange(QuantumRange);
1156 if ((image->channel_mask & RedChannel) != 0)
1157 {
1158 intensity=0.0;
1159 for (i=0; i <= (ssize_t) MaxMap; i++)
1160 {
1161 intensity+=histogram[i].s[2];
1162 if (intensity > black_point)
1163 break;
1164 }
1165 black.x=(cl_float) i;
1166 intensity=0.0;
1167 for (i=(ssize_t) MaxMap; i != 0; i--)
1168 {
1169 intensity+=histogram[i].s[2];
1170 if (intensity > ((double) image->columns*image->rows-white_point))
1171 break;
1172 }
1173 white.x=(cl_float) i;
1174 }
1175 black.y=0.0;
1176 white.y=MaxRange(QuantumRange);
1177 if ((image->channel_mask & GreenChannel) != 0)
1178 {
1179 intensity=0.0;
1180 for (i=0; i <= (ssize_t) MaxMap; i++)
1181 {
1182 intensity+=histogram[i].s[2];
1183 if (intensity > black_point)
1184 break;
1185 }
1186 black.y=(cl_float) i;
1187 intensity=0.0;
1188 for (i=(ssize_t) MaxMap; i != 0; i--)
1189 {
1190 intensity+=histogram[i].s[2];
1191 if (intensity > ((double) image->columns*image->rows-white_point))
1192 break;
1193 }
1194 white.y=(cl_float) i;
1195 }
1196 black.z=0.0;
1197 white.z=MaxRange(QuantumRange);
1198 if ((image->channel_mask & BlueChannel) != 0)
1199 {
1200 intensity=0.0;
1201 for (i=0; i <= (ssize_t) MaxMap; i++)
1202 {
1203 intensity+=histogram[i].s[2];
1204 if (intensity > black_point)
1205 break;
1206 }
1207 black.z=(cl_float) i;
1208 intensity=0.0;
1209 for (i=(ssize_t) MaxMap; i != 0; i--)
1210 {
1211 intensity+=histogram[i].s[2];
1212 if (intensity > ((double) image->columns*image->rows-white_point))
1213 break;
1214 }
1215 white.z=(cl_float) i;
1216 }
1217 black.w=0.0;
1218 white.w=MaxRange(QuantumRange);
1219 if ((image->channel_mask & AlphaChannel) != 0)
1220 {
1221 intensity=0.0;
1222 for (i=0; i <= (ssize_t) MaxMap; i++)
1223 {
1224 intensity+=histogram[i].s[2];
1225 if (intensity > black_point)
1226 break;
1227 }
1228 black.w=(cl_float) i;
1229 intensity=0.0;
1230 for (i=(ssize_t) MaxMap; i != 0; i--)
1231 {
1232 intensity+=histogram[i].s[2];
1233 if (intensity > ((double) image->columns*image->rows-white_point))
1234 break;
1235 }
1236 white.w=(cl_float) i;
1237 }
1238
1239 stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
1240 sizeof(*stretch_map));
1241
1242 if (stretch_map == (PixelPacket *) NULL)
1243 ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
1244 image->filename);
1245
1246 /*
1247 Stretch the histogram to create the stretched image mapping.
1248 */
1249 (void) memset(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
1250 for (i=0; i <= (ssize_t) MaxMap; i++)
1251 {
1252 if ((image->channel_mask & RedChannel) != 0)
1253 {
1254 if (i < (ssize_t) black.x)
1255 stretch_map[i].red=(Quantum) 0;
1256 else
1257 if (i > (ssize_t) white.x)
1258 stretch_map[i].red=QuantumRange;
1259 else
1260 if (black.x != white.x)
1261 stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
1262 (i-black.x)/(white.x-black.x)));
1263 }
1264 if ((image->channel_mask & GreenChannel) != 0)
1265 {
1266 if (i < (ssize_t) black.y)
1267 stretch_map[i].green=0;
1268 else
1269 if (i > (ssize_t) white.y)
1270 stretch_map[i].green=QuantumRange;
1271 else
1272 if (black.y != white.y)
1273 stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
1274 (i-black.y)/(white.y-black.y)));
1275 }
1276 if ((image->channel_mask & BlueChannel) != 0)
1277 {
1278 if (i < (ssize_t) black.z)
1279 stretch_map[i].blue=0;
1280 else
1281 if (i > (ssize_t) white.z)
1282 stretch_map[i].blue= QuantumRange;
1283 else
1284 if (black.z != white.z)
1285 stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
1286 (i-black.z)/(white.z-black.z)));
1287 }
1288 if ((image->channel_mask & AlphaChannel) != 0)
1289 {
1290 if (i < (ssize_t) black.w)
1291 stretch_map[i].alpha=0;
1292 else
1293 if (i > (ssize_t) white.w)
1294 stretch_map[i].alpha=QuantumRange;
1295 else
1296 if (black.w != white.w)
1297 stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
1298 (i-black.w)/(white.w-black.w)));
1299 }
1300 }
1301
1302 /*
1303 Stretch the image.
1304 */
1305 if (((image->channel_mask & AlphaChannel) != 0) || (((image->channel_mask & IndexChannel) != 0) &&
1306 (image->colorspace == CMYKColorspace)))
1307 image->storage_class=DirectClass;
1308 if (image->storage_class == PseudoClass)
1309 {
1310 /*
1311 Stretch colormap.
1312 */
1313 for (i=0; i < (ssize_t) image->colors; i++)
1314 {
1315 if ((image->channel_mask & RedChannel) != 0)
1316 {
1317 if (black.x != white.x)
1318 image->colormap[i].red=stretch_map[
1319 ScaleQuantumToMap(image->colormap[i].red)].red;
1320 }
1321 if ((image->channel_mask & GreenChannel) != 0)
1322 {
1323 if (black.y != white.y)
1324 image->colormap[i].green=stretch_map[
1325 ScaleQuantumToMap(image->colormap[i].green)].green;
1326 }
1327 if ((image->channel_mask & BlueChannel) != 0)
1328 {
1329 if (black.z != white.z)
1330 image->colormap[i].blue=stretch_map[
1331 ScaleQuantumToMap(image->colormap[i].blue)].blue;
1332 }
1333 if ((image->channel_mask & AlphaChannel) != 0)
1334 {
1335 if (black.w != white.w)
1336 image->colormap[i].alpha=stretch_map[
1337 ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
1338 }
1339 }
1340 }
1341
1342 /*
1343 Stretch image.
1344 */
1345
1346
1347 /* GPU can work on this again, image and equalize map as input
1348 image: uchar4 (CLPixelPacket)
1349 stretch_map: uchar4 (PixelPacket)
1350 black, white: float4 (FloatPixelPacket) */
1351
1352 #ifdef RECREATEBUFFER
1353 /* If the host pointer is aligned to the size of CLPixelPacket,
1354 then use the host buffer directly from the GPU; otherwise,
1355 create a buffer on the GPU and copy the data over */
1356 if (ALIGNED(inputPixels,CLPixelPacket))
1357 {
1358 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1359 }
1360 else
1361 {
1362 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1363 }
1364 /* create a CL buffer from image pixel buffer */
1365 length = image->columns * image->rows;
1366 imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1367 if (clStatus != CL_SUCCESS)
1368 {
1369 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1370 goto cleanup;
1371 }
1372 #endif
1373
1374 /* Create and initialize OpenCL buffers. */
1375 if (ALIGNED(stretch_map, PixelPacket))
1376 {
1377 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1378 hostPtr = stretch_map;
1379 }
1380 else
1381 {
1382 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1383 hostPtr = stretch_map;
1384 }
1385 /* create a CL buffer for stretch_map */
1386 length = (MaxMap+1);
1387 stretchMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
1388 if (clStatus != CL_SUCCESS)
1389 {
1390 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1391 goto cleanup;
1392 }
1393
1394 /* get the OpenCL kernel */
1395 stretchKernel = AcquireOpenCLKernel(device,"ContrastStretch");
1396 if (stretchKernel == NULL)
1397 {
1398 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1399 goto cleanup;
1400 }
1401
1402 /* set the kernel arguments */
1403 i = 0;
1404 clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1405 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&image->channel_mask);
1406 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
1407 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&white);
1408 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&black);
1409 if (clStatus != CL_SUCCESS)
1410 {
1411 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1412 goto cleanup;
1413 }
1414
1415 /* launch the kernel */
1416 global_work_size[0] = image->columns;
1417 global_work_size[1] = image->rows;
1418
1419 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
1420
1421 if (clStatus != CL_SUCCESS)
1422 {
1423 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1424 goto cleanup;
1425 }
1426 RecordProfileData(device,stretchKernel,event);
1427
1428 /* read the data back */
1429 if (ALIGNED(inputPixels,CLPixelPacket))
1430 {
1431 length = image->columns * image->rows;
1432 clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1433 }
1434 else
1435 {
1436 length = image->columns * image->rows;
1437 clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
1438 }
1439 if (clStatus != CL_SUCCESS)
1440 {
1441 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1442 goto cleanup;
1443 }
1444
1445 outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
1446
1447 cleanup:
1448
1449 image_view=DestroyCacheView(image_view);
1450
1451 if (imageBuffer!=NULL)
1452 clEnv->library->clReleaseMemObject(imageBuffer);
1453
1454 if (stretchMapBuffer!=NULL)
1455 clEnv->library->clReleaseMemObject(stretchMapBuffer);
1456 if (stretch_map!=NULL)
1457 stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
1458 if (histogramBuffer!=NULL)
1459 clEnv->library->clReleaseMemObject(histogramBuffer);
1460 if (histogram!=NULL)
1461 histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
1462 if (histogramKernel!=NULL)
1463 ReleaseOpenCLKernel(histogramKernel);
1464 if (stretchKernel!=NULL)
1465 ReleaseOpenCLKernel(stretchKernel);
1466 if (queue != NULL)
1467 ReleaseOpenCLCommandQueue(device,queue);
1468 if (device != NULL)
1469 ReleaseOpenCLDevice(device);
1470
1471 return(outputReady);
1472 }
1473
AccelerateContrastStretchImage(Image * image,const double black_point,const double white_point,ExceptionInfo * exception)1474 MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
1475 Image *image,const double black_point,const double white_point,
1476 ExceptionInfo *exception)
1477 {
1478 MagickBooleanType
1479 status;
1480
1481 MagickCLEnv
1482 clEnv;
1483
1484 assert(image != NULL);
1485 assert(exception != (ExceptionInfo *) NULL);
1486 if (image->debug != MagickFalse)
1487 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1488
1489 if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1490 (checkHistogramCondition(image,image->intensity) == MagickFalse))
1491 return(MagickFalse);
1492
1493 clEnv=getOpenCLEnvironment(exception);
1494 if (clEnv == (MagickCLEnv) NULL)
1495 return(MagickFalse);
1496
1497 status=ComputeContrastStretchImage(image,clEnv,black_point,white_point,
1498 exception);
1499 return(status);
1500 }
1501
1502 /*
1503 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1504 % %
1505 % %
1506 % %
1507 % A c c e l e r a t e C o n v o l v e I m a g e %
1508 % %
1509 % %
1510 % %
1511 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1512 */
1513
ComputeConvolveImage(const Image * image,MagickCLEnv clEnv,const KernelInfo * kernel,ExceptionInfo * exception)1514 static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
1515 const KernelInfo *kernel,ExceptionInfo *exception)
1516 {
1517 CacheView
1518 *filteredImage_view,
1519 *image_view;
1520
1521 cl_command_queue
1522 queue;
1523
1524 cl_event
1525 event;
1526
1527 cl_kernel
1528 clkernel;
1529
1530 cl_int
1531 clStatus;
1532
1533 cl_mem
1534 convolutionKernel,
1535 filteredImageBuffer,
1536 imageBuffer;
1537
1538 cl_mem_flags
1539 mem_flags;
1540
1541 const void
1542 *inputPixels;
1543
1544 float
1545 *kernelBufferPtr;
1546
1547 Image
1548 *filteredImage;
1549
1550 MagickBooleanType
1551 outputReady;
1552
1553 MagickCLDevice
1554 device;
1555
1556 MagickSizeType
1557 length;
1558
1559 size_t
1560 global_work_size[3],
1561 localGroupSize[3],
1562 localMemoryRequirement;
1563
1564 unsigned
1565 kernelSize;
1566
1567 unsigned int
1568 filterHeight,
1569 filterWidth,
1570 i,
1571 imageHeight,
1572 imageWidth,
1573 matte;
1574
1575 void
1576 *filteredPixels,
1577 *hostPtr;
1578
1579 /* intialize all CL objects to NULL */
1580 imageBuffer = NULL;
1581 filteredImageBuffer = NULL;
1582 convolutionKernel = NULL;
1583 clkernel = NULL;
1584 queue = NULL;
1585
1586 filteredImage = NULL;
1587 filteredImage_view = NULL;
1588 outputReady = MagickFalse;
1589
1590 device = RequestOpenCLDevice(clEnv);
1591
1592 image_view=AcquireAuthenticCacheView(image,exception);
1593 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1594 if (inputPixels == (const void *) NULL)
1595 {
1596 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1597 goto cleanup;
1598 }
1599
1600 /* Create and initialize OpenCL buffers. */
1601
1602 /* If the host pointer is aligned to the size of CLPixelPacket,
1603 then use the host buffer directly from the GPU; otherwise,
1604 create a buffer on the GPU and copy the data over */
1605 if (ALIGNED(inputPixels,CLPixelPacket))
1606 {
1607 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1608 }
1609 else
1610 {
1611 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1612 }
1613 /* create a CL buffer from image pixel buffer */
1614 length = image->columns * image->rows;
1615 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1616 if (clStatus != CL_SUCCESS)
1617 {
1618 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1619 goto cleanup;
1620 }
1621
1622 filteredImage = CloneImage(image,0,0,MagickTrue,exception);
1623 assert(filteredImage != NULL);
1624 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1625 {
1626 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1627 goto cleanup;
1628 }
1629 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1630 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1631 if (filteredPixels == (void *) NULL)
1632 {
1633 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1634 goto cleanup;
1635 }
1636
1637 if (ALIGNED(filteredPixels,CLPixelPacket))
1638 {
1639 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1640 hostPtr = filteredPixels;
1641 }
1642 else
1643 {
1644 mem_flags = CL_MEM_WRITE_ONLY;
1645 hostPtr = NULL;
1646 }
1647 /* create a CL buffer from image pixel buffer */
1648 length = image->columns * image->rows;
1649 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1650 if (clStatus != CL_SUCCESS)
1651 {
1652 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1653 goto cleanup;
1654 }
1655
1656 kernelSize = (unsigned int) (kernel->width * kernel->height);
1657 convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
1658 if (clStatus != CL_SUCCESS)
1659 {
1660 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1661 goto cleanup;
1662 }
1663
1664 queue = AcquireOpenCLCommandQueue(device);
1665
1666 kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
1667 , 0, NULL, NULL, &clStatus);
1668 if (clStatus != CL_SUCCESS)
1669 {
1670 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1671 goto cleanup;
1672 }
1673 for (i = 0; i < kernelSize; i++)
1674 {
1675 kernelBufferPtr[i] = (float) kernel->values[i];
1676 }
1677 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
1678 if (clStatus != CL_SUCCESS)
1679 {
1680 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1681 goto cleanup;
1682 }
1683
1684 /* Compute the local memory requirement for a 16x16 workgroup.
1685 If it's larger than 16k, reduce the workgroup size to 8x8 */
1686 localGroupSize[0] = 16;
1687 localGroupSize[1] = 16;
1688 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1689 + kernel->width*kernel->height*sizeof(float);
1690
1691 if (localMemoryRequirement > device->local_memory_size)
1692 {
1693 localGroupSize[0] = 8;
1694 localGroupSize[1] = 8;
1695 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1696 + kernel->width*kernel->height*sizeof(float);
1697 }
1698 if (localMemoryRequirement <= device->local_memory_size)
1699 {
1700 /* get the OpenCL kernel */
1701 clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
1702 if (clkernel == NULL)
1703 {
1704 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1705 goto cleanup;
1706 }
1707
1708 /* set the kernel arguments */
1709 i = 0;
1710 clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1711 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1712 imageWidth = (unsigned int) image->columns;
1713 imageHeight = (unsigned int) image->rows;
1714 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1715 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1716 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1717 filterWidth = (unsigned int) kernel->width;
1718 filterHeight = (unsigned int) kernel->height;
1719 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1720 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1721 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1722 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1723 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1724 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
1725 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
1726 if (clStatus != CL_SUCCESS)
1727 {
1728 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1729 goto cleanup;
1730 }
1731
1732 /* pad the global size to a multiple of the local work size dimension */
1733 global_work_size[0] = ((image->columns + localGroupSize[0] - 1)/localGroupSize[0] ) * localGroupSize[0] ;
1734 global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
1735
1736 /* launch the kernel */
1737 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1738 if (clStatus != CL_SUCCESS)
1739 {
1740 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1741 goto cleanup;
1742 }
1743 RecordProfileData(device,clkernel,event);
1744 }
1745 else
1746 {
1747 /* get the OpenCL kernel */
1748 clkernel = AcquireOpenCLKernel(device,"Convolve");
1749 if (clkernel == NULL)
1750 {
1751 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1752 goto cleanup;
1753 }
1754
1755 /* set the kernel arguments */
1756 i = 0;
1757 clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1758 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1759 imageWidth = (unsigned int) image->columns;
1760 imageHeight = (unsigned int) image->rows;
1761 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1762 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1763 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1764 filterWidth = (unsigned int) kernel->width;
1765 filterHeight = (unsigned int) kernel->height;
1766 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1767 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1768 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1769 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1770 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1771 if (clStatus != CL_SUCCESS)
1772 {
1773 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1774 goto cleanup;
1775 }
1776
1777 localGroupSize[0] = 8;
1778 localGroupSize[1] = 8;
1779 global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
1780 global_work_size[1] = (image->rows + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
1781 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1782
1783 if (clStatus != CL_SUCCESS)
1784 {
1785 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1786 goto cleanup;
1787 }
1788 }
1789 RecordProfileData(device,clkernel,event);
1790
1791 if (ALIGNED(filteredPixels,CLPixelPacket))
1792 {
1793 length = image->columns * image->rows;
1794 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1795 }
1796 else
1797 {
1798 length = image->columns * image->rows;
1799 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1800 }
1801 if (clStatus != CL_SUCCESS)
1802 {
1803 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1804 goto cleanup;
1805 }
1806
1807 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1808
1809 cleanup:
1810
1811 image_view=DestroyCacheView(image_view);
1812 if (filteredImage_view != NULL)
1813 filteredImage_view=DestroyCacheView(filteredImage_view);
1814 if (imageBuffer != NULL)
1815 clEnv->library->clReleaseMemObject(imageBuffer);
1816 if (filteredImageBuffer != NULL)
1817 clEnv->library->clReleaseMemObject(filteredImageBuffer);
1818 if (convolutionKernel != NULL)
1819 clEnv->library->clReleaseMemObject(convolutionKernel);
1820 if (clkernel != NULL)
1821 ReleaseOpenCLKernel(clkernel);
1822 if (queue != NULL)
1823 ReleaseOpenCLCommandQueue(device,queue);
1824 if (device != NULL)
1825 ReleaseOpenCLDevice(device);
1826 if (outputReady == MagickFalse)
1827 {
1828 if (filteredImage != NULL)
1829 {
1830 DestroyImage(filteredImage);
1831 filteredImage = NULL;
1832 }
1833 }
1834
1835 return(filteredImage);
1836 }
1837
AccelerateConvolveImage(const Image * image,const KernelInfo * kernel,ExceptionInfo * exception)1838 MagickPrivate Image *AccelerateConvolveImage(const Image *image,
1839 const KernelInfo *kernel,ExceptionInfo *exception)
1840 {
1841 /* Temporary disabled due to access violation
1842
1843 Image
1844 *filteredImage;
1845
1846 assert(image != NULL);
1847 assert(kernel != (KernelInfo *) NULL);
1848 assert(exception != (ExceptionInfo *) NULL);
1849 if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1850 (checkOpenCLEnvironment(exception) == MagickFalse))
1851 return((Image *) NULL);
1852
1853 filteredImage=ComputeConvolveImage(image,kernel,exception);
1854 return(filteredImage);
1855 */
1856 magick_unreferenced(image);
1857 magick_unreferenced(kernel);
1858 magick_unreferenced(exception);
1859 return((Image *)NULL);
1860 }
1861
1862 /*
1863 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1864 % %
1865 % %
1866 % %
1867 % A c c e l e r a t e D e s p e c k l e I m a g e %
1868 % %
1869 % %
1870 % %
1871 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1872 */
1873
ComputeDespeckleImage(const Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)1874 static Image *ComputeDespeckleImage(const Image *image,MagickCLEnv clEnv,
1875 ExceptionInfo*exception)
1876 {
1877 static const int
1878 X[4] = {0, 1, 1,-1},
1879 Y[4] = {1, 0, 1, 1};
1880
1881 CacheView
1882 *filteredImage_view,
1883 *image_view;
1884
1885 cl_command_queue
1886 queue;
1887
1888 cl_int
1889 clStatus;
1890
1891 cl_kernel
1892 hullPass1,
1893 hullPass2;
1894
1895 cl_event
1896 event;
1897
1898 cl_mem_flags
1899 mem_flags;
1900
1901 cl_mem
1902 filteredImageBuffer,
1903 imageBuffer,
1904 tempImageBuffer[2];
1905
1906 const void
1907 *inputPixels;
1908
1909 Image
1910 *filteredImage;
1911
1912 int
1913 k,
1914 matte;
1915
1916 MagickBooleanType
1917 outputReady;
1918
1919 MagickCLDevice
1920 device;
1921
1922 MagickSizeType
1923 length;
1924
1925 size_t
1926 global_work_size[2];
1927
1928 unsigned int
1929 imageHeight,
1930 imageWidth;
1931
1932 void
1933 *filteredPixels,
1934 *hostPtr;
1935
1936 outputReady = MagickFalse;
1937 inputPixels = NULL;
1938 filteredImage = NULL;
1939 filteredImage_view = NULL;
1940 filteredPixels = NULL;
1941 imageBuffer = NULL;
1942 filteredImageBuffer = NULL;
1943 hullPass1 = NULL;
1944 hullPass2 = NULL;
1945 queue = NULL;
1946 tempImageBuffer[0] = tempImageBuffer[1] = NULL;
1947
1948 device = RequestOpenCLDevice(clEnv);
1949 queue = AcquireOpenCLCommandQueue(device);
1950
1951 image_view=AcquireAuthenticCacheView(image,exception);
1952 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1953 if (inputPixels == (void *) NULL)
1954 {
1955 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1956 goto cleanup;
1957 }
1958
1959 if (ALIGNED(inputPixels,CLPixelPacket))
1960 {
1961 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1962 }
1963 else
1964 {
1965 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1966 }
1967 /* create a CL buffer from image pixel buffer */
1968 length = image->columns * image->rows;
1969 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1970 if (clStatus != CL_SUCCESS)
1971 {
1972 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1973 goto cleanup;
1974 }
1975
1976 mem_flags = CL_MEM_READ_WRITE;
1977 length = image->columns * image->rows;
1978 for (k = 0; k < 2; k++)
1979 {
1980 tempImageBuffer[k] = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
1981 if (clStatus != CL_SUCCESS)
1982 {
1983 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1984 goto cleanup;
1985 }
1986 }
1987
1988 filteredImage = CloneImage(image,0,0,MagickTrue,exception);
1989 assert(filteredImage != NULL);
1990 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1991 {
1992 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1993 goto cleanup;
1994 }
1995 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1996 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1997 if (filteredPixels == (void *) NULL)
1998 {
1999 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2000 goto cleanup;
2001 }
2002
2003 if (ALIGNED(filteredPixels,CLPixelPacket))
2004 {
2005 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2006 hostPtr = filteredPixels;
2007 }
2008 else
2009 {
2010 mem_flags = CL_MEM_WRITE_ONLY;
2011 hostPtr = NULL;
2012 }
2013 /* create a CL buffer from image pixel buffer */
2014 length = image->columns * image->rows;
2015 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2016 if (clStatus != CL_SUCCESS)
2017 {
2018 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2019 goto cleanup;
2020 }
2021
2022 hullPass1 = AcquireOpenCLKernel(device,"HullPass1");
2023 hullPass2 = AcquireOpenCLKernel(device,"HullPass2");
2024
2025 clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
2026 clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2027 imageWidth = (unsigned int) image->columns;
2028 clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
2029 imageHeight = (unsigned int) image->rows;
2030 clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
2031 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2032 clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
2033 if (clStatus != CL_SUCCESS)
2034 {
2035 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2036 goto cleanup;
2037 }
2038
2039 clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2040 clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
2041 imageWidth = (unsigned int) image->columns;
2042 clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
2043 imageHeight = (unsigned int) image->rows;
2044 clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
2045 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2046 clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
2047 if (clStatus != CL_SUCCESS)
2048 {
2049 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2050 goto cleanup;
2051 }
2052
2053
2054 global_work_size[0] = image->columns;
2055 global_work_size[1] = image->rows;
2056
2057
2058 for (k = 0; k < 4; k++)
2059 {
2060 cl_int2 offset;
2061 int polarity;
2062
2063
2064 offset.s[0] = X[k];
2065 offset.s[1] = Y[k];
2066 polarity = 1;
2067 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2068 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2069 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2070 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2071 if (clStatus != CL_SUCCESS)
2072 {
2073 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2074 goto cleanup;
2075 }
2076 /* launch the kernel */
2077 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2078 if (clStatus != CL_SUCCESS)
2079 {
2080 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2081 goto cleanup;
2082 }
2083 RecordProfileData(device,hullPass1,event);
2084
2085 /* launch the kernel */
2086 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2087 if (clStatus != CL_SUCCESS)
2088 {
2089 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2090 goto cleanup;
2091 }
2092 RecordProfileData(device,hullPass2,event);
2093
2094 if (k == 0)
2095 clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
2096 offset.s[0] = -X[k];
2097 offset.s[1] = -Y[k];
2098 polarity = 1;
2099 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2100 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2101 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2102 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2103 if (clStatus != CL_SUCCESS)
2104 {
2105 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2106 goto cleanup;
2107 }
2108 /* launch the kernel */
2109 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2110 if (clStatus != CL_SUCCESS)
2111 {
2112 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2113 goto cleanup;
2114 }
2115 RecordProfileData(device,hullPass1,event);
2116
2117 /* launch the kernel */
2118 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2119 if (clStatus != CL_SUCCESS)
2120 {
2121 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2122 goto cleanup;
2123 }
2124 RecordProfileData(device,hullPass2,event);
2125
2126 offset.s[0] = -X[k];
2127 offset.s[1] = -Y[k];
2128 polarity = -1;
2129 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2130 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2131 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2132 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2133 if (clStatus != CL_SUCCESS)
2134 {
2135 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2136 goto cleanup;
2137 }
2138 /* launch the kernel */
2139 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2140 if (clStatus != CL_SUCCESS)
2141 {
2142 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2143 goto cleanup;
2144 }
2145 RecordProfileData(device,hullPass1,event);
2146
2147 /* launch the kernel */
2148 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2149 if (clStatus != CL_SUCCESS)
2150 {
2151 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2152 goto cleanup;
2153 }
2154 RecordProfileData(device,hullPass2,event);
2155
2156 offset.s[0] = X[k];
2157 offset.s[1] = Y[k];
2158 polarity = -1;
2159 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2160 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2161 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2162 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2163
2164 if (k == 3)
2165 clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
2166
2167 if (clStatus != CL_SUCCESS)
2168 {
2169 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2170 goto cleanup;
2171 }
2172 /* launch the kernel */
2173 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2174 if (clStatus != CL_SUCCESS)
2175 {
2176 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2177 goto cleanup;
2178 }
2179 RecordProfileData(device,hullPass1,event);
2180
2181 /* launch the kernel */
2182 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2183 if (clStatus != CL_SUCCESS)
2184 {
2185 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2186 goto cleanup;
2187 }
2188 RecordProfileData(device,hullPass2,event);
2189 }
2190
2191 if (ALIGNED(filteredPixels,CLPixelPacket))
2192 {
2193 length = image->columns * image->rows;
2194 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2195 }
2196 else
2197 {
2198 length = image->columns * image->rows;
2199 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2200 }
2201 if (clStatus != CL_SUCCESS)
2202 {
2203 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2204 goto cleanup;
2205 }
2206
2207 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2208
2209 cleanup:
2210
2211 image_view=DestroyCacheView(image_view);
2212 if (filteredImage_view != NULL)
2213 filteredImage_view=DestroyCacheView(filteredImage_view);
2214
2215 if (queue != NULL)
2216 ReleaseOpenCLCommandQueue(device,queue);
2217 if (device != NULL)
2218 ReleaseOpenCLDevice(device);
2219 if (imageBuffer!=NULL)
2220 clEnv->library->clReleaseMemObject(imageBuffer);
2221 for (k = 0; k < 2; k++)
2222 {
2223 if (tempImageBuffer[k]!=NULL)
2224 clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
2225 }
2226 if (filteredImageBuffer!=NULL)
2227 clEnv->library->clReleaseMemObject(filteredImageBuffer);
2228 if (hullPass1!=NULL)
2229 ReleaseOpenCLKernel(hullPass1);
2230 if (hullPass2!=NULL)
2231 ReleaseOpenCLKernel(hullPass2);
2232 if (outputReady == MagickFalse && filteredImage != NULL)
2233 filteredImage=DestroyImage(filteredImage);
2234
2235 return(filteredImage);
2236 }
2237
AccelerateDespeckleImage(const Image * image,ExceptionInfo * exception)2238 MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
2239 ExceptionInfo* exception)
2240 {
2241 Image
2242 *filteredImage;
2243
2244 MagickCLEnv
2245 clEnv;
2246
2247 assert(image != NULL);
2248 assert(exception != (ExceptionInfo *) NULL);
2249
2250 if (checkAccelerateConditionRGBA(image) == MagickFalse)
2251 return((Image *) NULL);
2252
2253 clEnv=getOpenCLEnvironment(exception);
2254 if (clEnv == (MagickCLEnv) NULL)
2255 return((Image *) NULL);
2256
2257 filteredImage=ComputeDespeckleImage(image,clEnv,exception);
2258 return(filteredImage);
2259 }
2260
2261 /*
2262 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2263 % %
2264 % %
2265 % %
2266 % A c c e l e r a t e E q u a l i z e I m a g e %
2267 % %
2268 % %
2269 % %
2270 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2271 */
2272
ComputeEqualizeImage(Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)2273 static MagickBooleanType ComputeEqualizeImage(Image *image,MagickCLEnv clEnv,
2274 ExceptionInfo *exception)
2275 {
2276 #define EqualizeImageTag "Equalize/Image"
2277
2278 CacheView
2279 *image_view;
2280
2281 cl_command_queue
2282 queue;
2283
2284 cl_int
2285 clStatus;
2286
2287 cl_mem_flags
2288 mem_flags;
2289
2290 cl_mem
2291 equalizeMapBuffer,
2292 histogramBuffer,
2293 imageBuffer;
2294
2295 cl_kernel
2296 equalizeKernel,
2297 histogramKernel;
2298
2299 cl_event
2300 event;
2301
2302 cl_uint4
2303 *histogram;
2304
2305 cl_float4
2306 white,
2307 black,
2308 intensity,
2309 *map;
2310
2311 MagickBooleanType
2312 outputReady,
2313 status;
2314
2315 MagickCLDevice
2316 device;
2317
2318 MagickSizeType
2319 length;
2320
2321 PixelPacket
2322 *equalize_map;
2323
2324 ssize_t
2325 i;
2326
2327 size_t
2328 global_work_size[2];
2329
2330 void
2331 *hostPtr,
2332 *inputPixels;
2333
2334 assert(image != (Image *) NULL);
2335 assert(image->signature == MagickCoreSignature);
2336 if (image->debug != MagickFalse)
2337 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2338
2339 map=NULL;
2340 histogram=NULL;
2341 equalize_map=NULL;
2342 inputPixels = NULL;
2343 imageBuffer = NULL;
2344 histogramBuffer = NULL;
2345 equalizeMapBuffer = NULL;
2346 histogramKernel = NULL;
2347 equalizeKernel = NULL;
2348 queue = NULL;
2349 outputReady = MagickFalse;
2350
2351 /*
2352 * initialize opencl env
2353 */
2354 device = RequestOpenCLDevice(clEnv);
2355 queue = AcquireOpenCLCommandQueue(device);
2356
2357 /*
2358 Allocate and initialize histogram arrays.
2359 */
2360 histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
2361 if (histogram == (cl_uint4 *) NULL)
2362 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2363
2364 /* reset histogram */
2365 (void) memset(histogram,0,(MaxMap+1)*sizeof(*histogram));
2366
2367 /* Create and initialize OpenCL buffers. */
2368 /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
2369 /* assume this will get a writable image */
2370 image_view=AcquireAuthenticCacheView(image,exception);
2371 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2372
2373 if (inputPixels == (void *) NULL)
2374 {
2375 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2376 goto cleanup;
2377 }
2378 /* If the host pointer is aligned to the size of CLPixelPacket,
2379 then use the host buffer directly from the GPU; otherwise,
2380 create a buffer on the GPU and copy the data over */
2381 if (ALIGNED(inputPixels,CLPixelPacket))
2382 {
2383 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2384 }
2385 else
2386 {
2387 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2388 }
2389 /* create a CL buffer from image pixel buffer */
2390 length = image->columns * image->rows;
2391 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2392 if (clStatus != CL_SUCCESS)
2393 {
2394 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2395 goto cleanup;
2396 }
2397
2398 /* If the host pointer is aligned to the size of cl_uint,
2399 then use the host buffer directly from the GPU; otherwise,
2400 create a buffer on the GPU and copy the data over */
2401 if (ALIGNED(histogram,cl_uint4))
2402 {
2403 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2404 hostPtr = histogram;
2405 }
2406 else
2407 {
2408 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2409 hostPtr = histogram;
2410 }
2411 /* create a CL buffer for histogram */
2412 length = (MaxMap+1);
2413 histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
2414 if (clStatus != CL_SUCCESS)
2415 {
2416 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2417 goto cleanup;
2418 }
2419
2420 status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask, exception);
2421 if (status == MagickFalse)
2422 goto cleanup;
2423
2424 /* read from the kenel output */
2425 if (ALIGNED(histogram,cl_uint4))
2426 {
2427 length = (MaxMap+1);
2428 clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
2429 }
2430 else
2431 {
2432 length = (MaxMap+1);
2433 clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
2434 }
2435 if (clStatus != CL_SUCCESS)
2436 {
2437 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2438 goto cleanup;
2439 }
2440
2441 /* unmap, don't block gpu to use this buffer again. */
2442 if (ALIGNED(histogram,cl_uint4))
2443 {
2444 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
2445 if (clStatus != CL_SUCCESS)
2446 {
2447 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
2448 goto cleanup;
2449 }
2450 }
2451
2452 /* recreate input buffer later, in case image updated */
2453 #ifdef RECREATEBUFFER
2454 if (imageBuffer!=NULL)
2455 clEnv->library->clReleaseMemObject(imageBuffer);
2456 #endif
2457
2458 /* CPU stuff */
2459 equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
2460 if (equalize_map == (PixelPacket *) NULL)
2461 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2462
2463 map=(cl_float4 *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
2464 if (map == (cl_float4 *) NULL)
2465 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2466
2467 /*
2468 Integrate the histogram to get the equalization map.
2469 */
2470 (void) memset(&intensity,0,sizeof(intensity));
2471 for (i=0; i <= (ssize_t) MaxMap; i++)
2472 {
2473 if ((image->channel_mask & SyncChannels) != 0)
2474 {
2475 intensity.x+=histogram[i].s[2];
2476 map[i]=intensity;
2477 continue;
2478 }
2479 if ((image->channel_mask & RedChannel) != 0)
2480 intensity.x+=histogram[i].s[2];
2481 if ((image->channel_mask & GreenChannel) != 0)
2482 intensity.y+=histogram[i].s[1];
2483 if ((image->channel_mask & BlueChannel) != 0)
2484 intensity.z+=histogram[i].s[0];
2485 if ((image->channel_mask & AlphaChannel) != 0)
2486 intensity.w+=histogram[i].s[3];
2487 map[i]=intensity;
2488 }
2489 black=map[0];
2490 white=map[(int) MaxMap];
2491 (void) memset(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
2492 for (i=0; i <= (ssize_t) MaxMap; i++)
2493 {
2494 if ((image->channel_mask & SyncChannels) != 0)
2495 {
2496 if (white.x != black.x)
2497 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2498 (map[i].x-black.x))/(white.x-black.x)));
2499 continue;
2500 }
2501 if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
2502 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2503 (map[i].x-black.x))/(white.x-black.x)));
2504 if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
2505 equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2506 (map[i].y-black.y))/(white.y-black.y)));
2507 if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
2508 equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2509 (map[i].z-black.z))/(white.z-black.z)));
2510 if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
2511 equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2512 (map[i].w-black.w))/(white.w-black.w)));
2513 }
2514
2515 if (image->storage_class == PseudoClass)
2516 {
2517 /*
2518 Equalize colormap.
2519 */
2520 for (i=0; i < (ssize_t) image->colors; i++)
2521 {
2522 if ((image->channel_mask & SyncChannels) != 0)
2523 {
2524 if (white.x != black.x)
2525 {
2526 image->colormap[i].red=equalize_map[
2527 ScaleQuantumToMap(image->colormap[i].red)].red;
2528 image->colormap[i].green=equalize_map[
2529 ScaleQuantumToMap(image->colormap[i].green)].red;
2530 image->colormap[i].blue=equalize_map[
2531 ScaleQuantumToMap(image->colormap[i].blue)].red;
2532 image->colormap[i].alpha=equalize_map[
2533 ScaleQuantumToMap(image->colormap[i].alpha)].red;
2534 }
2535 continue;
2536 }
2537 if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
2538 image->colormap[i].red=equalize_map[
2539 ScaleQuantumToMap(image->colormap[i].red)].red;
2540 if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
2541 image->colormap[i].green=equalize_map[
2542 ScaleQuantumToMap(image->colormap[i].green)].green;
2543 if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
2544 image->colormap[i].blue=equalize_map[
2545 ScaleQuantumToMap(image->colormap[i].blue)].blue;
2546 if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
2547 image->colormap[i].alpha=equalize_map[
2548 ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
2549 }
2550 }
2551
2552 /*
2553 Equalize image.
2554 */
2555
2556 /* GPU can work on this again, image and equalize map as input
2557 image: uchar4 (CLPixelPacket)
2558 equalize_map: uchar4 (PixelPacket)
2559 black, white: float4 (FloatPixelPacket) */
2560
2561 #ifdef RECREATEBUFFER
2562 /* If the host pointer is aligned to the size of CLPixelPacket,
2563 then use the host buffer directly from the GPU; otherwise,
2564 create a buffer on the GPU and copy the data over */
2565 if (ALIGNED(inputPixels,CLPixelPacket))
2566 {
2567 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2568 }
2569 else
2570 {
2571 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2572 }
2573 /* create a CL buffer from image pixel buffer */
2574 length = image->columns * image->rows;
2575 imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2576 if (clStatus != CL_SUCCESS)
2577 {
2578 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2579 goto cleanup;
2580 }
2581 #endif
2582
2583 /* Create and initialize OpenCL buffers. */
2584 if (ALIGNED(equalize_map, PixelPacket))
2585 {
2586 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2587 hostPtr = equalize_map;
2588 }
2589 else
2590 {
2591 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2592 hostPtr = equalize_map;
2593 }
2594 /* create a CL buffer for eqaulize_map */
2595 length = (MaxMap+1);
2596 equalizeMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
2597 if (clStatus != CL_SUCCESS)
2598 {
2599 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2600 goto cleanup;
2601 }
2602
2603 /* get the OpenCL kernel */
2604 equalizeKernel = AcquireOpenCLKernel(device,"Equalize");
2605 if (equalizeKernel == NULL)
2606 {
2607 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
2608 goto cleanup;
2609 }
2610
2611 /* set the kernel arguments */
2612 i = 0;
2613 clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2614 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&image->channel_mask);
2615 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
2616 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&white);
2617 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&black);
2618 if (clStatus != CL_SUCCESS)
2619 {
2620 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2621 goto cleanup;
2622 }
2623
2624 /* launch the kernel */
2625 global_work_size[0] = image->columns;
2626 global_work_size[1] = image->rows;
2627
2628 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2629
2630 if (clStatus != CL_SUCCESS)
2631 {
2632 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2633 goto cleanup;
2634 }
2635 RecordProfileData(device,equalizeKernel,event);
2636
2637 /* read the data back */
2638 if (ALIGNED(inputPixels,CLPixelPacket))
2639 {
2640 length = image->columns * image->rows;
2641 clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2642 }
2643 else
2644 {
2645 length = image->columns * image->rows;
2646 clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
2647 }
2648 if (clStatus != CL_SUCCESS)
2649 {
2650 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2651 goto cleanup;
2652 }
2653
2654 outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
2655
2656 cleanup:
2657
2658 image_view=DestroyCacheView(image_view);
2659
2660 if (imageBuffer!=NULL)
2661 clEnv->library->clReleaseMemObject(imageBuffer);
2662 if (map!=NULL)
2663 map=(cl_float4 *) RelinquishMagickMemory(map);
2664 if (equalizeMapBuffer!=NULL)
2665 clEnv->library->clReleaseMemObject(equalizeMapBuffer);
2666 if (equalize_map!=NULL)
2667 equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
2668 if (histogramBuffer!=NULL)
2669 clEnv->library->clReleaseMemObject(histogramBuffer);
2670 if (histogram!=NULL)
2671 histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
2672 if (histogramKernel!=NULL)
2673 ReleaseOpenCLKernel(histogramKernel);
2674 if (equalizeKernel!=NULL)
2675 ReleaseOpenCLKernel(equalizeKernel);
2676 if (queue != NULL)
2677 ReleaseOpenCLCommandQueue(device, queue);
2678 if (device != NULL)
2679 ReleaseOpenCLDevice(device);
2680
2681 return(outputReady);
2682 }
2683
AccelerateEqualizeImage(Image * image,ExceptionInfo * exception)2684 MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
2685 ExceptionInfo *exception)
2686 {
2687 MagickBooleanType
2688 status;
2689
2690 MagickCLEnv
2691 clEnv;
2692
2693 assert(image != NULL);
2694 assert(exception != (ExceptionInfo *) NULL);
2695 if (image->debug != MagickFalse)
2696 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2697
2698 if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
2699 (checkHistogramCondition(image,image->intensity) == MagickFalse))
2700 return(MagickFalse);
2701
2702 clEnv=getOpenCLEnvironment(exception);
2703 if (clEnv == (MagickCLEnv) NULL)
2704 return(MagickFalse);
2705
2706 status=ComputeEqualizeImage(image,clEnv,exception);
2707 return(status);
2708 }
2709
2710 /*
2711 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2712 % %
2713 % %
2714 % %
2715 % A c c e l e r a t e F u n c t i o n I m a g e %
2716 % %
2717 % %
2718 % %
2719 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2720 */
2721
ComputeFunctionImage(Image * image,MagickCLEnv clEnv,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2722 static MagickBooleanType ComputeFunctionImage(Image *image,MagickCLEnv clEnv,
2723 const MagickFunction function,const size_t number_parameters,
2724 const double *parameters,ExceptionInfo *exception)
2725 {
2726 cl_command_queue
2727 queue;
2728
2729 cl_int
2730 status;
2731
2732 cl_kernel
2733 functionKernel;
2734
2735 cl_mem
2736 imageBuffer,
2737 parametersBuffer;
2738
2739 cl_uint
2740 number_params,
2741 number_channels;
2742
2743 float
2744 *parametersBufferPtr;
2745
2746 MagickBooleanType
2747 outputReady;
2748
2749 MagickCLDevice
2750 device;
2751
2752 size_t
2753 gsize[2],
2754 i;
2755
2756 assert(image != (Image *) NULL);
2757 assert(image->signature == MagickCoreSignature);
2758 if (image->debug != MagickFalse)
2759 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2760
2761 outputReady=MagickFalse;
2762 imageBuffer=NULL;
2763 functionKernel=NULL;
2764 parametersBuffer=NULL;
2765
2766 device=RequestOpenCLDevice(clEnv);
2767 queue=AcquireOpenCLCommandQueue(device);
2768 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2769 if (imageBuffer == (cl_mem) NULL)
2770 goto cleanup;
2771
2772 parametersBufferPtr=(float *) AcquireQuantumMemory(number_parameters,
2773 sizeof(float));
2774 if (parametersBufferPtr == (float *) NULL)
2775 goto cleanup;
2776 for (i=0; i<number_parameters; i++)
2777 parametersBufferPtr[i]=(float) parameters[i];
2778 parametersBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
2779 CL_MEM_COPY_HOST_PTR,number_parameters*sizeof(*parametersBufferPtr),
2780 parametersBufferPtr);
2781 parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
2782 if (parametersBuffer == (cl_mem) NULL)
2783 {
2784 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2785 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
2786 goto cleanup;
2787 }
2788
2789 functionKernel=AcquireOpenCLKernel(device,"ComputeFunction");
2790 if (functionKernel == (cl_kernel) NULL)
2791 {
2792 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2793 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2794 goto cleanup;
2795 }
2796
2797 number_channels=(cl_uint) image->number_channels;
2798 number_params=(cl_uint) number_parameters;
2799
2800 i=0;
2801 status =SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2802 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_channels);
2803 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
2804 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(MagickFunction),(void *)&function);
2805 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_params);
2806 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)¶metersBuffer);
2807 if (status != CL_SUCCESS)
2808 {
2809 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2810 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2811 goto cleanup;
2812 }
2813
2814 gsize[0]=image->columns;
2815 gsize[1]=image->rows;
2816 outputReady=EnqueueOpenCLKernel(queue,functionKernel,2,(const size_t *) NULL,
2817 gsize,(const size_t *) NULL,image,(const Image *) NULL,MagickFalse,
2818 exception);
2819
2820 cleanup:
2821
2822 if (imageBuffer != (cl_mem) NULL)
2823 ReleaseOpenCLMemObject(imageBuffer);
2824 if (parametersBuffer != (cl_mem) NULL)
2825 ReleaseOpenCLMemObject(parametersBuffer);
2826 if (functionKernel != (cl_kernel) NULL)
2827 ReleaseOpenCLKernel(functionKernel);
2828 if (queue != (cl_command_queue) NULL)
2829 ReleaseOpenCLCommandQueue(device,queue);
2830 if (device != (MagickCLDevice) NULL)
2831 ReleaseOpenCLDevice(device);
2832 return(outputReady);
2833 }
2834
AccelerateFunctionImage(Image * image,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2835 MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
2836 const MagickFunction function,const size_t number_parameters,
2837 const double *parameters,ExceptionInfo *exception)
2838 {
2839 MagickBooleanType
2840 status;
2841
2842 MagickCLEnv
2843 clEnv;
2844
2845 assert(image != NULL);
2846 assert(exception != (ExceptionInfo *) NULL);
2847 if (image->debug != MagickFalse)
2848 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2849
2850 if (checkAccelerateCondition(image) == MagickFalse)
2851 return(MagickFalse);
2852
2853 clEnv=getOpenCLEnvironment(exception);
2854 if (clEnv == (MagickCLEnv) NULL)
2855 return(MagickFalse);
2856
2857 status=ComputeFunctionImage(image,clEnv,function,number_parameters,
2858 parameters,exception);
2859 return(status);
2860 }
2861
2862 /*
2863 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2864 % %
2865 % %
2866 % %
2867 % A c c e l e r a t e G r a y s c a l e I m a g e %
2868 % %
2869 % %
2870 % %
2871 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2872 */
2873
ComputeGrayscaleImage(Image * image,MagickCLEnv clEnv,const PixelIntensityMethod method,ExceptionInfo * exception)2874 static MagickBooleanType ComputeGrayscaleImage(Image *image,MagickCLEnv clEnv,
2875 const PixelIntensityMethod method,ExceptionInfo *exception)
2876 {
2877 cl_command_queue
2878 queue;
2879
2880 cl_int
2881 status;
2882
2883 cl_kernel
2884 grayscaleKernel;
2885
2886 cl_mem
2887 imageBuffer;
2888
2889 cl_uint
2890 number_channels,
2891 colorspace,
2892 intensityMethod;
2893
2894 MagickBooleanType
2895 outputReady;
2896
2897 MagickCLDevice
2898 device;
2899
2900 size_t
2901 gsize[2],
2902 i;
2903
2904 assert(image != (Image *) NULL);
2905 assert(image->signature == MagickCoreSignature);
2906 if (image->debug != MagickFalse)
2907 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2908
2909 outputReady=MagickFalse;
2910 imageBuffer=NULL;
2911 grayscaleKernel=NULL;
2912
2913 device=RequestOpenCLDevice(clEnv);
2914 queue=AcquireOpenCLCommandQueue(device);
2915 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2916 if (imageBuffer == (cl_mem) NULL)
2917 goto cleanup;
2918
2919 grayscaleKernel=AcquireOpenCLKernel(device,"Grayscale");
2920 if (grayscaleKernel == (cl_kernel) NULL)
2921 {
2922 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2923 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2924 goto cleanup;
2925 }
2926
2927 number_channels=(cl_uint) image->number_channels;
2928 intensityMethod=(cl_uint) method;
2929 colorspace=(cl_uint) image->colorspace;
2930
2931 i=0;
2932 status =SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2933 status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&number_channels);
2934 status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&colorspace);
2935 status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&intensityMethod);
2936 if (status != CL_SUCCESS)
2937 {
2938 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2939 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2940 goto cleanup;
2941 }
2942
2943 gsize[0]=image->columns;
2944 gsize[1]=image->rows;
2945 outputReady=EnqueueOpenCLKernel(queue,grayscaleKernel,2,
2946 (const size_t *) NULL,gsize,(const size_t *) NULL,image,(Image *) NULL,
2947 MagickFalse,exception);
2948
2949 cleanup:
2950
2951 if (imageBuffer != (cl_mem) NULL)
2952 ReleaseOpenCLMemObject(imageBuffer);
2953 if (grayscaleKernel != (cl_kernel) NULL)
2954 ReleaseOpenCLKernel(grayscaleKernel);
2955 if (queue != (cl_command_queue) NULL)
2956 ReleaseOpenCLCommandQueue(device,queue);
2957 if (device != (MagickCLDevice) NULL)
2958 ReleaseOpenCLDevice(device);
2959
2960 return(outputReady);
2961 }
2962
AccelerateGrayscaleImage(Image * image,const PixelIntensityMethod method,ExceptionInfo * exception)2963 MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
2964 const PixelIntensityMethod method,ExceptionInfo *exception)
2965 {
2966 MagickBooleanType
2967 status;
2968
2969 MagickCLEnv
2970 clEnv;
2971
2972 assert(image != NULL);
2973 assert(exception != (ExceptionInfo *) NULL);
2974 if (image->debug != MagickFalse)
2975 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2976
2977 if ((checkAccelerateCondition(image) == MagickFalse) ||
2978 (checkPixelIntensity(image,method) == MagickFalse))
2979 return(MagickFalse);
2980
2981 if (image->number_channels < 3)
2982 return(MagickFalse);
2983
2984 if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
2985 (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
2986 (GetPixelBlueTraits(image) == UndefinedPixelTrait))
2987 return(MagickFalse);
2988
2989 clEnv=getOpenCLEnvironment(exception);
2990 if (clEnv == (MagickCLEnv) NULL)
2991 return(MagickFalse);
2992
2993 status=ComputeGrayscaleImage(image,clEnv,method,exception);
2994 return(status);
2995 }
2996
2997 /*
2998 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2999 % %
3000 % %
3001 % %
3002 % A c c e l e r a t e L o c a l C o n t r a s t I m a g e %
3003 % %
3004 % %
3005 % %
3006 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3007 */
3008
ComputeLocalContrastImage(const Image * image,MagickCLEnv clEnv,const double radius,const double strength,ExceptionInfo * exception)3009 static Image *ComputeLocalContrastImage(const Image *image,MagickCLEnv clEnv,
3010 const double radius,const double strength,ExceptionInfo *exception)
3011 {
3012 CacheView
3013 *filteredImage_view,
3014 *image_view;
3015
3016 cl_command_queue
3017 queue;
3018
3019 cl_int
3020 clStatus,
3021 iRadius;
3022
3023 cl_kernel
3024 blurRowKernel,
3025 blurColumnKernel;
3026
3027 cl_event
3028 event;
3029
3030 cl_mem
3031 filteredImageBuffer,
3032 imageBuffer,
3033 imageKernelBuffer,
3034 tempImageBuffer;
3035
3036 cl_mem_flags
3037 mem_flags;
3038
3039 const void
3040 *inputPixels;
3041
3042 Image
3043 *filteredImage;
3044
3045 MagickBooleanType
3046 outputReady;
3047
3048 MagickCLDevice
3049 device;
3050
3051 MagickSizeType
3052 length;
3053
3054 void
3055 *filteredPixels,
3056 *hostPtr;
3057
3058 unsigned int
3059 i,
3060 imageColumns,
3061 imageRows,
3062 passes;
3063
3064 filteredImage = NULL;
3065 filteredImage_view = NULL;
3066 imageBuffer = NULL;
3067 filteredImageBuffer = NULL;
3068 tempImageBuffer = NULL;
3069 imageKernelBuffer = NULL;
3070 blurRowKernel = NULL;
3071 blurColumnKernel = NULL;
3072 queue = NULL;
3073 outputReady = MagickFalse;
3074
3075 device = RequestOpenCLDevice(clEnv);
3076 queue = AcquireOpenCLCommandQueue(device);
3077
3078 /* Create and initialize OpenCL buffers. */
3079 {
3080 image_view=AcquireAuthenticCacheView(image,exception);
3081 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3082 if (inputPixels == (const void *) NULL)
3083 {
3084 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3085 goto cleanup;
3086 }
3087
3088 /* If the host pointer is aligned to the size of CLPixelPacket,
3089 then use the host buffer directly from the GPU; otherwise,
3090 create a buffer on the GPU and copy the data over */
3091 if (ALIGNED(inputPixels,CLPixelPacket))
3092 {
3093 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3094 }
3095 else
3096 {
3097 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3098 }
3099 /* create a CL buffer from image pixel buffer */
3100 length = image->columns * image->rows;
3101 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3102 if (clStatus != CL_SUCCESS)
3103 {
3104 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3105 goto cleanup;
3106 }
3107 }
3108
3109 /* create output */
3110 {
3111 filteredImage = CloneImage(image,0,0,MagickTrue,exception);
3112 assert(filteredImage != NULL);
3113 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3114 {
3115 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
3116 goto cleanup;
3117 }
3118 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3119 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3120 if (filteredPixels == (void *) NULL)
3121 {
3122 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3123 goto cleanup;
3124 }
3125
3126 if (ALIGNED(filteredPixels,CLPixelPacket))
3127 {
3128 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3129 hostPtr = filteredPixels;
3130 }
3131 else
3132 {
3133 mem_flags = CL_MEM_WRITE_ONLY;
3134 hostPtr = NULL;
3135 }
3136
3137 /* create a CL buffer from image pixel buffer */
3138 length = image->columns * image->rows;
3139 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3140 if (clStatus != CL_SUCCESS)
3141 {
3142 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3143 goto cleanup;
3144 }
3145 }
3146
3147 {
3148 /* create temp buffer */
3149 {
3150 length = image->columns * image->rows;
3151 tempImageBuffer = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
3152 if (clStatus != CL_SUCCESS)
3153 {
3154 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3155 goto cleanup;
3156 }
3157 }
3158
3159 /* get the opencl kernel */
3160 {
3161 blurRowKernel = AcquireOpenCLKernel(device,"LocalContrastBlurRow");
3162 if (blurRowKernel == NULL)
3163 {
3164 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3165 goto cleanup;
3166 };
3167
3168 blurColumnKernel = AcquireOpenCLKernel(device,"LocalContrastBlurApplyColumn");
3169 if (blurColumnKernel == NULL)
3170 {
3171 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3172 goto cleanup;
3173 };
3174 }
3175
3176 {
3177 imageColumns = (unsigned int) image->columns;
3178 imageRows = (unsigned int) image->rows;
3179 iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius); /* Normalized radius, 100% gives blur radius of 20% of the largest dimension */
3180
3181 passes = (((1.0f * imageRows) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
3182 passes = (passes < 1) ? 1: passes;
3183
3184 /* set the kernel arguments */
3185 i = 0;
3186 clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3187 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3188 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3189 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
3190 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3191 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3192
3193 if (clStatus != CL_SUCCESS)
3194 {
3195 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3196 goto cleanup;
3197 }
3198 }
3199
3200 /* launch the kernel */
3201 {
3202 int x;
3203 for (x = 0; x < passes; ++x) {
3204 size_t gsize[2];
3205 size_t wsize[2];
3206 size_t goffset[2];
3207
3208 gsize[0] = 256;
3209 gsize[1] = (image->rows + passes - 1) / passes;
3210 wsize[0] = 256;
3211 wsize[1] = 1;
3212 goffset[0] = 0;
3213 goffset[1] = x * gsize[1];
3214
3215 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3216 if (clStatus != CL_SUCCESS)
3217 {
3218 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3219 goto cleanup;
3220 }
3221 clEnv->library->clFlush(queue);
3222 RecordProfileData(device,blurRowKernel,event);
3223 }
3224 }
3225
3226 {
3227 cl_float FStrength = strength;
3228 i = 0;
3229 clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3230 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3231 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3232 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
3233 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
3234 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3235 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3236
3237 if (clStatus != CL_SUCCESS)
3238 {
3239 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3240 goto cleanup;
3241 }
3242 }
3243
3244 /* launch the kernel */
3245 {
3246 int x;
3247 for (x = 0; x < passes; ++x) {
3248 size_t gsize[2];
3249 size_t wsize[2];
3250 size_t goffset[2];
3251
3252 gsize[0] = ((image->columns + 3) / 4) * 4;
3253 gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
3254 wsize[0] = 4;
3255 wsize[1] = 64;
3256 goffset[0] = 0;
3257 goffset[1] = x * gsize[1];
3258
3259 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3260 if (clStatus != CL_SUCCESS)
3261 {
3262 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3263 goto cleanup;
3264 }
3265 clEnv->library->clFlush(queue);
3266 RecordProfileData(device,blurColumnKernel,event);
3267 }
3268 }
3269 }
3270
3271 /* get result */
3272 if (ALIGNED(filteredPixels,CLPixelPacket))
3273 {
3274 length = image->columns * image->rows;
3275 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3276 }
3277 else
3278 {
3279 length = image->columns * image->rows;
3280 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3281 }
3282 if (clStatus != CL_SUCCESS)
3283 {
3284 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3285 goto cleanup;
3286 }
3287
3288 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3289
3290 cleanup:
3291
3292 image_view=DestroyCacheView(image_view);
3293 if (filteredImage_view != NULL)
3294 filteredImage_view=DestroyCacheView(filteredImage_view);
3295
3296 if (imageBuffer!=NULL)
3297 clEnv->library->clReleaseMemObject(imageBuffer);
3298 if (filteredImageBuffer!=NULL)
3299 clEnv->library->clReleaseMemObject(filteredImageBuffer);
3300 if (tempImageBuffer!=NULL)
3301 clEnv->library->clReleaseMemObject(tempImageBuffer);
3302 if (imageKernelBuffer!=NULL)
3303 clEnv->library->clReleaseMemObject(imageKernelBuffer);
3304 if (blurRowKernel!=NULL)
3305 ReleaseOpenCLKernel(blurRowKernel);
3306 if (blurColumnKernel!=NULL)
3307 ReleaseOpenCLKernel(blurColumnKernel);
3308 if (queue != NULL)
3309 ReleaseOpenCLCommandQueue(device, queue);
3310 if (device != NULL)
3311 ReleaseOpenCLDevice(device);
3312 if (outputReady == MagickFalse)
3313 {
3314 if (filteredImage != NULL)
3315 {
3316 DestroyImage(filteredImage);
3317 filteredImage = NULL;
3318 }
3319 }
3320
3321 return(filteredImage);
3322 }
3323
AccelerateLocalContrastImage(const Image * image,const double radius,const double strength,ExceptionInfo * exception)3324 MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
3325 const double radius,const double strength,ExceptionInfo *exception)
3326 {
3327 Image
3328 *filteredImage;
3329
3330 MagickCLEnv
3331 clEnv;
3332
3333 assert(image != NULL);
3334 assert(exception != (ExceptionInfo *) NULL);
3335
3336 if (checkAccelerateConditionRGBA(image) == MagickFalse)
3337 return((Image *) NULL);
3338
3339 clEnv=getOpenCLEnvironment(exception);
3340 if (clEnv == (MagickCLEnv) NULL)
3341 return((Image *) NULL);
3342
3343 filteredImage=ComputeLocalContrastImage(image,clEnv,radius,strength,
3344 exception);
3345 return(filteredImage);
3346 }
3347
3348 /*
3349 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3350 % %
3351 % %
3352 % %
3353 % A c c e l e r a t e M o d u l a t e I m a g e %
3354 % %
3355 % %
3356 % %
3357 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3358 */
3359
ComputeModulateImage(Image * image,MagickCLEnv clEnv,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3360 static MagickBooleanType ComputeModulateImage(Image *image,MagickCLEnv clEnv,
3361 const double percent_brightness,const double percent_hue,
3362 const double percent_saturation,const ColorspaceType colorspace,
3363 ExceptionInfo *exception)
3364 {
3365 CacheView
3366 *image_view;
3367
3368 cl_float
3369 bright,
3370 hue,
3371 saturation;
3372
3373 cl_command_queue
3374 queue;
3375
3376 cl_int
3377 color,
3378 clStatus;
3379
3380 cl_kernel
3381 modulateKernel;
3382
3383 cl_event
3384 event;
3385
3386 cl_mem
3387 imageBuffer;
3388
3389 cl_mem_flags
3390 mem_flags;
3391
3392 MagickBooleanType
3393 outputReady;
3394
3395 MagickCLDevice
3396 device;
3397
3398 MagickSizeType
3399 length;
3400
3401 ssize_t
3402 i;
3403
3404 void
3405 *inputPixels;
3406
3407 assert(image != (Image *) NULL);
3408 assert(image->signature == MagickCoreSignature);
3409 if (image->debug != MagickFalse)
3410 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3411
3412 inputPixels = NULL;
3413 imageBuffer = NULL;
3414 modulateKernel = NULL;
3415
3416 /*
3417 * initialize opencl env
3418 */
3419 device = RequestOpenCLDevice(clEnv);
3420 queue = AcquireOpenCLCommandQueue(device);
3421
3422 outputReady = MagickFalse;
3423
3424 /* Create and initialize OpenCL buffers.
3425 inputPixels = AcquirePixelCachePixels(image, &length, exception);
3426 assume this will get a writable image
3427 */
3428 image_view=AcquireAuthenticCacheView(image,exception);
3429 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3430 if (inputPixels == (void *) NULL)
3431 {
3432 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3433 goto cleanup;
3434 }
3435
3436 /* If the host pointer is aligned to the size of CLPixelPacket,
3437 then use the host buffer directly from the GPU; otherwise,
3438 create a buffer on the GPU and copy the data over
3439 */
3440 if (ALIGNED(inputPixels,CLPixelPacket))
3441 {
3442 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3443 }
3444 else
3445 {
3446 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3447 }
3448 /* create a CL buffer from image pixel buffer */
3449 length = image->columns * image->rows;
3450 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3451 if (clStatus != CL_SUCCESS)
3452 {
3453 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3454 goto cleanup;
3455 }
3456
3457 modulateKernel = AcquireOpenCLKernel(device, "Modulate");
3458 if (modulateKernel == NULL)
3459 {
3460 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3461 goto cleanup;
3462 }
3463
3464 bright=percent_brightness;
3465 hue=percent_hue;
3466 saturation=percent_saturation;
3467 color=colorspace;
3468
3469 i = 0;
3470 clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3471 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3472 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3473 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3474 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3475 if (clStatus != CL_SUCCESS)
3476 {
3477 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3478 goto cleanup;
3479 }
3480
3481 {
3482 size_t global_work_size[2];
3483 global_work_size[0] = image->columns;
3484 global_work_size[1] = image->rows;
3485 /* launch the kernel */
3486 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3487 if (clStatus != CL_SUCCESS)
3488 {
3489 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3490 goto cleanup;
3491 }
3492 RecordProfileData(device,modulateKernel,event);
3493 }
3494
3495 if (ALIGNED(inputPixels,CLPixelPacket))
3496 {
3497 length = image->columns * image->rows;
3498 clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3499 }
3500 else
3501 {
3502 length = image->columns * image->rows;
3503 clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3504 }
3505 if (clStatus != CL_SUCCESS)
3506 {
3507 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3508 goto cleanup;
3509 }
3510
3511 outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
3512
3513 cleanup:
3514
3515 image_view=DestroyCacheView(image_view);
3516
3517 if (imageBuffer!=NULL)
3518 clEnv->library->clReleaseMemObject(imageBuffer);
3519 if (modulateKernel!=NULL)
3520 ReleaseOpenCLKernel(modulateKernel);
3521 if (queue != NULL)
3522 ReleaseOpenCLCommandQueue(device,queue);
3523 if (device != NULL)
3524 ReleaseOpenCLDevice(device);
3525
3526 return outputReady;
3527
3528 }
3529
AccelerateModulateImage(Image * image,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3530 MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
3531 const double percent_brightness,const double percent_hue,
3532 const double percent_saturation,const ColorspaceType colorspace,
3533 ExceptionInfo *exception)
3534 {
3535 MagickBooleanType
3536 status;
3537
3538 MagickCLEnv
3539 clEnv;
3540
3541 assert(image != NULL);
3542 assert(exception != (ExceptionInfo *) NULL);
3543 if (image->debug != MagickFalse)
3544 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3545
3546 if (checkAccelerateConditionRGBA(image) == MagickFalse)
3547 return(MagickFalse);
3548
3549 if ((colorspace != HSLColorspace) && (colorspace != UndefinedColorspace))
3550 return(MagickFalse);
3551
3552 clEnv=getOpenCLEnvironment(exception);
3553 if (clEnv == (MagickCLEnv) NULL)
3554 return(MagickFalse);
3555
3556 status=ComputeModulateImage(image,clEnv,percent_brightness,percent_hue,
3557 percent_saturation,colorspace,exception);
3558 return(status);
3559 }
3560
3561 /*
3562 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3563 % %
3564 % %
3565 % %
3566 % A c c e l e r a t e M o t i o n B l u r I m a g e %
3567 % %
3568 % %
3569 % %
3570 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3571 */
3572
ComputeMotionBlurImage(const Image * image,MagickCLEnv clEnv,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3573 static Image* ComputeMotionBlurImage(const Image *image,MagickCLEnv clEnv,
3574 const double *kernel,const size_t width,const OffsetInfo *offset,
3575 ExceptionInfo *exception)
3576 {
3577 CacheView
3578 *filteredImage_view,
3579 *image_view;
3580
3581 cl_command_queue
3582 queue;
3583
3584 cl_float4
3585 biasPixel;
3586
3587 cl_int
3588 clStatus;
3589
3590 cl_kernel
3591 motionBlurKernel;
3592
3593 cl_event
3594 event;
3595
3596 cl_mem
3597 filteredImageBuffer,
3598 imageBuffer,
3599 imageKernelBuffer,
3600 offsetBuffer;
3601
3602 cl_mem_flags
3603 mem_flags;
3604
3605 const void
3606 *inputPixels;
3607
3608 float
3609 *kernelBufferPtr;
3610
3611 Image
3612 *filteredImage;
3613
3614 int
3615 *offsetBufferPtr;
3616
3617 MagickBooleanType
3618 outputReady;
3619
3620 MagickCLDevice
3621 device;
3622
3623 PixelInfo
3624 bias;
3625
3626 MagickSizeType
3627 length;
3628
3629 size_t
3630 global_work_size[2],
3631 local_work_size[2];
3632
3633 unsigned int
3634 i,
3635 imageHeight,
3636 imageWidth,
3637 matte;
3638
3639 void
3640 *filteredPixels,
3641 *hostPtr;
3642
3643 assert(image != (Image *) NULL);
3644 assert(image->signature == MagickCoreSignature);
3645 if (image->debug != MagickFalse)
3646 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3647
3648 outputReady = MagickFalse;
3649 filteredImage = NULL;
3650 filteredImage_view = NULL;
3651 imageBuffer = NULL;
3652 filteredImageBuffer = NULL;
3653 imageKernelBuffer = NULL;
3654 motionBlurKernel = NULL;
3655 queue = NULL;
3656
3657 device = RequestOpenCLDevice(clEnv);
3658
3659 /* Create and initialize OpenCL buffers. */
3660
3661 image_view=AcquireAuthenticCacheView(image,exception);
3662 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,
3663 image->rows,exception);
3664 if (inputPixels == (const void *) NULL)
3665 {
3666 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3667 "UnableToReadPixelCache.","`%s'",image->filename);
3668 goto cleanup;
3669 }
3670
3671 /*
3672 If the host pointer is aligned to the size of CLPixelPacket, then use
3673 the host buffer directly from the GPU; otherwise, create a buffer on
3674 the GPU and copy the data over
3675 */
3676 if (ALIGNED(inputPixels,CLPixelPacket))
3677 {
3678 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3679 }
3680 else
3681 {
3682 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3683 }
3684 /*
3685 create a CL buffer from image pixel buffer
3686 */
3687 length = image->columns * image->rows;
3688 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3689 length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3690 if (clStatus != CL_SUCCESS)
3691 {
3692 (void) ThrowMagickException(exception, GetMagickModule(),
3693 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3694 goto cleanup;
3695 }
3696
3697
3698 filteredImage = CloneImage(image,image->columns,image->rows,
3699 MagickTrue,exception);
3700 assert(filteredImage != NULL);
3701 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3702 {
3703 (void) ThrowMagickException(exception, GetMagickModule(),
3704 ResourceLimitError, "CloneImage failed.", ".");
3705 goto cleanup;
3706 }
3707 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3708 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3709 if (filteredPixels == (void *) NULL)
3710 {
3711 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3712 "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3713 goto cleanup;
3714 }
3715
3716 if (ALIGNED(filteredPixels,CLPixelPacket))
3717 {
3718 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3719 hostPtr = filteredPixels;
3720 }
3721 else
3722 {
3723 mem_flags = CL_MEM_WRITE_ONLY;
3724 hostPtr = NULL;
3725 }
3726 /*
3727 Create a CL buffer from image pixel buffer.
3728 */
3729 length = image->columns * image->rows;
3730 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3731 length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3732 if (clStatus != CL_SUCCESS)
3733 {
3734 (void) ThrowMagickException(exception, GetMagickModule(),
3735 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3736 goto cleanup;
3737 }
3738
3739
3740 imageKernelBuffer = clEnv->library->clCreateBuffer(device->context,
3741 CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
3742 &clStatus);
3743 if (clStatus != CL_SUCCESS)
3744 {
3745 (void) ThrowMagickException(exception, GetMagickModule(),
3746 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3747 goto cleanup;
3748 }
3749
3750 queue = AcquireOpenCLCommandQueue(device);
3751 kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
3752 CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
3753 if (clStatus != CL_SUCCESS)
3754 {
3755 (void) ThrowMagickException(exception, GetMagickModule(),
3756 ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3757 goto cleanup;
3758 }
3759 for (i = 0; i < width; i++)
3760 {
3761 kernelBufferPtr[i] = (float) kernel[i];
3762 }
3763 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
3764 0, NULL, NULL);
3765 if (clStatus != CL_SUCCESS)
3766 {
3767 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3768 "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3769 goto cleanup;
3770 }
3771
3772 offsetBuffer = clEnv->library->clCreateBuffer(device->context,
3773 CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
3774 &clStatus);
3775 if (clStatus != CL_SUCCESS)
3776 {
3777 (void) ThrowMagickException(exception, GetMagickModule(),
3778 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3779 goto cleanup;
3780 }
3781
3782 offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
3783 CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
3784 if (clStatus != CL_SUCCESS)
3785 {
3786 (void) ThrowMagickException(exception, GetMagickModule(),
3787 ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3788 goto cleanup;
3789 }
3790 for (i = 0; i < width; i++)
3791 {
3792 offsetBufferPtr[2*i] = (int)offset[i].x;
3793 offsetBufferPtr[2*i+1] = (int)offset[i].y;
3794 }
3795 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
3796 NULL, NULL);
3797 if (clStatus != CL_SUCCESS)
3798 {
3799 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3800 "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3801 goto cleanup;
3802 }
3803
3804
3805 /*
3806 Get the OpenCL kernel
3807 */
3808 motionBlurKernel = AcquireOpenCLKernel(device,"MotionBlur");
3809 if (motionBlurKernel == NULL)
3810 {
3811 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3812 "AcquireOpenCLKernel failed.", ".");
3813 goto cleanup;
3814 }
3815
3816 /*
3817 Set the kernel arguments.
3818 */
3819 i = 0;
3820 clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3821 (void *)&imageBuffer);
3822 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3823 (void *)&filteredImageBuffer);
3824 imageWidth = (unsigned int) image->columns;
3825 imageHeight = (unsigned int) image->rows;
3826 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3827 &imageWidth);
3828 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3829 &imageHeight);
3830 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3831 (void *)&imageKernelBuffer);
3832 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3833 &width);
3834 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3835 (void *)&offsetBuffer);
3836
3837 GetPixelInfo(image,&bias);
3838 biasPixel.s[0] = bias.red;
3839 biasPixel.s[1] = bias.green;
3840 biasPixel.s[2] = bias.blue;
3841 biasPixel.s[3] = bias.alpha;
3842 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
3843
3844 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
3845 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
3846 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
3847 if (clStatus != CL_SUCCESS)
3848 {
3849 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3850 "clEnv->library->clSetKernelArg failed.", ".");
3851 goto cleanup;
3852 }
3853
3854 /*
3855 Launch the kernel.
3856 */
3857 local_work_size[0] = 16;
3858 local_work_size[1] = 16;
3859 global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3860 (unsigned int) image->columns,(unsigned int) local_work_size[0]);
3861 global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3862 (unsigned int) image->rows,(unsigned int) local_work_size[1]);
3863 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
3864 global_work_size, local_work_size, 0, NULL, &event);
3865
3866 if (clStatus != CL_SUCCESS)
3867 {
3868 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3869 "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3870 goto cleanup;
3871 }
3872 RecordProfileData(device,motionBlurKernel,event);
3873
3874 if (ALIGNED(filteredPixels,CLPixelPacket))
3875 {
3876 length = image->columns * image->rows;
3877 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
3878 CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
3879 NULL, &clStatus);
3880 }
3881 else
3882 {
3883 length = image->columns * image->rows;
3884 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
3885 length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3886 }
3887 if (clStatus != CL_SUCCESS)
3888 {
3889 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3890 "Reading output image from CL buffer failed.", ".");
3891 goto cleanup;
3892 }
3893 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3894
3895 cleanup:
3896
3897 image_view=DestroyCacheView(image_view);
3898 if (filteredImage_view != NULL)
3899 filteredImage_view=DestroyCacheView(filteredImage_view);
3900
3901 if (filteredImageBuffer!=NULL)
3902 clEnv->library->clReleaseMemObject(filteredImageBuffer);
3903 if (imageBuffer!=NULL)
3904 clEnv->library->clReleaseMemObject(imageBuffer);
3905 if (imageKernelBuffer!=NULL)
3906 clEnv->library->clReleaseMemObject(imageKernelBuffer);
3907 if (motionBlurKernel!=NULL)
3908 ReleaseOpenCLKernel(motionBlurKernel);
3909 if (queue != NULL)
3910 ReleaseOpenCLCommandQueue(device,queue);
3911 if (device != NULL)
3912 ReleaseOpenCLDevice(device);
3913 if (outputReady == MagickFalse && filteredImage != NULL)
3914 filteredImage=DestroyImage(filteredImage);
3915
3916 return(filteredImage);
3917 }
3918
AccelerateMotionBlurImage(const Image * image,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3919 MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
3920 const double* kernel,const size_t width,const OffsetInfo *offset,
3921 ExceptionInfo *exception)
3922 {
3923 Image
3924 *filteredImage;
3925
3926 MagickCLEnv
3927 clEnv;
3928
3929 assert(image != NULL);
3930 assert(kernel != (double *) NULL);
3931 assert(offset != (OffsetInfo *) NULL);
3932 assert(exception != (ExceptionInfo *) NULL);
3933
3934 if (checkAccelerateConditionRGBA(image) == MagickFalse)
3935 return((Image *) NULL);
3936
3937 clEnv=getOpenCLEnvironment(exception);
3938 if (clEnv == (MagickCLEnv) NULL)
3939 return((Image *) NULL);
3940
3941 filteredImage=ComputeMotionBlurImage(image,clEnv,kernel,width,offset,
3942 exception);
3943 return(filteredImage);
3944 }
3945
3946 /*
3947 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3948 % %
3949 % %
3950 % %
3951 % A c c e l e r a t e R e s i z e I m a g e %
3952 % %
3953 % %
3954 % %
3955 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3956 */
3957
resizeHorizontalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float xFactor,ExceptionInfo * exception)3958 static MagickBooleanType resizeHorizontalFilter(MagickCLDevice device,
3959 cl_command_queue queue,const Image *image,Image *filteredImage,
3960 cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
3961 cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
3962 const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
3963 const float xFactor,ExceptionInfo *exception)
3964 {
3965 cl_kernel
3966 horizontalKernel;
3967
3968 cl_int
3969 status;
3970
3971 const unsigned int
3972 workgroupSize = 256;
3973
3974 float
3975 resizeFilterScale,
3976 resizeFilterSupport,
3977 resizeFilterWindowSupport,
3978 resizeFilterBlur,
3979 scale,
3980 support;
3981
3982 int
3983 cacheRangeStart,
3984 cacheRangeEnd,
3985 numCachedPixels,
3986 resizeFilterType,
3987 resizeWindowType;
3988
3989 MagickBooleanType
3990 outputReady;
3991
3992 size_t
3993 gammaAccumulatorLocalMemorySize,
3994 gsize[2],
3995 i,
3996 imageCacheLocalMemorySize,
3997 pixelAccumulatorLocalMemorySize,
3998 lsize[2],
3999 totalLocalMemorySize,
4000 weightAccumulatorLocalMemorySize;
4001
4002 unsigned int
4003 chunkSize,
4004 pixelPerWorkgroup;
4005
4006 horizontalKernel=NULL;
4007 outputReady=MagickFalse;
4008
4009 /*
4010 Apply filter to resize vertically from image to resize image.
4011 */
4012 scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
4013 support=scale*GetResizeFilterSupport(resizeFilter);
4014 if (support < 0.5)
4015 {
4016 /*
4017 Support too small even for nearest neighbour: Reduce to point
4018 sampling.
4019 */
4020 support=(float) 0.5;
4021 scale=1.0;
4022 }
4023 scale=PerceptibleReciprocal(scale);
4024
4025 if (resizedColumns < workgroupSize)
4026 {
4027 chunkSize=32;
4028 pixelPerWorkgroup=32;
4029 }
4030 else
4031 {
4032 chunkSize=workgroupSize;
4033 pixelPerWorkgroup=workgroupSize;
4034 }
4035
4036 DisableMSCWarning(4127)
4037 while(1)
4038 RestoreMSCWarning
4039 {
4040 /* calculate the local memory size needed per workgroup */
4041 cacheRangeStart=(int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
4042 cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+
4043 MagickEpsilon)+support+0.5);
4044 numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4045 imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4046 number_channels;
4047 totalLocalMemorySize=imageCacheLocalMemorySize;
4048
4049 /* local size for the pixel accumulator */
4050 pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4051 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4052
4053 /* local memory size for the weight accumulator */
4054 weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4055 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4056
4057 /* local memory size for the gamma accumulator */
4058 if ((number_channels == 4) || (number_channels == 2))
4059 gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4060 else
4061 gammaAccumulatorLocalMemorySize=sizeof(float);
4062 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4063
4064 if (totalLocalMemorySize <= device->local_memory_size)
4065 break;
4066 else
4067 {
4068 pixelPerWorkgroup=pixelPerWorkgroup/2;
4069 chunkSize=chunkSize/2;
4070 if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4071 {
4072 /* quit, fallback to CPU */
4073 goto cleanup;
4074 }
4075 }
4076 }
4077
4078 resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4079 resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4080
4081 horizontalKernel=AcquireOpenCLKernel(device,"ResizeHorizontalFilter");
4082 if (horizontalKernel == (cl_kernel) NULL)
4083 {
4084 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4085 ResourceLimitWarning,"AcquireOpenCLKernel failed.", ".");
4086 goto cleanup;
4087 }
4088
4089 resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4090 resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4091 resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4092 resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4093
4094 i=0;
4095 status =SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4096 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4097 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&columns);
4098 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&rows);
4099 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4100 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4101 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4102 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&xFactor);
4103 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4104 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4105 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4106 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4107 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4108 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4109 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4110 status|=SetOpenCLKernelArg(horizontalKernel,i++,imageCacheLocalMemorySize,NULL);
4111 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),&numCachedPixels);
4112 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&pixelPerWorkgroup);
4113 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&chunkSize);
4114 status|=SetOpenCLKernelArg(horizontalKernel,i++,pixelAccumulatorLocalMemorySize,NULL);
4115 status|=SetOpenCLKernelArg(horizontalKernel,i++,weightAccumulatorLocalMemorySize,NULL);
4116 status|=SetOpenCLKernelArg(horizontalKernel,i++,gammaAccumulatorLocalMemorySize,NULL);
4117
4118 if (status != CL_SUCCESS)
4119 {
4120 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4121 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4122 goto cleanup;
4123 }
4124
4125 gsize[0]=(resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4126 workgroupSize;
4127 gsize[1]=resizedRows;
4128 lsize[0]=workgroupSize;
4129 lsize[1]=1;
4130 outputReady=EnqueueOpenCLKernel(queue,horizontalKernel,2,
4131 (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4132 exception);
4133
4134 cleanup:
4135
4136 if (horizontalKernel != (cl_kernel) NULL)
4137 ReleaseOpenCLKernel(horizontalKernel);
4138
4139 return(outputReady);
4140 }
4141
resizeVerticalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float yFactor,ExceptionInfo * exception)4142 static MagickBooleanType resizeVerticalFilter(MagickCLDevice device,
4143 cl_command_queue queue,const Image *image,Image * filteredImage,
4144 cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
4145 cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
4146 const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
4147 const float yFactor,ExceptionInfo *exception)
4148 {
4149 cl_kernel
4150 verticalKernel;
4151
4152 cl_int
4153 status;
4154
4155 const unsigned int
4156 workgroupSize = 256;
4157
4158 float
4159 resizeFilterScale,
4160 resizeFilterSupport,
4161 resizeFilterWindowSupport,
4162 resizeFilterBlur,
4163 scale,
4164 support;
4165
4166 int
4167 cacheRangeStart,
4168 cacheRangeEnd,
4169 numCachedPixels,
4170 resizeFilterType,
4171 resizeWindowType;
4172
4173 MagickBooleanType
4174 outputReady;
4175
4176 size_t
4177 gammaAccumulatorLocalMemorySize,
4178 gsize[2],
4179 i,
4180 imageCacheLocalMemorySize,
4181 pixelAccumulatorLocalMemorySize,
4182 lsize[2],
4183 totalLocalMemorySize,
4184 weightAccumulatorLocalMemorySize;
4185
4186 unsigned int
4187 chunkSize,
4188 pixelPerWorkgroup;
4189
4190 verticalKernel=NULL;
4191 outputReady=MagickFalse;
4192
4193 /*
4194 Apply filter to resize vertically from image to resize image.
4195 */
4196 scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
4197 support=scale*GetResizeFilterSupport(resizeFilter);
4198 if (support < 0.5)
4199 {
4200 /*
4201 Support too small even for nearest neighbour: Reduce to point
4202 sampling.
4203 */
4204 support=(float) 0.5;
4205 scale=1.0;
4206 }
4207 scale=PerceptibleReciprocal(scale);
4208
4209 if (resizedRows < workgroupSize)
4210 {
4211 chunkSize=32;
4212 pixelPerWorkgroup=32;
4213 }
4214 else
4215 {
4216 chunkSize=workgroupSize;
4217 pixelPerWorkgroup=workgroupSize;
4218 }
4219
4220 DisableMSCWarning(4127)
4221 while(1)
4222 RestoreMSCWarning
4223 {
4224 /* calculate the local memory size needed per workgroup */
4225 cacheRangeStart=(int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
4226 cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+
4227 MagickEpsilon)+support+0.5);
4228 numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4229 imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4230 number_channels;
4231 totalLocalMemorySize=imageCacheLocalMemorySize;
4232
4233 /* local size for the pixel accumulator */
4234 pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4235 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4236
4237 /* local memory size for the weight accumulator */
4238 weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4239 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4240
4241 /* local memory size for the gamma accumulator */
4242 if ((number_channels == 4) || (number_channels == 2))
4243 gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4244 else
4245 gammaAccumulatorLocalMemorySize=sizeof(float);
4246 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4247
4248 if (totalLocalMemorySize <= device->local_memory_size)
4249 break;
4250 else
4251 {
4252 pixelPerWorkgroup=pixelPerWorkgroup/2;
4253 chunkSize=chunkSize/2;
4254 if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4255 {
4256 /* quit, fallback to CPU */
4257 goto cleanup;
4258 }
4259 }
4260 }
4261
4262 resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4263 resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4264
4265 verticalKernel=AcquireOpenCLKernel(device,"ResizeVerticalFilter");
4266 if (verticalKernel == (cl_kernel) NULL)
4267 {
4268 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4269 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4270 goto cleanup;
4271 }
4272
4273 resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4274 resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4275 resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4276 resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4277
4278 i=0;
4279 status =SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4280 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4281 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&columns);
4282 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&rows);
4283 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4284 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4285 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4286 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&yFactor);
4287 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4288 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4289 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4290 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4291 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4292 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4293 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4294 status|=SetOpenCLKernelArg(verticalKernel,i++,imageCacheLocalMemorySize, NULL);
4295 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int), &numCachedPixels);
4296 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &pixelPerWorkgroup);
4297 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &chunkSize);
4298 status|=SetOpenCLKernelArg(verticalKernel,i++,pixelAccumulatorLocalMemorySize, NULL);
4299 status|=SetOpenCLKernelArg(verticalKernel,i++,weightAccumulatorLocalMemorySize, NULL);
4300 status|=SetOpenCLKernelArg(verticalKernel,i++,gammaAccumulatorLocalMemorySize, NULL);
4301
4302 if (status != CL_SUCCESS)
4303 {
4304 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4305 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4306 goto cleanup;
4307 }
4308
4309 gsize[0]=resizedColumns;
4310 gsize[1]=(resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4311 workgroupSize;
4312 lsize[0]=1;
4313 lsize[1]=workgroupSize;
4314 outputReady=EnqueueOpenCLKernel(queue,verticalKernel,2,(const size_t *) NULL,
4315 gsize,lsize,image,filteredImage,MagickFalse,exception);
4316
4317 cleanup:
4318
4319 if (verticalKernel != (cl_kernel) NULL)
4320 ReleaseOpenCLKernel(verticalKernel);
4321
4322 return(outputReady);
4323 }
4324
ComputeResizeImage(const Image * image,MagickCLEnv clEnv,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4325 static Image *ComputeResizeImage(const Image* image,MagickCLEnv clEnv,
4326 const size_t resizedColumns,const size_t resizedRows,
4327 const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4328 {
4329 cl_command_queue
4330 queue;
4331
4332 cl_mem
4333 cubicCoefficientsBuffer,
4334 filteredImageBuffer,
4335 imageBuffer,
4336 tempImageBuffer;
4337
4338 cl_uint
4339 number_channels;
4340
4341 const double
4342 *resizeFilterCoefficient;
4343
4344 float
4345 coefficientBuffer[7],
4346 xFactor,
4347 yFactor;
4348
4349 MagickBooleanType
4350 outputReady;
4351
4352 MagickCLDevice
4353 device;
4354
4355 MagickSizeType
4356 length;
4357
4358 Image
4359 *filteredImage;
4360
4361 size_t
4362 i;
4363
4364 filteredImage=NULL;
4365 imageBuffer=NULL;
4366 filteredImageBuffer=NULL;
4367 tempImageBuffer=NULL;
4368 cubicCoefficientsBuffer=NULL;
4369 outputReady=MagickFalse;
4370
4371 device=RequestOpenCLDevice(clEnv);
4372 queue=AcquireOpenCLCommandQueue(device);
4373 filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,
4374 exception);
4375 if (filteredImage == (Image *) NULL)
4376 goto cleanup;
4377 if (filteredImage->number_channels != image->number_channels)
4378 goto cleanup;
4379 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4380 if (imageBuffer == (cl_mem) NULL)
4381 goto cleanup;
4382 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4383 if (filteredImageBuffer == (cl_mem) NULL)
4384 goto cleanup;
4385
4386 resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
4387 for (i = 0; i < 7; i++)
4388 coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
4389 cubicCoefficientsBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
4390 CL_MEM_READ_ONLY,sizeof(coefficientBuffer),&coefficientBuffer);
4391 if (cubicCoefficientsBuffer == (cl_mem) NULL)
4392 {
4393 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4394 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4395 goto cleanup;
4396 }
4397
4398 number_channels=(cl_uint) image->number_channels;
4399 xFactor=(float) resizedColumns/(float) image->columns;
4400 yFactor=(float) resizedRows/(float) image->rows;
4401 if (xFactor > yFactor)
4402 {
4403 length=resizedColumns*image->rows*number_channels;
4404 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4405 sizeof(CLQuantum),(void *) NULL);
4406 if (tempImageBuffer == (cl_mem) NULL)
4407 {
4408 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4409 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4410 goto cleanup;
4411 }
4412
4413 outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4414 imageBuffer,number_channels,(cl_uint) image->columns,
4415 (cl_uint) image->rows,tempImageBuffer,(cl_uint) resizedColumns,
4416 (cl_uint) image->rows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4417 exception);
4418 if (outputReady == MagickFalse)
4419 goto cleanup;
4420
4421 outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4422 tempImageBuffer,number_channels,(cl_uint) resizedColumns,
4423 (cl_uint) image->rows,filteredImageBuffer,(cl_uint) resizedColumns,
4424 (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4425 exception);
4426 if (outputReady == MagickFalse)
4427 goto cleanup;
4428 }
4429 else
4430 {
4431 length=image->columns*resizedRows*number_channels;
4432 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4433 sizeof(CLQuantum),(void *) NULL);
4434 if (tempImageBuffer == (cl_mem) NULL)
4435 {
4436 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4437 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4438 goto cleanup;
4439 }
4440
4441 outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4442 imageBuffer,number_channels,(cl_uint) image->columns,
4443 (cl_int) image->rows,tempImageBuffer,(cl_uint) image->columns,
4444 (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4445 exception);
4446 if (outputReady == MagickFalse)
4447 goto cleanup;
4448
4449 outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4450 tempImageBuffer,number_channels,(cl_uint) image->columns,
4451 (cl_uint) resizedRows,filteredImageBuffer,(cl_uint) resizedColumns,
4452 (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4453 exception);
4454 if (outputReady == MagickFalse)
4455 goto cleanup;
4456 }
4457
4458 cleanup:
4459
4460 if (imageBuffer != (cl_mem) NULL)
4461 ReleaseOpenCLMemObject(imageBuffer);
4462 if (filteredImageBuffer != (cl_mem) NULL)
4463 ReleaseOpenCLMemObject(filteredImageBuffer);
4464 if (tempImageBuffer != (cl_mem) NULL)
4465 ReleaseOpenCLMemObject(tempImageBuffer);
4466 if (cubicCoefficientsBuffer != (cl_mem) NULL)
4467 ReleaseOpenCLMemObject(cubicCoefficientsBuffer);
4468 if (queue != (cl_command_queue) NULL)
4469 ReleaseOpenCLCommandQueue(device,queue);
4470 if (device != (MagickCLDevice) NULL)
4471 ReleaseOpenCLDevice(device);
4472 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4473 filteredImage=DestroyImage(filteredImage);
4474
4475 return(filteredImage);
4476 }
4477
gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)4478 static MagickBooleanType gpuSupportedResizeWeighting(
4479 ResizeWeightingFunctionType f)
4480 {
4481 unsigned int
4482 i;
4483
4484 for (i = 0; ;i++)
4485 {
4486 if (supportedResizeWeighting[i] == LastWeightingFunction)
4487 break;
4488 if (supportedResizeWeighting[i] == f)
4489 return(MagickTrue);
4490 }
4491 return(MagickFalse);
4492 }
4493
AccelerateResizeImage(const Image * image,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4494 MagickPrivate Image *AccelerateResizeImage(const Image *image,
4495 const size_t resizedColumns,const size_t resizedRows,
4496 const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4497 {
4498 Image
4499 *filteredImage;
4500
4501 MagickCLEnv
4502 clEnv;
4503
4504 assert(image != NULL);
4505 assert(exception != (ExceptionInfo *) NULL);
4506
4507 if (checkAccelerateCondition(image) == MagickFalse)
4508 return((Image *) NULL);
4509
4510 if ((gpuSupportedResizeWeighting(GetResizeFilterWeightingType(
4511 resizeFilter)) == MagickFalse) ||
4512 (gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(
4513 resizeFilter)) == MagickFalse))
4514 return((Image *) NULL);
4515
4516 clEnv=getOpenCLEnvironment(exception);
4517 if (clEnv == (MagickCLEnv) NULL)
4518 return((Image *) NULL);
4519
4520 filteredImage=ComputeResizeImage(image,clEnv,resizedColumns,resizedRows,
4521 resizeFilter,exception);
4522 return(filteredImage);
4523 }
4524
4525 /*
4526 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4527 % %
4528 % %
4529 % %
4530 % A c c e l e r a t e R o t a t i o n a l B l u r I m a g e %
4531 % %
4532 % %
4533 % %
4534 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4535 */
4536
ComputeRotationalBlurImage(const Image * image,MagickCLEnv clEnv,const double angle,ExceptionInfo * exception)4537 static Image* ComputeRotationalBlurImage(const Image *image,MagickCLEnv clEnv,
4538 const double angle,ExceptionInfo *exception)
4539 {
4540 cl_command_queue
4541 queue;
4542
4543 cl_float2
4544 blurCenter;
4545
4546 cl_int
4547 status;
4548
4549 cl_mem
4550 cosThetaBuffer,
4551 filteredImageBuffer,
4552 imageBuffer,
4553 sinThetaBuffer;
4554
4555 cl_kernel
4556 rotationalBlurKernel;
4557
4558 cl_uint
4559 cossin_theta_size,
4560 number_channels;
4561
4562 float
4563 blurRadius,
4564 *cosThetaPtr,
4565 offset,
4566 *sinThetaPtr,
4567 theta;
4568
4569 Image
4570 *filteredImage;
4571
4572 MagickBooleanType
4573 outputReady;
4574
4575 MagickCLDevice
4576 device;
4577
4578 size_t
4579 gsize[2],
4580 i;
4581
4582 assert(image != (Image *) NULL);
4583 assert(image->signature == MagickCoreSignature);
4584 if (image->debug != MagickFalse)
4585 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
4586
4587 filteredImage=NULL;
4588 imageBuffer=NULL;
4589 filteredImageBuffer=NULL;
4590 sinThetaBuffer=NULL;
4591 cosThetaBuffer=NULL;
4592 rotationalBlurKernel=NULL;
4593 outputReady=MagickFalse;
4594
4595 device=RequestOpenCLDevice(clEnv);
4596 queue=AcquireOpenCLCommandQueue(device);
4597 filteredImage=cloneImage(image,exception);
4598 if (filteredImage == (Image *) NULL)
4599 goto cleanup;
4600 if (filteredImage->number_channels != image->number_channels)
4601 goto cleanup;
4602 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4603 if (imageBuffer == (cl_mem) NULL)
4604 goto cleanup;
4605 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4606 if (filteredImageBuffer == (cl_mem) NULL)
4607 goto cleanup;
4608
4609 blurCenter.x=(float) (image->columns-1)/2.0;
4610 blurCenter.y=(float) (image->rows-1)/2.0;
4611 blurRadius=hypot(blurCenter.x,blurCenter.y);
4612 cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt(
4613 (double) blurRadius)+2UL);
4614
4615 cosThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4616 if (cosThetaPtr == (float *) NULL)
4617 goto cleanup;
4618 sinThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4619 if (sinThetaPtr == (float *) NULL)
4620 {
4621 cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4622 goto cleanup;
4623 }
4624
4625 theta=DegreesToRadians(angle)/(double) (cossin_theta_size-1);
4626 offset=theta*(float) (cossin_theta_size-1)/2.0;
4627 for (i=0; i < (ssize_t) cossin_theta_size; i++)
4628 {
4629 cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
4630 sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
4631 }
4632
4633 sinThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4634 CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),sinThetaPtr);
4635 sinThetaPtr=RelinquishMagickMemory(sinThetaPtr);
4636 cosThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4637 CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),cosThetaPtr);
4638 cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4639 if ((sinThetaBuffer == (cl_mem) NULL) || (cosThetaBuffer == (cl_mem) NULL))
4640 {
4641 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4642 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4643 goto cleanup;
4644 }
4645
4646 rotationalBlurKernel=AcquireOpenCLKernel(device,"RotationalBlur");
4647 if (rotationalBlurKernel == (cl_kernel) NULL)
4648 {
4649 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4650 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4651 goto cleanup;
4652 }
4653
4654 number_channels=(cl_uint) image->number_channels;
4655
4656 i=0;
4657 status =SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4658 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint),&number_channels);
4659 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
4660 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
4661 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
4662 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
4663 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint), &cossin_theta_size);
4664 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4665 if (status != CL_SUCCESS)
4666 {
4667 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4668 ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4669 goto cleanup;
4670 }
4671
4672 gsize[0]=image->columns;
4673 gsize[1]=image->rows;
4674 outputReady=EnqueueOpenCLKernel(queue,rotationalBlurKernel,2,
4675 (const size_t *) NULL,gsize,(const size_t *) NULL,image,filteredImage,
4676 MagickFalse,exception);
4677
4678 cleanup:
4679
4680 if (imageBuffer != (cl_mem) NULL)
4681 ReleaseOpenCLMemObject(imageBuffer);
4682 if (filteredImageBuffer != (cl_mem) NULL)
4683 ReleaseOpenCLMemObject(filteredImageBuffer);
4684 if (sinThetaBuffer != (cl_mem) NULL)
4685 ReleaseOpenCLMemObject(sinThetaBuffer);
4686 if (cosThetaBuffer != (cl_mem) NULL)
4687 ReleaseOpenCLMemObject(cosThetaBuffer);
4688 if (rotationalBlurKernel != (cl_kernel) NULL)
4689 ReleaseOpenCLKernel(rotationalBlurKernel);
4690 if (queue != (cl_command_queue) NULL)
4691 ReleaseOpenCLCommandQueue(device,queue);
4692 if (device != (MagickCLDevice) NULL)
4693 ReleaseOpenCLDevice(device);
4694 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4695 filteredImage=DestroyImage(filteredImage);
4696
4697 return(filteredImage);
4698 }
4699
AccelerateRotationalBlurImage(const Image * image,const double angle,ExceptionInfo * exception)4700 MagickPrivate Image* AccelerateRotationalBlurImage(const Image *image,
4701 const double angle,ExceptionInfo *exception)
4702 {
4703 Image
4704 *filteredImage;
4705
4706 MagickCLEnv
4707 clEnv;
4708
4709 assert(image != NULL);
4710 assert(exception != (ExceptionInfo *) NULL);
4711 if (image->debug != MagickFalse)
4712 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
4713
4714 if (checkAccelerateCondition(image) == MagickFalse)
4715 return((Image *) NULL);
4716
4717 clEnv=getOpenCLEnvironment(exception);
4718 if (clEnv == (MagickCLEnv) NULL)
4719 return((Image *) NULL);
4720
4721 filteredImage=ComputeRotationalBlurImage(image,clEnv,angle,exception);
4722 return filteredImage;
4723 }
4724
4725 /*
4726 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4727 % %
4728 % %
4729 % %
4730 % A c c e l e r a t e U n s h a r p M a s k I m a g e %
4731 % %
4732 % %
4733 % %
4734 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4735 */
4736
ComputeUnsharpMaskImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4737 static Image *ComputeUnsharpMaskImage(const Image *image,MagickCLEnv clEnv,
4738 const double radius,const double sigma,const double gain,
4739 const double threshold,ExceptionInfo *exception)
4740 {
4741 cl_command_queue
4742 queue;
4743
4744 cl_int
4745 status;
4746
4747 cl_kernel
4748 blurRowKernel,
4749 unsharpMaskBlurColumnKernel;
4750
4751 cl_mem
4752 filteredImageBuffer,
4753 imageBuffer,
4754 imageKernelBuffer,
4755 tempImageBuffer;
4756
4757 cl_uint
4758 imageColumns,
4759 imageRows,
4760 kernelWidth,
4761 number_channels;
4762
4763 float
4764 fGain,
4765 fThreshold;
4766
4767 Image
4768 *filteredImage;
4769
4770 int
4771 chunkSize;
4772
4773 MagickBooleanType
4774 outputReady;
4775
4776 MagickCLDevice
4777 device;
4778
4779 MagickSizeType
4780 length;
4781
4782 size_t
4783 gsize[2],
4784 i,
4785 lsize[2];
4786
4787 filteredImage=NULL;
4788 imageBuffer=NULL;
4789 filteredImageBuffer=NULL;
4790 tempImageBuffer=NULL;
4791 imageKernelBuffer=NULL;
4792 blurRowKernel=NULL;
4793 unsharpMaskBlurColumnKernel=NULL;
4794 outputReady=MagickFalse;
4795
4796 device=RequestOpenCLDevice(clEnv);
4797 queue=AcquireOpenCLCommandQueue(device);
4798 filteredImage=cloneImage(image,exception);
4799 if (filteredImage == (Image *) NULL)
4800 goto cleanup;
4801 if (filteredImage->number_channels != image->number_channels)
4802 goto cleanup;
4803 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4804 if (imageBuffer == (cl_mem) NULL)
4805 goto cleanup;
4806 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4807 if (filteredImageBuffer == (cl_mem) NULL)
4808 goto cleanup;
4809
4810 imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4811 exception);
4812
4813 length=image->columns*image->rows;
4814 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4815 sizeof(cl_float4),NULL);
4816 if (tempImageBuffer == (cl_mem) NULL)
4817 {
4818 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4819 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4820 goto cleanup;
4821 }
4822
4823 blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
4824 if (blurRowKernel == (cl_kernel) NULL)
4825 {
4826 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4827 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4828 goto cleanup;
4829 }
4830
4831 unsharpMaskBlurColumnKernel=AcquireOpenCLKernel(device,
4832 "UnsharpMaskBlurColumn");
4833 if (unsharpMaskBlurColumnKernel == (cl_kernel) NULL)
4834 {
4835 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4836 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4837 goto cleanup;
4838 }
4839
4840 number_channels=(cl_uint) image->number_channels;
4841 imageColumns=(cl_uint) image->columns;
4842 imageRows=(cl_uint) image->rows;
4843
4844 chunkSize = 256;
4845
4846 i=0;
4847 status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4848 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
4849 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
4850 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4851 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4852 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4853 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4854 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
4855 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4856 if (status != CL_SUCCESS)
4857 {
4858 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4859 ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4860 goto cleanup;
4861 }
4862
4863 gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
4864 gsize[1]=image->rows;
4865 lsize[0]=chunkSize;
4866 lsize[1]=1;
4867 outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,
4868 (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4869 exception);
4870
4871 chunkSize=256;
4872 fGain=(float) gain;
4873 fThreshold=(float) threshold;
4874
4875 i=0;
4876 status =SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4877 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4878 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
4879 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
4880 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4881 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4882 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
4883 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
4884 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4885 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4886 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
4887 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
4888 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4889 if (status != CL_SUCCESS)
4890 {
4891 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4892 ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4893 goto cleanup;
4894 }
4895
4896 gsize[0]=image->columns;
4897 gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
4898 lsize[0]=1;
4899 lsize[1]=chunkSize;
4900 outputReady=EnqueueOpenCLKernel(queue,unsharpMaskBlurColumnKernel,2,
4901 (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4902 exception);
4903
4904 cleanup:
4905
4906 if (imageBuffer != (cl_mem) NULL)
4907 ReleaseOpenCLMemObject(imageBuffer);
4908 if (filteredImageBuffer != (cl_mem) NULL)
4909 ReleaseOpenCLMemObject(filteredImageBuffer);
4910 if (tempImageBuffer != (cl_mem) NULL)
4911 ReleaseOpenCLMemObject(tempImageBuffer);
4912 if (imageKernelBuffer != (cl_mem) NULL)
4913 ReleaseOpenCLMemObject(imageKernelBuffer);
4914 if (blurRowKernel != (cl_kernel) NULL)
4915 ReleaseOpenCLKernel(blurRowKernel);
4916 if (unsharpMaskBlurColumnKernel != (cl_kernel) NULL)
4917 ReleaseOpenCLKernel(unsharpMaskBlurColumnKernel);
4918 if (queue != (cl_command_queue) NULL)
4919 ReleaseOpenCLCommandQueue(device,queue);
4920 if (device != (MagickCLDevice) NULL)
4921 ReleaseOpenCLDevice(device);
4922 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4923 filteredImage=DestroyImage(filteredImage);
4924
4925 return(filteredImage);
4926 }
4927
ComputeUnsharpMaskImageSingle(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4928 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
4929 MagickCLEnv clEnv,const double radius,const double sigma,const double gain,
4930 const double threshold,ExceptionInfo *exception)
4931 {
4932 cl_command_queue
4933 queue;
4934
4935 cl_int
4936 status;
4937
4938 cl_kernel
4939 unsharpMaskKernel;
4940
4941 cl_mem
4942 filteredImageBuffer,
4943 imageBuffer,
4944 imageKernelBuffer;
4945
4946 cl_uint
4947 imageColumns,
4948 imageRows,
4949 kernelWidth,
4950 number_channels;
4951
4952 float
4953 fGain,
4954 fThreshold;
4955
4956 Image
4957 *filteredImage;
4958
4959 MagickBooleanType
4960 outputReady;
4961
4962 MagickCLDevice
4963 device;
4964
4965 size_t
4966 gsize[2],
4967 i,
4968 lsize[2];
4969
4970 filteredImage=NULL;
4971 imageBuffer=NULL;
4972 filteredImageBuffer=NULL;
4973 imageKernelBuffer=NULL;
4974 unsharpMaskKernel=NULL;
4975 outputReady=MagickFalse;
4976
4977 device=RequestOpenCLDevice(clEnv);
4978 queue=AcquireOpenCLCommandQueue(device);
4979 filteredImage=cloneImage(image,exception);
4980 if (filteredImage == (Image *) NULL)
4981 goto cleanup;
4982 if (filteredImage->number_channels != image->number_channels)
4983 goto cleanup;
4984 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4985 if (imageBuffer == (cl_mem) NULL)
4986 goto cleanup;
4987 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4988 if (filteredImageBuffer == (cl_mem) NULL)
4989 goto cleanup;
4990
4991 imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4992 exception);
4993
4994 unsharpMaskKernel=AcquireOpenCLKernel(device,"UnsharpMask");
4995 if (unsharpMaskKernel == NULL)
4996 {
4997 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4998 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4999 goto cleanup;
5000 }
5001
5002 imageColumns=(cl_uint) image->columns;
5003 imageRows=(cl_uint) image->rows;
5004 number_channels=(cl_uint) image->number_channels;
5005 fGain=(float) gain;
5006 fThreshold=(float) threshold;
5007
5008 i=0;
5009 status =SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5010 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5011 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
5012 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
5013 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
5014 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
5015 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows);
5016 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL);
5017 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
5018 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
5019 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5020 if (status != CL_SUCCESS)
5021 {
5022 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5023 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5024 goto cleanup;
5025 }
5026
5027 gsize[0]=((image->columns + 7) / 8)*8;
5028 gsize[1]=((image->rows + 31) / 32)*32;
5029 lsize[0]=8;
5030 lsize[1]=32;
5031 outputReady=EnqueueOpenCLKernel(queue,unsharpMaskKernel,2,(const size_t *) NULL,
5032 gsize,lsize,image,filteredImage,MagickFalse,exception);
5033
5034 cleanup:
5035
5036 if (imageBuffer != (cl_mem) NULL)
5037 ReleaseOpenCLMemObject(imageBuffer);
5038 if (filteredImageBuffer != (cl_mem) NULL)
5039 ReleaseOpenCLMemObject(filteredImageBuffer);
5040 if (imageKernelBuffer != (cl_mem) NULL)
5041 ReleaseOpenCLMemObject(imageKernelBuffer);
5042 if (unsharpMaskKernel != (cl_kernel) NULL)
5043 ReleaseOpenCLKernel(unsharpMaskKernel);
5044 if (queue != (cl_command_queue) NULL)
5045 ReleaseOpenCLCommandQueue(device,queue);
5046 if (device != (MagickCLDevice) NULL)
5047 ReleaseOpenCLDevice(device);
5048 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5049 filteredImage=DestroyImage(filteredImage);
5050
5051 return(filteredImage);
5052 }
5053
AccelerateUnsharpMaskImage(const Image * image,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)5054 MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
5055 const double radius,const double sigma,const double gain,
5056 const double threshold,ExceptionInfo *exception)
5057 {
5058 Image
5059 *filteredImage;
5060
5061 MagickCLEnv
5062 clEnv;
5063
5064 assert(image != NULL);
5065 assert(exception != (ExceptionInfo *) NULL);
5066
5067 if (checkAccelerateCondition(image) == MagickFalse)
5068 return((Image *) NULL);
5069
5070 clEnv=getOpenCLEnvironment(exception);
5071 if (clEnv == (MagickCLEnv) NULL)
5072 return((Image *) NULL);
5073
5074 if (radius < 12.1)
5075 filteredImage=ComputeUnsharpMaskImageSingle(image,clEnv,radius,sigma,gain,
5076 threshold,exception);
5077 else
5078 filteredImage=ComputeUnsharpMaskImage(image,clEnv,radius,sigma,gain,
5079 threshold,exception);
5080 return(filteredImage);
5081 }
5082
ComputeWaveletDenoiseImage(const Image * image,MagickCLEnv clEnv,const double threshold,ExceptionInfo * exception)5083 static Image *ComputeWaveletDenoiseImage(const Image *image,MagickCLEnv clEnv,
5084 const double threshold,ExceptionInfo *exception)
5085 {
5086 cl_command_queue
5087 queue;
5088
5089 const cl_int
5090 PASSES=5;
5091
5092 const int
5093 TILESIZE=64,
5094 PAD=1<<(PASSES-1),
5095 SIZE=TILESIZE-2*PAD;
5096
5097 cl_float
5098 thresh;
5099
5100 cl_int
5101 status;
5102
5103 cl_kernel
5104 denoiseKernel;
5105
5106 cl_mem
5107 filteredImageBuffer,
5108 imageBuffer;
5109
5110 cl_uint
5111 number_channels,
5112 width,
5113 height,
5114 max_channels;
5115
5116 Image
5117 *filteredImage;
5118
5119 MagickBooleanType
5120 outputReady;
5121
5122 MagickCLDevice
5123 device;
5124
5125 size_t
5126 goffset[2],
5127 gsize[2],
5128 i,
5129 lsize[2],
5130 passes,
5131 x;
5132
5133 filteredImage=NULL;
5134 imageBuffer=NULL;
5135 filteredImageBuffer=NULL;
5136 denoiseKernel=NULL;
5137 queue=NULL;
5138 outputReady=MagickFalse;
5139
5140 device=RequestOpenCLDevice(clEnv);
5141 /* Work around an issue on low end Intel devices */
5142 if (strcmp("Intel(R) HD Graphics",device->name) == 0)
5143 goto cleanup;
5144 queue=AcquireOpenCLCommandQueue(device);
5145 filteredImage=CloneImage(image,0,0,MagickTrue,
5146 exception);
5147 if (filteredImage == (Image *) NULL)
5148 goto cleanup;
5149 if (filteredImage->number_channels != image->number_channels)
5150 goto cleanup;
5151 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
5152 if (imageBuffer == (cl_mem) NULL)
5153 goto cleanup;
5154 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
5155 if (filteredImageBuffer == (cl_mem) NULL)
5156 goto cleanup;
5157
5158 denoiseKernel=AcquireOpenCLKernel(device,"WaveletDenoise");
5159 if (denoiseKernel == (cl_kernel) NULL)
5160 {
5161 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5162 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5163 goto cleanup;
5164 }
5165
5166 number_channels=(cl_uint)image->number_channels;
5167 width=(cl_uint)image->columns;
5168 height=(cl_uint)image->rows;
5169 max_channels=number_channels;
5170 if ((max_channels == 4) || (max_channels == 2))
5171 max_channels=max_channels-1;
5172 thresh=threshold;
5173 passes=(((1.0f*image->columns)*image->rows)+1999999.0f)/2000000.0f;
5174 passes=(passes < 1) ? 1 : passes;
5175
5176 i=0;
5177 status =SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5178 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5179 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5180 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&max_channels);
5181 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_float),(void *)&thresh);
5182 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_int),(void *)&PASSES);
5183 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&width);
5184 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&height);
5185 if (status != CL_SUCCESS)
5186 {
5187 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5188 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5189 goto cleanup;
5190 }
5191
5192 for (x = 0; x < passes; ++x)
5193 {
5194 gsize[0]=((width+(SIZE-1))/SIZE)*TILESIZE;
5195 gsize[1]=((((height+(SIZE-1))/SIZE)+passes-1)/passes)*4;
5196 lsize[0]=TILESIZE;
5197 lsize[1]=4;
5198 goffset[0]=0;
5199 goffset[1]=x*gsize[1];
5200
5201 outputReady=EnqueueOpenCLKernel(queue,denoiseKernel,2,goffset,gsize,lsize,
5202 image,filteredImage,MagickTrue,exception);
5203 if (outputReady == MagickFalse)
5204 break;
5205 }
5206
5207 cleanup:
5208
5209 if (imageBuffer != (cl_mem) NULL)
5210 ReleaseOpenCLMemObject(imageBuffer);
5211 if (filteredImageBuffer != (cl_mem) NULL)
5212 ReleaseOpenCLMemObject(filteredImageBuffer);
5213 if (denoiseKernel != (cl_kernel) NULL)
5214 ReleaseOpenCLKernel(denoiseKernel);
5215 if (queue != (cl_command_queue) NULL)
5216 ReleaseOpenCLCommandQueue(device,queue);
5217 if (device != (MagickCLDevice) NULL)
5218 ReleaseOpenCLDevice(device);
5219 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5220 filteredImage=DestroyImage(filteredImage);
5221
5222 return(filteredImage);
5223 }
5224
AccelerateWaveletDenoiseImage(const Image * image,const double threshold,ExceptionInfo * exception)5225 MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
5226 const double threshold,ExceptionInfo *exception)
5227 {
5228 Image
5229 *filteredImage;
5230
5231 MagickCLEnv
5232 clEnv;
5233
5234 assert(image != NULL);
5235 assert(exception != (ExceptionInfo *)NULL);
5236
5237 if (checkAccelerateCondition(image) == MagickFalse)
5238 return((Image *) NULL);
5239
5240 clEnv=getOpenCLEnvironment(exception);
5241 if (clEnv == (MagickCLEnv) NULL)
5242 return((Image *) NULL);
5243
5244 filteredImage=ComputeWaveletDenoiseImage(image,clEnv,threshold,exception);
5245
5246 return(filteredImage);
5247 }
5248 #endif /* MAGICKCORE_OPENCL_SUPPORT */
5249