1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 % %
4 % %
5 % %
6 % AAA CCCC CCCC EEEEE L EEEEE RRRR AAA TTTTT EEEEE %
7 % A A C C E L E R R A A T E %
8 % AAAAA C C EEE L EEE RRRR AAAAA T EEE %
9 % A A C C E L E R R A A T E %
10 % A A CCCC CCCC EEEEE LLLLL EEEEE R R A A T EEEEE %
11 % %
12 % %
13 % MagickCore Acceleration Methods %
14 % %
15 % Software Design %
16 % Cristy %
17 % SiuChi Chan %
18 % Guansong Zhang %
19 % January 2010 %
20 % Dirk Lemstra %
21 % April 2016 %
22 % %
23 % %
24 % Copyright 1999-2019 ImageMagick Studio LLC, a non-profit organization %
25 % dedicated to making software imaging solutions freely available. %
26 % %
27 % You may not use this file except in compliance with the License. You may %
28 % obtain a copy of the License at %
29 % %
30 % https://imagemagick.org/script/license.php %
31 % %
32 % Unless required by applicable law or agreed to in writing, software %
33 % distributed under the License is distributed on an "AS IS" BASIS, %
34 % WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
35 % See the License for the specific language governing permissions and %
36 % limitations under the License. %
37 % %
38 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39 */
40
41 /*
42 Include declarations.
43 */
44 #include "MagickCore/studio.h"
45 #include "MagickCore/accelerate-private.h"
46 #include "MagickCore/accelerate-kernels-private.h"
47 #include "MagickCore/artifact.h"
48 #include "MagickCore/cache.h"
49 #include "MagickCore/cache-private.h"
50 #include "MagickCore/cache-view.h"
51 #include "MagickCore/color-private.h"
52 #include "MagickCore/delegate-private.h"
53 #include "MagickCore/enhance.h"
54 #include "MagickCore/exception.h"
55 #include "MagickCore/exception-private.h"
56 #include "MagickCore/gem.h"
57 #include "MagickCore/image.h"
58 #include "MagickCore/image-private.h"
59 #include "MagickCore/linked-list.h"
60 #include "MagickCore/list.h"
61 #include "MagickCore/memory_.h"
62 #include "MagickCore/monitor-private.h"
63 #include "MagickCore/opencl.h"
64 #include "MagickCore/opencl-private.h"
65 #include "MagickCore/option.h"
66 #include "MagickCore/pixel-accessor.h"
67 #include "MagickCore/pixel-private.h"
68 #include "MagickCore/prepress.h"
69 #include "MagickCore/quantize.h"
70 #include "MagickCore/quantum-private.h"
71 #include "MagickCore/random_.h"
72 #include "MagickCore/random-private.h"
73 #include "MagickCore/registry.h"
74 #include "MagickCore/resize.h"
75 #include "MagickCore/resize-private.h"
76 #include "MagickCore/semaphore.h"
77 #include "MagickCore/splay-tree.h"
78 #include "MagickCore/statistic.h"
79 #include "MagickCore/string_.h"
80 #include "MagickCore/string-private.h"
81 #include "MagickCore/token.h"
82
83 #define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
84 #define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
85
86 #if defined(MAGICKCORE_OPENCL_SUPPORT)
87
88 /*
89 Define declarations.
90 */
91 #define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
92
93 /*
94 Static declarations.
95 */
96 static const ResizeWeightingFunctionType supportedResizeWeighting[] =
97 {
98 BoxWeightingFunction,
99 TriangleWeightingFunction,
100 HannWeightingFunction,
101 HammingWeightingFunction,
102 BlackmanWeightingFunction,
103 CubicBCWeightingFunction,
104 SincWeightingFunction,
105 SincFastWeightingFunction,
106 LastWeightingFunction
107 };
108
109 /*
110 Helper functions.
111 */
checkAccelerateCondition(const Image * image)112 static MagickBooleanType checkAccelerateCondition(const Image* image)
113 {
114 /* only direct class images are supported */
115 if (image->storage_class != DirectClass)
116 return(MagickFalse);
117
118 /* check if the image's colorspace is supported */
119 if (image->colorspace != RGBColorspace &&
120 image->colorspace != sRGBColorspace &&
121 image->colorspace != LinearGRAYColorspace &&
122 image->colorspace != GRAYColorspace)
123 return(MagickFalse);
124
125 /* check if the virtual pixel method is compatible with the OpenCL implementation */
126 if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
127 (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
128 return(MagickFalse);
129
130 /* check if the image has mask */
131 if (((image->channels & ReadMaskChannel) != 0) ||
132 ((image->channels & WriteMaskChannel) != 0) ||
133 ((image->channels & CompositeMaskChannel) != 0))
134 return(MagickFalse);
135
136 if (image->number_channels > 4)
137 return(MagickFalse);
138
139 /* check if pixel order is R */
140 if (GetPixelChannelOffset(image,RedPixelChannel) != 0)
141 return(MagickFalse);
142
143 if (image->number_channels == 1)
144 return(MagickTrue);
145
146 /* check if pixel order is RA */
147 if ((image->number_channels == 2) &&
148 (GetPixelChannelOffset(image,AlphaPixelChannel) == 1))
149 return(MagickTrue);
150
151 if (image->number_channels == 2)
152 return(MagickFalse);
153
154 /* check if pixel order is RGB */
155 if ((GetPixelChannelOffset(image,GreenPixelChannel) != 1) ||
156 (GetPixelChannelOffset(image,BluePixelChannel) != 2))
157 return(MagickFalse);
158
159 if (image->number_channels == 3)
160 return(MagickTrue);
161
162 /* check if pixel order is RGBA */
163 if (GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
164 return(MagickFalse);
165
166 return(MagickTrue);
167 }
168
checkAccelerateConditionRGBA(const Image * image)169 static MagickBooleanType checkAccelerateConditionRGBA(const Image* image)
170 {
171 if (checkAccelerateCondition(image) == MagickFalse)
172 return(MagickFalse);
173
174 /* the order will be RGBA if the image has 4 channels */
175 if (image->number_channels != 4)
176 return(MagickFalse);
177
178 if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
179 (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
180 (GetPixelBlueTraits(image) == UndefinedPixelTrait) ||
181 (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
182 return(MagickFalse);
183
184 return(MagickTrue);
185 }
186
checkPixelIntensity(const Image * image,const PixelIntensityMethod method)187 static MagickBooleanType checkPixelIntensity(const Image *image,
188 const PixelIntensityMethod method)
189 {
190 /* EncodePixelGamma and DecodePixelGamma are not supported */
191 if ((method == Rec601LumaPixelIntensityMethod) ||
192 (method == Rec709LumaPixelIntensityMethod))
193 {
194 if (image->colorspace == RGBColorspace)
195 return(MagickFalse);
196 }
197
198 if ((method == Rec601LuminancePixelIntensityMethod) ||
199 (method == Rec709LuminancePixelIntensityMethod))
200 {
201 if (image->colorspace == sRGBColorspace)
202 return(MagickFalse);
203 }
204
205 return(MagickTrue);
206 }
207
checkHistogramCondition(const Image * image,const PixelIntensityMethod method)208 static MagickBooleanType checkHistogramCondition(const Image *image,
209 const PixelIntensityMethod method)
210 {
211 /* ensure this is the only pass get in for now. */
212 if ((image->channel_mask & SyncChannels) == 0)
213 return MagickFalse;
214
215 return(checkPixelIntensity(image,method));
216 }
217
getOpenCLEnvironment(ExceptionInfo * exception)218 static MagickCLEnv getOpenCLEnvironment(ExceptionInfo* exception)
219 {
220 MagickCLEnv
221 clEnv;
222
223 clEnv=GetCurrentOpenCLEnv();
224 if (clEnv == (MagickCLEnv) NULL)
225 return((MagickCLEnv) NULL);
226
227 if (clEnv->enabled == MagickFalse)
228 return((MagickCLEnv) NULL);
229
230 if (InitializeOpenCL(clEnv,exception) == MagickFalse)
231 return((MagickCLEnv) NULL);
232
233 return(clEnv);
234 }
235
cloneImage(const Image * image,ExceptionInfo * exception)236 static Image *cloneImage(const Image* image,ExceptionInfo *exception)
237 {
238 Image
239 *clone;
240
241 if (((image->channel_mask & RedChannel) != 0) &&
242 ((image->channel_mask & GreenChannel) != 0) &&
243 ((image->channel_mask & BlueChannel) != 0) &&
244 ((image->channel_mask & AlphaChannel) != 0))
245 clone=CloneImage(image,0,0,MagickTrue,exception);
246 else
247 {
248 clone=CloneImage(image,0,0,MagickTrue,exception);
249 if (clone != (Image *) NULL)
250 SyncImagePixelCache(clone,exception);
251 }
252 return(clone);
253 }
254
255 /* pad the global workgroup size to the next multiple of
256 the local workgroup size */
padGlobalWorkgroupSizeToLocalWorkgroupSize(const unsigned int orgGlobalSize,const unsigned int localGroupSize)257 inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
258 const unsigned int orgGlobalSize,const unsigned int localGroupSize)
259 {
260 return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
261 }
262
createKernelInfo(MagickCLDevice device,const double radius,const double sigma,cl_uint * width,ExceptionInfo * exception)263 static cl_mem createKernelInfo(MagickCLDevice device,const double radius,
264 const double sigma,cl_uint *width,ExceptionInfo *exception)
265 {
266 char
267 geometry[MagickPathExtent];
268
269 cl_mem
270 imageKernelBuffer;
271
272 float
273 *kernelBufferPtr;
274
275 KernelInfo
276 *kernel;
277
278 ssize_t
279 i;
280
281 (void) FormatLocaleString(geometry,MagickPathExtent,
282 "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
283 kernel=AcquireKernelInfo(geometry,exception);
284 if (kernel == (KernelInfo *) NULL)
285 {
286 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
287 ResourceLimitWarning,"AcquireKernelInfo failed.",".");
288 return((cl_mem) NULL);
289 }
290 kernelBufferPtr=(float *)AcquireMagickMemory(kernel->width*
291 sizeof(*kernelBufferPtr));
292 if (kernelBufferPtr == (float *) NULL)
293 {
294 kernel=DestroyKernelInfo(kernel);
295 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
296 ResourceLimitWarning,"MemoryAllocationFailed.",".");
297 return((cl_mem) NULL);
298 }
299 for (i = 0; i < (ssize_t) kernel->width; i++)
300 kernelBufferPtr[i] = (float)kernel->values[i];
301 imageKernelBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
302 CL_MEM_READ_ONLY,kernel->width*sizeof(*kernelBufferPtr),kernelBufferPtr);
303 *width=(cl_uint) kernel->width;
304 kernelBufferPtr=(float *) RelinquishMagickMemory(kernelBufferPtr);
305 kernel=DestroyKernelInfo(kernel);
306 if (imageKernelBuffer == (cl_mem) NULL)
307 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
308 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
309 return(imageKernelBuffer);
310 }
311
LaunchHistogramKernel(MagickCLEnv clEnv,MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,Image * image,const ChannelType channel,ExceptionInfo * exception)312 static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
313 MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,
314 cl_mem histogramBuffer,Image *image,const ChannelType channel,
315 ExceptionInfo *exception)
316 {
317 MagickBooleanType
318 outputReady;
319
320 cl_int
321 clStatus;
322
323 cl_kernel
324 histogramKernel;
325
326 cl_event
327 event;
328
329 cl_uint
330 colorspace,
331 method;
332
333 register ssize_t
334 i;
335
336 size_t
337 global_work_size[2];
338
339 histogramKernel = NULL;
340
341 outputReady = MagickFalse;
342 colorspace = image->colorspace;
343 method = image->intensity;
344
345 /* get the OpenCL kernel */
346 histogramKernel = AcquireOpenCLKernel(device,"Histogram");
347 if (histogramKernel == NULL)
348 {
349 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
350 goto cleanup;
351 }
352
353 /* set the kernel arguments */
354 i = 0;
355 clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
356 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
357 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&colorspace);
358 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&method);
359 clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
360 if (clStatus != CL_SUCCESS)
361 {
362 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
363 goto cleanup;
364 }
365
366 /* launch the kernel */
367 global_work_size[0] = image->columns;
368 global_work_size[1] = image->rows;
369
370 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
371
372 if (clStatus != CL_SUCCESS)
373 {
374 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
375 goto cleanup;
376 }
377 RecordProfileData(device,histogramKernel,event);
378
379 outputReady = MagickTrue;
380
381 cleanup:
382
383 if (histogramKernel!=NULL)
384 ReleaseOpenCLKernel(histogramKernel);
385
386 return(outputReady);
387 }
388
389 /*
390 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
391 % %
392 % %
393 % %
394 % A c c e l e r a t e A d d N o i s e I m a g e %
395 % %
396 % %
397 % %
398 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
399 */
400
ComputeAddNoiseImage(const Image * image,MagickCLEnv clEnv,const NoiseType noise_type,const double attenuate,ExceptionInfo * exception)401 static Image *ComputeAddNoiseImage(const Image *image,MagickCLEnv clEnv,
402 const NoiseType noise_type,const double attenuate,ExceptionInfo *exception)
403 {
404 cl_command_queue
405 queue;
406
407 cl_float
408 cl_attenuate;
409
410 cl_int
411 status;
412
413 cl_kernel
414 addNoiseKernel;
415
416 cl_mem
417 filteredImageBuffer,
418 imageBuffer;
419
420 cl_uint
421 bufferLength,
422 inputPixelCount,
423 number_channels,
424 numRandomNumberPerPixel,
425 pixelsPerWorkitem,
426 seed0,
427 seed1,
428 workItemCount;
429
430 const unsigned long
431 *s;
432
433 MagickBooleanType
434 outputReady;
435
436 MagickCLDevice
437 device;
438
439 Image
440 *filteredImage;
441
442 RandomInfo
443 *randomInfo;
444
445 size_t
446 gsize[1],
447 i,
448 lsize[1],
449 numRandPerChannel;
450
451 filteredImage=NULL;
452 imageBuffer=NULL;
453 filteredImageBuffer=NULL;
454 addNoiseKernel=NULL;
455 outputReady=MagickFalse;
456
457 device=RequestOpenCLDevice(clEnv);
458 queue=AcquireOpenCLCommandQueue(device);
459 if (queue == (cl_command_queue) NULL)
460 goto cleanup;
461 filteredImage=cloneImage(image,exception);
462 if (filteredImage == (Image *) NULL)
463 goto cleanup;
464 if (filteredImage->number_channels != image->number_channels)
465 goto cleanup;
466 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
467 if (imageBuffer == (cl_mem) NULL)
468 goto cleanup;
469 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
470 if (filteredImageBuffer == (cl_mem) NULL)
471 goto cleanup;
472
473 /* find out how many random numbers needed by pixel */
474 numRandPerChannel=0;
475 numRandomNumberPerPixel=0;
476 switch (noise_type)
477 {
478 case UniformNoise:
479 case ImpulseNoise:
480 case LaplacianNoise:
481 case RandomNoise:
482 default:
483 numRandPerChannel=1;
484 break;
485 case GaussianNoise:
486 case MultiplicativeGaussianNoise:
487 case PoissonNoise:
488 numRandPerChannel=2;
489 break;
490 };
491 if (GetPixelRedTraits(image) != UndefinedPixelTrait)
492 numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
493 if (GetPixelGreenTraits(image) != UndefinedPixelTrait)
494 numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
495 if (GetPixelBlueTraits(image) != UndefinedPixelTrait)
496 numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
497 if (GetPixelAlphaTraits(image) != UndefinedPixelTrait)
498 numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
499
500 addNoiseKernel=AcquireOpenCLKernel(device,"AddNoise");
501 if (addNoiseKernel == (cl_kernel) NULL)
502 {
503 (void)OpenCLThrowMagickException(device,exception,GetMagickModule(),
504 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
505 goto cleanup;
506 }
507
508 /* 256 work items per group, 2 groups per CU */
509 workItemCount=device->max_compute_units*2*256;
510 inputPixelCount=(cl_int) (image->columns*image->rows);
511 pixelsPerWorkitem=(inputPixelCount+workItemCount-1)/workItemCount;
512 pixelsPerWorkitem=((pixelsPerWorkitem+3)/4)*4;
513 lsize[0]=256;
514 gsize[0]=workItemCount;
515
516 randomInfo=AcquireRandomInfo();
517 s=GetRandomInfoSeed(randomInfo);
518 seed0=s[0];
519 (void) GetPseudoRandomValue(randomInfo);
520 seed1=s[0];
521 randomInfo=DestroyRandomInfo(randomInfo);
522
523 number_channels=(cl_uint) image->number_channels;
524 bufferLength=(cl_uint) (image->columns*image->rows*image->number_channels);
525 cl_attenuate=(cl_float) attenuate;
526
527 i=0;
528 status =SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
529 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
530 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
531 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&bufferLength);
532 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
533 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(NoiseType),(void *)&noise_type);
534 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_float),(void *)&cl_attenuate);
535 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed0);
536 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed1);
537 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&numRandomNumberPerPixel);
538 status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
539 if (status != CL_SUCCESS)
540 {
541 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
542 ResourceLimitWarning,"clSetKernelArg failed.",".");
543 goto cleanup;
544 }
545
546 outputReady=EnqueueOpenCLKernel(queue,addNoiseKernel,1,(const size_t *) NULL,gsize,
547 lsize,image,filteredImage,MagickFalse,exception);
548
549 cleanup:
550
551 if (imageBuffer != (cl_mem) NULL)
552 ReleaseOpenCLMemObject(imageBuffer);
553 if (filteredImageBuffer != (cl_mem) NULL)
554 ReleaseOpenCLMemObject(filteredImageBuffer);
555 if (addNoiseKernel != (cl_kernel) NULL)
556 ReleaseOpenCLKernel(addNoiseKernel);
557 if (queue != (cl_command_queue) NULL)
558 ReleaseOpenCLCommandQueue(device,queue);
559 if (device != (MagickCLDevice) NULL)
560 ReleaseOpenCLDevice(device);
561 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
562 filteredImage=DestroyImage(filteredImage);
563
564 return(filteredImage);
565 }
566
AccelerateAddNoiseImage(const Image * image,const NoiseType noise_type,const double attenuate,ExceptionInfo * exception)567 MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
568 const NoiseType noise_type,const double attenuate,ExceptionInfo *exception)
569 {
570 Image
571 *filteredImage;
572
573 MagickCLEnv
574 clEnv;
575
576 assert(image != NULL);
577 assert(exception != (ExceptionInfo *) NULL);
578
579 if (checkAccelerateCondition(image) == MagickFalse)
580 return((Image *) NULL);
581
582 clEnv=getOpenCLEnvironment(exception);
583 if (clEnv == (MagickCLEnv) NULL)
584 return((Image *) NULL);
585
586 filteredImage=ComputeAddNoiseImage(image,clEnv,noise_type,attenuate,
587 exception);
588 return(filteredImage);
589 }
590
591 /*
592 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
593 % %
594 % %
595 % %
596 % A c c e l e r a t e B l u r I m a g e %
597 % %
598 % %
599 % %
600 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
601 */
602
ComputeBlurImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,ExceptionInfo * exception)603 static Image *ComputeBlurImage(const Image* image,MagickCLEnv clEnv,
604 const double radius,const double sigma,ExceptionInfo *exception)
605 {
606 cl_command_queue
607 queue;
608
609 cl_int
610 status;
611
612 cl_kernel
613 blurColumnKernel,
614 blurRowKernel;
615
616 cl_mem
617 filteredImageBuffer,
618 imageBuffer,
619 imageKernelBuffer,
620 tempImageBuffer;
621
622 cl_uint
623 imageColumns,
624 imageRows,
625 kernelWidth,
626 number_channels;
627
628 Image
629 *filteredImage;
630
631 MagickBooleanType
632 outputReady;
633
634 MagickCLDevice
635 device;
636
637 MagickSizeType
638 length;
639
640 size_t
641 chunkSize=256,
642 gsize[2],
643 i,
644 lsize[2];
645
646 filteredImage=NULL;
647 imageBuffer=NULL;
648 filteredImageBuffer=NULL;
649 tempImageBuffer=NULL;
650 imageKernelBuffer=NULL;
651 blurRowKernel=NULL;
652 blurColumnKernel=NULL;
653 outputReady=MagickFalse;
654
655 device=RequestOpenCLDevice(clEnv);
656 queue=AcquireOpenCLCommandQueue(device);
657 filteredImage=cloneImage(image,exception);
658 if (filteredImage == (Image *) NULL)
659 goto cleanup;
660 if (filteredImage->number_channels != image->number_channels)
661 goto cleanup;
662 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
663 if (imageBuffer == (cl_mem) NULL)
664 goto cleanup;
665 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
666 if (filteredImageBuffer == (cl_mem) NULL)
667 goto cleanup;
668
669 imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
670 exception);
671 if (imageKernelBuffer == (cl_mem) NULL)
672 goto cleanup;
673
674 length=image->columns*image->rows;
675 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
676 sizeof(cl_float4),(void *) NULL);
677 if (tempImageBuffer == (cl_mem) NULL)
678 goto cleanup;
679
680 blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
681 if (blurRowKernel == (cl_kernel) NULL)
682 {
683 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
684 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
685 goto cleanup;
686 }
687
688 number_channels=(cl_uint) image->number_channels;
689 imageColumns=(cl_uint) image->columns;
690 imageRows=(cl_uint) image->rows;
691
692 i=0;
693 status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
694 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
695 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
696 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
697 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
698 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
699 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
700 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
701 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
702 if (status != CL_SUCCESS)
703 {
704 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
705 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
706 goto cleanup;
707 }
708
709 gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
710 gsize[1]=image->rows;
711 lsize[0]=chunkSize;
712 lsize[1]=1;
713
714 outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,(size_t *) NULL,gsize,
715 lsize,image,filteredImage,MagickFalse,exception);
716 if (outputReady == MagickFalse)
717 goto cleanup;
718
719 blurColumnKernel=AcquireOpenCLKernel(device,"BlurColumn");
720 if (blurColumnKernel == (cl_kernel) NULL)
721 {
722 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
723 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
724 goto cleanup;
725 }
726
727 i=0;
728 status =SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
729 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels);
730 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
731 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
732 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
733 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
734 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
735 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
736 status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
737 if (status != CL_SUCCESS)
738 {
739 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
740 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
741 goto cleanup;
742 }
743
744 gsize[0]=image->columns;
745 gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
746 lsize[0]=1;
747 lsize[1]=chunkSize;
748
749 outputReady=EnqueueOpenCLKernel(queue,blurColumnKernel,2,(size_t *) NULL,gsize,
750 lsize,image,filteredImage,MagickFalse,exception);
751
752 cleanup:
753
754 if (imageBuffer != (cl_mem) NULL)
755 ReleaseOpenCLMemObject(imageBuffer);
756 if (filteredImageBuffer != (cl_mem) NULL)
757 ReleaseOpenCLMemObject(filteredImageBuffer);
758 if (tempImageBuffer != (cl_mem) NULL)
759 ReleaseOpenCLMemObject(tempImageBuffer);
760 if (imageKernelBuffer != (cl_mem) NULL)
761 ReleaseOpenCLMemObject(imageKernelBuffer);
762 if (blurRowKernel != (cl_kernel) NULL)
763 ReleaseOpenCLKernel(blurRowKernel);
764 if (blurColumnKernel != (cl_kernel) NULL)
765 ReleaseOpenCLKernel(blurColumnKernel);
766 if (queue != (cl_command_queue) NULL)
767 ReleaseOpenCLCommandQueue(device,queue);
768 if (device != (MagickCLDevice) NULL)
769 ReleaseOpenCLDevice(device);
770 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
771 filteredImage=DestroyImage(filteredImage);
772
773 return(filteredImage);
774 }
775
AccelerateBlurImage(const Image * image,const double radius,const double sigma,ExceptionInfo * exception)776 MagickPrivate Image* AccelerateBlurImage(const Image *image,
777 const double radius,const double sigma,ExceptionInfo *exception)
778 {
779 Image
780 *filteredImage;
781
782 MagickCLEnv
783 clEnv;
784
785 assert(image != NULL);
786 assert(exception != (ExceptionInfo *) NULL);
787
788 if (checkAccelerateCondition(image) == MagickFalse)
789 return((Image *) NULL);
790
791 clEnv=getOpenCLEnvironment(exception);
792 if (clEnv == (MagickCLEnv) NULL)
793 return((Image *) NULL);
794
795 filteredImage=ComputeBlurImage(image,clEnv,radius,sigma,exception);
796 return(filteredImage);
797 }
798
799 /*
800 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
801 % %
802 % %
803 % %
804 % A c c e l e r a t e C o n t r a s t I m a g e %
805 % %
806 % %
807 % %
808 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
809 */
810
ComputeContrastImage(Image * image,MagickCLEnv clEnv,const MagickBooleanType sharpen,ExceptionInfo * exception)811 static MagickBooleanType ComputeContrastImage(Image *image,MagickCLEnv clEnv,
812 const MagickBooleanType sharpen,ExceptionInfo *exception)
813 {
814 cl_command_queue
815 queue;
816
817 cl_int
818 status,
819 sign;
820
821 cl_kernel
822 contrastKernel;
823
824 cl_mem
825 imageBuffer;
826
827 cl_uint
828 number_channels;
829
830 MagickBooleanType
831 outputReady;
832
833 MagickCLDevice
834 device;
835
836 size_t
837 gsize[2],
838 i;
839
840 contrastKernel=NULL;
841 imageBuffer=NULL;
842 outputReady=MagickFalse;
843
844 device=RequestOpenCLDevice(clEnv);
845 queue=AcquireOpenCLCommandQueue(device);
846 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
847 if (imageBuffer == (cl_mem) NULL)
848 goto cleanup;
849
850 contrastKernel=AcquireOpenCLKernel(device,"Contrast");
851 if (contrastKernel == (cl_kernel) NULL)
852 {
853 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
854 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
855 goto cleanup;
856 }
857
858 number_channels=(cl_uint) image->number_channels;
859 sign=sharpen != MagickFalse ? 1 : -1;
860
861 i=0;
862 status =SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
863 status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_uint),&number_channels);
864 status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_int),&sign);
865 if (status != CL_SUCCESS)
866 {
867 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
868 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
869 goto cleanup;
870 }
871
872 gsize[0]=image->columns;
873 gsize[1]=image->rows;
874
875 outputReady=EnqueueOpenCLKernel(queue,contrastKernel,2,(const size_t *) NULL,
876 gsize,(const size_t *) NULL,image,(Image *) NULL,MagickFalse,exception);
877
878 cleanup:
879
880 if (imageBuffer != (cl_mem) NULL)
881 ReleaseOpenCLMemObject(imageBuffer);
882 if (contrastKernel != (cl_kernel) NULL)
883 ReleaseOpenCLKernel(contrastKernel);
884 if (queue != (cl_command_queue) NULL)
885 ReleaseOpenCLCommandQueue(device,queue);
886 if (device != (MagickCLDevice) NULL)
887 ReleaseOpenCLDevice(device);
888
889 return(outputReady);
890 }
891
AccelerateContrastImage(Image * image,const MagickBooleanType sharpen,ExceptionInfo * exception)892 MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
893 const MagickBooleanType sharpen,ExceptionInfo *exception)
894 {
895 MagickBooleanType
896 status;
897
898 MagickCLEnv
899 clEnv;
900
901 assert(image != NULL);
902 assert(exception != (ExceptionInfo *) NULL);
903
904 if (checkAccelerateCondition(image) == MagickFalse)
905 return(MagickFalse);
906
907 clEnv=getOpenCLEnvironment(exception);
908 if (clEnv == (MagickCLEnv) NULL)
909 return(MagickFalse);
910
911 status=ComputeContrastImage(image,clEnv,sharpen,exception);
912 return(status);
913 }
914
915 /*
916 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
917 % %
918 % %
919 % %
920 % A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e %
921 % %
922 % %
923 % %
924 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
925 */
926
ComputeContrastStretchImage(Image * image,MagickCLEnv clEnv,const double black_point,const double white_point,ExceptionInfo * exception)927 static MagickBooleanType ComputeContrastStretchImage(Image *image,
928 MagickCLEnv clEnv,const double black_point,const double white_point,
929 ExceptionInfo *exception)
930 {
931 #define ContrastStretchImageTag "ContrastStretch/Image"
932 #define MaxRange(color) ((cl_float) ScaleQuantumToMap((Quantum) (color)))
933
934 CacheView
935 *image_view;
936
937 cl_command_queue
938 queue;
939
940 cl_int
941 clStatus;
942
943 cl_mem_flags
944 mem_flags;
945
946 cl_mem
947 histogramBuffer,
948 imageBuffer,
949 stretchMapBuffer;
950
951 cl_kernel
952 histogramKernel,
953 stretchKernel;
954
955 cl_event
956 event;
957
958 cl_uint4
959 *histogram;
960
961 double
962 intensity;
963
964 cl_float4
965 black,
966 white;
967
968 MagickBooleanType
969 outputReady,
970 status;
971
972 MagickCLDevice
973 device;
974
975 MagickSizeType
976 length;
977
978 PixelPacket
979 *stretch_map;
980
981 register ssize_t
982 i;
983
984 size_t
985 global_work_size[2];
986
987 void
988 *hostPtr,
989 *inputPixels;
990
991 histogram=NULL;
992 stretch_map=NULL;
993 inputPixels = NULL;
994 imageBuffer = NULL;
995 histogramBuffer = NULL;
996 stretchMapBuffer = NULL;
997 histogramKernel = NULL;
998 stretchKernel = NULL;
999 queue = NULL;
1000 outputReady = MagickFalse;
1001
1002
1003 assert(image != (Image *) NULL);
1004 assert(image->signature == MagickCoreSignature);
1005 if (image->debug != MagickFalse)
1006 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1007
1008 /* exception=(&image->exception); */
1009
1010 /*
1011 Initialize opencl environment.
1012 */
1013 device = RequestOpenCLDevice(clEnv);
1014 queue = AcquireOpenCLCommandQueue(device);
1015
1016 /*
1017 Allocate and initialize histogram arrays.
1018 */
1019 histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
1020
1021 if (histogram == (cl_uint4 *) NULL)
1022 ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
1023
1024 /* reset histogram */
1025 (void) memset(histogram,0,(MaxMap+1)*sizeof(*histogram));
1026
1027 /*
1028 if (IsGrayImage(image,exception) != MagickFalse)
1029 (void) SetImageColorspace(image,GRAYColorspace);
1030 */
1031
1032 status=MagickTrue;
1033
1034
1035 /*
1036 Form histogram.
1037 */
1038 /* Create and initialize OpenCL buffers. */
1039 /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
1040 /* assume this will get a writable image */
1041 image_view=AcquireAuthenticCacheView(image,exception);
1042 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1043
1044 if (inputPixels == (void *) NULL)
1045 {
1046 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1047 goto cleanup;
1048 }
1049 /* If the host pointer is aligned to the size of CLPixelPacket,
1050 then use the host buffer directly from the GPU; otherwise,
1051 create a buffer on the GPU and copy the data over */
1052 if (ALIGNED(inputPixels,CLPixelPacket))
1053 {
1054 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1055 }
1056 else
1057 {
1058 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1059 }
1060 /* create a CL buffer from image pixel buffer */
1061 length = image->columns * image->rows;
1062 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1063 if (clStatus != CL_SUCCESS)
1064 {
1065 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1066 goto cleanup;
1067 }
1068
1069 /* If the host pointer is aligned to the size of cl_uint,
1070 then use the host buffer directly from the GPU; otherwise,
1071 create a buffer on the GPU and copy the data over */
1072 if (ALIGNED(histogram,cl_uint4))
1073 {
1074 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1075 hostPtr = histogram;
1076 }
1077 else
1078 {
1079 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1080 hostPtr = histogram;
1081 }
1082 /* create a CL buffer for histogram */
1083 length = (MaxMap+1);
1084 histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
1085 if (clStatus != CL_SUCCESS)
1086 {
1087 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1088 goto cleanup;
1089 }
1090
1091 status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask,exception);
1092 if (status == MagickFalse)
1093 goto cleanup;
1094
1095 /* read from the kenel output */
1096 if (ALIGNED(histogram,cl_uint4))
1097 {
1098 length = (MaxMap+1);
1099 clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
1100 }
1101 else
1102 {
1103 length = (MaxMap+1);
1104 clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
1105 }
1106 if (clStatus != CL_SUCCESS)
1107 {
1108 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1109 goto cleanup;
1110 }
1111
1112 /* unmap, don't block gpu to use this buffer again. */
1113 if (ALIGNED(histogram,cl_uint4))
1114 {
1115 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
1116 if (clStatus != CL_SUCCESS)
1117 {
1118 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1119 goto cleanup;
1120 }
1121 }
1122
1123 /* recreate input buffer later, in case image updated */
1124 #ifdef RECREATEBUFFER
1125 if (imageBuffer!=NULL)
1126 clEnv->library->clReleaseMemObject(imageBuffer);
1127 #endif
1128
1129 /* CPU stuff */
1130 /*
1131 Find the histogram boundaries by locating the black/white levels.
1132 */
1133 black.x=0.0;
1134 white.x=MaxRange(QuantumRange);
1135 if ((image->channel_mask & RedChannel) != 0)
1136 {
1137 intensity=0.0;
1138 for (i=0; i <= (ssize_t) MaxMap; i++)
1139 {
1140 intensity+=histogram[i].s[2];
1141 if (intensity > black_point)
1142 break;
1143 }
1144 black.x=(cl_float) i;
1145 intensity=0.0;
1146 for (i=(ssize_t) MaxMap; i != 0; i--)
1147 {
1148 intensity+=histogram[i].s[2];
1149 if (intensity > ((double) image->columns*image->rows-white_point))
1150 break;
1151 }
1152 white.x=(cl_float) i;
1153 }
1154 black.y=0.0;
1155 white.y=MaxRange(QuantumRange);
1156 if ((image->channel_mask & GreenChannel) != 0)
1157 {
1158 intensity=0.0;
1159 for (i=0; i <= (ssize_t) MaxMap; i++)
1160 {
1161 intensity+=histogram[i].s[2];
1162 if (intensity > black_point)
1163 break;
1164 }
1165 black.y=(cl_float) i;
1166 intensity=0.0;
1167 for (i=(ssize_t) MaxMap; i != 0; i--)
1168 {
1169 intensity+=histogram[i].s[2];
1170 if (intensity > ((double) image->columns*image->rows-white_point))
1171 break;
1172 }
1173 white.y=(cl_float) i;
1174 }
1175 black.z=0.0;
1176 white.z=MaxRange(QuantumRange);
1177 if ((image->channel_mask & BlueChannel) != 0)
1178 {
1179 intensity=0.0;
1180 for (i=0; i <= (ssize_t) MaxMap; i++)
1181 {
1182 intensity+=histogram[i].s[2];
1183 if (intensity > black_point)
1184 break;
1185 }
1186 black.z=(cl_float) i;
1187 intensity=0.0;
1188 for (i=(ssize_t) MaxMap; i != 0; i--)
1189 {
1190 intensity+=histogram[i].s[2];
1191 if (intensity > ((double) image->columns*image->rows-white_point))
1192 break;
1193 }
1194 white.z=(cl_float) i;
1195 }
1196 black.w=0.0;
1197 white.w=MaxRange(QuantumRange);
1198 if ((image->channel_mask & AlphaChannel) != 0)
1199 {
1200 intensity=0.0;
1201 for (i=0; i <= (ssize_t) MaxMap; i++)
1202 {
1203 intensity+=histogram[i].s[2];
1204 if (intensity > black_point)
1205 break;
1206 }
1207 black.w=(cl_float) i;
1208 intensity=0.0;
1209 for (i=(ssize_t) MaxMap; i != 0; i--)
1210 {
1211 intensity+=histogram[i].s[2];
1212 if (intensity > ((double) image->columns*image->rows-white_point))
1213 break;
1214 }
1215 white.w=(cl_float) i;
1216 }
1217
1218 stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
1219 sizeof(*stretch_map));
1220
1221 if (stretch_map == (PixelPacket *) NULL)
1222 ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
1223 image->filename);
1224
1225 /*
1226 Stretch the histogram to create the stretched image mapping.
1227 */
1228 (void) memset(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
1229 for (i=0; i <= (ssize_t) MaxMap; i++)
1230 {
1231 if ((image->channel_mask & RedChannel) != 0)
1232 {
1233 if (i < (ssize_t) black.x)
1234 stretch_map[i].red=(Quantum) 0;
1235 else
1236 if (i > (ssize_t) white.x)
1237 stretch_map[i].red=QuantumRange;
1238 else
1239 if (black.x != white.x)
1240 stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
1241 (i-black.x)/(white.x-black.x)));
1242 }
1243 if ((image->channel_mask & GreenChannel) != 0)
1244 {
1245 if (i < (ssize_t) black.y)
1246 stretch_map[i].green=0;
1247 else
1248 if (i > (ssize_t) white.y)
1249 stretch_map[i].green=QuantumRange;
1250 else
1251 if (black.y != white.y)
1252 stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
1253 (i-black.y)/(white.y-black.y)));
1254 }
1255 if ((image->channel_mask & BlueChannel) != 0)
1256 {
1257 if (i < (ssize_t) black.z)
1258 stretch_map[i].blue=0;
1259 else
1260 if (i > (ssize_t) white.z)
1261 stretch_map[i].blue= QuantumRange;
1262 else
1263 if (black.z != white.z)
1264 stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
1265 (i-black.z)/(white.z-black.z)));
1266 }
1267 if ((image->channel_mask & AlphaChannel) != 0)
1268 {
1269 if (i < (ssize_t) black.w)
1270 stretch_map[i].alpha=0;
1271 else
1272 if (i > (ssize_t) white.w)
1273 stretch_map[i].alpha=QuantumRange;
1274 else
1275 if (black.w != white.w)
1276 stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
1277 (i-black.w)/(white.w-black.w)));
1278 }
1279 }
1280
1281 /*
1282 Stretch the image.
1283 */
1284 if (((image->channel_mask & AlphaChannel) != 0) || (((image->channel_mask & IndexChannel) != 0) &&
1285 (image->colorspace == CMYKColorspace)))
1286 image->storage_class=DirectClass;
1287 if (image->storage_class == PseudoClass)
1288 {
1289 /*
1290 Stretch colormap.
1291 */
1292 for (i=0; i < (ssize_t) image->colors; i++)
1293 {
1294 if ((image->channel_mask & RedChannel) != 0)
1295 {
1296 if (black.x != white.x)
1297 image->colormap[i].red=stretch_map[
1298 ScaleQuantumToMap(image->colormap[i].red)].red;
1299 }
1300 if ((image->channel_mask & GreenChannel) != 0)
1301 {
1302 if (black.y != white.y)
1303 image->colormap[i].green=stretch_map[
1304 ScaleQuantumToMap(image->colormap[i].green)].green;
1305 }
1306 if ((image->channel_mask & BlueChannel) != 0)
1307 {
1308 if (black.z != white.z)
1309 image->colormap[i].blue=stretch_map[
1310 ScaleQuantumToMap(image->colormap[i].blue)].blue;
1311 }
1312 if ((image->channel_mask & AlphaChannel) != 0)
1313 {
1314 if (black.w != white.w)
1315 image->colormap[i].alpha=stretch_map[
1316 ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
1317 }
1318 }
1319 }
1320
1321 /*
1322 Stretch image.
1323 */
1324
1325
1326 /* GPU can work on this again, image and equalize map as input
1327 image: uchar4 (CLPixelPacket)
1328 stretch_map: uchar4 (PixelPacket)
1329 black, white: float4 (FloatPixelPacket) */
1330
1331 #ifdef RECREATEBUFFER
1332 /* If the host pointer is aligned to the size of CLPixelPacket,
1333 then use the host buffer directly from the GPU; otherwise,
1334 create a buffer on the GPU and copy the data over */
1335 if (ALIGNED(inputPixels,CLPixelPacket))
1336 {
1337 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1338 }
1339 else
1340 {
1341 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1342 }
1343 /* create a CL buffer from image pixel buffer */
1344 length = image->columns * image->rows;
1345 imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1346 if (clStatus != CL_SUCCESS)
1347 {
1348 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1349 goto cleanup;
1350 }
1351 #endif
1352
1353 /* Create and initialize OpenCL buffers. */
1354 if (ALIGNED(stretch_map, PixelPacket))
1355 {
1356 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1357 hostPtr = stretch_map;
1358 }
1359 else
1360 {
1361 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1362 hostPtr = stretch_map;
1363 }
1364 /* create a CL buffer for stretch_map */
1365 length = (MaxMap+1);
1366 stretchMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
1367 if (clStatus != CL_SUCCESS)
1368 {
1369 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1370 goto cleanup;
1371 }
1372
1373 /* get the OpenCL kernel */
1374 stretchKernel = AcquireOpenCLKernel(device,"ContrastStretch");
1375 if (stretchKernel == NULL)
1376 {
1377 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1378 goto cleanup;
1379 }
1380
1381 /* set the kernel arguments */
1382 i = 0;
1383 clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1384 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&image->channel_mask);
1385 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
1386 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&white);
1387 clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&black);
1388 if (clStatus != CL_SUCCESS)
1389 {
1390 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1391 goto cleanup;
1392 }
1393
1394 /* launch the kernel */
1395 global_work_size[0] = image->columns;
1396 global_work_size[1] = image->rows;
1397
1398 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
1399
1400 if (clStatus != CL_SUCCESS)
1401 {
1402 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1403 goto cleanup;
1404 }
1405 RecordProfileData(device,stretchKernel,event);
1406
1407 /* read the data back */
1408 if (ALIGNED(inputPixels,CLPixelPacket))
1409 {
1410 length = image->columns * image->rows;
1411 clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1412 }
1413 else
1414 {
1415 length = image->columns * image->rows;
1416 clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
1417 }
1418 if (clStatus != CL_SUCCESS)
1419 {
1420 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1421 goto cleanup;
1422 }
1423
1424 outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
1425
1426 cleanup:
1427
1428 image_view=DestroyCacheView(image_view);
1429
1430 if (imageBuffer!=NULL)
1431 clEnv->library->clReleaseMemObject(imageBuffer);
1432
1433 if (stretchMapBuffer!=NULL)
1434 clEnv->library->clReleaseMemObject(stretchMapBuffer);
1435 if (stretch_map!=NULL)
1436 stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
1437 if (histogramBuffer!=NULL)
1438 clEnv->library->clReleaseMemObject(histogramBuffer);
1439 if (histogram!=NULL)
1440 histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
1441 if (histogramKernel!=NULL)
1442 ReleaseOpenCLKernel(histogramKernel);
1443 if (stretchKernel!=NULL)
1444 ReleaseOpenCLKernel(stretchKernel);
1445 if (queue != NULL)
1446 ReleaseOpenCLCommandQueue(device,queue);
1447 if (device != NULL)
1448 ReleaseOpenCLDevice(device);
1449
1450 return(outputReady);
1451 }
1452
AccelerateContrastStretchImage(Image * image,const double black_point,const double white_point,ExceptionInfo * exception)1453 MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
1454 Image *image,const double black_point,const double white_point,
1455 ExceptionInfo *exception)
1456 {
1457 MagickBooleanType
1458 status;
1459
1460 MagickCLEnv
1461 clEnv;
1462
1463 assert(image != NULL);
1464 assert(exception != (ExceptionInfo *) NULL);
1465
1466 if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1467 (checkHistogramCondition(image,image->intensity) == MagickFalse))
1468 return(MagickFalse);
1469
1470 clEnv=getOpenCLEnvironment(exception);
1471 if (clEnv == (MagickCLEnv) NULL)
1472 return(MagickFalse);
1473
1474 status=ComputeContrastStretchImage(image,clEnv,black_point,white_point,
1475 exception);
1476 return(status);
1477 }
1478
1479 /*
1480 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1481 % %
1482 % %
1483 % %
1484 % A c c e l e r a t e C o n v o l v e I m a g e %
1485 % %
1486 % %
1487 % %
1488 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1489 */
1490
ComputeConvolveImage(const Image * image,MagickCLEnv clEnv,const KernelInfo * kernel,ExceptionInfo * exception)1491 static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
1492 const KernelInfo *kernel,ExceptionInfo *exception)
1493 {
1494 CacheView
1495 *filteredImage_view,
1496 *image_view;
1497
1498 cl_command_queue
1499 queue;
1500
1501 cl_event
1502 event;
1503
1504 cl_kernel
1505 clkernel;
1506
1507 cl_int
1508 clStatus;
1509
1510 cl_mem
1511 convolutionKernel,
1512 filteredImageBuffer,
1513 imageBuffer;
1514
1515 cl_mem_flags
1516 mem_flags;
1517
1518 const void
1519 *inputPixels;
1520
1521 float
1522 *kernelBufferPtr;
1523
1524 Image
1525 *filteredImage;
1526
1527 MagickBooleanType
1528 outputReady;
1529
1530 MagickCLDevice
1531 device;
1532
1533 MagickSizeType
1534 length;
1535
1536 size_t
1537 global_work_size[3],
1538 localGroupSize[3],
1539 localMemoryRequirement;
1540
1541 unsigned
1542 kernelSize;
1543
1544 unsigned int
1545 filterHeight,
1546 filterWidth,
1547 i,
1548 imageHeight,
1549 imageWidth,
1550 matte;
1551
1552 void
1553 *filteredPixels,
1554 *hostPtr;
1555
1556 /* intialize all CL objects to NULL */
1557 imageBuffer = NULL;
1558 filteredImageBuffer = NULL;
1559 convolutionKernel = NULL;
1560 clkernel = NULL;
1561 queue = NULL;
1562
1563 filteredImage = NULL;
1564 filteredImage_view = NULL;
1565 outputReady = MagickFalse;
1566
1567 device = RequestOpenCLDevice(clEnv);
1568
1569 image_view=AcquireAuthenticCacheView(image,exception);
1570 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1571 if (inputPixels == (const void *) NULL)
1572 {
1573 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1574 goto cleanup;
1575 }
1576
1577 /* Create and initialize OpenCL buffers. */
1578
1579 /* If the host pointer is aligned to the size of CLPixelPacket,
1580 then use the host buffer directly from the GPU; otherwise,
1581 create a buffer on the GPU and copy the data over */
1582 if (ALIGNED(inputPixels,CLPixelPacket))
1583 {
1584 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1585 }
1586 else
1587 {
1588 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1589 }
1590 /* create a CL buffer from image pixel buffer */
1591 length = image->columns * image->rows;
1592 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1593 if (clStatus != CL_SUCCESS)
1594 {
1595 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1596 goto cleanup;
1597 }
1598
1599 filteredImage = CloneImage(image,0,0,MagickTrue,exception);
1600 assert(filteredImage != NULL);
1601 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1602 {
1603 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1604 goto cleanup;
1605 }
1606 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1607 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1608 if (filteredPixels == (void *) NULL)
1609 {
1610 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1611 goto cleanup;
1612 }
1613
1614 if (ALIGNED(filteredPixels,CLPixelPacket))
1615 {
1616 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1617 hostPtr = filteredPixels;
1618 }
1619 else
1620 {
1621 mem_flags = CL_MEM_WRITE_ONLY;
1622 hostPtr = NULL;
1623 }
1624 /* create a CL buffer from image pixel buffer */
1625 length = image->columns * image->rows;
1626 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1627 if (clStatus != CL_SUCCESS)
1628 {
1629 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1630 goto cleanup;
1631 }
1632
1633 kernelSize = (unsigned int) (kernel->width * kernel->height);
1634 convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
1635 if (clStatus != CL_SUCCESS)
1636 {
1637 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1638 goto cleanup;
1639 }
1640
1641 queue = AcquireOpenCLCommandQueue(device);
1642
1643 kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
1644 , 0, NULL, NULL, &clStatus);
1645 if (clStatus != CL_SUCCESS)
1646 {
1647 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1648 goto cleanup;
1649 }
1650 for (i = 0; i < kernelSize; i++)
1651 {
1652 kernelBufferPtr[i] = (float) kernel->values[i];
1653 }
1654 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
1655 if (clStatus != CL_SUCCESS)
1656 {
1657 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1658 goto cleanup;
1659 }
1660
1661 /* Compute the local memory requirement for a 16x16 workgroup.
1662 If it's larger than 16k, reduce the workgroup size to 8x8 */
1663 localGroupSize[0] = 16;
1664 localGroupSize[1] = 16;
1665 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1666 + kernel->width*kernel->height*sizeof(float);
1667
1668 if (localMemoryRequirement > device->local_memory_size)
1669 {
1670 localGroupSize[0] = 8;
1671 localGroupSize[1] = 8;
1672 localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1673 + kernel->width*kernel->height*sizeof(float);
1674 }
1675 if (localMemoryRequirement <= device->local_memory_size)
1676 {
1677 /* get the OpenCL kernel */
1678 clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
1679 if (clkernel == NULL)
1680 {
1681 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1682 goto cleanup;
1683 }
1684
1685 /* set the kernel arguments */
1686 i = 0;
1687 clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1688 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1689 imageWidth = (unsigned int) image->columns;
1690 imageHeight = (unsigned int) image->rows;
1691 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1692 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1693 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1694 filterWidth = (unsigned int) kernel->width;
1695 filterHeight = (unsigned int) kernel->height;
1696 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1697 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1698 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1699 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1700 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1701 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
1702 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
1703 if (clStatus != CL_SUCCESS)
1704 {
1705 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1706 goto cleanup;
1707 }
1708
1709 /* pad the global size to a multiple of the local work size dimension */
1710 global_work_size[0] = ((image->columns + localGroupSize[0] - 1)/localGroupSize[0] ) * localGroupSize[0] ;
1711 global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
1712
1713 /* launch the kernel */
1714 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1715 if (clStatus != CL_SUCCESS)
1716 {
1717 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1718 goto cleanup;
1719 }
1720 RecordProfileData(device,clkernel,event);
1721 }
1722 else
1723 {
1724 /* get the OpenCL kernel */
1725 clkernel = AcquireOpenCLKernel(device,"Convolve");
1726 if (clkernel == NULL)
1727 {
1728 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1729 goto cleanup;
1730 }
1731
1732 /* set the kernel arguments */
1733 i = 0;
1734 clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1735 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1736 imageWidth = (unsigned int) image->columns;
1737 imageHeight = (unsigned int) image->rows;
1738 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1739 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1740 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1741 filterWidth = (unsigned int) kernel->width;
1742 filterHeight = (unsigned int) kernel->height;
1743 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1744 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1745 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1746 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1747 clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1748 if (clStatus != CL_SUCCESS)
1749 {
1750 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1751 goto cleanup;
1752 }
1753
1754 localGroupSize[0] = 8;
1755 localGroupSize[1] = 8;
1756 global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
1757 global_work_size[1] = (image->rows + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
1758 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1759
1760 if (clStatus != CL_SUCCESS)
1761 {
1762 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1763 goto cleanup;
1764 }
1765 }
1766 RecordProfileData(device,clkernel,event);
1767
1768 if (ALIGNED(filteredPixels,CLPixelPacket))
1769 {
1770 length = image->columns * image->rows;
1771 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1772 }
1773 else
1774 {
1775 length = image->columns * image->rows;
1776 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1777 }
1778 if (clStatus != CL_SUCCESS)
1779 {
1780 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1781 goto cleanup;
1782 }
1783
1784 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1785
1786 cleanup:
1787
1788 image_view=DestroyCacheView(image_view);
1789 if (filteredImage_view != NULL)
1790 filteredImage_view=DestroyCacheView(filteredImage_view);
1791 if (imageBuffer != NULL)
1792 clEnv->library->clReleaseMemObject(imageBuffer);
1793 if (filteredImageBuffer != NULL)
1794 clEnv->library->clReleaseMemObject(filteredImageBuffer);
1795 if (convolutionKernel != NULL)
1796 clEnv->library->clReleaseMemObject(convolutionKernel);
1797 if (clkernel != NULL)
1798 ReleaseOpenCLKernel(clkernel);
1799 if (queue != NULL)
1800 ReleaseOpenCLCommandQueue(device,queue);
1801 if (device != NULL)
1802 ReleaseOpenCLDevice(device);
1803 if (outputReady == MagickFalse)
1804 {
1805 if (filteredImage != NULL)
1806 {
1807 DestroyImage(filteredImage);
1808 filteredImage = NULL;
1809 }
1810 }
1811
1812 return(filteredImage);
1813 }
1814
AccelerateConvolveImage(const Image * image,const KernelInfo * kernel,ExceptionInfo * exception)1815 MagickPrivate Image *AccelerateConvolveImage(const Image *image,
1816 const KernelInfo *kernel,ExceptionInfo *exception)
1817 {
1818 /* Temporary disabled due to access violation
1819
1820 Image
1821 *filteredImage;
1822
1823 assert(image != NULL);
1824 assert(kernel != (KernelInfo *) NULL);
1825 assert(exception != (ExceptionInfo *) NULL);
1826 if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1827 (checkOpenCLEnvironment(exception) == MagickFalse))
1828 return((Image *) NULL);
1829
1830 filteredImage=ComputeConvolveImage(image,kernel,exception);
1831 return(filteredImage);
1832 */
1833 magick_unreferenced(image);
1834 magick_unreferenced(kernel);
1835 magick_unreferenced(exception);
1836 return((Image *)NULL);
1837 }
1838
1839 /*
1840 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1841 % %
1842 % %
1843 % %
1844 % A c c e l e r a t e D e s p e c k l e I m a g e %
1845 % %
1846 % %
1847 % %
1848 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1849 */
1850
ComputeDespeckleImage(const Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)1851 static Image *ComputeDespeckleImage(const Image *image,MagickCLEnv clEnv,
1852 ExceptionInfo*exception)
1853 {
1854 static const int
1855 X[4] = {0, 1, 1,-1},
1856 Y[4] = {1, 0, 1, 1};
1857
1858 CacheView
1859 *filteredImage_view,
1860 *image_view;
1861
1862 cl_command_queue
1863 queue;
1864
1865 cl_int
1866 clStatus;
1867
1868 cl_kernel
1869 hullPass1,
1870 hullPass2;
1871
1872 cl_event
1873 event;
1874
1875 cl_mem_flags
1876 mem_flags;
1877
1878 cl_mem
1879 filteredImageBuffer,
1880 imageBuffer,
1881 tempImageBuffer[2];
1882
1883 const void
1884 *inputPixels;
1885
1886 Image
1887 *filteredImage;
1888
1889 int
1890 k,
1891 matte;
1892
1893 MagickBooleanType
1894 outputReady;
1895
1896 MagickCLDevice
1897 device;
1898
1899 MagickSizeType
1900 length;
1901
1902 size_t
1903 global_work_size[2];
1904
1905 unsigned int
1906 imageHeight,
1907 imageWidth;
1908
1909 void
1910 *filteredPixels,
1911 *hostPtr;
1912
1913 outputReady = MagickFalse;
1914 inputPixels = NULL;
1915 filteredImage = NULL;
1916 filteredImage_view = NULL;
1917 filteredPixels = NULL;
1918 imageBuffer = NULL;
1919 filteredImageBuffer = NULL;
1920 hullPass1 = NULL;
1921 hullPass2 = NULL;
1922 queue = NULL;
1923 tempImageBuffer[0] = tempImageBuffer[1] = NULL;
1924
1925 device = RequestOpenCLDevice(clEnv);
1926 queue = AcquireOpenCLCommandQueue(device);
1927
1928 image_view=AcquireAuthenticCacheView(image,exception);
1929 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1930 if (inputPixels == (void *) NULL)
1931 {
1932 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1933 goto cleanup;
1934 }
1935
1936 if (ALIGNED(inputPixels,CLPixelPacket))
1937 {
1938 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1939 }
1940 else
1941 {
1942 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1943 }
1944 /* create a CL buffer from image pixel buffer */
1945 length = image->columns * image->rows;
1946 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1947 if (clStatus != CL_SUCCESS)
1948 {
1949 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1950 goto cleanup;
1951 }
1952
1953 mem_flags = CL_MEM_READ_WRITE;
1954 length = image->columns * image->rows;
1955 for (k = 0; k < 2; k++)
1956 {
1957 tempImageBuffer[k] = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
1958 if (clStatus != CL_SUCCESS)
1959 {
1960 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1961 goto cleanup;
1962 }
1963 }
1964
1965 filteredImage = CloneImage(image,0,0,MagickTrue,exception);
1966 assert(filteredImage != NULL);
1967 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1968 {
1969 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1970 goto cleanup;
1971 }
1972 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1973 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1974 if (filteredPixels == (void *) NULL)
1975 {
1976 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1977 goto cleanup;
1978 }
1979
1980 if (ALIGNED(filteredPixels,CLPixelPacket))
1981 {
1982 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1983 hostPtr = filteredPixels;
1984 }
1985 else
1986 {
1987 mem_flags = CL_MEM_WRITE_ONLY;
1988 hostPtr = NULL;
1989 }
1990 /* create a CL buffer from image pixel buffer */
1991 length = image->columns * image->rows;
1992 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1993 if (clStatus != CL_SUCCESS)
1994 {
1995 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1996 goto cleanup;
1997 }
1998
1999 hullPass1 = AcquireOpenCLKernel(device,"HullPass1");
2000 hullPass2 = AcquireOpenCLKernel(device,"HullPass2");
2001
2002 clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
2003 clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2004 imageWidth = (unsigned int) image->columns;
2005 clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
2006 imageHeight = (unsigned int) image->rows;
2007 clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
2008 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2009 clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
2010 if (clStatus != CL_SUCCESS)
2011 {
2012 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2013 goto cleanup;
2014 }
2015
2016 clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2017 clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
2018 imageWidth = (unsigned int) image->columns;
2019 clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
2020 imageHeight = (unsigned int) image->rows;
2021 clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
2022 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2023 clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
2024 if (clStatus != CL_SUCCESS)
2025 {
2026 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2027 goto cleanup;
2028 }
2029
2030
2031 global_work_size[0] = image->columns;
2032 global_work_size[1] = image->rows;
2033
2034
2035 for (k = 0; k < 4; k++)
2036 {
2037 cl_int2 offset;
2038 int polarity;
2039
2040
2041 offset.s[0] = X[k];
2042 offset.s[1] = Y[k];
2043 polarity = 1;
2044 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2045 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2046 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2047 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2048 if (clStatus != CL_SUCCESS)
2049 {
2050 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2051 goto cleanup;
2052 }
2053 /* launch the kernel */
2054 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2055 if (clStatus != CL_SUCCESS)
2056 {
2057 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2058 goto cleanup;
2059 }
2060 RecordProfileData(device,hullPass1,event);
2061
2062 /* launch the kernel */
2063 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2064 if (clStatus != CL_SUCCESS)
2065 {
2066 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2067 goto cleanup;
2068 }
2069 RecordProfileData(device,hullPass2,event);
2070
2071 if (k == 0)
2072 clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
2073 offset.s[0] = -X[k];
2074 offset.s[1] = -Y[k];
2075 polarity = 1;
2076 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2077 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2078 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2079 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2080 if (clStatus != CL_SUCCESS)
2081 {
2082 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2083 goto cleanup;
2084 }
2085 /* launch the kernel */
2086 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2087 if (clStatus != CL_SUCCESS)
2088 {
2089 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2090 goto cleanup;
2091 }
2092 RecordProfileData(device,hullPass1,event);
2093
2094 /* launch the kernel */
2095 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2096 if (clStatus != CL_SUCCESS)
2097 {
2098 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2099 goto cleanup;
2100 }
2101 RecordProfileData(device,hullPass2,event);
2102
2103 offset.s[0] = -X[k];
2104 offset.s[1] = -Y[k];
2105 polarity = -1;
2106 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2107 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2108 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2109 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2110 if (clStatus != CL_SUCCESS)
2111 {
2112 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2113 goto cleanup;
2114 }
2115 /* launch the kernel */
2116 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2117 if (clStatus != CL_SUCCESS)
2118 {
2119 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2120 goto cleanup;
2121 }
2122 RecordProfileData(device,hullPass1,event);
2123
2124 /* launch the kernel */
2125 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2126 if (clStatus != CL_SUCCESS)
2127 {
2128 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2129 goto cleanup;
2130 }
2131 RecordProfileData(device,hullPass2,event);
2132
2133 offset.s[0] = X[k];
2134 offset.s[1] = Y[k];
2135 polarity = -1;
2136 clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2137 clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2138 clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2139 clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2140
2141 if (k == 3)
2142 clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
2143
2144 if (clStatus != CL_SUCCESS)
2145 {
2146 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2147 goto cleanup;
2148 }
2149 /* launch the kernel */
2150 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2151 if (clStatus != CL_SUCCESS)
2152 {
2153 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2154 goto cleanup;
2155 }
2156 RecordProfileData(device,hullPass1,event);
2157
2158 /* launch the kernel */
2159 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2160 if (clStatus != CL_SUCCESS)
2161 {
2162 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2163 goto cleanup;
2164 }
2165 RecordProfileData(device,hullPass2,event);
2166 }
2167
2168 if (ALIGNED(filteredPixels,CLPixelPacket))
2169 {
2170 length = image->columns * image->rows;
2171 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2172 }
2173 else
2174 {
2175 length = image->columns * image->rows;
2176 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2177 }
2178 if (clStatus != CL_SUCCESS)
2179 {
2180 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2181 goto cleanup;
2182 }
2183
2184 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2185
2186 cleanup:
2187
2188 image_view=DestroyCacheView(image_view);
2189 if (filteredImage_view != NULL)
2190 filteredImage_view=DestroyCacheView(filteredImage_view);
2191
2192 if (queue != NULL)
2193 ReleaseOpenCLCommandQueue(device,queue);
2194 if (device != NULL)
2195 ReleaseOpenCLDevice(device);
2196 if (imageBuffer!=NULL)
2197 clEnv->library->clReleaseMemObject(imageBuffer);
2198 for (k = 0; k < 2; k++)
2199 {
2200 if (tempImageBuffer[k]!=NULL)
2201 clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
2202 }
2203 if (filteredImageBuffer!=NULL)
2204 clEnv->library->clReleaseMemObject(filteredImageBuffer);
2205 if (hullPass1!=NULL)
2206 ReleaseOpenCLKernel(hullPass1);
2207 if (hullPass2!=NULL)
2208 ReleaseOpenCLKernel(hullPass2);
2209 if (outputReady == MagickFalse && filteredImage != NULL)
2210 filteredImage=DestroyImage(filteredImage);
2211
2212 return(filteredImage);
2213 }
2214
AccelerateDespeckleImage(const Image * image,ExceptionInfo * exception)2215 MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
2216 ExceptionInfo* exception)
2217 {
2218 Image
2219 *filteredImage;
2220
2221 MagickCLEnv
2222 clEnv;
2223
2224 assert(image != NULL);
2225 assert(exception != (ExceptionInfo *) NULL);
2226
2227 if (checkAccelerateConditionRGBA(image) == MagickFalse)
2228 return((Image *) NULL);
2229
2230 clEnv=getOpenCLEnvironment(exception);
2231 if (clEnv == (MagickCLEnv) NULL)
2232 return((Image *) NULL);
2233
2234 filteredImage=ComputeDespeckleImage(image,clEnv,exception);
2235 return(filteredImage);
2236 }
2237
2238 /*
2239 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2240 % %
2241 % %
2242 % %
2243 % A c c e l e r a t e E q u a l i z e I m a g e %
2244 % %
2245 % %
2246 % %
2247 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2248 */
2249
ComputeEqualizeImage(Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)2250 static MagickBooleanType ComputeEqualizeImage(Image *image,MagickCLEnv clEnv,
2251 ExceptionInfo *exception)
2252 {
2253 #define EqualizeImageTag "Equalize/Image"
2254
2255 CacheView
2256 *image_view;
2257
2258 cl_command_queue
2259 queue;
2260
2261 cl_int
2262 clStatus;
2263
2264 cl_mem_flags
2265 mem_flags;
2266
2267 cl_mem
2268 equalizeMapBuffer,
2269 histogramBuffer,
2270 imageBuffer;
2271
2272 cl_kernel
2273 equalizeKernel,
2274 histogramKernel;
2275
2276 cl_event
2277 event;
2278
2279 cl_uint4
2280 *histogram;
2281
2282 cl_float4
2283 white,
2284 black,
2285 intensity,
2286 *map;
2287
2288 MagickBooleanType
2289 outputReady,
2290 status;
2291
2292 MagickCLDevice
2293 device;
2294
2295 MagickSizeType
2296 length;
2297
2298 PixelPacket
2299 *equalize_map;
2300
2301 register ssize_t
2302 i;
2303
2304 size_t
2305 global_work_size[2];
2306
2307 void
2308 *hostPtr,
2309 *inputPixels;
2310
2311 map=NULL;
2312 histogram=NULL;
2313 equalize_map=NULL;
2314 inputPixels = NULL;
2315 imageBuffer = NULL;
2316 histogramBuffer = NULL;
2317 equalizeMapBuffer = NULL;
2318 histogramKernel = NULL;
2319 equalizeKernel = NULL;
2320 queue = NULL;
2321 outputReady = MagickFalse;
2322
2323 assert(image != (Image *) NULL);
2324 assert(image->signature == MagickCoreSignature);
2325 if (image->debug != MagickFalse)
2326 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2327
2328 /*
2329 * initialize opencl env
2330 */
2331 device = RequestOpenCLDevice(clEnv);
2332 queue = AcquireOpenCLCommandQueue(device);
2333
2334 /*
2335 Allocate and initialize histogram arrays.
2336 */
2337 histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
2338 if (histogram == (cl_uint4 *) NULL)
2339 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2340
2341 /* reset histogram */
2342 (void) memset(histogram,0,(MaxMap+1)*sizeof(*histogram));
2343
2344 /* Create and initialize OpenCL buffers. */
2345 /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
2346 /* assume this will get a writable image */
2347 image_view=AcquireAuthenticCacheView(image,exception);
2348 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2349
2350 if (inputPixels == (void *) NULL)
2351 {
2352 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2353 goto cleanup;
2354 }
2355 /* If the host pointer is aligned to the size of CLPixelPacket,
2356 then use the host buffer directly from the GPU; otherwise,
2357 create a buffer on the GPU and copy the data over */
2358 if (ALIGNED(inputPixels,CLPixelPacket))
2359 {
2360 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2361 }
2362 else
2363 {
2364 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2365 }
2366 /* create a CL buffer from image pixel buffer */
2367 length = image->columns * image->rows;
2368 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2369 if (clStatus != CL_SUCCESS)
2370 {
2371 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2372 goto cleanup;
2373 }
2374
2375 /* If the host pointer is aligned to the size of cl_uint,
2376 then use the host buffer directly from the GPU; otherwise,
2377 create a buffer on the GPU and copy the data over */
2378 if (ALIGNED(histogram,cl_uint4))
2379 {
2380 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2381 hostPtr = histogram;
2382 }
2383 else
2384 {
2385 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2386 hostPtr = histogram;
2387 }
2388 /* create a CL buffer for histogram */
2389 length = (MaxMap+1);
2390 histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
2391 if (clStatus != CL_SUCCESS)
2392 {
2393 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2394 goto cleanup;
2395 }
2396
2397 status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask, exception);
2398 if (status == MagickFalse)
2399 goto cleanup;
2400
2401 /* read from the kenel output */
2402 if (ALIGNED(histogram,cl_uint4))
2403 {
2404 length = (MaxMap+1);
2405 clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
2406 }
2407 else
2408 {
2409 length = (MaxMap+1);
2410 clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
2411 }
2412 if (clStatus != CL_SUCCESS)
2413 {
2414 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2415 goto cleanup;
2416 }
2417
2418 /* unmap, don't block gpu to use this buffer again. */
2419 if (ALIGNED(histogram,cl_uint4))
2420 {
2421 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
2422 if (clStatus != CL_SUCCESS)
2423 {
2424 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
2425 goto cleanup;
2426 }
2427 }
2428
2429 /* recreate input buffer later, in case image updated */
2430 #ifdef RECREATEBUFFER
2431 if (imageBuffer!=NULL)
2432 clEnv->library->clReleaseMemObject(imageBuffer);
2433 #endif
2434
2435 /* CPU stuff */
2436 equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
2437 if (equalize_map == (PixelPacket *) NULL)
2438 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2439
2440 map=(cl_float4 *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
2441 if (map == (cl_float4 *) NULL)
2442 ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2443
2444 /*
2445 Integrate the histogram to get the equalization map.
2446 */
2447 (void) memset(&intensity,0,sizeof(intensity));
2448 for (i=0; i <= (ssize_t) MaxMap; i++)
2449 {
2450 if ((image->channel_mask & SyncChannels) != 0)
2451 {
2452 intensity.x+=histogram[i].s[2];
2453 map[i]=intensity;
2454 continue;
2455 }
2456 if ((image->channel_mask & RedChannel) != 0)
2457 intensity.x+=histogram[i].s[2];
2458 if ((image->channel_mask & GreenChannel) != 0)
2459 intensity.y+=histogram[i].s[1];
2460 if ((image->channel_mask & BlueChannel) != 0)
2461 intensity.z+=histogram[i].s[0];
2462 if ((image->channel_mask & AlphaChannel) != 0)
2463 intensity.w+=histogram[i].s[3];
2464 map[i]=intensity;
2465 }
2466 black=map[0];
2467 white=map[(int) MaxMap];
2468 (void) memset(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
2469 for (i=0; i <= (ssize_t) MaxMap; i++)
2470 {
2471 if ((image->channel_mask & SyncChannels) != 0)
2472 {
2473 if (white.x != black.x)
2474 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2475 (map[i].x-black.x))/(white.x-black.x)));
2476 continue;
2477 }
2478 if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
2479 equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2480 (map[i].x-black.x))/(white.x-black.x)));
2481 if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
2482 equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2483 (map[i].y-black.y))/(white.y-black.y)));
2484 if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
2485 equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2486 (map[i].z-black.z))/(white.z-black.z)));
2487 if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
2488 equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2489 (map[i].w-black.w))/(white.w-black.w)));
2490 }
2491
2492 if (image->storage_class == PseudoClass)
2493 {
2494 /*
2495 Equalize colormap.
2496 */
2497 for (i=0; i < (ssize_t) image->colors; i++)
2498 {
2499 if ((image->channel_mask & SyncChannels) != 0)
2500 {
2501 if (white.x != black.x)
2502 {
2503 image->colormap[i].red=equalize_map[
2504 ScaleQuantumToMap(image->colormap[i].red)].red;
2505 image->colormap[i].green=equalize_map[
2506 ScaleQuantumToMap(image->colormap[i].green)].red;
2507 image->colormap[i].blue=equalize_map[
2508 ScaleQuantumToMap(image->colormap[i].blue)].red;
2509 image->colormap[i].alpha=equalize_map[
2510 ScaleQuantumToMap(image->colormap[i].alpha)].red;
2511 }
2512 continue;
2513 }
2514 if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
2515 image->colormap[i].red=equalize_map[
2516 ScaleQuantumToMap(image->colormap[i].red)].red;
2517 if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
2518 image->colormap[i].green=equalize_map[
2519 ScaleQuantumToMap(image->colormap[i].green)].green;
2520 if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
2521 image->colormap[i].blue=equalize_map[
2522 ScaleQuantumToMap(image->colormap[i].blue)].blue;
2523 if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
2524 image->colormap[i].alpha=equalize_map[
2525 ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
2526 }
2527 }
2528
2529 /*
2530 Equalize image.
2531 */
2532
2533 /* GPU can work on this again, image and equalize map as input
2534 image: uchar4 (CLPixelPacket)
2535 equalize_map: uchar4 (PixelPacket)
2536 black, white: float4 (FloatPixelPacket) */
2537
2538 #ifdef RECREATEBUFFER
2539 /* If the host pointer is aligned to the size of CLPixelPacket,
2540 then use the host buffer directly from the GPU; otherwise,
2541 create a buffer on the GPU and copy the data over */
2542 if (ALIGNED(inputPixels,CLPixelPacket))
2543 {
2544 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2545 }
2546 else
2547 {
2548 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2549 }
2550 /* create a CL buffer from image pixel buffer */
2551 length = image->columns * image->rows;
2552 imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2553 if (clStatus != CL_SUCCESS)
2554 {
2555 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2556 goto cleanup;
2557 }
2558 #endif
2559
2560 /* Create and initialize OpenCL buffers. */
2561 if (ALIGNED(equalize_map, PixelPacket))
2562 {
2563 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2564 hostPtr = equalize_map;
2565 }
2566 else
2567 {
2568 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2569 hostPtr = equalize_map;
2570 }
2571 /* create a CL buffer for eqaulize_map */
2572 length = (MaxMap+1);
2573 equalizeMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
2574 if (clStatus != CL_SUCCESS)
2575 {
2576 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2577 goto cleanup;
2578 }
2579
2580 /* get the OpenCL kernel */
2581 equalizeKernel = AcquireOpenCLKernel(device,"Equalize");
2582 if (equalizeKernel == NULL)
2583 {
2584 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
2585 goto cleanup;
2586 }
2587
2588 /* set the kernel arguments */
2589 i = 0;
2590 clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2591 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&image->channel_mask);
2592 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
2593 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&white);
2594 clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&black);
2595 if (clStatus != CL_SUCCESS)
2596 {
2597 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2598 goto cleanup;
2599 }
2600
2601 /* launch the kernel */
2602 global_work_size[0] = image->columns;
2603 global_work_size[1] = image->rows;
2604
2605 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2606
2607 if (clStatus != CL_SUCCESS)
2608 {
2609 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2610 goto cleanup;
2611 }
2612 RecordProfileData(device,equalizeKernel,event);
2613
2614 /* read the data back */
2615 if (ALIGNED(inputPixels,CLPixelPacket))
2616 {
2617 length = image->columns * image->rows;
2618 clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2619 }
2620 else
2621 {
2622 length = image->columns * image->rows;
2623 clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
2624 }
2625 if (clStatus != CL_SUCCESS)
2626 {
2627 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2628 goto cleanup;
2629 }
2630
2631 outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
2632
2633 cleanup:
2634
2635 image_view=DestroyCacheView(image_view);
2636
2637 if (imageBuffer!=NULL)
2638 clEnv->library->clReleaseMemObject(imageBuffer);
2639 if (map!=NULL)
2640 map=(cl_float4 *) RelinquishMagickMemory(map);
2641 if (equalizeMapBuffer!=NULL)
2642 clEnv->library->clReleaseMemObject(equalizeMapBuffer);
2643 if (equalize_map!=NULL)
2644 equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
2645 if (histogramBuffer!=NULL)
2646 clEnv->library->clReleaseMemObject(histogramBuffer);
2647 if (histogram!=NULL)
2648 histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
2649 if (histogramKernel!=NULL)
2650 ReleaseOpenCLKernel(histogramKernel);
2651 if (equalizeKernel!=NULL)
2652 ReleaseOpenCLKernel(equalizeKernel);
2653 if (queue != NULL)
2654 ReleaseOpenCLCommandQueue(device, queue);
2655 if (device != NULL)
2656 ReleaseOpenCLDevice(device);
2657
2658 return(outputReady);
2659 }
2660
AccelerateEqualizeImage(Image * image,ExceptionInfo * exception)2661 MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
2662 ExceptionInfo *exception)
2663 {
2664 MagickBooleanType
2665 status;
2666
2667 MagickCLEnv
2668 clEnv;
2669
2670 assert(image != NULL);
2671 assert(exception != (ExceptionInfo *) NULL);
2672
2673 if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
2674 (checkHistogramCondition(image,image->intensity) == MagickFalse))
2675 return(MagickFalse);
2676
2677 clEnv=getOpenCLEnvironment(exception);
2678 if (clEnv == (MagickCLEnv) NULL)
2679 return(MagickFalse);
2680
2681 status=ComputeEqualizeImage(image,clEnv,exception);
2682 return(status);
2683 }
2684
2685 /*
2686 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2687 % %
2688 % %
2689 % %
2690 % A c c e l e r a t e F u n c t i o n I m a g e %
2691 % %
2692 % %
2693 % %
2694 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2695 */
2696
ComputeFunctionImage(Image * image,MagickCLEnv clEnv,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2697 static MagickBooleanType ComputeFunctionImage(Image *image,MagickCLEnv clEnv,
2698 const MagickFunction function,const size_t number_parameters,
2699 const double *parameters,ExceptionInfo *exception)
2700 {
2701 cl_command_queue
2702 queue;
2703
2704 cl_int
2705 status;
2706
2707 cl_kernel
2708 functionKernel;
2709
2710 cl_mem
2711 imageBuffer,
2712 parametersBuffer;
2713
2714 cl_uint
2715 number_params,
2716 number_channels;
2717
2718 float
2719 *parametersBufferPtr;
2720
2721 MagickBooleanType
2722 outputReady;
2723
2724 MagickCLDevice
2725 device;
2726
2727 size_t
2728 gsize[2],
2729 i;
2730
2731 outputReady=MagickFalse;
2732
2733 imageBuffer=NULL;
2734 functionKernel=NULL;
2735 parametersBuffer=NULL;
2736
2737 device=RequestOpenCLDevice(clEnv);
2738 queue=AcquireOpenCLCommandQueue(device);
2739 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2740 if (imageBuffer == (cl_mem) NULL)
2741 goto cleanup;
2742
2743 parametersBufferPtr=(float *) AcquireQuantumMemory(number_parameters,
2744 sizeof(float));
2745 if (parametersBufferPtr == (float *) NULL)
2746 goto cleanup;
2747 for (i=0; i<number_parameters; i++)
2748 parametersBufferPtr[i]=(float) parameters[i];
2749 parametersBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
2750 CL_MEM_COPY_HOST_PTR,number_parameters*sizeof(*parametersBufferPtr),
2751 parametersBufferPtr);
2752 parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
2753 if (parametersBuffer == (cl_mem) NULL)
2754 {
2755 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2756 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
2757 goto cleanup;
2758 }
2759
2760 functionKernel=AcquireOpenCLKernel(device,"ComputeFunction");
2761 if (functionKernel == (cl_kernel) NULL)
2762 {
2763 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2764 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2765 goto cleanup;
2766 }
2767
2768 number_channels=(cl_uint) image->number_channels;
2769 number_params=(cl_uint) number_parameters;
2770
2771 i=0;
2772 status =SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2773 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_channels);
2774 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
2775 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(MagickFunction),(void *)&function);
2776 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_params);
2777 status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)¶metersBuffer);
2778 if (status != CL_SUCCESS)
2779 {
2780 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2781 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2782 goto cleanup;
2783 }
2784
2785 gsize[0]=image->columns;
2786 gsize[1]=image->rows;
2787 outputReady=EnqueueOpenCLKernel(queue,functionKernel,2,(const size_t *) NULL,
2788 gsize,(const size_t *) NULL,image,(const Image *) NULL,MagickFalse,
2789 exception);
2790
2791 cleanup:
2792
2793 if (imageBuffer != (cl_mem) NULL)
2794 ReleaseOpenCLMemObject(imageBuffer);
2795 if (parametersBuffer != (cl_mem) NULL)
2796 ReleaseOpenCLMemObject(parametersBuffer);
2797 if (functionKernel != (cl_kernel) NULL)
2798 ReleaseOpenCLKernel(functionKernel);
2799 if (queue != (cl_command_queue) NULL)
2800 ReleaseOpenCLCommandQueue(device,queue);
2801 if (device != (MagickCLDevice) NULL)
2802 ReleaseOpenCLDevice(device);
2803 return(outputReady);
2804 }
2805
AccelerateFunctionImage(Image * image,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2806 MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
2807 const MagickFunction function,const size_t number_parameters,
2808 const double *parameters,ExceptionInfo *exception)
2809 {
2810 MagickBooleanType
2811 status;
2812
2813 MagickCLEnv
2814 clEnv;
2815
2816 assert(image != NULL);
2817 assert(exception != (ExceptionInfo *) NULL);
2818
2819 if (checkAccelerateCondition(image) == MagickFalse)
2820 return(MagickFalse);
2821
2822 clEnv=getOpenCLEnvironment(exception);
2823 if (clEnv == (MagickCLEnv) NULL)
2824 return(MagickFalse);
2825
2826 status=ComputeFunctionImage(image,clEnv,function,number_parameters,
2827 parameters,exception);
2828 return(status);
2829 }
2830
2831 /*
2832 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2833 % %
2834 % %
2835 % %
2836 % A c c e l e r a t e G r a y s c a l e I m a g e %
2837 % %
2838 % %
2839 % %
2840 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2841 */
2842
ComputeGrayscaleImage(Image * image,MagickCLEnv clEnv,const PixelIntensityMethod method,ExceptionInfo * exception)2843 static MagickBooleanType ComputeGrayscaleImage(Image *image,MagickCLEnv clEnv,
2844 const PixelIntensityMethod method,ExceptionInfo *exception)
2845 {
2846 cl_command_queue
2847 queue;
2848
2849 cl_int
2850 status;
2851
2852 cl_kernel
2853 grayscaleKernel;
2854
2855 cl_mem
2856 imageBuffer;
2857
2858 cl_uint
2859 number_channels,
2860 colorspace,
2861 intensityMethod;
2862
2863 MagickBooleanType
2864 outputReady;
2865
2866 MagickCLDevice
2867 device;
2868
2869 size_t
2870 gsize[2],
2871 i;
2872
2873 outputReady=MagickFalse;
2874 imageBuffer=NULL;
2875 grayscaleKernel=NULL;
2876
2877 assert(image != (Image *) NULL);
2878 assert(image->signature == MagickCoreSignature);
2879 device=RequestOpenCLDevice(clEnv);
2880 queue=AcquireOpenCLCommandQueue(device);
2881 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2882 if (imageBuffer == (cl_mem) NULL)
2883 goto cleanup;
2884
2885 grayscaleKernel=AcquireOpenCLKernel(device,"Grayscale");
2886 if (grayscaleKernel == (cl_kernel) NULL)
2887 {
2888 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2889 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2890 goto cleanup;
2891 }
2892
2893 number_channels=(cl_uint) image->number_channels;
2894 intensityMethod=(cl_uint) method;
2895 colorspace=(cl_uint) image->colorspace;
2896
2897 i=0;
2898 status =SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2899 status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&number_channels);
2900 status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&colorspace);
2901 status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&intensityMethod);
2902 if (status != CL_SUCCESS)
2903 {
2904 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2905 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2906 goto cleanup;
2907 }
2908
2909 gsize[0]=image->columns;
2910 gsize[1]=image->rows;
2911 outputReady=EnqueueOpenCLKernel(queue,grayscaleKernel,2,
2912 (const size_t *) NULL,gsize,(const size_t *) NULL,image,(Image *) NULL,
2913 MagickFalse,exception);
2914
2915 cleanup:
2916
2917 if (imageBuffer != (cl_mem) NULL)
2918 ReleaseOpenCLMemObject(imageBuffer);
2919 if (grayscaleKernel != (cl_kernel) NULL)
2920 ReleaseOpenCLKernel(grayscaleKernel);
2921 if (queue != (cl_command_queue) NULL)
2922 ReleaseOpenCLCommandQueue(device,queue);
2923 if (device != (MagickCLDevice) NULL)
2924 ReleaseOpenCLDevice(device);
2925
2926 return(outputReady);
2927 }
2928
AccelerateGrayscaleImage(Image * image,const PixelIntensityMethod method,ExceptionInfo * exception)2929 MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
2930 const PixelIntensityMethod method,ExceptionInfo *exception)
2931 {
2932 MagickBooleanType
2933 status;
2934
2935 MagickCLEnv
2936 clEnv;
2937
2938 assert(image != NULL);
2939 assert(exception != (ExceptionInfo *) NULL);
2940
2941 if ((checkAccelerateCondition(image) == MagickFalse) ||
2942 (checkPixelIntensity(image,method) == MagickFalse))
2943 return(MagickFalse);
2944
2945 if (image->number_channels < 3)
2946 return(MagickFalse);
2947
2948 if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
2949 (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
2950 (GetPixelBlueTraits(image) == UndefinedPixelTrait))
2951 return(MagickFalse);
2952
2953 clEnv=getOpenCLEnvironment(exception);
2954 if (clEnv == (MagickCLEnv) NULL)
2955 return(MagickFalse);
2956
2957 status=ComputeGrayscaleImage(image,clEnv,method,exception);
2958 return(status);
2959 }
2960
2961 /*
2962 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2963 % %
2964 % %
2965 % %
2966 % A c c e l e r a t e L o c a l C o n t r a s t I m a g e %
2967 % %
2968 % %
2969 % %
2970 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2971 */
2972
ComputeLocalContrastImage(const Image * image,MagickCLEnv clEnv,const double radius,const double strength,ExceptionInfo * exception)2973 static Image *ComputeLocalContrastImage(const Image *image,MagickCLEnv clEnv,
2974 const double radius,const double strength,ExceptionInfo *exception)
2975 {
2976 CacheView
2977 *filteredImage_view,
2978 *image_view;
2979
2980 cl_command_queue
2981 queue;
2982
2983 cl_int
2984 clStatus,
2985 iRadius;
2986
2987 cl_kernel
2988 blurRowKernel,
2989 blurColumnKernel;
2990
2991 cl_event
2992 event;
2993
2994 cl_mem
2995 filteredImageBuffer,
2996 imageBuffer,
2997 imageKernelBuffer,
2998 tempImageBuffer;
2999
3000 cl_mem_flags
3001 mem_flags;
3002
3003 const void
3004 *inputPixels;
3005
3006 Image
3007 *filteredImage;
3008
3009 MagickBooleanType
3010 outputReady;
3011
3012 MagickCLDevice
3013 device;
3014
3015 MagickSizeType
3016 length;
3017
3018 void
3019 *filteredPixels,
3020 *hostPtr;
3021
3022 unsigned int
3023 i,
3024 imageColumns,
3025 imageRows,
3026 passes;
3027
3028 filteredImage = NULL;
3029 filteredImage_view = NULL;
3030 imageBuffer = NULL;
3031 filteredImageBuffer = NULL;
3032 tempImageBuffer = NULL;
3033 imageKernelBuffer = NULL;
3034 blurRowKernel = NULL;
3035 blurColumnKernel = NULL;
3036 queue = NULL;
3037 outputReady = MagickFalse;
3038
3039 device = RequestOpenCLDevice(clEnv);
3040 queue = AcquireOpenCLCommandQueue(device);
3041
3042 /* Create and initialize OpenCL buffers. */
3043 {
3044 image_view=AcquireAuthenticCacheView(image,exception);
3045 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3046 if (inputPixels == (const void *) NULL)
3047 {
3048 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3049 goto cleanup;
3050 }
3051
3052 /* If the host pointer is aligned to the size of CLPixelPacket,
3053 then use the host buffer directly from the GPU; otherwise,
3054 create a buffer on the GPU and copy the data over */
3055 if (ALIGNED(inputPixels,CLPixelPacket))
3056 {
3057 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3058 }
3059 else
3060 {
3061 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3062 }
3063 /* create a CL buffer from image pixel buffer */
3064 length = image->columns * image->rows;
3065 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3066 if (clStatus != CL_SUCCESS)
3067 {
3068 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3069 goto cleanup;
3070 }
3071 }
3072
3073 /* create output */
3074 {
3075 filteredImage = CloneImage(image,0,0,MagickTrue,exception);
3076 assert(filteredImage != NULL);
3077 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3078 {
3079 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
3080 goto cleanup;
3081 }
3082 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3083 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3084 if (filteredPixels == (void *) NULL)
3085 {
3086 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3087 goto cleanup;
3088 }
3089
3090 if (ALIGNED(filteredPixels,CLPixelPacket))
3091 {
3092 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3093 hostPtr = filteredPixels;
3094 }
3095 else
3096 {
3097 mem_flags = CL_MEM_WRITE_ONLY;
3098 hostPtr = NULL;
3099 }
3100
3101 /* create a CL buffer from image pixel buffer */
3102 length = image->columns * image->rows;
3103 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3104 if (clStatus != CL_SUCCESS)
3105 {
3106 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3107 goto cleanup;
3108 }
3109 }
3110
3111 {
3112 /* create temp buffer */
3113 {
3114 length = image->columns * image->rows;
3115 tempImageBuffer = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
3116 if (clStatus != CL_SUCCESS)
3117 {
3118 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3119 goto cleanup;
3120 }
3121 }
3122
3123 /* get the opencl kernel */
3124 {
3125 blurRowKernel = AcquireOpenCLKernel(device,"LocalContrastBlurRow");
3126 if (blurRowKernel == NULL)
3127 {
3128 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3129 goto cleanup;
3130 };
3131
3132 blurColumnKernel = AcquireOpenCLKernel(device,"LocalContrastBlurApplyColumn");
3133 if (blurColumnKernel == NULL)
3134 {
3135 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3136 goto cleanup;
3137 };
3138 }
3139
3140 {
3141 imageColumns = (unsigned int) image->columns;
3142 imageRows = (unsigned int) image->rows;
3143 iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius); /* Normalized radius, 100% gives blur radius of 20% of the largest dimension */
3144
3145 passes = (((1.0f * imageRows) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
3146 passes = (passes < 1) ? 1: passes;
3147
3148 /* set the kernel arguments */
3149 i = 0;
3150 clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3151 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3152 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3153 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
3154 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3155 clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3156
3157 if (clStatus != CL_SUCCESS)
3158 {
3159 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3160 goto cleanup;
3161 }
3162 }
3163
3164 /* launch the kernel */
3165 {
3166 int x;
3167 for (x = 0; x < passes; ++x) {
3168 size_t gsize[2];
3169 size_t wsize[2];
3170 size_t goffset[2];
3171
3172 gsize[0] = 256;
3173 gsize[1] = (image->rows + passes - 1) / passes;
3174 wsize[0] = 256;
3175 wsize[1] = 1;
3176 goffset[0] = 0;
3177 goffset[1] = x * gsize[1];
3178
3179 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3180 if (clStatus != CL_SUCCESS)
3181 {
3182 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3183 goto cleanup;
3184 }
3185 clEnv->library->clFlush(queue);
3186 RecordProfileData(device,blurRowKernel,event);
3187 }
3188 }
3189
3190 {
3191 cl_float FStrength = strength;
3192 i = 0;
3193 clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3194 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3195 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3196 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
3197 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
3198 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3199 clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3200
3201 if (clStatus != CL_SUCCESS)
3202 {
3203 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3204 goto cleanup;
3205 }
3206 }
3207
3208 /* launch the kernel */
3209 {
3210 int x;
3211 for (x = 0; x < passes; ++x) {
3212 size_t gsize[2];
3213 size_t wsize[2];
3214 size_t goffset[2];
3215
3216 gsize[0] = ((image->columns + 3) / 4) * 4;
3217 gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
3218 wsize[0] = 4;
3219 wsize[1] = 64;
3220 goffset[0] = 0;
3221 goffset[1] = x * gsize[1];
3222
3223 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3224 if (clStatus != CL_SUCCESS)
3225 {
3226 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3227 goto cleanup;
3228 }
3229 clEnv->library->clFlush(queue);
3230 RecordProfileData(device,blurColumnKernel,event);
3231 }
3232 }
3233 }
3234
3235 /* get result */
3236 if (ALIGNED(filteredPixels,CLPixelPacket))
3237 {
3238 length = image->columns * image->rows;
3239 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3240 }
3241 else
3242 {
3243 length = image->columns * image->rows;
3244 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3245 }
3246 if (clStatus != CL_SUCCESS)
3247 {
3248 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3249 goto cleanup;
3250 }
3251
3252 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3253
3254 cleanup:
3255
3256 image_view=DestroyCacheView(image_view);
3257 if (filteredImage_view != NULL)
3258 filteredImage_view=DestroyCacheView(filteredImage_view);
3259
3260 if (imageBuffer!=NULL)
3261 clEnv->library->clReleaseMemObject(imageBuffer);
3262 if (filteredImageBuffer!=NULL)
3263 clEnv->library->clReleaseMemObject(filteredImageBuffer);
3264 if (tempImageBuffer!=NULL)
3265 clEnv->library->clReleaseMemObject(tempImageBuffer);
3266 if (imageKernelBuffer!=NULL)
3267 clEnv->library->clReleaseMemObject(imageKernelBuffer);
3268 if (blurRowKernel!=NULL)
3269 ReleaseOpenCLKernel(blurRowKernel);
3270 if (blurColumnKernel!=NULL)
3271 ReleaseOpenCLKernel(blurColumnKernel);
3272 if (queue != NULL)
3273 ReleaseOpenCLCommandQueue(device, queue);
3274 if (device != NULL)
3275 ReleaseOpenCLDevice(device);
3276 if (outputReady == MagickFalse)
3277 {
3278 if (filteredImage != NULL)
3279 {
3280 DestroyImage(filteredImage);
3281 filteredImage = NULL;
3282 }
3283 }
3284
3285 return(filteredImage);
3286 }
3287
AccelerateLocalContrastImage(const Image * image,const double radius,const double strength,ExceptionInfo * exception)3288 MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
3289 const double radius,const double strength,ExceptionInfo *exception)
3290 {
3291 Image
3292 *filteredImage;
3293
3294 MagickCLEnv
3295 clEnv;
3296
3297 assert(image != NULL);
3298 assert(exception != (ExceptionInfo *) NULL);
3299
3300 if (checkAccelerateConditionRGBA(image) == MagickFalse)
3301 return((Image *) NULL);
3302
3303 clEnv=getOpenCLEnvironment(exception);
3304 if (clEnv == (MagickCLEnv) NULL)
3305 return((Image *) NULL);
3306
3307 filteredImage=ComputeLocalContrastImage(image,clEnv,radius,strength,
3308 exception);
3309 return(filteredImage);
3310 }
3311
3312 /*
3313 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3314 % %
3315 % %
3316 % %
3317 % A c c e l e r a t e M o d u l a t e I m a g e %
3318 % %
3319 % %
3320 % %
3321 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3322 */
3323
ComputeModulateImage(Image * image,MagickCLEnv clEnv,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3324 static MagickBooleanType ComputeModulateImage(Image *image,MagickCLEnv clEnv,
3325 const double percent_brightness,const double percent_hue,
3326 const double percent_saturation,const ColorspaceType colorspace,
3327 ExceptionInfo *exception)
3328 {
3329 CacheView
3330 *image_view;
3331
3332 cl_float
3333 bright,
3334 hue,
3335 saturation;
3336
3337 cl_command_queue
3338 queue;
3339
3340 cl_int
3341 color,
3342 clStatus;
3343
3344 cl_kernel
3345 modulateKernel;
3346
3347 cl_event
3348 event;
3349
3350 cl_mem
3351 imageBuffer;
3352
3353 cl_mem_flags
3354 mem_flags;
3355
3356 MagickBooleanType
3357 outputReady;
3358
3359 MagickCLDevice
3360 device;
3361
3362 MagickSizeType
3363 length;
3364
3365 register ssize_t
3366 i;
3367
3368 void
3369 *inputPixels;
3370
3371 inputPixels = NULL;
3372 imageBuffer = NULL;
3373 modulateKernel = NULL;
3374
3375 assert(image != (Image *) NULL);
3376 assert(image->signature == MagickCoreSignature);
3377 if (image->debug != MagickFalse)
3378 (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3379
3380 /*
3381 * initialize opencl env
3382 */
3383 device = RequestOpenCLDevice(clEnv);
3384 queue = AcquireOpenCLCommandQueue(device);
3385
3386 outputReady = MagickFalse;
3387
3388 /* Create and initialize OpenCL buffers.
3389 inputPixels = AcquirePixelCachePixels(image, &length, exception);
3390 assume this will get a writable image
3391 */
3392 image_view=AcquireAuthenticCacheView(image,exception);
3393 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3394 if (inputPixels == (void *) NULL)
3395 {
3396 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3397 goto cleanup;
3398 }
3399
3400 /* If the host pointer is aligned to the size of CLPixelPacket,
3401 then use the host buffer directly from the GPU; otherwise,
3402 create a buffer on the GPU and copy the data over
3403 */
3404 if (ALIGNED(inputPixels,CLPixelPacket))
3405 {
3406 mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3407 }
3408 else
3409 {
3410 mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3411 }
3412 /* create a CL buffer from image pixel buffer */
3413 length = image->columns * image->rows;
3414 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3415 if (clStatus != CL_SUCCESS)
3416 {
3417 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3418 goto cleanup;
3419 }
3420
3421 modulateKernel = AcquireOpenCLKernel(device, "Modulate");
3422 if (modulateKernel == NULL)
3423 {
3424 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3425 goto cleanup;
3426 }
3427
3428 bright=percent_brightness;
3429 hue=percent_hue;
3430 saturation=percent_saturation;
3431 color=colorspace;
3432
3433 i = 0;
3434 clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3435 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3436 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3437 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3438 clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3439 if (clStatus != CL_SUCCESS)
3440 {
3441 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3442 goto cleanup;
3443 }
3444
3445 {
3446 size_t global_work_size[2];
3447 global_work_size[0] = image->columns;
3448 global_work_size[1] = image->rows;
3449 /* launch the kernel */
3450 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3451 if (clStatus != CL_SUCCESS)
3452 {
3453 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3454 goto cleanup;
3455 }
3456 RecordProfileData(device,modulateKernel,event);
3457 }
3458
3459 if (ALIGNED(inputPixels,CLPixelPacket))
3460 {
3461 length = image->columns * image->rows;
3462 clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3463 }
3464 else
3465 {
3466 length = image->columns * image->rows;
3467 clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3468 }
3469 if (clStatus != CL_SUCCESS)
3470 {
3471 (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3472 goto cleanup;
3473 }
3474
3475 outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
3476
3477 cleanup:
3478
3479 image_view=DestroyCacheView(image_view);
3480
3481 if (imageBuffer!=NULL)
3482 clEnv->library->clReleaseMemObject(imageBuffer);
3483 if (modulateKernel!=NULL)
3484 ReleaseOpenCLKernel(modulateKernel);
3485 if (queue != NULL)
3486 ReleaseOpenCLCommandQueue(device,queue);
3487 if (device != NULL)
3488 ReleaseOpenCLDevice(device);
3489
3490 return outputReady;
3491
3492 }
3493
AccelerateModulateImage(Image * image,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3494 MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
3495 const double percent_brightness,const double percent_hue,
3496 const double percent_saturation,const ColorspaceType colorspace,
3497 ExceptionInfo *exception)
3498 {
3499 MagickBooleanType
3500 status;
3501
3502 MagickCLEnv
3503 clEnv;
3504
3505 assert(image != NULL);
3506 assert(exception != (ExceptionInfo *) NULL);
3507
3508 if (checkAccelerateConditionRGBA(image) == MagickFalse)
3509 return(MagickFalse);
3510
3511 if ((colorspace != HSLColorspace) && (colorspace != UndefinedColorspace))
3512 return(MagickFalse);
3513
3514 clEnv=getOpenCLEnvironment(exception);
3515 if (clEnv == (MagickCLEnv) NULL)
3516 return(MagickFalse);
3517
3518 status=ComputeModulateImage(image,clEnv,percent_brightness,percent_hue,
3519 percent_saturation,colorspace,exception);
3520 return(status);
3521 }
3522
3523 /*
3524 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3525 % %
3526 % %
3527 % %
3528 % A c c e l e r a t e M o t i o n B l u r I m a g e %
3529 % %
3530 % %
3531 % %
3532 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3533 */
3534
ComputeMotionBlurImage(const Image * image,MagickCLEnv clEnv,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3535 static Image* ComputeMotionBlurImage(const Image *image,MagickCLEnv clEnv,
3536 const double *kernel,const size_t width,const OffsetInfo *offset,
3537 ExceptionInfo *exception)
3538 {
3539 CacheView
3540 *filteredImage_view,
3541 *image_view;
3542
3543 cl_command_queue
3544 queue;
3545
3546 cl_float4
3547 biasPixel;
3548
3549 cl_int
3550 clStatus;
3551
3552 cl_kernel
3553 motionBlurKernel;
3554
3555 cl_event
3556 event;
3557
3558 cl_mem
3559 filteredImageBuffer,
3560 imageBuffer,
3561 imageKernelBuffer,
3562 offsetBuffer;
3563
3564 cl_mem_flags
3565 mem_flags;
3566
3567 const void
3568 *inputPixels;
3569
3570 float
3571 *kernelBufferPtr;
3572
3573 Image
3574 *filteredImage;
3575
3576 int
3577 *offsetBufferPtr;
3578
3579 MagickBooleanType
3580 outputReady;
3581
3582 MagickCLDevice
3583 device;
3584
3585 PixelInfo
3586 bias;
3587
3588 MagickSizeType
3589 length;
3590
3591 size_t
3592 global_work_size[2],
3593 local_work_size[2];
3594
3595 unsigned int
3596 i,
3597 imageHeight,
3598 imageWidth,
3599 matte;
3600
3601 void
3602 *filteredPixels,
3603 *hostPtr;
3604
3605 outputReady = MagickFalse;
3606 filteredImage = NULL;
3607 filteredImage_view = NULL;
3608 imageBuffer = NULL;
3609 filteredImageBuffer = NULL;
3610 imageKernelBuffer = NULL;
3611 motionBlurKernel = NULL;
3612 queue = NULL;
3613
3614 device = RequestOpenCLDevice(clEnv);
3615
3616 /* Create and initialize OpenCL buffers. */
3617
3618 image_view=AcquireAuthenticCacheView(image,exception);
3619 inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3620 if (inputPixels == (const void *) NULL)
3621 {
3622 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3623 "UnableToReadPixelCache.","`%s'",image->filename);
3624 goto cleanup;
3625 }
3626
3627 /*
3628 If the host pointer is aligned to the size of CLPixelPacket, then use
3629 the host buffer directly from the GPU; otherwise, create a buffer on
3630 the GPU and copy the data over
3631 */
3632 if (ALIGNED(inputPixels,CLPixelPacket))
3633 {
3634 mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3635 }
3636 else
3637 {
3638 mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3639 }
3640 /*
3641 create a CL buffer from image pixel buffer
3642 */
3643 length = image->columns * image->rows;
3644 imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3645 length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3646 if (clStatus != CL_SUCCESS)
3647 {
3648 (void) ThrowMagickException(exception, GetMagickModule(),
3649 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3650 goto cleanup;
3651 }
3652
3653
3654 filteredImage = CloneImage(image,image->columns,image->rows,
3655 MagickTrue,exception);
3656 assert(filteredImage != NULL);
3657 if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3658 {
3659 (void) ThrowMagickException(exception, GetMagickModule(),
3660 ResourceLimitError, "CloneImage failed.", ".");
3661 goto cleanup;
3662 }
3663 filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3664 filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3665 if (filteredPixels == (void *) NULL)
3666 {
3667 (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3668 "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3669 goto cleanup;
3670 }
3671
3672 if (ALIGNED(filteredPixels,CLPixelPacket))
3673 {
3674 mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3675 hostPtr = filteredPixels;
3676 }
3677 else
3678 {
3679 mem_flags = CL_MEM_WRITE_ONLY;
3680 hostPtr = NULL;
3681 }
3682 /*
3683 Create a CL buffer from image pixel buffer.
3684 */
3685 length = image->columns * image->rows;
3686 filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3687 length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3688 if (clStatus != CL_SUCCESS)
3689 {
3690 (void) ThrowMagickException(exception, GetMagickModule(),
3691 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3692 goto cleanup;
3693 }
3694
3695
3696 imageKernelBuffer = clEnv->library->clCreateBuffer(device->context,
3697 CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
3698 &clStatus);
3699 if (clStatus != CL_SUCCESS)
3700 {
3701 (void) ThrowMagickException(exception, GetMagickModule(),
3702 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3703 goto cleanup;
3704 }
3705
3706 queue = AcquireOpenCLCommandQueue(device);
3707 kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
3708 CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
3709 if (clStatus != CL_SUCCESS)
3710 {
3711 (void) ThrowMagickException(exception, GetMagickModule(),
3712 ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3713 goto cleanup;
3714 }
3715 for (i = 0; i < width; i++)
3716 {
3717 kernelBufferPtr[i] = (float) kernel[i];
3718 }
3719 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
3720 0, NULL, NULL);
3721 if (clStatus != CL_SUCCESS)
3722 {
3723 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3724 "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3725 goto cleanup;
3726 }
3727
3728 offsetBuffer = clEnv->library->clCreateBuffer(device->context,
3729 CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
3730 &clStatus);
3731 if (clStatus != CL_SUCCESS)
3732 {
3733 (void) ThrowMagickException(exception, GetMagickModule(),
3734 ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3735 goto cleanup;
3736 }
3737
3738 offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
3739 CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
3740 if (clStatus != CL_SUCCESS)
3741 {
3742 (void) ThrowMagickException(exception, GetMagickModule(),
3743 ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3744 goto cleanup;
3745 }
3746 for (i = 0; i < width; i++)
3747 {
3748 offsetBufferPtr[2*i] = (int)offset[i].x;
3749 offsetBufferPtr[2*i+1] = (int)offset[i].y;
3750 }
3751 clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
3752 NULL, NULL);
3753 if (clStatus != CL_SUCCESS)
3754 {
3755 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3756 "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3757 goto cleanup;
3758 }
3759
3760
3761 /*
3762 Get the OpenCL kernel
3763 */
3764 motionBlurKernel = AcquireOpenCLKernel(device,"MotionBlur");
3765 if (motionBlurKernel == NULL)
3766 {
3767 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3768 "AcquireOpenCLKernel failed.", ".");
3769 goto cleanup;
3770 }
3771
3772 /*
3773 Set the kernel arguments.
3774 */
3775 i = 0;
3776 clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3777 (void *)&imageBuffer);
3778 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3779 (void *)&filteredImageBuffer);
3780 imageWidth = (unsigned int) image->columns;
3781 imageHeight = (unsigned int) image->rows;
3782 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3783 &imageWidth);
3784 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3785 &imageHeight);
3786 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3787 (void *)&imageKernelBuffer);
3788 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3789 &width);
3790 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3791 (void *)&offsetBuffer);
3792
3793 GetPixelInfo(image,&bias);
3794 biasPixel.s[0] = bias.red;
3795 biasPixel.s[1] = bias.green;
3796 biasPixel.s[2] = bias.blue;
3797 biasPixel.s[3] = bias.alpha;
3798 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
3799
3800 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
3801 matte = (image->alpha_trait > CopyPixelTrait)?1:0;
3802 clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
3803 if (clStatus != CL_SUCCESS)
3804 {
3805 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3806 "clEnv->library->clSetKernelArg failed.", ".");
3807 goto cleanup;
3808 }
3809
3810 /*
3811 Launch the kernel.
3812 */
3813 local_work_size[0] = 16;
3814 local_work_size[1] = 16;
3815 global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3816 (unsigned int) image->columns,(unsigned int) local_work_size[0]);
3817 global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3818 (unsigned int) image->rows,(unsigned int) local_work_size[1]);
3819 clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
3820 global_work_size, local_work_size, 0, NULL, &event);
3821
3822 if (clStatus != CL_SUCCESS)
3823 {
3824 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3825 "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3826 goto cleanup;
3827 }
3828 RecordProfileData(device,motionBlurKernel,event);
3829
3830 if (ALIGNED(filteredPixels,CLPixelPacket))
3831 {
3832 length = image->columns * image->rows;
3833 clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
3834 CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
3835 NULL, &clStatus);
3836 }
3837 else
3838 {
3839 length = image->columns * image->rows;
3840 clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
3841 length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3842 }
3843 if (clStatus != CL_SUCCESS)
3844 {
3845 (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3846 "Reading output image from CL buffer failed.", ".");
3847 goto cleanup;
3848 }
3849 outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3850
3851 cleanup:
3852
3853 image_view=DestroyCacheView(image_view);
3854 if (filteredImage_view != NULL)
3855 filteredImage_view=DestroyCacheView(filteredImage_view);
3856
3857 if (filteredImageBuffer!=NULL)
3858 clEnv->library->clReleaseMemObject(filteredImageBuffer);
3859 if (imageBuffer!=NULL)
3860 clEnv->library->clReleaseMemObject(imageBuffer);
3861 if (imageKernelBuffer!=NULL)
3862 clEnv->library->clReleaseMemObject(imageKernelBuffer);
3863 if (motionBlurKernel!=NULL)
3864 ReleaseOpenCLKernel(motionBlurKernel);
3865 if (queue != NULL)
3866 ReleaseOpenCLCommandQueue(device,queue);
3867 if (device != NULL)
3868 ReleaseOpenCLDevice(device);
3869 if (outputReady == MagickFalse && filteredImage != NULL)
3870 filteredImage=DestroyImage(filteredImage);
3871
3872 return(filteredImage);
3873 }
3874
AccelerateMotionBlurImage(const Image * image,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3875 MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
3876 const double* kernel,const size_t width,const OffsetInfo *offset,
3877 ExceptionInfo *exception)
3878 {
3879 Image
3880 *filteredImage;
3881
3882 MagickCLEnv
3883 clEnv;
3884
3885 assert(image != NULL);
3886 assert(kernel != (double *) NULL);
3887 assert(offset != (OffsetInfo *) NULL);
3888 assert(exception != (ExceptionInfo *) NULL);
3889
3890 if (checkAccelerateConditionRGBA(image) == MagickFalse)
3891 return((Image *) NULL);
3892
3893 clEnv=getOpenCLEnvironment(exception);
3894 if (clEnv == (MagickCLEnv) NULL)
3895 return((Image *) NULL);
3896
3897 filteredImage=ComputeMotionBlurImage(image,clEnv,kernel,width,offset,
3898 exception);
3899 return(filteredImage);
3900 }
3901
3902 /*
3903 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3904 % %
3905 % %
3906 % %
3907 % A c c e l e r a t e R e s i z e I m a g e %
3908 % %
3909 % %
3910 % %
3911 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3912 */
3913
resizeHorizontalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float xFactor,ExceptionInfo * exception)3914 static MagickBooleanType resizeHorizontalFilter(MagickCLDevice device,
3915 cl_command_queue queue,const Image *image,Image *filteredImage,
3916 cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
3917 cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
3918 const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
3919 const float xFactor,ExceptionInfo *exception)
3920 {
3921 cl_kernel
3922 horizontalKernel;
3923
3924 cl_int
3925 status;
3926
3927 const unsigned int
3928 workgroupSize = 256;
3929
3930 float
3931 resizeFilterScale,
3932 resizeFilterSupport,
3933 resizeFilterWindowSupport,
3934 resizeFilterBlur,
3935 scale,
3936 support;
3937
3938 int
3939 cacheRangeStart,
3940 cacheRangeEnd,
3941 numCachedPixels,
3942 resizeFilterType,
3943 resizeWindowType;
3944
3945 MagickBooleanType
3946 outputReady;
3947
3948 size_t
3949 gammaAccumulatorLocalMemorySize,
3950 gsize[2],
3951 i,
3952 imageCacheLocalMemorySize,
3953 pixelAccumulatorLocalMemorySize,
3954 lsize[2],
3955 totalLocalMemorySize,
3956 weightAccumulatorLocalMemorySize;
3957
3958 unsigned int
3959 chunkSize,
3960 pixelPerWorkgroup;
3961
3962 horizontalKernel=NULL;
3963 outputReady=MagickFalse;
3964
3965 /*
3966 Apply filter to resize vertically from image to resize image.
3967 */
3968 scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
3969 support=scale*GetResizeFilterSupport(resizeFilter);
3970 if (support < 0.5)
3971 {
3972 /*
3973 Support too small even for nearest neighbour: Reduce to point
3974 sampling.
3975 */
3976 support=(float) 0.5;
3977 scale=1.0;
3978 }
3979 scale=PerceptibleReciprocal(scale);
3980
3981 if (resizedColumns < workgroupSize)
3982 {
3983 chunkSize=32;
3984 pixelPerWorkgroup=32;
3985 }
3986 else
3987 {
3988 chunkSize=workgroupSize;
3989 pixelPerWorkgroup=workgroupSize;
3990 }
3991
3992 DisableMSCWarning(4127)
3993 while(1)
3994 RestoreMSCWarning
3995 {
3996 /* calculate the local memory size needed per workgroup */
3997 cacheRangeStart=(int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
3998 cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+
3999 MagickEpsilon)+support+0.5);
4000 numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4001 imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4002 number_channels;
4003 totalLocalMemorySize=imageCacheLocalMemorySize;
4004
4005 /* local size for the pixel accumulator */
4006 pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4007 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4008
4009 /* local memory size for the weight accumulator */
4010 weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4011 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4012
4013 /* local memory size for the gamma accumulator */
4014 if ((number_channels == 4) || (number_channels == 2))
4015 gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4016 else
4017 gammaAccumulatorLocalMemorySize=sizeof(float);
4018 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4019
4020 if (totalLocalMemorySize <= device->local_memory_size)
4021 break;
4022 else
4023 {
4024 pixelPerWorkgroup=pixelPerWorkgroup/2;
4025 chunkSize=chunkSize/2;
4026 if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4027 {
4028 /* quit, fallback to CPU */
4029 goto cleanup;
4030 }
4031 }
4032 }
4033
4034 resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4035 resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4036
4037 horizontalKernel=AcquireOpenCLKernel(device,"ResizeHorizontalFilter");
4038 if (horizontalKernel == (cl_kernel) NULL)
4039 {
4040 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4041 ResourceLimitWarning,"AcquireOpenCLKernel failed.", ".");
4042 goto cleanup;
4043 }
4044
4045 resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4046 resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4047 resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4048 resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4049
4050 i=0;
4051 status =SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4052 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4053 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&columns);
4054 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&rows);
4055 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4056 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4057 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4058 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&xFactor);
4059 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4060 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4061 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4062 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4063 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4064 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4065 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4066 status|=SetOpenCLKernelArg(horizontalKernel,i++,imageCacheLocalMemorySize,NULL);
4067 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),&numCachedPixels);
4068 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&pixelPerWorkgroup);
4069 status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&chunkSize);
4070 status|=SetOpenCLKernelArg(horizontalKernel,i++,pixelAccumulatorLocalMemorySize,NULL);
4071 status|=SetOpenCLKernelArg(horizontalKernel,i++,weightAccumulatorLocalMemorySize,NULL);
4072 status|=SetOpenCLKernelArg(horizontalKernel,i++,gammaAccumulatorLocalMemorySize,NULL);
4073
4074 if (status != CL_SUCCESS)
4075 {
4076 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4077 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4078 goto cleanup;
4079 }
4080
4081 gsize[0]=(resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4082 workgroupSize;
4083 gsize[1]=resizedRows;
4084 lsize[0]=workgroupSize;
4085 lsize[1]=1;
4086 outputReady=EnqueueOpenCLKernel(queue,horizontalKernel,2,
4087 (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4088 exception);
4089
4090 cleanup:
4091
4092 if (horizontalKernel != (cl_kernel) NULL)
4093 ReleaseOpenCLKernel(horizontalKernel);
4094
4095 return(outputReady);
4096 }
4097
resizeVerticalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float yFactor,ExceptionInfo * exception)4098 static MagickBooleanType resizeVerticalFilter(MagickCLDevice device,
4099 cl_command_queue queue,const Image *image,Image * filteredImage,
4100 cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
4101 cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
4102 const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
4103 const float yFactor,ExceptionInfo *exception)
4104 {
4105 cl_kernel
4106 verticalKernel;
4107
4108 cl_int
4109 status;
4110
4111 const unsigned int
4112 workgroupSize = 256;
4113
4114 float
4115 resizeFilterScale,
4116 resizeFilterSupport,
4117 resizeFilterWindowSupport,
4118 resizeFilterBlur,
4119 scale,
4120 support;
4121
4122 int
4123 cacheRangeStart,
4124 cacheRangeEnd,
4125 numCachedPixels,
4126 resizeFilterType,
4127 resizeWindowType;
4128
4129 MagickBooleanType
4130 outputReady;
4131
4132 size_t
4133 gammaAccumulatorLocalMemorySize,
4134 gsize[2],
4135 i,
4136 imageCacheLocalMemorySize,
4137 pixelAccumulatorLocalMemorySize,
4138 lsize[2],
4139 totalLocalMemorySize,
4140 weightAccumulatorLocalMemorySize;
4141
4142 unsigned int
4143 chunkSize,
4144 pixelPerWorkgroup;
4145
4146 verticalKernel=NULL;
4147 outputReady=MagickFalse;
4148
4149 /*
4150 Apply filter to resize vertically from image to resize image.
4151 */
4152 scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
4153 support=scale*GetResizeFilterSupport(resizeFilter);
4154 if (support < 0.5)
4155 {
4156 /*
4157 Support too small even for nearest neighbour: Reduce to point
4158 sampling.
4159 */
4160 support=(float) 0.5;
4161 scale=1.0;
4162 }
4163 scale=PerceptibleReciprocal(scale);
4164
4165 if (resizedRows < workgroupSize)
4166 {
4167 chunkSize=32;
4168 pixelPerWorkgroup=32;
4169 }
4170 else
4171 {
4172 chunkSize=workgroupSize;
4173 pixelPerWorkgroup=workgroupSize;
4174 }
4175
4176 DisableMSCWarning(4127)
4177 while(1)
4178 RestoreMSCWarning
4179 {
4180 /* calculate the local memory size needed per workgroup */
4181 cacheRangeStart=(int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
4182 cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+
4183 MagickEpsilon)+support+0.5);
4184 numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4185 imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4186 number_channels;
4187 totalLocalMemorySize=imageCacheLocalMemorySize;
4188
4189 /* local size for the pixel accumulator */
4190 pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4191 totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4192
4193 /* local memory size for the weight accumulator */
4194 weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4195 totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4196
4197 /* local memory size for the gamma accumulator */
4198 if ((number_channels == 4) || (number_channels == 2))
4199 gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4200 else
4201 gammaAccumulatorLocalMemorySize=sizeof(float);
4202 totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4203
4204 if (totalLocalMemorySize <= device->local_memory_size)
4205 break;
4206 else
4207 {
4208 pixelPerWorkgroup=pixelPerWorkgroup/2;
4209 chunkSize=chunkSize/2;
4210 if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4211 {
4212 /* quit, fallback to CPU */
4213 goto cleanup;
4214 }
4215 }
4216 }
4217
4218 resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4219 resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4220
4221 verticalKernel=AcquireOpenCLKernel(device,"ResizeVerticalFilter");
4222 if (verticalKernel == (cl_kernel) NULL)
4223 {
4224 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4225 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4226 goto cleanup;
4227 }
4228
4229 resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4230 resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4231 resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4232 resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4233
4234 i=0;
4235 status =SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4236 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4237 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&columns);
4238 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&rows);
4239 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4240 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4241 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4242 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&yFactor);
4243 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4244 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4245 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4246 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4247 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4248 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4249 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4250 status|=SetOpenCLKernelArg(verticalKernel,i++,imageCacheLocalMemorySize, NULL);
4251 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int), &numCachedPixels);
4252 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &pixelPerWorkgroup);
4253 status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &chunkSize);
4254 status|=SetOpenCLKernelArg(verticalKernel,i++,pixelAccumulatorLocalMemorySize, NULL);
4255 status|=SetOpenCLKernelArg(verticalKernel,i++,weightAccumulatorLocalMemorySize, NULL);
4256 status|=SetOpenCLKernelArg(verticalKernel,i++,gammaAccumulatorLocalMemorySize, NULL);
4257
4258 if (status != CL_SUCCESS)
4259 {
4260 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4261 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4262 goto cleanup;
4263 }
4264
4265 gsize[0]=resizedColumns;
4266 gsize[1]=(resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4267 workgroupSize;
4268 lsize[0]=1;
4269 lsize[1]=workgroupSize;
4270 outputReady=EnqueueOpenCLKernel(queue,verticalKernel,2,(const size_t *) NULL,
4271 gsize,lsize,image,filteredImage,MagickFalse,exception);
4272
4273 cleanup:
4274
4275 if (verticalKernel != (cl_kernel) NULL)
4276 ReleaseOpenCLKernel(verticalKernel);
4277
4278 return(outputReady);
4279 }
4280
ComputeResizeImage(const Image * image,MagickCLEnv clEnv,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4281 static Image *ComputeResizeImage(const Image* image,MagickCLEnv clEnv,
4282 const size_t resizedColumns,const size_t resizedRows,
4283 const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4284 {
4285 cl_command_queue
4286 queue;
4287
4288 cl_mem
4289 cubicCoefficientsBuffer,
4290 filteredImageBuffer,
4291 imageBuffer,
4292 tempImageBuffer;
4293
4294 cl_uint
4295 number_channels;
4296
4297 const double
4298 *resizeFilterCoefficient;
4299
4300 float
4301 coefficientBuffer[7],
4302 xFactor,
4303 yFactor;
4304
4305 MagickBooleanType
4306 outputReady;
4307
4308 MagickCLDevice
4309 device;
4310
4311 MagickSizeType
4312 length;
4313
4314 Image
4315 *filteredImage;
4316
4317 size_t
4318 i;
4319
4320 filteredImage=NULL;
4321 imageBuffer=NULL;
4322 filteredImageBuffer=NULL;
4323 tempImageBuffer=NULL;
4324 cubicCoefficientsBuffer=NULL;
4325 outputReady=MagickFalse;
4326
4327 device=RequestOpenCLDevice(clEnv);
4328 queue=AcquireOpenCLCommandQueue(device);
4329 filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,
4330 exception);
4331 if (filteredImage == (Image *) NULL)
4332 goto cleanup;
4333 if (filteredImage->number_channels != image->number_channels)
4334 goto cleanup;
4335 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4336 if (imageBuffer == (cl_mem) NULL)
4337 goto cleanup;
4338 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4339 if (filteredImageBuffer == (cl_mem) NULL)
4340 goto cleanup;
4341
4342 resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
4343 for (i = 0; i < 7; i++)
4344 coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
4345 cubicCoefficientsBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
4346 CL_MEM_READ_ONLY,sizeof(coefficientBuffer),&coefficientBuffer);
4347 if (cubicCoefficientsBuffer == (cl_mem) NULL)
4348 {
4349 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4350 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4351 goto cleanup;
4352 }
4353
4354 number_channels=(cl_uint) image->number_channels;
4355 xFactor=(float) resizedColumns/(float) image->columns;
4356 yFactor=(float) resizedRows/(float) image->rows;
4357 if (xFactor > yFactor)
4358 {
4359 length=resizedColumns*image->rows*number_channels;
4360 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4361 sizeof(CLQuantum),(void *) NULL);
4362 if (tempImageBuffer == (cl_mem) NULL)
4363 {
4364 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4365 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4366 goto cleanup;
4367 }
4368
4369 outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4370 imageBuffer,number_channels,(cl_uint) image->columns,
4371 (cl_uint) image->rows,tempImageBuffer,(cl_uint) resizedColumns,
4372 (cl_uint) image->rows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4373 exception);
4374 if (outputReady == MagickFalse)
4375 goto cleanup;
4376
4377 outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4378 tempImageBuffer,number_channels,(cl_uint) resizedColumns,
4379 (cl_uint) image->rows,filteredImageBuffer,(cl_uint) resizedColumns,
4380 (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4381 exception);
4382 if (outputReady == MagickFalse)
4383 goto cleanup;
4384 }
4385 else
4386 {
4387 length=image->columns*resizedRows*number_channels;
4388 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4389 sizeof(CLQuantum),(void *) NULL);
4390 if (tempImageBuffer == (cl_mem) NULL)
4391 {
4392 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4393 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4394 goto cleanup;
4395 }
4396
4397 outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4398 imageBuffer,number_channels,(cl_uint) image->columns,
4399 (cl_int) image->rows,tempImageBuffer,(cl_uint) image->columns,
4400 (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4401 exception);
4402 if (outputReady == MagickFalse)
4403 goto cleanup;
4404
4405 outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4406 tempImageBuffer,number_channels,(cl_uint) image->columns,
4407 (cl_uint) resizedRows,filteredImageBuffer,(cl_uint) resizedColumns,
4408 (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4409 exception);
4410 if (outputReady == MagickFalse)
4411 goto cleanup;
4412 }
4413
4414 cleanup:
4415
4416 if (imageBuffer != (cl_mem) NULL)
4417 ReleaseOpenCLMemObject(imageBuffer);
4418 if (filteredImageBuffer != (cl_mem) NULL)
4419 ReleaseOpenCLMemObject(filteredImageBuffer);
4420 if (tempImageBuffer != (cl_mem) NULL)
4421 ReleaseOpenCLMemObject(tempImageBuffer);
4422 if (cubicCoefficientsBuffer != (cl_mem) NULL)
4423 ReleaseOpenCLMemObject(cubicCoefficientsBuffer);
4424 if (queue != (cl_command_queue) NULL)
4425 ReleaseOpenCLCommandQueue(device,queue);
4426 if (device != (MagickCLDevice) NULL)
4427 ReleaseOpenCLDevice(device);
4428 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4429 filteredImage=DestroyImage(filteredImage);
4430
4431 return(filteredImage);
4432 }
4433
gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)4434 static MagickBooleanType gpuSupportedResizeWeighting(
4435 ResizeWeightingFunctionType f)
4436 {
4437 unsigned int
4438 i;
4439
4440 for (i = 0; ;i++)
4441 {
4442 if (supportedResizeWeighting[i] == LastWeightingFunction)
4443 break;
4444 if (supportedResizeWeighting[i] == f)
4445 return(MagickTrue);
4446 }
4447 return(MagickFalse);
4448 }
4449
AccelerateResizeImage(const Image * image,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4450 MagickPrivate Image *AccelerateResizeImage(const Image *image,
4451 const size_t resizedColumns,const size_t resizedRows,
4452 const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4453 {
4454 Image
4455 *filteredImage;
4456
4457 MagickCLEnv
4458 clEnv;
4459
4460 assert(image != NULL);
4461 assert(exception != (ExceptionInfo *) NULL);
4462
4463 if (checkAccelerateCondition(image) == MagickFalse)
4464 return((Image *) NULL);
4465
4466 if ((gpuSupportedResizeWeighting(GetResizeFilterWeightingType(
4467 resizeFilter)) == MagickFalse) ||
4468 (gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(
4469 resizeFilter)) == MagickFalse))
4470 return((Image *) NULL);
4471
4472 clEnv=getOpenCLEnvironment(exception);
4473 if (clEnv == (MagickCLEnv) NULL)
4474 return((Image *) NULL);
4475
4476 filteredImage=ComputeResizeImage(image,clEnv,resizedColumns,resizedRows,
4477 resizeFilter,exception);
4478 return(filteredImage);
4479 }
4480
4481 /*
4482 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4483 % %
4484 % %
4485 % %
4486 % A c c e l e r a t e R o t a t i o n a l B l u r I m a g e %
4487 % %
4488 % %
4489 % %
4490 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4491 */
4492
ComputeRotationalBlurImage(const Image * image,MagickCLEnv clEnv,const double angle,ExceptionInfo * exception)4493 static Image* ComputeRotationalBlurImage(const Image *image,MagickCLEnv clEnv,
4494 const double angle,ExceptionInfo *exception)
4495 {
4496 cl_command_queue
4497 queue;
4498
4499 cl_float2
4500 blurCenter;
4501
4502 cl_int
4503 status;
4504
4505 cl_mem
4506 cosThetaBuffer,
4507 filteredImageBuffer,
4508 imageBuffer,
4509 sinThetaBuffer;
4510
4511 cl_kernel
4512 rotationalBlurKernel;
4513
4514 cl_uint
4515 cossin_theta_size,
4516 number_channels;
4517
4518 float
4519 blurRadius,
4520 *cosThetaPtr,
4521 offset,
4522 *sinThetaPtr,
4523 theta;
4524
4525 Image
4526 *filteredImage;
4527
4528 MagickBooleanType
4529 outputReady;
4530
4531 MagickCLDevice
4532 device;
4533
4534 size_t
4535 gsize[2],
4536 i;
4537
4538 filteredImage=NULL;
4539 imageBuffer=NULL;
4540 filteredImageBuffer=NULL;
4541 sinThetaBuffer=NULL;
4542 cosThetaBuffer=NULL;
4543 rotationalBlurKernel=NULL;
4544 outputReady=MagickFalse;
4545
4546 device=RequestOpenCLDevice(clEnv);
4547 queue=AcquireOpenCLCommandQueue(device);
4548 filteredImage=cloneImage(image,exception);
4549 if (filteredImage == (Image *) NULL)
4550 goto cleanup;
4551 if (filteredImage->number_channels != image->number_channels)
4552 goto cleanup;
4553 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4554 if (imageBuffer == (cl_mem) NULL)
4555 goto cleanup;
4556 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4557 if (filteredImageBuffer == (cl_mem) NULL)
4558 goto cleanup;
4559
4560 blurCenter.x=(float) (image->columns-1)/2.0;
4561 blurCenter.y=(float) (image->rows-1)/2.0;
4562 blurRadius=hypot(blurCenter.x,blurCenter.y);
4563 cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt(
4564 (double) blurRadius)+2UL);
4565
4566 cosThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4567 if (cosThetaPtr == (float *) NULL)
4568 goto cleanup;
4569 sinThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4570 if (sinThetaPtr == (float *) NULL)
4571 {
4572 cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4573 goto cleanup;
4574 }
4575
4576 theta=DegreesToRadians(angle)/(double) (cossin_theta_size-1);
4577 offset=theta*(float) (cossin_theta_size-1)/2.0;
4578 for (i=0; i < (ssize_t) cossin_theta_size; i++)
4579 {
4580 cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
4581 sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
4582 }
4583
4584 sinThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4585 CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),sinThetaPtr);
4586 sinThetaPtr=RelinquishMagickMemory(sinThetaPtr);
4587 cosThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4588 CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),cosThetaPtr);
4589 cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4590 if ((sinThetaBuffer == (cl_mem) NULL) || (cosThetaBuffer == (cl_mem) NULL))
4591 {
4592 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4593 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4594 goto cleanup;
4595 }
4596
4597 rotationalBlurKernel=AcquireOpenCLKernel(device,"RotationalBlur");
4598 if (rotationalBlurKernel == (cl_kernel) NULL)
4599 {
4600 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4601 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4602 goto cleanup;
4603 }
4604
4605 number_channels=(cl_uint) image->number_channels;
4606
4607 i=0;
4608 status =SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4609 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint),&number_channels);
4610 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
4611 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
4612 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
4613 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
4614 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint), &cossin_theta_size);
4615 status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4616 if (status != CL_SUCCESS)
4617 {
4618 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4619 ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4620 goto cleanup;
4621 }
4622
4623 gsize[0]=image->columns;
4624 gsize[1]=image->rows;
4625 outputReady=EnqueueOpenCLKernel(queue,rotationalBlurKernel,2,
4626 (const size_t *) NULL,gsize,(const size_t *) NULL,image,filteredImage,
4627 MagickFalse,exception);
4628
4629 cleanup:
4630
4631 if (imageBuffer != (cl_mem) NULL)
4632 ReleaseOpenCLMemObject(imageBuffer);
4633 if (filteredImageBuffer != (cl_mem) NULL)
4634 ReleaseOpenCLMemObject(filteredImageBuffer);
4635 if (sinThetaBuffer != (cl_mem) NULL)
4636 ReleaseOpenCLMemObject(sinThetaBuffer);
4637 if (cosThetaBuffer != (cl_mem) NULL)
4638 ReleaseOpenCLMemObject(cosThetaBuffer);
4639 if (rotationalBlurKernel != (cl_kernel) NULL)
4640 ReleaseOpenCLKernel(rotationalBlurKernel);
4641 if (queue != (cl_command_queue) NULL)
4642 ReleaseOpenCLCommandQueue(device,queue);
4643 if (device != (MagickCLDevice) NULL)
4644 ReleaseOpenCLDevice(device);
4645 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4646 filteredImage=DestroyImage(filteredImage);
4647
4648 return(filteredImage);
4649 }
4650
AccelerateRotationalBlurImage(const Image * image,const double angle,ExceptionInfo * exception)4651 MagickPrivate Image* AccelerateRotationalBlurImage(const Image *image,
4652 const double angle,ExceptionInfo *exception)
4653 {
4654 Image
4655 *filteredImage;
4656
4657 MagickCLEnv
4658 clEnv;
4659
4660 assert(image != NULL);
4661 assert(exception != (ExceptionInfo *) NULL);
4662
4663 if (checkAccelerateCondition(image) == MagickFalse)
4664 return((Image *) NULL);
4665
4666 clEnv=getOpenCLEnvironment(exception);
4667 if (clEnv == (MagickCLEnv) NULL)
4668 return((Image *) NULL);
4669
4670 filteredImage=ComputeRotationalBlurImage(image,clEnv,angle,exception);
4671 return filteredImage;
4672 }
4673
4674 /*
4675 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4676 % %
4677 % %
4678 % %
4679 % A c c e l e r a t e U n s h a r p M a s k I m a g e %
4680 % %
4681 % %
4682 % %
4683 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4684 */
4685
ComputeUnsharpMaskImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4686 static Image *ComputeUnsharpMaskImage(const Image *image,MagickCLEnv clEnv,
4687 const double radius,const double sigma,const double gain,
4688 const double threshold,ExceptionInfo *exception)
4689 {
4690 cl_command_queue
4691 queue;
4692
4693 cl_int
4694 status;
4695
4696 cl_kernel
4697 blurRowKernel,
4698 unsharpMaskBlurColumnKernel;
4699
4700 cl_mem
4701 filteredImageBuffer,
4702 imageBuffer,
4703 imageKernelBuffer,
4704 tempImageBuffer;
4705
4706 cl_uint
4707 imageColumns,
4708 imageRows,
4709 kernelWidth,
4710 number_channels;
4711
4712 float
4713 fGain,
4714 fThreshold;
4715
4716 Image
4717 *filteredImage;
4718
4719 int
4720 chunkSize;
4721
4722 MagickBooleanType
4723 outputReady;
4724
4725 MagickCLDevice
4726 device;
4727
4728 MagickSizeType
4729 length;
4730
4731 size_t
4732 gsize[2],
4733 i,
4734 lsize[2];
4735
4736 filteredImage=NULL;
4737 imageBuffer=NULL;
4738 filteredImageBuffer=NULL;
4739 tempImageBuffer=NULL;
4740 imageKernelBuffer=NULL;
4741 blurRowKernel=NULL;
4742 unsharpMaskBlurColumnKernel=NULL;
4743 outputReady=MagickFalse;
4744
4745 device=RequestOpenCLDevice(clEnv);
4746 queue=AcquireOpenCLCommandQueue(device);
4747 filteredImage=cloneImage(image,exception);
4748 if (filteredImage == (Image *) NULL)
4749 goto cleanup;
4750 if (filteredImage->number_channels != image->number_channels)
4751 goto cleanup;
4752 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4753 if (imageBuffer == (cl_mem) NULL)
4754 goto cleanup;
4755 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4756 if (filteredImageBuffer == (cl_mem) NULL)
4757 goto cleanup;
4758
4759 imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4760 exception);
4761
4762 length=image->columns*image->rows;
4763 tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4764 sizeof(cl_float4),NULL);
4765 if (tempImageBuffer == (cl_mem) NULL)
4766 {
4767 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4768 ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4769 goto cleanup;
4770 }
4771
4772 blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
4773 if (blurRowKernel == (cl_kernel) NULL)
4774 {
4775 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4776 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4777 goto cleanup;
4778 }
4779
4780 unsharpMaskBlurColumnKernel=AcquireOpenCLKernel(device,
4781 "UnsharpMaskBlurColumn");
4782 if (unsharpMaskBlurColumnKernel == (cl_kernel) NULL)
4783 {
4784 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4785 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4786 goto cleanup;
4787 }
4788
4789 number_channels=(cl_uint) image->number_channels;
4790 imageColumns=(cl_uint) image->columns;
4791 imageRows=(cl_uint) image->rows;
4792
4793 chunkSize = 256;
4794
4795 i=0;
4796 status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4797 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
4798 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
4799 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4800 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4801 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4802 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4803 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
4804 status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4805 if (status != CL_SUCCESS)
4806 {
4807 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4808 ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4809 goto cleanup;
4810 }
4811
4812 gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
4813 gsize[1]=image->rows;
4814 lsize[0]=chunkSize;
4815 lsize[1]=1;
4816 outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,
4817 (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4818 exception);
4819
4820 chunkSize=256;
4821 fGain=(float) gain;
4822 fThreshold=(float) threshold;
4823
4824 i=0;
4825 status =SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4826 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4827 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
4828 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
4829 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4830 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4831 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
4832 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
4833 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4834 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4835 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
4836 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
4837 status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4838 if (status != CL_SUCCESS)
4839 {
4840 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4841 ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4842 goto cleanup;
4843 }
4844
4845 gsize[0]=image->columns;
4846 gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
4847 lsize[0]=1;
4848 lsize[1]=chunkSize;
4849 outputReady=EnqueueOpenCLKernel(queue,unsharpMaskBlurColumnKernel,2,
4850 (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4851 exception);
4852
4853 cleanup:
4854
4855 if (imageBuffer != (cl_mem) NULL)
4856 ReleaseOpenCLMemObject(imageBuffer);
4857 if (filteredImageBuffer != (cl_mem) NULL)
4858 ReleaseOpenCLMemObject(filteredImageBuffer);
4859 if (tempImageBuffer != (cl_mem) NULL)
4860 ReleaseOpenCLMemObject(tempImageBuffer);
4861 if (imageKernelBuffer != (cl_mem) NULL)
4862 ReleaseOpenCLMemObject(imageKernelBuffer);
4863 if (blurRowKernel != (cl_kernel) NULL)
4864 ReleaseOpenCLKernel(blurRowKernel);
4865 if (unsharpMaskBlurColumnKernel != (cl_kernel) NULL)
4866 ReleaseOpenCLKernel(unsharpMaskBlurColumnKernel);
4867 if (queue != (cl_command_queue) NULL)
4868 ReleaseOpenCLCommandQueue(device,queue);
4869 if (device != (MagickCLDevice) NULL)
4870 ReleaseOpenCLDevice(device);
4871 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4872 filteredImage=DestroyImage(filteredImage);
4873
4874 return(filteredImage);
4875 }
4876
ComputeUnsharpMaskImageSingle(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4877 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
4878 MagickCLEnv clEnv,const double radius,const double sigma,const double gain,
4879 const double threshold,ExceptionInfo *exception)
4880 {
4881 cl_command_queue
4882 queue;
4883
4884 cl_int
4885 status;
4886
4887 cl_kernel
4888 unsharpMaskKernel;
4889
4890 cl_mem
4891 filteredImageBuffer,
4892 imageBuffer,
4893 imageKernelBuffer;
4894
4895 cl_uint
4896 imageColumns,
4897 imageRows,
4898 kernelWidth,
4899 number_channels;
4900
4901 float
4902 fGain,
4903 fThreshold;
4904
4905 Image
4906 *filteredImage;
4907
4908 MagickBooleanType
4909 outputReady;
4910
4911 MagickCLDevice
4912 device;
4913
4914 size_t
4915 gsize[2],
4916 i,
4917 lsize[2];
4918
4919 filteredImage=NULL;
4920 imageBuffer=NULL;
4921 filteredImageBuffer=NULL;
4922 imageKernelBuffer=NULL;
4923 unsharpMaskKernel=NULL;
4924 outputReady=MagickFalse;
4925
4926 device=RequestOpenCLDevice(clEnv);
4927 queue=AcquireOpenCLCommandQueue(device);
4928 filteredImage=cloneImage(image,exception);
4929 if (filteredImage == (Image *) NULL)
4930 goto cleanup;
4931 if (filteredImage->number_channels != image->number_channels)
4932 goto cleanup;
4933 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4934 if (imageBuffer == (cl_mem) NULL)
4935 goto cleanup;
4936 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4937 if (filteredImageBuffer == (cl_mem) NULL)
4938 goto cleanup;
4939
4940 imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4941 exception);
4942
4943 unsharpMaskKernel=AcquireOpenCLKernel(device,"UnsharpMask");
4944 if (unsharpMaskKernel == NULL)
4945 {
4946 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4947 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4948 goto cleanup;
4949 }
4950
4951 imageColumns=(cl_uint) image->columns;
4952 imageRows=(cl_uint) image->rows;
4953 number_channels=(cl_uint) image->number_channels;
4954 fGain=(float) gain;
4955 fThreshold=(float) threshold;
4956
4957 i=0;
4958 status =SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4959 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels);
4960 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
4961 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4962 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4963 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4964 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4965 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL);
4966 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
4967 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
4968 status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4969 if (status != CL_SUCCESS)
4970 {
4971 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4972 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4973 goto cleanup;
4974 }
4975
4976 gsize[0]=((image->columns + 7) / 8)*8;
4977 gsize[1]=((image->rows + 31) / 32)*32;
4978 lsize[0]=8;
4979 lsize[1]=32;
4980 outputReady=EnqueueOpenCLKernel(queue,unsharpMaskKernel,2,(const size_t *) NULL,
4981 gsize,lsize,image,filteredImage,MagickFalse,exception);
4982
4983 cleanup:
4984
4985 if (imageBuffer != (cl_mem) NULL)
4986 ReleaseOpenCLMemObject(imageBuffer);
4987 if (filteredImageBuffer != (cl_mem) NULL)
4988 ReleaseOpenCLMemObject(filteredImageBuffer);
4989 if (imageKernelBuffer != (cl_mem) NULL)
4990 ReleaseOpenCLMemObject(imageKernelBuffer);
4991 if (unsharpMaskKernel != (cl_kernel) NULL)
4992 ReleaseOpenCLKernel(unsharpMaskKernel);
4993 if (queue != (cl_command_queue) NULL)
4994 ReleaseOpenCLCommandQueue(device,queue);
4995 if (device != (MagickCLDevice) NULL)
4996 ReleaseOpenCLDevice(device);
4997 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4998 filteredImage=DestroyImage(filteredImage);
4999
5000 return(filteredImage);
5001 }
5002
AccelerateUnsharpMaskImage(const Image * image,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)5003 MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
5004 const double radius,const double sigma,const double gain,
5005 const double threshold,ExceptionInfo *exception)
5006 {
5007 Image
5008 *filteredImage;
5009
5010 MagickCLEnv
5011 clEnv;
5012
5013 assert(image != NULL);
5014 assert(exception != (ExceptionInfo *) NULL);
5015
5016 if (checkAccelerateCondition(image) == MagickFalse)
5017 return((Image *) NULL);
5018
5019 clEnv=getOpenCLEnvironment(exception);
5020 if (clEnv == (MagickCLEnv) NULL)
5021 return((Image *) NULL);
5022
5023 if (radius < 12.1)
5024 filteredImage=ComputeUnsharpMaskImageSingle(image,clEnv,radius,sigma,gain,
5025 threshold,exception);
5026 else
5027 filteredImage=ComputeUnsharpMaskImage(image,clEnv,radius,sigma,gain,
5028 threshold,exception);
5029 return(filteredImage);
5030 }
5031
ComputeWaveletDenoiseImage(const Image * image,MagickCLEnv clEnv,const double threshold,ExceptionInfo * exception)5032 static Image *ComputeWaveletDenoiseImage(const Image *image,MagickCLEnv clEnv,
5033 const double threshold,ExceptionInfo *exception)
5034 {
5035 cl_command_queue
5036 queue;
5037
5038 const cl_int
5039 PASSES=5;
5040
5041 const int
5042 TILESIZE=64,
5043 PAD=1<<(PASSES-1),
5044 SIZE=TILESIZE-2*PAD;
5045
5046 cl_float
5047 thresh;
5048
5049 cl_int
5050 status;
5051
5052 cl_kernel
5053 denoiseKernel;
5054
5055 cl_mem
5056 filteredImageBuffer,
5057 imageBuffer;
5058
5059 cl_uint
5060 number_channels,
5061 width,
5062 height,
5063 max_channels;
5064
5065 Image
5066 *filteredImage;
5067
5068 MagickBooleanType
5069 outputReady;
5070
5071 MagickCLDevice
5072 device;
5073
5074 size_t
5075 goffset[2],
5076 gsize[2],
5077 i,
5078 lsize[2],
5079 passes,
5080 x;
5081
5082 filteredImage=NULL;
5083 imageBuffer=NULL;
5084 filteredImageBuffer=NULL;
5085 denoiseKernel=NULL;
5086 queue=NULL;
5087 outputReady=MagickFalse;
5088
5089 device=RequestOpenCLDevice(clEnv);
5090 /* Work around an issue on low end Intel devices */
5091 if (strcmp("Intel(R) HD Graphics",device->name) == 0)
5092 goto cleanup;
5093 queue=AcquireOpenCLCommandQueue(device);
5094 filteredImage=CloneImage(image,0,0,MagickTrue,
5095 exception);
5096 if (filteredImage == (Image *) NULL)
5097 goto cleanup;
5098 if (filteredImage->number_channels != image->number_channels)
5099 goto cleanup;
5100 imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
5101 if (imageBuffer == (cl_mem) NULL)
5102 goto cleanup;
5103 filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
5104 if (filteredImageBuffer == (cl_mem) NULL)
5105 goto cleanup;
5106
5107 denoiseKernel=AcquireOpenCLKernel(device,"WaveletDenoise");
5108 if (denoiseKernel == (cl_kernel) NULL)
5109 {
5110 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5111 ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5112 goto cleanup;
5113 }
5114
5115 number_channels=(cl_uint)image->number_channels;
5116 width=(cl_uint)image->columns;
5117 height=(cl_uint)image->rows;
5118 max_channels=number_channels;
5119 if ((max_channels == 4) || (max_channels == 2))
5120 max_channels=max_channels-1;
5121 thresh=threshold;
5122 passes=(((1.0f*image->columns)*image->rows)+1999999.0f)/2000000.0f;
5123 passes=(passes < 1) ? 1 : passes;
5124
5125 i=0;
5126 status =SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5127 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5128 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5129 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&max_channels);
5130 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_float),(void *)&thresh);
5131 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_int),(void *)&PASSES);
5132 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&width);
5133 status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&height);
5134 if (status != CL_SUCCESS)
5135 {
5136 (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5137 ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5138 goto cleanup;
5139 }
5140
5141 for (x = 0; x < passes; ++x)
5142 {
5143 gsize[0]=((width+(SIZE-1))/SIZE)*TILESIZE;
5144 gsize[1]=((((height+(SIZE-1))/SIZE)+passes-1)/passes)*4;
5145 lsize[0]=TILESIZE;
5146 lsize[1]=4;
5147 goffset[0]=0;
5148 goffset[1]=x*gsize[1];
5149
5150 outputReady=EnqueueOpenCLKernel(queue,denoiseKernel,2,goffset,gsize,lsize,
5151 image,filteredImage,MagickTrue,exception);
5152 if (outputReady == MagickFalse)
5153 break;
5154 }
5155
5156 cleanup:
5157
5158 if (imageBuffer != (cl_mem) NULL)
5159 ReleaseOpenCLMemObject(imageBuffer);
5160 if (filteredImageBuffer != (cl_mem) NULL)
5161 ReleaseOpenCLMemObject(filteredImageBuffer);
5162 if (denoiseKernel != (cl_kernel) NULL)
5163 ReleaseOpenCLKernel(denoiseKernel);
5164 if (queue != (cl_command_queue) NULL)
5165 ReleaseOpenCLCommandQueue(device,queue);
5166 if (device != (MagickCLDevice) NULL)
5167 ReleaseOpenCLDevice(device);
5168 if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5169 filteredImage=DestroyImage(filteredImage);
5170
5171 return(filteredImage);
5172 }
5173
AccelerateWaveletDenoiseImage(const Image * image,const double threshold,ExceptionInfo * exception)5174 MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
5175 const double threshold,ExceptionInfo *exception)
5176 {
5177 Image
5178 *filteredImage;
5179
5180 MagickCLEnv
5181 clEnv;
5182
5183 assert(image != NULL);
5184 assert(exception != (ExceptionInfo *)NULL);
5185
5186 if (checkAccelerateCondition(image) == MagickFalse)
5187 return((Image *) NULL);
5188
5189 clEnv=getOpenCLEnvironment(exception);
5190 if (clEnv == (MagickCLEnv) NULL)
5191 return((Image *) NULL);
5192
5193 filteredImage=ComputeWaveletDenoiseImage(image,clEnv,threshold,exception);
5194
5195 return(filteredImage);
5196 }
5197 #endif /* MAGICKCORE_OPENCL_SUPPORT */
5198