MagickCore 6.9.11-60
Convert, Edit, Or Compose Bitmap Images
accelerate-kernels-private.h
Go to the documentation of this file.
1/*
2 Copyright 1999-2021 ImageMagick Studio LLC, a non-profit organization
3 dedicated to making software imaging solutions freely available.
4
5 You may not use this file except in compliance with the License. You may
6 obtain a copy of the License at
7
8 https://imagemagick.org/script/license.php
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15
16 MagickCore private methods for accelerated functions.
17*/
18
19#ifndef MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
20#define MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
21
22#if defined(__cplusplus) || defined(c_plusplus)
23extern "C" {
24#endif
25
26#if defined(MAGICKCORE_OPENCL_SUPPORT)
27
28/*
29 Define declarations.
30*/
31#define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
32#define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
33#define OPENCL_ELSE() "\n #""else " " \n"
34#define OPENCL_ENDIF() "\n #""endif " " \n"
35#define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
36#define STRINGIFY(...) #__VA_ARGS__ "\n"
37
38const char* accelerateKernels =
39
40/*
41 Define declarations.
42*/
43 OPENCL_DEFINE(GetPixelAlpha(pixel),(QuantumRange-(pixel).w))
44 OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
45 OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
46 OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
47 OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
48 OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
49 OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
50 OPENCL_DEFINE(SigmaRandom, (attenuate))
51 OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
52 OPENCL_DEFINE(MagickMax(x, y), (((x) > (y)) ? (x) : (y)))
53 OPENCL_DEFINE(MagickMin(x, y), (((x) < (y)) ? (x) : (y)))
54
55/*
56 Typedef declarations.
57*/
58 STRINGIFY(
59 typedef enum
60 {
62 RGBColorspace, /* Linear RGB colorspace */
63 GRAYColorspace, /* greyscale (non-linear) image (faked 1 channel) */
73 CMYKColorspace, /* negared linear RGB with black separated */
74 sRGBColorspace, /* Default: non-lienar sRGB colorspace */
83 CMYColorspace, /* negated linear RGB colorspace */
86 LCHColorspace, /* alias for LCHuv */
88 LCHabColorspace, /* Cylindrical (Polar) Lab */
89 LCHuvColorspace, /* Cylindrical (Polar) Luv */
92 HSVColorspace, /* alias for HSB */
95 LinearGRAYColorspace /* greyscale (linear) image (faked 1 channel) */
97 )
98
99 STRINGIFY(
100 typedef enum
101 {
157 /* These are new operators, added after the above was last sorted.
158 * The list should be re-sorted only when a new library version is
159 * created.
160 */
175 )
176
177 STRINGIFY(
178 typedef enum
179 {
186 )
187
188 STRINGIFY(
189 typedef enum
190 {
199 } NoiseType;
200 )
201
202 STRINGIFY(
203 typedef enum
204 {
216 )
217
218 STRINGIFY(
219 typedef enum {
237 )
238
239 STRINGIFY(
240 typedef enum
241 {
243 RedChannel = 0x0001,
244 GrayChannel = 0x0001,
245 CyanChannel = 0x0001,
246 GreenChannel = 0x0002,
247 MagentaChannel = 0x0002,
248 BlueChannel = 0x0004,
249 YellowChannel = 0x0004,
250 AlphaChannel = 0x0008,
251 OpacityChannel = 0x0008,
252 MatteChannel = 0x0008, /* deprecated */
253 BlackChannel = 0x0020,
254 IndexChannel = 0x0020,
255 CompositeChannels = 0x002F,
256 AllChannels = 0x7ffffff,
257 /*
258 Special purpose channel types.
259 */
260 TrueAlphaChannel = 0x0040, /* extract actual alpha channel from opacity */
261 RGBChannels = 0x0080, /* set alpha from grayscale mask in RGB */
262 GrayChannels = 0x0080,
263 SyncChannels = 0x0100, /* channels should be modified equally */
265 } ChannelType;
266 )
267
268/*
269 Helper functions.
270*/
271
272OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
273
274 STRINGIFY(
275 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
276 {
277 return((CLQuantum) value);
278 }
279 )
280
281OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
282
283 STRINGIFY(
284 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
285 {
286 return((CLQuantum) (257.0f*value));
287 }
288 )
289
290OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
291
292 STRINGIFY(
293 static inline CLQuantum ScaleCharToQuantum(const unsigned char value)
294 {
295 return((CLQuantum) (16843009.0*value));
296 }
297 )
298
299OPENCL_ENDIF()
300
301OPENCL_IF((MAGICKCORE_HDRI_SUPPORT == 1))
302
303 STRINGIFY(
304 static inline CLQuantum ClampToQuantum(const float value)
305 {
306 return (CLQuantum) value;
307 }
308 )
309
310OPENCL_ELSE()
311
312 STRINGIFY(
313 static inline CLQuantum ClampToQuantum(const float value)
314 {
315 return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f);
316 }
317 )
318
319OPENCL_ENDIF()
320
321 STRINGIFY(
322 static inline int ClampToCanvas(const int offset, const int range)
323 {
324 return clamp(offset, (int)0, range - 1);
325 }
326 )
327
328 STRINGIFY(
329 static inline int ClampToCanvasWithHalo(const int offset, const int range, const int edge, const int section)
330 {
331 return clamp(offset, section ? (int)(0 - edge) : (int)0, section ? (range - 1) : (range - 1 + edge));
332 }
333 )
334
335 STRINGIFY(
336 static inline uint ScaleQuantumToMap(CLQuantum value)
337 {
338 if (value >= (CLQuantum)MaxMap)
339 return ((uint)MaxMap);
340 else
341 return ((uint)value);
342 }
343 )
344
345 STRINGIFY(
346 static inline float PerceptibleReciprocal(const float x)
347 {
348 float sign = x < (float) 0.0 ? (float)-1.0 : (float) 1.0;
349 return((sign*x) >= MagickEpsilon ? (float) 1.0 / x : sign*((float) 1.0 / MagickEpsilon));
350 }
351 )
352
353 STRINGIFY(
354 static inline float RoundToUnity(const float value)
355 {
356 return clamp(value, 0.0f, 1.0f);
357 }
358 )
359
360 STRINGIFY(
361
362 static inline CLQuantum getBlue(CLPixelType p) { return p.x; }
363 static inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
364 static inline float getBlueF4(float4 p) { return p.x; }
365 static inline void setBlueF4(float4* p, float value) { (*p).x = value; }
366
367 static inline CLQuantum getGreen(CLPixelType p) { return p.y; }
368 static inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
369 static inline float getGreenF4(float4 p) { return p.y; }
370 static inline void setGreenF4(float4* p, float value) { (*p).y = value; }
371
372 static inline CLQuantum getRed(CLPixelType p) { return p.z; }
373 static inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
374 static inline float getRedF4(float4 p) { return p.z; }
375 static inline void setRedF4(float4* p, float value) { (*p).z = value; }
376
377 static inline CLQuantum getOpacity(CLPixelType p) { return p.w; }
378 static inline void setOpacity(CLPixelType* p, CLQuantum value) { (*p).w = value; }
379 static inline float getOpacityF4(float4 p) { return p.w; }
380 static inline void setOpacityF4(float4* p, float value) { (*p).w = value; }
381
382 static inline void setGray(CLPixelType* p, CLQuantum value) { (*p).z = value; (*p).y = value; (*p).x = value; }
383
384 static inline float GetPixelIntensity(const int method, const int colorspace, CLPixelType p)
385 {
386 float red = getRed(p);
387 float green = getGreen(p);
388 float blue = getBlue(p);
389
390 float intensity;
391
392 if (colorspace == GRAYColorspace)
393 return red;
394
395 switch (method)
396 {
398 {
399 intensity = (red + green + blue) / 3.0;
400 break;
401 }
403 {
404 intensity = MagickMax(MagickMax(red, green), blue);
405 break;
406 }
408 {
409 intensity = (MagickMin(MagickMin(red, green), blue) +
410 MagickMax(MagickMax(red, green), blue)) / 2.0;
411 break;
412 }
414 {
415 intensity = (float)(((float)red*red + green*green + blue*blue) /
416 (3.0*QuantumRange));
417 break;
418 }
420 {
421 /*
422 if (image->colorspace == RGBColorspace)
423 {
424 red=EncodePixelGamma(red);
425 green=EncodePixelGamma(green);
426 blue=EncodePixelGamma(blue);
427 }
428 */
429 intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
430 break;
431 }
433 {
434 /*
435 if (image->colorspace == sRGBColorspace)
436 {
437 red=DecodePixelGamma(red);
438 green=DecodePixelGamma(green);
439 blue=DecodePixelGamma(blue);
440 }
441 */
442 intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
443 break;
444 }
446 default:
447 {
448 /*
449 if (image->colorspace == RGBColorspace)
450 {
451 red=EncodePixelGamma(red);
452 green=EncodePixelGamma(green);
453 blue=EncodePixelGamma(blue);
454 }
455 */
456 intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
457 break;
458 }
460 {
461 /*
462 if (image->colorspace == sRGBColorspace)
463 {
464 red=DecodePixelGamma(red);
465 green=DecodePixelGamma(green);
466 blue=DecodePixelGamma(blue);
467 }
468 */
469 intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
470 break;
471 }
473 {
474 intensity = (float)(sqrt((float)red*red + green*green + blue*blue) /
475 sqrt(3.0));
476 break;
477 }
478 }
479
480 return intensity;
481
482 }
483 )
484
485/*
486%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
487% %
488% %
489% %
490% A d d N o i s e %
491% %
492% %
493% %
494%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
495*/
496
497STRINGIFY(
498
499/*
500Part of MWC64X by David Thomas, dt10@imperial.ac.uk
501This is provided under BSD, full license is with the main package.
502See http://www.doc.ic.ac.uk/~dt10/research
503*/
504
505// Pre: a<M, b<M
506// Post: r=(a+b) mod M
507ulong MWC_AddMod64(ulong a, ulong b, ulong M)
508{
509 ulong v=a+b;
510 //if( (v>=M) || (v<a) )
511 if( (v>=M) || (convert_float(v) < convert_float(a)) ) // workaround for what appears to be an optimizer bug.
512 v=v-M;
513 return v;
514}
515
516// Pre: a<M,b<M
517// Post: r=(a*b) mod M
518// This could be done more efficently, but it is portable, and should
519// be easy to understand. It can be replaced with any of the better
520// modular multiplication algorithms (for example if you know you have
521// double precision available or something).
522ulong MWC_MulMod64(ulong a, ulong b, ulong M)
523{
524 ulong r=0;
525 while(a!=0){
526 if(a&1)
527 r=MWC_AddMod64(r,b,M);
528 b=MWC_AddMod64(b,b,M);
529 a=a>>1;
530 }
531 return r;
532}
533
534
535// Pre: a<M, e>=0
536// Post: r=(a^b) mod M
537// This takes at most ~64^2 modular additions, so probably about 2^15 or so instructions on
538// most architectures
539ulong MWC_PowMod64(ulong a, ulong e, ulong M)
540{
541 ulong sqr=a, acc=1;
542 while(e!=0){
543 if(e&1)
544 acc=MWC_MulMod64(acc,sqr,M);
545 sqr=MWC_MulMod64(sqr,sqr,M);
546 e=e>>1;
547 }
548 return acc;
549}
550
551uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
552{
553 ulong m=MWC_PowMod64(A, distance, M);
554 ulong x=curr.x*(ulong)A+curr.y;
555 x=MWC_MulMod64(x, m, M);
556 return (uint2)((uint)(x/A), (uint)(x%A));
557}
558
559uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
560{
561 // This is an arbitrary constant for starting LCG jumping from. I didn't
562 // want to start from 1, as then you end up with the two or three first values
563 // being a bit poor in ones - once you've decided that, one constant is as
564 // good as any another. There is no deep mathematical reason for it, I just
565 // generated a random number.
566 enum{ MWC_BASEID = 4077358422479273989UL };
567
568 ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
569 ulong m=MWC_PowMod64(A, dist, M);
570
571 ulong x=MWC_MulMod64(MWC_BASEID, m, M);
572 return (uint2)((uint)(x/A), (uint)(x%A));
573}
574
576typedef struct{ uint x; uint c; uint seed0; ulong seed1; } mwc64x_state_t;
577
578void MWC64X_Step(mwc64x_state_t *s)
579{
580 uint X=s->x, C=s->c;
581
582 uint Xn=s->seed0*X+C;
583 uint carry=(uint)(Xn<C); // The (Xn<C) will be zero or one for scalar
584 uint Cn=mad_hi(s->seed0,X,carry);
585
586 s->x=Xn;
587 s->c=Cn;
588}
589
590void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
591{
592 uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), s->seed0, s->seed1, distance);
593 s->x=tmp.x;
594 s->c=tmp.y;
595}
596
597void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
598{
599 uint2 tmp=MWC_SeedImpl_Mod64(s->seed0, s->seed1, 1, 0, baseOffset, perStreamOffset);
600 s->x=tmp.x;
601 s->c=tmp.y;
602}
603
605uint MWC64X_NextUint(mwc64x_state_t *s)
606{
607 uint res=s->x ^ s->c;
608 MWC64X_Step(s);
609 return res;
610}
611
612//
613// End of MWC64X excerpt
614//
615
616 float mwcReadPseudoRandomValue(mwc64x_state_t* rng) {
617 return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff); // normalized to 1.0
618 }
619
620
621 float mwcGenerateDifferentialNoise(mwc64x_state_t* r, CLQuantum pixel, NoiseType noise_type, float attenuate) {
622
623 float
624 alpha,
625 beta,
626 noise,
627 sigma;
628
629 noise = 0.0f;
630 alpha=mwcReadPseudoRandomValue(r);
631 switch(noise_type) {
632 case UniformNoise:
633 default:
634 {
635 noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
636 break;
637 }
638 case GaussianNoise:
639 {
640 float
641 gamma,
642 tau;
643
644 if (alpha == 0.0f)
645 alpha=1.0f;
646 beta=mwcReadPseudoRandomValue(r);
647 gamma=sqrt(-2.0f*log(alpha));
648 sigma=gamma*cospi((2.0f*beta));
649 tau=gamma*sinpi((2.0f*beta));
650 noise=(float)(pixel+sqrt((float) pixel)*SigmaGaussian*sigma+
652 break;
653 }
654
655
656 case ImpulseNoise:
657 {
658 if (alpha < (SigmaImpulse/2.0f))
659 noise=0.0f;
660 else
661 if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
662 noise=(float)QuantumRange;
663 else
664 noise=(float)pixel;
665 break;
666 }
667 case LaplacianNoise:
668 {
669 if (alpha <= 0.5f)
670 {
671 if (alpha <= MagickEpsilon)
672 noise=(float) (pixel-QuantumRange);
673 else
674 noise=(float) (pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
675 0.5f);
676 break;
677 }
678 beta=1.0f-alpha;
679 if (beta <= (0.5f*MagickEpsilon))
680 noise=(float) (pixel+QuantumRange);
681 else
682 noise=(float) (pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
683 break;
684 }
686 {
687 sigma=1.0f;
688 if (alpha > MagickEpsilon)
689 sigma=sqrt(-2.0f*log(alpha));
690 beta=mwcReadPseudoRandomValue(r);
691 noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma*
692 cospi((float) (2.0f*beta))/2.0f);
693 break;
694 }
695 case PoissonNoise:
696 {
697 float
698 poisson;
699 unsigned int i;
700 poisson=exp(-SigmaPoisson*QuantumScale*pixel);
701 for (i=0; alpha > poisson; i++)
702 {
703 beta=mwcReadPseudoRandomValue(r);
704 alpha*=beta;
705 }
706 noise=(float) (QuantumRange*i/SigmaPoisson);
707 break;
708 }
709 case RandomNoise:
710 {
711 noise=(float) (QuantumRange*SigmaRandom*alpha);
712 break;
713 }
714
715 };
716 return noise;
717 }
718
719 __kernel
720 void AddNoise(const __global CLPixelType* inputImage, __global CLPixelType* filteredImage
721 ,const unsigned int inputPixelCount, const unsigned int pixelsPerWorkItem
722 ,const ChannelType channel
723 ,const NoiseType noise_type, const float attenuate
724 ,const unsigned int seed0, const unsigned int seed1
725 ,const unsigned int numRandomNumbersPerPixel) {
726
727 mwc64x_state_t rng;
728 rng.seed0 = seed0;
729 rng.seed1 = seed1;
730
731 uint span = pixelsPerWorkItem * numRandomNumbersPerPixel; // length of RNG substream each workitem will use
732 uint offset = span * get_local_size(0) * get_group_id(0); // offset of this workgroup's RNG substream (in master stream);
733
734 MWC64X_SeedStreams(&rng, offset, span); // Seed the RNG streams
735
736 uint pos = get_local_size(0) * get_group_id(0) * pixelsPerWorkItem + get_local_id(0); // pixel to process
737
738 uint count = pixelsPerWorkItem;
739
740 while (count > 0) {
741 if (pos < inputPixelCount) {
742 CLPixelType p = inputImage[pos];
743
744 if ((channel&RedChannel)!=0) {
745 setRed(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getRed(p),noise_type,attenuate)));
746 }
747
748 if ((channel&GreenChannel)!=0) {
749 setGreen(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getGreen(p),noise_type,attenuate)));
750 }
751
752 if ((channel&BlueChannel)!=0) {
753 setBlue(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getBlue(p),noise_type,attenuate)));
754 }
755
756 if ((channel & OpacityChannel) != 0) {
757 setOpacity(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getOpacity(p),noise_type,attenuate)));
758 }
759
760 filteredImage[pos] = p;
761 //filteredImage[pos] = (CLPixelType)(MWC64X_NextUint(&rng) % 256, MWC64X_NextUint(&rng) % 256, MWC64X_NextUint(&rng) % 256, 255);
762 }
763 pos += get_local_size(0);
764 --count;
765 }
766 }
767 )
768
769/*
770%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
771% %
772% %
773% %
774% B l u r %
775% %
776% %
777% %
778%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
779*/
780
781 STRINGIFY(
782 /*
783 Reduce image noise and reduce detail levels by row
784 im: input pixels filtered_in filtered_im: output pixels
785 filter : convolve kernel width: convolve kernel size
786 channel : define which channel is blured
787 is_RGBA_BGRA : define the input is RGBA or BGRA
788 */
789 __kernel void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
790 const ChannelType channel, __constant float *filter,
791 const unsigned int width,
792 const unsigned int imageColumns, const unsigned int imageRows,
793 __local CLPixelType *temp)
794 {
795 const int x = get_global_id(0);
796 const int y = get_global_id(1);
797
798 const int columns = imageColumns;
799
800 const unsigned int radius = (width-1)/2;
801 const int wsize = get_local_size(0);
802 const unsigned int loadSize = wsize+width;
803
804 //load chunk only for now
805 //event_t e = async_work_group_copy(temp+radius, im+x+y*columns, wsize, 0);
806 //wait_group_events(1,&e);
807
808 //parallel load and clamp
809 /*
810 int count = 0;
811 for (int i=0; i < loadSize; i=i+wsize)
812 {
813 int currentX = x + wsize*(count++);
814
815 int localId = get_local_id(0);
816
817 if ((localId+i) > loadSize)
818 break;
819
820 temp[localId+i] = im[y*columns+ClampToCanvas(currentX-radius, columns)];
821
822 if (y==0 && get_group_id(0) == 0)
823 {
824 printf("(%d %d) temp %d load %d currentX %d\n", x, y, localId+i, ClampToCanvas(currentX-radius, columns), currentX);
825 }
826 }
827 */
828
829 //group coordinate
830 const int groupX=get_local_size(0)*get_group_id(0);
831 const int groupY=get_local_size(1)*get_group_id(1);
832
833 //parallel load and clamp
834 for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
835 {
836 //int cx = ClampToCanvas(groupX+i, columns);
837 temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
838
839 /*if (0 && y==0 && get_group_id(1) == 0)
840 {
841 printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
842 }*/
843 }
844
845 // barrier
846 barrier(CLK_LOCAL_MEM_FENCE);
847
848 // only do the work if this is not a patched item
849 if (get_global_id(0) < columns)
850 {
851 // compute
852 float4 result = (float4) 0;
853
854 int i = 0;
855
856 \n #ifndef UFACTOR \n
857 \n #define UFACTOR 8 \n
858 \n #endif \n
859
860 for ( ; i+UFACTOR < width; )
861 {
862 \n #pragma unroll UFACTOR\n
863 for (int j=0; j < UFACTOR; j++, i++)
864 {
865 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
866 }
867 }
868
869 for ( ; i < width; i++)
870 {
871 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
872 }
873
874 result.x = ClampToQuantum(result.x);
875 result.y = ClampToQuantum(result.y);
876 result.z = ClampToQuantum(result.z);
877 result.w = ClampToQuantum(result.w);
878
879 // write back to global
880 filtered_im[y*columns+x] = result;
881 }
882 }
883 )
884
885 STRINGIFY(
886 /*
887 Reduce image noise and reduce detail levels by line
888 im: input pixels filtered_in filtered_im: output pixels
889 filter : convolve kernel width: convolve kernel size
890 channel : define which channel is blured\
891 is_RGBA_BGRA : define the input is RGBA or BGRA
892 */
893 __kernel void BlurColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
894 const ChannelType channel, __constant float *filter,
895 const unsigned int width,
896 const unsigned int imageColumns, const unsigned int imageRows,
897 __local float4 *temp)
898 {
899 const int x = get_global_id(0);
900 const int y = get_global_id(1);
901
902 //const int columns = get_global_size(0);
903 //const int rows = get_global_size(1);
904 const int columns = imageColumns;
905 const int rows = imageRows;
906
907 unsigned int radius = (width-1)/2;
908 const int wsize = get_local_size(1);
909 const unsigned int loadSize = wsize+width;
910
911 //group coordinate
912 const int groupX=get_local_size(0)*get_group_id(0);
913 const int groupY=get_local_size(1)*get_group_id(1);
914 //notice that get_local_size(0) is 1, so
915 //groupX=get_group_id(0);
916
917 //parallel load and clamp
918 for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
919 {
920 temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
921 }
922
923 // barrier
924 barrier(CLK_LOCAL_MEM_FENCE);
925
926 // only do the work if this is not a patched item
927 if (get_global_id(1) < rows)
928 {
929 // compute
930 float4 result = (float4) 0;
931
932 int i = 0;
933
934 \n #ifndef UFACTOR \n
935 \n #define UFACTOR 8 \n
936 \n #endif \n
937
938 for ( ; i+UFACTOR < width; )
939 {
940 \n #pragma unroll UFACTOR \n
941 for (int j=0; j < UFACTOR; j++, i++)
942 {
943 result+=filter[i]*temp[i+get_local_id(1)];
944 }
945 }
946
947 for ( ; i < width; i++)
948 {
949 result+=filter[i]*temp[i+get_local_id(1)];
950 }
951
952 result.x = ClampToQuantum(result.x);
953 result.y = ClampToQuantum(result.y);
954 result.z = ClampToQuantum(result.z);
955 result.w = ClampToQuantum(result.w);
956
957 // write back to global
958 filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
959 }
960
961 }
962 )
963
964/*
965%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
966% %
967% %
968% %
969% C o m p o s i t e %
970% %
971% %
972% %
973%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
974*/
975
976 STRINGIFY(
977 static inline float ColorDodge(const float Sca,
978 const float Sa,const float Dca,const float Da)
979 {
980 /*
981 Oct 2004 SVG specification.
982 */
983 if ((Sca*Da+Dca*Sa) >= Sa*Da)
984 return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
985 return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
986
987
988 /*
989 New specification, March 2009 SVG specification. This specification was
990 also wrong of non-overlap cases.
991 */
992 /*
993 if ((fabs(Sca-Sa) < MagickEpsilon) && (fabs(Dca) < MagickEpsilon))
994 return(Sca*(1.0-Da));
995 if (fabs(Sca-Sa) < MagickEpsilon)
996 return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
997 return(Sa*MagickMin(Da,Dca*Sa/(Sa-Sca)));
998 */
999
1000 /*
1001 Working from first principles using the original formula:
1002
1003 f(Sc,Dc) = Dc/(1-Sc)
1004
1005 This works correctly! Looks like the 2004 model was right but just
1006 required a extra condition for correct handling.
1007 */
1008
1009 /*
1010 if ((fabs(Sca-Sa) < MagickEpsilon) && (fabs(Dca) < MagickEpsilon))
1011 return(Sca*(1.0-Da)+Dca*(1.0-Sa));
1012 if (fabs(Sca-Sa) < MagickEpsilon)
1013 return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
1014 return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
1015 */
1016 }
1017
1018 static inline void CompositeColorDodge(const float4 *p,
1019 const float4 *q,float4 *composite) {
1020
1021 float
1022 Da,
1023 gamma,
1024 Sa;
1025
1026 Sa=1.0f-QuantumScale*getOpacityF4(*p); /* simplify and speed up equations */
1027 Da=1.0f-QuantumScale*getOpacityF4(*q);
1028 gamma=RoundToUnity(Sa+Da-Sa*Da); /* over blend, as per SVG doc */
1029 setOpacityF4(composite, QuantumRange*(1.0-gamma));
1030 gamma=QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma);
1031 setRedF4(composite,gamma*ColorDodge(QuantumScale*getRedF4(*p)*Sa,Sa,QuantumScale*
1032 getRedF4(*q)*Da,Da));
1033 setGreenF4(composite,gamma*ColorDodge(QuantumScale*getGreenF4(*p)*Sa,Sa,QuantumScale*
1034 getGreenF4(*q)*Da,Da));
1035 setBlueF4(composite,gamma*ColorDodge(QuantumScale*getBlueF4(*p)*Sa,Sa,QuantumScale*
1036 getBlueF4(*q)*Da,Da));
1037 }
1038 )
1039
1040 STRINGIFY(
1041 static inline void MagickPixelCompositePlus(const float4 *p,
1042 const float alpha,const float4 *q,
1043 const float beta,float4 *composite)
1044 {
1045 float
1046 gamma;
1047
1048 float
1049 Da,
1050 Sa;
1051 /*
1052 Add two pixels with the given opacities.
1053 */
1054 Sa=1.0-QuantumScale*alpha;
1055 Da=1.0-QuantumScale*beta;
1056 gamma=RoundToUnity(Sa+Da); /* 'Plus' blending -- not 'Over' blending */
1057 setOpacityF4(composite,(float) QuantumRange*(1.0-gamma));
1058 gamma=PerceptibleReciprocal(gamma);
1059 setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q)));
1060 setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q)));
1061 setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q)));
1062 }
1063 )
1064
1065 STRINGIFY(
1066 static inline void MagickPixelCompositeBlend(const float4 *p,
1067 const float alpha,const float4 *q,
1068 const float beta,float4 *composite)
1069 {
1070 MagickPixelCompositePlus(p,(float) (QuantumRange-alpha*
1071 (QuantumRange-getOpacityF4(*p))),q,(float) (QuantumRange-beta*
1072 (QuantumRange-getOpacityF4(*q))),composite);
1073 }
1074 )
1075
1076 STRINGIFY(
1077 __kernel
1078 void Composite(__global CLPixelType *image,
1079 const unsigned int imageWidth,
1080 const unsigned int imageHeight,
1081 const unsigned int imageMatte,
1082 const __global CLPixelType *compositeImage,
1083 const unsigned int compositeWidth,
1084 const unsigned int compositeHeight,
1085 const unsigned int compositeMatte,
1086 const unsigned int compose,
1087 const ChannelType channel,
1088 const float destination_dissolve,
1089 const float source_dissolve) {
1090
1091 uint2 index;
1092 index.x = get_global_id(0);
1093 index.y = get_global_id(1);
1094
1095
1096 if (index.x >= imageWidth
1097 || index.y >= imageHeight) {
1098 return;
1099 }
1100 const CLPixelType inputPixel = image[index.y*imageWidth+index.x];
1101 float4 destination;
1102 setRedF4(&destination,getRed(inputPixel));
1103 setGreenF4(&destination,getGreen(inputPixel));
1104 setBlueF4(&destination,getBlue(inputPixel));
1105
1106
1107 const CLPixelType compositePixel
1108 = compositeImage[index.y*imageWidth+index.x];
1109 float4 source;
1110 setRedF4(&source,getRed(compositePixel));
1111 setGreenF4(&source,getGreen(compositePixel));
1112 setBlueF4(&source,getBlue(compositePixel));
1113
1114 if (imageMatte != 0) {
1115 setOpacityF4(&destination,getOpacity(inputPixel));
1116 }
1117 else {
1118 setOpacityF4(&destination,0.0f);
1119 }
1120
1121 if (compositeMatte != 0) {
1122 setOpacityF4(&source,getOpacity(compositePixel));
1123 }
1124 else {
1125 setOpacityF4(&source,0.0f);
1126 }
1127
1128 float4 composite=destination;
1129
1131 switch (op) {
1133 CompositeColorDodge(&source,&destination,&composite);
1134 break;
1135 case BlendCompositeOp:
1136 MagickPixelCompositeBlend(&source,source_dissolve,&destination,
1137 destination_dissolve,&composite);
1138 break;
1139 default:
1140 // unsupported operators
1141 break;
1142 };
1143
1144 CLPixelType outputPixel;
1145 setRed(&outputPixel, ClampToQuantum(getRedF4(composite)));
1146 setGreen(&outputPixel, ClampToQuantum(getGreenF4(composite)));
1147 setBlue(&outputPixel, ClampToQuantum(getBlueF4(composite)));
1148 setOpacity(&outputPixel, ClampToQuantum(getOpacityF4(composite)));
1149 image[index.y*imageWidth+index.x] = outputPixel;
1150 }
1151 )
1152
1153/*
1154%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1155% %
1156% %
1157% %
1158% C o n t r a s t %
1159% %
1160% %
1161% %
1162%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1163*/
1164
1165 STRINGIFY(
1166
1167 static inline float3 ConvertRGBToHSB(CLPixelType pixel) {
1168 float3 HueSaturationBrightness;
1169 HueSaturationBrightness.x = 0.0f; // Hue
1170 HueSaturationBrightness.y = 0.0f; // Saturation
1171 HueSaturationBrightness.z = 0.0f; // Brightness
1172
1173 float r=(float) getRed(pixel);
1174 float g=(float) getGreen(pixel);
1175 float b=(float) getBlue(pixel);
1176
1177 float tmin=MagickMin(MagickMin(r,g),b);
1178 float tmax= MagickMax(MagickMax(r,g),b);
1179
1180 if (tmax!=0.0f) {
1181 float delta=tmax-tmin;
1182 HueSaturationBrightness.y=delta/tmax;
1183 HueSaturationBrightness.z=QuantumScale*tmax;
1184
1185 if (delta != 0.0f) {
1186 HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
1187 HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
1188 HueSaturationBrightness.x/=6.0f;
1189 HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f;
1190 }
1191 }
1192 return HueSaturationBrightness;
1193 }
1194
1195 static inline CLPixelType ConvertHSBToRGB(float3 HueSaturationBrightness) {
1196
1197 float hue = HueSaturationBrightness.x;
1198 float brightness = HueSaturationBrightness.z;
1199 float saturation = HueSaturationBrightness.y;
1200
1201 CLPixelType rgb;
1202
1203 if (saturation == 0.0f) {
1204 setRed(&rgb,ClampToQuantum(QuantumRange*brightness));
1205 setGreen(&rgb,getRed(rgb));
1206 setBlue(&rgb,getRed(rgb));
1207 }
1208 else {
1209
1210 float h=6.0f*(hue-floor(hue));
1211 float f=h-floor(h);
1212 float p=brightness*(1.0f-saturation);
1213 float q=brightness*(1.0f-saturation*f);
1214 float t=brightness*(1.0f-(saturation*(1.0f-f)));
1215
1216 float clampedBrightness = ClampToQuantum(QuantumRange*brightness);
1217 float clamped_t = ClampToQuantum(QuantumRange*t);
1218 float clamped_p = ClampToQuantum(QuantumRange*p);
1219 float clamped_q = ClampToQuantum(QuantumRange*q);
1220 int ih = (int)h;
1221 setRed(&rgb, (ih == 1)?clamped_q:
1222 (ih == 2 || ih == 3)?clamped_p:
1223 (ih == 4)?clamped_t:
1224 clampedBrightness);
1225
1226 setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness:
1227 (ih == 3)?clamped_q:
1228 (ih == 4 || ih == 5)?clamped_p:
1229 clamped_t);
1230
1231 setBlue(&rgb, (ih == 2)?clamped_t:
1232 (ih == 3 || ih == 4)?clampedBrightness:
1233 (ih == 5)?clamped_q:
1234 clamped_p);
1235 }
1236 return rgb;
1237 }
1238
1239 __kernel void Contrast(__global CLPixelType *im, const unsigned int sharpen)
1240 {
1241
1242 const int sign = sharpen!=0?1:-1;
1243 const int x = get_global_id(0);
1244 const int y = get_global_id(1);
1245 const int columns = get_global_size(0);
1246 const int c = x + y * columns;
1247
1248 CLPixelType pixel = im[c];
1249 float3 HueSaturationBrightness = ConvertRGBToHSB(pixel);
1250 float brightness = HueSaturationBrightness.z;
1251 brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1252 brightness = clamp(brightness,0.0f,1.0f);
1253 HueSaturationBrightness.z = brightness;
1254
1255 CLPixelType filteredPixel = ConvertHSBToRGB(HueSaturationBrightness);
1256 filteredPixel.w = pixel.w;
1257 im[c] = filteredPixel;
1258 }
1259 )
1260
1261/*
1262%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1263% %
1264% %
1265% %
1266% C o n t r a s t S t r e t c h %
1267% %
1268% %
1269% %
1270%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1271*/
1272
1273 STRINGIFY(
1274 /*
1275 */
1276 __kernel void Histogram(__global CLPixelType * restrict im,
1277 const ChannelType channel,
1278 const int method,
1279 const int colorspace,
1280 __global uint4 * restrict histogram)
1281 {
1282 const int x = get_global_id(0);
1283 const int y = get_global_id(1);
1284 const int columns = get_global_size(0);
1285 const int c = x + y * columns;
1286 if ((channel & SyncChannels) != 0)
1287 {
1288 float intensity = GetPixelIntensity(method, colorspace,im[c]);
1289 uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
1290 atomic_inc((__global uint *)(&(histogram[pos]))+2); //red position
1291 }
1292 else
1293 {
1294 // for equalizing, we always need all channels?
1295 // otherwise something more
1296 }
1297 }
1298 )
1299
1300 STRINGIFY(
1301 /*
1302 */
1303 __kernel void ContrastStretch(__global CLPixelType * restrict im,
1304 const ChannelType channel,
1305 __global CLPixelType * restrict stretch_map,
1306 const float4 white, const float4 black)
1307 {
1308 const int x = get_global_id(0);
1309 const int y = get_global_id(1);
1310 const int columns = get_global_size(0);
1311 const int c = x + y * columns;
1312
1313 uint ePos;
1314 CLPixelType oValue, eValue;
1315 CLQuantum red, green, blue, opacity;
1316
1317 //read from global
1318 oValue=im[c];
1319
1320 if ((channel & RedChannel) != 0)
1321 {
1322 if (getRedF4(white) != getRedF4(black))
1323 {
1324 ePos = ScaleQuantumToMap(getRed(oValue));
1325 eValue = stretch_map[ePos];
1326 red = getRed(eValue);
1327 }
1328 }
1329
1330 if ((channel & GreenChannel) != 0)
1331 {
1332 if (getGreenF4(white) != getGreenF4(black))
1333 {
1334 ePos = ScaleQuantumToMap(getGreen(oValue));
1335 eValue = stretch_map[ePos];
1336 green = getGreen(eValue);
1337 }
1338 }
1339
1340 if ((channel & BlueChannel) != 0)
1341 {
1342 if (getBlueF4(white) != getBlueF4(black))
1343 {
1344 ePos = ScaleQuantumToMap(getBlue(oValue));
1345 eValue = stretch_map[ePos];
1346 blue = getBlue(eValue);
1347 }
1348 }
1349
1350 if ((channel & OpacityChannel) != 0)
1351 {
1352 if (getOpacityF4(white) != getOpacityF4(black))
1353 {
1354 ePos = ScaleQuantumToMap(getOpacity(oValue));
1355 eValue = stretch_map[ePos];
1356 opacity = getOpacity(eValue);
1357 }
1358 }
1359
1360 //write back
1361 im[c]=(CLPixelType)(blue, green, red, opacity);
1362
1363 }
1364 )
1365
1366/*
1367%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1368% %
1369% %
1370% %
1371% C o n v o l v e %
1372% %
1373% %
1374% %
1375%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1376*/
1377
1378 STRINGIFY(
1379 __kernel
1380 void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
1381 const unsigned int imageWidth, const unsigned int imageHeight,
1382 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
1383 const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
1384
1385 int2 blockID;
1386 blockID.x = get_group_id(0);
1387 blockID.y = get_group_id(1);
1388
1389 // image area processed by this workgroup
1390 int2 imageAreaOrg;
1391 imageAreaOrg.x = blockID.x * get_local_size(0);
1392 imageAreaOrg.y = blockID.y * get_local_size(1);
1393
1394 int2 midFilterDimen;
1395 midFilterDimen.x = (filterWidth-1)/2;
1396 midFilterDimen.y = (filterHeight-1)/2;
1397
1398 int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
1399
1400 // dimension of the local cache
1401 int2 cachedAreaDimen;
1402 cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
1403 cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
1404
1405 // cache the pixels accessed by this workgroup in local memory
1406 int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
1407 int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
1408 int groupSize = get_local_size(0) * get_local_size(1);
1409 for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
1410
1411 int2 cachedAreaIndex;
1412 cachedAreaIndex.x = i % cachedAreaDimen.x;
1413 cachedAreaIndex.y = i / cachedAreaDimen.x;
1414
1415 int2 imagePixelIndex;
1416 imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
1417
1418 // only support EdgeVirtualPixelMethod through ClampToCanvas
1419 // TODO: implement other virtual pixel method
1420 imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
1421 imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
1422
1423 pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
1424 }
1425
1426 // cache the filter
1427 for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
1428 filterCache[i] = filter[i];
1429 }
1430 barrier(CLK_LOCAL_MEM_FENCE);
1431
1432
1433 int2 imageIndex;
1434 imageIndex.x = imageAreaOrg.x + get_local_id(0);
1435 imageIndex.y = imageAreaOrg.y + get_local_id(1);
1436
1437 // if out-of-range, stops here and quit
1438 if (imageIndex.x >= imageWidth
1439 || imageIndex.y >= imageHeight) {
1440 return;
1441 }
1442
1443 int filterIndex = 0;
1444 float4 sum = (float4)0.0f;
1445 float gamma = 0.0f;
1446 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1447 int cacheIndexY = get_local_id(1);
1448 for (int j = 0; j < filterHeight; j++) {
1449 int cacheIndexX = get_local_id(0);
1450 for (int i = 0; i < filterWidth; i++) {
1451 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1452 float f = filterCache[filterIndex];
1453
1454 sum.x += f * p.x;
1455 sum.y += f * p.y;
1456 sum.z += f * p.z;
1457 sum.w += f * p.w;
1458
1459 gamma += f;
1460 filterIndex++;
1461 cacheIndexX++;
1462 }
1463 cacheIndexY++;
1464 }
1465 }
1466 else {
1467 int cacheIndexY = get_local_id(1);
1468 for (int j = 0; j < filterHeight; j++) {
1469 int cacheIndexX = get_local_id(0);
1470 for (int i = 0; i < filterWidth; i++) {
1471
1472 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1473 float alpha = QuantumScale*(QuantumRange-p.w);
1474 float f = filterCache[filterIndex];
1475 float g = alpha * f;
1476
1477 sum.x += g*p.x;
1478 sum.y += g*p.y;
1479 sum.z += g*p.z;
1480 sum.w += f*p.w;
1481
1482 gamma += g;
1483 filterIndex++;
1484 cacheIndexX++;
1485 }
1486 cacheIndexY++;
1487 }
1488 gamma = PerceptibleReciprocal(gamma);
1489 sum.xyz = gamma*sum.xyz;
1490 }
1491 CLPixelType outputPixel;
1492 outputPixel.x = ClampToQuantum(sum.x);
1493 outputPixel.y = ClampToQuantum(sum.y);
1494 outputPixel.z = ClampToQuantum(sum.z);
1495 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1496
1497 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1498 }
1499 )
1500
1501 STRINGIFY(
1502 __kernel
1503 void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
1504 const uint imageWidth, const uint imageHeight,
1505 __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
1506 const uint matte, const ChannelType channel) {
1507
1508 int2 imageIndex;
1509 imageIndex.x = get_global_id(0);
1510 imageIndex.y = get_global_id(1);
1511
1512 /*
1513 unsigned int imageWidth = get_global_size(0);
1514 unsigned int imageHeight = get_global_size(1);
1515 */
1516 if (imageIndex.x >= imageWidth
1517 || imageIndex.y >= imageHeight)
1518 return;
1519
1520 int2 midFilterDimen;
1521 midFilterDimen.x = (filterWidth-1)/2;
1522 midFilterDimen.y = (filterHeight-1)/2;
1523
1524 int filterIndex = 0;
1525 float4 sum = (float4)0.0f;
1526 float gamma = 0.0f;
1527 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1528 for (int j = 0; j < filterHeight; j++) {
1529 int2 inputPixelIndex;
1530 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1531 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1532 for (int i = 0; i < filterWidth; i++) {
1533 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1534 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1535
1536 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1537 float f = filter[filterIndex];
1538
1539 sum.x += f * p.x;
1540 sum.y += f * p.y;
1541 sum.z += f * p.z;
1542 sum.w += f * p.w;
1543
1544 gamma += f;
1545
1546 filterIndex++;
1547 }
1548 }
1549 }
1550 else {
1551
1552 for (int j = 0; j < filterHeight; j++) {
1553 int2 inputPixelIndex;
1554 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1555 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1556 for (int i = 0; i < filterWidth; i++) {
1557 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1558 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1559
1560 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1561 float alpha = QuantumScale*(QuantumRange-p.w);
1562 float f = filter[filterIndex];
1563 float g = alpha * f;
1564
1565 sum.x += g*p.x;
1566 sum.y += g*p.y;
1567 sum.z += g*p.z;
1568 sum.w += f*p.w;
1569
1570 gamma += g;
1571
1572
1573 filterIndex++;
1574 }
1575 }
1576 gamma = PerceptibleReciprocal(gamma);
1577 sum.xyz = gamma*sum.xyz;
1578 }
1579
1580 CLPixelType outputPixel;
1581 outputPixel.x = ClampToQuantum(sum.x);
1582 outputPixel.y = ClampToQuantum(sum.y);
1583 outputPixel.z = ClampToQuantum(sum.z);
1584 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1585
1586 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1587 }
1588 )
1589
1590/*
1591%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1592% %
1593% %
1594% %
1595% D e s p e c k l e %
1596% %
1597% %
1598% %
1599%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1600*/
1601
1602 STRINGIFY(
1603
1604 __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1605 , const unsigned int imageWidth, const unsigned int imageHeight
1606 , const int2 offset, const int polarity, const int matte) {
1607
1608 int x = get_global_id(0);
1609 int y = get_global_id(1);
1610
1611 CLPixelType v = inputImage[y*imageWidth+x];
1612
1613 int2 neighbor;
1614 neighbor.y = y + offset.y;
1615 neighbor.x = x + offset.x;
1616
1617 int2 clampedNeighbor;
1618 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1619 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1620
1621 CLPixelType r = (clampedNeighbor.x == neighbor.x
1622 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1623 :(CLPixelType)0;
1624
1625 int sv[4];
1626 sv[0] = (int)v.x;
1627 sv[1] = (int)v.y;
1628 sv[2] = (int)v.z;
1629 sv[3] = (int)v.w;
1630
1631 int sr[4];
1632 sr[0] = (int)r.x;
1633 sr[1] = (int)r.y;
1634 sr[2] = (int)r.z;
1635 sr[3] = (int)r.w;
1636
1637 if (polarity > 0) {
1638 \n #pragma unroll 4\n
1639 for (unsigned int i = 0; i < 4; i++) {
1640 sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
1641 }
1642 }
1643 else {
1644 \n #pragma unroll 4\n
1645 for (unsigned int i = 0; i < 4; i++) {
1646 sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
1647 }
1648
1649 }
1650
1651 v.x = (CLQuantum)sv[0];
1652 v.y = (CLQuantum)sv[1];
1653 v.z = (CLQuantum)sv[2];
1654
1655 if (matte!=0)
1656 v.w = (CLQuantum)sv[3];
1657
1658 outputImage[y*imageWidth+x] = v;
1659
1660 }
1661
1662
1663 )
1664
1665
1666
1667 STRINGIFY(
1668
1669 __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1670 , const unsigned int imageWidth, const unsigned int imageHeight
1671 , const int2 offset, const int polarity, const int matte) {
1672
1673 int x = get_global_id(0);
1674 int y = get_global_id(1);
1675
1676 CLPixelType v = inputImage[y*imageWidth+x];
1677
1678 int2 neighbor, clampedNeighbor;
1679
1680 neighbor.y = y + offset.y;
1681 neighbor.x = x + offset.x;
1682 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1683 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1684
1685 CLPixelType r = (clampedNeighbor.x == neighbor.x
1686 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1687 :(CLPixelType)0;
1688
1689
1690 neighbor.y = y - offset.y;
1691 neighbor.x = x - offset.x;
1692 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1693 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1694
1695 CLPixelType s = (clampedNeighbor.x == neighbor.x
1696 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1697 :(CLPixelType)0;
1698
1699
1700 int sv[4];
1701 sv[0] = (int)v.x;
1702 sv[1] = (int)v.y;
1703 sv[2] = (int)v.z;
1704 sv[3] = (int)v.w;
1705
1706 int sr[4];
1707 sr[0] = (int)r.x;
1708 sr[1] = (int)r.y;
1709 sr[2] = (int)r.z;
1710 sr[3] = (int)r.w;
1711
1712 int ss[4];
1713 ss[0] = (int)s.x;
1714 ss[1] = (int)s.y;
1715 ss[2] = (int)s.z;
1716 ss[3] = (int)s.w;
1717
1718 if (polarity > 0) {
1719 \n #pragma unroll 4\n
1720 for (unsigned int i = 0; i < 4; i++) {
1721 //sv[i] = (ss[i] >= (sv[i]+ScaleCharToQuantum(2)) && sr[i] > sv[i] ) ? (sv[i]+ScaleCharToQuantum(1)):sv[i];
1722 //
1723 //sv[i] =(!( (int)(ss[i] >= (sv[i]+ScaleCharToQuantum(2))) && (int) (sr[i] > sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1724 //sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) || (int) ( sr[i] <= sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1725 sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1726 }
1727 }
1728 else {
1729 \n #pragma unroll 4\n
1730 for (unsigned int i = 0; i < 4; i++) {
1731 //sv[i] = (ss[i] <= (sv[i]-ScaleCharToQuantum(2)) && sr[i] < sv[i] ) ? (sv[i]-ScaleCharToQuantum(1)):sv[i];
1732 //
1733 //sv[i] = ( (int)(ss[i] <= (sv[i]-ScaleCharToQuantum(2)) ) + (int)( sr[i] < sv[i] ) ==0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1734 sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1735 }
1736 }
1737
1738 v.x = (CLQuantum)sv[0];
1739 v.y = (CLQuantum)sv[1];
1740 v.z = (CLQuantum)sv[2];
1741
1742 if (matte!=0)
1743 v.w = (CLQuantum)sv[3];
1744
1745 outputImage[y*imageWidth+x] = v;
1746
1747 }
1748
1749
1750 )
1751
1752/*
1753%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1754% %
1755% %
1756% %
1757% E q u a l i z e %
1758% %
1759% %
1760% %
1761%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1762*/
1763
1764 STRINGIFY(
1765 /*
1766 */
1767 __kernel void Equalize(__global CLPixelType * restrict im,
1768 const ChannelType channel,
1769 __global CLPixelType * restrict equalize_map,
1770 const float4 white, const float4 black)
1771 {
1772 const int x = get_global_id(0);
1773 const int y = get_global_id(1);
1774 const int columns = get_global_size(0);
1775 const int c = x + y * columns;
1776
1777 uint ePos;
1778 CLPixelType oValue, eValue;
1779 CLQuantum red, green, blue, opacity;
1780
1781 //read from global
1782 oValue=im[c];
1783
1784 if ((channel & SyncChannels) != 0)
1785 {
1786 if (getRedF4(white) != getRedF4(black))
1787 {
1788 ePos = ScaleQuantumToMap(getRed(oValue));
1789 eValue = equalize_map[ePos];
1790 red = getRed(eValue);
1791 ePos = ScaleQuantumToMap(getGreen(oValue));
1792 eValue = equalize_map[ePos];
1793 green = getRed(eValue);
1794 ePos = ScaleQuantumToMap(getBlue(oValue));
1795 eValue = equalize_map[ePos];
1796 blue = getRed(eValue);
1797 ePos = ScaleQuantumToMap(getOpacity(oValue));
1798 eValue = equalize_map[ePos];
1799 opacity = getRed(eValue);
1800
1801 //write back
1802 im[c]=(CLPixelType)(blue, green, red, opacity);
1803 }
1804
1805 }
1806
1807 // for equalizing, we always need all channels?
1808 // otherwise something more
1809
1810 }
1811 )
1812
1813/*
1814%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1815% %
1816% %
1817% %
1818% F u n c t i o n %
1819% %
1820% %
1821% %
1822%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1823*/
1824
1825 STRINGIFY(
1826
1827 /*
1828 apply FunctionImageChannel(braightness-contrast)
1829 */
1830 CLPixelType ApplyFunction(CLPixelType pixel,const MagickFunction function,
1831 const unsigned int number_parameters,
1832 __constant float *parameters)
1833 {
1834 float4 result = (float4) 0.0f;
1835 switch (function)
1836 {
1837 case PolynomialFunction:
1838 {
1839 for (unsigned int i=0; i < number_parameters; i++)
1840 result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i];
1841 result *= (float4)QuantumRange;
1842 break;
1843 }
1844 case SinusoidFunction:
1845 {
1846 float freq,phase,ampl,bias;
1847 freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1848 phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
1849 ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
1850 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1851 result.x = QuantumRange*(ampl*sin(2.0f*MagickPI*
1852 (freq*QuantumScale*(float)pixel.x + phase/360.0f)) + bias);
1853 result.y = QuantumRange*(ampl*sin(2.0f*MagickPI*
1854 (freq*QuantumScale*(float)pixel.y + phase/360.0f)) + bias);
1855 result.z = QuantumRange*(ampl*sin(2.0f*MagickPI*
1856 (freq*QuantumScale*(float)pixel.z + phase/360.0f)) + bias);
1857 result.w = QuantumRange*(ampl*sin(2.0f*MagickPI*
1858 (freq*QuantumScale*(float)pixel.w + phase/360.0f)) + bias);
1859 break;
1860 }
1861 case ArcsinFunction:
1862 {
1863 float width,range,center,bias;
1864 width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1865 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1866 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1867 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1868
1869 result.x = 2.0f/width*(QuantumScale*(float)pixel.x - center);
1870 result.x = range/MagickPI*asin(result.x)+bias;
1871 result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x;
1872 result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x;
1873
1874 result.y = 2.0f/width*(QuantumScale*(float)pixel.y - center);
1875 result.y = range/MagickPI*asin(result.y)+bias;
1876 result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y;
1877 result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y;
1878
1879 result.z = 2.0f/width*(QuantumScale*(float)pixel.z - center);
1880 result.z = range/MagickPI*asin(result.z)+bias;
1881 result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.x;
1882 result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.x;
1883
1884
1885 result.w = 2.0f/width*(QuantumScale*(float)pixel.w - center);
1886 result.w = range/MagickPI*asin(result.w)+bias;
1887 result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w;
1888 result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w;
1889
1890 result *= (float4)QuantumRange;
1891 break;
1892 }
1893 case ArctanFunction:
1894 {
1895 float slope,range,center,bias;
1896 slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1897 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1898 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1899 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1900 result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center);
1901 result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias);
1902 break;
1903 }
1904 case UndefinedFunction:
1905 break;
1906 }
1907 return (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
1908 ClampToQuantum(result.z), ClampToQuantum(result.w));
1909 }
1910 )
1911
1912 STRINGIFY(
1913 /*
1914 Improve brightness / contrast of the image
1915 channel : define which channel is improved
1916 function : the function called to enchance the brightness contrast
1917 number_parameters : numbers of parameters
1918 parameters : the parameter
1919 */
1920 __kernel void ComputeFunction(__global CLPixelType *im,
1921 const ChannelType channel, const MagickFunction function,
1922 const unsigned int number_parameters, __constant float *parameters)
1923 {
1924 const int x = get_global_id(0);
1925 const int y = get_global_id(1);
1926 const int columns = get_global_size(0);
1927 const int c = x + y * columns;
1928 im[c] = ApplyFunction(im[c], function, number_parameters, parameters);
1929 }
1930 )
1931
1932/*
1933%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1934% %
1935% %
1936% %
1937% G r a y s c a l e %
1938% %
1939% %
1940% %
1941%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1942*/
1943
1944 STRINGIFY(
1945 __kernel void Grayscale(__global CLPixelType *im,
1946 const int method, const int colorspace)
1947 {
1948
1949 const int x = get_global_id(0);
1950 const int y = get_global_id(1);
1951 const int columns = get_global_size(0);
1952 const int c = x + y * columns;
1953
1954 CLPixelType pixel = im[c];
1955
1956 float
1957 blue,
1958 green,
1959 intensity,
1960 red;
1961
1962 red=(float)getRed(pixel);
1963 green=(float)getGreen(pixel);
1964 blue=(float)getBlue(pixel);
1965
1966 intensity=0.0;
1967
1968 CLPixelType filteredPixel;
1969
1970 switch (method)
1971 {
1973 {
1974 intensity=(red+green+blue)/3.0;
1975 break;
1976 }
1978 {
1979 intensity=MagickMax(MagickMax(red,green),blue);
1980 break;
1981 }
1983 {
1984 intensity=(MagickMin(MagickMin(red,green),blue)+
1985 MagickMax(MagickMax(red,green),blue))/2.0;
1986 break;
1987 }
1989 {
1990 intensity=(float) (((float) red*red+green*green+
1991 blue*blue)/(3.0*QuantumRange));
1992 break;
1993 }
1995 {
1996 /*
1997 if (colorspace == RGBColorspace)
1998 {
1999 red=EncodePixelGamma(red);
2000 green=EncodePixelGamma(green);
2001 blue=EncodePixelGamma(blue);
2002 }
2003 */
2004 intensity=0.298839*red+0.586811*green+0.114350*blue;
2005 break;
2006 }
2008 {
2009 /*
2010 if (image->colorspace == sRGBColorspace)
2011 {
2012 red=DecodePixelGamma(red);
2013 green=DecodePixelGamma(green);
2014 blue=DecodePixelGamma(blue);
2015 }
2016 */
2017 intensity=0.298839*red+0.586811*green+0.114350*blue;
2018 break;
2019 }
2021 default:
2022 {
2023 /*
2024 if (image->colorspace == RGBColorspace)
2025 {
2026 red=EncodePixelGamma(red);
2027 green=EncodePixelGamma(green);
2028 blue=EncodePixelGamma(blue);
2029 }
2030 */
2031 intensity=0.212656*red+0.715158*green+0.072186*blue;
2032 break;
2033 }
2035 {
2036 /*
2037 if (image->colorspace == sRGBColorspace)
2038 {
2039 red=DecodePixelGamma(red);
2040 green=DecodePixelGamma(green);
2041 blue=DecodePixelGamma(blue);
2042 }
2043 */
2044 intensity=0.212656*red+0.715158*green+0.072186*blue;
2045 break;
2046 }
2048 {
2049 intensity=(float) (sqrt((float) red*red+green*green+
2050 blue*blue)/sqrt(3.0));
2051 break;
2052 }
2053
2054 }
2055
2056 setGray(&filteredPixel, ClampToQuantum(intensity));
2057
2058 filteredPixel.w = pixel.w;
2059
2060 im[c] = filteredPixel;
2061 }
2062 )
2063
2064/*
2065%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2066% %
2067% %
2068% %
2069% L o c a l C o n t r a s t %
2070% %
2071% %
2072% %
2073%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2074*/
2075
2076 STRINGIFY(
2077 static inline int mirrorBottom(int value)
2078 {
2079 return (value < 0) ? - (value) : value;
2080 }
2081 static inline int mirrorTop(int value, int width)
2082 {
2083 return (value >= width) ? (2 * width - value - 1) : value;
2084 }
2085
2086 __kernel void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *tmpImage,
2087 const int radius,
2088 const int imageWidth,
2089 const int imageHeight)
2090 {
2091 const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
2092
2093 int x = get_local_id(0);
2094 int y = get_global_id(1);
2095
2096 if ((x >= imageWidth) || (y >= imageHeight))
2097 return;
2098
2099 global CLPixelType *src = srcImage + y * imageWidth;
2100
2101 for (int i = x; i < imageWidth; i += get_local_size(0)) {
2102 float sum = 0.0f;
2103 float weight = 1.0f;
2104
2105 int j = i - radius;
2106 while ((j + 7) < i) {
2107 for (int k = 0; k < 8; ++k) // Unroll 8x
2108 sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
2109 weight += 8.0f;
2110 j+=8;
2111 }
2112 while (j < i) {
2113 sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
2114 weight += 1.0f;
2115 ++j;
2116 }
2117
2118 while ((j + 7) < radius + i) {
2119 for (int k = 0; k < 8; ++k) // Unroll 8x
2120 sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
2121 weight -= 8.0f;
2122 j+=8;
2123 }
2124 while (j < radius + i) {
2125 sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
2126 weight -= 1.0f;
2127 ++j;
2128 }
2129
2130 tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
2131 }
2132 }
2133 )
2134
2135 STRINGIFY(
2136 __kernel void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *blurImage,
2137 const int radius,
2138 const float strength,
2139 const int imageWidth,
2140 const int imageHeight)
2141 {
2142 const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
2143
2144 int x = get_global_id(0);
2145 int y = get_global_id(1);
2146
2147 if ((x >= imageWidth) || (y >= imageHeight))
2148 return;
2149
2150 global float *src = blurImage + x;
2151
2152 float sum = 0.0f;
2153 float weight = 1.0f;
2154
2155 int j = y - radius;
2156 while ((j + 7) < y) {
2157 for (int k = 0; k < 8; ++k) // Unroll 8x
2158 sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
2159 weight += 8.0f;
2160 j+=8;
2161 }
2162 while (j < y) {
2163 sum += weight * src[mirrorBottom(j) * imageWidth];
2164 weight += 1.0f;
2165 ++j;
2166 }
2167
2168 while ((j + 7) < radius + y) {
2169 for (int k = 0; k < 8; ++k) // Unroll 8x
2170 sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
2171 weight -= 8.0f;
2172 j+=8;
2173 }
2174 while (j < radius + y) {
2175 sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
2176 weight -= 1.0f;
2177 ++j;
2178 }
2179
2180 CLPixelType pixel = srcImage[x + y * imageWidth];
2181 float srcVal = dot(RGB, convert_float4(pixel));
2182 float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
2183 mult = (srcVal + mult) / srcVal;
2184
2185 pixel.x = ClampToQuantum(pixel.x * mult);
2186 pixel.y = ClampToQuantum(pixel.y * mult);
2187 pixel.z = ClampToQuantum(pixel.z * mult);
2188
2189 dstImage[x + y * imageWidth] = pixel;
2190 }
2191 )
2192
2193/*
2194%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2195% %
2196% %
2197% %
2198% M o d u l a t e %
2199% %
2200% %
2201% %
2202%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2203*/
2204
2205 STRINGIFY(
2206
2207 static inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
2208 float *hue, float *saturation, float *lightness)
2209 {
2210 float
2211 c,
2212 tmax,
2213 tmin;
2214
2215 /*
2216 Convert RGB to HSL colorspace.
2217 */
2220
2221 c=tmax-tmin;
2222
2223 *lightness=(tmax+tmin)/2.0;
2224 if (c <= 0.0)
2225 {
2226 *hue=0.0;
2227 *saturation=0.0;
2228 return;
2229 }
2230
2231 if (tmax == (QuantumScale*red))
2232 {
2233 *hue=(QuantumScale*green-QuantumScale*blue)/c;
2234 if ((QuantumScale*green) < (QuantumScale*blue))
2235 *hue+=6.0;
2236 }
2237 else
2238 if (tmax == (QuantumScale*green))
2239 *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
2240 else
2241 *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
2242
2243 *hue*=60.0/360.0;
2244 if (*lightness <= 0.5)
2245 *saturation=c/(2.0*(*lightness));
2246 else
2247 *saturation=c/(2.0-2.0*(*lightness));
2248 }
2249
2250 static inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
2251 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2252 {
2253 float
2254 b,
2255 c,
2256 g,
2257 h,
2258 tmin,
2259 r,
2260 x;
2261
2262 /*
2263 Convert HSL to RGB colorspace.
2264 */
2265 h=hue*360.0;
2266 if (lightness <= 0.5)
2267 c=2.0*lightness*saturation;
2268 else
2269 c=(2.0-2.0*lightness)*saturation;
2270 tmin=lightness-0.5*c;
2271 h-=360.0*floor(h/360.0);
2272 h/=60.0;
2273 x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
2274 switch ((int) floor(h) % 6)
2275 {
2276 case 0:
2277 {
2278 r=tmin+c;
2279 g=tmin+x;
2280 b=tmin;
2281 break;
2282 }
2283 case 1:
2284 {
2285 r=tmin+x;
2286 g=tmin+c;
2287 b=tmin;
2288 break;
2289 }
2290 case 2:
2291 {
2292 r=tmin;
2293 g=tmin+c;
2294 b=tmin+x;
2295 break;
2296 }
2297 case 3:
2298 {
2299 r=tmin;
2300 g=tmin+x;
2301 b=tmin+c;
2302 break;
2303 }
2304 case 4:
2305 {
2306 r=tmin+x;
2307 g=tmin;
2308 b=tmin+c;
2309 break;
2310 }
2311 case 5:
2312 {
2313 r=tmin+c;
2314 g=tmin;
2315 b=tmin+x;
2316 break;
2317 }
2318 default:
2319 {
2320 r=0.0;
2321 g=0.0;
2322 b=0.0;
2323 }
2324 }
2326 *green=ClampToQuantum(QuantumRange*g);
2328 }
2329
2330 static inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
2331 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2332 {
2333 float
2334 hue,
2335 lightness,
2336 saturation;
2337
2338 /*
2339 Increase or decrease color lightness, saturation, or hue.
2340 */
2341 ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
2342 hue+=0.5*(0.01*percent_hue-1.0);
2343 while (hue < 0.0)
2344 hue+=1.0;
2345 while (hue >= 1.0)
2346 hue-=1.0;
2347 saturation*=0.01*percent_saturation;
2348 lightness*=0.01*percent_lightness;
2349 ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
2350 }
2351
2352 __kernel void Modulate(__global CLPixelType *im,
2353 const float percent_brightness,
2354 const float percent_hue,
2355 const float percent_saturation,
2356 const int colorspace)
2357 {
2358
2359 const int x = get_global_id(0);
2360 const int y = get_global_id(1);
2361 const int columns = get_global_size(0);
2362 const int c = x + y * columns;
2363
2364 CLPixelType pixel = im[c];
2365
2366 CLQuantum
2367 blue,
2368 green,
2369 red;
2370
2371 red=getRed(pixel);
2372 green=getGreen(pixel);
2373 blue=getBlue(pixel);
2374
2375 switch (colorspace)
2376 {
2377 case HSLColorspace:
2378 default:
2379 {
2380 ModulateHSL(percent_hue, percent_saturation, percent_brightness,
2381 &red, &green, &blue);
2382 }
2383
2384 }
2385
2386 CLPixelType filteredPixel;
2387
2388 setRed(&filteredPixel, red);
2389 setGreen(&filteredPixel, green);
2390 setBlue(&filteredPixel, blue);
2391 filteredPixel.w = pixel.w;
2392
2393 im[c] = filteredPixel;
2394 }
2395 )
2396
2397/*
2398%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2399% %
2400% %
2401% %
2402% M o t i o n B l u r %
2403% %
2404% %
2405% %
2406%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2407*/
2408
2409 STRINGIFY(
2410 __kernel
2411 void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
2412 const unsigned int imageWidth, const unsigned int imageHeight,
2413 const __global float *filter, const unsigned int width, const __global int2* offset,
2414 const float4 bias,
2415 const ChannelType channel, const unsigned int matte) {
2416
2417 int2 currentPixel;
2418 currentPixel.x = get_global_id(0);
2419 currentPixel.y = get_global_id(1);
2420
2421 if (currentPixel.x >= imageWidth
2422 || currentPixel.y >= imageHeight)
2423 return;
2424
2425 float4 pixel;
2426 pixel.x = (float)bias.x;
2427 pixel.y = (float)bias.y;
2428 pixel.z = (float)bias.z;
2429 pixel.w = (float)bias.w;
2430
2431 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
2432
2433 for (int i = 0; i < width; i++) {
2434 // only support EdgeVirtualPixelMethod through ClampToCanvas
2435 // TODO: implement other virtual pixel method
2436 int2 samplePixel = currentPixel + offset[i];
2437 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2438 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2439 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2440
2441 pixel.x += (filter[i] * (float)samplePixelValue.x);
2442 pixel.y += (filter[i] * (float)samplePixelValue.y);
2443 pixel.z += (filter[i] * (float)samplePixelValue.z);
2444 pixel.w += (filter[i] * (float)samplePixelValue.w);
2445 }
2446
2447 CLPixelType outputPixel;
2448 outputPixel.x = ClampToQuantum(pixel.x);
2449 outputPixel.y = ClampToQuantum(pixel.y);
2450 outputPixel.z = ClampToQuantum(pixel.z);
2451 outputPixel.w = ClampToQuantum(pixel.w);
2452 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
2453 }
2454 else {
2455
2456 float gamma = 0.0f;
2457 for (int i = 0; i < width; i++) {
2458 // only support EdgeVirtualPixelMethod through ClampToCanvas
2459 // TODO: implement other virtual pixel method
2460 int2 samplePixel = currentPixel + offset[i];
2461 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2462 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2463
2464 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2465
2466 float alpha = QuantumScale*(QuantumRange-samplePixelValue.w);
2467 float k = filter[i];
2468 pixel.x = pixel.x + k * alpha * samplePixelValue.x;
2469 pixel.y = pixel.y + k * alpha * samplePixelValue.y;
2470 pixel.z = pixel.z + k * alpha * samplePixelValue.z;
2471
2472 pixel.w += k * alpha * samplePixelValue.w;
2473
2474 gamma+=k*alpha;
2475 }
2476 gamma = PerceptibleReciprocal(gamma);
2477 pixel.xyz = gamma*pixel.xyz;
2478
2479 CLPixelType outputPixel;
2480 outputPixel.x = ClampToQuantum(pixel.x);
2481 outputPixel.y = ClampToQuantum(pixel.y);
2482 outputPixel.z = ClampToQuantum(pixel.z);
2483 outputPixel.w = ClampToQuantum(pixel.w);
2484 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
2485 }
2486 }
2487 )
2488
2489/*
2490%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2491% %
2492% %
2493% %
2494% R a d i a l B l u r %
2495% %
2496% %
2497% %
2498%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2499*/
2500
2501 STRINGIFY(
2502 __kernel void RadialBlur(const __global CLPixelType *im, __global CLPixelType *filtered_im,
2503 const float4 bias,
2504 const unsigned int channel, const unsigned int matte,
2505 const float2 blurCenter,
2506 __constant float *cos_theta, __constant float *sin_theta,
2507 const unsigned int cossin_theta_size)
2508 {
2509 const int x = get_global_id(0);
2510 const int y = get_global_id(1);
2511 const int columns = get_global_size(0);
2512 const int rows = get_global_size(1);
2513 unsigned int step = 1;
2514 float center_x = (float) x - blurCenter.x;
2515 float center_y = (float) y - blurCenter.y;
2516 float radius = hypot(center_x, center_y);
2517
2518 //float blur_radius = hypot((float) columns/2.0f, (float) rows/2.0f);
2519 float blur_radius = hypot(blurCenter.x, blurCenter.y);
2520
2521 if (radius > MagickEpsilon)
2522 {
2523 step = (unsigned int) (blur_radius / radius);
2524 if (step == 0)
2525 step = 1;
2526 if (step >= cossin_theta_size)
2527 step = cossin_theta_size-1;
2528 }
2529
2530 float4 result;
2531 result.x = (float)bias.x;
2532 result.y = (float)bias.y;
2533 result.z = (float)bias.z;
2534 result.w = (float)bias.w;
2535 float normalize = 0.0f;
2536
2537 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
2538 for (unsigned int i=0; i<cossin_theta_size; i+=step)
2539 {
2540 result += convert_float4(im[
2541 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
2542 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
2543 normalize += 1.0f;
2544 }
2545 normalize = PerceptibleReciprocal(normalize);
2546 result = result * normalize;
2547 }
2548 else {
2549 float gamma = 0.0f;
2550 for (unsigned int i=0; i<cossin_theta_size; i+=step)
2551 {
2552 float4 p = convert_float4(im[
2553 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
2554 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
2555
2556 float alpha = (float)(QuantumScale*(QuantumRange-p.w));
2557 result.x += alpha * p.x;
2558 result.y += alpha * p.y;
2559 result.z += alpha * p.z;
2560 result.w += p.w;
2561 gamma+=alpha;
2562 normalize += 1.0f;
2563 }
2564 gamma = PerceptibleReciprocal(gamma);
2565 normalize = PerceptibleReciprocal(normalize);
2566 result.x = gamma*result.x;
2567 result.y = gamma*result.y;
2568 result.z = gamma*result.z;
2569 result.w = normalize*result.w;
2570 }
2571 filtered_im[y * columns + x] = (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
2572 ClampToQuantum(result.z), ClampToQuantum(result.w));
2573 }
2574 )
2575
2576/*
2577%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2578% %
2579% %
2580% %
2581% R e s i z e %
2582% %
2583% %
2584% %
2585%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2586*/
2587
2588 STRINGIFY(
2589 // Based on Box from resize.c
2590 float BoxResizeFilter(const float x)
2591 {
2592 return 1.0f;
2593 }
2594 )
2595
2596 STRINGIFY(
2597 // Based on CubicBC from resize.c
2598 float CubicBC(const float x,const __global float* resizeFilterCoefficients)
2599 {
2600 /*
2601 Cubic Filters using B,C determined values:
2602 Mitchell-Netravali B = 1/3 C = 1/3 "Balanced" cubic spline filter
2603 Catmull-Rom B = 0 C = 1/2 Interpolatory and exact on linears
2604 Spline B = 1 C = 0 B-Spline Gaussian approximation
2605 Hermite B = 0 C = 0 B-Spline interpolator
2606
2607 See paper by Mitchell and Netravali, Reconstruction Filters in Computer
2608 Graphics Computer Graphics, Volume 22, Number 4, August 1988
2609 http://www.cs.utexas.edu/users/fussell/courses/cs384g/lectures/mitchell/
2610 Mitchell.pdf.
2611
2612 Coefficents are determined from B,C values:
2613 P0 = ( 6 - 2*B )/6 = coeff[0]
2614 P1 = 0
2615 P2 = (-18 +12*B + 6*C )/6 = coeff[1]
2616 P3 = ( 12 - 9*B - 6*C )/6 = coeff[2]
2617 Q0 = ( 8*B +24*C )/6 = coeff[3]
2618 Q1 = ( -12*B -48*C )/6 = coeff[4]
2619 Q2 = ( 6*B +30*C )/6 = coeff[5]
2620 Q3 = ( - 1*B - 6*C )/6 = coeff[6]
2621
2622 which are used to define the filter:
2623
2624 P0 + P1*x + P2*x^2 + P3*x^3 0 <= x < 1
2625 Q0 + Q1*x + Q2*x^2 + Q3*x^3 1 <= x < 2
2626
2627 which ensures function is continuous in value and derivative (slope).
2628 */
2629 if (x < 1.0)
2630 return(resizeFilterCoefficients[0]+x*(x*
2631 (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
2632 if (x < 2.0)
2633 return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
2634 (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
2635 return(0.0);
2636 }
2637 )
2638
2639 STRINGIFY(
2640 float Sinc(const float x)
2641 {
2642 if (x != 0.0f)
2643 {
2644 const float alpha=(float) (MagickPI*x);
2645 return sinpi(x)/alpha;
2646 }
2647 return(1.0f);
2648 }
2649 )
2650
2651 STRINGIFY(
2652 float Triangle(const float x)
2653 {
2654 /*
2655 1st order (linear) B-Spline, bilinear interpolation, Tent 1D filter, or
2656 a Bartlett 2D Cone filter. Also used as a Bartlett Windowing function
2657 for Sinc().
2658 */
2659 return ((x<1.0f)?(1.0f-x):0.0f);
2660 }
2661 )
2662
2663
2664 STRINGIFY(
2665 float Hanning(const float x)
2666 {
2667 /*
2668 Cosine window function:
2669 0.5+0.5*cos(pi*x).
2670 */
2671 const float cosine=cos((MagickPI*x));
2672 return(0.5f+0.5f*cosine);
2673 }
2674 )
2675
2676 STRINGIFY(
2677 float Hamming(const float x)
2678 {
2679 /*
2680 Offset cosine window function:
2681 .54 + .46 cos(pi x).
2682 */
2683 const float cosine=cos((MagickPI*x));
2684 return(0.54f+0.46f*cosine);
2685 }
2686 )
2687
2688 STRINGIFY(
2689 float Blackman(const float x)
2690 {
2691 /*
2692 Blackman: 2nd order cosine windowing function:
2693 0.42 + 0.5 cos(pi x) + 0.08 cos(2pi x)
2694
2695 Refactored by Chantal Racette and Nicolas Robidoux to one trig call and
2696 five flops.
2697 */
2698 const float cosine=cos((MagickPI*x));
2699 return(0.34f+cosine*(0.5f+cosine*0.16f));
2700 }
2701 )
2702
2703
2704
2705
2706 STRINGIFY(
2707 static inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
2708 {
2709 switch (filterType)
2710 {
2711 /* Call Sinc even for SincFast to get better precision on GPU
2712 and to avoid thread divergence. Sinc is pretty fast on GPU anyway...*/
2715 return Sinc(x);
2717 return CubicBC(x,filterCoefficients);
2719 return BoxResizeFilter(x);
2721 return Triangle(x);
2723 return Hanning(x);
2725 return Hamming(x);
2727 return Blackman(x);
2728
2729 default:
2730 return 0.0f;
2731 }
2732 }
2733 )
2734
2735
2736 STRINGIFY(
2737 static inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
2738 , const ResizeWeightingFunctionType resizeWindowType
2739 , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
2740 {
2741 float scale;
2742 float xBlur = fabs(x/resizeFilterBlur);
2743 if (resizeWindowSupport < MagickEpsilon
2744 || resizeWindowType == BoxWeightingFunction)
2745 {
2746 scale = 1.0f;
2747 }
2748 else
2749 {
2750 scale = resizeFilterScale;
2751 scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
2752 }
2753 float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
2754 return weight;
2755 }
2756
2757 )
2758
2759 ;
2760 const char* accelerateKernels2 =
2761
2762 STRINGIFY(
2763
2764 static inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2765 return (numWorkItems/pixelPerWorkgroup);
2766 }
2767
2768 // returns the index of the pixel for the current workitem to compute.
2769 // returns -1 if this workitem doesn't need to participate in any computation
2770 static inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2771 const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
2772 int pixelIndex = itemID/numWorkItemsPerPixel;
2773 pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
2774 return pixelIndex;
2775 }
2776
2777 )
2778
2779 STRINGIFY(
2780 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2781 void ResizeHorizontalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2782 , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2783 , const int resizeFilterType, const int resizeWindowType
2784 , const __global float* resizeFilterCubicCoefficients
2785 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2786 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2787 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2788
2789
2790 // calculate the range of resized image pixels computed by this workgroup
2791 const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
2792 const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns);
2793 const unsigned int actualNumPixelToCompute = stopX - startX;
2794
2795 // calculate the range of input image pixels to cache
2796 float scale = MagickMax(1.0f/xFactor+MagickEpsilon ,1.0f);
2797 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2798 scale = PerceptibleReciprocal(scale);
2799
2800 const int cacheRangeStartX = MagickMax((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
2801 const int cacheRangeEndX = MagickMin((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
2802
2803 // cache the input pixels into local memory
2804 const unsigned int y = get_global_id(1);
2805 event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
2806 wait_group_events(1,&e);
2807
2808 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2809 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2810 {
2811
2812 const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
2813 const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX);
2814 const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
2815
2816 // determine which resized pixel computed by this workitem
2817 const unsigned int itemID = get_local_id(0);
2818 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
2819
2820 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
2821
2822 float4 filteredPixel = (float4)0.0f;
2823 float density = 0.0f;
2824 float gamma = 0.0f;
2825 // -1 means this workitem doesn't participate in the computation
2826 if (pixelIndex != -1) {
2827
2828 // x coordinated of the resized pixel computed by this workitem
2829 const int x = chunkStartX + pixelIndex;
2830
2831 // calculate how many steps required for this pixel
2832 const float bisect = (x+0.5)/xFactor+MagickEpsilon;
2833 const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
2834 const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputColumns);
2835 const unsigned int n = stop - start;
2836
2837 // calculate how many steps this workitem will contribute
2838 unsigned int numStepsPerWorkItem = n / numItems;
2839 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2840
2841 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2842 if (startStep < n) {
2843 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
2844
2845 unsigned int cacheIndex = start+startStep-cacheRangeStartX;
2846 if (matte == 0) {
2847
2848 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2849 float4 cp = convert_float4(inputImageCache[cacheIndex]);
2850
2851 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2852 , (ResizeWeightingFunctionType)resizeWindowType
2853 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2854
2855 filteredPixel += ((float4)weight)*cp;
2856 density+=weight;
2857 }
2858
2859
2860 }
2861 else {
2862 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2863 CLPixelType p = inputImageCache[cacheIndex];
2864
2865 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2866 , (ResizeWeightingFunctionType)resizeWindowType
2867 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2868
2869 float alpha = weight * QuantumScale * GetPixelAlpha(p);
2870 float4 cp = convert_float4(p);
2871
2872 filteredPixel.x += alpha * cp.x;
2873 filteredPixel.y += alpha * cp.y;
2874 filteredPixel.z += alpha * cp.z;
2875 filteredPixel.w += weight * cp.w;
2876
2877 density+=weight;
2878 gamma+=alpha;
2879 }
2880 }
2881 }
2882 }
2883
2884 // initialize the accumulators to zero
2885 if (itemID < actualNumPixelInThisChunk) {
2886 outputPixelCache[itemID] = (float4)0.0f;
2887 densityCache[itemID] = 0.0f;
2888 if (matte != 0)
2889 gammaCache[itemID] = 0.0f;
2890 }
2891 barrier(CLK_LOCAL_MEM_FENCE);
2892
2893 // accumulatte the filtered pixel value and the density
2894 for (unsigned int i = 0; i < numItems; i++) {
2895 if (pixelIndex != -1) {
2896 if (itemID%numItems == i) {
2897 outputPixelCache[pixelIndex]+=filteredPixel;
2898 densityCache[pixelIndex]+=density;
2899 if (matte!=0) {
2900 gammaCache[pixelIndex]+=gamma;
2901 }
2902 }
2903 }
2904 barrier(CLK_LOCAL_MEM_FENCE);
2905 }
2906
2907 if (itemID < actualNumPixelInThisChunk) {
2908 if (matte==0) {
2909 float density = densityCache[itemID];
2910 float4 filteredPixel = outputPixelCache[itemID];
2911 if (density!= 0.0f && density != 1.0)
2912 {
2913 density = PerceptibleReciprocal(density);
2914 filteredPixel *= (float4)density;
2915 }
2916 filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
2917 , ClampToQuantum(filteredPixel.y)
2918 , ClampToQuantum(filteredPixel.z)
2919 , ClampToQuantum(filteredPixel.w));
2920 }
2921 else {
2922 float density = densityCache[itemID];
2923 float gamma = gammaCache[itemID];
2924 float4 filteredPixel = outputPixelCache[itemID];
2925
2926 if (density!= 0.0f && density != 1.0) {
2927 density = PerceptibleReciprocal(density);
2928 filteredPixel *= (float4)density;
2929 gamma *= density;
2930 }
2931 gamma = PerceptibleReciprocal(gamma);
2932
2933 CLPixelType fp;
2934 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
2935 , ClampToQuantum(gamma*filteredPixel.y)
2936 , ClampToQuantum(gamma*filteredPixel.z)
2937 , ClampToQuantum(filteredPixel.w));
2938
2939 filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
2940
2941 }
2942 }
2943
2944 } // end of chunking loop
2945 }
2946 )
2947
2948
2949 STRINGIFY(
2950 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2951 void ResizeVerticalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2952 , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2953 , const int resizeFilterType, const int resizeWindowType
2954 , const __global float* resizeFilterCubicCoefficients
2955 , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2956 , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2957 , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2958
2959
2960 // calculate the range of resized image pixels computed by this workgroup
2961 const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
2962 const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows);
2963 const unsigned int actualNumPixelToCompute = stopY - startY;
2964
2965 // calculate the range of input image pixels to cache
2966 float scale = MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f);
2967 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2968 scale = PerceptibleReciprocal(scale);
2969
2970 const int cacheRangeStartY = MagickMax((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
2971 const int cacheRangeEndY = MagickMin((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
2972
2973 // cache the input pixels into local memory
2974 const unsigned int x = get_global_id(0);
2975 event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
2976 wait_group_events(1,&e);
2977
2978 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2979 for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2980 {
2981
2982 const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
2983 const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY);
2984 const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
2985
2986 // determine which resized pixel computed by this workitem
2987 const unsigned int itemID = get_local_id(1);
2988 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
2989
2990 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
2991
2992 float4 filteredPixel = (float4)0.0f;
2993 float density = 0.0f;
2994 float gamma = 0.0f;
2995 // -1 means this workitem doesn't participate in the computation
2996 if (pixelIndex != -1) {
2997
2998 // x coordinated of the resized pixel computed by this workitem
2999 const int y = chunkStartY + pixelIndex;
3000
3001 // calculate how many steps required for this pixel
3002 const float bisect = (y+0.5)/yFactor+MagickEpsilon;
3003 const unsigned int start = (unsigned int)MagickMax(bisect-support+0.5f,0.0f);
3004 const unsigned int stop = (unsigned int)MagickMin(bisect+support+0.5f,(float)inputRows);
3005 const unsigned int n = stop - start;
3006
3007 // calculate how many steps this workitem will contribute
3008 unsigned int numStepsPerWorkItem = n / numItems;
3009 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
3010
3011 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
3012 if (startStep < n) {
3013 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
3014
3015 unsigned int cacheIndex = start+startStep-cacheRangeStartY;
3016 if (matte == 0) {
3017
3018 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3019 float4 cp = convert_float4(inputImageCache[cacheIndex]);
3020
3021 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
3022 , (ResizeWeightingFunctionType)resizeWindowType
3023 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3024
3025 filteredPixel += ((float4)weight)*cp;
3026 density+=weight;
3027 }
3028
3029
3030 }
3031 else {
3032 for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3033 CLPixelType p = inputImageCache[cacheIndex];
3034
3035 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
3036 , (ResizeWeightingFunctionType)resizeWindowType
3037 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3038
3039 float alpha = weight * QuantumScale * GetPixelAlpha(p);
3040 float4 cp = convert_float4(p);
3041
3042 filteredPixel.x += alpha * cp.x;
3043 filteredPixel.y += alpha * cp.y;
3044 filteredPixel.z += alpha * cp.z;
3045 filteredPixel.w += weight * cp.w;
3046
3047 density+=weight;
3048 gamma+=alpha;
3049 }
3050 }
3051 }
3052 }
3053
3054 // initialize the accumulators to zero
3055 if (itemID < actualNumPixelInThisChunk) {
3056 outputPixelCache[itemID] = (float4)0.0f;
3057 densityCache[itemID] = 0.0f;
3058 if (matte != 0)
3059 gammaCache[itemID] = 0.0f;
3060 }
3061 barrier(CLK_LOCAL_MEM_FENCE);
3062
3063 // accumulatte the filtered pixel value and the density
3064 for (unsigned int i = 0; i < numItems; i++) {
3065 if (pixelIndex != -1) {
3066 if (itemID%numItems == i) {
3067 outputPixelCache[pixelIndex]+=filteredPixel;
3068 densityCache[pixelIndex]+=density;
3069 if (matte!=0) {
3070 gammaCache[pixelIndex]+=gamma;
3071 }
3072 }
3073 }
3074 barrier(CLK_LOCAL_MEM_FENCE);
3075 }
3076
3077 if (itemID < actualNumPixelInThisChunk) {
3078 if (matte==0) {
3079 float density = densityCache[itemID];
3080 float4 filteredPixel = outputPixelCache[itemID];
3081 if (density!= 0.0f && density != 1.0)
3082 {
3083 density = PerceptibleReciprocal(density);
3084 filteredPixel *= (float4)density;
3085 }
3086 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
3087 , ClampToQuantum(filteredPixel.y)
3088 , ClampToQuantum(filteredPixel.z)
3089 , ClampToQuantum(filteredPixel.w));
3090 }
3091 else {
3092 float density = densityCache[itemID];
3093 float gamma = gammaCache[itemID];
3094 float4 filteredPixel = outputPixelCache[itemID];
3095
3096 if (density!= 0.0f && density != 1.0) {
3097 density = PerceptibleReciprocal(density);
3098 filteredPixel *= (float4)density;
3099 gamma *= density;
3100 }
3101 gamma = PerceptibleReciprocal(gamma);
3102
3103 CLPixelType fp;
3104 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
3105 , ClampToQuantum(gamma*filteredPixel.y)
3106 , ClampToQuantum(gamma*filteredPixel.z)
3107 , ClampToQuantum(filteredPixel.w));
3108
3109 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
3110
3111 }
3112 }
3113
3114 } // end of chunking loop
3115 }
3116 )
3117
3118/*
3119%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3120% %
3121% %
3122% %
3123% U n s h a r p M a s k %
3124% %
3125% %
3126% %
3127%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3128*/
3129
3130 STRINGIFY(
3131 __kernel void UnsharpMaskBlurColumn(const __global CLPixelType* inputImage,
3132 const __global float4 *blurRowData, __global CLPixelType *filtered_im,
3133 const unsigned int imageColumns, const unsigned int imageRows,
3134 __local float4* cachedData, __local float* cachedFilter,
3135 const ChannelType channel, const __global float *filter, const unsigned int width,
3136 const float gain, const float threshold)
3137 {
3138 const unsigned int radius = (width-1)/2;
3139
3140 // cache the pixel shared by the workgroup
3141 const int groupX = get_group_id(0);
3142 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
3143 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
3144
3145 if (groupStartY >= 0
3146 && groupStopY < imageRows) {
3147 event_t e = async_work_group_strided_copy(cachedData
3148 ,blurRowData+groupStartY*imageColumns+groupX
3149 ,groupStopY-groupStartY,imageColumns,0);
3150 wait_group_events(1,&e);
3151 }
3152 else {
3153 for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
3154 cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX];
3155 }
3156 barrier(CLK_LOCAL_MEM_FENCE);
3157 }
3158 // cache the filter as well
3159 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
3160 wait_group_events(1,&e);
3161
3162 // only do the work if this is not a patched item
3163 //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1);
3164 const int cy = get_global_id(1);
3165
3166 if (cy < imageRows) {
3167 float4 blurredPixel = (float4) 0.0f;
3168
3169 int i = 0;
3170
3171 \n #ifndef UFACTOR \n
3172 \n #define UFACTOR 8 \n
3173 \n #endif \n
3174
3175 for ( ; i+UFACTOR < width; )
3176 {
3177 \n #pragma unroll UFACTOR \n
3178 for (int j=0; j < UFACTOR; j++, i++)
3179 {
3180 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
3181 }
3182 }
3183
3184 for ( ; i < width; i++)
3185 {
3186 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
3187 }
3188
3189 blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
3190 ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
3191
3192 float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
3193 float4 outputPixel = inputImagePixel - blurredPixel;
3194
3195 float quantumThreshold = QuantumRange*threshold;
3196
3197 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
3198 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
3199
3200 //write back
3201 filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
3202 ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
3203
3204 }
3205 }
3206 )
3207
3208
3209
3210 STRINGIFY(
3211 __kernel void UnsharpMask(__global CLPixelType *im, __global CLPixelType *filtered_im,
3212 __constant float *filter,
3213 const unsigned int width,
3214 const unsigned int imageColumns, const unsigned int imageRows,
3215 __local float4 *pixels,
3216 const float gain, const float threshold, const unsigned int justBlur)
3217 {
3218 const int x = get_global_id(0);
3219 const int y = get_global_id(1);
3220
3221 const unsigned int radius = (width - 1) / 2;
3222
3223 int row = y - radius;
3224 int baseRow = get_group_id(1) * get_local_size(1) - radius;
3225 int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
3226
3227 while (row < endRow) {
3228 int srcy = (row < 0) ? -row : row; // mirror pad
3229 srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy;
3230
3231 float4 value = 0.0f;
3232
3233 int ix = x - radius;
3234 int i = 0;
3235
3236 while (i + 7 < width) {
3237 for (int j = 0; j < 8; ++j) { // unrolled
3238 int srcx = ix + j;
3239 srcx = (srcx < 0) ? -srcx : srcx;
3240 srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
3241 value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]);
3242 }
3243 ix += 8;
3244 i += 8;
3245 }
3246
3247 while (i < width) {
3248 int srcx = (ix < 0) ? -ix : ix; // mirror pad
3249 srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
3250 value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]);
3251 ++i;
3252 ++ix;
3253 }
3254 pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
3255 row += get_local_size(1);
3256 }
3257
3258
3259 barrier(CLK_LOCAL_MEM_FENCE);
3260
3261
3262 const int px = get_local_id(0);
3263 const int py = get_local_id(1);
3264 const int prp = get_local_size(0);
3265 float4 value = (float4)(0.0f);
3266
3267 int i = 0;
3268 while (i + 7 < width) { // unrolled
3269 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
3270 value += (float4)(filter[i]) * pixels[px + (py + i + 1) * prp];
3271 value += (float4)(filter[i]) * pixels[px + (py + i + 2) * prp];
3272 value += (float4)(filter[i]) * pixels[px + (py + i + 3) * prp];
3273 value += (float4)(filter[i]) * pixels[px + (py + i + 4) * prp];
3274 value += (float4)(filter[i]) * pixels[px + (py + i + 5) * prp];
3275 value += (float4)(filter[i]) * pixels[px + (py + i + 6) * prp];
3276 value += (float4)(filter[i]) * pixels[px + (py + i + 7) * prp];
3277 i += 8;
3278 }
3279 while (i < width) {
3280 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
3281 ++i;
3282 }
3283 if ((x < imageColumns) && (y < imageRows)) {
3284 if (justBlur == 0) { // apply sharpening
3285 float4 srcPixel = convert_float4(im[x + y * imageColumns]);
3286 float4 diff = srcPixel - value;
3287
3288 float quantumThreshold = QuantumRange*threshold;
3289
3290 int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
3291 value = select(srcPixel + diff * gain, srcPixel, mask);
3292 }
3293 filtered_im[x + y * imageColumns] = (CLPixelType)(ClampToQuantum(value.s0), ClampToQuantum(value.s1), ClampToQuantum(value.s2), ClampToQuantum(value.s3));
3294 }
3295 }
3296 )
3297
3298 STRINGIFY(
3299 __kernel __attribute__((reqd_work_group_size(64, 4, 1))) void WaveletDenoise(__global CLPixelType *srcImage, __global CLPixelType *dstImage,
3300 const float threshold,
3301 const int passes,
3302 const int imageWidth,
3303 const int imageHeight)
3304 {
3305 const int pad = (1 << (passes - 1));
3306 const int tileSize = 64;
3307 const int tileRowPixels = 64;
3308 const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 };
3309
3310 CLPixelType stage[16];
3311
3312 local float buffer[64 * 64];
3313
3314 int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0);
3315 int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad;
3316
3317 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3318 stage[i / 4] = srcImage[mirrorTop(mirrorBottom(srcx), imageWidth) + (mirrorTop(mirrorBottom(srcy + i) , imageHeight)) * imageWidth];
3319 }
3320
3321
3322 for (int channel = 0; channel < 3; ++channel) {
3323 // Load LDS
3324 switch (channel) {
3325 case 0:
3326 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3327 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s0);
3328 break;
3329 case 1:
3330 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3331 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s1);
3332 break;
3333 case 2:
3334 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3335 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s2);
3336 break;
3337 }
3338
3339
3340 // Process
3341
3342 float tmp[16];
3343 float accum[16];
3344 float pixel;
3345
3346 for (int pass = 0; pass < passes; ++pass) {
3347 const int radius = 1 << pass;
3348 const int x = get_local_id(0);
3349 const float thresh = threshold * noise[pass];
3350
3351 if (pass == 0)
3352 accum[0] = accum[1] = accum[2] = accum[3] = accum[4] = accum[5] = accum[6] = accum[6] = accum[7] = accum[8] = accum[9] = accum[10] = accum[11] = accum[12] = accum[13] = accum[14] = accum[15] = 0.0f;
3353
3354 // Snapshot input
3355
3356 // Apply horizontal hat
3357 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3358 const int offset = i * tileRowPixels;
3359 if (pass == 0)
3360 tmp[i / 4] = buffer[x + offset]; // snapshot input on first pass
3361 pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]);
3362 barrier(CLK_LOCAL_MEM_FENCE);
3363 buffer[x + offset] = pixel;
3364 }
3365 barrier(CLK_LOCAL_MEM_FENCE);
3366 // Apply vertical hat
3367 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3368 pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]);
3369 float delta = tmp[i / 4] - pixel;
3370 tmp[i / 4] = pixel; // hold output in tmp until all workitems are done
3371 if (delta < -thresh)
3372 delta += thresh;
3373 else if (delta > thresh)
3374 delta -= thresh;
3375 else
3376 delta = 0;
3377 accum[i / 4] += delta;
3378
3379 }
3380 barrier(CLK_LOCAL_MEM_FENCE);
3381 if (pass < passes - 1)
3382 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3383 buffer[x + i * tileRowPixels] = tmp[i / 4]; // store lowpass for next pass
3384 else // last pass
3385 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3386 accum[i / 4] += tmp[i / 4]; // add the lowpass signal back to output
3387 barrier(CLK_LOCAL_MEM_FENCE);
3388 }
3389
3390 switch (channel) {
3391 case 0:
3392 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3393 stage[i / 4].s0 = ClampToQuantum(accum[i / 4]);
3394 break;
3395 case 1:
3396 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3397 stage[i / 4].s1 = ClampToQuantum(accum[i / 4]);
3398 break;
3399 case 2:
3400 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3401 stage[i / 4].s2 = ClampToQuantum(accum[i / 4]);
3402 break;
3403 }
3404
3405 barrier(CLK_LOCAL_MEM_FENCE);
3406 }
3407
3408 // Write from stage to output
3409
3410 if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) {
3411 //for (int i = pad + get_local_id(1); i < tileSize - pad; i += get_local_size(1)) {
3412 for (int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3413 if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) {
3414 dstImage[srcx + (srcy + i) * imageWidth] = stage[i / 4];
3415 }
3416 }
3417 }
3418 }
3419 )
3420 ;
3421
3422#endif // MAGICKCORE_OPENCL_SUPPORT
3423
3424#if defined(__cplusplus) || defined(c_plusplus)
3425}
3426#endif
3427
3428#endif // MAGICKCORE_ACCELERATE_PRIVATE_H
ColorspaceType
Definition: colorspace.h:26
@ Rec601LumaColorspace
Definition: colorspace.h:44
@ LogColorspace
Definition: colorspace.h:48
@ Rec601YCbCrColorspace
Definition: colorspace.h:45
@ HSLColorspace
Definition: colorspace.h:42
@ YCbCrColorspace
Definition: colorspace.h:34
@ scRGBColorspace
Definition: colorspace.h:56
@ LabColorspace
Definition: colorspace.h:32
@ OHTAColorspace
Definition: colorspace.h:31
@ HSIColorspace
Definition: colorspace.h:57
@ LuvColorspace
Definition: colorspace.h:50
@ YDbDrColorspace
Definition: colorspace.h:60
@ HSBColorspace
Definition: colorspace.h:41
@ HCLColorspace
Definition: colorspace.h:51
@ YIQColorspace
Definition: colorspace.h:36
@ LMSColorspace
Definition: colorspace.h:53
@ YPbPrColorspace
Definition: colorspace.h:37
@ HCLpColorspace
Definition: colorspace.h:59
@ RGBColorspace
Definition: colorspace.h:28
@ LCHabColorspace
Definition: colorspace.h:54
@ YUVColorspace
Definition: colorspace.h:38
@ HWBColorspace
Definition: colorspace.h:43
@ CMYKColorspace
Definition: colorspace.h:39
@ CMYColorspace
Definition: colorspace.h:49
@ LCHuvColorspace
Definition: colorspace.h:55
@ UndefinedColorspace
Definition: colorspace.h:27
@ XYZColorspace
Definition: colorspace.h:33
@ Rec709LumaColorspace
Definition: colorspace.h:46
@ YCCColorspace
Definition: colorspace.h:35
@ Rec709YCbCrColorspace
Definition: colorspace.h:47
@ GRAYColorspace
Definition: colorspace.h:29
@ HSVColorspace
Definition: colorspace.h:58
@ TransparentColorspace
Definition: colorspace.h:30
@ sRGBColorspace
Definition: colorspace.h:40
@ LinearGRAYColorspace
Definition: colorspace.h:62
@ LCHColorspace
Definition: colorspace.h:52
static void MagickPixelCompositeBlend(const MagickPixelPacket *p, const MagickRealType alpha, const MagickPixelPacket *q, const MagickRealType beta, MagickPixelPacket *composite)
Definition: composite-private.h:138
static MagickRealType RoundToUnity(const MagickRealType value)
Definition: composite-private.h:33
static void MagickPixelCompositePlus(const MagickPixelPacket *p, const MagickRealType alpha, const MagickPixelPacket *q, const MagickRealType beta, MagickPixelPacket *composite)
Definition: composite-private.h:111
static void CompositeColorDodge(const MagickPixelPacket *p, const MagickPixelPacket *q, MagickPixelPacket *composite)
Definition: composite.c:324
static MagickRealType ColorDodge(const MagickRealType Sca, const MagickRealType Sa, const MagickRealType Dca, const MagickRealType Da)
Definition: composite.c:287
CompositeOperator
Definition: composite.h:26
@ DivideSrcCompositeOp
Definition: composite.h:95
@ MultiplyCompositeOp
Definition: composite.h:65
@ LightenIntensityCompositeOp
Definition: composite.h:98
@ SrcCompositeOp
Definition: composite.h:75
@ ThresholdCompositeOp
Definition: composite.h:80
@ ModulateCompositeOp
Definition: composite.h:64
@ ModulusSubtractCompositeOp
Definition: composite.h:79
@ CopyMagentaCompositeOp
Definition: composite.h:43
@ SrcInCompositeOp
Definition: composite.h:76
@ DifferenceCompositeOp
Definition: composite.h:53
@ ScreenCompositeOp
Definition: composite.h:72
@ DarkenCompositeOp
Definition: composite.h:47
@ HardLightCompositeOp
Definition: composite.h:57
@ LinearLightCompositeOp
Definition: composite.h:61
@ ModulusAddCompositeOp
Definition: composite.h:29
@ DstCompositeOp
Definition: composite.h:49
@ InCompositeOp
Definition: composite.h:59
@ LinearBurnCompositeOp
Definition: composite.h:93
@ CopyBlueCompositeOp
Definition: composite.h:39
@ PinLightCompositeOp
Definition: composite.h:91
@ DistortCompositeOp
Definition: composite.h:87
@ CopyRedCompositeOp
Definition: composite.h:45
@ DivideDstCompositeOp
Definition: composite.h:86
@ BlendCompositeOp
Definition: composite.h:31
@ SrcAtopCompositeOp
Definition: composite.h:74
@ ClearCompositeOp
Definition: composite.h:34
@ DarkenIntensityCompositeOp
Definition: composite.h:97
@ SrcOverCompositeOp
Definition: composite.h:78
@ DstOverCompositeOp
Definition: composite.h:52
@ BlurCompositeOp
Definition: composite.h:88
@ MathematicsCompositeOp
Definition: composite.h:94
@ VividLightCompositeOp
Definition: composite.h:90
@ ColorBurnCompositeOp
Definition: composite.h:35
@ ExclusionCompositeOp
Definition: composite.h:56
@ NoCompositeOp
Definition: composite.h:28
@ OverlayCompositeOp
Definition: composite.h:68
@ DissolveCompositeOp
Definition: composite.h:55
@ HueCompositeOp
Definition: composite.h:58
@ SoftLightCompositeOp
Definition: composite.h:73
@ UndefinedCompositeOp
Definition: composite.h:27
@ LightenCompositeOp
Definition: composite.h:60
@ ColorizeCompositeOp
Definition: composite.h:37
@ BumpmapCompositeOp
Definition: composite.h:32
@ XorCompositeOp
Definition: composite.h:81
@ ChangeMaskCompositeOp
Definition: composite.h:33
@ PegtopLightCompositeOp
Definition: composite.h:89
@ OverCompositeOp
Definition: composite.h:67
@ CopyCompositeOp
Definition: composite.h:40
@ SrcOutCompositeOp
Definition: composite.h:77
@ DstAtopCompositeOp
Definition: composite.h:48
@ DstInCompositeOp
Definition: composite.h:50
@ LinearDodgeCompositeOp
Definition: composite.h:92
@ MinusDstCompositeOp
Definition: composite.h:63
@ CopyOpacityCompositeOp
Definition: composite.h:44
@ MinusSrcCompositeOp
Definition: composite.h:96
@ AtopCompositeOp
Definition: composite.h:30
@ DstOutCompositeOp
Definition: composite.h:51
@ CopyGreenCompositeOp
Definition: composite.h:42
@ DisplaceCompositeOp
Definition: composite.h:54
@ CopyBlackCompositeOp
Definition: composite.h:38
@ CopyYellowCompositeOp
Definition: composite.h:46
@ ColorDodgeCompositeOp
Definition: composite.h:36
@ CopyCyanCompositeOp
Definition: composite.h:41
@ PlusCompositeOp
Definition: composite.h:69
@ SaturateCompositeOp
Definition: composite.h:71
@ OutCompositeOp
Definition: composite.h:66
@ LuminizeCompositeOp
Definition: composite.h:62
@ ReplaceCompositeOp
Definition: composite.h:70
static void Contrast(const int sign, Quantum *red, Quantum *green, Quantum *blue)
Definition: enhance.c:917
static void ModulateHSL(const double percent_hue, const double percent_saturation, const double percent_lightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: enhance.c:3583
#define SigmaPoisson
MagickExport void ConvertHSBToRGB(const double hue, const double saturation, const double brightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: gem.c:284
#define SigmaMultiplicativeGaussian
MagickExport void ConvertHSLToRGB(const double hue, const double saturation, const double lightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: gem.c:460
#define TauGaussian
#define SigmaImpulse
MagickExport void ConvertRGBToHSB(const Quantum red, const Quantum green, const Quantum blue, double *hue, double *saturation, double *brightness)
Definition: gem.c:994
#define SigmaRandom
#define SigmaUniform
#define SigmaGaussian
MagickExport void ConvertRGBToHSL(const Quantum red, const Quantum green, const Quantum blue, double *hue, double *saturation, double *lightness)
Definition: gem.c:1127
#define SigmaLaplacian
#define MagickMin(x, y)
Definition: image-private.h:37
#define MagickPI
Definition: image-private.h:40
#define MagickMax(x, y)
Definition: image-private.h:36
ChannelType
Definition: magick-type.h:169
@ MatteChannel
Definition: magick-type.h:180
@ UndefinedChannel
Definition: magick-type.h:170
@ GrayChannels
Definition: magick-type.h:190
@ AllChannels
Definition: magick-type.h:184
@ GreenChannel
Definition: magick-type.h:174
@ OpacityChannel
Definition: magick-type.h:179
@ CompositeChannels
Definition: magick-type.h:183
@ TrueAlphaChannel
Definition: magick-type.h:188
@ BlackChannel
Definition: magick-type.h:181
@ DefaultChannels
Definition: magick-type.h:192
@ BlueChannel
Definition: magick-type.h:176
@ RedChannel
Definition: magick-type.h:171
@ SyncChannels
Definition: magick-type.h:191
@ YellowChannel
Definition: magick-type.h:177
@ MagentaChannel
Definition: magick-type.h:175
@ RGBChannels
Definition: magick-type.h:189
@ IndexChannel
Definition: magick-type.h:182
@ CyanChannel
Definition: magick-type.h:173
@ AlphaChannel
Definition: magick-type.h:178
@ GrayChannel
Definition: magick-type.h:172
#define MaxMap
Definition: magick-type.h:82
#define QuantumRange
Definition: magick-type.h:90
#define MagickEpsilon
Definition: magick-type.h:119
#define MAGICKCORE_QUANTUM_DEPTH
Definition: magick-type.h:28
#define QuantumScale
Definition: magick-type.h:124
static double PerceptibleReciprocal(const double x)
Definition: pixel-accessor.h:124
#define GetPixelAlpha(pixel)
Definition: pixel-accessor.h:36
MagickExport MagickRealType GetPixelIntensity(const Image *image, const PixelPacket *magick_restrict pixel)
Definition: pixel.c:2292
PixelIntensityMethod
Definition: pixel.h:68
@ BrightnessPixelIntensityMethod
Definition: pixel.h:71
@ AveragePixelIntensityMethod
Definition: pixel.h:70
@ LightnessPixelIntensityMethod
Definition: pixel.h:72
@ Rec709LumaPixelIntensityMethod
Definition: pixel.h:75
@ UndefinedPixelIntensityMethod
Definition: pixel.h:69
@ RMSPixelIntensityMethod
Definition: pixel.h:77
@ Rec601LumaPixelIntensityMethod
Definition: pixel.h:73
@ Rec709LuminancePixelIntensityMethod
Definition: pixel.h:76
@ Rec601LuminancePixelIntensityMethod
Definition: pixel.h:74
@ MSPixelIntensityMethod
Definition: pixel.h:78
static Quantum ClampToQuantum(const MagickRealType quantum)
Definition: quantum.h:88
ResizeWeightingFunctionType
Definition: resize-private.h:26
@ HanningWeightingFunction
Definition: resize-private.h:30
@ QuadraticWeightingFunction
Definition: resize-private.h:34
@ TriangleWeightingFunction
Definition: resize-private.h:28
@ SincWeightingFunction
Definition: resize-private.h:36
@ CubicBCWeightingFunction
Definition: resize-private.h:29
@ BohmanWeightingFunction
Definition: resize-private.h:40
@ HammingWeightingFunction
Definition: resize-private.h:31
@ JincWeightingFunction
Definition: resize-private.h:35
@ WelshWeightingFunction
Definition: resize-private.h:39
@ BoxWeightingFunction
Definition: resize-private.h:27
@ KaiserWeightingFunction
Definition: resize-private.h:38
@ LagrangeWeightingFunction
Definition: resize-private.h:41
@ BlackmanWeightingFunction
Definition: resize-private.h:32
@ GaussianWeightingFunction
Definition: resize-private.h:33
@ CosineWeightingFunction
Definition: resize-private.h:42
@ SincFastWeightingFunction
Definition: resize-private.h:37
static MagickRealType Blackman(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:148
static MagickRealType Triangle(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:505
static MagickRealType CubicBC(const MagickRealType x, const ResizeFilter *resize_filter)
Definition: resize.c:205
static MagickRealType Sinc(const MagickRealType, const ResizeFilter *)
static MagickRealType Hamming(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:294
static MagickRealType Hanning(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:282
static Quantum ApplyFunction(Quantum pixel, const MagickFunction function, const size_t number_parameters, const double *parameters, ExceptionInfo *exception)
Definition: statistic.c:1000
MagickFunction
Definition: statistic.h:113
@ UndefinedFunction
Definition: statistic.h:114
@ SinusoidFunction
Definition: statistic.h:116
@ ArcsinFunction
Definition: statistic.h:117
@ PolynomialFunction
Definition: statistic.h:115
@ ArctanFunction
Definition: statistic.h:118
NoiseType
Definition: visual-effects.h:28
@ ImpulseNoise
Definition: visual-effects.h:33
@ UndefinedNoise
Definition: visual-effects.h:29
@ GaussianNoise
Definition: visual-effects.h:31
@ RandomNoise
Definition: visual-effects.h:36
@ UniformNoise
Definition: visual-effects.h:30
@ LaplacianNoise
Definition: visual-effects.h:34
@ PoissonNoise
Definition: visual-effects.h:35
@ MultiplicativeGaussianNoise
Definition: visual-effects.h:32