1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18 #include "rs_core.rsh"
19
20 extern short __attribute__((overloadable, always_inline)) rsClamp(short amount, short low, short high);
21 extern uchar4 __attribute__((overloadable)) convert_uchar4(short4);
22 extern uchar4 __attribute__((overloadable)) convert_uchar4(float4);
23 extern float4 __attribute__((overloadable)) convert_float4(uchar4);
24 extern float __attribute__((overloadable)) sqrt(float);
25
26 /*
27 * CLAMP
28 */
/*
 * _CLAMP(T) expands to every clamp() overload for element type T:
 *   - the scalar form,
 *   - 2-, 3- and 4-lane forms whose bounds are vectors (lane-wise bounds),
 *   - 2-, 3- and 4-lane forms whose bounds are scalars shared by all lanes.
 *
 * A lane yields low when amount < low, otherwise high when amount > high,
 * otherwise amount itself; the lower bound is always tested first.  The
 * vector forms compute each lane through the scalar overload.
 */
#define _CLAMP(T) \
extern T __attribute__((overloadable)) clamp(T amount, T low, T high) { \
    if (amount < low) { \
        return low; \
    } \
    return amount > high ? high : amount; \
} \
\
extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T##2 low, T##2 high) { \
    T##2 out; \
    out.x = clamp(amount.x, low.x, high.x); \
    out.y = clamp(amount.y, low.y, high.y); \
    return out; \
} \
\
extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T##3 low, T##3 high) { \
    T##3 out; \
    out.x = clamp(amount.x, low.x, high.x); \
    out.y = clamp(amount.y, low.y, high.y); \
    out.z = clamp(amount.z, low.z, high.z); \
    return out; \
} \
\
extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T##4 low, T##4 high) { \
    T##4 out; \
    out.x = clamp(amount.x, low.x, high.x); \
    out.y = clamp(amount.y, low.y, high.y); \
    out.z = clamp(amount.z, low.z, high.z); \
    out.w = clamp(amount.w, low.w, high.w); \
    return out; \
} \
\
extern T##2 __attribute__((overloadable)) clamp(T##2 amount, T low, T high) { \
    T##2 out; \
    out.x = clamp(amount.x, low, high); \
    out.y = clamp(amount.y, low, high); \
    return out; \
} \
\
extern T##3 __attribute__((overloadable)) clamp(T##3 amount, T low, T high) { \
    T##3 out; \
    out.x = clamp(amount.x, low, high); \
    out.y = clamp(amount.y, low, high); \
    out.z = clamp(amount.z, low, high); \
    return out; \
} \
\
extern T##4 __attribute__((overloadable)) clamp(T##4 amount, T low, T high) { \
    T##4 out; \
    out.x = clamp(amount.x, low, high); \
    out.y = clamp(amount.y, low, high); \
    out.z = clamp(amount.z, low, high); \
    out.w = clamp(amount.w, low, high); \
    return out; \
}
81
// clamp() for float: when the SSE implementation is in use, only the
// prototypes are declared here.  In every other configuration, which
// includes being built as part of the debug runtime (libclcore_debug.bc)
// or compiling with debug info, the functions must be defined here.
#if defined(ARCH_X86_HAVE_SSSE3) && !defined(RS_DEBUG_RUNTIME) && !defined(RS_G_RUNTIME)

extern float __attribute__((overloadable)) clamp(float amount, float low, float high);
extern float2 __attribute__((overloadable)) clamp(float2 amount, float2 low, float2 high);
extern float3 __attribute__((overloadable)) clamp(float3 amount, float3 low, float3 high);
extern float4 __attribute__((overloadable)) clamp(float4 amount, float4 low, float4 high);
extern float2 __attribute__((overloadable)) clamp(float2 amount, float low, float high);
extern float3 __attribute__((overloadable)) clamp(float3 amount, float low, float high);
extern float4 __attribute__((overloadable)) clamp(float4 amount, float low, float high);

#else

_CLAMP(float);

#endif // defined(ARCH_X86_HAVE_SSSE3) && !defined(RS_DEBUG_RUNTIME) && !defined(RS_G_RUNTIME)
100
101 _CLAMP(half);
102 _CLAMP(double);
103 _CLAMP(char);
104 _CLAMP(uchar);
105 _CLAMP(short);
106 _CLAMP(ushort);
107 _CLAMP(int);
108 _CLAMP(uint);
109 _CLAMP(long);
110 _CLAMP(ulong);
111
112 #undef _CLAMP
113
114 /*
115 * FMAX
116 */
117
/*
 * Returns the larger of two floats.  v2 is returned whenever v1 > v2 does
 * not hold, which includes equal operands and any NaN operand.
 */
extern float __attribute__((overloadable)) fmax(float v1, float v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
121
fmax(float2 v1,float2 v2)122 extern float2 __attribute__((overloadable)) fmax(float2 v1, float2 v2) {
123 float2 r;
124 r.x = v1.x > v2.x ? v1.x : v2.x;
125 r.y = v1.y > v2.y ? v1.y : v2.y;
126 return r;
127 }
128
fmax(float3 v1,float3 v2)129 extern float3 __attribute__((overloadable)) fmax(float3 v1, float3 v2) {
130 float3 r;
131 r.x = v1.x > v2.x ? v1.x : v2.x;
132 r.y = v1.y > v2.y ? v1.y : v2.y;
133 r.z = v1.z > v2.z ? v1.z : v2.z;
134 return r;
135 }
136
fmax(float4 v1,float4 v2)137 extern float4 __attribute__((overloadable)) fmax(float4 v1, float4 v2) {
138 float4 r;
139 r.x = v1.x > v2.x ? v1.x : v2.x;
140 r.y = v1.y > v2.y ? v1.y : v2.y;
141 r.z = v1.z > v2.z ? v1.z : v2.z;
142 r.w = v1.w > v2.w ? v1.w : v2.w;
143 return r;
144 }
145
fmax(float2 v1,float v2)146 extern float2 __attribute__((overloadable)) fmax(float2 v1, float v2) {
147 float2 r;
148 r.x = v1.x > v2 ? v1.x : v2;
149 r.y = v1.y > v2 ? v1.y : v2;
150 return r;
151 }
152
fmax(float3 v1,float v2)153 extern float3 __attribute__((overloadable)) fmax(float3 v1, float v2) {
154 float3 r;
155 r.x = v1.x > v2 ? v1.x : v2;
156 r.y = v1.y > v2 ? v1.y : v2;
157 r.z = v1.z > v2 ? v1.z : v2;
158 return r;
159 }
160
fmax(float4 v1,float v2)161 extern float4 __attribute__((overloadable)) fmax(float4 v1, float v2) {
162 float4 r;
163 r.x = v1.x > v2 ? v1.x : v2;
164 r.y = v1.y > v2 ? v1.y : v2;
165 r.z = v1.z > v2 ? v1.z : v2;
166 r.w = v1.w > v2 ? v1.w : v2;
167 return r;
168 }
169
/*
 * Returns the smaller of two floats.  v2 is returned whenever v1 < v2 does
 * not hold, which includes equal operands and any NaN operand.
 */
extern float __attribute__((overloadable)) fmin(float v1, float v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
173
174
175 /*
 * FMIN (the scalar float overload is defined just above this header)
177 */
fmin(float2 v1,float2 v2)178 extern float2 __attribute__((overloadable)) fmin(float2 v1, float2 v2) {
179 float2 r;
180 r.x = v1.x < v2.x ? v1.x : v2.x;
181 r.y = v1.y < v2.y ? v1.y : v2.y;
182 return r;
183 }
184
fmin(float3 v1,float3 v2)185 extern float3 __attribute__((overloadable)) fmin(float3 v1, float3 v2) {
186 float3 r;
187 r.x = v1.x < v2.x ? v1.x : v2.x;
188 r.y = v1.y < v2.y ? v1.y : v2.y;
189 r.z = v1.z < v2.z ? v1.z : v2.z;
190 return r;
191 }
192
fmin(float4 v1,float4 v2)193 extern float4 __attribute__((overloadable)) fmin(float4 v1, float4 v2) {
194 float4 r;
195 r.x = v1.x < v2.x ? v1.x : v2.x;
196 r.y = v1.y < v2.y ? v1.y : v2.y;
197 r.z = v1.z < v2.z ? v1.z : v2.z;
198 r.w = v1.w < v2.w ? v1.w : v2.w;
199 return r;
200 }
201
fmin(float2 v1,float v2)202 extern float2 __attribute__((overloadable)) fmin(float2 v1, float v2) {
203 float2 r;
204 r.x = v1.x < v2 ? v1.x : v2;
205 r.y = v1.y < v2 ? v1.y : v2;
206 return r;
207 }
208
fmin(float3 v1,float v2)209 extern float3 __attribute__((overloadable)) fmin(float3 v1, float v2) {
210 float3 r;
211 r.x = v1.x < v2 ? v1.x : v2;
212 r.y = v1.y < v2 ? v1.y : v2;
213 r.z = v1.z < v2 ? v1.z : v2;
214 return r;
215 }
216
fmin(float4 v1,float v2)217 extern float4 __attribute__((overloadable)) fmin(float4 v1, float v2) {
218 float4 r;
219 r.x = v1.x < v2 ? v1.x : v2;
220 r.y = v1.y < v2 ? v1.y : v2;
221 r.z = v1.z < v2 ? v1.z : v2;
222 r.w = v1.w < v2 ? v1.w : v2;
223 return r;
224 }
225
226
227 /*
228 * MAX
229 */
230
/* Returns the larger of two char values (v2 when they are equal). */
extern char __attribute__((overloadable)) max(char v1, char v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
234
max(char2 v1,char2 v2)235 extern char2 __attribute__((overloadable)) max(char2 v1, char2 v2) {
236 char2 r;
237 r.x = v1.x > v2.x ? v1.x : v2.x;
238 r.y = v1.y > v2.y ? v1.y : v2.y;
239 return r;
240 }
241
max(char3 v1,char3 v2)242 extern char3 __attribute__((overloadable)) max(char3 v1, char3 v2) {
243 char3 r;
244 r.x = v1.x > v2.x ? v1.x : v2.x;
245 r.y = v1.y > v2.y ? v1.y : v2.y;
246 r.z = v1.z > v2.z ? v1.z : v2.z;
247 return r;
248 }
249
max(char4 v1,char4 v2)250 extern char4 __attribute__((overloadable)) max(char4 v1, char4 v2) {
251 char4 r;
252 r.x = v1.x > v2.x ? v1.x : v2.x;
253 r.y = v1.y > v2.y ? v1.y : v2.y;
254 r.z = v1.z > v2.z ? v1.z : v2.z;
255 r.w = v1.w > v2.w ? v1.w : v2.w;
256 return r;
257 }
258
/* Returns the larger of two short values (v2 when they are equal). */
extern short __attribute__((overloadable)) max(short v1, short v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
262
max(short2 v1,short2 v2)263 extern short2 __attribute__((overloadable)) max(short2 v1, short2 v2) {
264 short2 r;
265 r.x = v1.x > v2.x ? v1.x : v2.x;
266 r.y = v1.y > v2.y ? v1.y : v2.y;
267 return r;
268 }
269
max(short3 v1,short3 v2)270 extern short3 __attribute__((overloadable)) max(short3 v1, short3 v2) {
271 short3 r;
272 r.x = v1.x > v2.x ? v1.x : v2.x;
273 r.y = v1.y > v2.y ? v1.y : v2.y;
274 r.z = v1.z > v2.z ? v1.z : v2.z;
275 return r;
276 }
277
max(short4 v1,short4 v2)278 extern short4 __attribute__((overloadable)) max(short4 v1, short4 v2) {
279 short4 r;
280 r.x = v1.x > v2.x ? v1.x : v2.x;
281 r.y = v1.y > v2.y ? v1.y : v2.y;
282 r.z = v1.z > v2.z ? v1.z : v2.z;
283 r.w = v1.w > v2.w ? v1.w : v2.w;
284 return r;
285 }
286
/* Returns the larger of two int values (v2 when they are equal). */
extern int __attribute__((overloadable)) max(int v1, int v2) {
    if (v1 > v2) {
        return v1;
    }
    return v2;
}
290
max(int2 v1,int2 v2)291 extern int2 __attribute__((overloadable)) max(int2 v1, int2 v2) {
292 int2 r;
293 r.x = v1.x > v2.x ? v1.x : v2.x;
294 r.y = v1.y > v2.y ? v1.y : v2.y;
295 return r;
296 }
297
max(int3 v1,int3 v2)298 extern int3 __attribute__((overloadable)) max(int3 v1, int3 v2) {
299 int3 r;
300 r.x = v1.x > v2.x ? v1.x : v2.x;
301 r.y = v1.y > v2.y ? v1.y : v2.y;
302 r.z = v1.z > v2.z ? v1.z : v2.z;
303 return r;
304 }
305
max(int4 v1,int4 v2)306 extern int4 __attribute__((overloadable)) max(int4 v1, int4 v2) {
307 int4 r;
308 r.x = v1.x > v2.x ? v1.x : v2.x;
309 r.y = v1.y > v2.y ? v1.y : v2.y;
310 r.z = v1.z > v2.z ? v1.z : v2.z;
311 r.w = v1.w > v2.w ? v1.w : v2.w;
312 return r;
313 }
314
max(uchar v1,uchar v2)315 extern uchar __attribute__((overloadable)) max(uchar v1, uchar v2) {
316 return v1 > v2 ? v1 : v2;
317 }
318
max(uchar2 v1,uchar2 v2)319 extern uchar2 __attribute__((overloadable)) max(uchar2 v1, uchar2 v2) {
320 uchar2 r;
321 r.x = v1.x > v2.x ? v1.x : v2.x;
322 r.y = v1.y > v2.y ? v1.y : v2.y;
323 return r;
324 }
325
max(uchar3 v1,uchar3 v2)326 extern uchar3 __attribute__((overloadable)) max(uchar3 v1, uchar3 v2) {
327 uchar3 r;
328 r.x = v1.x > v2.x ? v1.x : v2.x;
329 r.y = v1.y > v2.y ? v1.y : v2.y;
330 r.z = v1.z > v2.z ? v1.z : v2.z;
331 return r;
332 }
333
max(uchar4 v1,uchar4 v2)334 extern uchar4 __attribute__((overloadable)) max(uchar4 v1, uchar4 v2) {
335 uchar4 r;
336 r.x = v1.x > v2.x ? v1.x : v2.x;
337 r.y = v1.y > v2.y ? v1.y : v2.y;
338 r.z = v1.z > v2.z ? v1.z : v2.z;
339 r.w = v1.w > v2.w ? v1.w : v2.w;
340 return r;
341 }
342
max(ushort v1,ushort v2)343 extern ushort __attribute__((overloadable)) max(ushort v1, ushort v2) {
344 return v1 > v2 ? v1 : v2;
345 }
346
max(ushort2 v1,ushort2 v2)347 extern ushort2 __attribute__((overloadable)) max(ushort2 v1, ushort2 v2) {
348 ushort2 r;
349 r.x = v1.x > v2.x ? v1.x : v2.x;
350 r.y = v1.y > v2.y ? v1.y : v2.y;
351 return r;
352 }
353
max(ushort3 v1,ushort3 v2)354 extern ushort3 __attribute__((overloadable)) max(ushort3 v1, ushort3 v2) {
355 ushort3 r;
356 r.x = v1.x > v2.x ? v1.x : v2.x;
357 r.y = v1.y > v2.y ? v1.y : v2.y;
358 r.z = v1.z > v2.z ? v1.z : v2.z;
359 return r;
360 }
361
max(ushort4 v1,ushort4 v2)362 extern ushort4 __attribute__((overloadable)) max(ushort4 v1, ushort4 v2) {
363 ushort4 r;
364 r.x = v1.x > v2.x ? v1.x : v2.x;
365 r.y = v1.y > v2.y ? v1.y : v2.y;
366 r.z = v1.z > v2.z ? v1.z : v2.z;
367 r.w = v1.w > v2.w ? v1.w : v2.w;
368 return r;
369 }
370
max(uint v1,uint v2)371 extern uint __attribute__((overloadable)) max(uint v1, uint v2) {
372 return v1 > v2 ? v1 : v2;
373 }
374
max(uint2 v1,uint2 v2)375 extern uint2 __attribute__((overloadable)) max(uint2 v1, uint2 v2) {
376 uint2 r;
377 r.x = v1.x > v2.x ? v1.x : v2.x;
378 r.y = v1.y > v2.y ? v1.y : v2.y;
379 return r;
380 }
381
max(uint3 v1,uint3 v2)382 extern uint3 __attribute__((overloadable)) max(uint3 v1, uint3 v2) {
383 uint3 r;
384 r.x = v1.x > v2.x ? v1.x : v2.x;
385 r.y = v1.y > v2.y ? v1.y : v2.y;
386 r.z = v1.z > v2.z ? v1.z : v2.z;
387 return r;
388 }
389
max(uint4 v1,uint4 v2)390 extern uint4 __attribute__((overloadable)) max(uint4 v1, uint4 v2) {
391 uint4 r;
392 r.x = v1.x > v2.x ? v1.x : v2.x;
393 r.y = v1.y > v2.y ? v1.y : v2.y;
394 r.z = v1.z > v2.z ? v1.z : v2.z;
395 r.w = v1.w > v2.w ? v1.w : v2.w;
396 return r;
397 }
398
/* max() for float simply forwards to fmax(). */
extern float __attribute__((overloadable)) max(float v1, float v2) {
    const float larger = fmax(v1, v2);
    return larger;
}
402
max(float2 v1,float2 v2)403 extern float2 __attribute__((overloadable)) max(float2 v1, float2 v2) {
404 return fmax(v1, v2);
405 }
406
max(float2 v1,float v2)407 extern float2 __attribute__((overloadable)) max(float2 v1, float v2) {
408 return fmax(v1, v2);
409 }
410
max(float3 v1,float3 v2)411 extern float3 __attribute__((overloadable)) max(float3 v1, float3 v2) {
412 return fmax(v1, v2);
413 }
414
max(float3 v1,float v2)415 extern float3 __attribute__((overloadable)) max(float3 v1, float v2) {
416 return fmax(v1, v2);
417 }
418
max(float4 v1,float4 v2)419 extern float4 __attribute__((overloadable)) max(float4 v1, float4 v2) {
420 return fmax(v1, v2);
421 }
422
max(float4 v1,float v2)423 extern float4 __attribute__((overloadable)) max(float4 v1, float v2) {
424 return fmax(v1, v2);
425 }
426
427
428 /*
429 * MIN
430 */
431
/* Returns the smaller of two int8_t values (v2 when they are equal). */
extern int8_t __attribute__((overloadable)) min(int8_t v1, int8_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
435
min(char2 v1,char2 v2)436 extern char2 __attribute__((overloadable)) min(char2 v1, char2 v2) {
437 char2 r;
438 r.x = v1.x < v2.x ? v1.x : v2.x;
439 r.y = v1.y < v2.y ? v1.y : v2.y;
440 return r;
441 }
442
min(char3 v1,char3 v2)443 extern char3 __attribute__((overloadable)) min(char3 v1, char3 v2) {
444 char3 r;
445 r.x = v1.x < v2.x ? v1.x : v2.x;
446 r.y = v1.y < v2.y ? v1.y : v2.y;
447 r.z = v1.z < v2.z ? v1.z : v2.z;
448 return r;
449 }
450
min(char4 v1,char4 v2)451 extern char4 __attribute__((overloadable)) min(char4 v1, char4 v2) {
452 char4 r;
453 r.x = v1.x < v2.x ? v1.x : v2.x;
454 r.y = v1.y < v2.y ? v1.y : v2.y;
455 r.z = v1.z < v2.z ? v1.z : v2.z;
456 r.w = v1.w < v2.w ? v1.w : v2.w;
457 return r;
458 }
459
/* Returns the smaller of two int16_t values (v2 when they are equal). */
extern int16_t __attribute__((overloadable)) min(int16_t v1, int16_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
463
min(short2 v1,short2 v2)464 extern short2 __attribute__((overloadable)) min(short2 v1, short2 v2) {
465 short2 r;
466 r.x = v1.x < v2.x ? v1.x : v2.x;
467 r.y = v1.y < v2.y ? v1.y : v2.y;
468 return r;
469 }
470
min(short3 v1,short3 v2)471 extern short3 __attribute__((overloadable)) min(short3 v1, short3 v2) {
472 short3 r;
473 r.x = v1.x < v2.x ? v1.x : v2.x;
474 r.y = v1.y < v2.y ? v1.y : v2.y;
475 r.z = v1.z < v2.z ? v1.z : v2.z;
476 return r;
477 }
478
min(short4 v1,short4 v2)479 extern short4 __attribute__((overloadable)) min(short4 v1, short4 v2) {
480 short4 r;
481 r.x = v1.x < v2.x ? v1.x : v2.x;
482 r.y = v1.y < v2.y ? v1.y : v2.y;
483 r.z = v1.z < v2.z ? v1.z : v2.z;
484 r.w = v1.w < v2.w ? v1.w : v2.w;
485 return r;
486 }
487
/* Returns the smaller of two int32_t values (v2 when they are equal). */
extern int32_t __attribute__((overloadable)) min(int32_t v1, int32_t v2) {
    if (v1 < v2) {
        return v1;
    }
    return v2;
}
491
min(int2 v1,int2 v2)492 extern int2 __attribute__((overloadable)) min(int2 v1, int2 v2) {
493 int2 r;
494 r.x = v1.x < v2.x ? v1.x : v2.x;
495 r.y = v1.y < v2.y ? v1.y : v2.y;
496 return r;
497 }
498
min(int3 v1,int3 v2)499 extern int3 __attribute__((overloadable)) min(int3 v1, int3 v2) {
500 int3 r;
501 r.x = v1.x < v2.x ? v1.x : v2.x;
502 r.y = v1.y < v2.y ? v1.y : v2.y;
503 r.z = v1.z < v2.z ? v1.z : v2.z;
504 return r;
505 }
506
min(int4 v1,int4 v2)507 extern int4 __attribute__((overloadable)) min(int4 v1, int4 v2) {
508 int4 r;
509 r.x = v1.x < v2.x ? v1.x : v2.x;
510 r.y = v1.y < v2.y ? v1.y : v2.y;
511 r.z = v1.z < v2.z ? v1.z : v2.z;
512 r.w = v1.w < v2.w ? v1.w : v2.w;
513 return r;
514 }
515
min(uchar v1,uchar v2)516 extern uchar __attribute__((overloadable)) min(uchar v1, uchar v2) {
517 return v1 < v2 ? v1 : v2;
518 }
519
min(uchar2 v1,uchar2 v2)520 extern uchar2 __attribute__((overloadable)) min(uchar2 v1, uchar2 v2) {
521 uchar2 r;
522 r.x = v1.x < v2.x ? v1.x : v2.x;
523 r.y = v1.y < v2.y ? v1.y : v2.y;
524 return r;
525 }
526
min(uchar3 v1,uchar3 v2)527 extern uchar3 __attribute__((overloadable)) min(uchar3 v1, uchar3 v2) {
528 uchar3 r;
529 r.x = v1.x < v2.x ? v1.x : v2.x;
530 r.y = v1.y < v2.y ? v1.y : v2.y;
531 r.z = v1.z < v2.z ? v1.z : v2.z;
532 return r;
533 }
534
min(uchar4 v1,uchar4 v2)535 extern uchar4 __attribute__((overloadable)) min(uchar4 v1, uchar4 v2) {
536 uchar4 r;
537 r.x = v1.x < v2.x ? v1.x : v2.x;
538 r.y = v1.y < v2.y ? v1.y : v2.y;
539 r.z = v1.z < v2.z ? v1.z : v2.z;
540 r.w = v1.w < v2.w ? v1.w : v2.w;
541 return r;
542 }
543
min(ushort v1,ushort v2)544 extern ushort __attribute__((overloadable)) min(ushort v1, ushort v2) {
545 return v1 < v2 ? v1 : v2;
546 }
547
min(ushort2 v1,ushort2 v2)548 extern ushort2 __attribute__((overloadable)) min(ushort2 v1, ushort2 v2) {
549 ushort2 r;
550 r.x = v1.x < v2.x ? v1.x : v2.x;
551 r.y = v1.y < v2.y ? v1.y : v2.y;
552 return r;
553 }
554
min(ushort3 v1,ushort3 v2)555 extern ushort3 __attribute__((overloadable)) min(ushort3 v1, ushort3 v2) {
556 ushort3 r;
557 r.x = v1.x < v2.x ? v1.x : v2.x;
558 r.y = v1.y < v2.y ? v1.y : v2.y;
559 r.z = v1.z < v2.z ? v1.z : v2.z;
560 return r;
561 }
562
min(ushort4 v1,ushort4 v2)563 extern ushort4 __attribute__((overloadable)) min(ushort4 v1, ushort4 v2) {
564 ushort4 r;
565 r.x = v1.x < v2.x ? v1.x : v2.x;
566 r.y = v1.y < v2.y ? v1.y : v2.y;
567 r.z = v1.z < v2.z ? v1.z : v2.z;
568 r.w = v1.w < v2.w ? v1.w : v2.w;
569 return r;
570 }
571
min(uint v1,uint v2)572 extern uint __attribute__((overloadable)) min(uint v1, uint v2) {
573 return v1 < v2 ? v1 : v2;
574 }
575
min(uint2 v1,uint2 v2)576 extern uint2 __attribute__((overloadable)) min(uint2 v1, uint2 v2) {
577 uint2 r;
578 r.x = v1.x < v2.x ? v1.x : v2.x;
579 r.y = v1.y < v2.y ? v1.y : v2.y;
580 return r;
581 }
582
min(uint3 v1,uint3 v2)583 extern uint3 __attribute__((overloadable)) min(uint3 v1, uint3 v2) {
584 uint3 r;
585 r.x = v1.x < v2.x ? v1.x : v2.x;
586 r.y = v1.y < v2.y ? v1.y : v2.y;
587 r.z = v1.z < v2.z ? v1.z : v2.z;
588 return r;
589 }
590
min(uint4 v1,uint4 v2)591 extern uint4 __attribute__((overloadable)) min(uint4 v1, uint4 v2) {
592 uint4 r;
593 r.x = v1.x < v2.x ? v1.x : v2.x;
594 r.y = v1.y < v2.y ? v1.y : v2.y;
595 r.z = v1.z < v2.z ? v1.z : v2.z;
596 r.w = v1.w < v2.w ? v1.w : v2.w;
597 return r;
598 }
599
/* min() for float simply forwards to fmin(). */
extern float __attribute__((overloadable)) min(float v1, float v2) {
    const float smaller = fmin(v1, v2);
    return smaller;
}
603
min(float2 v1,float2 v2)604 extern float2 __attribute__((overloadable)) min(float2 v1, float2 v2) {
605 return fmin(v1, v2);
606 }
607
min(float2 v1,float v2)608 extern float2 __attribute__((overloadable)) min(float2 v1, float v2) {
609 return fmin(v1, v2);
610 }
611
min(float3 v1,float3 v2)612 extern float3 __attribute__((overloadable)) min(float3 v1, float3 v2) {
613 return fmin(v1, v2);
614 }
615
min(float3 v1,float v2)616 extern float3 __attribute__((overloadable)) min(float3 v1, float v2) {
617 return fmin(v1, v2);
618 }
619
min(float4 v1,float4 v2)620 extern float4 __attribute__((overloadable)) min(float4 v1, float4 v2) {
621 return fmin(v1, v2);
622 }
623
min(float4 v1,float v2)624 extern float4 __attribute__((overloadable)) min(float4 v1, float v2) {
625 return fmin(v1, v2);
626 }
627
628 /*
629 * YUV
630 */
631
rsYuvToRGBA_uchar4(uchar y,uchar u,uchar v)632 extern uchar4 __attribute__((overloadable)) rsYuvToRGBA_uchar4(uchar y, uchar u, uchar v) {
633 short Y = ((short)y) - 16;
634 short U = ((short)u) - 128;
635 short V = ((short)v) - 128;
636
637 short4 p;
638 p.r = (Y * 298 + V * 409 + 128) >> 8;
639 p.g = (Y * 298 - U * 100 - V * 208 + 128) >> 8;
640 p.b = (Y * 298 + U * 516 + 128) >> 8;
641 p.a = 255;
642 p.r = rsClamp(p.r, (short)0, (short)255);
643 p.g = rsClamp(p.g, (short)0, (short)255);
644 p.b = rsClamp(p.b, (short)0, (short)255);
645
646 return convert_uchar4(p);
647 }
648
649 /*
 * half_recip
651 */
652
half_recip(float2 v)653 extern float2 __attribute__((overloadable)) half_recip(float2 v) {
654 return ((float2) 1.f) / v;
655 }
656
half_recip(float3 v)657 extern float3 __attribute__((overloadable)) half_recip(float3 v) {
658 return ((float3) 1.f) / v;
659 }
660
half_recip(float4 v)661 extern float4 __attribute__((overloadable)) half_recip(float4 v) {
662 return ((float4) 1.f) / v;
663 }
664
665
666
667 /*
668 * half_rsqrt
669 */
670
/* Reciprocal square root of a float, computed as 1 / sqrt(v). */
extern float __attribute__((overloadable)) half_rsqrt(float v) {
    const float root = sqrt(v);
    return 1.f / root;
}
674
half_rsqrt(float2 v)675 extern float2 __attribute__((overloadable)) half_rsqrt(float2 v) {
676 float2 r;
677 r.x = half_rsqrt(v.x);
678 r.y = half_rsqrt(v.y);
679 return r;
680 }
681
half_rsqrt(float3 v)682 extern float3 __attribute__((overloadable)) half_rsqrt(float3 v) {
683 float3 r;
684 r.x = half_rsqrt(v.x);
685 r.y = half_rsqrt(v.y);
686 r.z = half_rsqrt(v.z);
687 return r;
688 }
689
half_rsqrt(float4 v)690 extern float4 __attribute__((overloadable)) half_rsqrt(float4 v) {
691 float4 r;
692 r.x = half_rsqrt(v.x);
693 r.y = half_rsqrt(v.y);
694 r.z = half_rsqrt(v.z);
695 r.w = half_rsqrt(v.w);
696 return r;
697 }
698
699 /**
700 * matrix ops
701 */
702
703 extern float4 __attribute__((overloadable))
rsMatrixMultiply(const rs_matrix4x4 * m,float4 in)704 rsMatrixMultiply(const rs_matrix4x4 *m, float4 in) {
705 float4 ret;
706 ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + (m->m[12] * in.w);
707 ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + (m->m[13] * in.w);
708 ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + (m->m[14] * in.w);
709 ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + (m->m[15] * in.w);
710 return ret;
711 }
712
713 extern float4 __attribute__((overloadable))
rsMatrixMultiply(const rs_matrix4x4 * m,float3 in)714 rsMatrixMultiply(const rs_matrix4x4 *m, float3 in) {
715 float4 ret;
716 ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + (m->m[8] * in.z) + m->m[12];
717 ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + (m->m[9] * in.z) + m->m[13];
718 ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + (m->m[10] * in.z) + m->m[14];
719 ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + (m->m[11] * in.z) + m->m[15];
720 return ret;
721 }
722
723 extern float4 __attribute__((overloadable))
rsMatrixMultiply(const rs_matrix4x4 * m,float2 in)724 rsMatrixMultiply(const rs_matrix4x4 *m, float2 in) {
725 float4 ret;
726 ret.x = (m->m[0] * in.x) + (m->m[4] * in.y) + m->m[12];
727 ret.y = (m->m[1] * in.x) + (m->m[5] * in.y) + m->m[13];
728 ret.z = (m->m[2] * in.x) + (m->m[6] * in.y) + m->m[14];
729 ret.w = (m->m[3] * in.x) + (m->m[7] * in.y) + m->m[15];
730 return ret;
731 }
732
733 extern float3 __attribute__((overloadable))
rsMatrixMultiply(const rs_matrix3x3 * m,float3 in)734 rsMatrixMultiply(const rs_matrix3x3 *m, float3 in) {
735 float3 ret;
736 ret.x = (m->m[0] * in.x) + (m->m[3] * in.y) + (m->m[6] * in.z);
737 ret.y = (m->m[1] * in.x) + (m->m[4] * in.y) + (m->m[7] * in.z);
738 ret.z = (m->m[2] * in.x) + (m->m[5] * in.y) + (m->m[8] * in.z);
739 return ret;
740 }
741
742 extern float3 __attribute__((overloadable))
rsMatrixMultiply(const rs_matrix3x3 * m,float2 in)743 rsMatrixMultiply(const rs_matrix3x3 *m, float2 in) {
744 float3 ret;
745 ret.x = (m->m[0] * in.x) + (m->m[3] * in.y);
746 ret.y = (m->m[1] * in.x) + (m->m[4] * in.y);
747 ret.z = (m->m[2] * in.x) + (m->m[5] * in.y);
748 return ret;
749 }
750
751 /**
752 * Pixel Ops
753 */
rsPackColorTo8888(float r,float g,float b)754 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b)
755 {
756 uchar4 c;
757 c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
758 c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
759 c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
760 c.w = 255;
761 return c;
762 }
763
rsPackColorTo8888(float r,float g,float b,float a)764 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float r, float g, float b, float a)
765 {
766 uchar4 c;
767 c.x = (uchar)clamp((r * 255.f + 0.5f), 0.f, 255.f);
768 c.y = (uchar)clamp((g * 255.f + 0.5f), 0.f, 255.f);
769 c.z = (uchar)clamp((b * 255.f + 0.5f), 0.f, 255.f);
770 c.w = (uchar)clamp((a * 255.f + 0.5f), 0.f, 255.f);
771 return c;
772 }
773
rsPackColorTo8888(float3 color)774 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float3 color)
775 {
776 color *= 255.f;
777 color += 0.5f;
778 color = clamp(color, 0.f, 255.f);
779 uchar4 c = {color.x, color.y, color.z, 255};
780 return c;
781 }
782
rsPackColorTo8888(float4 color)783 extern uchar4 __attribute__((overloadable)) rsPackColorTo8888(float4 color)
784 {
785 color *= 255.f;
786 color += 0.5f;
787 color = clamp(color, 0.f, 255.f);
788 uchar4 c = {color.x, color.y, color.z, color.w};
789 return c;
790 }
791