1 #include "rs_core.rsh"
2 #include "rs_graphics.rsh"
3 #include "rs_structs.h"
4
5
// RGB565 packing constants and channel extractors (layout taken from SkBitmap).
// A packed 16-bit pixel stores red in the top 5 bits, green in the middle
// 6 bits and blue in the low 5 bits.

// Red channel: bit width, bit position, right-aligned mask.
#define SK_R16_BITS     5
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)

// Green channel: bit width, bit position, right-aligned mask.
#define SK_G16_BITS     6
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)

// Blue channel: bit width, bit position, right-aligned mask.
#define SK_B16_BITS     5
#define SK_B16_SHIFT    0
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)

// Pull a single channel out of a packed 565 value; the result is the raw
// 5- or 6-bit field, not yet widened to 8 bits.
#define SkGetPackedR16(packed)  (((unsigned)(packed) >> SK_R16_SHIFT) & SK_R16_MASK)
#define SkGetPackedG16(packed)  (((unsigned)(packed) >> SK_G16_SHIFT) & SK_G16_MASK)
#define SkGetPackedB16(packed)  (((unsigned)(packed) >> SK_B16_SHIFT) & SK_B16_MASK)
22
SkR16ToR32(unsigned r)23 static inline unsigned SkR16ToR32(unsigned r) {
24 return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
25 }
26
SkG16ToG32(unsigned g)27 static inline unsigned SkG16ToG32(unsigned g) {
28 return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
29 }
30
SkB16ToB32(unsigned b)31 static inline unsigned SkB16ToB32(unsigned b) {
32 return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
33 }
34
// Extract one channel from a packed 565 pixel and widen it to 8 bits in a
// single step (field extraction followed by the matching Sk*16To*32 helper).
#define SkPacked16ToR32(packed)     SkR16ToR32(SkGetPackedR16(packed))
#define SkPacked16ToG32(packed)     SkG16ToG32(SkGetPackedG16(packed))
#define SkPacked16ToB32(packed)     SkB16ToB32(SkGetPackedB16(packed))
38
getFrom565(uint16_t color)39 static float3 getFrom565(uint16_t color) {
40 float3 result;
41 result.x = (float)SkPacked16ToR32(color);
42 result.y = (float)SkPacked16ToG32(color);
43 result.z = (float)SkPacked16ToB32(color);
44 return result;
45 }
46
47 /**
48 * Allocation sampling
49 */
// Reads the single-byte element at index x of a 1D buffer and returns it as
// a float (unnormalized, 0..255).
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, int32_t x) {
    return (float)p[x];
}
55
56 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,int32_t x)57 getElementAt2(const uint8_t *p, int32_t x) {
58 x *= 2;
59 float2 r = {p[x], p[x+1]};
60 return r;
61 }
62
63 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,int32_t x)64 getElementAt3(const uint8_t *p, int32_t x) {
65 x *= 4;
66 float3 r = {p[x], p[x+1], p[x+2]};
67 return r;
68 }
69
70 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,int32_t x)71 getElementAt4(const uint8_t *p, int32_t x) {
72 x *= 4;
73 const uchar4 *p2 = (const uchar4 *)&p[x];
74 return convert_float4(p2[0]);
75 }
76
77 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,int32_t x)78 getElementAt565(const uint8_t *p, int32_t x) {
79 x *= 2;
80 float3 r = getFrom565(((const uint16_t *)p)[0]);
81 return r;
82 }
83
// Reads the single-byte element at column x of row y in a 2D buffer whose
// rows are `stride` bytes apart; returned as an unnormalized float.
static inline float __attribute__((overloadable))
        getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    const uint8_t *row = p + y * stride;
    return (float)row[x];
}
90
91 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,size_t stride,int32_t x,int32_t y)92 getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
93 p += y * stride;
94 x *= 2;
95 float2 r = {p[x], p[x+1]};
96 return r;
97 }
98
99 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,size_t stride,int32_t x,int32_t y)100 getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
101 p += y * stride;
102 x *= 4;
103 float3 r = {p[x], p[x+1], p[x+2]};
104 return r;
105 }
106
107 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,size_t stride,int32_t x,int32_t y)108 getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
109 p += y * stride;
110 x *= 4;
111 float4 r = {p[x], p[x+1], p[x+2], p[x+3]};
112 return r;
113 }
114
115 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,size_t stride,int32_t x,int32_t y)116 getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
117 p += y * stride;
118 x *= 2;
119 float3 r = getFrom565(((const uint16_t *)p)[0]);
120 return r;
121 }
122
123
124
125
126
127 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)128 getSample_A(const uint8_t *p, int32_t iPixel,
129 int32_t next, float w0, float w1) {
130 float p0 = getElementAt1(p, iPixel);
131 float p1 = getElementAt1(p, next);
132 float r = p0 * w0 + p1 * w1;
133 r *= (1.f / 255.f);
134 float4 ret = {0.f, 0.f, 0.f, r};
135 return ret;
136 }
137 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)138 getSample_L(const uint8_t *p, int32_t iPixel,
139 int32_t next, float w0, float w1) {
140 float p0 = getElementAt1(p, iPixel);
141 float p1 = getElementAt1(p, next);
142 float r = p0 * w0 + p1 * w1;
143 r *= (1.f / 255.f);
144 float4 ret = {r, r, r, 1.f};
145 return ret;
146 }
147 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)148 getSample_LA(const uint8_t *p, int32_t iPixel,
149 int32_t next, float w0, float w1) {
150 float2 p0 = getElementAt2(p, iPixel);
151 float2 p1 = getElementAt2(p, next);
152 float2 r = p0 * w0 + p1 * w1;
153 r *= (1.f / 255.f);
154 float4 ret = {r.x, r.x, r.x, r.y};
155 return ret;
156 }
157 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)158 getSample_RGB(const uint8_t *p, int32_t iPixel,
159 int32_t next, float w0, float w1) {
160 float3 p0 = getElementAt3(p, iPixel);
161 float3 p1 = getElementAt3(p, next);
162 float3 r = p0 * w0 + p1 * w1;
163 r *= (1.f / 255.f);
164 float4 ret = {r.x, r.x, r.z, 1.f};
165 return ret;
166 }
167 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)168 getSample_565(const uint8_t *p, int32_t iPixel,
169 int32_t next, float w0, float w1) {
170 float3 p0 = getElementAt565(p, iPixel);
171 float3 p1 = getElementAt565(p, next);
172 float3 r = p0 * w0 + p1 * w1;
173 r *= (1.f / 255.f);
174 float4 ret = {r.x, r.x, r.z, 1.f};
175 return ret;
176 }
177 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)178 getSample_RGBA(const uint8_t *p, int32_t iPixel,
179 int32_t next, float w0, float w1) {
180 float4 p0 = getElementAt4(p, iPixel);
181 float4 p1 = getElementAt4(p, next);
182 float4 r = p0 * w0 + p1 * w1;
183 r *= (1.f / 255.f);
184 return r;
185 }
186
187
188 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)189 getSample_A(const uint8_t *p, size_t stride,
190 int locX, int locY, int nextX, int nextY,
191 float w0, float w1, float w2, float w3) {
192 float p0 = getElementAt1(p, stride, locX, locY);
193 float p1 = getElementAt1(p, stride, nextX, locY);
194 float p2 = getElementAt1(p, stride, locX, nextY);
195 float p3 = getElementAt1(p, stride, nextX, nextY);
196 float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
197 r *= (1.f / 255.f);
198 float4 ret = {0.f, 0.f, 0.f, r};
199 return ret;
200 }
201 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)202 getSample_L(const uint8_t *p, size_t stride,
203 int locX, int locY, int nextX, int nextY,
204 float w0, float w1, float w2, float w3) {
205 float p0 = getElementAt1(p, stride, locX, locY);
206 float p1 = getElementAt1(p, stride, nextX, locY);
207 float p2 = getElementAt1(p, stride, locX, nextY);
208 float p3 = getElementAt1(p, stride, nextX, nextY);
209 float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
210 r *= (1.f / 255.f);
211 float4 ret = {r, r, r, 1.f};
212 return ret;
213 }
214 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)215 getSample_LA(const uint8_t *p, size_t stride,
216 int locX, int locY, int nextX, int nextY,
217 float w0, float w1, float w2, float w3) {
218 float2 p0 = getElementAt2(p, stride, locX, locY);
219 float2 p1 = getElementAt2(p, stride, nextX, locY);
220 float2 p2 = getElementAt2(p, stride, locX, nextY);
221 float2 p3 = getElementAt2(p, stride, nextX, nextY);
222 float2 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
223 r *= (1.f / 255.f);
224 float4 ret = {r.x, r.x, r.x, r.y};
225 return ret;
226 }
227 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)228 getSample_RGB(const uint8_t *p, size_t stride,
229 int locX, int locY, int nextX, int nextY,
230 float w0, float w1, float w2, float w3) {
231 float4 p0 = getElementAt4(p, stride, locX, locY);
232 float4 p1 = getElementAt4(p, stride, nextX, locY);
233 float4 p2 = getElementAt4(p, stride, locX, nextY);
234 float4 p3 = getElementAt4(p, stride, nextX, nextY);
235 float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
236 r *= (1.f / 255.f);
237 float4 ret = {r.x, r.y, r.z, 1.f};
238 return ret;
239 }
240 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)241 getSample_RGBA(const uint8_t *p, size_t stride,
242 int locX, int locY, int nextX, int nextY,
243 float w0, float w1, float w2, float w3) {
244 float4 p0 = getElementAt4(p, stride, locX, locY);
245 float4 p1 = getElementAt4(p, stride, nextX, locY);
246 float4 p2 = getElementAt4(p, stride, locX, nextY);
247 float4 p3 = getElementAt4(p, stride, nextX, nextY);
248 float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
249 r *= (1.f / 255.f);
250 return r;
251 }
252 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)253 getSample_565(const uint8_t *p, size_t stride,
254 int locX, int locY, int nextX, int nextY,
255 float w0, float w1, float w2, float w3) {
256 float3 p0 = getElementAt565(p, stride, locX, locY);
257 float3 p1 = getElementAt565(p, stride, nextX, locY);
258 float3 p2 = getElementAt565(p, stride, locX, nextY);
259 float3 p3 = getElementAt565(p, stride, nextX, nextY);
260 float3 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
261 r *= (1.f / 255.f);
262 float4 ret;
263 ret.rgb = r;
264 ret.w = 1.f;
265 return ret;
266 }
267
268 static float4 __attribute__((overloadable))
getBilinearSample1D(const Allocation_t * alloc,float2 weights,uint32_t iPixel,uint32_t next,rs_data_kind dk,rs_data_type dt,uint32_t lod)269 getBilinearSample1D(const Allocation_t *alloc, float2 weights,
270 uint32_t iPixel, uint32_t next,
271 rs_data_kind dk, rs_data_type dt, uint32_t lod) {
272
273 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
274
275 switch(dk) {
276 case RS_KIND_PIXEL_RGBA:
277 return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
278 case RS_KIND_PIXEL_A:
279 return getSample_A(p, iPixel, next, weights.x, weights.y);
280 case RS_KIND_PIXEL_RGB:
281 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
282 return getSample_565(p, iPixel, next, weights.x, weights.y);
283 }
284 return getSample_RGB(p, iPixel, next, weights.x, weights.y);
285 case RS_KIND_PIXEL_L:
286 return getSample_L(p, iPixel, next, weights.x, weights.y);
287 case RS_KIND_PIXEL_LA:
288 return getSample_LA(p, iPixel, next, weights.x, weights.y);
289
290 default:
291 //__builtin_unreachable();
292 break;
293 }
294
295 //__builtin_unreachable();
296 return 0.f;
297 }
298
// Maps an integer texel coordinate into [0, size - 1] according to the
// sampler wrap mode.
//
//   RS_SAMPLER_WRAP             coordinate repeats with period `size`
//   RS_SAMPLER_MIRRORED_REPEAT  coordinate repeats with period 2 * size, the
//                               second half of each period running backwards
//   anything else               coordinate is clamped to the edge
//
// Returns 0 when size is not positive (there is nothing to address, and the
// modulo below would otherwise divide by zero).
static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
    if (size <= 0) {
        return 0;
    }

    if (wrap == RS_SAMPLER_WRAP) {
        coord = coord % size;
        if (coord < 0) {
            // C's % keeps the sign of the dividend; shift into [0, size).
            coord += size;
        }
    } else if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
        const int32_t period = size * 2;
        coord = coord % period;
        if (coord < 0) {
            coord += period;
        }
        if (coord >= size) {
            // Reflect the second half of the period: size maps to size - 1
            // and period - 1 maps to 0. (Using period - coord here would be
            // off by one texel.)
            coord = (period - 1) - coord;
        }
    }

    // Clamp-to-edge, and a safety net for the modes handled above.
    return (uint32_t)max(0, min(coord, size - 1));
}
317
318 static float4 __attribute__((overloadable))
getBilinearSample2D(const Allocation_t * alloc,float w0,float w1,float w2,float w3,int lx,int ly,int nx,int ny,rs_data_kind dk,rs_data_type dt,uint32_t lod)319 getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
320 int lx, int ly, int nx, int ny,
321 rs_data_kind dk, rs_data_type dt, uint32_t lod) {
322
323 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
324 size_t stride = alloc->mHal.drvState.lod[lod].stride;
325
326 switch(dk) {
327 case RS_KIND_PIXEL_RGBA:
328 return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
329 case RS_KIND_PIXEL_A:
330 return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
331 case RS_KIND_PIXEL_LA:
332 return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
333 case RS_KIND_PIXEL_RGB:
334 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
335 return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
336 }
337 return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
338 case RS_KIND_PIXEL_L:
339 return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
340
341 default:
342 break;
343 }
344
345 return 0.f;
346 }
347
348 static float4 __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint32_t iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)349 getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
350 rs_data_type dt, uint32_t lod) {
351
352 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
353
354 float4 result = {0.f, 0.f, 0.f, 255.f};
355
356 switch(dk) {
357 case RS_KIND_PIXEL_RGBA:
358 result = getElementAt4(p, iPixel);
359 break;
360 case RS_KIND_PIXEL_A:
361 result.w = getElementAt1(p, iPixel);
362 break;
363 case RS_KIND_PIXEL_LA:
364 result.zw = getElementAt2(p, iPixel);
365 result.xy = result.z;
366 break;
367 case RS_KIND_PIXEL_RGB:
368 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
369 result.xyz = getElementAt565(p, iPixel);
370 } else {
371 result.xyz = getElementAt3(p, iPixel);
372 }
373 break;
374 case RS_KIND_PIXEL_L:
375 result.xyz = getElementAt1(p, iPixel);
376
377 default:
378 //__builtin_unreachable();
379 break;
380 }
381
382 return result * 0.003921569f;
383 }
384
385 static float4 __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint2 iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)386 getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
387 rs_data_type dt, uint32_t lod) {
388
389 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
390 size_t stride = alloc->mHal.drvState.lod[lod].stride;
391
392 float4 result = {0.f, 0.f, 0.f, 255.f};
393
394 switch(dk) {
395 case RS_KIND_PIXEL_RGBA:
396 result = getElementAt4(p, stride, iPixel.x, iPixel.y);
397 break;
398 case RS_KIND_PIXEL_A:
399 result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
400 break;
401 case RS_KIND_PIXEL_LA:
402 result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
403 result.xy = result.z;
404 break;
405 case RS_KIND_PIXEL_RGB:
406 if (dt == RS_TYPE_UNSIGNED_5_6_5) {
407 result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
408 } else {
409 result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
410 }
411 break;
412
413 default:
414 //__builtin_unreachable();
415 break;
416 }
417
418 return result * 0.003921569f;
419 }
420
421 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)422 sample_LOD_LinearPixel(const Allocation_t *alloc,
423 rs_data_kind dk, rs_data_type dt,
424 rs_sampler_value wrapS,
425 float uv, uint32_t lod) {
426
427 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
428
429 int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
430 float pixelUV = uv * (float)(sourceW);
431 int32_t iPixel = (int32_t)(pixelUV);
432 float frac = pixelUV - (float)iPixel;
433
434 if (frac < 0.5f) {
435 iPixel -= 1;
436 frac += 0.5f;
437 } else {
438 frac -= 0.5f;
439 }
440
441 float oneMinusFrac = 1.0f - frac;
442
443 float2 weights;
444 weights.x = oneMinusFrac;
445 weights.y = frac;
446
447 uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
448 uint32_t location = wrapI(wrapS, iPixel, sourceW);
449
450 return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
451 }
452
453 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)454 sample_LOD_NearestPixel(const Allocation_t *alloc,
455 rs_data_kind dk, rs_data_type dt,
456 rs_sampler_value wrapS,
457 float uv, uint32_t lod) {
458
459 int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
460 int32_t iPixel = (int32_t)(uv * (float)(sourceW));
461 uint32_t location = wrapI(wrapS, iPixel, sourceW);
462
463 return getNearestSample(alloc, location, dk, dt, lod);
464 }
465
466 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)467 sample_LOD_LinearPixel(const Allocation_t *alloc,
468 rs_data_kind dk, rs_data_type dt,
469 rs_sampler_value wrapS,
470 rs_sampler_value wrapT,
471 float2 uv, uint32_t lod) {
472
473 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
474
475 int sourceW = alloc->mHal.drvState.lod[lod].dimX;
476 int sourceH = alloc->mHal.drvState.lod[lod].dimY;
477
478 float pixelU = uv.x * sourceW;
479 float pixelV = uv.y * sourceH;
480 int iPixelU = pixelU;
481 int iPixelV = pixelV;
482 float fracU = pixelU - iPixelU;
483 float fracV = pixelV - iPixelV;
484
485 if (fracU < 0.5f) {
486 iPixelU -= 1;
487 fracU += 0.5f;
488 } else {
489 fracU -= 0.5f;
490 }
491 if (fracV < 0.5f) {
492 iPixelV -= 1;
493 fracV += 0.5f;
494 } else {
495 fracV -= 0.5f;
496 }
497 float oneMinusFracU = 1.0f - fracU;
498 float oneMinusFracV = 1.0f - fracV;
499
500 float w0 = oneMinusFracU * oneMinusFracV;
501 float w1 = fracU * oneMinusFracV;
502 float w2 = oneMinusFracU * fracV;
503 float w3 = fracU * fracV;
504
505 int nx = wrapI(wrapS, iPixelU + 1, sourceW);
506 int ny = wrapI(wrapT, iPixelV + 1, sourceH);
507 int lx = wrapI(wrapS, iPixelU, sourceW);
508 int ly = wrapI(wrapT, iPixelV, sourceH);
509
510 return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
511
512 }
513
514 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)515 sample_LOD_NearestPixel(const Allocation_t *alloc,
516 rs_data_kind dk, rs_data_type dt,
517 rs_sampler_value wrapS,
518 rs_sampler_value wrapT,
519 float2 uv, uint32_t lod) {
520 int sourceW = alloc->mHal.drvState.lod[lod].dimX;
521 int sourceH = alloc->mHal.drvState.lod[lod].dimY;
522
523 float2 dimF;
524 dimF.x = (float)(sourceW);
525 dimF.y = (float)(sourceH);
526 int2 iPixel = convert_int2(uv * dimF);
527
528 uint2 location;
529 location.x = wrapI(wrapS, iPixel.x, sourceW);
530 location.y = wrapI(wrapT, iPixel.y, sourceH);
531 return getNearestSample(alloc, location, dk, dt, lod);
532 }
533
534 extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float uv,float lod)535 rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
536
537 const Allocation_t *alloc = (const Allocation_t *)a.p;
538 const Sampler_t *prog = (Sampler_t *)s.p;
539 const Type_t *type = (Type_t *)alloc->mHal.state.type;
540 const Element_t *elem = type->mHal.state.element;
541 rs_data_kind dk = elem->mHal.state.dataKind;
542 rs_data_type dt = elem->mHal.state.dataType;
543 rs_sampler_value sampleMin = prog->mHal.state.minFilter;
544 rs_sampler_value sampleMag = prog->mHal.state.magFilter;
545 rs_sampler_value wrapS = prog->mHal.state.wrapS;
546
547 if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
548 return 0.f;
549 }
550
551 if (lod <= 0.0f) {
552 if (sampleMag == RS_SAMPLER_NEAREST) {
553 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
554 }
555 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, 0);
556 }
557
558 if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
559 uint32_t maxLOD = type->mHal.state.lodCount - 1;
560 lod = min(lod, (float)maxLOD);
561 uint32_t nearestLOD = (uint32_t)round(lod);
562 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, nearestLOD);
563 }
564
565 if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
566 uint32_t lod0 = (uint32_t)floor(lod);
567 uint32_t lod1 = (uint32_t)ceil(lod);
568 uint32_t maxLOD = type->mHal.state.lodCount - 1;
569 lod0 = min(lod0, maxLOD);
570 lod1 = min(lod1, maxLOD);
571 float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod0);
572 float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod1);
573 float frac = lod - (float)lod0;
574 return sample0 * (1.0f - frac) + sample1 * frac;
575 }
576
577 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
578 }
579
580 extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float location)581 rsSample(rs_allocation a, rs_sampler s, float location) {
582 return rsSample(a, s, location, 0);
583 }
584
585
586 extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv,float lod)587 rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
588
589 const Allocation_t *alloc = (const Allocation_t *)a.p;
590 const Sampler_t *prog = (Sampler_t *)s.p;
591 const Type_t *type = (Type_t *)alloc->mHal.state.type;
592 const Element_t *elem = type->mHal.state.element;
593 rs_data_kind dk = elem->mHal.state.dataKind;
594 rs_data_type dt = elem->mHal.state.dataType;
595 rs_sampler_value sampleMin = prog->mHal.state.minFilter;
596 rs_sampler_value sampleMag = prog->mHal.state.magFilter;
597 rs_sampler_value wrapS = prog->mHal.state.wrapS;
598 rs_sampler_value wrapT = prog->mHal.state.wrapT;
599
600 if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
601 return 0.f;
602 }
603
604 if (lod <= 0.0f) {
605 if (sampleMag == RS_SAMPLER_NEAREST) {
606 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
607 }
608 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
609 }
610
611 if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
612 uint32_t maxLOD = type->mHal.state.lodCount - 1;
613 lod = min(lod, (float)maxLOD);
614 uint32_t nearestLOD = (uint32_t)round(lod);
615 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, nearestLOD);
616 }
617
618 if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
619 uint32_t lod0 = (uint32_t)floor(lod);
620 uint32_t lod1 = (uint32_t)ceil(lod);
621 uint32_t maxLOD = type->mHal.state.lodCount - 1;
622 lod0 = min(lod0, maxLOD);
623 lod1 = min(lod1, maxLOD);
624 float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod0);
625 float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod1);
626 float frac = lod - (float)lod0;
627 return sample0 * (1.0f - frac) + sample1 * frac;
628 }
629
630 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
631 }
632
633 extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv)634 rsSample(rs_allocation a, rs_sampler s, float2 uv) {
635
636 const Allocation_t *alloc = (const Allocation_t *)a.p;
637 const Sampler_t *prog = (Sampler_t *)s.p;
638 const Type_t *type = (Type_t *)alloc->mHal.state.type;
639 const Element_t *elem = type->mHal.state.element;
640 rs_data_kind dk = elem->mHal.state.dataKind;
641 rs_data_type dt = elem->mHal.state.dataType;
642 rs_sampler_value wrapS = prog->mHal.state.wrapS;
643 rs_sampler_value wrapT = prog->mHal.state.wrapT;
644
645 if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
646 return 0.f;
647 }
648
649 if (prog->mHal.state.magFilter == RS_SAMPLER_NEAREST) {
650 return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
651 }
652 return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
653 }
654
655