• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "rs_core.rsh"
2 #include "rs_graphics.rsh"
3 #include "rs_structs.h"
4 
5 
// RGB565 unpacking helpers (conversion bits taken from SkBitmap).
// A packed 16-bit pixel holds red in the top 5 bits, green in the middle
// 6 bits and blue in the low 5 bits. For every channel we define its bit
// width, its shift inside the packed value, the mask of the shifted-down
// field, and an extractor macro.

// Red channel.
#define SK_R16_BITS     5
#define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
#define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
#define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)

// Green channel.
#define SK_G16_BITS     6
#define SK_G16_SHIFT    (SK_B16_BITS)
#define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
#define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)

// Blue channel.
#define SK_B16_BITS     5
#define SK_B16_SHIFT    0
#define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)
#define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
22 
SkR16ToR32(unsigned r)23 static inline unsigned SkR16ToR32(unsigned r) {
24     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
25 }
26 
SkG16ToG32(unsigned g)27 static inline unsigned SkG16ToG32(unsigned g) {
28     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
29 }
30 
SkB16ToB32(unsigned b)31 static inline unsigned SkB16ToB32(unsigned b) {
32     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
33 }
34 
// Extract one channel from a packed RGB565 pixel and widen it to 8 bits.
#define SkPacked16ToR32(packed) SkR16ToR32(SkGetPackedR16(packed))
#define SkPacked16ToG32(packed) SkG16ToG32(SkGetPackedG16(packed))
#define SkPacked16ToB32(packed) SkB16ToB32(SkGetPackedB16(packed))
38 
getFrom565(uint16_t color)39 static float3 getFrom565(uint16_t color) {
40     float3 result;
41     result.x = (float)SkPacked16ToR32(color);
42     result.y = (float)SkPacked16ToG32(color);
43     result.z = (float)SkPacked16ToB32(color);
44     return result;
45 }
46 
47 /**
48 * Allocation sampling
49 */
50 static inline float __attribute__((overloadable))
getElementAt1(const uint8_t * p,int32_t x)51         getElementAt1(const uint8_t *p, int32_t x) {
52     float r = p[x];
53     return r;
54 }
55 
56 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,int32_t x)57         getElementAt2(const uint8_t *p, int32_t x) {
58     x *= 2;
59     float2 r = {p[x], p[x+1]};
60     return r;
61 }
62 
63 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,int32_t x)64         getElementAt3(const uint8_t *p, int32_t x) {
65     x *= 4;
66     float3 r = {p[x], p[x+1], p[x+2]};
67     return r;
68 }
69 
70 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,int32_t x)71         getElementAt4(const uint8_t *p, int32_t x) {
72     x *= 4;
73     const uchar4 *p2 = (const uchar4 *)&p[x];
74     return convert_float4(p2[0]);
75 }
76 
77 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,int32_t x)78         getElementAt565(const uint8_t *p, int32_t x) {
79     x *= 2;
80     float3 r = getFrom565(((const uint16_t *)p)[0]);
81     return r;
82 }
83 
84 static inline float __attribute__((overloadable))
getElementAt1(const uint8_t * p,size_t stride,int32_t x,int32_t y)85         getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
86     p += y * stride;
87     float r = p[x];
88     return r;
89 }
90 
91 static inline float2 __attribute__((overloadable))
getElementAt2(const uint8_t * p,size_t stride,int32_t x,int32_t y)92         getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
93     p += y * stride;
94     x *= 2;
95     float2 r = {p[x], p[x+1]};
96     return r;
97 }
98 
99 static inline float3 __attribute__((overloadable))
getElementAt3(const uint8_t * p,size_t stride,int32_t x,int32_t y)100         getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
101     p += y * stride;
102     x *= 4;
103     float3 r = {p[x], p[x+1], p[x+2]};
104     return r;
105 }
106 
107 static inline float4 __attribute__((overloadable))
getElementAt4(const uint8_t * p,size_t stride,int32_t x,int32_t y)108         getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
109     p += y * stride;
110     x *= 4;
111     float4 r = {p[x], p[x+1], p[x+2], p[x+3]};
112     return r;
113 }
114 
115 static inline float3 __attribute__((overloadable))
getElementAt565(const uint8_t * p,size_t stride,int32_t x,int32_t y)116         getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
117     p += y * stride;
118     x *= 2;
119     float3 r = getFrom565(((const uint16_t *)p)[0]);
120     return r;
121 }
122 
123 
124 
125 
126 
127 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)128             getSample_A(const uint8_t *p, int32_t iPixel,
129                           int32_t next, float w0, float w1) {
130     float p0 = getElementAt1(p, iPixel);
131     float p1 = getElementAt1(p, next);
132     float r = p0 * w0 + p1 * w1;
133     r *= (1.f / 255.f);
134     float4 ret = {0.f, 0.f, 0.f, r};
135     return ret;
136 }
137 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)138             getSample_L(const uint8_t *p, int32_t iPixel,
139                           int32_t next, float w0, float w1) {
140     float p0 = getElementAt1(p, iPixel);
141     float p1 = getElementAt1(p, next);
142     float r = p0 * w0 + p1 * w1;
143     r *= (1.f / 255.f);
144     float4 ret = {r, r, r, 1.f};
145     return ret;
146 }
147 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)148             getSample_LA(const uint8_t *p, int32_t iPixel,
149                            int32_t next, float w0, float w1) {
150     float2 p0 = getElementAt2(p, iPixel);
151     float2 p1 = getElementAt2(p, next);
152     float2 r = p0 * w0 + p1 * w1;
153     r *= (1.f / 255.f);
154     float4 ret = {r.x, r.x, r.x, r.y};
155     return ret;
156 }
157 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)158             getSample_RGB(const uint8_t *p, int32_t iPixel,
159                             int32_t next, float w0, float w1) {
160     float3 p0 = getElementAt3(p, iPixel);
161     float3 p1 = getElementAt3(p, next);
162     float3 r = p0 * w0 + p1 * w1;
163     r *= (1.f / 255.f);
164     float4 ret = {r.x, r.x, r.z, 1.f};
165     return ret;
166 }
167 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)168             getSample_565(const uint8_t *p, int32_t iPixel,
169                            int32_t next, float w0, float w1) {
170     float3 p0 = getElementAt565(p, iPixel);
171     float3 p1 = getElementAt565(p, next);
172     float3 r = p0 * w0 + p1 * w1;
173     r *= (1.f / 255.f);
174     float4 ret = {r.x, r.x, r.z, 1.f};
175     return ret;
176 }
177 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,int32_t iPixel,int32_t next,float w0,float w1)178             getSample_RGBA(const uint8_t *p, int32_t iPixel,
179                              int32_t next, float w0, float w1) {
180     float4 p0 = getElementAt4(p, iPixel);
181     float4 p1 = getElementAt4(p, next);
182     float4 r = p0 * w0 + p1 * w1;
183     r *= (1.f / 255.f);
184     return r;
185 }
186 
187 
188 static float4 __attribute__((overloadable))
getSample_A(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)189             getSample_A(const uint8_t *p, size_t stride,
190                           int locX, int locY, int nextX, int nextY,
191                           float w0, float w1, float w2, float w3) {
192     float p0 = getElementAt1(p, stride, locX, locY);
193     float p1 = getElementAt1(p, stride, nextX, locY);
194     float p2 = getElementAt1(p, stride, locX, nextY);
195     float p3 = getElementAt1(p, stride, nextX, nextY);
196     float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
197     r *= (1.f / 255.f);
198     float4 ret = {0.f, 0.f, 0.f, r};
199     return ret;
200 }
201 static float4 __attribute__((overloadable))
getSample_L(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)202             getSample_L(const uint8_t *p, size_t stride,
203                          int locX, int locY, int nextX, int nextY,
204                          float w0, float w1, float w2, float w3) {
205     float p0 = getElementAt1(p, stride, locX, locY);
206     float p1 = getElementAt1(p, stride, nextX, locY);
207     float p2 = getElementAt1(p, stride, locX, nextY);
208     float p3 = getElementAt1(p, stride, nextX, nextY);
209     float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
210     r *= (1.f / 255.f);
211     float4 ret = {r, r, r, 1.f};
212     return ret;
213 }
214 static float4 __attribute__((overloadable))
getSample_LA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)215             getSample_LA(const uint8_t *p, size_t stride,
216                          int locX, int locY, int nextX, int nextY,
217                          float w0, float w1, float w2, float w3) {
218     float2 p0 = getElementAt2(p, stride, locX, locY);
219     float2 p1 = getElementAt2(p, stride, nextX, locY);
220     float2 p2 = getElementAt2(p, stride, locX, nextY);
221     float2 p3 = getElementAt2(p, stride, nextX, nextY);
222     float2 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
223     r *= (1.f / 255.f);
224     float4 ret = {r.x, r.x, r.x, r.y};
225     return ret;
226 }
227 static float4 __attribute__((overloadable))
getSample_RGB(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)228             getSample_RGB(const uint8_t *p, size_t stride,
229                          int locX, int locY, int nextX, int nextY,
230                          float w0, float w1, float w2, float w3) {
231     float4 p0 = getElementAt4(p, stride, locX, locY);
232     float4 p1 = getElementAt4(p, stride, nextX, locY);
233     float4 p2 = getElementAt4(p, stride, locX, nextY);
234     float4 p3 = getElementAt4(p, stride, nextX, nextY);
235     float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
236     r *= (1.f / 255.f);
237     float4 ret = {r.x, r.y, r.z, 1.f};
238     return ret;
239 }
240 static float4 __attribute__((overloadable))
getSample_RGBA(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)241             getSample_RGBA(const uint8_t *p, size_t stride,
242                          int locX, int locY, int nextX, int nextY,
243                          float w0, float w1, float w2, float w3) {
244     float4 p0 = getElementAt4(p, stride, locX, locY);
245     float4 p1 = getElementAt4(p, stride, nextX, locY);
246     float4 p2 = getElementAt4(p, stride, locX, nextY);
247     float4 p3 = getElementAt4(p, stride, nextX, nextY);
248     float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
249     r *= (1.f / 255.f);
250     return r;
251 }
252 static float4 __attribute__((overloadable))
getSample_565(const uint8_t * p,size_t stride,int locX,int locY,int nextX,int nextY,float w0,float w1,float w2,float w3)253             getSample_565(const uint8_t *p, size_t stride,
254                          int locX, int locY, int nextX, int nextY,
255                          float w0, float w1, float w2, float w3) {
256     float3 p0 = getElementAt565(p, stride, locX, locY);
257     float3 p1 = getElementAt565(p, stride, nextX, locY);
258     float3 p2 = getElementAt565(p, stride, locX, nextY);
259     float3 p3 = getElementAt565(p, stride, nextX, nextY);
260     float3 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
261     r *= (1.f / 255.f);
262     float4 ret;
263     ret.rgb = r;
264     ret.w = 1.f;
265     return ret;
266 }
267 
268 static float4 __attribute__((overloadable))
getBilinearSample1D(const Allocation_t * alloc,float2 weights,uint32_t iPixel,uint32_t next,rs_data_kind dk,rs_data_type dt,uint32_t lod)269         getBilinearSample1D(const Allocation_t *alloc, float2 weights,
270                           uint32_t iPixel, uint32_t next,
271                           rs_data_kind dk, rs_data_type dt, uint32_t lod) {
272 
273      const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
274 
275      switch(dk) {
276      case RS_KIND_PIXEL_RGBA:
277          return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
278      case RS_KIND_PIXEL_A:
279          return getSample_A(p, iPixel, next, weights.x, weights.y);
280      case RS_KIND_PIXEL_RGB:
281          if (dt == RS_TYPE_UNSIGNED_5_6_5) {
282              return getSample_565(p, iPixel, next, weights.x, weights.y);
283          }
284          return getSample_RGB(p, iPixel, next, weights.x, weights.y);
285      case RS_KIND_PIXEL_L:
286          return getSample_L(p, iPixel, next, weights.x, weights.y);
287      case RS_KIND_PIXEL_LA:
288          return getSample_LA(p, iPixel, next, weights.x, weights.y);
289 
290      default:
291          //__builtin_unreachable();
292          break;
293      }
294 
295      //__builtin_unreachable();
296      return 0.f;
297 }
298 
wrapI(rs_sampler_value wrap,int32_t coord,int32_t size)299 static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
300     if (wrap == RS_SAMPLER_WRAP) {
301         coord = coord % size;
302         if (coord < 0) {
303             coord += size;
304         }
305     }
306     if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
307         coord = coord % (size * 2);
308         if (coord < 0) {
309             coord = (size * 2) + coord;
310         }
311         if (coord >= size) {
312             coord = (size * 2) - coord;
313         }
314     }
315     return (uint32_t)max(0, min(coord, size - 1));
316 }
317 
318 static float4 __attribute__((overloadable))
getBilinearSample2D(const Allocation_t * alloc,float w0,float w1,float w2,float w3,int lx,int ly,int nx,int ny,rs_data_kind dk,rs_data_type dt,uint32_t lod)319         getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
320                           int lx, int ly, int nx, int ny,
321                           rs_data_kind dk, rs_data_type dt, uint32_t lod) {
322 
323     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
324     size_t stride = alloc->mHal.drvState.lod[lod].stride;
325 
326     switch(dk) {
327     case RS_KIND_PIXEL_RGBA:
328         return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
329     case RS_KIND_PIXEL_A:
330         return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
331     case RS_KIND_PIXEL_LA:
332         return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
333     case RS_KIND_PIXEL_RGB:
334         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
335             return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
336         }
337         return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
338     case RS_KIND_PIXEL_L:
339         return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
340 
341     default:
342         break;
343     }
344 
345     return 0.f;
346 }
347 
348 static float4  __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint32_t iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)349         getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
350                          rs_data_type dt, uint32_t lod) {
351 
352     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
353 
354     float4 result = {0.f, 0.f, 0.f, 255.f};
355 
356     switch(dk) {
357     case RS_KIND_PIXEL_RGBA:
358         result = getElementAt4(p, iPixel);
359         break;
360     case RS_KIND_PIXEL_A:
361         result.w = getElementAt1(p, iPixel);
362         break;
363     case RS_KIND_PIXEL_LA:
364         result.zw = getElementAt2(p, iPixel);
365         result.xy = result.z;
366         break;
367     case RS_KIND_PIXEL_RGB:
368         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
369             result.xyz = getElementAt565(p, iPixel);
370         } else {
371             result.xyz = getElementAt3(p, iPixel);
372         }
373         break;
374     case RS_KIND_PIXEL_L:
375         result.xyz = getElementAt1(p, iPixel);
376 
377     default:
378         //__builtin_unreachable();
379         break;
380     }
381 
382     return result * 0.003921569f;
383 }
384 
385 static float4  __attribute__((overloadable))
getNearestSample(const Allocation_t * alloc,uint2 iPixel,rs_data_kind dk,rs_data_type dt,uint32_t lod)386         getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
387                          rs_data_type dt, uint32_t lod) {
388 
389     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
390     size_t stride = alloc->mHal.drvState.lod[lod].stride;
391 
392     float4 result = {0.f, 0.f, 0.f, 255.f};
393 
394     switch(dk) {
395     case RS_KIND_PIXEL_RGBA:
396         result = getElementAt4(p, stride, iPixel.x, iPixel.y);
397         break;
398     case RS_KIND_PIXEL_A:
399         result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
400         break;
401     case RS_KIND_PIXEL_LA:
402         result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
403         result.xy = result.z;
404         break;
405     case RS_KIND_PIXEL_RGB:
406         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
407             result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
408         } else {
409             result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
410         }
411         break;
412 
413     default:
414         //__builtin_unreachable();
415         break;
416     }
417 
418     return result * 0.003921569f;
419 }
420 
421 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)422         sample_LOD_LinearPixel(const Allocation_t *alloc,
423                                rs_data_kind dk, rs_data_type dt,
424                                rs_sampler_value wrapS,
425                                float uv, uint32_t lod) {
426 
427     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
428 
429     int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
430     float pixelUV = uv * (float)(sourceW);
431     int32_t iPixel = (int32_t)(pixelUV);
432     float frac = pixelUV - (float)iPixel;
433 
434     if (frac < 0.5f) {
435         iPixel -= 1;
436         frac += 0.5f;
437     } else {
438         frac -= 0.5f;
439     }
440 
441     float oneMinusFrac = 1.0f - frac;
442 
443     float2 weights;
444     weights.x = oneMinusFrac;
445     weights.y = frac;
446 
447     uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
448     uint32_t location = wrapI(wrapS, iPixel, sourceW);
449 
450     return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
451 }
452 
453 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,float uv,uint32_t lod)454         sample_LOD_NearestPixel(const Allocation_t *alloc,
455                                 rs_data_kind dk, rs_data_type dt,
456                                 rs_sampler_value wrapS,
457                                 float uv, uint32_t lod) {
458 
459     int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
460     int32_t iPixel = (int32_t)(uv * (float)(sourceW));
461     uint32_t location = wrapI(wrapS, iPixel, sourceW);
462 
463     return getNearestSample(alloc, location, dk, dt, lod);
464 }
465 
466 static float4 __attribute__((overloadable))
sample_LOD_LinearPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)467         sample_LOD_LinearPixel(const Allocation_t *alloc,
468                                rs_data_kind dk, rs_data_type dt,
469                                rs_sampler_value wrapS,
470                                rs_sampler_value wrapT,
471                                float2 uv, uint32_t lod) {
472 
473     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
474 
475     int sourceW = alloc->mHal.drvState.lod[lod].dimX;
476     int sourceH = alloc->mHal.drvState.lod[lod].dimY;
477 
478     float pixelU = uv.x * sourceW;
479     float pixelV = uv.y * sourceH;
480     int iPixelU = pixelU;
481     int iPixelV = pixelV;
482     float fracU = pixelU - iPixelU;
483     float fracV = pixelV - iPixelV;
484 
485     if (fracU < 0.5f) {
486         iPixelU -= 1;
487         fracU += 0.5f;
488     } else {
489         fracU -= 0.5f;
490     }
491     if (fracV < 0.5f) {
492         iPixelV -= 1;
493         fracV += 0.5f;
494     } else {
495         fracV -= 0.5f;
496     }
497     float oneMinusFracU = 1.0f - fracU;
498     float oneMinusFracV = 1.0f - fracV;
499 
500     float w0 = oneMinusFracU * oneMinusFracV;
501     float w1 = fracU * oneMinusFracV;
502     float w2 = oneMinusFracU * fracV;
503     float w3 = fracU * fracV;
504 
505     int nx = wrapI(wrapS, iPixelU + 1, sourceW);
506     int ny = wrapI(wrapT, iPixelV + 1, sourceH);
507     int lx = wrapI(wrapS, iPixelU, sourceW);
508     int ly = wrapI(wrapT, iPixelV, sourceH);
509 
510     return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
511 
512 }
513 
514 static float4 __attribute__((overloadable))
sample_LOD_NearestPixel(const Allocation_t * alloc,rs_data_kind dk,rs_data_type dt,rs_sampler_value wrapS,rs_sampler_value wrapT,float2 uv,uint32_t lod)515         sample_LOD_NearestPixel(const Allocation_t *alloc,
516                                 rs_data_kind dk, rs_data_type dt,
517                                 rs_sampler_value wrapS,
518                                 rs_sampler_value wrapT,
519                                 float2 uv, uint32_t lod) {
520     int sourceW = alloc->mHal.drvState.lod[lod].dimX;
521     int sourceH = alloc->mHal.drvState.lod[lod].dimY;
522 
523     float2 dimF;
524     dimF.x = (float)(sourceW);
525     dimF.y = (float)(sourceH);
526     int2 iPixel = convert_int2(uv * dimF);
527 
528     uint2 location;
529     location.x = wrapI(wrapS, iPixel.x, sourceW);
530     location.y = wrapI(wrapT, iPixel.y, sourceH);
531     return getNearestSample(alloc, location, dk, dt, lod);
532 }
533 
534 extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float uv,float lod)535         rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
536 
537     const Allocation_t *alloc = (const Allocation_t *)a.p;
538     const Sampler_t *prog = (Sampler_t *)s.p;
539     const Type_t *type = (Type_t *)alloc->mHal.state.type;
540     const Element_t *elem = type->mHal.state.element;
541     rs_data_kind dk = elem->mHal.state.dataKind;
542     rs_data_type dt = elem->mHal.state.dataType;
543     rs_sampler_value sampleMin = prog->mHal.state.minFilter;
544     rs_sampler_value sampleMag = prog->mHal.state.magFilter;
545     rs_sampler_value wrapS = prog->mHal.state.wrapS;
546 
547     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
548         return 0.f;
549     }
550 
551     if (lod <= 0.0f) {
552         if (sampleMag == RS_SAMPLER_NEAREST) {
553             return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
554         }
555         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, 0);
556     }
557 
558     if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
559         uint32_t maxLOD = type->mHal.state.lodCount - 1;
560         lod = min(lod, (float)maxLOD);
561         uint32_t nearestLOD = (uint32_t)round(lod);
562         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, nearestLOD);
563     }
564 
565     if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
566         uint32_t lod0 = (uint32_t)floor(lod);
567         uint32_t lod1 = (uint32_t)ceil(lod);
568         uint32_t maxLOD = type->mHal.state.lodCount - 1;
569         lod0 = min(lod0, maxLOD);
570         lod1 = min(lod1, maxLOD);
571         float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod0);
572         float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod1);
573         float frac = lod - (float)lod0;
574         return sample0 * (1.0f - frac) + sample1 * frac;
575     }
576 
577     return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
578 }
579 
580 extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float location)581         rsSample(rs_allocation a, rs_sampler s, float location) {
582     return rsSample(a, s, location, 0);
583 }
584 
585 
586 extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv,float lod)587         rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
588 
589     const Allocation_t *alloc = (const Allocation_t *)a.p;
590     const Sampler_t *prog = (Sampler_t *)s.p;
591     const Type_t *type = (Type_t *)alloc->mHal.state.type;
592     const Element_t *elem = type->mHal.state.element;
593     rs_data_kind dk = elem->mHal.state.dataKind;
594     rs_data_type dt = elem->mHal.state.dataType;
595     rs_sampler_value sampleMin = prog->mHal.state.minFilter;
596     rs_sampler_value sampleMag = prog->mHal.state.magFilter;
597     rs_sampler_value wrapS = prog->mHal.state.wrapS;
598     rs_sampler_value wrapT = prog->mHal.state.wrapT;
599 
600     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
601         return 0.f;
602     }
603 
604     if (lod <= 0.0f) {
605         if (sampleMag == RS_SAMPLER_NEAREST) {
606             return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
607         }
608         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
609     }
610 
611     if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
612         uint32_t maxLOD = type->mHal.state.lodCount - 1;
613         lod = min(lod, (float)maxLOD);
614         uint32_t nearestLOD = (uint32_t)round(lod);
615         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, nearestLOD);
616     }
617 
618     if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
619         uint32_t lod0 = (uint32_t)floor(lod);
620         uint32_t lod1 = (uint32_t)ceil(lod);
621         uint32_t maxLOD = type->mHal.state.lodCount - 1;
622         lod0 = min(lod0, maxLOD);
623         lod1 = min(lod1, maxLOD);
624         float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod0);
625         float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod1);
626         float frac = lod - (float)lod0;
627         return sample0 * (1.0f - frac) + sample1 * frac;
628     }
629 
630     return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
631 }
632 
633 extern const float4 __attribute__((overloadable))
rsSample(rs_allocation a,rs_sampler s,float2 uv)634         rsSample(rs_allocation a, rs_sampler s, float2 uv) {
635 
636     const Allocation_t *alloc = (const Allocation_t *)a.p;
637     const Sampler_t *prog = (Sampler_t *)s.p;
638     const Type_t *type = (Type_t *)alloc->mHal.state.type;
639     const Element_t *elem = type->mHal.state.element;
640     rs_data_kind dk = elem->mHal.state.dataKind;
641     rs_data_type dt = elem->mHal.state.dataType;
642     rs_sampler_value wrapS = prog->mHal.state.wrapS;
643     rs_sampler_value wrapT = prog->mHal.state.wrapT;
644 
645     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
646         return 0.f;
647     }
648 
649     if (prog->mHal.state.magFilter == RS_SAMPLER_NEAREST) {
650         return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
651     }
652     return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
653 }
654 
655