1 /* libs/pixelflinger/scanline.cpp
2 **
3 ** Copyright 2006-2011, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 ** http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18
19 #define LOG_TAG "pixelflinger"
20
21 #include <assert.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25
26 #include <cutils/memory.h>
27 #include <cutils/log.h>
28
29 #include "buffer.h"
30 #include "scanline.h"
31
32 #include "codeflinger/CodeCache.h"
33 #include "codeflinger/GGLAssembler.h"
34 #include "codeflinger/ARMAssembler.h"
35 //#include "codeflinger/ARMAssemblerOptimizer.h"
36
37 // ----------------------------------------------------------------------------
38
39 #define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline
40 #define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic
41 #define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic
42 #define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen
43
44 #ifdef NDEBUG
45 # define ANDROID_RELEASE
46 # define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
47 #else
48 # define ANDROID_DEBUG
49 # define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
50 #endif
51
52 #if defined(__arm__)
53 # define ANDROID_ARM_CODEGEN 1
54 #else
55 # define ANDROID_ARM_CODEGEN 0
56 #endif
57
58 #define DEBUG__CODEGEN_ONLY 0
59
60 /* Set to 1 to dump to the log the states that need a new
61 * code-generated scanline callback, i.e. those that don't
62 * have a corresponding shortcut function.
63 */
64 #define DEBUG_NEEDS 0
65
66 #define ASSEMBLY_SCRATCH_SIZE 2048
67
68 // ----------------------------------------------------------------------------
69 namespace android {
70 // ----------------------------------------------------------------------------
71
72 static void init_y(context_t*, int32_t);
73 static void init_y_noop(context_t*, int32_t);
74 static void init_y_packed(context_t*, int32_t);
75 static void init_y_error(context_t*, int32_t);
76
77 static void step_y__generic(context_t* c);
78 static void step_y__nop(context_t*);
79 static void step_y__smooth(context_t* c);
80 static void step_y__tmu(context_t* c);
81 static void step_y__w(context_t* c);
82
83 static void scanline(context_t* c);
84 static void scanline_perspective(context_t* c);
85 static void scanline_perspective_single(context_t* c);
86 static void scanline_t32cb16blend(context_t* c);
87 static void scanline_t32cb16blend_dither(context_t* c);
88 static void scanline_t32cb16blend_srca(context_t* c);
89 static void scanline_t32cb16blend_clamp(context_t* c);
90 static void scanline_t32cb16blend_clamp_dither(context_t* c);
91 static void scanline_t32cb16blend_clamp_mod(context_t* c);
92 static void scanline_x32cb16blend_clamp_mod(context_t* c);
93 static void scanline_t32cb16blend_clamp_mod_dither(context_t* c);
94 static void scanline_x32cb16blend_clamp_mod_dither(context_t* c);
95 static void scanline_t32cb16(context_t* c);
96 static void scanline_t32cb16_dither(context_t* c);
97 static void scanline_t32cb16_clamp(context_t* c);
98 static void scanline_t32cb16_clamp_dither(context_t* c);
99 static void scanline_col32cb16blend(context_t* c);
100 static void scanline_t16cb16_clamp(context_t* c);
101 static void scanline_t16cb16blend_clamp_mod(context_t* c);
102 static void scanline_memcpy(context_t* c);
103 static void scanline_memset8(context_t* c);
104 static void scanline_memset16(context_t* c);
105 static void scanline_memset32(context_t* c);
106 static void scanline_noop(context_t* c);
107 static void scanline_set(context_t* c);
108 static void scanline_clear(context_t* c);
109
110 static void rect_generic(context_t* c, size_t yc);
111 static void rect_memcpy(context_t* c, size_t yc);
112
113 extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t);
114 extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct);
115 extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct);
116 extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct);
117
118 // ----------------------------------------------------------------------------
119
120 static inline uint16_t convertAbgr8888ToRgb565(uint32_t pix)
121 {
122 return uint16_t( ((pix << 8) & 0xf800) |
123 ((pix >> 5) & 0x07e0) |
124 ((pix >> 19) & 0x001f) );
125 }
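/* Worked example: with pix = 0xFF8040C0 in ABGR order
 * (A=0xFF, B=0x80, G=0x40, R=0xC0):
 *   (pix << 8)  & 0xf800 = 0xC000   // top 5 bits of R -> bits 15..11
 *   (pix >> 5)  & 0x07e0 = 0x0200   // top 6 bits of G -> bits 10..5
 *   (pix >> 19) & 0x001f = 0x0010   // top 5 bits of B -> bits 4..0
 * giving the RGB565 value 0xC210.
 */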
126
127 struct shortcut_t {
128 needs_filter_t filter;
129 const char* desc;
130 void (*scanline)(context_t*);
131 void (*init_y)(context_t*, int32_t);
132 };
133
134 // Keep in sync with needs
135
136 /* To understand the values here, have a look at:
137 * system/core/include/private/pixelflinger/ggl_context.h
138 *
139 * Especially the lines defining and using GGL_RESERVE_NEEDS
140 *
141 * Quick reminders:
142 * - the last nibble of the first value is the destination buffer format.
143 * - the last nibble of the third value is the source texture format
144 * - formats: 4=rgb565 1=abgr8888 2=xbgr8888
145 *
146 * In the descriptions below:
147 *
148 * SRC means we copy the source pixels to the destination
149 *
150 * SRC_OVER means we blend the source pixels to the destination
151 * with dstFactor = 1-srcA, srcFactor=1 (premultiplied source).
152 * This mode is otherwise called 'blend'.
153 *
154 * SRCA_OVER means we blend the source pixels to the destination
155 * with dstFactor=1-srcA, srcFactor=srcA (non-premultiplied source).
156 * This mode is otherwise called 'blend_srca'
157 *
158 * clamp means we fetch source pixels from a texture with u/v clamping
159 *
160 * mod means the source pixels are modulated (multiplied) by the
161 * a/r/g/b of the current context's color. Typically used for
162 * fade-in / fade-out.
163 *
164 * dither means we dither 32 bit values to 16 bits
165 */
166 static shortcut_t shortcuts[] = {
167 { { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } },
168 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
169 "565 fb, 8888 tx, blend SRC_OVER", scanline_t32cb16blend, init_y_noop },
170 { { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } },
171 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
172 "565 fb, 8888 tx, SRC", scanline_t32cb16, init_y_noop },
173 /* same as first entry, but with dithering */
174 { { { 0x03515104, 0x00000177, { 0x00000A01, 0x00000000 } },
175 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
176 "565 fb, 8888 tx, blend SRC_OVER dither", scanline_t32cb16blend_dither, init_y_noop },
177 /* same as second entry, but with dithering */
178 { { { 0x03010104, 0x00000177, { 0x00000A01, 0x00000000 } },
179 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
180 "565 fb, 8888 tx, SRC dither", scanline_t32cb16_dither, init_y_noop },
181 /* this is used during the boot animation - CHEAT: ignore dithering */
182 { { { 0x03545404, 0x00000077, { 0x00000A01, 0x00000000 } },
183 { 0xFFFFFFFF, 0xFFFFFEFF, { 0xFFFFFFFF, 0x0000003F } } },
184 "565 fb, 8888 tx, blend dst:ONE_MINUS_SRCA src:SRCA", scanline_t32cb16blend_srca, init_y_noop },
185 /* special case for arbitrary texture coordinates (think scaling) */
186 { { { 0x03515104, 0x00000077, { 0x00000001, 0x00000000 } },
187 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
188 "565 fb, 8888 tx, SRC_OVER clamp", scanline_t32cb16blend_clamp, init_y },
189 { { { 0x03515104, 0x00000177, { 0x00000001, 0x00000000 } },
190 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
191 "565 fb, 8888 tx, SRC_OVER clamp dither", scanline_t32cb16blend_clamp_dither, init_y },
192 /* another case used during emulation */
193 { { { 0x03515104, 0x00000077, { 0x00001001, 0x00000000 } },
194 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
195 "565 fb, 8888 tx, SRC_OVER clamp modulate", scanline_t32cb16blend_clamp_mod, init_y },
196 /* and this */
197 { { { 0x03515104, 0x00000077, { 0x00001002, 0x00000000 } },
198 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
199 "565 fb, x888 tx, SRC_OVER clamp modulate", scanline_x32cb16blend_clamp_mod, init_y },
200 { { { 0x03515104, 0x00000177, { 0x00001001, 0x00000000 } },
201 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
202 "565 fb, 8888 tx, SRC_OVER clamp modulate dither", scanline_t32cb16blend_clamp_mod_dither, init_y },
203 { { { 0x03515104, 0x00000177, { 0x00001002, 0x00000000 } },
204 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
205 "565 fb, x888 tx, SRC_OVER clamp modulate dither", scanline_x32cb16blend_clamp_mod_dither, init_y },
206 { { { 0x03010104, 0x00000077, { 0x00000001, 0x00000000 } },
207 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
208 "565 fb, 8888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
209 { { { 0x03010104, 0x00000077, { 0x00000002, 0x00000000 } },
210 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
211 "565 fb, x888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
212 { { { 0x03010104, 0x00000177, { 0x00000001, 0x00000000 } },
213 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
214 "565 fb, 8888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
215 { { { 0x03010104, 0x00000177, { 0x00000002, 0x00000000 } },
216 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
217 "565 fb, x888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
218 { { { 0x03010104, 0x00000077, { 0x00000004, 0x00000000 } },
219 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
220 "565 fb, 565 tx, SRC clamp", scanline_t16cb16_clamp, init_y },
221 { { { 0x03515104, 0x00000077, { 0x00001004, 0x00000000 } },
222 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
223 "565 fb, 565 tx, SRC_OVER clamp", scanline_t16cb16blend_clamp_mod, init_y },
224 { { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } },
225 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } },
226 "565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed },
227 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
228 { 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } },
229 "(nop) alpha test", scanline_noop, init_y_noop },
230 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
231 { 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } },
232 "(nop) depth test", scanline_noop, init_y_noop },
233 { { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } },
234 { 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } },
235 "(nop) logic_op", scanline_noop, init_y_noop },
236 { { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } },
237 { 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } },
238 "(nop) color mask", scanline_noop, init_y_noop },
239 { { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } },
240 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
241 "(set) logic_op", scanline_set, init_y_noop },
242 { { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } },
243 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
244 "(clear) logic_op", scanline_clear, init_y_noop },
245 { { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } },
246 { 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } },
247 "(clear) blending 0/0", scanline_clear, init_y_noop },
248 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
249 { 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } },
250 "(error) invalid color-buffer format", scanline_noop, init_y_error },
251 };
252 static const needs_filter_t noblend1to1 = {
253 // (disregard dithering, see below)
254 { 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } },
255 { 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } }
256 };
257 static const needs_filter_t fill16noblend = {
258 { 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } },
259 { 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } }
260 };
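/* A shortcut entry matches the current state when each of the four needs
 * words agrees with the entry's first (value) word on every bit set in the
 * entry's second (mask) word; conceptually, per word:
 *
 *     matches = ((state.needs.n ^ value.n) & mask.n) == 0;
 *
 * (a sketch of the assumed value/mask semantics -- the actual test is
 * needs_t::match(), defined in ggl_context.h.)
 */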
261
262 // ----------------------------------------------------------------------------
263
264 #if ANDROID_ARM_CODEGEN
265 static CodeCache gCodeCache(12 * 1024);
266
267 class ScanlineAssembly : public Assembly {
268 AssemblyKey<needs_t> mKey;
269 public:
270 ScanlineAssembly(needs_t needs, size_t size)
271 : Assembly(size), mKey(needs) { }
272 const AssemblyKey<needs_t>& key() const { return mKey; }
273 };
274 #endif
275
276 // ----------------------------------------------------------------------------
277
278 void ggl_init_scanline(context_t* c)
279 {
280 c->init_y = init_y;
281 c->step_y = step_y__generic;
282 c->scanline = scanline;
283 }
284
285 void ggl_uninit_scanline(context_t* c)
286 {
287 if (c->state.buffers.coverage)
288 free(c->state.buffers.coverage);
289 #if ANDROID_ARM_CODEGEN
290 if (c->scanline_as)
291 c->scanline_as->decStrong(c);
292 #endif
293 }
294
295 // ----------------------------------------------------------------------------
296
297 static void pick_scanline(context_t* c)
298 {
299 #if (!defined(DEBUG__CODEGEN_ONLY) || (DEBUG__CODEGEN_ONLY == 0))
300
301 #if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC
302 c->init_y = init_y;
303 c->step_y = step_y__generic;
304 c->scanline = scanline;
305 return;
306 #endif
307
308 //printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n",
309 // c->state.needs.n, c->state.needs.p,
310 // c->state.needs.t[0], c->state.needs.t[1]);
311
312 // first handle the special case that we cannot test with a filter
313 const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n);
314 if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) {
315 if (c->state.needs.match(noblend1to1)) {
316 // this will match regardless of the dithering state: since src
317 // and dest have the same format, there is no dithering
318 // to be done.
319 const GGLFormat* f =
320 &(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]);
321 if ((f->components == GGL_RGB) ||
322 (f->components == GGL_RGBA) ||
323 (f->components == GGL_LUMINANCE) ||
324 (f->components == GGL_LUMINANCE_ALPHA))
325 {
326 // format must have all of RGB components
327 // (so the current color doesn't show through)
328 c->scanline = scanline_memcpy;
329 c->init_y = init_y_noop;
330 return;
331 }
332 }
333 }
334
335 if (c->state.needs.match(fill16noblend)) {
336 c->init_y = init_y_packed;
337 switch (c->formats[cb_format].size) {
338 case 1: c->scanline = scanline_memset8; return;
339 case 2: c->scanline = scanline_memset16; return;
340 case 4: c->scanline = scanline_memset32; return;
341 }
342 }
343
344 const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t);
345 for (int i=0 ; i<numFilters ; i++) {
346 if (c->state.needs.match(shortcuts[i].filter)) {
347 c->scanline = shortcuts[i].scanline;
348 c->init_y = shortcuts[i].init_y;
349 return;
350 }
351 }
352
353 #if DEBUG_NEEDS
354 ALOGI("Needs: n=0x%08x p=0x%08x t0=0x%08x t1=0x%08x",
355 c->state.needs.n, c->state.needs.p,
356 c->state.needs.t[0], c->state.needs.t[1]);
357 #endif
358
359 #endif // DEBUG__CODEGEN_ONLY
360
361 c->init_y = init_y;
362 c->step_y = step_y__generic;
363
364 #if ANDROID_ARM_CODEGEN
365 // we're going to have to generate some code...
366 // here, generate code for our pixel pipeline
367 const AssemblyKey<needs_t> key(c->state.needs);
368 sp<Assembly> assembly = gCodeCache.lookup(key);
369 if (assembly == 0) {
370 // create a new assembly region
371 sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs,
372 ASSEMBLY_SCRATCH_SIZE);
373 // initialize our assembler
374 GGLAssembler assembler( new ARMAssembler(a) );
375 //GGLAssembler assembler(
376 // new ARMAssemblerOptimizer(new ARMAssembler(a)) );
377 // generate the scanline code for the given needs
378 int err = assembler.scanline(c->state.needs, c);
379 if (ggl_likely(!err)) {
380 // finally, cache this assembly
381 err = gCodeCache.cache(a->key(), a);
382 }
383 if (ggl_unlikely(err)) {
384 ALOGE("error generating or caching assembly. Reverting to NOP.");
385 c->scanline = scanline_noop;
386 c->init_y = init_y_noop;
387 c->step_y = step_y__nop;
388 return;
389 }
390 assembly = a;
391 }
392
393 // release the previous assembly
394 if (c->scanline_as) {
395 c->scanline_as->decStrong(c);
396 }
397
398 //ALOGI("using generated pixel-pipeline");
399 c->scanline_as = assembly.get();
400 c->scanline_as->incStrong(c); // hold on to assembly
401 c->scanline = (void(*)(context_t* c))assembly->base();
402 #else
403 // ALOGW("using generic (slow) pixel-pipeline");
404 c->scanline = scanline;
405 #endif
406 }
407
408 void ggl_pick_scanline(context_t* c)
409 {
410 pick_scanline(c);
411 if ((c->state.enables & GGL_ENABLE_W) &&
412 (c->state.enables & GGL_ENABLE_TMUS))
413 {
414 c->span = c->scanline;
415 c->scanline = scanline_perspective;
416 if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) {
417 // only one TMU enabled
418 c->scanline = scanline_perspective_single;
419 }
420 }
421 }
422
423 // ----------------------------------------------------------------------------
424
425 static void blending(context_t* c, pixel_t* fragment, pixel_t* fb);
426 static void blend_factor(context_t* c, pixel_t* r, uint32_t factor,
427 const pixel_t* src, const pixel_t* dst);
428 static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv);
429
430 #if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
431
432 // no need to compile the generic-pipeline, it can't be reached
433 void scanline(context_t*)
434 {
435 }
436
437 #else
438
439 void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv)
440 {
441 if (su && sv) {
442 if (su > sv) {
443 v = ggl_expand(v, sv, su);
444 sv = su;
445 } else if (su < sv) {
446 u = ggl_expand(u, su, sv);
447 su = sv;
448 }
449 }
450 }
451
452 void blending(context_t* c, pixel_t* fragment, pixel_t* fb)
453 {
454 rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]);
455 rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]);
456 rescale(fragment->c[2], fragment->s[2], fb->c[2], fb->s[2]);
457 rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]);
458
459 pixel_t sf, df;
460 blend_factor(c, &sf, c->state.blend.src, fragment, fb);
461 blend_factor(c, &df, c->state.blend.dst, fragment, fb);
462
463 fragment->c[1] =
464 gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1]));
465 fragment->c[2] =
466 gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2]));
467 fragment->c[3] =
468 gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3]));
469
470 if (c->state.blend.alpha_separate) {
471 blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb);
472 blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb);
473 }
474
475 fragment->c[0] =
476 gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0]));
477
478 // clamp to 1.0
479 if (fragment->c[0] >= (1LU<<fragment->s[0]))
480 fragment->c[0] = (1<<fragment->s[0])-1;
481 if (fragment->c[1] >= (1LU<<fragment->s[1]))
482 fragment->c[1] = (1<<fragment->s[1])-1;
483 if (fragment->c[2] >= (1LU<<fragment->s[2]))
484 fragment->c[2] = (1<<fragment->s[2])-1;
485 if (fragment->c[3] >= (1LU<<fragment->s[3]))
486 fragment->c[3] = (1<<fragment->s[3])-1;
487 }
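/* Summary of the code above: once rescale() has brought each fragment/
 * framebuffer channel pair to a common bit depth, the standard blend
 * equation is evaluated per channel in fixed point,
 *
 *     C_out = C_src * srcFactor + C_dst * dstFactor
 *
 * with gglMulAddx(a, b, c) acting as a 16.16 multiply-accumulate (roughly
 * (a*b >> 16) + c) and the factors coming from blend_factor() below; the
 * result is then clamped to the channel's maximum representable value.
 */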
488
489 static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0)
490 {
491 if (!size)
492 return def;
493
494 // scale to 16 bits
495 if (size > 16) {
496 x >>= (size - 16);
497 } else if (size < 16) {
498 x = ggl_expand(x, size, 16);
499 }
500 x += x >> 15;
501 return x;
502 }
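/* Worked example: a fully saturated 5-bit channel, x = 0x1F, expands to
 * 0xFFFF (assuming ggl_expand() bit-replicates 5 -> 16 bits), and the final
 * 'x += x >> 15' step turns 0xFFFF into 0x10000 == FIXED_ONE, so a
 * saturated channel maps to a blend factor of exactly 1.0.
 */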
503
504 void blend_factor(context_t* c, pixel_t* r,
505 uint32_t factor, const pixel_t* src, const pixel_t* dst)
506 {
507 switch (factor) {
508 case GGL_ZERO:
509 r->c[1] =
510 r->c[2] =
511 r->c[3] =
512 r->c[0] = 0;
513 break;
514 case GGL_ONE:
515 r->c[1] =
516 r->c[2] =
517 r->c[3] =
518 r->c[0] = FIXED_ONE;
519 break;
520 case GGL_DST_COLOR:
521 r->c[1] = blendfactor(dst->c[1], dst->s[1]);
522 r->c[2] = blendfactor(dst->c[2], dst->s[2]);
523 r->c[3] = blendfactor(dst->c[3], dst->s[3]);
524 r->c[0] = blendfactor(dst->c[0], dst->s[0]);
525 break;
526 case GGL_SRC_COLOR:
527 r->c[1] = blendfactor(src->c[1], src->s[1]);
528 r->c[2] = blendfactor(src->c[2], src->s[2]);
529 r->c[3] = blendfactor(src->c[3], src->s[3]);
530 r->c[0] = blendfactor(src->c[0], src->s[0]);
531 break;
532 case GGL_ONE_MINUS_DST_COLOR:
533 r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]);
534 r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]);
535 r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]);
536 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]);
537 break;
538 case GGL_ONE_MINUS_SRC_COLOR:
539 r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]);
540 r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]);
541 r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]);
542 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]);
543 break;
544 case GGL_SRC_ALPHA:
545 r->c[1] =
546 r->c[2] =
547 r->c[3] =
548 r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE);
549 break;
550 case GGL_ONE_MINUS_SRC_ALPHA:
551 r->c[1] =
552 r->c[2] =
553 r->c[3] =
554 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE);
555 break;
556 case GGL_DST_ALPHA:
557 r->c[1] =
558 r->c[2] =
559 r->c[3] =
560 r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
561 break;
562 case GGL_ONE_MINUS_DST_ALPHA:
563 r->c[1] =
564 r->c[2] =
565 r->c[3] =
566 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
567 break;
568 case GGL_SRC_ALPHA_SATURATE:
569 // XXX: GGL_SRC_ALPHA_SATURATE
570 break;
571 }
572 }
573
574 static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap)
575 {
576 GGLfixed d;
577 if (tx_wrap == GGL_REPEAT) {
578 d = (uint32_t(coord)>>16) * size;
579 } else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics
580 const GGLfixed clamp_min = FIXED_HALF;
581 const GGLfixed clamp_max = (size << 16) - FIXED_HALF;
582 if (coord < clamp_min) coord = clamp_min;
583 if (coord > clamp_max) coord = clamp_max;
584 d = coord;
585 } else { // 1:1
586 const GGLfixed clamp_min = 0;
587 const GGLfixed clamp_max = (size << 16);
588 if (coord < clamp_min) coord = clamp_min;
589 if (coord > clamp_max) coord = clamp_max;
590 d = coord;
591 }
592 return d;
593 }
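/* Fixed-point conventions used above: for GGL_CLAMP, 'coord' is a 16.16
 * texel coordinate clamped to [half-texel, size - half-texel]
 * (CLAMP_TO_EDGE semantics); for GGL_REPEAT, the top 16 bits of 'coord'
 * are read as a 0.16 fraction of the texture, so plain 32-bit overflow
 * produces the repeat and the multiply by 'size' converts the fraction
 * back into a 16.16 texel coordinate in [0, size).
 */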
594
595 static inline
596 GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len)
597 {
598 const int32_t end = dvdx * (len-1) + v;
599 if (end < 0)
600 v -= end;
601 v &= ~(v>>31);
602 return v;
603 }
604
605 void scanline(context_t* c)
606 {
607 const uint32_t enables = c->state.enables;
608 const int xs = c->iterators.xl;
609 const int x1 = c->iterators.xr;
610 int xc = x1 - xs;
611 const int16_t* covPtr = c->state.buffers.coverage + xs;
612
613 // All iterated values are sampled at the pixel center
614
615 // reset iterators for that scanline...
616 GGLcolor r, g, b, a;
617 iterators_t& ci = c->iterators;
618 if (enables & GGL_ENABLE_SMOOTH) {
619 r = (xs * c->shade.drdx) + ci.ydrdy;
620 g = (xs * c->shade.dgdx) + ci.ydgdy;
621 b = (xs * c->shade.dbdx) + ci.ydbdy;
622 a = (xs * c->shade.dadx) + ci.ydady;
623 r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc);
624 g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc);
625 b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc);
626 a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc);
627 } else {
628 r = ci.ydrdy;
629 g = ci.ydgdy;
630 b = ci.ydbdy;
631 a = ci.ydady;
632 }
633
634 // z iterators are 1.31
635 GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy;
636 GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy;
637
638 struct {
639 GGLfixed s, t;
640 } tc[GGL_TEXTURE_UNIT_COUNT];
641 if (enables & GGL_ENABLE_TMUS) {
642 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
643 if (c->state.texture[i].enable) {
644 texture_iterators_t& ti = c->state.texture[i].iterators;
645 if (enables & GGL_ENABLE_W) {
646 tc[i].s = ti.ydsdy;
647 tc[i].t = ti.ydtdy;
648 } else {
649 tc[i].s = (xs * ti.dsdx) + ti.ydsdy;
650 tc[i].t = (xs * ti.dtdx) + ti.ydtdy;
651 }
652 }
653 }
654 }
655
656 pixel_t fragment;
657 pixel_t texel;
658 pixel_t fb;
659
660 uint32_t x = xs;
661 uint32_t y = c->iterators.y;
662
663 while (xc--) {
664
665 { // just a scope
666
667 // read color (convert to 8 bits by keeping only the integer part)
668 fragment.s[1] = fragment.s[2] =
669 fragment.s[3] = fragment.s[0] = 8;
670 fragment.c[1] = r >> (GGL_COLOR_BITS-8);
671 fragment.c[2] = g >> (GGL_COLOR_BITS-8);
672 fragment.c[3] = b >> (GGL_COLOR_BITS-8);
673 fragment.c[0] = a >> (GGL_COLOR_BITS-8);
674
675 // texturing
676 if (enables & GGL_ENABLE_TMUS) {
677 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
678 texture_t& tx = c->state.texture[i];
679 if (!tx.enable)
680 continue;
681 texture_iterators_t& ti = tx.iterators;
682 int32_t u, v;
683
684 // s-coordinate
685 if (tx.s_coord != GGL_ONE_TO_ONE) {
686 const int w = tx.surface.width;
687 u = wrapping(tc[i].s, w, tx.s_wrap);
688 tc[i].s += ti.dsdx;
689 } else {
690 u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF;
691 }
692
693 // t-coordinate
694 if (tx.t_coord != GGL_ONE_TO_ONE) {
695 const int h = tx.surface.height;
696 v = wrapping(tc[i].t, h, tx.t_wrap);
697 tc[i].t += ti.dtdx;
698 } else {
699 v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF;
700 }
701
702 // read texture
703 if (tx.mag_filter == GGL_NEAREST &&
704 tx.min_filter == GGL_NEAREST)
705 {
706 u >>= 16;
707 v >>= 16;
708 tx.surface.read(&tx.surface, c, u, v, &texel);
709 } else {
710 const int w = tx.surface.width;
711 const int h = tx.surface.height;
712 u -= FIXED_HALF;
713 v -= FIXED_HALF;
714 int u0 = u >> 16;
715 int v0 = v >> 16;
716 int u1 = u0 + 1;
717 int v1 = v0 + 1;
718 if (tx.s_wrap == GGL_REPEAT) {
719 if (u0<0) u0 += w;
720 if (u1<0) u1 += w;
721 if (u0>=w) u0 -= w;
722 if (u1>=w) u1 -= w;
723 } else {
724 if (u0<0) u0 = 0;
725 if (u1<0) u1 = 0;
726 if (u0>=w) u0 = w-1;
727 if (u1>=w) u1 = w-1;
728 }
729 if (tx.t_wrap == GGL_REPEAT) {
730 if (v0<0) v0 += h;
731 if (v1<0) v1 += h;
732 if (v0>=h) v0 -= h;
733 if (v1>=h) v1 -= h;
734 } else {
735 if (v0<0) v0 = 0;
736 if (v1<0) v1 = 0;
737 if (v0>=h) v0 = h-1;
738 if (v1>=h) v1 = h-1;
739 }
740 pixel_t texels[4];
741 uint32_t mm[4];
742 tx.surface.read(&tx.surface, c, u0, v0, &texels[0]);
743 tx.surface.read(&tx.surface, c, u0, v1, &texels[1]);
744 tx.surface.read(&tx.surface, c, u1, v0, &texels[2]);
745 tx.surface.read(&tx.surface, c, u1, v1, &texels[3]);
746 u = (u >> 12) & 0xF;
747 v = (v >> 12) & 0xF;
748 u += u>>3;
749 v += v>>3;
750 mm[0] = (0x10 - u) * (0x10 - v);
751 mm[1] = (0x10 - u) * v;
752 mm[2] = u * (0x10 - v);
753 mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]);
754 for (int j=0 ; j<4 ; j++) {
755 texel.s[j] = texels[0].s[j];
756 if (!texel.s[j]) continue;
757 texel.s[j] += 8;
758 texel.c[j] = texels[0].c[j]*mm[0] +
759 texels[1].c[j]*mm[1] +
760 texels[2].c[j]*mm[2] +
761 texels[3].c[j]*mm[3] ;
762 }
763 }
764
765 // Texture environment...
766 for (int j=0 ; j<4 ; j++) {
767 uint32_t& Cf = fragment.c[j];
768 uint32_t& Ct = texel.c[j];
769 uint8_t& sf = fragment.s[j];
770 uint8_t& st = texel.s[j];
771 uint32_t At = texel.c[0];
772 uint8_t sat = texel.s[0];
773 switch (tx.env) {
774 case GGL_REPLACE:
775 if (st) {
776 Cf = Ct;
777 sf = st;
778 }
779 break;
780 case GGL_MODULATE:
781 if (st) {
782 uint32_t factor = Ct + (Ct>>(st-1));
783 Cf = (Cf * factor) >> st;
784 }
785 break;
786 case GGL_DECAL:
787 if (sat) {
788 rescale(Cf, sf, Ct, st);
789 Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat;
790 }
791 break;
792 case GGL_BLEND:
793 if (st) {
794 uint32_t Cc = tx.env_color[i];
795 if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8;
796 else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf);
797 uint32_t factor = Ct + (Ct>>(st-1));
798 Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st;
799 }
800 break;
801 case GGL_ADD:
802 if (st) {
803 rescale(Cf, sf, Ct, st);
804 Cf += Ct;
805 }
806 break;
807 }
808 }
809 }
810 }
811
812 // coverage application
813 if (enables & GGL_ENABLE_AA) {
814 int16_t cf = *covPtr++;
815 fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15;
816 }
817
818 // alpha-test
819 if (enables & GGL_ENABLE_ALPHA_TEST) {
820 GGLcolor ref = c->state.alpha_test.ref;
821 GGLcolor alpha = (uint64_t(fragment.c[0]) *
822 ((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1);
823 switch (c->state.alpha_test.func) {
824 case GGL_NEVER: goto discard;
825 case GGL_LESS: if (alpha<ref) break; goto discard;
826 case GGL_EQUAL: if (alpha==ref) break; goto discard;
827 case GGL_LEQUAL: if (alpha<=ref) break; goto discard;
828 case GGL_GREATER: if (alpha>ref) break; goto discard;
829 case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard;
830 case GGL_GEQUAL: if (alpha>=ref) break; goto discard;
831 }
832 }
833
834 // depth test
835 if (c->state.buffers.depth.format) {
836 if (enables & GGL_ENABLE_DEPTH_TEST) {
837 surface_t* cb = &(c->state.buffers.depth);
838 uint16_t* p = (uint16_t*)(cb->data)+(x+(cb->stride*y));
839 uint16_t zz = uint32_t(z)>>(16);
840 uint16_t depth = *p;
841 switch (c->state.depth_test.func) {
842 case GGL_NEVER: goto discard;
843 case GGL_LESS: if (zz<depth) break; goto discard;
844 case GGL_EQUAL: if (zz==depth) break; goto discard;
845 case GGL_LEQUAL: if (zz<=depth) break; goto discard;
846 case GGL_GREATER: if (zz>depth) break; goto discard;
847 case GGL_NOTEQUAL: if (zz!=depth) break; goto discard;
848 case GGL_GEQUAL: if (zz>=depth) break; goto discard;
849 }
850 // note: the depth buffer is only written when the depth test is enabled
851 /*
852 fragment.s[1] = fragment.s[2] =
853 fragment.s[3] = fragment.s[0] = 8;
854 fragment.c[1] =
855 fragment.c[2] =
856 fragment.c[3] =
857 fragment.c[0] = 255 - (zz>>8);
858 */
859 if (c->state.mask.depth) {
860 *p = zz;
861 }
862 }
863 }
864
865 // fog
866 if (enables & GGL_ENABLE_FOG) {
867 for (int i=1 ; i<=3 ; i++) {
868 GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF;
869 uint32_t& c = fragment.c[i];
870 uint8_t& s = fragment.s[i];
871 c = (c * 0x10000) / ((1<<s)-1);
872 c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - f));
873 s = 16;
874 }
875 }
876
877 // blending
878 if (enables & GGL_ENABLE_BLENDING) {
879 fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind
880 fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0;
881 c->state.buffers.color.read(
882 &(c->state.buffers.color), c, x, y, &fb);
883 blending( c, &fragment, &fb );
884 }
885
886 // write
887 c->state.buffers.color.write(
888 &(c->state.buffers.color), c, x, y, &fragment);
889 }
890
891 discard:
892 // iterate...
893 x += 1;
894 if (enables & GGL_ENABLE_SMOOTH) {
895 r += c->shade.drdx;
896 g += c->shade.dgdx;
897 b += c->shade.dbdx;
898 a += c->shade.dadx;
899 }
900 z += c->shade.dzdx;
901 f += c->shade.dfdx;
902 }
903 }
904
905 #endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
906
907 // ----------------------------------------------------------------------------
908 #if 0
909 #pragma mark -
910 #pragma mark Scanline
911 #endif
912
913 /* Used to parse a 32-bit source texture linearly. Usage is:
914 *
915 * horz_iterator32 hi(context);
916 * while (...) {
917 * uint32_t src_pixel = hi.get_pixel32();
918 * ...
919 * }
920 *
921 * Use only for one-to-one texture mapping.
922 */
923 struct horz_iterator32 {
924 horz_iterator32(context_t* c) {
925 const int x = c->iterators.xl;
926 const int y = c->iterators.y;
927 texture_t& tx = c->state.texture[0];
928 const int32_t u = (tx.shade.is0>>16) + x;
929 const int32_t v = (tx.shade.it0>>16) + y;
930 m_src = reinterpret_cast<uint32_t*>(tx.surface.data)+(u+(tx.surface.stride*v));
931 }
932 uint32_t get_pixel32() {
933 return *m_src++;
934 }
935 protected:
936 uint32_t* m_src;
937 };
938
939 /* A variant for 16-bit source textures. */
940 struct horz_iterator16 {
941 horz_iterator16(context_t* c) {
942 const int x = c->iterators.xl;
943 const int y = c->iterators.y;
944 texture_t& tx = c->state.texture[0];
945 const int32_t u = (tx.shade.is0>>16) + x;
946 const int32_t v = (tx.shade.it0>>16) + y;
947 m_src = reinterpret_cast<uint16_t*>(tx.surface.data)+(u+(tx.surface.stride*v));
948 }
949 uint16_t get_pixel16() {
950 return *m_src++;
951 }
952 protected:
953 uint16_t* m_src;
954 };
955
956 /* A clamp iterator is used to iterate inside a texture with GGL_CLAMP.
957 * After initialization, call get_src16() or get_src32() to get the current
958 * texture pixel value.
959 */
960 struct clamp_iterator {
961 clamp_iterator(context_t* c) {
962 const int xs = c->iterators.xl;
963 texture_t& tx = c->state.texture[0];
964 texture_iterators_t& ti = tx.iterators;
965 m_s = (xs * ti.dsdx) + ti.ydsdy;
966 m_t = (xs * ti.dtdx) + ti.ydtdy;
967 m_ds = ti.dsdx;
968 m_dt = ti.dtdx;
969 m_width_m1 = tx.surface.width - 1;
970 m_height_m1 = tx.surface.height - 1;
971 m_data = tx.surface.data;
972 m_stride = tx.surface.stride;
973 }
974 uint16_t get_pixel16() {
975 int u, v;
976 get_uv(u, v);
977 uint16_t* src = reinterpret_cast<uint16_t*>(m_data) + (u + (m_stride*v));
978 return src[0];
979 }
980 uint32_t get_pixel32() {
981 int u, v;
982 get_uv(u, v);
983 uint32_t* src = reinterpret_cast<uint32_t*>(m_data) + (u + (m_stride*v));
984 return src[0];
985 }
986 private:
987 void get_uv(int& u, int& v) {
988 int uu = m_s >> 16;
989 int vv = m_t >> 16;
990 if (uu < 0)
991 uu = 0;
992 if (uu > m_width_m1)
993 uu = m_width_m1;
994 if (vv < 0)
995 vv = 0;
996 if (vv > m_height_m1)
997 vv = m_height_m1;
998 u = uu;
999 v = vv;
1000 m_s += m_ds;
1001 m_t += m_dt;
1002 }
1003
1004 GGLfixed m_s, m_t;
1005 GGLfixed m_ds, m_dt;
1006 int m_width_m1, m_height_m1;
1007 uint8_t* m_data;
1008 int m_stride;
1009 };
1010
1011 /*
1012 * The 'horizontal clamp iterator' variant corresponds to the case where
1013 * the 'v' coordinate doesn't change. This is useful to avoid one mult and
1014 * extra adds / checks per pixel, if the blending/processing operation after
1015 * this is very fast.
1016 */
1017 static int is_context_horizontal(const context_t* c) {
1018 return (c->state.texture[0].iterators.dtdx == 0);
1019 }
1020
1021 struct horz_clamp_iterator {
1022 uint16_t get_pixel16() {
1023 int u = m_s >> 16;
1024 m_s += m_ds;
1025 if (u < 0)
1026 u = 0;
1027 if (u > m_width_m1)
1028 u = m_width_m1;
1029 const uint16_t* src = reinterpret_cast<const uint16_t*>(m_data);
1030 return src[u];
1031 }
1032 uint32_t get_pixel32() {
1033 int u = m_s >> 16;
1034 m_s += m_ds;
1035 if (u < 0)
1036 u = 0;
1037 if (u > m_width_m1)
1038 u = m_width_m1;
1039 const uint32_t* src = reinterpret_cast<const uint32_t*>(m_data);
1040 return src[u];
1041 }
1042 protected:
1043 void init(const context_t* c, int shift);
1044 GGLfixed m_s;
1045 GGLfixed m_ds;
1046 int m_width_m1;
1047 const uint8_t* m_data;
1048 };
1049
1050 void horz_clamp_iterator::init(const context_t* c, int shift)
1051 {
1052 const int xs = c->iterators.xl;
1053 const texture_t& tx = c->state.texture[0];
1054 const texture_iterators_t& ti = tx.iterators;
1055 m_s = (xs * ti.dsdx) + ti.ydsdy;
1056 m_ds = ti.dsdx;
1057 m_width_m1 = tx.surface.width-1;
1058 m_data = tx.surface.data;
1059
1060 GGLfixed t = (xs * ti.dtdx) + ti.ydtdy;
1061 int v = t >> 16;
1062 if (v < 0)
1063 v = 0;
1064 else if (v >= (int)tx.surface.height)
1065 v = (int)tx.surface.height-1;
1066
1067 m_data += (tx.surface.stride*v) << shift;
1068 }
1069
1070 struct horz_clamp_iterator16 : horz_clamp_iterator {
1071 horz_clamp_iterator16(const context_t* c) {
1072 init(c,1);
1073 };
1074 };
1075
1076 struct horz_clamp_iterator32 : horz_clamp_iterator {
1077 horz_clamp_iterator32(context_t* c) {
1078 init(c,2);
1079 };
1080 };
1081
1082 /* This is used to perform dithering operations.
1083 */
1084 struct ditherer {
1085 ditherer(const context_t* c) {
1086 const int x = c->iterators.xl;
1087 const int y = c->iterators.y;
1088 m_line = &c->ditherMatrix[ ((y & GGL_DITHER_MASK)<<GGL_DITHER_ORDER_SHIFT) ];
1089 m_index = x & GGL_DITHER_MASK;
1090 }
1091 void step(void) {
1092 m_index++;
1093 }
1094 int get_value(void) {
1095 int ret = m_line[m_index & GGL_DITHER_MASK];
1096 m_index++;
1097 return ret;
1098 }
1099 uint16_t abgr8888ToRgb565(uint32_t s) {
1100 uint32_t r = s & 0xff;
1101 uint32_t g = (s >> 8) & 0xff;
1102 uint32_t b = (s >> 16) & 0xff;
1103 return rgb888ToRgb565(r,g,b);
1104 }
1105 /* The following assumes that r/g/b are in the 0..255 range each */
1106 uint16_t rgb888ToRgb565(uint32_t& r, uint32_t& g, uint32_t &b) {
1107 int threshold = get_value();
1108 /* dither in on GGL_DITHER_BITS, and each of r, g, b is on 8 bits */
1109 r += (threshold >> (GGL_DITHER_BITS-8 +5));
1110 g += (threshold >> (GGL_DITHER_BITS-8 +6));
1111 b += (threshold >> (GGL_DITHER_BITS-8 +5));
1112 if (r > 0xff)
1113 r = 0xff;
1114 if (g > 0xff)
1115 g = 0xff;
1116 if (b > 0xff)
1117 b = 0xff;
1118 return uint16_t(((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3));
1119 }
1120 protected:
1121 const uint8_t* m_line;
1122 int m_index;
1123 };
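/* Worked example (assuming GGL_DITHER_BITS == 6, i.e. thresholds in 0..63):
 * for r = 100 and a threshold of 32, rgb888ToRgb565() adds 32 >> 3 = 4 to
 * the red channel before truncating to 5 bits, so the stored value is
 * 104 >> 3 = 13 instead of 100 >> 3 = 12; neighboring pixels that draw a
 * lower threshold keep 12, which is how the quantization error gets spread
 * over the dither cell.
 */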
1124
1125 /* This structure is used to blend (SRC_OVER) 32-bit source pixels
1126 * onto 16-bit destination ones. Usage is simply:
1127 *
1128 * blender.blend(<32-bit-src-pixel-value>,<ptr-to-16-bit-dest-pixel>)
1129 */
1130 struct blender_32to16 {
1131 blender_32to16(context_t* c) { }
1132 void write(uint32_t s, uint16_t* dst) {
1133 if (s == 0)
1134 return;
1135 s = GGL_RGBA_TO_HOST(s);
1136 int sA = (s>>24);
1137 if (sA == 0xff) {
1138 *dst = convertAbgr8888ToRgb565(s);
1139 } else {
1140 int f = 0x100 - (sA + (sA>>7));
1141 int sR = (s >> ( 3))&0x1F;
1142 int sG = (s >> ( 8+2))&0x3F;
1143 int sB = (s >> (16+3))&0x1F;
1144 uint16_t d = *dst;
1145 int dR = (d>>11)&0x1f;
1146 int dG = (d>>5)&0x3f;
1147 int dB = (d)&0x1f;
1148 sR += (f*dR)>>8;
1149 sG += (f*dG)>>8;
1150 sB += (f*dB)>>8;
1151 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1152 }
1153 }
1154 void write(uint32_t s, uint16_t* dst, ditherer& di) {
1155 if (s == 0) {
1156 di.step();
1157 return;
1158 }
1159 s = GGL_RGBA_TO_HOST(s);
1160 int sA = (s>>24);
1161 if (sA == 0xff) {
1162 *dst = di.abgr8888ToRgb565(s);
1163 } else {
1164 int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
1165 int f = 0x100 - (sA + (sA>>7));
1166 int sR = (s >> ( 3))&0x1F;
1167 int sG = (s >> ( 8+2))&0x3F;
1168 int sB = (s >> (16+3))&0x1F;
1169 uint16_t d = *dst;
1170 int dR = (d>>11)&0x1f;
1171 int dG = (d>>5)&0x3f;
1172 int dB = (d)&0x1f;
1173 sR = ((sR << 8) + f*dR + threshold)>>8;
1174 sG = ((sG << 8) + f*dG + threshold)>>8;
1175 sB = ((sB << 8) + f*dB + threshold)>>8;
1176 if (sR > 0x1f) sR = 0x1f;
1177 if (sG > 0x3f) sG = 0x3f;
1178 if (sB > 0x1f) sB = 0x1f;
1179 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1180 }
1181 }
1182 };
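/* Numeric sketch of the SRC_OVER math above: for source alpha sA = 0x80,
 * f = 0x100 - (0x80 + (0x80 >> 7)) = 0x7F, i.e. (1 - srcA) scaled to 0..256.
 * A destination red of dR = 31 (full red in 565) then contributes
 * (0x7F * 31) >> 8 = 15, which is added to the premultiplied source's 5-bit
 * red. The 'sA + (sA >> 7)' term maps 0xFF to exactly 0x100, so an opaque
 * source replaces the destination completely.
 */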
1183
1184 /* This blender does the same for the 'blend_srca' operation.
1185 * where dstFactor = 1-srcA and srcFactor = srcA
1186 */
1187 struct blender_32to16_srcA {
1188 blender_32to16_srcA(const context_t* c) { }
1189 void write(uint32_t s, uint16_t* dst) {
1190 if (!s) {
1191 return;
1192 }
1193 uint16_t d = *dst;
1194 s = GGL_RGBA_TO_HOST(s);
1195 int sR = (s >> ( 3))&0x1F;
1196 int sG = (s >> ( 8+2))&0x3F;
1197 int sB = (s >> (16+3))&0x1F;
1198 int sA = (s>>24);
1199 int f1 = (sA + (sA>>7));
1200 int f2 = 0x100-f1;
1201 int dR = (d>>11)&0x1f;
1202 int dG = (d>>5)&0x3f;
1203 int dB = (d)&0x1f;
1204 sR = (f1*sR + f2*dR)>>8;
1205 sG = (f1*sG + f2*dG)>>8;
1206 sB = (f1*sB + f2*dB)>>8;
1207 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1208 }
1209 };
1210
1211 /* Common init code for the modulating blenders */
1212 struct blender_modulate {
1213 void init(const context_t* c) {
1214 const int r = c->iterators.ydrdy >> (GGL_COLOR_BITS-8);
1215 const int g = c->iterators.ydgdy >> (GGL_COLOR_BITS-8);
1216 const int b = c->iterators.ydbdy >> (GGL_COLOR_BITS-8);
1217 const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
1218 m_r = r + (r >> 7);
1219 m_g = g + (g >> 7);
1220 m_b = b + (b >> 7);
1221 m_a = a + (a >> 7);
1222 }
1223 protected:
1224 int m_r, m_g, m_b, m_a;
1225 };
1226
1227 /* This blender does a normal blend after modulation.
1228 */
1229 struct blender_32to16_modulate : blender_modulate {
1230 blender_32to16_modulate(const context_t* c) {
1231 init(c);
1232 }
1233 void write(uint32_t s, uint16_t* dst) {
1234 // blend source and destination
1235 if (!s) {
1236 return;
1237 }
1238 s = GGL_RGBA_TO_HOST(s);
1239
1240 /* We need to modulate s */
1241 uint32_t sA = (s >> 24);
1242 uint32_t sB = (s >> 16) & 0xff;
1243 uint32_t sG = (s >> 8) & 0xff;
1244 uint32_t sR = s & 0xff;
1245
1246 sA = (sA*m_a) >> 8;
1247 /* Keep R/G/B scaled to 5.8 or 6.8 fixed-point format */
1248 sR = (sR*m_r) >> (8 - 5);
1249 sG = (sG*m_g) >> (8 - 6);
1250 sB = (sB*m_b) >> (8 - 5);
1251
1252 /* Now do a normal blend */
1253 int f = 0x100 - (sA + (sA>>7));
1254 uint16_t d = *dst;
1255 int dR = (d>>11)&0x1f;
1256 int dG = (d>>5)&0x3f;
1257 int dB = (d)&0x1f;
1258 sR = (sR + f*dR)>>8;
1259 sG = (sG + f*dG)>>8;
1260 sB = (sB + f*dB)>>8;
1261 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1262 }
1263 void write(uint32_t s, uint16_t* dst, ditherer& di) {
1264 // blend source and destination
1265 if (!s) {
1266 di.step();
1267 return;
1268 }
1269 s = GGL_RGBA_TO_HOST(s);
1270
1271 /* We need to modulate s */
1272 uint32_t sA = (s >> 24);
1273 uint32_t sB = (s >> 16) & 0xff;
1274 uint32_t sG = (s >> 8) & 0xff;
1275 uint32_t sR = s & 0xff;
1276
1277 sA = (sA*m_a) >> 8;
1278 /* keep R/G/B scaled to 5.8 or 6.8 fixed-point format */
1279 sR = (sR*m_r) >> (8 - 5);
1280 sG = (sG*m_g) >> (8 - 6);
1281 sB = (sB*m_b) >> (8 - 5);
1282
1283 /* Scale threshold to 0.8 fixed-point format */
1284 int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
1285 int f = 0x100 - (sA + (sA>>7));
1286 uint16_t d = *dst;
1287 int dR = (d>>11)&0x1f;
1288 int dG = (d>>5)&0x3f;
1289 int dB = (d)&0x1f;
1290 sR = (sR + f*dR + threshold)>>8;
1291 sG = (sG + f*dG + threshold)>>8;
1292 sB = (sB + f*dB + threshold)>>8;
1293 if (sR > 0x1f) sR = 0x1f;
1294 if (sG > 0x3f) sG = 0x3f;
1295 if (sB > 0x1f) sB = 0x1f;
1296 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1297 }
1298 };
1299
1300 /* same as 32to16_modulate, except that the input is xRGB, instead of ARGB */
1301 struct blender_x32to16_modulate : blender_modulate {
1302 blender_x32to16_modulate(const context_t* c) {
1303 init(c);
1304 }
1305 void write(uint32_t s, uint16_t* dst) {
1306 s = GGL_RGBA_TO_HOST(s);
1307
1308 uint32_t sB = (s >> 16) & 0xff;
1309 uint32_t sG = (s >> 8) & 0xff;
1310 uint32_t sR = s & 0xff;
1311
1312 /* Keep R/G/B in 5.8 or 6.8 format */
1313 sR = (sR*m_r) >> (8 - 5);
1314 sG = (sG*m_g) >> (8 - 6);
1315 sB = (sB*m_b) >> (8 - 5);
1316
1317 int f = 0x100 - m_a;
1318 uint16_t d = *dst;
1319 int dR = (d>>11)&0x1f;
1320 int dG = (d>>5)&0x3f;
1321 int dB = (d)&0x1f;
1322 sR = (sR + f*dR)>>8;
1323 sG = (sG + f*dG)>>8;
1324 sB = (sB + f*dB)>>8;
1325 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1326 }
1327 void write(uint32_t s, uint16_t* dst, ditherer& di) {
1328 s = GGL_RGBA_TO_HOST(s);
1329
1330 uint32_t sB = (s >> 16) & 0xff;
1331 uint32_t sG = (s >> 8) & 0xff;
1332 uint32_t sR = s & 0xff;
1333
1334 sR = (sR*m_r) >> (8 - 5);
1335 sG = (sG*m_g) >> (8 - 6);
1336 sB = (sB*m_b) >> (8 - 5);
1337
1338 /* Now do a normal blend */
1339 int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
1340 int f = 0x100 - m_a;
1341 uint16_t d = *dst;
1342 int dR = (d>>11)&0x1f;
1343 int dG = (d>>5)&0x3f;
1344 int dB = (d)&0x1f;
1345 sR = (sR + f*dR + threshold)>>8;
1346 sG = (sG + f*dG + threshold)>>8;
1347 sB = (sB + f*dB + threshold)>>8;
1348 if (sR > 0x1f) sR = 0x1f;
1349 if (sG > 0x3f) sG = 0x3f;
1350 if (sB > 0x1f) sB = 0x1f;
1351 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1352 }
1353 };
1354
1355 /* Same as above, but source is 16bit rgb565 */
1356 struct blender_16to16_modulate : blender_modulate {
1357 blender_16to16_modulate(const context_t* c) {
1358 init(c);
1359 }
1360 void write(uint16_t s16, uint16_t* dst) {
1361 uint32_t s = s16;
1362
1363 uint32_t sR = s >> 11;
1364 uint32_t sG = (s >> 5) & 0x3f;
1365 uint32_t sB = s & 0x1f;
1366
1367 sR = (sR*m_r);
1368 sG = (sG*m_g);
1369 sB = (sB*m_b);
1370
1371 int f = 0x100 - m_a;
1372 uint16_t d = *dst;
1373 int dR = (d>>11)&0x1f;
1374 int dG = (d>>5)&0x3f;
1375 int dB = (d)&0x1f;
1376 sR = (sR + f*dR)>>8;
1377 sG = (sG + f*dG)>>8;
1378 sB = (sB + f*dB)>>8;
1379 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1380 }
1381 };
1382
1383 /* This is used to iterate over a 16-bit destination color buffer.
1384 * Usage is:
1385 *
1386 * dst_iterator16 di(context);
1387 * while (di.count--) {
1388 * <do stuff with dest pixel at di.dst>
1389 * di.dst++;
1390 * }
1391 */
1392 struct dst_iterator16 {
1393 dst_iterator16(const context_t* c) {
1394 const int x = c->iterators.xl;
1395 const int width = c->iterators.xr - x;
1396 const int32_t y = c->iterators.y;
1397 const surface_t* cb = &(c->state.buffers.color);
1398 count = width;
1399 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
1400 }
1401 int count;
1402 uint16_t* dst;
1403 };
1404
1405
1406 static void scanline_t32cb16_clamp(context_t* c)
1407 {
1408 dst_iterator16 di(c);
1409
1410 if (is_context_horizontal(c)) {
1411 /* Special case for simple horizontal scaling */
1412 horz_clamp_iterator32 ci(c);
1413 while (di.count--) {
1414 uint32_t s = ci.get_pixel32();
1415 *di.dst++ = convertAbgr8888ToRgb565(s);
1416 }
1417 } else {
1418 /* General case */
1419 clamp_iterator ci(c);
1420 while (di.count--) {
1421 uint32_t s = ci.get_pixel32();
1422 *di.dst++ = convertAbgr8888ToRgb565(s);
1423 }
1424 }
1425 }
1426
1427 static void scanline_t32cb16_dither(context_t* c)
1428 {
1429 horz_iterator32 si(c);
1430 dst_iterator16 di(c);
1431 ditherer dither(c);
1432
1433 while (di.count--) {
1434 uint32_t s = si.get_pixel32();
1435 *di.dst++ = dither.abgr8888ToRgb565(s);
1436 }
1437 }
1438
1439 static void scanline_t32cb16_clamp_dither(context_t* c)
1440 {
1441 dst_iterator16 di(c);
1442 ditherer dither(c);
1443
1444 if (is_context_horizontal(c)) {
1445 /* Special case for simple horizontal scaling */
1446 horz_clamp_iterator32 ci(c);
1447 while (di.count--) {
1448 uint32_t s = ci.get_pixel32();
1449 *di.dst++ = dither.abgr8888ToRgb565(s);
1450 }
1451 } else {
1452 /* General case */
1453 clamp_iterator ci(c);
1454 while (di.count--) {
1455 uint32_t s = ci.get_pixel32();
1456 *di.dst++ = dither.abgr8888ToRgb565(s);
1457 }
1458 }
1459 }
1460
1461 static void scanline_t32cb16blend_dither(context_t* c)
1462 {
1463 dst_iterator16 di(c);
1464 ditherer dither(c);
1465 blender_32to16 bl(c);
1466 horz_iterator32 hi(c);
1467 while (di.count--) {
1468 uint32_t s = hi.get_pixel32();
1469 bl.write(s, di.dst, dither);
1470 di.dst++;
1471 }
1472 }
1473
1474 static void scanline_t32cb16blend_clamp(context_t* c)
1475 {
1476 dst_iterator16 di(c);
1477 blender_32to16 bl(c);
1478
1479 if (is_context_horizontal(c)) {
1480 horz_clamp_iterator32 ci(c);
1481 while (di.count--) {
1482 uint32_t s = ci.get_pixel32();
1483 bl.write(s, di.dst);
1484 di.dst++;
1485 }
1486 } else {
1487 clamp_iterator ci(c);
1488 while (di.count--) {
1489 uint32_t s = ci.get_pixel32();
1490 bl.write(s, di.dst);
1491 di.dst++;
1492 }
1493 }
1494 }
1495
1496 static void scanline_t32cb16blend_clamp_dither(context_t* c)
1497 {
1498 dst_iterator16 di(c);
1499 ditherer dither(c);
1500 blender_32to16 bl(c);
1501
1502 clamp_iterator ci(c);
1503 while (di.count--) {
1504 uint32_t s = ci.get_pixel32();
1505 bl.write(s, di.dst, dither);
1506 di.dst++;
1507 }
1508 }
1509
1510 void scanline_t32cb16blend_clamp_mod(context_t* c)
1511 {
1512 dst_iterator16 di(c);
1513 blender_32to16_modulate bl(c);
1514
1515 clamp_iterator ci(c);
1516 while (di.count--) {
1517 uint32_t s = ci.get_pixel32();
1518 bl.write(s, di.dst);
1519 di.dst++;
1520 }
1521 }
1522
1523 void scanline_t32cb16blend_clamp_mod_dither(context_t* c)
1524 {
1525 dst_iterator16 di(c);
1526 blender_32to16_modulate bl(c);
1527 ditherer dither(c);
1528
1529 clamp_iterator ci(c);
1530 while (di.count--) {
1531 uint32_t s = ci.get_pixel32();
1532 bl.write(s, di.dst, dither);
1533 di.dst++;
1534 }
1535 }
1536
1537 /* Variant of scanline_t32cb16blend_clamp_mod with a xRGB texture */
1538 void scanline_x32cb16blend_clamp_mod(context_t* c)
1539 {
1540 dst_iterator16 di(c);
1541 blender_x32to16_modulate bl(c);
1542
1543 clamp_iterator ci(c);
1544 while (di.count--) {
1545 uint32_t s = ci.get_pixel32();
1546 bl.write(s, di.dst);
1547 di.dst++;
1548 }
1549 }
1550
1551 void scanline_x32cb16blend_clamp_mod_dither(context_t* c)
1552 {
1553 dst_iterator16 di(c);
1554 blender_x32to16_modulate bl(c);
1555 ditherer dither(c);
1556
1557 clamp_iterator ci(c);
1558 while (di.count--) {
1559 uint32_t s = ci.get_pixel32();
1560 bl.write(s, di.dst, dither);
1561 di.dst++;
1562 }
1563 }
1564
1565 void scanline_t16cb16_clamp(context_t* c)
1566 {
1567 dst_iterator16 di(c);
1568
1569 /* Special case for simple horizontal scaling */
1570 if (is_context_horizontal(c)) {
1571 horz_clamp_iterator16 ci(c);
1572 while (di.count--) {
1573 *di.dst++ = ci.get_pixel16();
1574 }
1575 } else {
1576 clamp_iterator ci(c);
1577 while (di.count--) {
1578 *di.dst++ = ci.get_pixel16();
1579 }
1580 }
1581 }
1582
1583
1584
1585 template <typename T, typename U>
1586 static inline __attribute__((const))
1587 T interpolate(int y, T v0, U dvdx, U dvdy) {
1588 // interpolates at pixel centers:
1589 // v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx)
1590 return (y * dvdy) + (v0 + ((dvdy + dvdx) >> 1));
1591 }
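/* Example: with v0 = 10, dvdy = 2 and dvdx = 0, interpolate(3, ...) returns
 * 3*2 + (10 + (2 + 0)/2) = 17, i.e. v0 + 3.5*dvdy -- the value at the
 * center of the fourth row, matching the pixel-center sampling convention.
 */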
1592
1593 // ----------------------------------------------------------------------------
1594 #if 0
1595 #pragma mark -
1596 #endif
1597
1598 void init_y(context_t* c, int32_t ys)
1599 {
1600 const uint32_t enables = c->state.enables;
1601
1602 // compute iterators...
1603 iterators_t& ci = c->iterators;
1604
1605 // sample in the center
1606 ci.y = ys;
1607
1608 if (enables & (GGL_ENABLE_DEPTH_TEST|GGL_ENABLE_W|GGL_ENABLE_FOG)) {
1609 ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy);
1610 ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy);
1611 ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy);
1612 }
1613
1614 if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) {
1615 ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy);
1616 ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy);
1617 ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy);
1618 ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady);
1619 c->step_y = step_y__smooth;
1620 } else {
1621 ci.ydrdy = c->shade.r0;
1622 ci.ydgdy = c->shade.g0;
1623 ci.ydbdy = c->shade.b0;
1624 ci.ydady = c->shade.a0;
1625 // XXX: do only if needed, or make sure this is fast
1626 c->packed = ggl_pack_color(c, c->state.buffers.color.format,
1627 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
1628 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
1629 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
1630 }
1631
1632 // initialize the variables we need in the shader
1633 generated_vars_t& gen = c->generated_vars;
1634 gen.argb[GGLFormat::ALPHA].c = ci.ydady;
1635 gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx;
1636 gen.argb[GGLFormat::RED ].c = ci.ydrdy;
1637 gen.argb[GGLFormat::RED ].dx = c->shade.drdx;
1638 gen.argb[GGLFormat::GREEN].c = ci.ydgdy;
1639 gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx;
1640 gen.argb[GGLFormat::BLUE ].c = ci.ydbdy;
1641 gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx;
1642 gen.dzdx = c->shade.dzdx;
1643 gen.f = ci.ydfdy;
1644 gen.dfdx = c->shade.dfdx;
1645
1646 if (enables & GGL_ENABLE_TMUS) {
1647 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
1648 texture_t& t = c->state.texture[i];
1649 if (!t.enable) continue;
1650
1651 texture_iterators_t& ti = t.iterators;
1652 if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) {
1653 // we need to set all of these to 0 because in some cases
1654 // step_y__generic() or step_y__tmu() will be used and
1655 // therefore will update dtdy, however, in 1:1 mode
1656 // this is always done by the scanline rasterizer.
1657 ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0;
1658 ti.ydsdy = t.shade.is0;
1659 ti.ydtdy = t.shade.it0;
1660 } else {
1661 const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16);
1662 const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16);
1663 ti.sscale = t.shade.sscale + adjustSWrap;
1664 ti.tscale = t.shade.tscale + adjustTWrap;
1665 if (!(enables & GGL_ENABLE_W)) {
1666 // S coordinate
1667 const int32_t sscale = ti.sscale;
1668 const int32_t sy = interpolate(ys,
1669 t.shade.is0, t.shade.idsdx, t.shade.idsdy);
1670 if (sscale>=0) {
1671 ti.ydsdy= sy << sscale;
1672 ti.dsdx = t.shade.idsdx << sscale;
1673 ti.dsdy = t.shade.idsdy << sscale;
1674 } else {
1675 ti.ydsdy= sy >> -sscale;
1676 ti.dsdx = t.shade.idsdx >> -sscale;
1677 ti.dsdy = t.shade.idsdy >> -sscale;
1678 }
1679 // T coordinate
1680 const int32_t tscale = ti.tscale;
1681 const int32_t ty = interpolate(ys,
1682 t.shade.it0, t.shade.idtdx, t.shade.idtdy);
1683 if (tscale>=0) {
1684 ti.ydtdy= ty << tscale;
1685 ti.dtdx = t.shade.idtdx << tscale;
1686 ti.dtdy = t.shade.idtdy << tscale;
1687 } else {
1688 ti.ydtdy= ty >> -tscale;
1689 ti.dtdx = t.shade.idtdx >> -tscale;
1690 ti.dtdy = t.shade.idtdy >> -tscale;
1691 }
1692 }
1693 }
1694 // mirror for generated code...
1695 generated_tex_vars_t& gen = c->generated_vars.texture[i];
1696 gen.width = t.surface.width;
1697 gen.height = t.surface.height;
1698 gen.stride = t.surface.stride;
1699 gen.data = int32_t(t.surface.data);
1700 gen.dsdx = ti.dsdx;
1701 gen.dtdx = ti.dtdx;
1702 }
1703 }
1704
1705 // choose the y-stepper
1706 c->step_y = step_y__nop;
1707 if (enables & GGL_ENABLE_FOG) {
1708 c->step_y = step_y__generic;
1709 } else if (enables & GGL_ENABLE_TMUS) {
1710 if (enables & GGL_ENABLE_SMOOTH) {
1711 c->step_y = step_y__generic;
1712 } else if (enables & GGL_ENABLE_W) {
1713 c->step_y = step_y__w;
1714 } else {
1715 c->step_y = step_y__tmu;
1716 }
1717 } else {
1718 if (enables & GGL_ENABLE_SMOOTH) {
1719 c->step_y = step_y__smooth;
1720 }
1721 }
1722
1723 // choose the rectangle blitter
1724 c->rect = rect_generic;
1725 if ((c->step_y == step_y__nop) &&
1726 (c->scanline == scanline_memcpy))
1727 {
1728 c->rect = rect_memcpy;
1729 }
1730 }
1731
1732 void init_y_packed(context_t* c, int32_t y0)
1733 {
1734 uint8_t f = c->state.buffers.color.format;
1735 c->packed = ggl_pack_color(c, f,
1736 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
1737 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
1738 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
1739 c->iterators.y = y0;
1740 c->step_y = step_y__nop;
1741 // choose the rectangle blitter
1742 c->rect = rect_generic;
1743 if (c->scanline == scanline_memcpy) {
1744 c->rect = rect_memcpy;
1745 }
1746 }
1747
1748 void init_y_noop(context_t* c, int32_t y0)
1749 {
1750 c->iterators.y = y0;
1751 c->step_y = step_y__nop;
1752 // choose the rectangle blitter
1753 c->rect = rect_generic;
1754 if (c->scanline == scanline_memcpy) {
1755 c->rect = rect_memcpy;
1756 }
1757 }
1758
1759 void init_y_error(context_t* c, int32_t y0)
1760 {
1761     // whoops, this should never happen;
1762     // fail gracefully (don't display anything)
1763 init_y_noop(c, y0);
1764 ALOGE("color-buffer has an invalid format!");
1765 }
1766
1767 // ----------------------------------------------------------------------------
1768 #if 0
1769 #pragma mark -
1770 #endif
1771
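// The step_y__*() callbacks advance the iterators to the next scanline.
// Each variant only adds the per-y deltas the current state actually needs;
// step_y__generic() handles the fully general case.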
1772 void step_y__generic(context_t* c)
1773 {
1774 const uint32_t enables = c->state.enables;
1775
1776 // iterate...
1777 iterators_t& ci = c->iterators;
1778 ci.y += 1;
1779
1780 if (enables & GGL_ENABLE_SMOOTH) {
1781 ci.ydrdy += c->shade.drdy;
1782 ci.ydgdy += c->shade.dgdy;
1783 ci.ydbdy += c->shade.dbdy;
1784 ci.ydady += c->shade.dady;
1785 }
1786
1787 const uint32_t mask =
1788 GGL_ENABLE_DEPTH_TEST |
1789 GGL_ENABLE_W |
1790 GGL_ENABLE_FOG;
1791 if (enables & mask) {
1792 ci.ydzdy += c->shade.dzdy;
1793 ci.ydwdy += c->shade.dwdy;
1794 ci.ydfdy += c->shade.dfdy;
1795 }
1796
1797 if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) {
1798 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
1799 if (c->state.texture[i].enable) {
1800 texture_iterators_t& ti = c->state.texture[i].iterators;
1801 ti.ydsdy += ti.dsdy;
1802 ti.ydtdy += ti.dtdy;
1803 }
1804 }
1805 }
1806 }
1807
1808 void step_y__nop(context_t* c)
1809 {
1810 c->iterators.y += 1;
1811 c->iterators.ydzdy += c->shade.dzdy;
1812 }
1813
1814 void step_y__smooth(context_t* c)
1815 {
1816 iterators_t& ci = c->iterators;
1817 ci.y += 1;
1818 ci.ydrdy += c->shade.drdy;
1819 ci.ydgdy += c->shade.dgdy;
1820 ci.ydbdy += c->shade.dbdy;
1821 ci.ydady += c->shade.dady;
1822 ci.ydzdy += c->shade.dzdy;
1823 }
1824
1825 void step_y__w(context_t* c)
1826 {
1827 iterators_t& ci = c->iterators;
1828 ci.y += 1;
1829 ci.ydzdy += c->shade.dzdy;
1830 ci.ydwdy += c->shade.dwdy;
1831 }
1832
1833 void step_y__tmu(context_t* c)
1834 {
1835 iterators_t& ci = c->iterators;
1836 ci.y += 1;
1837 ci.ydzdy += c->shade.dzdy;
1838 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
1839 if (c->state.texture[i].enable) {
1840 texture_iterators_t& ti = c->state.texture[i].iterators;
1841 ti.ydsdy += ti.dsdy;
1842 ti.ydtdy += ti.dtdy;
1843 }
1844 }
1845 }
1846
1847 // ----------------------------------------------------------------------------
1848 #if 0
1849 #pragma mark -
1850 #endif
1851
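// Perspective-correct texturing, done span by span: the scanline is cut into
// 2^SPAN_BITS-pixel spans, 1/w is computed with gglRecipQ() at each span
// boundary, s/w and t/w are evaluated there with gglMulx(), and the texture
// iterators are set up so that c->span() interpolates linearly inside the
// span. This keeps the cost to one reciprocal per span instead of one per
// pixel, at the price of a small interpolation error within each span.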
1852 void scanline_perspective(context_t* c)
1853 {
1854 struct {
1855 union {
1856 struct {
1857 int32_t s, sq;
1858 int32_t t, tq;
1859 };
1860 struct {
1861 int32_t v, q;
1862 } st[2];
1863 };
1864 } tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16)));
1865
1866 // XXX: we should have a special case when dwdx = 0
1867
1868     // 32-pixel spans work okay. 16 is a lot better,
1869     // but hey, it's a software renderer...
1870 const uint32_t SPAN_BITS = 5;
1871 const uint32_t ys = c->iterators.y;
1872 const uint32_t xs = c->iterators.xl;
1873 const uint32_t x1 = c->iterators.xr;
1874 const uint32_t xc = x1 - xs;
1875 uint32_t remainder = xc & ((1<<SPAN_BITS)-1);
1876 uint32_t numSpans = xc >> SPAN_BITS;
1877
1878 const iterators_t& ci = c->iterators;
1879 int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy;
1880 int32_t q0 = gglRecipQ(w0, 30);
1881 const int iwscale = 32 - gglClz(q0);
1882
1883 const int32_t dwdx = c->shade.dwdx << SPAN_BITS;
1884 int32_t xl = c->iterators.xl;
1885
1886 // We process s & t with a loop to reduce the code size
1887 // (and i-cache pressure).
1888
1889 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
1890 const texture_t& tmu = c->state.texture[i];
1891 if (!tmu.enable) continue;
1892 int32_t s = tmu.shade.is0 +
1893 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
1894 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
1895 int32_t t = tmu.shade.it0 +
1896 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
1897 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
1898 tc[i].s = s;
1899 tc[i].t = t;
1900 tc[i].sq = gglMulx(s, q0, iwscale);
1901 tc[i].tq = gglMulx(t, q0, iwscale);
1902 }
1903
1904 int32_t span = 0;
1905 do {
1906 int32_t w1;
1907 if (ggl_likely(numSpans)) {
1908 w1 = w0 + dwdx;
1909 } else {
1910 if (remainder) {
1911 // finish off the scanline...
1912 span = remainder;
1913 w1 = (c->shade.dwdx * span) + w0;
1914 } else {
1915 break;
1916 }
1917 }
1918 int32_t q1 = gglRecipQ(w1, 30);
1919 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
1920 texture_t& tmu = c->state.texture[i];
1921 if (!tmu.enable) continue;
1922 texture_iterators_t& ti = tmu.iterators;
1923
1924 for (int j=0 ; j<2 ; j++) {
1925 int32_t v = tc[i].st[j].v;
1926 if (span) v += (tmu.shade.st[j].dx)*span;
1927 else v += (tmu.shade.st[j].dx)<<SPAN_BITS;
1928 const int32_t v0 = tc[i].st[j].q;
1929 const int32_t v1 = gglMulx(v, q1, iwscale);
1930 int32_t dvdx = v1 - v0;
1931 if (span) dvdx /= span;
1932 else dvdx >>= SPAN_BITS;
1933 tc[i].st[j].v = v;
1934 tc[i].st[j].q = v1;
1935
1936 const int scale = ti.st[j].scale + (iwscale - 30);
1937 if (scale >= 0) {
1938 ti.st[j].ydvdy = v0 << scale;
1939 ti.st[j].dvdx = dvdx << scale;
1940 } else {
1941 ti.st[j].ydvdy = v0 >> -scale;
1942 ti.st[j].dvdx = dvdx >> -scale;
1943 }
1944 }
1945 generated_tex_vars_t& gen = c->generated_vars.texture[i];
1946 gen.dsdx = ti.st[0].dvdx;
1947 gen.dtdx = ti.st[1].dvdx;
1948 }
1949 c->iterators.xl = xl;
1950 c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS));
1951 w0 = w1;
1952 q0 = q1;
1953 c->span(c);
1954 } while(numSpans--);
1955 }
1956
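// Same span-based scheme as scanline_perspective(), specialized for the case
// where exactly one TMU is enabled. It also short-circuits to a purely affine
// scanline when dwdx == 0 (w constant across the scanline), since no
// perspective correction is needed then.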
1957 void scanline_perspective_single(context_t* c)
1958 {
1959     // 32-pixel spans work okay. 16 is a lot better,
1960     // but hey, it's a software renderer...
1961 const uint32_t SPAN_BITS = 5;
1962 const uint32_t ys = c->iterators.y;
1963 const uint32_t xs = c->iterators.xl;
1964 const uint32_t x1 = c->iterators.xr;
1965 const uint32_t xc = x1 - xs;
1966
1967 const iterators_t& ci = c->iterators;
1968 int32_t w = (xs * c->shade.dwdx) + ci.ydwdy;
1969 int32_t iw = gglRecipQ(w, 30);
1970 const int iwscale = 32 - gglClz(iw);
1971
1972 const int i = 31 - gglClz(c->state.enabled_tmu);
1973 generated_tex_vars_t& gen = c->generated_vars.texture[i];
1974 texture_t& tmu = c->state.texture[i];
1975 texture_iterators_t& ti = tmu.iterators;
1976 const int sscale = ti.sscale + (iwscale - 30);
1977 const int tscale = ti.tscale + (iwscale - 30);
1978 int32_t s = tmu.shade.is0 +
1979 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
1980 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
1981 int32_t t = tmu.shade.it0 +
1982 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
1983 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
1984 int32_t s0 = gglMulx(s, iw, iwscale);
1985 int32_t t0 = gglMulx(t, iw, iwscale);
1986 int32_t xl = c->iterators.xl;
1987
1988 int32_t sq, tq, dsdx, dtdx;
1989 int32_t premainder = xc & ((1<<SPAN_BITS)-1);
1990 uint32_t numSpans = xc >> SPAN_BITS;
1991 if (c->shade.dwdx == 0) {
1992 // XXX: we could choose to do this if the error is small enough
1993 numSpans = 0;
1994 premainder = xc;
1995 goto no_perspective;
1996 }
1997
1998 if (premainder) {
1999 w += c->shade.dwdx * premainder;
2000 iw = gglRecipQ(w, 30);
2001 no_perspective:
2002 s += tmu.shade.idsdx * premainder;
2003 t += tmu.shade.idtdx * premainder;
2004 sq = gglMulx(s, iw, iwscale);
2005 tq = gglMulx(t, iw, iwscale);
2006 dsdx = (sq - s0) / premainder;
2007 dtdx = (tq - t0) / premainder;
2008 c->iterators.xl = xl;
2009 c->iterators.xr = xl = xl + premainder;
2010 goto finish;
2011 }
2012
2013 while (numSpans--) {
2014 w += c->shade.dwdx << SPAN_BITS;
2015 s += tmu.shade.idsdx << SPAN_BITS;
2016 t += tmu.shade.idtdx << SPAN_BITS;
2017 iw = gglRecipQ(w, 30);
2018 sq = gglMulx(s, iw, iwscale);
2019 tq = gglMulx(t, iw, iwscale);
2020 dsdx = (sq - s0) >> SPAN_BITS;
2021 dtdx = (tq - t0) >> SPAN_BITS;
2022 c->iterators.xl = xl;
2023 c->iterators.xr = xl = xl + (1<<SPAN_BITS);
2024 finish:
2025 if (sscale >= 0) {
2026 ti.ydsdy = s0 << sscale;
2027 ti.dsdx = dsdx << sscale;
2028 } else {
2029 ti.ydsdy = s0 >>-sscale;
2030 ti.dsdx = dsdx >>-sscale;
2031 }
2032 if (tscale >= 0) {
2033 ti.ydtdy = t0 << tscale;
2034 ti.dtdx = dtdx << tscale;
2035 } else {
2036 ti.ydtdy = t0 >>-tscale;
2037 ti.dtdx = dtdx >>-tscale;
2038 }
2039 s0 = sq;
2040 t0 = tq;
2041 gen.dsdx = ti.dsdx;
2042 gen.dtdx = ti.dtdx;
2043 c->span(c);
2044 }
2045 }
2046
2047 // ----------------------------------------------------------------------------
2048
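// Blends a constant 32-bit color (c->packed8888, set up by init_y_packed())
// into an RGB565 color buffer. On ARM builds with the asm codegen level this
// dispatches to the NEON or ARM assembly routines; the generic C fallback
// reads each 565 destination pixel, scales its channels by
// f = 0x100 - (sA + (sA >> 7)) -- so sA == 0 gives f == 256 and sA == 255
// gives f == 0, i.e. roughly 256*(1 - alpha/255) -- and adds them to the
// source channels truncated to 5/6/5 bits.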
2049 void scanline_col32cb16blend(context_t* c)
2050 {
2051 int32_t x = c->iterators.xl;
2052 size_t ct = c->iterators.xr - x;
2053 int32_t y = c->iterators.y;
2054 surface_t* cb = &(c->state.buffers.color);
2055 union {
2056 uint16_t* dst;
2057 uint32_t* dst32;
2058 };
2059 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2060
2061 #if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
2062 #if defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
2063 scanline_col32cb16blend_neon(dst, &(c->packed8888), ct);
2064 #else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
2065 scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
2066 #endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
2067 #else
2068 uint32_t s = GGL_RGBA_TO_HOST(c->packed8888);
2069 int sA = (s>>24);
2070 int f = 0x100 - (sA + (sA>>7));
2071 while (ct--) {
2072 uint16_t d = *dst;
2073 int dR = (d>>11)&0x1f;
2074 int dG = (d>>5)&0x3f;
2075 int dB = (d)&0x1f;
2076 int sR = (s >> ( 3))&0x1F;
2077 int sG = (s >> ( 8+2))&0x3F;
2078 int sB = (s >> (16+3))&0x1F;
2079 sR += (f*dR)>>8;
2080 sG += (f*dG)>>8;
2081 sB += (f*dB)>>8;
2082 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB);
2083 }
2084 #endif
2085
2086 }
2087
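// Unblended 1:1 copy of a 32-bit ABGR texture into an RGB565 buffer. A single
// leading pixel is converted when the destination is not 32-bit aligned (or
// when only one pixel remains), so the main loop can convert two texels at a
// time and store them with one 32-bit write.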
2088 void scanline_t32cb16(context_t* c)
2089 {
2090 int32_t x = c->iterators.xl;
2091 size_t ct = c->iterators.xr - x;
2092 int32_t y = c->iterators.y;
2093 surface_t* cb = &(c->state.buffers.color);
2094 union {
2095 uint16_t* dst;
2096 uint32_t* dst32;
2097 };
2098 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2099
2100 surface_t* tex = &(c->state.texture[0].surface);
2101 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2102 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2103 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v));
2104 int sR, sG, sB;
2105 uint32_t s, d;
2106
2107 if (ct==1 || uint32_t(dst)&2) {
2108 last_one:
2109 s = GGL_RGBA_TO_HOST( *src++ );
2110 *dst++ = convertAbgr8888ToRgb565(s);
2111 ct--;
2112 }
2113
2114 while (ct >= 2) {
2115 #if BYTE_ORDER == BIG_ENDIAN
2116 s = GGL_RGBA_TO_HOST( *src++ );
2117 d = convertAbgr8888ToRgb565_hi16(s);
2118
2119 s = GGL_RGBA_TO_HOST( *src++ );
2120 d |= convertAbgr8888ToRgb565(s);
2121 #else
2122 s = GGL_RGBA_TO_HOST( *src++ );
2123 d = convertAbgr8888ToRgb565(s);
2124
2125 s = GGL_RGBA_TO_HOST( *src++ );
2126 d |= convertAbgr8888ToRgb565(s) << 16;
2127 #endif
2128 *dst32++ = d;
2129 ct -= 2;
2130 }
2131
2132 if (ct > 0) {
2133 goto last_one;
2134 }
2135 }
2136
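// Blended version of the above: on ARM it calls the hand-written
// scanline_t32cb16blend_arm() routine; otherwise it uses the generic
// iterator/blender helpers (dst_iterator16, horz_iterator32, blender_32to16).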
2137 void scanline_t32cb16blend(context_t* c)
2138 {
2139 #if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
2140 int32_t x = c->iterators.xl;
2141 size_t ct = c->iterators.xr - x;
2142 int32_t y = c->iterators.y;
2143 surface_t* cb = &(c->state.buffers.color);
2144 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2145
2146 surface_t* tex = &(c->state.texture[0].surface);
2147 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2148 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2149 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v));
2150
2151 scanline_t32cb16blend_arm(dst, src, ct);
2152 #else
2153 dst_iterator16 di(c);
2154 horz_iterator32 hi(c);
2155 blender_32to16 bl(c);
2156 while (di.count--) {
2157 uint32_t s = hi.get_pixel32();
2158 bl.write(s, di.dst);
2159 di.dst++;
2160 }
2161 #endif
2162 }
2163
2164 void scanline_t32cb16blend_srca(context_t* c)
2165 {
2166 dst_iterator16 di(c);
2167 horz_iterator32 hi(c);
2168 blender_32to16_srcA blender(c);
2169
2170 while (di.count--) {
2171 uint32_t s = hi.get_pixel32();
2172 blender.write(s,di.dst);
2173 di.dst++;
2174 }
2175 }
2176
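// 16-bit texture modulated by the iterated alpha and written to a 16-bit
// buffer with clamped texture coordinates. The two trivial alpha values are
// handled up front: a == 0 leaves the destination untouched, and a == 255
// falls through to the plain scanline_t16cb16_clamp() path.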
2177 void scanline_t16cb16blend_clamp_mod(context_t* c)
2178 {
2179 const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
2180 if (a == 0) {
2181 return;
2182 }
2183
2184 if (a == 255) {
2185 scanline_t16cb16_clamp(c);
2186 return;
2187 }
2188
2189 dst_iterator16 di(c);
2190 blender_16to16_modulate blender(c);
2191 clamp_iterator ci(c);
2192
2193 while (di.count--) {
2194 uint16_t s = ci.get_pixel16();
2195 blender.write(s, di.dst);
2196 di.dst++;
2197 }
2198 }
2199
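// Fast path for an unfiltered, unblended 1:1 texture copy: the whole span is
// copied with a single memcpy(). Note that the color-buffer format's pixel
// size (fp->size) is used to address both surfaces, so this is only valid
// when source and destination have the same pixel size.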
2200 void scanline_memcpy(context_t* c)
2201 {
2202 int32_t x = c->iterators.xl;
2203 size_t ct = c->iterators.xr - x;
2204 int32_t y = c->iterators.y;
2205 surface_t* cb = &(c->state.buffers.color);
2206 const GGLFormat* fp = &(c->formats[cb->format]);
2207 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2208 (x + (cb->stride * y)) * fp->size;
2209
2210 surface_t* tex = &(c->state.texture[0].surface);
2211 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2212 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2213 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) +
2214 (u + (tex->stride * v)) * fp->size;
2215
2216 const size_t size = ct * fp->size;
2217 memcpy(dst, src, size);
2218 }
2219
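// The scanline_memset*() variants fill the span with the constant color
// pre-packed by init_y_packed() (c->packed). android_memset16() and
// android_memset32() take a size in bytes, hence the ct*2 and ct*4 below.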
2220 void scanline_memset8(context_t* c)
2221 {
2222 int32_t x = c->iterators.xl;
2223 size_t ct = c->iterators.xr - x;
2224 int32_t y = c->iterators.y;
2225 surface_t* cb = &(c->state.buffers.color);
2226 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + (x+(cb->stride*y));
2227 uint32_t packed = c->packed;
2228 memset(dst, packed, ct);
2229 }
2230
2231 void scanline_memset16(context_t* c)
2232 {
2233 int32_t x = c->iterators.xl;
2234 size_t ct = c->iterators.xr - x;
2235 int32_t y = c->iterators.y;
2236 surface_t* cb = &(c->state.buffers.color);
2237 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2238 uint32_t packed = c->packed;
2239 android_memset16(dst, packed, ct*2);
2240 }
2241
2242 void scanline_memset32(context_t* c)
2243 {
2244 int32_t x = c->iterators.xl;
2245 size_t ct = c->iterators.xr - x;
2246 int32_t y = c->iterators.y;
2247 surface_t* cb = &(c->state.buffers.color);
2248 uint32_t* dst = reinterpret_cast<uint32_t*>(cb->data) + (x+(cb->stride*y));
2249 uint32_t packed = GGL_HOST_TO_RGBA(c->packed);
2250 android_memset32(dst, packed, ct*4);
2251 }
2252
2253 void scanline_clear(context_t* c)
2254 {
2255 int32_t x = c->iterators.xl;
2256 size_t ct = c->iterators.xr - x;
2257 int32_t y = c->iterators.y;
2258 surface_t* cb = &(c->state.buffers.color);
2259 const GGLFormat* fp = &(c->formats[cb->format]);
2260 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2261 (x + (cb->stride * y)) * fp->size;
2262 const size_t size = ct * fp->size;
2263 memset(dst, 0, size);
2264 }
2265
2266 void scanline_set(context_t* c)
2267 {
2268 int32_t x = c->iterators.xl;
2269 size_t ct = c->iterators.xr - x;
2270 int32_t y = c->iterators.y;
2271 surface_t* cb = &(c->state.buffers.color);
2272 const GGLFormat* fp = &(c->formats[cb->format]);
2273 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2274 (x + (cb->stride * y)) * fp->size;
2275 const size_t size = ct * fp->size;
2276 memset(dst, 0xFF, size);
2277 }
2278
2279 void scanline_noop(context_t* c)
2280 {
2281 }
2282
2283 void rect_generic(context_t* c, size_t yc)
2284 {
2285 do {
2286 c->scanline(c);
2287 c->step_y(c);
2288 } while (--yc);
2289 }
2290
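// rect_memcpy() collapses the whole rectangle into a single memcpy() when
// source and destination share the same stride and the span covers a full
// row; otherwise it falls back to one memcpy() per scanline.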
2291 void rect_memcpy(context_t* c, size_t yc)
2292 {
2293 int32_t x = c->iterators.xl;
2294 size_t ct = c->iterators.xr - x;
2295 int32_t y = c->iterators.y;
2296 surface_t* cb = &(c->state.buffers.color);
2297 const GGLFormat* fp = &(c->formats[cb->format]);
2298 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2299 (x + (cb->stride * y)) * fp->size;
2300
2301 surface_t* tex = &(c->state.texture[0].surface);
2302 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2303 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2304 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) +
2305 (u + (tex->stride * v)) * fp->size;
2306
2307 if (cb->stride == tex->stride && ct == size_t(cb->stride)) {
2308 memcpy(dst, src, ct * fp->size * yc);
2309 } else {
2310 const size_t size = ct * fp->size;
2311 const size_t dbpr = cb->stride * fp->size;
2312 const size_t sbpr = tex->stride * fp->size;
2313 do {
2314 memcpy(dst, src, size);
2315 dst += dbpr;
2316 src += sbpr;
2317 } while (--yc);
2318 }
2319 }
2320 // ----------------------------------------------------------------------------
2321 }; // namespace android
2322
2323