/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "GGLAssembler"

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <cutils/log.h>

#include "codeflinger/GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
    : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7)
{
}

GGLAssembler::~GGLAssembler()
{
}

void GGLAssembler::prolog()
{
    ARMAssemblerProxy::prolog();
}

void GGLAssembler::epilog(uint32_t touched)
{
    ARMAssemblerProxy::epilog(touched);
}

void GGLAssembler::reset(int opt_level)
{
    ARMAssemblerProxy::reset();
    RegisterAllocator::reset();
    mOptLevel = opt_level;
}

// ---------------------------------------------------------------------------

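// The needs are compiled into a pipeline for one scanline. If the register
// allocator runs out of registers at the current optimization level, the
// whole generation is retried at a lower level, which keeps less state
// cached in registers and reloads more from the context.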
int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
{
    int err = 0;
    int opt_level = mOptLevel;
    while (opt_level >= 0) {
        reset(opt_level);
        err = scanline_core(needs, c);
        if (err == 0)
            break;
        opt_level--;
    }

    // XXX: in theory, pcForLabel is not valid before generate()
    uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
    uint32_t* fragment_end_pc = pcForLabel("epilog");
    const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);

    // build a name for our pipeline
    char name[64];
    sprintf(name,
            "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
            needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);

    if (err) {
        ALOGE("Error while generating \"%s\"\n", name);
        disassemble(name);
        return -1;
    }

    return generate(name);
}

int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    int64_t duration = ggl_system_time();

    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    mAA        = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
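    // (the needs fields encode the alpha/depth test functions as an
    // offset from GGL_NEVER)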
    mFog       = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth    = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));

    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    const int blending = blending_codes(mBlendSrc, mBlendDst) |
                         blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
               (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    int fbComponents = 0;
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked   = !!(masking & mask);
        info.inDest   = !info.masked && mCbFormat.c[i].h &&
                        ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        if (mCbFormat.components >= GGL_LUMINANCE &&
            (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        info.needed   = (i==GGLFormat::ALPHA) &&
                        (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth   = mSmooth && info.iterated;
        info.fog      = mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend    = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
        fbComponents |= mCbFormat.c[i].h ? mask : 0;
    }

    mAllMasked = (mMasking == fbComponents);
    if (mAllMasked) {
        mDithering = 0;
    }

    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());

        if (mDithering) {
            // update the dither index.
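            // (parts.count.reg is laid out as 0xNNNNXXDD, see
            // build_scanline_prolog. Rotating the low GGL_DITHER_ORDER_SHIFT
            // bits to the top, incrementing them there, and rotating them
            // back increments the dither index with wrap-around, without
            // disturbing the pixel count in the high half.)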
            MOV(AL, 0, parts.count.reg,
                reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) ||
            (mMasking && !mAllMasked) ||
            (mLogicOp & LOGIC_OP_DST))
        {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                     immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            build_component(pixel, parts, GGLFormat::ALPHA, regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED, regs);
            build_component(pixel, parts, GGLFormat::GREEN, regs);
            build_component(pixel, parts, GGLFormat::BLUE, regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // that this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        if (!mAllMasked) {
            // logic operation
            build_logic_op(pixel, regs);

            // masking
            build_masking(pixel, regs);

            comment("store");
            store(parts.cbPtr, pixel, WRITE_BACK);
        }
    }

    if (registerFile().status())
        return registerFile().status();

    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);

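    // decrement the pixel counter kept in the top 16 bits of
    // parts.count.reg; PL keeps looping while the result is non-negative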
    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        if (!mAllMasked) {
            ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        }
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_scanline_prolog(
        fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());
    int Rctx = mBuilderContext.Rctx;

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    if (!mAllMasked) {
        // compute dst ptr
        comment("compute color-buffer pointer");
        const int cb_bits = mCbFormat.size*8;
        int Rs = scratches.obtain();
        parts.cbPtr.setTo(obtainReg(), cb_bits);
        CONTEXT_LOAD(Rs, state.buffers.color.stride);
        CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
        base_offset(parts.cbPtr, parts.cbPtr, Rs);
        scratches.recycle(Rs);
    }

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);    // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);       // 1.31 fixed-point
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase off parts.count
        // zbase = base + (xl-count + stride*y)*2
        int Rs = dzdx;
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_STORE(zbase, generated_vars.zbase);
    }

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
        ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_component( pixel_t& pixel,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& regs)
{
    static char const * comments[] = {"alpha", "red", "green", "blue"};
    comment(comments[component]);

    // local register file
    Scratch scratches(registerFile());
    const int dst_component_size = pixel.component_size(component);

    component_t temp(-1);
    build_incoming_component( temp, dst_component_size,
                              parts, component, scratches, regs);

    if (mInfo[component].inDest) {

        // blending...
        build_blending( temp, mDstPixel, component, scratches );

        // downshift component and rebuild pixel...
        downshift(pixel, component, temp, parts.dither);
    }
}

void GGLAssembler::build_incoming_component(
                                    component_t& temp,
                                    int dst_size,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& scratches,
                                    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                         (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }

    if (need_extract) {
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                component_t incoming(fragment);
                modify(fragment, regs);
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would be better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                mAlphaSource.s -= shift;
            }
        }

        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}

bool GGLAssembler::isAlphaSourceNeeded() const
{
    // XXX: also needed for alpha-test
    const int bs = mBlendSrc;
    const int bd = mBlendDst;
    return  bs==GGL_SRC_ALPHA_SATURATE ||
            bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
            bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
{
    if (mSmooth && !parts.iterated_packed) {
        // update the iterated color in a pipelined way...
        comment("update iterated color");
        Scratch scratches(registerFile());

        const int reload = parts.reload;
        for (int i=0 ; i<4 ; i++) {
            if (!mInfo[i].iterated)
                continue;

            int c = parts.argb[i].reg;
            int dx = parts.argb_dx[i].reg;

            if (reload & 1) {
                c = scratches.obtain();
                CONTEXT_LOAD(c, generated_vars.argb[i].c);
            }
            if (reload & 2) {
                dx = scratches.obtain();
                CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
            }

            if (mSmooth) {
                ADD(AL, 0, c, c, dx);
            }

            if (reload & 1) {
                CONTEXT_STORE(c, generated_vars.argb[i].c);
                scratches.recycle(c);
            }
            if (reload & 2) {
                scratches.recycle(dx);
            }
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_coverage_application(component_t& fragment,
        const fragment_parts_t& parts, Scratch& regs)
{
    // here fragment.l is guaranteed to be 0
    if (mAA) {
        // coverages are 1.15 fixed-point numbers
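        // (SMULWB keeps the top 32 bits of the 48-bit product of a 32-bit
        // register with a signed 16-bit half, i.e. an implicit >>16; with
        // a 1.15 coverage this halves the fragment, so the fragment is
        // pre-shifted left by one, or its size reduced by one, to keep
        // its fixed-point position)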
        comment("coverage application");

        component_t incoming(fragment);
        modify(fragment, regs);

        Scratch scratches(registerFile());
        int cf = scratches.obtain();
        LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
        if (fragment.h > 31) {
            fragment.h--;
            SMULWB(AL, fragment.reg, incoming.reg, cf);
        } else {
            MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
            SMULWB(AL, fragment.reg, fragment.reg, cf);
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_alpha_test(component_t& fragment,
                                    const fragment_parts_t& parts)
{
    if (mAlphaTest != GGL_ALWAYS) {
        comment("Alpha Test");
        Scratch scratches(registerFile());
        int ref = scratches.obtain();
        const int shift = GGL_COLOR_BITS-fragment.size();
        CONTEXT_LOAD(ref, state.alpha_test.ref);
        if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
        else       CMP(AL, fragment.reg, ref);
        int cc = NV;
        switch (mAlphaTest) {
        case GGL_NEVER:     cc = NV;    break;
        case GGL_LESS:      cc = LT;    break;
        case GGL_EQUAL:     cc = EQ;    break;
        case GGL_LEQUAL:    cc = LS;    break;
        case GGL_GREATER:   cc = HI;    break;
        case GGL_NOTEQUAL:  cc = NE;    break;
        case GGL_GEQUAL:    cc = HS;    break;
        }
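        // branch out on the inverted condition (flipping the low bit of an
        // ARM condition code negates it; NV^1 == AL, so GGL_NEVER always
        // discards)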
        B(cc^1, "discard_after_textures");
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:      ic = HI;    break;
        case GGL_EQUAL:     ic = EQ;    break;
        case GGL_LEQUAL:    ic = HS;    break;
        case GGL_GREATER:   ic = LT;    break;
        case GGL_NOTEQUAL:  ic = NE;    break;
        case GGL_GEQUAL:    ic = LS;    break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. but we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // invert the condition
        cc = ic^1;

        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");

        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_LOAD(zbase, generated_vars.zbase);  // stall
        SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
        // above does zbase = zbase - ((count >> 16) << 1)

        if (mask & Z_TEST) {
            LDRH(AL, depth, zbase);  // stall
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                ic = AL;
            }
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            STRH(ic, depth, zbase);
        }
    }
}

void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
{
    const needs_t& needs = mBuilderContext.needs;
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);    // stall
        ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
    }
}

void GGLAssembler::build_iterate_f(const fragment_parts_t& parts)
{
    const needs_t& needs = mBuilderContext.needs;
    if (GGL_READ_NEEDS(P_FOG, needs.p)) {
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int f = scratches.obtain();
        CONTEXT_LOAD(f, generated_vars.f);
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);    // stall
        ADD(AL, 0, f, f, dfdx);
        CONTEXT_STORE(f, generated_vars.f);
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
{
    const needs_t& needs = mBuilderContext.needs;
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    if (opcode == GGL_COPY)
        return;

    comment("logic operation");

    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    pixel_t d(mDstPixel);
    switch(opcode) {
    case GGL_CLEAR:         MOV(AL, 0, pixel.reg, imm(0));          break;
    case GGL_AND:           AND(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_AND_REVERSE:   BIC(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_COPY:          break;
    case GGL_AND_INVERTED:  BIC(AL, 0, pixel.reg, d.reg, s.reg);    break;
    case GGL_NOOP:          MOV(AL, 0, pixel.reg, d.reg);           break;
    case GGL_XOR:           EOR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_OR:            ORR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_NOR:           ORR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_EQUIV:         EOR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_INVERT:        MVN(AL, 0, pixel.reg, d.reg);           break;
    case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
                            BIC(AL, 0, pixel.reg, d.reg, s.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg);           break;
    case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
                            BIC(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_NAND:          AND(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_SET:           MVN(AL, 0, pixel.reg, imm(0));          break;
    };
}

// ---------------------------------------------------------------------------

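// ARM data-processing instructions can only encode an 8-bit immediate
// rotated right by an even amount. The two helpers below decompose an
// arbitrary 32-bit mask into such chunks, so build_and_immediate() can
// emit a single AND when the mask is directly encodable, or a short
// sequence of BICs on the complemented mask when it is not.
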
static uint32_t find_bottom(uint32_t val)
{
    uint32_t i = 0;
    while (!(val & (3<<i)))
        i += 2;
    return i;
}

static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    while (!(val&3) || (val & 0xFC000000)) {
        uint32_t newval;
        newval = val >> 2;
        newval |= (val&3) << 30;
        val = newval;
        rot += 2;
        if (rot == 32) {
            rot = 0;
            break;
        }
    }
}

void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
{
    uint32_t rot;
    uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
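    // all-ones mask of width 'bits' (0 - 1 wraps to 0xFFFFFFFF when
    // bits >= 32)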
    mask &= size;

    if (mask == size) {
        if (d != s)
            MOV( AL, 0, d, s);
        return;
    }

    int negative_logic = !isValidImmediate(mask);
    if (negative_logic) {
        mask = ~mask & size;
    }
    normalize(mask, rot);

    if (mask) {
        while (mask) {
            uint32_t bitpos = find_bottom(mask);
            int shift = rot + bitpos;
            uint32_t m = mask & (0xff << bitpos);
            mask &= ~m;
            m >>= bitpos;
            int32_t newMask = (m<<shift) | (m>>(32-shift));
            if (!negative_logic) {
                AND( AL, 0, d, s, imm(newMask) );
            } else {
                BIC( AL, 0, d, s, imm(newMask) );
            }
            s = d;
        }
    } else {
        MOV( AL, 0, d, imm(0));
    }
}

void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
{
    if (!mMasking || mAllMasked) {
        return;
    }

    comment("color mask");

    pixel_t fb(mDstPixel);
    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    int mask = 0;
    for (int i=0 ; i<4 ; i++) {
        const int component_mask = 1<<i;
        const int h = fb.format.c[i].h;
        const int l = fb.format.c[i].l;
        if (h && (!(mMasking & component_mask))) {
            mask |= ((1<<(h-l))-1) << l;
        }
    }

    // There is no need to clear the masked components of the source
    // (unless we applied a logic op), because they're already zeroed
    // by construction (masked components are not computed)

    if (mLogicOp) {
        const needs_t& needs = mBuilderContext.needs;
        const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
        if (opcode != GGL_CLEAR) {
            // clear masked component of source
            build_and_immediate(pixel.reg, s.reg, mask, fb.size());
            s = pixel;
        }
    }

    // clear non masked components of destination
    build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());

    // or back the channels that were masked
    if (s.reg == fb.reg) {
        // this is in fact a MOV
        if (s.reg == pixel.reg) {
            // ugh. this is in fact a nop
        } else {
            MOV(AL, 0, pixel.reg, fb.reg);
        }
    } else {
        ORR(AL, 0, pixel.reg, s.reg, fb.reg);
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::base_offset(
        const pointer_t& d, const pointer_t& b, const reg_t& o)
{
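    // d = b + o * (b.size/8): scale the pixel offset by the byte size of
    // the format. The 24-bit case multiplies by 3 with two ADDs, since
    // ARM's shifted operands can only scale by powers of two.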
    switch (b.size) {
    case 32:
        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
        break;
    case 24:
        if (d.reg == b.reg) {
            ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
            ADD(AL, 0, d.reg, d.reg, o.reg);
        } else {
            ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
            ADD(AL, 0, d.reg, d.reg, b.reg);
        }
        break;
    case 16:
        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
        break;
    case 8:
        ADD(AL, 0, d.reg, b.reg, o.reg);
        break;
    }
}

// ----------------------------------------------------------------------------
// cheezy register allocator...
// ----------------------------------------------------------------------------

void RegisterAllocator::reset()
{
    mRegs.reset();
}

int RegisterAllocator::reserveReg(int reg)
{
    return mRegs.reserve(reg);
}

int RegisterAllocator::obtainReg()
{
    return mRegs.obtain();
}

void RegisterAllocator::recycleReg(int reg)
{
    mRegs.recycle(reg);
}

RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
{
    return mRegs;
}

// ----------------------------------------------------------------------------

RegisterAllocator::RegisterFile::RegisterFile()
    : mRegs(0), mTouched(0), mStatus(0)
{
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}

RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs)
    : mRegs(rhs.mRegs), mTouched(rhs.mTouched)
{
}

RegisterAllocator::RegisterFile::~RegisterFile()
{
}

bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
{
    return (mRegs == rhs.mRegs);
}

void RegisterAllocator::RegisterFile::reset()
{
    mRegs = mTouched = mStatus = 0;
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}

int RegisterAllocator::RegisterFile::reserve(int reg)
{
    LOG_ALWAYS_FATAL_IF(isUsed(reg),
                        "reserving register %d, but already in use",
                        reg);
    mRegs |= (1<<reg);
    mTouched |= mRegs;
    return reg;
}

void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
{
    mRegs |= regMask;
    mTouched |= regMask;
}

int RegisterAllocator::RegisterFile::isUsed(int reg) const
{
    LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg);
    return mRegs & (1<<reg);
}

int RegisterAllocator::RegisterFile::obtain()
{
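    // Preference order: r0-r3, then ip (r12) and lr (r14), all of which
    // are caller-saved under the ARM ABI, before the callee-saved r4-r11;
    // this likely minimizes what the prolog/epilog must save and restore.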
    const char priorityList[14] = {  0,  1,  2,  3,
                                    12, 14,  4,  5,
                                     6,  7,  8,  9,
                                    10, 11 };
    const int nbreg = sizeof(priorityList);
    int i, r;
    for (i=0 ; i<nbreg ; i++) {
        r = priorityList[i];
        if (!isUsed(r)) {
            break;
        }
    }
    // this is not an error anymore because we'll try again with
    // a lower optimization level.
    //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
    if (i >= nbreg) {
        mStatus |= OUT_OF_REGISTERS;
        // we return SP so we can more easily debug things
        // the code will never be run anyway.
        return ARMAssemblerInterface::SP;
    }
    reserve(r);
    return r;
}

bool RegisterAllocator::RegisterFile::hasFreeRegs() const
{
    return ((mRegs & 0xFFFF) == 0xFFFF) ? false : true;
}

int RegisterAllocator::RegisterFile::countFreeRegs() const
{
    int f = ~mRegs & 0xFFFF;
    // now count number of 1
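    // (classic SWAR popcount: sum bit pairs, then nibbles, then bytes,
    // then halfwords)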
    f = (f & 0x5555) + ((f>>1) & 0x5555);
    f = (f & 0x3333) + ((f>>2) & 0x3333);
    f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
    f = (f & 0x00FF) + ((f>>8) & 0x00FF);
    return f;
}

void RegisterAllocator::RegisterFile::recycle(int reg)
{
    LOG_FATAL_IF(!isUsed(reg),
                 "recycling unallocated register %d",
                 reg);
    mRegs &= ~(1<<reg);
}

void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
{
    LOG_FATAL_IF((mRegs & regMask)!=regMask,
                 "recycling unallocated registers "
                 "(recycle=%08x, allocated=%08x, unallocated=%08x)",
                 regMask, mRegs, mRegs&regMask);
    mRegs &= ~regMask;
}

uint32_t RegisterAllocator::RegisterFile::touched() const
{
    return mTouched;
}

// ----------------------------------------------------------------------------

}; // namespace android