/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "GGLAssembler"

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <cutils/log.h>

#include "codeflinger/GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
    : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7)
{
}

GGLAssembler::~GGLAssembler()
{
}

void GGLAssembler::prolog()
{
    ARMAssemblerProxy::prolog();
}

void GGLAssembler::epilog(uint32_t touched)
{
    ARMAssemblerProxy::epilog(touched);
}

void GGLAssembler::reset(int opt_level)
{
    ARMAssemblerProxy::reset();
    RegisterAllocator::reset();
    mOptLevel = opt_level;
}

// ---------------------------------------------------------------------------

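// scanline() drives code generation: it retries scanline_core() at
// decreasing optimization levels until register allocation succeeds, then
// builds a name from the needs words and the per-fragment instruction
// count, and emits the program via generate().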
int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
{
    int err = 0;
    int opt_level = mOptLevel;
    while (opt_level >= 0) {
        reset(opt_level);
        err = scanline_core(needs, c);
        if (err == 0)
            break;
        opt_level--;
    }

    // XXX: in theory, pcForLabel is not valid before generate()
    uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
    uint32_t* fragment_end_pc = pcForLabel("epilog");
    const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);

    // build a name for our pipeline
    char name[64];
    sprintf(name,
            "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
            needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);

    if (err) {
        ALOGE("Error while generating ""%s""\n", name);
        disassemble(name);
        return -1;
    }

    return generate(name);
}

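// scanline_core() decodes the 'needs' words into pipeline state, then emits
// the scanline program: prolog, a per-fragment loop (depth/alpha tests,
// texturing, blending, logic ops, masking, store), and the epilog.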
int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    int64_t duration = ggl_system_time();

    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    mAA        = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog       = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth    = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));

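    // If the color buffer has no alpha channel, blend factors that read
    // destination alpha cannot be honored; they are replaced with GGL_ONE.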
    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    const int blending =    blending_codes(mBlendSrc, mBlendDst) |
                            blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    int fbComponents = 0;
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked =   !!(masking & mask);
        info.inDest =   !info.masked && mCbFormat.c[i].h &&
                        ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        info.needed =   (i==GGLFormat::ALPHA) &&
                        (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth =   mSmooth && info.iterated;
        info.fog =      mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend =    (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
        fbComponents |= mCbFormat.c[i].h ? mask : 0;
    }

    mAllMasked = (mMasking == fbComponents);
    if (mAllMasked) {
        mDithering = 0;
    }

    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());

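        // The low GGL_DITHER_ORDER_SHIFT bits of parts.count.reg hold the
        // x index into the dither matrix (see build_scanline_prolog).
        // Rotating them to the top of the register, adding one there, and
        // rotating back increments that field modulo the matrix order
        // without disturbing the pixel count in the high half-word.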
        if (mDithering) {
            // update the dither index.
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                    imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) ||
                (mMasking && !mAllMasked) ||
                (mLogicOp & LOGIC_OP_DST))
        {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            build_component(pixel, parts, GGLFormat::ALPHA,    regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED,      regs);
            build_component(pixel, parts, GGLFormat::GREEN,    regs);
            build_component(pixel, parts, GGLFormat::BLUE,     regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // because this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        if (!mAllMasked) {
            // logic operation
            build_logic_op(pixel, regs);

            // masking
            build_masking(pixel, regs);

            comment("store");
            store(parts.cbPtr, pixel, WRITE_BACK);
        }
    }

    if (registerFile().status())
        return registerFile().status();

    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);

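    // the pixel count lives in the top 16 bits of parts.count.reg, so
    // subtracting 1<<16 (with the S flag set) decrements it; the PL branch
    // keeps looping until the count goes negative.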
    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        if (!mAllMasked) {
            ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        }
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());
    int Rctx = mBuilderContext.Rctx;

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    if (!mAllMasked) {
        // compute dst ptr
        comment("compute color-buffer pointer");
        const int cb_bits = mCbFormat.size*8;
        int Rs = scratches.obtain();
        parts.cbPtr.setTo(obtainReg(), cb_bits);
        CONTEXT_LOAD(Rs, state.buffers.color.stride);
        CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
        base_offset(parts.cbPtr, parts.cbPtr, Rs);
        scratches.recycle(Rs);
    }

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;
        CONTEXT_LOAD(dfdx,  generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;
        CONTEXT_LOAD(dzdx,  generated_vars.dzdx);   // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);       // 1.31 fixed-point
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase by parts.count
        // zbase = base + (xl - count + stride*y)*2
        int Rs = dzdx;
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_STORE(zbase, generated_vars.zbase);
    }

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
        ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_component( pixel_t& pixel,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& regs)
{
    static char const * comments[] = {"alpha", "red", "green", "blue"};
    comment(comments[component]);

    // local register file
    Scratch scratches(registerFile());
    const int dst_component_size = pixel.component_size(component);

    component_t temp(-1);
    build_incoming_component( temp, dst_component_size,
            parts, component, scratches, regs);

    if (mInfo[component].inDest) {

        // blending...
        build_blending( temp, mDstPixel, component, scratches );

        // downshift component and rebuild pixel...
        downshift(pixel, component, temp, parts.dither);
    }
}

void GGLAssembler::build_incoming_component(
                                    component_t& temp,
                                    int dst_size,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& scratches,
                                    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                        (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }

    if (need_extract) {
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                component_t incoming(fragment);
                modify(fragment, regs);
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would be better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                mAlphaSource.s -= shift;
            }
        }

        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}

bool GGLAssembler::isAlphaSourceNeeded() const
{
    // XXX: also needed for alpha-test
    const int bs = mBlendSrc;
    const int bd = mBlendDst;
    return  bs==GGL_SRC_ALPHA_SATURATE ||
            bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
            bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA;
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
{
    if (mSmooth && !parts.iterated_packed) {
        // update the iterated color in a pipelined way...
        comment("update iterated color");
        Scratch scratches(registerFile());

        const int reload = parts.reload;
        for (int i=0 ; i<4 ; i++) {
            if (!mInfo[i].iterated)
                continue;

            int c = parts.argb[i].reg;
            int dx = parts.argb_dx[i].reg;

            if (reload & 1) {
                c = scratches.obtain();
                CONTEXT_LOAD(c, generated_vars.argb[i].c);
            }
            if (reload & 2) {
                dx = scratches.obtain();
                CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
            }

            if (mSmooth) {
                ADD(AL, 0, c, c, dx);
            }

            if (reload & 1) {
                CONTEXT_STORE(c, generated_vars.argb[i].c);
                scratches.recycle(c);
            }
            if (reload & 2) {
                scratches.recycle(dx);
            }
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_coverage_application(component_t& fragment,
        const fragment_parts_t& parts, Scratch& regs)
{
    // here fragment.l is guaranteed to be 0
    if (mAA) {
        // coverages are 1.15 fixed-point numbers
        comment("coverage application");

        component_t incoming(fragment);
        modify(fragment, regs);

        Scratch scratches(registerFile());
        int cf = scratches.obtain();
        LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
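        // SMULWB computes (reg * bottom-16-bits(cf)) >> 16; since cf is a
        // 1.15 coverage factor, the result must be scaled up by one bit,
        // either by dropping one bit of precision (h--) or by pre-shifting
        // the incoming value left by 1.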
        if (fragment.h > 31) {
            fragment.h--;
            SMULWB(AL, fragment.reg, incoming.reg, cf);
        } else {
            MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
            SMULWB(AL, fragment.reg, fragment.reg, cf);
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_alpha_test(component_t& fragment,
                                    const fragment_parts_t& parts)
{
    if (mAlphaTest != GGL_ALWAYS) {
        comment("Alpha Test");
        Scratch scratches(registerFile());
        int ref = scratches.obtain();
        const int shift = GGL_COLOR_BITS-fragment.size();
        CONTEXT_LOAD(ref, state.alpha_test.ref);
        if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
        else       CMP(AL, fragment.reg, ref);
        int cc = NV;
        switch (mAlphaTest) {
        case GGL_NEVER:     cc = NV;    break;
        case GGL_LESS:      cc = LT;    break;
        case GGL_EQUAL:     cc = EQ;    break;
        case GGL_LEQUAL:    cc = LS;    break;
        case GGL_GREATER:   cc = HI;    break;
        case GGL_NOTEQUAL:  cc = NE;    break;
        case GGL_GEQUAL:    cc = HS;    break;
        }
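        // ARM condition codes come in complementary pairs that differ only
        // in bit 0, so cc^1 negates the condition: branch out when the
        // alpha test fails.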
        B(cc^1, "discard_after_textures");
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:      ic = HI;    break;
        case GGL_EQUAL:     ic = EQ;    break;
        case GGL_LEQUAL:    ic = HS;    break;
        case GGL_GREATER:   ic = LT;    break;
        case GGL_NOTEQUAL:  ic = NE;    break;
        case GGL_GEQUAL:    ic = LS;    break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. But we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // invert the condition
        cc = ic^1;

        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");

        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_LOAD(zbase, generated_vars.zbase);  // stall
        SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
            // above does zbase = zbase - ((count >> 16) << 1)

        if (mask & Z_TEST) {
            LDRH(AL, depth, zbase);  // stall
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                ic = AL;
            }
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            STRH(ic, depth, zbase);
        }
    }
}

void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
{
    const needs_t& needs = mBuilderContext.needs;
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);    // stall
        ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
    }
}

void GGLAssembler::build_iterate_f(const fragment_parts_t& parts)
{
    const needs_t& needs = mBuilderContext.needs;
    if (GGL_READ_NEEDS(P_FOG, needs.p)) {
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int f = scratches.obtain();
        CONTEXT_LOAD(f,     generated_vars.f);
        CONTEXT_LOAD(dfdx,  generated_vars.dfdx);   // stall
        ADD(AL, 0, f, f, dfdx);
        CONTEXT_STORE(f,    generated_vars.f);
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
{
    const needs_t& needs = mBuilderContext.needs;
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    if (opcode == GGL_COPY)
        return;

    comment("logic operation");

    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    pixel_t d(mDstPixel);
    switch(opcode) {
    case GGL_CLEAR:         MOV(AL, 0, pixel.reg, imm(0));          break;
    case GGL_AND:           AND(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_AND_REVERSE:   BIC(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_COPY:                                                  break;
    case GGL_AND_INVERTED:  BIC(AL, 0, pixel.reg, d.reg, s.reg);    break;
    case GGL_NOOP:          MOV(AL, 0, pixel.reg, d.reg);           break;
    case GGL_XOR:           EOR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_OR:            ORR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_NOR:           ORR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_EQUIV:         EOR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_INVERT:        MVN(AL, 0, pixel.reg, d.reg);           break;
    case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
                            BIC(AL, 0, pixel.reg, d.reg, s.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg);           break;
    case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
                            BIC(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_NAND:          AND(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_SET:           MVN(AL, 0, pixel.reg, imm(0));          break;
    }
}

// ---------------------------------------------------------------------------

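// Helpers for build_and_immediate(): an ARM data-processing immediate is an
// 8-bit value rotated right by an even amount, so an arbitrary 32-bit mask
// may need to be split into several such immediates. find_bottom() locates
// the lowest set bit-pair; normalize() rotates the value into a canonical
// position and reports the rotation applied.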
static uint32_t find_bottom(uint32_t val)
{
    uint32_t i = 0;
    while (!(val & (3<<i)))
        i += 2;
    return i;
}

static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    while (!(val&3) || (val & 0xFC000000)) {
        uint32_t newval;
        newval = val >> 2;
        newval |= (val&3) << 30;
        val = newval;
        rot += 2;
        if (rot == 32) {
            rot = 0;
            break;
        }
    }
}

void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
{
    uint32_t rot;
    uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
    mask &= size;

    if (mask == size) {
        if (d != s)
            MOV( AL, 0, d, s);
        return;
    }

    int negative_logic = !isValidImmediate(mask);
    if (negative_logic) {
        mask = ~mask & size;
    }
    normalize(mask, rot);

    if (mask) {
        while (mask) {
            uint32_t bitpos = find_bottom(mask);
            int shift = rot + bitpos;
            uint32_t m = mask & (0xff << bitpos);
            mask &= ~m;
            m >>= bitpos;
            int32_t newMask = (m<<shift) | (m>>(32-shift));
            if (!negative_logic) {
                AND( AL, 0, d, s, imm(newMask) );
            } else {
                BIC( AL, 0, d, s, imm(newMask) );
            }
            s = d;
        }
    } else {
        MOV( AL, 0, d, imm(0));
    }
}

void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
{
    if (!mMasking || mAllMasked) {
        return;
    }

    comment("color mask");

    pixel_t fb(mDstPixel);
    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    int mask = 0;
    for (int i=0 ; i<4 ; i++) {
        const int component_mask = 1<<i;
        const int h = fb.format.c[i].h;
        const int l = fb.format.c[i].l;
        if (h && (!(mMasking & component_mask))) {
            mask |= ((1<<(h-l))-1) << l;
        }
    }

    // There is no need to clear the masked components of the source
    // (unless we applied a logic op), because they're already zeroed
    // by construction (masked components are not computed)

    if (mLogicOp) {
        const needs_t& needs = mBuilderContext.needs;
        const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
        if (opcode != GGL_CLEAR) {
            // clear masked components of the source
            build_and_immediate(pixel.reg, s.reg, mask, fb.size());
            s = pixel;
        }
    }

    // clear non-masked components of the destination
    build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());

    // OR back the channels that were masked
    if (s.reg == fb.reg) {
        // this is in fact a MOV
        if (s.reg == pixel.reg) {
            // ugh. this is in fact a nop
        } else {
            MOV(AL, 0, pixel.reg, fb.reg);
        }
    } else {
        ORR(AL, 0, pixel.reg, s.reg, fb.reg);
    }
}

// ---------------------------------------------------------------------------

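// base_offset() computes d = b + o * bytes-per-pixel, where the pixel size
// is b.size in bits. The 24-bit case has no single-shift multiply, so o*3
// is built as (o << 1) + o.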
void GGLAssembler::base_offset(
        const pointer_t& d, const pointer_t& b, const reg_t& o)
{
    switch (b.size) {
    case 32:
        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
        break;
    case 24:
        if (d.reg == b.reg) {
            ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
            ADD(AL, 0, d.reg, d.reg, o.reg);
        } else {
            ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
            ADD(AL, 0, d.reg, d.reg, b.reg);
        }
        break;
    case 16:
        ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
        break;
    case 8:
        ADD(AL, 0, d.reg, b.reg, o.reg);
        break;
    }
}

// ----------------------------------------------------------------------------
// cheezy register allocator...
// ----------------------------------------------------------------------------

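// Allocated registers are tracked in a 16-bit bitmask (mRegs): bit n set
// means ARM register rn is in use. mTouched accumulates every register ever
// allocated; epilog() receives that set so touched registers can be restored.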
void RegisterAllocator::reset()
{
    mRegs.reset();
}

int RegisterAllocator::reserveReg(int reg)
{
    return mRegs.reserve(reg);
}

int RegisterAllocator::obtainReg()
{
    return mRegs.obtain();
}

void RegisterAllocator::recycleReg(int reg)
{
    mRegs.recycle(reg);
}

RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
{
    return mRegs;
}

// ----------------------------------------------------------------------------

RegisterAllocator::RegisterFile::RegisterFile()
    : mRegs(0), mTouched(0), mStatus(0)
{
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}

RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs)
    : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mStatus(rhs.mStatus)
{
}

RegisterAllocator::RegisterFile::~RegisterFile()
{
}

bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
{
    return (mRegs == rhs.mRegs);
}

void RegisterAllocator::RegisterFile::reset()
{
    mRegs = mTouched = mStatus = 0;
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}

int RegisterAllocator::RegisterFile::reserve(int reg)
{
    LOG_ALWAYS_FATAL_IF(isUsed(reg),
                        "reserving register %d, but already in use",
                        reg);
    mRegs |= (1<<reg);
    mTouched |= mRegs;
    return reg;
}

void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
{
    mRegs |= regMask;
    mTouched |= regMask;
}

int RegisterAllocator::RegisterFile::isUsed(int reg) const
{
    LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg);
    return mRegs & (1<<reg);
}

int RegisterAllocator::RegisterFile::obtain()
{
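    // Allocation order: caller-saved r0-r3 first, then r12 (ip) and
    // r14 (lr), and only then the callee-saved registers r4-r11.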
    const char priorityList[14] = {  0,  1, 2, 3,
                                    12, 14, 4, 5,
                                     6,  7, 8, 9,
                                    10, 11 };
    const int nbreg = sizeof(priorityList);
    int i, r;
    for (i=0 ; i<nbreg ; i++) {
        r = priorityList[i];
        if (!isUsed(r)) {
            break;
        }
    }
    // this is not an error anymore, because we'll try again with
    // a lower optimization level.
    //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
    if (i >= nbreg) {
        mStatus |= OUT_OF_REGISTERS;
        // we return SP so we can more easily debug things;
        // the code will never be run anyway.
        return ARMAssemblerInterface::SP;
    }
    reserve(r);
    return r;
}

1108 
hasFreeRegs() const1109 bool RegisterAllocator::RegisterFile::hasFreeRegs() const
1110 {
1111     return ((mRegs & 0xFFFF) == 0xFFFF) ? false : true;
1112 }
1113 
countFreeRegs() const1114 int RegisterAllocator::RegisterFile::countFreeRegs() const
1115 {
1116     int f = ~mRegs & 0xFFFF;
1117     // now count number of 1
1118    f = (f & 0x5555) + ((f>>1) & 0x5555);
1119    f = (f & 0x3333) + ((f>>2) & 0x3333);
1120    f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
1121    f = (f & 0x00FF) + ((f>>8) & 0x00FF);
1122    return f;
1123 }
1124 
recycle(int reg)1125 void RegisterAllocator::RegisterFile::recycle(int reg)
1126 {
1127     LOG_FATAL_IF(!isUsed(reg),
1128             "recycling unallocated register %d",
1129             reg);
1130     mRegs &= ~(1<<reg);
1131 }
1132 
void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
{
    LOG_FATAL_IF((mRegs & regMask)!=regMask,
            "recycling unallocated registers "
            "(recycle=%08x, allocated=%08x, unallocated=%08x)",
            regMask, mRegs, mRegs&regMask);
    mRegs &= ~regMask;
}

uint32_t RegisterAllocator::RegisterFile::touched() const
{
    return mTouched;
}

// ----------------------------------------------------------------------------

}; // namespace android