1 /* libs/pixelflinger/codeflinger/blending.cpp
2 **
3 ** Copyright 2006, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 ** http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18 #include <assert.h>
19 #include <stdint.h>
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <sys/types.h>
23
24 #include <cutils/log.h>
25
26 #include "codeflinger/GGLAssembler.h"
27
28
29 namespace android {
30
build_fog(component_t & temp,int component,Scratch & regs)31 void GGLAssembler::build_fog(
32 component_t& temp, // incomming fragment / output
33 int component,
34 Scratch& regs)
35 {
36 if (mInfo[component].fog) {
37 Scratch scratches(registerFile());
38 comment("fog");
39
40 integer_t fragment(temp.reg, temp.h, temp.flags);
41 if (!(temp.flags & CORRUPTIBLE)) {
42 temp.reg = regs.obtain();
43 temp.flags |= CORRUPTIBLE;
44 }
45
46 integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
47 LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
48 immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
49
50 integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
51 CONTEXT_LOAD(factor.reg, generated_vars.f);
52
53 // clamp fog factor (TODO: see if there is a way to guarantee
54 // we won't overflow, when setting the iterators)
55 BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
56 CMP(AL, factor.reg, imm( 0x10000 ));
57 MOV(HS, 0, factor.reg, imm( 0x10000 ));
58
59 build_blendFOneMinusF(temp, factor, fragment, fogColor);
60 }
61 }
62
build_blending(component_t & temp,const pixel_t & pixel,int component,Scratch & regs)63 void GGLAssembler::build_blending(
64 component_t& temp, // incomming fragment / output
65 const pixel_t& pixel, // framebuffer
66 int component,
67 Scratch& regs)
68 {
69 if (!mInfo[component].blend)
70 return;
71
72 int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
73 int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
74 if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
75 fs = GGL_ONE;
76 const int blending = blending_codes(fs, fd);
77 if (!temp.size()) {
78 // here, blending will produce something which doesn't depend on
79 // that component (eg: GL_ZERO:GL_*), so the register has not been
80 // allocated yet. Will never be used as a source.
81 temp = component_t(regs.obtain(), CORRUPTIBLE);
82 }
83
84 // we are doing real blending...
85 // fb: extracted dst
86 // fragment: extracted src
87 // temp: component_t(fragment) and result
88
89 // scoped register allocator
90 Scratch scratches(registerFile());
91 comment("blending");
92
93 // we can optimize these cases a bit...
94 // (1) saturation is not needed
95 // (2) we can use only one multiply instead of 2
96 // (3) we can reduce the register pressure
97 // R = S*f + D*(1-f) = (S-D)*f + D
98 // R = S*(1-f) + D*f = (D-S)*f + S
99
100 const bool same_factor_opt1 =
101 (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
102 (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
103 (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
104 (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
105
106 const bool same_factor_opt2 =
107 (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
108 (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
109 (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
110 (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
111
112
113 // XXX: we could also optimize these cases:
114 // R = S*f + D*f = (S+D)*f
115 // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
116 // R = S*D + D*S = 2*S*D
117
118
119 // see if we need to extract 'component' from the destination (fb)
120 integer_t fb;
121 if (blending & (BLEND_DST|FACTOR_DST)) {
122 fb.setTo(scratches.obtain(), 32);
123 extract(fb, pixel, component);
124 if (mDithering) {
125 // XXX: maybe what we should do instead, is simply
126 // expand fb -or- fragment to the larger of the two
127 if (fb.size() < temp.size()) {
128 // for now we expand 'fb' to min(fragment, 8)
129 int new_size = temp.size() < 8 ? temp.size() : 8;
130 expand(fb, fb, new_size);
131 }
132 }
133 }
134
135
136 // convert input fragment to integer_t
137 if (temp.l && (temp.flags & CORRUPTIBLE)) {
138 MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
139 temp.h -= temp.l;
140 temp.l = 0;
141 }
142 integer_t fragment(temp.reg, temp.size(), temp.flags);
143
144 // if not done yet, convert input fragment to integer_t
145 if (temp.l) {
146 // here we know temp is not CORRUPTIBLE
147 fragment.reg = scratches.obtain();
148 MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
149 fragment.flags |= CORRUPTIBLE;
150 }
151
152 if (!(temp.flags & CORRUPTIBLE)) {
153 // temp is not corruptible, but since it's the destination it
154 // will be modified, so we need to allocate a new register.
155 temp.reg = regs.obtain();
156 temp.flags &= ~CORRUPTIBLE;
157 fragment.flags &= ~CORRUPTIBLE;
158 }
159
160 if ((blending & BLEND_SRC) && !same_factor_opt1) {
161 // source (fragment) is needed for the blending stage
162 // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
163 fragment.flags &= ~CORRUPTIBLE;
164 }
165
166
167 if (same_factor_opt1) {
168 // R = S*f + D*(1-f) = (S-D)*f + D
169 integer_t factor;
170 build_blend_factor(factor, fs,
171 component, pixel, fragment, fb, scratches);
172 // fb is always corruptible from this point
173 fb.flags |= CORRUPTIBLE;
174 build_blendFOneMinusF(temp, factor, fragment, fb);
175 } else if (same_factor_opt2) {
176 // R = S*(1-f) + D*f = (D-S)*f + S
177 integer_t factor;
178 // fb is always corrruptible here
179 fb.flags |= CORRUPTIBLE;
180 build_blend_factor(factor, fd,
181 component, pixel, fragment, fb, scratches);
182 build_blendOneMinusFF(temp, factor, fragment, fb);
183 } else {
184 integer_t src_factor;
185 integer_t dst_factor;
186
187 // if destination (fb) is not needed for the blending stage,
188 // then it can be marked as CORRUPTIBLE
189 if (!(blending & BLEND_DST)) {
190 fb.flags |= CORRUPTIBLE;
191 }
192
193 // XXX: try to mark some registers as CORRUPTIBLE
194 // in most case we could make those corruptible
195 // when we're processing the last component
196 // but not always, for instance
197 // when fragment is constant and not reloaded
198 // when fb is needed for logic-ops or masking
199 // when a register is aliased (for instance with mAlphaSource)
200
201 // blend away...
202 if (fs==GGL_ZERO) {
203 if (fd==GGL_ZERO) { // R = 0
204 // already taken care of
205 } else if (fd==GGL_ONE) { // R = D
206 // already taken care of
207 } else { // R = D*fd
208 // compute fd
209 build_blend_factor(dst_factor, fd,
210 component, pixel, fragment, fb, scratches);
211 mul_factor(temp, fb, dst_factor);
212 }
213 } else if (fs==GGL_ONE) {
214 if (fd==GGL_ZERO) { // R = S
215 // NOP, taken care of
216 } else if (fd==GGL_ONE) { // R = S + D
217 component_add(temp, fb, fragment); // args order matters
218 component_sat(temp);
219 } else { // R = S + D*fd
220 // compute fd
221 build_blend_factor(dst_factor, fd,
222 component, pixel, fragment, fb, scratches);
223 mul_factor_add(temp, fb, dst_factor, component_t(fragment));
224 component_sat(temp);
225 }
226 } else {
227 // compute fs
228 build_blend_factor(src_factor, fs,
229 component, pixel, fragment, fb, scratches);
230 if (fd==GGL_ZERO) { // R = S*fs
231 mul_factor(temp, fragment, src_factor);
232 } else if (fd==GGL_ONE) { // R = S*fs + D
233 mul_factor_add(temp, fragment, src_factor, component_t(fb));
234 component_sat(temp);
235 } else { // R = S*fs + D*fd
236 mul_factor(temp, fragment, src_factor);
237 if (scratches.isUsed(src_factor.reg))
238 scratches.recycle(src_factor.reg);
239 // compute fd
240 build_blend_factor(dst_factor, fd,
241 component, pixel, fragment, fb, scratches);
242 mul_factor_add(temp, fb, dst_factor, temp);
243 if (!same_factor_opt1 && !same_factor_opt2) {
244 component_sat(temp);
245 }
246 }
247 }
248 }
249
250 // now we can be corrupted (it's the dest)
251 temp.flags |= CORRUPTIBLE;
252 }
253
build_blend_factor(integer_t & factor,int f,int component,const pixel_t & dst_pixel,integer_t & fragment,integer_t & fb,Scratch & scratches)254 void GGLAssembler::build_blend_factor(
255 integer_t& factor, int f, int component,
256 const pixel_t& dst_pixel,
257 integer_t& fragment,
258 integer_t& fb,
259 Scratch& scratches)
260 {
261 integer_t src_alpha(fragment);
262
263 // src_factor/dst_factor won't be used after blending,
264 // so it's fine to mark them as CORRUPTIBLE (if not aliased)
265 factor.flags |= CORRUPTIBLE;
266
267 switch(f) {
268 case GGL_ONE_MINUS_SRC_ALPHA:
269 case GGL_SRC_ALPHA:
270 if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
271 // we're processing alpha, so we already have
272 // src-alpha in fragment, and we need src-alpha just this time.
273 } else {
274 // alpha-src will be needed for other components
275 if (!mBlendFactorCached || mBlendFactorCached==f) {
276 src_alpha = mAlphaSource;
277 factor = mAlphaSource;
278 factor.flags &= ~CORRUPTIBLE;
279 // we already computed the blend factor before, nothing to do.
280 if (mBlendFactorCached)
281 return;
282 // this is the first time, make sure to compute the blend
283 // factor properly.
284 mBlendFactorCached = f;
285 break;
286 } else {
287 // we have a cached alpha blend factor, but we want another one,
288 // this should really not happen because by construction,
289 // we cannot have BOTH source and destination
290 // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
291 // the blending stage uses the f/(1-f) optimization
292
293 // for completeness, we handle this case though. Since there
294 // are only 2 choices, this meens we want "the other one"
295 // (1-factor)
296 factor = mAlphaSource;
297 factor.flags &= ~CORRUPTIBLE;
298 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
299 mBlendFactorCached = f;
300 return;
301 }
302 }
303 // fall-through...
304 case GGL_ONE_MINUS_DST_COLOR:
305 case GGL_DST_COLOR:
306 case GGL_ONE_MINUS_SRC_COLOR:
307 case GGL_SRC_COLOR:
308 case GGL_ONE_MINUS_DST_ALPHA:
309 case GGL_DST_ALPHA:
310 case GGL_SRC_ALPHA_SATURATE:
311 // help us find out what register we can use for the blend-factor
312 // CORRUPTIBLE registers are chosen first, or a new one is allocated.
313 if (fragment.flags & CORRUPTIBLE) {
314 factor.setTo(fragment.reg, 32, CORRUPTIBLE);
315 fragment.flags &= ~CORRUPTIBLE;
316 } else if (fb.flags & CORRUPTIBLE) {
317 factor.setTo(fb.reg, 32, CORRUPTIBLE);
318 fb.flags &= ~CORRUPTIBLE;
319 } else {
320 factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
321 }
322 break;
323 }
324
325 // XXX: doesn't work if size==1
326
327 switch(f) {
328 case GGL_ONE_MINUS_DST_COLOR:
329 case GGL_DST_COLOR:
330 factor.s = fb.s;
331 ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
332 break;
333 case GGL_ONE_MINUS_SRC_COLOR:
334 case GGL_SRC_COLOR:
335 factor.s = fragment.s;
336 ADD(AL, 0, factor.reg, fragment.reg,
337 reg_imm(fragment.reg, LSR, fragment.s-1));
338 break;
339 case GGL_ONE_MINUS_SRC_ALPHA:
340 case GGL_SRC_ALPHA:
341 factor.s = src_alpha.s;
342 ADD(AL, 0, factor.reg, src_alpha.reg,
343 reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
344 break;
345 case GGL_ONE_MINUS_DST_ALPHA:
346 case GGL_DST_ALPHA:
347 // XXX: should be precomputed
348 extract(factor, dst_pixel, GGLFormat::ALPHA);
349 ADD(AL, 0, factor.reg, factor.reg,
350 reg_imm(factor.reg, LSR, factor.s-1));
351 break;
352 case GGL_SRC_ALPHA_SATURATE:
353 // XXX: should be precomputed
354 // XXX: f = min(As, 1-Ad)
355 // btw, we're guaranteed that Ad's size is <= 8, because
356 // it's extracted from the framebuffer
357 break;
358 }
359
360 switch(f) {
361 case GGL_ONE_MINUS_DST_COLOR:
362 case GGL_ONE_MINUS_SRC_COLOR:
363 case GGL_ONE_MINUS_DST_ALPHA:
364 case GGL_ONE_MINUS_SRC_ALPHA:
365 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
366 }
367
368 // don't need more than 8-bits for the blend factor
369 // and this will prevent overflows in the multiplies later
370 if (factor.s > 8) {
371 MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
372 factor.s = 8;
373 }
374 }
375
blending_codes(int fs,int fd)376 int GGLAssembler::blending_codes(int fs, int fd)
377 {
378 int blending = 0;
379 switch(fs) {
380 case GGL_ONE:
381 blending |= BLEND_SRC;
382 break;
383
384 case GGL_ONE_MINUS_DST_COLOR:
385 case GGL_DST_COLOR:
386 blending |= FACTOR_DST|BLEND_SRC;
387 break;
388 case GGL_ONE_MINUS_DST_ALPHA:
389 case GGL_DST_ALPHA:
390 // no need to extract 'component' from the destination
391 // for the blend factor, because we need ALPHA only.
392 blending |= BLEND_SRC;
393 break;
394
395 case GGL_ONE_MINUS_SRC_COLOR:
396 case GGL_SRC_COLOR:
397 blending |= FACTOR_SRC|BLEND_SRC;
398 break;
399 case GGL_ONE_MINUS_SRC_ALPHA:
400 case GGL_SRC_ALPHA:
401 case GGL_SRC_ALPHA_SATURATE:
402 blending |= FACTOR_SRC|BLEND_SRC;
403 break;
404 }
405 switch(fd) {
406 case GGL_ONE:
407 blending |= BLEND_DST;
408 break;
409
410 case GGL_ONE_MINUS_DST_COLOR:
411 case GGL_DST_COLOR:
412 blending |= FACTOR_DST|BLEND_DST;
413 break;
414 case GGL_ONE_MINUS_DST_ALPHA:
415 case GGL_DST_ALPHA:
416 blending |= FACTOR_DST|BLEND_DST;
417 break;
418
419 case GGL_ONE_MINUS_SRC_COLOR:
420 case GGL_SRC_COLOR:
421 blending |= FACTOR_SRC|BLEND_DST;
422 break;
423 case GGL_ONE_MINUS_SRC_ALPHA:
424 case GGL_SRC_ALPHA:
425 // no need to extract 'component' from the source
426 // for the blend factor, because we need ALPHA only.
427 blending |= BLEND_DST;
428 break;
429 }
430 return blending;
431 }
432
433 // ---------------------------------------------------------------------------
434
build_blendFOneMinusF(component_t & temp,const integer_t & factor,const integer_t & fragment,const integer_t & fb)435 void GGLAssembler::build_blendFOneMinusF(
436 component_t& temp,
437 const integer_t& factor,
438 const integer_t& fragment,
439 const integer_t& fb)
440 {
441 // R = S*f + D*(1-f) = (S-D)*f + D
442 Scratch scratches(registerFile());
443 // compute S-D
444 integer_t diff(fragment.flags & CORRUPTIBLE ?
445 fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
446 const int shift = fragment.size() - fb.size();
447 if (shift>0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
448 else if (shift<0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
449 else RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
450 mul_factor_add(temp, diff, factor, component_t(fb));
451 }
452
build_blendOneMinusFF(component_t & temp,const integer_t & factor,const integer_t & fragment,const integer_t & fb)453 void GGLAssembler::build_blendOneMinusFF(
454 component_t& temp,
455 const integer_t& factor,
456 const integer_t& fragment,
457 const integer_t& fb)
458 {
459 // R = S*f + D*(1-f) = (S-D)*f + D
460 Scratch scratches(registerFile());
461 // compute D-S
462 integer_t diff(fb.flags & CORRUPTIBLE ?
463 fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
464 const int shift = fragment.size() - fb.size();
465 if (shift>0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
466 else if (shift<0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
467 else SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
468 mul_factor_add(temp, diff, factor, component_t(fragment));
469 }
470
471 // ---------------------------------------------------------------------------
472
mul_factor(component_t & d,const integer_t & v,const integer_t & f)473 void GGLAssembler::mul_factor( component_t& d,
474 const integer_t& v,
475 const integer_t& f)
476 {
477 int vs = v.size();
478 int fs = f.size();
479 int ms = vs+fs;
480
481 // XXX: we could have special cases for 1 bit mul
482
483 // all this code below to use the best multiply instruction
484 // wrt the parameters size. We take advantage of the fact
485 // that the 16-bits multiplies allow a 16-bit shift
486 // The trick is that we just make sure that we have at least 8-bits
487 // per component (which is enough for a 8 bits display).
488
489 int xy;
490 int vshift = 0;
491 int fshift = 0;
492 int smulw = 0;
493
494 if (vs<16) {
495 if (fs<16) {
496 xy = xyBB;
497 } else if (GGL_BETWEEN(fs, 24, 31)) {
498 ms -= 16;
499 xy = xyTB;
500 } else {
501 // eg: 15 * 18 -> 15 * 15
502 fshift = fs - 15;
503 ms -= fshift;
504 xy = xyBB;
505 }
506 } else if (GGL_BETWEEN(vs, 24, 31)) {
507 if (fs<16) {
508 ms -= 16;
509 xy = xyTB;
510 } else if (GGL_BETWEEN(fs, 24, 31)) {
511 ms -= 32;
512 xy = xyTT;
513 } else {
514 // eg: 24 * 18 -> 8 * 18
515 fshift = fs - 15;
516 ms -= 16 + fshift;
517 xy = xyTB;
518 }
519 } else {
520 if (fs<16) {
521 // eg: 18 * 15 -> 15 * 15
522 vshift = vs - 15;
523 ms -= vshift;
524 xy = xyBB;
525 } else if (GGL_BETWEEN(fs, 24, 31)) {
526 // eg: 18 * 24 -> 15 * 8
527 vshift = vs - 15;
528 ms -= 16 + vshift;
529 xy = xyBT;
530 } else {
531 // eg: 18 * 18 -> (15 * 18)>>16
532 fshift = fs - 15;
533 ms -= 16 + fshift;
534 xy = yB; //XXX SMULWB
535 smulw = 1;
536 }
537 }
538
539 ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
540
541 int vreg = v.reg;
542 int freg = f.reg;
543 if (vshift) {
544 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
545 vreg = d.reg;
546 }
547 if (fshift) {
548 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
549 freg = d.reg;
550 }
551 if (smulw) SMULW(AL, xy, d.reg, vreg, freg);
552 else SMUL(AL, xy, d.reg, vreg, freg);
553
554
555 d.h = ms;
556 if (mDithering) {
557 d.l = 0;
558 } else {
559 d.l = fs;
560 d.flags |= CLEAR_LO;
561 }
562 }
563
mul_factor_add(component_t & d,const integer_t & v,const integer_t & f,const component_t & a)564 void GGLAssembler::mul_factor_add( component_t& d,
565 const integer_t& v,
566 const integer_t& f,
567 const component_t& a)
568 {
569 // XXX: we could have special cases for 1 bit mul
570 Scratch scratches(registerFile());
571
572 int vs = v.size();
573 int fs = f.size();
574 int as = a.h;
575 int ms = vs+fs;
576
577 ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
578
579 integer_t add(a.reg, a.h, a.flags);
580
581 // 'a' is a component_t but it is guaranteed to have
582 // its high bits set to 0. However in the dithering case,
583 // we can't get away with truncating the potentially bad bits
584 // so extraction is needed.
585
586 if ((mDithering) && (a.size() < ms)) {
587 // we need to expand a
588 if (!(a.flags & CORRUPTIBLE)) {
589 // ... but it's not corruptible, so we need to pick a
590 // temporary register.
591 // Try to uses the destination register first (it's likely
592 // to be usable, unless it aliases an input).
593 if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
594 add.reg = d.reg;
595 } else {
596 add.reg = scratches.obtain();
597 }
598 }
599 expand(add, a, ms); // extracts and expands
600 as = ms;
601 }
602
603 if (ms == as) {
604 if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
605 else MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
606 } else {
607 int temp = d.reg;
608 if (temp == add.reg) {
609 // the mul will modify add.reg, we need an intermediary reg
610 if (v.flags & CORRUPTIBLE) temp = v.reg;
611 else if (f.flags & CORRUPTIBLE) temp = f.reg;
612 else temp = scratches.obtain();
613 }
614
615 if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
616 else MUL(AL, 0, temp, v.reg, f.reg);
617
618 if (ms>as) {
619 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
620 } else if (ms<as) {
621 // not sure if we should expand the mul instead?
622 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
623 }
624 }
625
626 d.h = ms;
627 if (mDithering) {
628 d.l = a.l;
629 } else {
630 d.l = fs>a.l ? fs : a.l;
631 d.flags |= CLEAR_LO;
632 }
633 }
634
component_add(component_t & d,const integer_t & dst,const integer_t & src)635 void GGLAssembler::component_add(component_t& d,
636 const integer_t& dst, const integer_t& src)
637 {
638 // here we're guaranteed that fragment.size() >= fb.size()
639 const int shift = src.size() - dst.size();
640 if (!shift) {
641 ADD(AL, 0, d.reg, src.reg, dst.reg);
642 } else {
643 ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
644 }
645
646 d.h = src.size();
647 if (mDithering) {
648 d.l = 0;
649 } else {
650 d.l = shift;
651 d.flags |= CLEAR_LO;
652 }
653 }
654
component_sat(const component_t & v)655 void GGLAssembler::component_sat(const component_t& v)
656 {
657 const int one = ((1<<v.size())-1)<<v.l;
658 CMP(AL, v.reg, imm( 1<<v.h ));
659 if (isValidImmediate(one)) {
660 MOV(HS, 0, v.reg, imm( one ));
661 } else if (isValidImmediate(~one)) {
662 MVN(HS, 0, v.reg, imm( ~one ));
663 } else {
664 MOV(HS, 0, v.reg, imm( 1<<v.h ));
665 SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
666 }
667 }
668
669 // ----------------------------------------------------------------------------
670
671 }; // namespace android
672
673