/*
 * Layout of a 32-bit ARGB pixel: one 8-bit component per byte, alpha in
 * the most significant byte (A, R, G, B from high to low).
 */
#define COMPONENT_SIZE 8
#define MASK 0xff
#define ONE_HALF 0x80           /* rounding bias for the x/255 approximations */

/* Shift amounts parenthesized so they stay atomic inside larger expressions. */
#define A_SHIFT (8 * 3)
#define R_SHIFT (8 * 2)
#define G_SHIFT 8
#define A_MASK 0xff000000
#define R_MASK 0xff0000
#define G_MASK 0xff00

/* Masks selecting the red+blue and alpha+green component pairs. */
#define RB_MASK 0xff00ff
#define AG_MASK 0xff00ff00
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x1000100

/* Extract a single 8-bit component from a 32-bit ARGB pixel. */
#define ALPHA_8(x) ((x) >> A_SHIFT)
#define RED_8(x) (((x) >> R_SHIFT) & MASK)
#define GREEN_8(x) (((x) >> G_SHIFT) & MASK)
#define BLUE_8(x) ((x) & MASK)
21
/*
 * ARMv6 has the UQADD8 instruction, which implements unsigned saturated
 * addition for 8-bit values packed in 32-bit registers. It is very useful
 * for the UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which would
 * otherwise need a lot of arithmetic operations to simulate this operation).
 * Since most of the major ARM Linux distros are built for ARMv7, we are
 * much less dependent on runtime CPU detection and can get practical
 * benefits from conditional compilation here for a lot of users.
 */
31
#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \
    !defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__))
#if defined(__ARM_ARCH_6__)   || defined(__ARM_ARCH_6J__)  || \
    defined(__ARM_ARCH_6K__)  || defined(__ARM_ARCH_6Z__)  || \
    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
    defined(__ARM_ARCH_6M__)  || defined(__ARM_ARCH_7__)   || \
    defined(__ARM_ARCH_7A__)  || defined(__ARM_ARCH_7R__)  || \
    defined(__ARM_ARCH_7M__)  || defined(__ARM_ARCH_7EM__)

/*
 * Per-byte unsigned saturated addition of two packed 8888 values using
 * the ARMv6 UQADD8 instruction.  The "%r" constraint marks x as
 * commutable with y, giving the register allocator more freedom.
 */
static force_inline uint32_t
un8x4_add_un8x4 (uint32_t x, uint32_t y)
{
    uint32_t t;
    asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y));
    return t;
}

/* Saturated per-component add of all four components. */
#define UN8x4_ADD_UN8x4(x, y) \
    ((x) = un8x4_add_un8x4 ((x), (y)))

/* rb-pair variant: t receives the result, which is also stored in x. */
#define UN8_rb_ADD_UN8_rb(x, y, t) \
    ((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t))

/* Single-component variant; the macro's value is the saturated sum. */
#define ADD_UN8(x, y, t) \
    ((t) = (x), un8x4_add_un8x4 ((t), (y)))

#endif
#endif
60
/*****************************************************************************/

/*
 * Helper macros.
 */
66
/*
 * MUL_UN8(a, b, t): rounded (a * b) / 255 for 8-bit operands.
 * t is a caller-supplied scratch lvalue (at least 16 bits wide); the
 * macro's value is ((t >> 8) + t) >> 8 with t = a * b + 0x80, the
 * standard exact round-to-nearest division by 255.
 */
#define MUL_UN8(a, b, t)						\
    ((t) = (a) * (uint16_t) (b) + ONE_HALF,				\
     ((((t) >> G_SHIFT) + (t)) >> G_SHIFT))
69
/*
 * DIV_UN8(a, b): rounded (a * 255) / b — the inverse of MUL_UN8, used
 * e.g. to un-premultiply a component by its alpha.  b must be non-zero.
 */
#define DIV_UN8(a, b)							\
    (((uint16_t) (a) * MASK + ((b) / 2)) / (b))
72
/*
 * ADD_UN8(x, y, t): saturated add, min (x + y, 255).  When the 8-bit
 * sum overflows, (t >> 8) is 1, so (0 - (t >> 8)) becomes an all-ones
 * mask that forces every bit on before truncation back to 8 bits.
 * Only defined here if no machine-specific version (e.g. the ARMv6
 * UQADD8 one above) has already provided it.
 */
#ifndef ADD_UN8
#define ADD_UN8(x, y, t)						\
    ((t) = (x) + (y),							\
     (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT))))
#endif
78
/*
 * DIV_ONE_UN8(x): rounded x / 255, exact for 0 <= x <= 255 * 255.
 * Uses the add-high-byte-back trick instead of an actual division.
 */
#define DIV_ONE_UN8(x)							\
    (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
81
/*
 * The methods below use some tricks to be able to process two color
 * components at the same time.
 */
86
/*
 * x_rb = (x_rb * a) / 255
 *
 * Scales the red and blue components (packed as 0x00RR00BB) by the
 * 8-bit factor a in one multiplication; the per-component rounding
 * (+ 0x80, then fold each high byte back in) happens in parallel.
 * t is a caller-supplied uint32_t scratch lvalue.
 */
#define UN8_rb_MUL_UN8(x, a, t)						\
    do									\
    {									\
	t = ((x) & RB_MASK) * (a);					\
	t += RB_ONE_HALF;						\
	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;		\
	x &= RB_MASK;							\
    } while (0)
98
/*
 * x_rb = min (x_rb + y_rb, 255) per component.
 *
 * A component that overflows carries into bit 8 (blue) or bit 24 (red);
 * subtracting those carries from RB_MASK_PLUS_ONE yields 0xff in every
 * overflowed component position, which the OR then saturates with.
 * Only defined here if no machine-specific version exists.
 */
#ifndef UN8_rb_ADD_UN8_rb
#define UN8_rb_ADD_UN8_rb(x, y, t)					\
    do									\
    {									\
	t = ((x) + (y));						\
	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);		\
	x = (t & RB_MASK);						\
    } while (0)
#endif
111
/*
 * x_rb = (x_rb * a_rb) / 255
 *
 * Component-wise product of two red/blue pairs: blue * blue is formed
 * in the low half of t, and red (kept in place at bits 16-23) times
 * a's 8-bit red in the high half; both halves are then rounded in
 * parallel as in UN8_rb_MUL_UN8.  Arguments are fully parenthesized
 * so that expression arguments expand safely.
 */
#define UN8_rb_MUL_UN8_rb(x, a, t)					\
    do									\
    {									\
	t = ((x) & MASK) * ((a) & MASK);				\
	t |= ((x) & R_MASK) * (((a) >> R_SHIFT) & MASK);		\
	t += RB_ONE_HALF;						\
	t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;		\
	(x) = t & RB_MASK;						\
    } while (0)
124
/*
 * x_c = (x_c * a) / 255
 *
 * Scales all four components of x by the 8-bit factor a, processing
 * the red/blue pair and the (shifted-down) alpha/green pair in
 * parallel via UN8_rb_MUL_UN8.
 */
#define UN8x4_MUL_UN8(x, a)						\
    do									\
    {									\
	uint32_t r1__, r2__, t__;					\
									\
	r1__ = (x);							\
	UN8_rb_MUL_UN8 (r1__, (a), t__);				\
									\
	r2__ = (x) >> G_SHIFT;						\
	UN8_rb_MUL_UN8 (r2__, (a), t__);				\
									\
	(x) = r1__ | (r2__ << G_SHIFT);					\
    } while (0)
141
/*
 * x_c = (x_c * a) / 255 + y_c
 *
 * Scale the components of x by a, then add the components of y with
 * per-component saturation; red/blue and alpha/green pairs are handled
 * in parallel.
 */
#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y)				\
    do									\
    {									\
	uint32_t r1__, r2__, r3__, t__;					\
									\
	r1__ = (x);							\
	r2__ = (y) & RB_MASK;						\
	UN8_rb_MUL_UN8 (r1__, (a), t__);				\
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
									\
	r2__ = (x) >> G_SHIFT;						\
	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
	UN8_rb_MUL_UN8 (r2__, (a), t__);				\
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
									\
	(x) = r1__ | (r2__ << G_SHIFT);					\
    } while (0)
162
/*
 * x_c = (x_c * a + y_c * b) / 255
 *
 * Weighted per-component blend of x and y by the 8-bit factors a and b,
 * with saturation on the sum; pairs are processed in parallel.
 */
#define UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(x, a, y, b)			\
    do									\
    {									\
	uint32_t r1__, r2__, r3__, t__;					\
									\
	r1__ = (x);							\
	r2__ = (y);							\
	UN8_rb_MUL_UN8 (r1__, (a), t__);				\
	UN8_rb_MUL_UN8 (r2__, (b), t__);				\
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
									\
	r2__ = ((x) >> G_SHIFT);					\
	r3__ = ((y) >> G_SHIFT);					\
	UN8_rb_MUL_UN8 (r2__, (a), t__);				\
	UN8_rb_MUL_UN8 (r3__, (b), t__);				\
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
									\
	(x) = r1__ | (r2__ << G_SHIFT);					\
    } while (0)
185
/*
 * x_c = (x_c * a_c) / 255
 *
 * Component-wise product of two 8888 pixels (e.g. modulating a source
 * by a per-component mask), pairs processed in parallel.
 */
#define UN8x4_MUL_UN8x4(x, a)						\
    do									\
    {									\
	uint32_t r1__, r2__, r3__, t__;					\
									\
	r1__ = (x);							\
	r2__ = (a);							\
	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);				\
									\
	r2__ = (x) >> G_SHIFT;						\
	r3__ = (a) >> G_SHIFT;						\
	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);				\
									\
	(x) = r1__ | (r2__ << G_SHIFT);					\
    } while (0)
204
/*
 * x_c = (x_c * a_c) / 255 + y_c
 *
 * Component-wise product of x and a, then a saturated add of y's
 * components; pairs processed in parallel.
 */
#define UN8x4_MUL_UN8x4_ADD_UN8x4(x, a, y)				\
    do									\
    {									\
	uint32_t r1__, r2__, r3__, t__;					\
									\
	r1__ = (x);							\
	r2__ = (a);							\
	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);				\
	r2__ = (y) & RB_MASK;						\
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
									\
	r2__ = ((x) >> G_SHIFT);					\
	r3__ = ((a) >> G_SHIFT);					\
	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);				\
	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
									\
	(x) = r1__ | (r2__ << G_SHIFT);					\
    } while (0)
227
/*
 * x_c = (x_c * a_c + y_c * b) / 255
 *
 * x is modulated per-component by a, y uniformly by the 8-bit factor b,
 * and the results are added with saturation; pairs processed in
 * parallel.  The final store parenthesizes x, matching the sibling
 * macros, so expression arguments expand safely.
 */
#define UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8(x, a, y, b)			\
    do									\
    {									\
	uint32_t r1__, r2__, r3__, t__;					\
									\
	r1__ = (x);							\
	r2__ = (a);							\
	UN8_rb_MUL_UN8_rb (r1__, r2__, t__);				\
	r2__ = (y);							\
	UN8_rb_MUL_UN8 (r2__, (b), t__);				\
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
									\
	r2__ = (x) >> G_SHIFT;						\
	r3__ = (a) >> G_SHIFT;						\
	UN8_rb_MUL_UN8_rb (r2__, r3__, t__);				\
	r3__ = (y) >> G_SHIFT;						\
	UN8_rb_MUL_UN8 (r3__, (b), t__);				\
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
									\
	(x) = r1__ | (r2__ << G_SHIFT);					\
    } while (0)
252
/*
 * x_c = min (x_c + y_c, 255)
 *
 * Saturated per-component add of two 8888 pixels, pairs processed in
 * parallel.  Only defined here if no machine-specific version (e.g.
 * the ARMv6 UQADD8 one above) exists.  The final store parenthesizes
 * x, matching the sibling macros.
 */
#ifndef UN8x4_ADD_UN8x4
#define UN8x4_ADD_UN8x4(x, y)						\
    do									\
    {									\
	uint32_t r1__, r2__, r3__, t__;					\
									\
	r1__ = (x) & RB_MASK;						\
	r2__ = (y) & RB_MASK;						\
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__);				\
									\
	r2__ = ((x) >> G_SHIFT) & RB_MASK;				\
	r3__ = ((y) >> G_SHIFT) & RB_MASK;				\
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__);				\
									\
	(x) = r1__ | (r2__ << G_SHIFT);					\
    } while (0)
#endif
273