1 /*
2 * Copyright (C) 2011 University of Szeged
3 * Copyright (C) 2011 Zoltan Herczeg
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "FELightingNEON.h"
29
30 #if CPU(ARM_NEON) && COMPILER(GCC)
31
32 #include <wtf/Vector.h>
33
34 namespace WebCore {
35
36 // These constants are copied to the following SIMD registers:
37 // ALPHAX_Q ALPHAY_Q REMAPX_D REMAPY_D
38
// 16-byte aligned table of 24 shorts. The assembly in this file reads 16 shorts into
// ALPHAX_Q/ALPHAY_Q and 8 more into REMAPX_D/REMAPY_D from the pointer stored at
// DRAWING_CONSTANTS_OFFSET (presumably this table; the code that stores that pointer is
// not in this file).
39 WTF_ALIGNED(short, s_FELightingConstantsForNeon[], 16) = {
40 // Alpha coefficients: first line is the X kernel (ALPHAX_Q), second line the Y kernel
// (ALPHAY_Q). Each weight is multiplied with the alpha value gathered into the same lane
// by the remapping indices below, i.e. in the order
// middle[3], top[1], top[2], top[3], middle[1], bottom[1], bottom[2], bottom[3].
41 -2, 1, 0, -1, 2, 1, 0, -1,
42 0, -1, -2, -1, 0, 1, 2, 1,
43 // Remapping indices. Every 16-bit entry packs two byte indices for vtbl.8 into the
// 24-byte table {TOP_ROW_D, MIDDLE_ROW_D, BOTTOM_ROW_D} (bytes 0-7, 8-15, 16-23).
// Assuming little-endian element order, 0x0f0e selects bytes 14 and 15, i.e. the 16-bit
// lane 3 of the middle row. Lane 2 of the middle row (the current pixel) is never
// selected; lane 0 of every row is not selected either.
44 0x0f0e, 0x0302, 0x0504, 0x0706,
45 0x0b0a, 0x1312, 0x1514, 0x1716,
46 };
47
feLightingConstantsForNeon()48 short* feLightingConstantsForNeon()
49 {
50 return s_FELightingConstantsForNeon;
51 }
52
// Two-step stringification: TOSTRING lets the preprocessor expand its argument first,
// ASSTRING then turns the expanded text into a string literal that can be concatenated
// into the assembly source below.
53 #define ASSTRING(str) #str
54 #define TOSTRING(value) ASSTRING(value)
55
// Byte offsets, as string literals, of the 32-bit fields that the assembly loads with
// "ldr" from the block whose address arrives in r0 (kept in PAINTING_DATA_R).
// NOTE(review): the matching structure is declared outside this file; these offsets must
// stay in sync with its layout.
56 #define PIXELS_OFFSET TOSTRING(0)
57 #define WIDTH_OFFSET TOSTRING(4)
58 #define HEIGHT_OFFSET TOSTRING(8)
59 #define FLAGS_OFFSET TOSTRING(12)
60 #define SPECULAR_EXPONENT_OFFSET TOSTRING(16)
61 #define CONE_EXPONENT_OFFSET TOSTRING(20)
// The next two fields are pointers: one to the float arguments, one to the short constants.
62 #define FLOAT_ARGUMENTS_OFFSET TOSTRING(24)
63 #define DRAWING_CONSTANTS_OFFSET TOSTRING(28)
// Statement terminator appended to every line of assembly text.
64 #define NL "\n"
65
66 // Register allocation (ARM core registers). r0-r3 are used as scratch by the assembly.
67 #define PAINTING_DATA_R "r11"
// r11 is reused: once every field has been loaded from the painting data, it holds the
// original width, which is copied back into WIDTH_R at the end of each scanline.
68 #define RESET_WIDTH_R PAINTING_DATA_R
69 #define PIXELS_R "r4"
70 #define WIDTH_R "r5"
71 #define HEIGHT_R "r6"
72 #define FLAGS_R "r7"
73 #define SPECULAR_EXPONENT_R "r8"
74 #define CONE_EXPONENT_R "r10"
// Byte distance between two scanlines; computed as width * 4 + 8 in the assembly.
75 #define SCANLINE_R "r12"
76
// Scratch SIMD registers. Each TMPn_Q quad register is also addressed through its two
// double halves (TMPn_D0, TMPn_D1) and its four single-precision lanes (TMPn_S0..TMPn_S3);
// the names below spell out that overlap for q0, q1 and q2. The helper macros build these
// names by token pasting (e.g. source##_S3), so the suffixes must stay consistent.
77 #define TMP1_Q "q0"
78 #define TMP1_D0 "d0"
79 #define TMP1_S0 "s0"
80 #define TMP1_S1 "s1"
81 #define TMP1_D1 "d1"
82 #define TMP1_S2 "s2"
83 #define TMP1_S3 "s3"
84 #define TMP2_Q "q1"
85 #define TMP2_D0 "d2"
86 #define TMP2_S0 "s4"
87 #define TMP2_S1 "s5"
88 #define TMP2_D1 "d3"
89 #define TMP2_S2 "s6"
90 #define TMP2_S3 "s7"
91 #define TMP3_Q "q2"
92 #define TMP3_D0 "d4"
93 #define TMP3_S0 "s8"
94 #define TMP3_S1 "s9"
95 #define TMP3_D1 "d5"
96 #define TMP3_S2 "s10"
97 #define TMP3_S3 "s11"
98
// Scalars that live in lanes of q3: the spot light cosine, and the two running factors
// used by the POWF macros (repeated squares and repeated square roots of the base).
99 #define COSINE_OF_ANGLE "s12"
100 #define POWF_INT_S "s13"
101 #define POWF_FRAC_S "s14"
// Copy of COLOR_Q taken before the main loop; COLOR_Q itself is rewritten per pixel when
// the spot light path is taken.
102 #define SPOT_COLOR_Q "q4"
103
104 // Because of VMIN and VMAX CONST_ZERO_S and CONST_ONE_S
105 // must be placed on the same side of the double vector
// (both are the odd lane of their D register: s23 in d11, s31 in d15). The values that
// get clamped, TMP2_S1 and TMP3_S1, sit in the odd lane of TMP2_D0 / TMP3_D0 as well.
106
107 // Current pixel position (x, y, z in lanes 0-2 of q5).
108 #define POSITION_Q "q5"
109 #define POSITION_X_S "s20"
110 #define POSITION_Y_S "s21"
111 #define POSITION_Z_S "s22"
// High half of POSITION_Q; its odd lane is the constant zero, so the fourth lane of
// POSITION_Q is always 0.
112 #define CONST_ZERO_HI_D "d11"
113 #define CONST_ZERO_S "s23"
114
115 // -------------------------------
116 // Variable arguments
// Destination register ranges of the three vld1.f32 loads from the float arguments:
// READ1 fills s24-s31 (8 floats), READ2 fills q8-q9 (LIGHT_Q, DIRECTION_Q) and
// READ3 fills q10 (COLOR_Q), i.e. 20 consecutive floats in total.
117 // Misc arguments
118 #define READ1_RANGE "d12-d15"
119 #define READ2_RANGE "d16-d19"
120 #define READ3_RANGE "d20-d21"
121
// Floats 0-2 of the first load. s27 (float 3) is loaded but has no name in this file.
122 #define SCALE_S "s24"
123 #define SCALE_DIV4_S "s25"
124 #define DIFFUSE_CONST_S "s26"
125
126 // Spot light cone parameters and the constant one (floats 4-7 of the first load).
127 #define CONE_CUT_OFF_S "s28"
128 #define CONE_FULL_LIGHT_S "s29"
129 #define CONE_CUT_OFF_RANGE_S "s30"
// d15 holds s30 and s31; it is the second operand of the vmin.f32 clamps.
130 #define CONST_ONE_HI_D "d15"
131 #define CONST_ONE_S "s31"
132
// Light vector (or position), spot light direction and light colour.
133 #define LIGHT_Q "q8"
134 #define DIRECTION_Q "q9"
135 #define COLOR_Q "q10"
136 // -------------------------------
137 // Constant coefficients
// Destination ranges of the two vld1.s16 loads from the drawing constants:
// READ4 fills q11-q12 (ALPHAX_Q, ALPHAY_Q), READ5 fills d26-d27 (REMAPX_D, REMAPY_D).
138 #define READ4_RANGE "d22-d25"
139 #define READ5_RANGE "d26-d27"
140
141 #define ALPHAX_Q "q11"
142 #define ALPHAY_Q "q12"
143 #define REMAPX_D "d26"
144 #define REMAPY_D "d27"
145 // -------------------------------
146
// Sliding window of alpha values, one D register (four 16-bit lanes) per image row.
// ALL_ROWS_D is the register list used as the 24-byte lookup table of vtbl.8.
147 #define ALL_ROWS_D "{d28,d29,d30}"
148 #define TOP_ROW_D "d28"
149 #define MIDDLE_ROW_D "d29"
150 #define BOTTOM_ROW_D "d30"
151
// GET_LENGTH(source, temp): stores sqrt(x*x + y*y + z*z) of lanes 0-2 of source##_Q into
// source##_S3. Lanes 0-2 of source are left unchanged; temp##_Q is overwritten with the
// lane-wise squares. Both arguments are register-name prefixes such as TMP1 / TMP2.
152 #define GET_LENGTH(source, temp) \
153 "vmul.f32 " temp##_Q ", " source##_Q ", " source##_Q NL \
154 "vadd.f32 " source##_S3 ", " temp##_S0 ", " temp##_S1 NL \
155 "vadd.f32 " source##_S3 ", " source##_S3 ", " temp##_S2 NL \
156 "vsqrt.f32 " source##_S3 ", " source##_S3 NL
157
// DOT_PRODUCT(destination, source1, source2): leaves the dot product of lanes 0-2 of the
// two sources in destination##_S0. destination##_S1 and destination##_S2 keep the
// individual lane products.
158 // destination##_S3 receives source1##_S3 * source2##_S3, i.e. the product of the two
// lengths when both sources carry their length in lane 3 (see GET_LENGTH).
159 #define DOT_PRODUCT(destination, source1, source2) \
160 "vmul.f32 " destination##_Q ", " source1##_Q ", " source2##_Q NL \
161 "vadd.f32 " destination##_S0 ", " destination##_S0 ", " destination##_S1 NL \
162 "vadd.f32 " destination##_S0 ", " destination##_S0 ", " destination##_S2 NL
163
// MULTIPLY_BY_DIFFUSE_CONST(a, b): writes the light strength to TMP2_S1.
//   FLAG_DIFFUSE_CONST_IS_1 clear in FLAGS_R: TMP2_S1 = DIFFUSE_CONST_S * a / b
//   FLAG_DIFFUSE_CONST_IS_1 set in FLAGS_R:   TMP2_S1 = a / b
// The call sites pass a dot product (or a z component) as the first argument and a length
// (or a product of lengths) as the second, so the parameter names are only loosely
// descriptive. Alters the condition flags. FLAG_DIFFUSE_CONST_IS_1 is defined outside
// this file.
164 #define MULTIPLY_BY_DIFFUSE_CONST(normalVectorLength, dotProductLength) \
165 "tst " FLAGS_R ", #" TOSTRING(FLAG_DIFFUSE_CONST_IS_1) NL \
166 "vmuleq.f32 " TMP2_S1 ", " DIFFUSE_CONST_S ", " normalVectorLength NL \
167 "vdiveq.f32 " TMP2_S1 ", " TMP2_S1 ", " dotProductLength NL \
168 "vdivne.f32 " TMP2_S1 ", " normalVectorLength ", " dotProductLength NL
169
// POWF_SQR(value, exponent, current, remaining): one step of the integer part of POWF.
// If the bit "current" is set in the exponent register, value is multiplied by POWF_INT_S.
// If any bit of the "remaining" mask (the higher integer bits) is set, POWF_INT_S is
// squared for the next step. current/remaining are stringized as immediates.
// Alters the condition flags.
170 #define POWF_SQR(value, exponent, current, remaining) \
171 "tst " exponent ", #" ASSTRING(current) NL \
172 "vmulne.f32 " value ", " value ", " POWF_INT_S NL \
173 "tst " exponent ", #" ASSTRING(remaining) NL \
174 "vmulne.f32 " POWF_INT_S ", " POWF_INT_S ", " POWF_INT_S NL
175
// POWF_SQRT(value, exponent, current, remaining): one step of the fractional part of POWF.
// If any bit of the "remaining" mask (this fraction bit and all lower ones) is set,
// POWF_FRAC_S is replaced by its square root. If the bit "current" is set, value is then
// multiplied by POWF_FRAC_S. Alters the condition flags.
176 #define POWF_SQRT(value, exponent, current, remaining) \
177 "tst " exponent ", #" ASSTRING(remaining) NL \
178 "vsqrtne.f32 " POWF_FRAC_S ", " POWF_FRAC_S NL \
179 "tst " exponent ", #" ASSTRING(current) NL \
180 "vmulne.f32 " value ", " value ", " POWF_FRAC_S NL
181
182 // This simplified powf function is sufficiently accurate.
// POWF(value, exponent): replaces the single-precision register "value" by value raised
// to the power held in the core register "exponent". Only the low 12 bits of the exponent
// are tested: bits 6-11 (mask 0xfc0) are the integer part and bits 0-5 (mask 0x03f) the
// fraction in units of 1/64, so bit 0x040 stands for 1 and bit 0x020 for 1/2.
// The base is copied to POWF_INT_S and/or POWF_FRAC_S (only when the respective part is
// non-zero), value is reset to CONST_ONE_S, and the result is accumulated by repeated
// squaring (integer bits, low to high) and repeated square roots (fraction bits, high to
// low). Clobbers POWF_INT_S, POWF_FRAC_S and the condition flags.
183 #define POWF(value, exponent) \
184 "tst " exponent ", #0xfc0" NL \
185 "vmovne.f32 " POWF_INT_S ", " value NL \
186 "tst " exponent ", #0x03f" NL \
187 "vmovne.f32 " POWF_FRAC_S ", " value NL \
188 "vmov.f32 " value ", " CONST_ONE_S NL \
189 \
190 POWF_SQR(value, exponent, 0x040, 0xf80) \
191 POWF_SQR(value, exponent, 0x080, 0xf00) \
192 POWF_SQR(value, exponent, 0x100, 0xe00) \
193 POWF_SQR(value, exponent, 0x200, 0xc00) \
194 POWF_SQR(value, exponent, 0x400, 0x800) \
195 "tst " exponent ", #0x800" NL \
196 "vmulne.f32 " value ", " value ", " POWF_INT_S NL \
197 \
198 POWF_SQRT(value, exponent, 0x20, 0x3f) \
199 POWF_SQRT(value, exponent, 0x10, 0x1f) \
200 POWF_SQRT(value, exponent, 0x08, 0x0f) \
201 POWF_SQRT(value, exponent, 0x04, 0x07) \
202 POWF_SQRT(value, exponent, 0x02, 0x03) \
203 POWF_SQRT(value, exponent, 0x01, 0x01)
204
205 // The following algorithm is an ARM-NEON optimized version of
206 // the main loop found in FELighting.cpp. Since the whole code
207 // is redesigned to be as effective as possible (ARM specific
208 // thinking), it is four times faster than its C++ counterpart.
//
// Entry: r0 holds the address of the painting data block whose fields are read through
// the *_OFFSET macros. For every interior pixel the routine derives a normal vector from
// the surrounding alpha values, computes a light strength clamped to [0, 1] and stores the
// three resulting colour bytes in place. The alpha byte is only read, never written.
// The FLAG_* constants and the neonDrawLighting symbol name come from outside this file.
209
210 asm ( // NOLINT
211 ".globl " TOSTRING(neonDrawLighting) NL
212 TOSTRING(neonDrawLighting) ":" NL
213 // Because of the clever register allocation, nothing is stored on the stack
214 // except the saved registers.
215 // Stack must be aligned to 8 bytes: 8 core registers (32 bytes) and d8-d15 (64 bytes)
// are pushed. r12 (SCANLINE_R) is used without being saved.
216 "stmdb sp!, {r4-r8, r10, r11, lr}" NL
217 "vstmdb sp!, {d8-d15}" NL
218 "mov " PAINTING_DATA_R ", r0" NL
219
220 // The following two arguments are loaded to SIMD registers.
// r0 = pointer to the float arguments, r1 = pointer to the short constants.
221 "ldr r0, [" PAINTING_DATA_R ", #" FLOAT_ARGUMENTS_OFFSET "]" NL
222 "ldr r1, [" PAINTING_DATA_R ", #" DRAWING_CONSTANTS_OFFSET "]" NL
223 "ldr " PIXELS_R ", [" PAINTING_DATA_R ", #" PIXELS_OFFSET "]" NL
224 "ldr " WIDTH_R ", [" PAINTING_DATA_R ", #" WIDTH_OFFSET "]" NL
225 "ldr " HEIGHT_R ", [" PAINTING_DATA_R ", #" HEIGHT_OFFSET "]" NL
226 "ldr " FLAGS_R ", [" PAINTING_DATA_R ", #" FLAGS_OFFSET "]" NL
227 "ldr " SPECULAR_EXPONENT_R ", [" PAINTING_DATA_R ", #" SPECULAR_EXPONENT_OFFSET "]" NL
228 "ldr " CONE_EXPONENT_R ", [" PAINTING_DATA_R ", #" CONE_EXPONENT_OFFSET "]" NL
229
230 // Load all data to the SIMD registers with the least number of instructions.
// 20 floats go to s24-s31, LIGHT_Q, DIRECTION_Q and COLOR_Q; 24 shorts go to ALPHAX_Q,
// ALPHAY_Q, REMAPX_D and REMAPY_D.
231 "vld1.f32 { " READ1_RANGE " }, [r0]!" NL
232 "vld1.f32 { " READ2_RANGE " }, [r0]!" NL
233 "vld1.f32 { " READ3_RANGE " }, [r0]!" NL
234 "vld1.s16 {" READ4_RANGE "}, [r1]!" NL
235 "vld1.s16 {" READ5_RANGE "}, [r1]!" NL
236
237 // Initializing local variables.
// SCANLINE_R = width * 4 + 8 bytes (presumably width + 2 four-byte pixels per row, with
// WIDTH_R counting the interior pixels only - TODO confirm against the caller).
// PIXELS_R is advanced by one scanline plus 3, i.e. to byte 3 (the alpha byte that the
// ldrb instructions below read) of the first pixel of the second row.
238 "mov " SCANLINE_R ", " WIDTH_R ", lsl #2" NL
239 "add " SCANLINE_R ", " SCANLINE_R ", #8" NL
240 "add " PIXELS_R ", " PIXELS_R ", " SCANLINE_R NL
241 "add " PIXELS_R ", " PIXELS_R ", #3" NL
// An all-zero bit pattern is moved into CONST_ZERO_S, which is +0.0f.
242 "mov r0, #0" NL
243 "vmov.f32 " CONST_ZERO_S ", r0" NL
244 "vmov.f32 " POSITION_Y_S ", " CONST_ONE_S NL
// NOTE(review): the flags set by this tst are never consumed - the following vmov is
// unconditional and the next flag-dependent instruction comes after "subs r3, r3, #1".
245 "tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL
246 "vmov.f32 " SPOT_COLOR_Q ", " COLOR_Q NL
// From here on r11 no longer addresses the painting data; it keeps the original width.
247 "mov " RESET_WIDTH_R ", " WIDTH_R NL
248
// Start of a row: three columns of alpha values must be loaded before the first pixel
// can be processed, and the x position restarts at CONST_ONE_S.
249 ".mainloop:" NL
250 "mov r3, #3" NL
251 "vmov.f32 " POSITION_X_S ", " CONST_ONE_S NL
252
253 ".scanline:" NL
254 // The ROW registers are storing the alpha channel of the last three pixels.
255 // The alpha channel is stored as signed short (sint16) values. The fourth value
256 // is garbage. The following instructions are shifting out the unnecessary alpha
257 // values and load the next ones.
// r0 = alpha one scanline above, r1 = alpha one scanline below, r2 = alpha of the current
// row; PIXELS_R then moves on by 4 bytes. After the rotate-and-insert, lane 1 of each row
// holds the newest column, lane 2 the previous one, lane 3 the one before that, and
// lane 0 the value that has been shifted out.
258 "ldrb r0, [" PIXELS_R ", -" SCANLINE_R "]" NL
259 "ldrb r1, [" PIXELS_R ", +" SCANLINE_R "]" NL
260 "ldrb r2, [" PIXELS_R "], #4" NL
261 "vext.s16 " TOP_ROW_D ", " TOP_ROW_D ", " TOP_ROW_D ", #3" NL
262 "vext.s16 " MIDDLE_ROW_D ", " MIDDLE_ROW_D ", " MIDDLE_ROW_D ", #3" NL
263 "vext.s16 " BOTTOM_ROW_D ", " BOTTOM_ROW_D ", " BOTTOM_ROW_D ", #3" NL
264 "vmov.s16 " TOP_ROW_D "[1], r0" NL
265 "vmov.s16 " MIDDLE_ROW_D "[1], r2" NL
266 "vmov.s16 " BOTTOM_ROW_D "[1], r1" NL
267
268 // The two border pixels (rightmost and leftmost) are skipped when
269 // the next scanline is reached. It also jumps, when the algorithm
270 // is started, and the first free alpha values are loaded to each row.
// r3 is 3 at the start of a row and 1 for every following pixel.
271 "subs r3, r3, #1" NL
272 "bne .scanline" NL
273
274 // The light vector goes to TMP1_Q. It is constant in case of distant light.
275 // The fourth value contains the length of the light vector.
// Neither the point nor the spot flag set: take the distant light path.
276 "tst " FLAGS_R ", #" TOSTRING(FLAG_POINT_LIGHT | FLAG_SPOT_LIGHT) NL
277 "beq .distantLight" NL
278
// z = alpha of the current pixel (lane 2 of the middle row) * SCALE_S.
279 "vmov.s16 r3, " MIDDLE_ROW_D "[2]" NL
280 "vmov.f32 " POSITION_Z_S ", r3" NL
281 "vcvt.f32.s32 " POSITION_Z_S ", " POSITION_Z_S NL
282 "vmul.f32 " POSITION_Z_S ", " POSITION_Z_S ", " SCALE_S NL
283
// Light vector = LIGHT_Q - pixel position; its length is stored in TMP1_S3.
284 "vsub.f32 " TMP1_Q ", " LIGHT_Q ", " POSITION_Q NL
285 GET_LENGTH(TMP1, TMP2)
286
287 "tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL
288 "bne .cosineOfAngle" NL
289 ".visiblePixel:" NL
290
291 // | -1 0 1 | | -1 -2 -1 |
292 // X = | -2 0 2 | Y = | 0 0 0 |
293 // | -1 0 1 | | 1 2 1 |
294
295 // Multiply the alpha values by the X and Y matrices.
296
297 // Moving the 8 alpha value to TMP3.
298 "vtbl.8 " TMP3_D0 ", " ALL_ROWS_D ", " REMAPX_D NL
299 "vtbl.8 " TMP3_D1 ", " ALL_ROWS_D ", " REMAPY_D NL
300
// Weighted sum for X: lane-wise multiply, then three pairwise adds fold the eight
// products into lane 0.
301 "vmul.s16 " TMP2_Q ", " TMP3_Q ", " ALPHAX_Q NL
302 "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D1 NL
303 "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
304 "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
305 "vmov.s16 r0, " TMP2_D0 "[0]" NL
306
// Same reduction with the Y weights.
307 "vmul.s16 " TMP2_Q ", " TMP3_Q ", " ALPHAY_Q NL
308 "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D1 NL
309 "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
310 "vpadd.s16 " TMP2_D0 ", " TMP2_D0 ", " TMP2_D0 NL
311 "vmov.s16 r1, " TMP2_D0 "[0]" NL
312
313 // r0 and r1 contains the X and Y coordinates of the
314 // normal vector, respectively.
315
316 // Calculating the spot light strength.
317 "tst " FLAGS_R ", #" TOSTRING(FLAG_SPOT_LIGHT) NL
318 "beq .endLight" NL
319
// TMP3_S1 = -COSINE_OF_ANGLE; it is raised to the cone exponent unless
// FLAG_CONE_EXPONENT_IS_1 is set.
320 "vneg.f32 " TMP3_S1 ", " COSINE_OF_ANGLE NL
321 "tst " FLAGS_R ", #" TOSTRING(FLAG_CONE_EXPONENT_IS_1) NL
322 "beq .coneExpPowf" NL
323 ".coneExpPowfFinished:" NL
324
325 // Smoothing the cone edge if necessary.
// Taken when COSINE_OF_ANGLE is greater than CONE_FULL_LIGHT_S.
326 "vcmp.f32 " COSINE_OF_ANGLE ", " CONE_FULL_LIGHT_S NL
327 "fmstat" NL
328 "bhi .cutOff" NL
329 ".cutOffFinished:" NL
330
// Limit the strength in TMP3_S1 to at most CONST_ONE_S, then scale the saved light
// colour by it.
331 "vmin.f32 " TMP3_D0 ", " TMP3_D0 ", " CONST_ONE_HI_D NL
332 "vmul.f32 " COLOR_Q ", " SPOT_COLOR_Q ", " TMP3_D0 "[1]" NL
333
334 ".endLight:" NL
335 // Summarize:
336 // r0 and r1 contains the normalVector.
337 // TMP1_Q contains the light vector and its length.
338 // COLOR_Q contains the color of the light vector.
339
340 // Test whether both r0 and r1 are zero (Normal vector is (0, 0, 1)).
341 "orrs r2, r0, r1" NL
342 "bne .normalVectorIsNonZero" NL
343
344 "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_LIGHT) NL
345 "bne .specularLight1" NL
346
347 // Calculate diffuse light strength: z of the light vector divided by its length.
348 MULTIPLY_BY_DIFFUSE_CONST(TMP1_S2, TMP1_S3)
349 "b .lightStrengthCalculated" NL
350
351 ".specularLight1:" NL
352 // Calculating specular light strength.
// The length of the light vector is added to its z component and the length of the
// modified vector is recomputed into TMP1_S3.
353 "vadd.f32 " TMP1_S2 ", " TMP1_S2 ", " TMP1_S3 NL
354 GET_LENGTH(TMP1, TMP2)
355
356 // When the exponent is 1, we don't need to call an expensive powf function.
// Flag clear: TMP2_S1 = z / length, then continue at .specularExpPowf.
357 "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_EXPONENT_IS_1) NL
358 "vdiveq.f32 " TMP2_S1 ", " TMP1_S2 ", " TMP1_S3 NL
359 "beq .specularExpPowf" NL
360
361 MULTIPLY_BY_DIFFUSE_CONST(TMP1_S2, TMP1_S3)
362 "b .lightStrengthCalculated" NL
363
364 ".normalVectorIsNonZero:" NL
365 // Normal vector goes to TMP2, and its length is calculated as well.
// TMP2 = (r0 * SCALE_DIV4_S, r1 * SCALE_DIV4_S, CONST_ONE_S), length in TMP2_S3.
366 "vmov.s32 " TMP2_S0 ", r0" NL
367 "vcvt.f32.s32 " TMP2_S0 ", " TMP2_S0 NL
368 "vmul.f32 " TMP2_S0 ", " TMP2_S0 ", " SCALE_DIV4_S NL
369 "vmov.s32 " TMP2_S1 ", r1" NL
370 "vcvt.f32.s32 " TMP2_S1 ", " TMP2_S1 NL
371 "vmul.f32 " TMP2_S1 ", " TMP2_S1 ", " SCALE_DIV4_S NL
372 "vmov.f32 " TMP2_S2 ", " CONST_ONE_S NL
373 GET_LENGTH(TMP2, TMP3)
374
375 "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_LIGHT) NL
376 "bne .specularLight2" NL
377
378 // Calculating diffuse light strength.
// TMP3_S0 = normal . light, TMP3_S3 = product of the two lengths.
379 DOT_PRODUCT(TMP3, TMP2, TMP1)
380 MULTIPLY_BY_DIFFUSE_CONST(TMP3_S0, TMP3_S3)
381 "b .lightStrengthCalculated" NL
382
383 ".specularLight2:" NL
384 // Calculating specular light strength.
// Same light vector adjustment as in .specularLight1, followed by the dot product with
// the normal vector.
385 "vadd.f32 " TMP1_S2 ", " TMP1_S2 ", " TMP1_S3 NL
386 GET_LENGTH(TMP1, TMP3)
387 DOT_PRODUCT(TMP3, TMP2, TMP1)
388
389 // When the exponent is 1, we don't need to call an expensive powf function.
390 "tst " FLAGS_R ", #" TOSTRING(FLAG_SPECULAR_EXPONENT_IS_1) NL
391 "vdiveq.f32 " TMP2_S1 ", " TMP3_S0 ", " TMP3_S3 NL
392 "beq .specularExpPowf" NL
393 MULTIPLY_BY_DIFFUSE_CONST(TMP3_S0, TMP3_S3)
394
395 ".lightStrengthCalculated:" NL
396 // TMP2_S1 contains the light strength. Clamp it to [0, 1]
// (vmax / vmin work on the whole D register; only the odd lane matters here).
// The colour is scaled by the strength and converted to unsigned integers.
397 "vmax.f32 " TMP2_D0 ", " TMP2_D0 ", " CONST_ZERO_HI_D NL
398 "vmin.f32 " TMP2_D0 ", " TMP2_D0 ", " CONST_ONE_HI_D NL
399 "vmul.f32 " TMP3_Q ", " COLOR_Q ", " TMP2_D0 "[1]" NL
400 "vcvt.u32.f32 " TMP3_Q ", " TMP3_Q NL
401 "vmov.u32 r2, r3, " TMP3_S0 ", " TMP3_S1 NL
402 // The color values are stored in-place.
// PIXELS_R already points 4 bytes past the newest loaded alpha byte, so the current
// pixel's alpha is at offset -8 and its three colour bytes are at -11, -10 and -9.
403 "strb r2, [" PIXELS_R ", #-11]" NL
404 "strb r3, [" PIXELS_R ", #-10]" NL
405 "vmov.u32 r2, " TMP3_S2 NL
406 "strb r2, [" PIXELS_R ", #-9]" NL
407
408 // Continue to the next pixel.
// x advances by CONST_ONE_S and only one new column of alpha values is needed (r3 = 1).
409 ".blackPixel:" NL
410 "vadd.f32 " POSITION_X_S ", " CONST_ONE_S NL
411 "mov r3, #1" NL
412 "subs " WIDTH_R ", " WIDTH_R ", #1" NL
413 "bne .scanline" NL
414
415 // If the end of the scanline is reached, we continue
416 // to the next scanline.
417 "vadd.f32 " POSITION_Y_S ", " CONST_ONE_S NL
418 "mov " WIDTH_R ", " RESET_WIDTH_R NL
419 "subs " HEIGHT_R ", " HEIGHT_R ", #1" NL
420 "bne .mainloop" NL
421
422 // Return: restore the saved registers; loading pc performs the return.
423 "vldmia sp!, {d8-d15}" NL
424 "ldmia sp!, {r4-r8, r10, r11, pc}" NL
425
// Out-of-line continuations of the main loop follow.
426 ".distantLight:" NL
427 // In case of distant light, the light vector is constant,
428 // we simply copy it.
429 "vmov.f32 " TMP1_Q ", " LIGHT_Q NL
430 "b .visiblePixel" NL
431
432 ".cosineOfAngle:" NL
433 // If the pixel is outside of the cone angle, it is simply a black pixel.
// COSINE_OF_ANGLE = (light vector . DIRECTION_Q) / length of the light vector.
// The pixel is processed normally when COSINE_OF_ANGLE <= CONE_CUT_OFF_S.
434 DOT_PRODUCT(TMP3, TMP1, DIRECTION)
435 "vdiv.f32 " COSINE_OF_ANGLE ", " TMP3_S0 ", " TMP1_S3 NL
436 "vcmp.f32 " COSINE_OF_ANGLE ", " CONE_CUT_OFF_S NL
437 "fmstat" NL
438 "bls .visiblePixel" NL
// Otherwise zero the three colour bytes (a halfword store plus a byte store).
439 "mov r0, #0" NL
440 "strh r0, [" PIXELS_R ", #-11]" NL
441 "strb r0, [" PIXELS_R ", #-9]" NL
442 "b .blackPixel" NL
443
444 ".cutOff:" NL
445 // Smoothing the light strength on the cone edge.
// TMP3_S1 *= (CONE_CUT_OFF_S - COSINE_OF_ANGLE) / CONE_CUT_OFF_RANGE_S.
446 "vsub.f32 " TMP3_S0 ", " CONE_CUT_OFF_S ", " COSINE_OF_ANGLE NL
447 "vdiv.f32 " TMP3_S0 ", " TMP3_S0 ", " CONE_CUT_OFF_RANGE_S NL
448 "vmul.f32 " TMP3_S1 ", " TMP3_S1 ", " TMP3_S0 NL
449 "b .cutOffFinished" NL
450
451 ".coneExpPowf:" NL
452 POWF(TMP3_S1, CONE_EXPONENT_R)
453 "b .coneExpPowfFinished" NL
454
// TMP2_S1 is raised to the specular exponent and, unless FLAG_DIFFUSE_CONST_IS_1 is set,
// multiplied by DIFFUSE_CONST_S.
455 ".specularExpPowf:" NL
456 POWF(TMP2_S1, SPECULAR_EXPONENT_R)
457 "tst " FLAGS_R ", #" TOSTRING(FLAG_DIFFUSE_CONST_IS_1) NL
458 "vmuleq.f32 " TMP2_S1 ", " TMP2_S1 ", " DIFFUSE_CONST_S NL
459 "b .lightStrengthCalculated" NL
460 ); // NOLINT
461
462 } // namespace WebCore
463
464 #endif // CPU(ARM_NEON) && COMPILER(GCC)
465