Lines Matching full:n
41 "vdup.32 q4, %[input_range_min]\n" in Transform()
42 "vdup.32 q5, %[output_range_min]\n" in Transform()
43 "vdup.32 q6, %[input_range_offset]\n" in Transform()
44 "vdup.32 q7, %[input_range_scale]\n" in Transform()
45 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
46 "vsub.f32 q4, q4, q5\n" in Transform()
49 "subs %[count], %[count], #16\n" in Transform()
52 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
53 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
54 "pld [%[input], #64]\n" in Transform()
55 "vcvt.f32.s32 q0, q0\n" in Transform()
56 "vcvt.f32.s32 q1, q1\n" in Transform()
57 "vcvt.f32.s32 q2, q2\n" in Transform()
58 "vcvt.f32.s32 q3, q3\n" in Transform()
59 "vsub.f32 q0, q0, q6\n" in Transform()
60 "vsub.f32 q1, q1, q6\n" in Transform()
61 "vsub.f32 q2, q2, q6\n" in Transform()
62 "vsub.f32 q3, q3, q6\n" in Transform()
63 "vmul.f32 q0, q0, q7\n" in Transform()
64 "vmul.f32 q1, q1, q7\n" in Transform()
65 "vmul.f32 q2, q2, q7\n" in Transform()
66 "vmul.f32 q3, q3, q7\n" in Transform()
67 "vadd.f32 q0, q0, q4\n" in Transform()
68 "vadd.f32 q1, q1, q4\n" in Transform()
69 "vadd.f32 q2, q2, q4\n" in Transform()
70 "vadd.f32 q3, q3, q4\n" in Transform()
71 "vmul.f32 q0, q0, q8\n" in Transform()
72 "vmul.f32 q1, q1, q8\n" in Transform()
73 "vmul.f32 q2, q2, q8\n" in Transform()
74 "vmul.f32 q3, q3, q8\n" in Transform()
75 "vcvt.s32.f32 q0, q0\n" in Transform()
76 "vcvt.s32.f32 q1, q1\n" in Transform()
77 "vcvt.s32.f32 q2, q2\n" in Transform()
78 "vcvt.s32.f32 q3, q3\n" in Transform()
79 "vqmovn.s32 d0, q0\n" in Transform()
80 "vqmovn.s32 d1, q1\n" in Transform()
81 "vqmovn.s32 d4, q2\n" in Transform()
82 "vqmovn.s32 d5, q3\n" in Transform()
83 "vqmovun.s16 d0, q0\n" in Transform()
84 "vqmovun.s16 d1, q2\n" in Transform()
86 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
87 "pld [%[output]]\n" in Transform()
89 "bne 1b\n" in Transform()
116 "vdup.32 q4, %[input_range_min]\n" in Transform()
117 "vdup.32 q5, %[output_range_min]\n" in Transform()
118 "vdup.32 q6, %[input_range_offset]\n" in Transform()
119 "vdup.32 q7, %[input_range_scale]\n" in Transform()
120 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
121 "vsub.f32 q4, q4, q5\n" in Transform()
124 "subs %[count], %[count], #1\n" in Transform()
125 "beq 2f\n" in Transform()
128 "subs %[count], %[count], #16\n" in Transform()
131 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
132 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
133 "pld [%[input], #64]\n" in Transform()
134 "vcvt.f32.s32 q0, q0\n" in Transform()
135 "vcvt.f32.s32 q1, q1\n" in Transform()
136 "vcvt.f32.s32 q2, q2\n" in Transform()
137 "vcvt.f32.s32 q3, q3\n" in Transform()
138 "vsub.f32 q0, q0, q6\n" in Transform()
139 "vsub.f32 q1, q1, q6\n" in Transform()
140 "vsub.f32 q2, q2, q6\n" in Transform()
141 "vsub.f32 q3, q3, q6\n" in Transform()
142 "vmul.f32 q0, q0, q7\n" in Transform()
143 "vmul.f32 q1, q1, q7\n" in Transform()
144 "vmul.f32 q2, q2, q7\n" in Transform()
145 "vmul.f32 q3, q3, q7\n" in Transform()
146 "vadd.f32 q0, q0, q4\n" in Transform()
147 "vadd.f32 q1, q1, q4\n" in Transform()
148 "vadd.f32 q2, q2, q4\n" in Transform()
149 "vadd.f32 q3, q3, q4\n" in Transform()
150 "vmul.f32 q0, q0, q8\n" in Transform()
151 "vmul.f32 q1, q1, q8\n" in Transform()
152 "vmul.f32 q2, q2, q8\n" in Transform()
153 "vmul.f32 q3, q3, q8\n" in Transform()
154 "vcvt.s32.f32 q0, q0\n" in Transform()
155 "vcvt.s32.f32 q1, q1\n" in Transform()
156 "vcvt.s32.f32 q2, q2\n" in Transform()
157 "vcvt.s32.f32 q3, q3\n" in Transform()
158 "vqmovn.s32 d0, q0\n" in Transform()
159 "vqmovn.s32 d1, q1\n" in Transform()
160 "vqmovn.s32 d4, q2\n" in Transform()
161 "vqmovn.s32 d5, q3\n" in Transform()
162 "vqmovun.s16 d0, q0\n" in Transform()
163 "vqmovun.s16 d1, q2\n" in Transform()
165 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
166 "pld [%[output]]\n" in Transform()
168 "bne 1b\n" in Transform()
174 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
175 "pld [%[input], #64]\n" in Transform()
176 "vcvt.f32.s32 q0, q0\n" in Transform()
177 "vsub.f32 q0, q0, q6\n" in Transform()
178 "vmul.f32 q0, q0, q7\n" in Transform()
179 "vadd.f32 q0, q0, q4\n" in Transform()
180 "vmul.f32 q0, q0, q8\n" in Transform()
181 "vcvt.s32.f32 q0, q0\n" in Transform()
182 "vqmovn.s32 d0, q0\n" in Transform()
183 "vqmovun.s16 d0, q0\n" in Transform()
185 "vst1.8 {d0[0]}, [%[output]]!\n" in Transform()
186 "pld [%[output]]\n" in Transform()
213 "vdup.32 q4, %[input_range_min]\n" in Transform()
214 "vdup.32 q5, %[output_range_min]\n" in Transform()
215 "vdup.32 q6, %[input_range_offset]\n" in Transform()
216 "vdup.32 q7, %[input_range_scale]\n" in Transform()
217 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
218 "vsub.f32 q4, q4, q5\n" in Transform()
221 "subs %[count], %[count], #2\n" in Transform()
222 "beq 2f\n" in Transform()
225 "subs %[count], %[count], #16\n" in Transform()
228 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
229 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
230 "pld [%[input], #64]\n" in Transform()
231 "vcvt.f32.s32 q0, q0\n" in Transform()
232 "vcvt.f32.s32 q1, q1\n" in Transform()
233 "vcvt.f32.s32 q2, q2\n" in Transform()
234 "vcvt.f32.s32 q3, q3\n" in Transform()
235 "vsub.f32 q0, q0, q6\n" in Transform()
236 "vsub.f32 q1, q1, q6\n" in Transform()
237 "vsub.f32 q2, q2, q6\n" in Transform()
238 "vsub.f32 q3, q3, q6\n" in Transform()
239 "vmul.f32 q0, q0, q7\n" in Transform()
240 "vmul.f32 q1, q1, q7\n" in Transform()
241 "vmul.f32 q2, q2, q7\n" in Transform()
242 "vmul.f32 q3, q3, q7\n" in Transform()
243 "vadd.f32 q0, q0, q4\n" in Transform()
244 "vadd.f32 q1, q1, q4\n" in Transform()
245 "vadd.f32 q2, q2, q4\n" in Transform()
246 "vadd.f32 q3, q3, q4\n" in Transform()
247 "vmul.f32 q0, q0, q8\n" in Transform()
248 "vmul.f32 q1, q1, q8\n" in Transform()
249 "vmul.f32 q2, q2, q8\n" in Transform()
250 "vmul.f32 q3, q3, q8\n" in Transform()
251 "vcvt.s32.f32 q0, q0\n" in Transform()
252 "vcvt.s32.f32 q1, q1\n" in Transform()
253 "vcvt.s32.f32 q2, q2\n" in Transform()
254 "vcvt.s32.f32 q3, q3\n" in Transform()
255 "vqmovn.s32 d0, q0\n" in Transform()
256 "vqmovn.s32 d1, q1\n" in Transform()
257 "vqmovn.s32 d4, q2\n" in Transform()
258 "vqmovn.s32 d5, q3\n" in Transform()
259 "vqmovun.s16 d0, q0\n" in Transform()
260 "vqmovun.s16 d1, q2\n" in Transform()
262 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
263 "pld [%[output]]\n" in Transform()
265 "bne 1b\n" in Transform()
271 "vld1.32 {d0}, [%[input]]!\n" in Transform()
272 "pld [%[input], #64]\n" in Transform()
273 "vcvt.f32.s32 q0, q0\n" in Transform()
274 "vsub.f32 q0, q0, q6\n" in Transform()
275 "vmul.f32 q0, q0, q7\n" in Transform()
276 "vadd.f32 q0, q0, q4\n" in Transform()
277 "vmul.f32 q0, q0, q8\n" in Transform()
278 "vcvt.s32.f32 q0, q0\n" in Transform()
279 "vqmovn.s32 d0, q0\n" in Transform()
280 "vqmovun.s16 d0, q0\n" in Transform()
282 "vst1.16 {d0[0]}, [%[output]]!\n" in Transform()
283 "pld [%[output]]\n" in Transform()
310 "vdup.32 q4, %[input_range_min]\n" in Transform()
311 "vdup.32 q5, %[output_range_min]\n" in Transform()
312 "vdup.32 q6, %[input_range_offset]\n" in Transform()
313 "vdup.32 q7, %[input_range_scale]\n" in Transform()
314 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
315 "vsub.f32 q4, q4, q5\n" in Transform()
318 "subs %[count], %[count], #3\n" in Transform()
319 "beq 2f\n" in Transform()
322 "subs %[count], %[count], #16\n" in Transform()
325 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
326 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
327 "pld [%[input], #64]\n" in Transform()
328 "vcvt.f32.s32 q0, q0\n" in Transform()
329 "vcvt.f32.s32 q1, q1\n" in Transform()
330 "vcvt.f32.s32 q2, q2\n" in Transform()
331 "vcvt.f32.s32 q3, q3\n" in Transform()
332 "vsub.f32 q0, q0, q6\n" in Transform()
333 "vsub.f32 q1, q1, q6\n" in Transform()
334 "vsub.f32 q2, q2, q6\n" in Transform()
335 "vsub.f32 q3, q3, q6\n" in Transform()
336 "vmul.f32 q0, q0, q7\n" in Transform()
337 "vmul.f32 q1, q1, q7\n" in Transform()
338 "vmul.f32 q2, q2, q7\n" in Transform()
339 "vmul.f32 q3, q3, q7\n" in Transform()
340 "vadd.f32 q0, q0, q4\n" in Transform()
341 "vadd.f32 q1, q1, q4\n" in Transform()
342 "vadd.f32 q2, q2, q4\n" in Transform()
343 "vadd.f32 q3, q3, q4\n" in Transform()
344 "vmul.f32 q0, q0, q8\n" in Transform()
345 "vmul.f32 q1, q1, q8\n" in Transform()
346 "vmul.f32 q2, q2, q8\n" in Transform()
347 "vmul.f32 q3, q3, q8\n" in Transform()
348 "vcvt.s32.f32 q0, q0\n" in Transform()
349 "vcvt.s32.f32 q1, q1\n" in Transform()
350 "vcvt.s32.f32 q2, q2\n" in Transform()
351 "vcvt.s32.f32 q3, q3\n" in Transform()
352 "vqmovn.s32 d0, q0\n" in Transform()
353 "vqmovn.s32 d1, q1\n" in Transform()
354 "vqmovn.s32 d4, q2\n" in Transform()
355 "vqmovn.s32 d5, q3\n" in Transform()
356 "vqmovun.s16 d0, q0\n" in Transform()
357 "vqmovun.s16 d1, q2\n" in Transform()
359 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
360 "pld [%[output]]\n" in Transform()
362 "bne 1b\n" in Transform()
368 "vld1.32 {d0}, [%[input]]!\n" in Transform()
369 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
370 "pld [%[input], #64]\n" in Transform()
371 "vcvt.f32.s32 q0, q0\n" in Transform()
372 "vsub.f32 q0, q0, q6\n" in Transform()
373 "vmul.f32 q0, q0, q7\n" in Transform()
374 "vadd.f32 q0, q0, q4\n" in Transform()
375 "vmul.f32 q0, q0, q8\n" in Transform()
376 "vcvt.s32.f32 q0, q0\n" in Transform()
377 "vqmovn.s32 d0, q0\n" in Transform()
378 "vqmovun.s16 d0, q0\n" in Transform()
380 "vst1.16 {d0[0]}, [%[output]]!\n" in Transform()
381 "vst1.8 {d0[2]}, [%[output]]!\n" in Transform()
382 "pld [%[output]]\n" in Transform()
409 "vdup.32 q4, %[input_range_min]\n" in Transform()
410 "vdup.32 q5, %[output_range_min]\n" in Transform()
411 "vdup.32 q6, %[input_range_offset]\n" in Transform()
412 "vdup.32 q7, %[input_range_scale]\n" in Transform()
413 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
414 "vsub.f32 q4, q4, q5\n" in Transform()
417 "subs %[count], %[count], #4\n" in Transform()
418 "beq 2f\n" in Transform()
421 "subs %[count], %[count], #16\n" in Transform()
424 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
425 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
426 "pld [%[input], #64]\n" in Transform()
427 "vcvt.f32.s32 q0, q0\n" in Transform()
428 "vcvt.f32.s32 q1, q1\n" in Transform()
429 "vcvt.f32.s32 q2, q2\n" in Transform()
430 "vcvt.f32.s32 q3, q3\n" in Transform()
431 "vsub.f32 q0, q0, q6\n" in Transform()
432 "vsub.f32 q1, q1, q6\n" in Transform()
433 "vsub.f32 q2, q2, q6\n" in Transform()
434 "vsub.f32 q3, q3, q6\n" in Transform()
435 "vmul.f32 q0, q0, q7\n" in Transform()
436 "vmul.f32 q1, q1, q7\n" in Transform()
437 "vmul.f32 q2, q2, q7\n" in Transform()
438 "vmul.f32 q3, q3, q7\n" in Transform()
439 "vadd.f32 q0, q0, q4\n" in Transform()
440 "vadd.f32 q1, q1, q4\n" in Transform()
441 "vadd.f32 q2, q2, q4\n" in Transform()
442 "vadd.f32 q3, q3, q4\n" in Transform()
443 "vmul.f32 q0, q0, q8\n" in Transform()
444 "vmul.f32 q1, q1, q8\n" in Transform()
445 "vmul.f32 q2, q2, q8\n" in Transform()
446 "vmul.f32 q3, q3, q8\n" in Transform()
447 "vcvt.s32.f32 q0, q0\n" in Transform()
448 "vcvt.s32.f32 q1, q1\n" in Transform()
449 "vcvt.s32.f32 q2, q2\n" in Transform()
450 "vcvt.s32.f32 q3, q3\n" in Transform()
451 "vqmovn.s32 d0, q0\n" in Transform()
452 "vqmovn.s32 d1, q1\n" in Transform()
453 "vqmovn.s32 d4, q2\n" in Transform()
454 "vqmovn.s32 d5, q3\n" in Transform()
455 "vqmovun.s16 d0, q0\n" in Transform()
456 "vqmovun.s16 d1, q2\n" in Transform()
458 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
459 "pld [%[output]]\n" in Transform()
461 "bne 1b\n" in Transform()
467 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
468 "pld [%[input], #64]\n" in Transform()
469 "vcvt.f32.s32 q0, q0\n" in Transform()
470 "vsub.f32 q0, q0, q6\n" in Transform()
471 "vmul.f32 q0, q0, q7\n" in Transform()
472 "vadd.f32 q0, q0, q4\n" in Transform()
473 "vmul.f32 q0, q0, q8\n" in Transform()
474 "vcvt.s32.f32 q0, q0\n" in Transform()
475 "vqmovn.s32 d0, q0\n" in Transform()
476 "vqmovun.s16 d0, q0\n" in Transform()
478 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
479 "pld [%[output]]\n" in Transform()
506 "vdup.32 q4, %[input_range_min]\n" in Transform()
507 "vdup.32 q5, %[output_range_min]\n" in Transform()
508 "vdup.32 q6, %[input_range_offset]\n" in Transform()
509 "vdup.32 q7, %[input_range_scale]\n" in Transform()
510 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
511 "vsub.f32 q4, q4, q5\n" in Transform()
514 "subs %[count], %[count], #5\n" in Transform()
515 "beq 2f\n" in Transform()
518 "subs %[count], %[count], #16\n" in Transform()
521 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
522 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
523 "pld [%[input], #64]\n" in Transform()
524 "vcvt.f32.s32 q0, q0\n" in Transform()
525 "vcvt.f32.s32 q1, q1\n" in Transform()
526 "vcvt.f32.s32 q2, q2\n" in Transform()
527 "vcvt.f32.s32 q3, q3\n" in Transform()
528 "vsub.f32 q0, q0, q6\n" in Transform()
529 "vsub.f32 q1, q1, q6\n" in Transform()
530 "vsub.f32 q2, q2, q6\n" in Transform()
531 "vsub.f32 q3, q3, q6\n" in Transform()
532 "vmul.f32 q0, q0, q7\n" in Transform()
533 "vmul.f32 q1, q1, q7\n" in Transform()
534 "vmul.f32 q2, q2, q7\n" in Transform()
535 "vmul.f32 q3, q3, q7\n" in Transform()
536 "vadd.f32 q0, q0, q4\n" in Transform()
537 "vadd.f32 q1, q1, q4\n" in Transform()
538 "vadd.f32 q2, q2, q4\n" in Transform()
539 "vadd.f32 q3, q3, q4\n" in Transform()
540 "vmul.f32 q0, q0, q8\n" in Transform()
541 "vmul.f32 q1, q1, q8\n" in Transform()
542 "vmul.f32 q2, q2, q8\n" in Transform()
543 "vmul.f32 q3, q3, q8\n" in Transform()
544 "vcvt.s32.f32 q0, q0\n" in Transform()
545 "vcvt.s32.f32 q1, q1\n" in Transform()
546 "vcvt.s32.f32 q2, q2\n" in Transform()
547 "vcvt.s32.f32 q3, q3\n" in Transform()
548 "vqmovn.s32 d0, q0\n" in Transform()
549 "vqmovn.s32 d1, q1\n" in Transform()
550 "vqmovn.s32 d4, q2\n" in Transform()
551 "vqmovn.s32 d5, q3\n" in Transform()
552 "vqmovun.s16 d0, q0\n" in Transform()
553 "vqmovun.s16 d1, q2\n" in Transform()
555 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
556 "pld [%[output]]\n" in Transform()
558 "bne 1b\n" in Transform()
564 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
565 "vld1.32 {d2[0]}, [%[input]]!\n" in Transform()
566 "pld [%[input], #64]\n" in Transform()
567 "vcvt.f32.s32 q0, q0\n" in Transform()
568 "vcvt.f32.s32 q1, q1\n" in Transform()
569 "vsub.f32 q0, q0, q6\n" in Transform()
570 "vsub.f32 q1, q1, q6\n" in Transform()
571 "vmul.f32 q0, q0, q7\n" in Transform()
572 "vmul.f32 q1, q1, q7\n" in Transform()
573 "vadd.f32 q0, q0, q4\n" in Transform()
574 "vadd.f32 q1, q1, q4\n" in Transform()
575 "vmul.f32 q0, q0, q8\n" in Transform()
576 "vmul.f32 q1, q1, q8\n" in Transform()
577 "vcvt.s32.f32 q0, q0\n" in Transform()
578 "vcvt.s32.f32 q1, q1\n" in Transform()
579 "vqmovn.s32 d0, q0\n" in Transform()
580 "vqmovn.s32 d1, q1\n" in Transform()
581 "vqmovun.s16 d0, q0\n" in Transform()
583 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
584 "vst1.8 {d0[4]}, [%[output]]!\n" in Transform()
585 "pld [%[output]]\n" in Transform()
612 "vdup.32 q4, %[input_range_min]\n" in Transform()
613 "vdup.32 q5, %[output_range_min]\n" in Transform()
614 "vdup.32 q6, %[input_range_offset]\n" in Transform()
615 "vdup.32 q7, %[input_range_scale]\n" in Transform()
616 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
617 "vsub.f32 q4, q4, q5\n" in Transform()
620 "subs %[count], %[count], #6\n" in Transform()
621 "beq 2f\n" in Transform()
624 "subs %[count], %[count], #16\n" in Transform()
627 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
628 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
629 "pld [%[input], #64]\n" in Transform()
630 "vcvt.f32.s32 q0, q0\n" in Transform()
631 "vcvt.f32.s32 q1, q1\n" in Transform()
632 "vcvt.f32.s32 q2, q2\n" in Transform()
633 "vcvt.f32.s32 q3, q3\n" in Transform()
634 "vsub.f32 q0, q0, q6\n" in Transform()
635 "vsub.f32 q1, q1, q6\n" in Transform()
636 "vsub.f32 q2, q2, q6\n" in Transform()
637 "vsub.f32 q3, q3, q6\n" in Transform()
638 "vmul.f32 q0, q0, q7\n" in Transform()
639 "vmul.f32 q1, q1, q7\n" in Transform()
640 "vmul.f32 q2, q2, q7\n" in Transform()
641 "vmul.f32 q3, q3, q7\n" in Transform()
642 "vadd.f32 q0, q0, q4\n" in Transform()
643 "vadd.f32 q1, q1, q4\n" in Transform()
644 "vadd.f32 q2, q2, q4\n" in Transform()
645 "vadd.f32 q3, q3, q4\n" in Transform()
646 "vmul.f32 q0, q0, q8\n" in Transform()
647 "vmul.f32 q1, q1, q8\n" in Transform()
648 "vmul.f32 q2, q2, q8\n" in Transform()
649 "vmul.f32 q3, q3, q8\n" in Transform()
650 "vcvt.s32.f32 q0, q0\n" in Transform()
651 "vcvt.s32.f32 q1, q1\n" in Transform()
652 "vcvt.s32.f32 q2, q2\n" in Transform()
653 "vcvt.s32.f32 q3, q3\n" in Transform()
654 "vqmovn.s32 d0, q0\n" in Transform()
655 "vqmovn.s32 d1, q1\n" in Transform()
656 "vqmovn.s32 d4, q2\n" in Transform()
657 "vqmovn.s32 d5, q3\n" in Transform()
658 "vqmovun.s16 d0, q0\n" in Transform()
659 "vqmovun.s16 d1, q2\n" in Transform()
661 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
662 "pld [%[output]]\n" in Transform()
664 "bne 1b\n" in Transform()
670 "vld1.32 {d0, d1, d2}, [%[input]]!\n" in Transform()
671 "pld [%[input], #64]\n" in Transform()
672 "vcvt.f32.s32 q0, q0\n" in Transform()
673 "vcvt.f32.s32 q1, q1\n" in Transform()
674 "vsub.f32 q0, q0, q6\n" in Transform()
675 "vsub.f32 q1, q1, q6\n" in Transform()
676 "vmul.f32 q0, q0, q7\n" in Transform()
677 "vmul.f32 q1, q1, q7\n" in Transform()
678 "vadd.f32 q0, q0, q4\n" in Transform()
679 "vadd.f32 q1, q1, q4\n" in Transform()
680 "vmul.f32 q0, q0, q8\n" in Transform()
681 "vmul.f32 q1, q1, q8\n" in Transform()
682 "vcvt.s32.f32 q0, q0\n" in Transform()
683 "vcvt.s32.f32 q1, q1\n" in Transform()
684 "vqmovn.s32 d0, q0\n" in Transform()
685 "vqmovn.s32 d1, q1\n" in Transform()
686 "vqmovun.s16 d0, q0\n" in Transform()
688 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
689 "vst1.16 {d0[2]}, [%[output]]!\n" in Transform()
690 "pld [%[output]]\n" in Transform()
717 "vdup.32 q4, %[input_range_min]\n" in Transform()
718 "vdup.32 q5, %[output_range_min]\n" in Transform()
719 "vdup.32 q6, %[input_range_offset]\n" in Transform()
720 "vdup.32 q7, %[input_range_scale]\n" in Transform()
721 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
722 "vsub.f32 q4, q4, q5\n" in Transform()
725 "subs %[count], %[count], #7\n" in Transform()
726 "beq 2f\n" in Transform()
729 "subs %[count], %[count], #16\n" in Transform()
732 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
733 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
734 "pld [%[input], #64]\n" in Transform()
735 "vcvt.f32.s32 q0, q0\n" in Transform()
736 "vcvt.f32.s32 q1, q1\n" in Transform()
737 "vcvt.f32.s32 q2, q2\n" in Transform()
738 "vcvt.f32.s32 q3, q3\n" in Transform()
739 "vsub.f32 q0, q0, q6\n" in Transform()
740 "vsub.f32 q1, q1, q6\n" in Transform()
741 "vsub.f32 q2, q2, q6\n" in Transform()
742 "vsub.f32 q3, q3, q6\n" in Transform()
743 "vmul.f32 q0, q0, q7\n" in Transform()
744 "vmul.f32 q1, q1, q7\n" in Transform()
745 "vmul.f32 q2, q2, q7\n" in Transform()
746 "vmul.f32 q3, q3, q7\n" in Transform()
747 "vadd.f32 q0, q0, q4\n" in Transform()
748 "vadd.f32 q1, q1, q4\n" in Transform()
749 "vadd.f32 q2, q2, q4\n" in Transform()
750 "vadd.f32 q3, q3, q4\n" in Transform()
751 "vmul.f32 q0, q0, q8\n" in Transform()
752 "vmul.f32 q1, q1, q8\n" in Transform()
753 "vmul.f32 q2, q2, q8\n" in Transform()
754 "vmul.f32 q3, q3, q8\n" in Transform()
755 "vcvt.s32.f32 q0, q0\n" in Transform()
756 "vcvt.s32.f32 q1, q1\n" in Transform()
757 "vcvt.s32.f32 q2, q2\n" in Transform()
758 "vcvt.s32.f32 q3, q3\n" in Transform()
759 "vqmovn.s32 d0, q0\n" in Transform()
760 "vqmovn.s32 d1, q1\n" in Transform()
761 "vqmovn.s32 d4, q2\n" in Transform()
762 "vqmovn.s32 d5, q3\n" in Transform()
763 "vqmovun.s16 d0, q0\n" in Transform()
764 "vqmovun.s16 d1, q2\n" in Transform()
766 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
767 "pld [%[output]]\n" in Transform()
769 "bne 1b\n" in Transform()
775 "vld1.32 {d0, d1, d2}, [%[input]]!\n" in Transform()
776 "vld1.32 {d3[0]}, [%[input]]!\n" in Transform()
777 "pld [%[input], #64]\n" in Transform()
778 "vcvt.f32.s32 q0, q0\n" in Transform()
779 "vcvt.f32.s32 q1, q1\n" in Transform()
780 "vsub.f32 q0, q0, q6\n" in Transform()
781 "vsub.f32 q1, q1, q6\n" in Transform()
782 "vmul.f32 q0, q0, q7\n" in Transform()
783 "vmul.f32 q1, q1, q7\n" in Transform()
784 "vadd.f32 q0, q0, q4\n" in Transform()
785 "vadd.f32 q1, q1, q4\n" in Transform()
786 "vmul.f32 q0, q0, q8\n" in Transform()
787 "vmul.f32 q1, q1, q8\n" in Transform()
788 "vcvt.s32.f32 q0, q0\n" in Transform()
789 "vcvt.s32.f32 q1, q1\n" in Transform()
790 "vqmovn.s32 d0, q0\n" in Transform()
791 "vqmovn.s32 d1, q1\n" in Transform()
792 "vqmovun.s16 d0, q0\n" in Transform()
794 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
795 "vst1.16 {d0[2]}, [%[output]]!\n" in Transform()
796 "vst1.8 {d0[6]}, [%[output]]!\n" in Transform()
797 "pld [%[output]]\n" in Transform()
824 "vdup.32 q4, %[input_range_min]\n" in Transform()
825 "vdup.32 q5, %[output_range_min]\n" in Transform()
826 "vdup.32 q6, %[input_range_offset]\n" in Transform()
827 "vdup.32 q7, %[input_range_scale]\n" in Transform()
828 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
829 "vsub.f32 q4, q4, q5\n" in Transform()
832 "subs %[count], %[count], #8\n" in Transform()
833 "beq 2f\n" in Transform()
836 "subs %[count], %[count], #16\n" in Transform()
839 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
840 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
841 "pld [%[input], #64]\n" in Transform()
842 "vcvt.f32.s32 q0, q0\n" in Transform()
843 "vcvt.f32.s32 q1, q1\n" in Transform()
844 "vcvt.f32.s32 q2, q2\n" in Transform()
845 "vcvt.f32.s32 q3, q3\n" in Transform()
846 "vsub.f32 q0, q0, q6\n" in Transform()
847 "vsub.f32 q1, q1, q6\n" in Transform()
848 "vsub.f32 q2, q2, q6\n" in Transform()
849 "vsub.f32 q3, q3, q6\n" in Transform()
850 "vmul.f32 q0, q0, q7\n" in Transform()
851 "vmul.f32 q1, q1, q7\n" in Transform()
852 "vmul.f32 q2, q2, q7\n" in Transform()
853 "vmul.f32 q3, q3, q7\n" in Transform()
854 "vadd.f32 q0, q0, q4\n" in Transform()
855 "vadd.f32 q1, q1, q4\n" in Transform()
856 "vadd.f32 q2, q2, q4\n" in Transform()
857 "vadd.f32 q3, q3, q4\n" in Transform()
858 "vmul.f32 q0, q0, q8\n" in Transform()
859 "vmul.f32 q1, q1, q8\n" in Transform()
860 "vmul.f32 q2, q2, q8\n" in Transform()
861 "vmul.f32 q3, q3, q8\n" in Transform()
862 "vcvt.s32.f32 q0, q0\n" in Transform()
863 "vcvt.s32.f32 q1, q1\n" in Transform()
864 "vcvt.s32.f32 q2, q2\n" in Transform()
865 "vcvt.s32.f32 q3, q3\n" in Transform()
866 "vqmovn.s32 d0, q0\n" in Transform()
867 "vqmovn.s32 d1, q1\n" in Transform()
868 "vqmovn.s32 d4, q2\n" in Transform()
869 "vqmovn.s32 d5, q3\n" in Transform()
870 "vqmovun.s16 d0, q0\n" in Transform()
871 "vqmovun.s16 d1, q2\n" in Transform()
873 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
874 "pld [%[output]]\n" in Transform()
876 "bne 1b\n" in Transform()
882 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
883 "pld [%[input], #64]\n" in Transform()
884 "vcvt.f32.s32 q0, q0\n" in Transform()
885 "vcvt.f32.s32 q1, q1\n" in Transform()
886 "vsub.f32 q0, q0, q6\n" in Transform()
887 "vsub.f32 q1, q1, q6\n" in Transform()
888 "vmul.f32 q0, q0, q7\n" in Transform()
889 "vmul.f32 q1, q1, q7\n" in Transform()
890 "vadd.f32 q0, q0, q4\n" in Transform()
891 "vadd.f32 q1, q1, q4\n" in Transform()
892 "vmul.f32 q0, q0, q8\n" in Transform()
893 "vmul.f32 q1, q1, q8\n" in Transform()
894 "vcvt.s32.f32 q0, q0\n" in Transform()
895 "vcvt.s32.f32 q1, q1\n" in Transform()
896 "vqmovn.s32 d0, q0\n" in Transform()
897 "vqmovn.s32 d1, q1\n" in Transform()
898 "vqmovun.s16 d0, q0\n" in Transform()
900 "vst1.32 {d0}, [%[output]]!\n" in Transform()
901 "pld [%[output]]\n" in Transform()
928 "vdup.32 q4, %[input_range_min]\n" in Transform()
929 "vdup.32 q5, %[output_range_min]\n" in Transform()
930 "vdup.32 q6, %[input_range_offset]\n" in Transform()
931 "vdup.32 q7, %[input_range_scale]\n" in Transform()
932 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
933 "vsub.f32 q4, q4, q5\n" in Transform()
936 "subs %[count], %[count], #9\n" in Transform()
937 "beq 2f\n" in Transform()
940 "subs %[count], %[count], #16\n" in Transform()
943 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
944 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
945 "pld [%[input], #64]\n" in Transform()
946 "vcvt.f32.s32 q0, q0\n" in Transform()
947 "vcvt.f32.s32 q1, q1\n" in Transform()
948 "vcvt.f32.s32 q2, q2\n" in Transform()
949 "vcvt.f32.s32 q3, q3\n" in Transform()
950 "vsub.f32 q0, q0, q6\n" in Transform()
951 "vsub.f32 q1, q1, q6\n" in Transform()
952 "vsub.f32 q2, q2, q6\n" in Transform()
953 "vsub.f32 q3, q3, q6\n" in Transform()
954 "vmul.f32 q0, q0, q7\n" in Transform()
955 "vmul.f32 q1, q1, q7\n" in Transform()
956 "vmul.f32 q2, q2, q7\n" in Transform()
957 "vmul.f32 q3, q3, q7\n" in Transform()
958 "vadd.f32 q0, q0, q4\n" in Transform()
959 "vadd.f32 q1, q1, q4\n" in Transform()
960 "vadd.f32 q2, q2, q4\n" in Transform()
961 "vadd.f32 q3, q3, q4\n" in Transform()
962 "vmul.f32 q0, q0, q8\n" in Transform()
963 "vmul.f32 q1, q1, q8\n" in Transform()
964 "vmul.f32 q2, q2, q8\n" in Transform()
965 "vmul.f32 q3, q3, q8\n" in Transform()
966 "vcvt.s32.f32 q0, q0\n" in Transform()
967 "vcvt.s32.f32 q1, q1\n" in Transform()
968 "vcvt.s32.f32 q2, q2\n" in Transform()
969 "vcvt.s32.f32 q3, q3\n" in Transform()
970 "vqmovn.s32 d0, q0\n" in Transform()
971 "vqmovn.s32 d1, q1\n" in Transform()
972 "vqmovn.s32 d4, q2\n" in Transform()
973 "vqmovn.s32 d5, q3\n" in Transform()
974 "vqmovun.s16 d0, q0\n" in Transform()
975 "vqmovun.s16 d1, q2\n" in Transform()
977 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
978 "pld [%[output]]\n" in Transform()
980 "bne 1b\n" in Transform()
986 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
987 "vld1.32 {d4[0]}, [%[input]]!\n" in Transform()
988 "pld [%[input], #64]\n" in Transform()
989 "vcvt.f32.s32 q0, q0\n" in Transform()
990 "vcvt.f32.s32 q1, q1\n" in Transform()
991 "vcvt.f32.s32 q2, q2\n" in Transform()
992 "vsub.f32 q0, q0, q6\n" in Transform()
993 "vsub.f32 q1, q1, q6\n" in Transform()
994 "vsub.f32 q2, q2, q6\n" in Transform()
995 "vmul.f32 q0, q0, q7\n" in Transform()
996 "vmul.f32 q1, q1, q7\n" in Transform()
997 "vmul.f32 q2, q2, q7\n" in Transform()
998 "vadd.f32 q0, q0, q4\n" in Transform()
999 "vadd.f32 q1, q1, q4\n" in Transform()
1000 "vadd.f32 q2, q2, q4\n" in Transform()
1001 "vmul.f32 q0, q0, q8\n" in Transform()
1002 "vmul.f32 q1, q1, q8\n" in Transform()
1003 "vmul.f32 q2, q2, q8\n" in Transform()
1004 "vcvt.s32.f32 q0, q0\n" in Transform()
1005 "vcvt.s32.f32 q1, q1\n" in Transform()
1006 "vcvt.s32.f32 q2, q2\n" in Transform()
1007 "vqmovn.s32 d0, q0\n" in Transform()
1008 "vqmovn.s32 d1, q1\n" in Transform()
1009 "vqmovn.s32 d4, q2\n" in Transform()
1010 "vqmovun.s16 d0, q0\n" in Transform()
1011 "vqmovun.s16 d1, q2\n" in Transform()
1013 "vst1.32 {d0}, [%[output]]!\n" in Transform()
1014 "vst1.8 {d1[0]}, [%[output]]!\n" in Transform()
1015 "pld [%[output]]\n" in Transform()
1042 "vdup.32 q4, %[input_range_min]\n" in Transform()
1043 "vdup.32 q5, %[output_range_min]\n" in Transform()
1044 "vdup.32 q6, %[input_range_offset]\n" in Transform()
1045 "vdup.32 q7, %[input_range_scale]\n" in Transform()
1046 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
1047 "vsub.f32 q4, q4, q5\n" in Transform()
1050 "subs %[count], %[count], #10\n" in Transform()
1051 "beq 2f\n" in Transform()
1054 "subs %[count], %[count], #16\n" in Transform()
1057 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1058 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
1059 "pld [%[input], #64]\n" in Transform()
1060 "vcvt.f32.s32 q0, q0\n" in Transform()
1061 "vcvt.f32.s32 q1, q1\n" in Transform()
1062 "vcvt.f32.s32 q2, q2\n" in Transform()
1063 "vcvt.f32.s32 q3, q3\n" in Transform()
1064 "vsub.f32 q0, q0, q6\n" in Transform()
1065 "vsub.f32 q1, q1, q6\n" in Transform()
1066 "vsub.f32 q2, q2, q6\n" in Transform()
1067 "vsub.f32 q3, q3, q6\n" in Transform()
1068 "vmul.f32 q0, q0, q7\n" in Transform()
1069 "vmul.f32 q1, q1, q7\n" in Transform()
1070 "vmul.f32 q2, q2, q7\n" in Transform()
1071 "vmul.f32 q3, q3, q7\n" in Transform()
1072 "vadd.f32 q0, q0, q4\n" in Transform()
1073 "vadd.f32 q1, q1, q4\n" in Transform()
1074 "vadd.f32 q2, q2, q4\n" in Transform()
1075 "vadd.f32 q3, q3, q4\n" in Transform()
1076 "vmul.f32 q0, q0, q8\n" in Transform()
1077 "vmul.f32 q1, q1, q8\n" in Transform()
1078 "vmul.f32 q2, q2, q8\n" in Transform()
1079 "vmul.f32 q3, q3, q8\n" in Transform()
1080 "vcvt.s32.f32 q0, q0\n" in Transform()
1081 "vcvt.s32.f32 q1, q1\n" in Transform()
1082 "vcvt.s32.f32 q2, q2\n" in Transform()
1083 "vcvt.s32.f32 q3, q3\n" in Transform()
1084 "vqmovn.s32 d0, q0\n" in Transform()
1085 "vqmovn.s32 d1, q1\n" in Transform()
1086 "vqmovn.s32 d4, q2\n" in Transform()
1087 "vqmovn.s32 d5, q3\n" in Transform()
1088 "vqmovun.s16 d0, q0\n" in Transform()
1089 "vqmovun.s16 d1, q2\n" in Transform()
1091 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
1092 "pld [%[output]]\n" in Transform()
1094 "bne 1b\n" in Transform()
1100 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1101 "vld1.32 {d4}, [%[input]]!\n" in Transform()
1102 "pld [%[input], #64]\n" in Transform()
1103 "vcvt.f32.s32 q0, q0\n" in Transform()
1104 "vcvt.f32.s32 q1, q1\n" in Transform()
1105 "vcvt.f32.s32 q2, q2\n" in Transform()
1106 "vsub.f32 q0, q0, q6\n" in Transform()
1107 "vsub.f32 q1, q1, q6\n" in Transform()
1108 "vsub.f32 q2, q2, q6\n" in Transform()
1109 "vmul.f32 q0, q0, q7\n" in Transform()
1110 "vmul.f32 q1, q1, q7\n" in Transform()
1111 "vmul.f32 q2, q2, q7\n" in Transform()
1112 "vadd.f32 q0, q0, q4\n" in Transform()
1113 "vadd.f32 q1, q1, q4\n" in Transform()
1114 "vadd.f32 q2, q2, q4\n" in Transform()
1115 "vmul.f32 q0, q0, q8\n" in Transform()
1116 "vmul.f32 q1, q1, q8\n" in Transform()
1117 "vmul.f32 q2, q2, q8\n" in Transform()
1118 "vcvt.s32.f32 q0, q0\n" in Transform()
1119 "vcvt.s32.f32 q1, q1\n" in Transform()
1120 "vcvt.s32.f32 q2, q2\n" in Transform()
1121 "vqmovn.s32 d0, q0\n" in Transform()
1122 "vqmovn.s32 d1, q1\n" in Transform()
1123 "vqmovn.s32 d4, q2\n" in Transform()
1124 "vqmovun.s16 d0, q0\n" in Transform()
1125 "vqmovun.s16 d1, q2\n" in Transform()
1127 "vst1.32 {d0}, [%[output]]!\n" in Transform()
1128 "vst1.16 {d1[0]}, [%[output]]!\n" in Transform()
1129 "pld [%[output]]\n" in Transform()
1156 "vdup.32 q4, %[input_range_min]\n" in Transform()
1157 "vdup.32 q5, %[output_range_min]\n" in Transform()
1158 "vdup.32 q6, %[input_range_offset]\n" in Transform()
1159 "vdup.32 q7, %[input_range_scale]\n" in Transform()
1160 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
1161 "vsub.f32 q4, q4, q5\n" in Transform()
1164 "subs %[count], %[count], #11\n" in Transform()
1165 "beq 2f\n" in Transform()
1168 "subs %[count], %[count], #16\n" in Transform()
1171 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1172 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
1173 "pld [%[input], #64]\n" in Transform()
1174 "vcvt.f32.s32 q0, q0\n" in Transform()
1175 "vcvt.f32.s32 q1, q1\n" in Transform()
1176 "vcvt.f32.s32 q2, q2\n" in Transform()
1177 "vcvt.f32.s32 q3, q3\n" in Transform()
1178 "vsub.f32 q0, q0, q6\n" in Transform()
1179 "vsub.f32 q1, q1, q6\n" in Transform()
1180 "vsub.f32 q2, q2, q6\n" in Transform()
1181 "vsub.f32 q3, q3, q6\n" in Transform()
1182 "vmul.f32 q0, q0, q7\n" in Transform()
1183 "vmul.f32 q1, q1, q7\n" in Transform()
1184 "vmul.f32 q2, q2, q7\n" in Transform()
1185 "vmul.f32 q3, q3, q7\n" in Transform()
1186 "vadd.f32 q0, q0, q4\n" in Transform()
1187 "vadd.f32 q1, q1, q4\n" in Transform()
1188 "vadd.f32 q2, q2, q4\n" in Transform()
1189 "vadd.f32 q3, q3, q4\n" in Transform()
1190 "vmul.f32 q0, q0, q8\n" in Transform()
1191 "vmul.f32 q1, q1, q8\n" in Transform()
1192 "vmul.f32 q2, q2, q8\n" in Transform()
1193 "vmul.f32 q3, q3, q8\n" in Transform()
1194 "vcvt.s32.f32 q0, q0\n" in Transform()
1195 "vcvt.s32.f32 q1, q1\n" in Transform()
1196 "vcvt.s32.f32 q2, q2\n" in Transform()
1197 "vcvt.s32.f32 q3, q3\n" in Transform()
1198 "vqmovn.s32 d0, q0\n" in Transform()
1199 "vqmovn.s32 d1, q1\n" in Transform()
1200 "vqmovn.s32 d4, q2\n" in Transform()
1201 "vqmovn.s32 d5, q3\n" in Transform()
1202 "vqmovun.s16 d0, q0\n" in Transform()
1203 "vqmovun.s16 d1, q2\n" in Transform()
1205 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
1206 "pld [%[output]]\n" in Transform()
1208 "bne 1b\n" in Transform()
1214 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1215 "vld1.32 {d4}, [%[input]]!\n" in Transform()
1216 "vld1.32 {d5[0]}, [%[input]]!\n" in Transform()
1217 "pld [%[input], #64]\n" in Transform()
1218 "vcvt.f32.s32 q0, q0\n" in Transform()
1219 "vcvt.f32.s32 q1, q1\n" in Transform()
1220 "vcvt.f32.s32 q2, q2\n" in Transform()
1221 "vsub.f32 q0, q0, q6\n" in Transform()
1222 "vsub.f32 q1, q1, q6\n" in Transform()
1223 "vsub.f32 q2, q2, q6\n" in Transform()
1224 "vmul.f32 q0, q0, q7\n" in Transform()
1225 "vmul.f32 q1, q1, q7\n" in Transform()
1226 "vmul.f32 q2, q2, q7\n" in Transform()
1227 "vadd.f32 q0, q0, q4\n" in Transform()
1228 "vadd.f32 q1, q1, q4\n" in Transform()
1229 "vadd.f32 q2, q2, q4\n" in Transform()
1230 "vmul.f32 q0, q0, q8\n" in Transform()
1231 "vmul.f32 q1, q1, q8\n" in Transform()
1232 "vmul.f32 q2, q2, q8\n" in Transform()
1233 "vcvt.s32.f32 q0, q0\n" in Transform()
1234 "vcvt.s32.f32 q1, q1\n" in Transform()
1235 "vcvt.s32.f32 q2, q2\n" in Transform()
1236 "vqmovn.s32 d0, q0\n" in Transform()
1237 "vqmovn.s32 d1, q1\n" in Transform()
1238 "vqmovn.s32 d4, q2\n" in Transform()
1239 "vqmovun.s16 d0, q0\n" in Transform()
1240 "vqmovun.s16 d1, q2\n" in Transform()
1242 "vst1.32 {d0}, [%[output]]!\n" in Transform()
1243 "vst1.16 {d1[0]}, [%[output]]!\n" in Transform()
1244 "vst1.8 {d1[2]}, [%[output]]!\n" in Transform()
1245 "pld [%[output]]\n" in Transform()
1272 "vdup.32 q4, %[input_range_min]\n" in Transform()
1273 "vdup.32 q5, %[output_range_min]\n" in Transform()
1274 "vdup.32 q6, %[input_range_offset]\n" in Transform()
1275 "vdup.32 q7, %[input_range_scale]\n" in Transform()
1276 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
1277 "vsub.f32 q4, q4, q5\n" in Transform()
1280 "subs %[count], %[count], #12\n" in Transform()
1281 "beq 2f\n" in Transform()
1284 "subs %[count], %[count], #16\n" in Transform()
1287 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1288 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
1289 "pld [%[input], #64]\n" in Transform()
1290 "vcvt.f32.s32 q0, q0\n" in Transform()
1291 "vcvt.f32.s32 q1, q1\n" in Transform()
1292 "vcvt.f32.s32 q2, q2\n" in Transform()
1293 "vcvt.f32.s32 q3, q3\n" in Transform()
1294 "vsub.f32 q0, q0, q6\n" in Transform()
1295 "vsub.f32 q1, q1, q6\n" in Transform()
1296 "vsub.f32 q2, q2, q6\n" in Transform()
1297 "vsub.f32 q3, q3, q6\n" in Transform()
1298 "vmul.f32 q0, q0, q7\n" in Transform()
1299 "vmul.f32 q1, q1, q7\n" in Transform()
1300 "vmul.f32 q2, q2, q7\n" in Transform()
1301 "vmul.f32 q3, q3, q7\n" in Transform()
1302 "vadd.f32 q0, q0, q4\n" in Transform()
1303 "vadd.f32 q1, q1, q4\n" in Transform()
1304 "vadd.f32 q2, q2, q4\n" in Transform()
1305 "vadd.f32 q3, q3, q4\n" in Transform()
1306 "vmul.f32 q0, q0, q8\n" in Transform()
1307 "vmul.f32 q1, q1, q8\n" in Transform()
1308 "vmul.f32 q2, q2, q8\n" in Transform()
1309 "vmul.f32 q3, q3, q8\n" in Transform()
1310 "vcvt.s32.f32 q0, q0\n" in Transform()
1311 "vcvt.s32.f32 q1, q1\n" in Transform()
1312 "vcvt.s32.f32 q2, q2\n" in Transform()
1313 "vcvt.s32.f32 q3, q3\n" in Transform()
1314 "vqmovn.s32 d0, q0\n" in Transform()
1315 "vqmovn.s32 d1, q1\n" in Transform()
1316 "vqmovn.s32 d4, q2\n" in Transform()
1317 "vqmovn.s32 d5, q3\n" in Transform()
1318 "vqmovun.s16 d0, q0\n" in Transform()
1319 "vqmovun.s16 d1, q2\n" in Transform()
1321 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
1322 "pld [%[output]]\n" in Transform()
1324 "bne 1b\n" in Transform()
1330 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1331 "vld1.32 {d4, d5}, [%[input]]!\n" in Transform()
1332 "pld [%[input], #64]\n" in Transform()
1333 "vcvt.f32.s32 q0, q0\n" in Transform()
1334 "vcvt.f32.s32 q1, q1\n" in Transform()
1335 "vcvt.f32.s32 q2, q2\n" in Transform()
1336 "vsub.f32 q0, q0, q6\n" in Transform()
1337 "vsub.f32 q1, q1, q6\n" in Transform()
1338 "vsub.f32 q2, q2, q6\n" in Transform()
1339 "vmul.f32 q0, q0, q7\n" in Transform()
1340 "vmul.f32 q1, q1, q7\n" in Transform()
1341 "vmul.f32 q2, q2, q7\n" in Transform()
1342 "vadd.f32 q0, q0, q4\n" in Transform()
1343 "vadd.f32 q1, q1, q4\n" in Transform()
1344 "vadd.f32 q2, q2, q4\n" in Transform()
1345 "vmul.f32 q0, q0, q8\n" in Transform()
1346 "vmul.f32 q1, q1, q8\n" in Transform()
1347 "vmul.f32 q2, q2, q8\n" in Transform()
1348 "vcvt.s32.f32 q0, q0\n" in Transform()
1349 "vcvt.s32.f32 q1, q1\n" in Transform()
1350 "vcvt.s32.f32 q2, q2\n" in Transform()
1351 "vqmovn.s32 d0, q0\n" in Transform()
1352 "vqmovn.s32 d1, q1\n" in Transform()
1353 "vqmovn.s32 d4, q2\n" in Transform()
1354 "vqmovun.s16 d0, q0\n" in Transform()
1355 "vqmovun.s16 d1, q2\n" in Transform()
1357 "vst1.32 {d0}, [%[output]]!\n" in Transform()
1358 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
1359 "pld [%[output]]\n" in Transform()
1386 "vdup.32 q4, %[input_range_min]\n" in Transform()
1387 "vdup.32 q5, %[output_range_min]\n" in Transform()
1388 "vdup.32 q6, %[input_range_offset]\n" in Transform()
1389 "vdup.32 q7, %[input_range_scale]\n" in Transform()
1390 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
1391 "vsub.f32 q4, q4, q5\n" in Transform()
1394 "subs %[count], %[count], #13\n" in Transform()
1395 "beq 2f\n" in Transform()
1398 "subs %[count], %[count], #16\n" in Transform()
1401 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1402 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
1403 "pld [%[input], #64]\n" in Transform()
1404 "vcvt.f32.s32 q0, q0\n" in Transform()
1405 "vcvt.f32.s32 q1, q1\n" in Transform()
1406 "vcvt.f32.s32 q2, q2\n" in Transform()
1407 "vcvt.f32.s32 q3, q3\n" in Transform()
1408 "vsub.f32 q0, q0, q6\n" in Transform()
1409 "vsub.f32 q1, q1, q6\n" in Transform()
1410 "vsub.f32 q2, q2, q6\n" in Transform()
1411 "vsub.f32 q3, q3, q6\n" in Transform()
1412 "vmul.f32 q0, q0, q7\n" in Transform()
1413 "vmul.f32 q1, q1, q7\n" in Transform()
1414 "vmul.f32 q2, q2, q7\n" in Transform()
1415 "vmul.f32 q3, q3, q7\n" in Transform()
1416 "vadd.f32 q0, q0, q4\n" in Transform()
1417 "vadd.f32 q1, q1, q4\n" in Transform()
1418 "vadd.f32 q2, q2, q4\n" in Transform()
1419 "vadd.f32 q3, q3, q4\n" in Transform()
1420 "vmul.f32 q0, q0, q8\n" in Transform()
1421 "vmul.f32 q1, q1, q8\n" in Transform()
1422 "vmul.f32 q2, q2, q8\n" in Transform()
1423 "vmul.f32 q3, q3, q8\n" in Transform()
1424 "vcvt.s32.f32 q0, q0\n" in Transform()
1425 "vcvt.s32.f32 q1, q1\n" in Transform()
1426 "vcvt.s32.f32 q2, q2\n" in Transform()
1427 "vcvt.s32.f32 q3, q3\n" in Transform()
1428 "vqmovn.s32 d0, q0\n" in Transform()
1429 "vqmovn.s32 d1, q1\n" in Transform()
1430 "vqmovn.s32 d4, q2\n" in Transform()
1431 "vqmovn.s32 d5, q3\n" in Transform()
1432 "vqmovun.s16 d0, q0\n" in Transform()
1433 "vqmovun.s16 d1, q2\n" in Transform()
1435 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
1436 "pld [%[output]]\n" in Transform()
1438 "bne 1b\n" in Transform()
1444 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1445 "vld1.32 {d4, d5}, [%[input]]!\n" in Transform()
1446 "vld1.32 {d6[0]}, [%[input]]!\n" in Transform()
1447 "pld [%[input], #64]\n" in Transform()
1448 "vcvt.f32.s32 q0, q0\n" in Transform()
1449 "vcvt.f32.s32 q1, q1\n" in Transform()
1450 "vcvt.f32.s32 q2, q2\n" in Transform()
1451 "vcvt.f32.s32 q3, q3\n" in Transform()
1452 "vsub.f32 q0, q0, q6\n" in Transform()
1453 "vsub.f32 q1, q1, q6\n" in Transform()
1454 "vsub.f32 q2, q2, q6\n" in Transform()
1455 "vsub.f32 q3, q3, q6\n" in Transform()
1456 "vmul.f32 q0, q0, q7\n" in Transform()
1457 "vmul.f32 q1, q1, q7\n" in Transform()
1458 "vmul.f32 q2, q2, q7\n" in Transform()
1459 "vmul.f32 q3, q3, q7\n" in Transform()
1460 "vadd.f32 q0, q0, q4\n" in Transform()
1461 "vadd.f32 q1, q1, q4\n" in Transform()
1462 "vadd.f32 q2, q2, q4\n" in Transform()
1463 "vadd.f32 q3, q3, q4\n" in Transform()
1464 "vmul.f32 q0, q0, q8\n" in Transform()
1465 "vmul.f32 q1, q1, q8\n" in Transform()
1466 "vmul.f32 q2, q2, q8\n" in Transform()
1467 "vmul.f32 q3, q3, q8\n" in Transform()
1468 "vcvt.s32.f32 q0, q0\n" in Transform()
1469 "vcvt.s32.f32 q1, q1\n" in Transform()
1470 "vcvt.s32.f32 q2, q2\n" in Transform()
1471 "vcvt.s32.f32 q3, q3\n" in Transform()
1472 "vqmovn.s32 d0, q0\n" in Transform()
1473 "vqmovn.s32 d1, q1\n" in Transform()
1474 "vqmovn.s32 d4, q2\n" in Transform()
1475 "vqmovn.s32 d5, q3\n" in Transform()
1476 "vqmovun.s16 d0, q0\n" in Transform()
1477 "vqmovun.s16 d1, q2\n" in Transform()
1479 "vst1.32 {d0}, [%[output]]!\n" in Transform()
1480 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
1481 "vst1.8 {d1[4]}, [%[output]]!\n" in Transform()
1482 "pld [%[output]]\n" in Transform()
1509 "vdup.32 q4, %[input_range_min]\n" in Transform()
1510 "vdup.32 q5, %[output_range_min]\n" in Transform()
1511 "vdup.32 q6, %[input_range_offset]\n" in Transform()
1512 "vdup.32 q7, %[input_range_scale]\n" in Transform()
1513 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
1514 "vsub.f32 q4, q4, q5\n" in Transform()
1517 "subs %[count], %[count], #14\n" in Transform()
1518 "beq 2f\n" in Transform()
1521 "subs %[count], %[count], #16\n" in Transform()
1524 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1525 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
1526 "pld [%[input], #64]\n" in Transform()
1527 "vcvt.f32.s32 q0, q0\n" in Transform()
1528 "vcvt.f32.s32 q1, q1\n" in Transform()
1529 "vcvt.f32.s32 q2, q2\n" in Transform()
1530 "vcvt.f32.s32 q3, q3\n" in Transform()
1531 "vsub.f32 q0, q0, q6\n" in Transform()
1532 "vsub.f32 q1, q1, q6\n" in Transform()
1533 "vsub.f32 q2, q2, q6\n" in Transform()
1534 "vsub.f32 q3, q3, q6\n" in Transform()
1535 "vmul.f32 q0, q0, q7\n" in Transform()
1536 "vmul.f32 q1, q1, q7\n" in Transform()
1537 "vmul.f32 q2, q2, q7\n" in Transform()
1538 "vmul.f32 q3, q3, q7\n" in Transform()
1539 "vadd.f32 q0, q0, q4\n" in Transform()
1540 "vadd.f32 q1, q1, q4\n" in Transform()
1541 "vadd.f32 q2, q2, q4\n" in Transform()
1542 "vadd.f32 q3, q3, q4\n" in Transform()
1543 "vmul.f32 q0, q0, q8\n" in Transform()
1544 "vmul.f32 q1, q1, q8\n" in Transform()
1545 "vmul.f32 q2, q2, q8\n" in Transform()
1546 "vmul.f32 q3, q3, q8\n" in Transform()
1547 "vcvt.s32.f32 q0, q0\n" in Transform()
1548 "vcvt.s32.f32 q1, q1\n" in Transform()
1549 "vcvt.s32.f32 q2, q2\n" in Transform()
1550 "vcvt.s32.f32 q3, q3\n" in Transform()
1551 "vqmovn.s32 d0, q0\n" in Transform()
1552 "vqmovn.s32 d1, q1\n" in Transform()
1553 "vqmovn.s32 d4, q2\n" in Transform()
1554 "vqmovn.s32 d5, q3\n" in Transform()
1555 "vqmovun.s16 d0, q0\n" in Transform()
1556 "vqmovun.s16 d1, q2\n" in Transform()
1558 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
1559 "pld [%[output]]\n" in Transform()
1561 "bne 1b\n" in Transform()
1567 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1568 "vld1.32 {d4, d5, d6}, [%[input]]!\n" in Transform()
1569 "pld [%[input], #64]\n" in Transform()
1570 "vcvt.f32.s32 q0, q0\n" in Transform()
1571 "vcvt.f32.s32 q1, q1\n" in Transform()
1572 "vcvt.f32.s32 q2, q2\n" in Transform()
1573 "vcvt.f32.s32 q3, q3\n" in Transform()
1574 "vsub.f32 q0, q0, q6\n" in Transform()
1575 "vsub.f32 q1, q1, q6\n" in Transform()
1576 "vsub.f32 q2, q2, q6\n" in Transform()
1577 "vsub.f32 q3, q3, q6\n" in Transform()
1578 "vmul.f32 q0, q0, q7\n" in Transform()
1579 "vmul.f32 q1, q1, q7\n" in Transform()
1580 "vmul.f32 q2, q2, q7\n" in Transform()
1581 "vmul.f32 q3, q3, q7\n" in Transform()
1582 "vadd.f32 q0, q0, q4\n" in Transform()
1583 "vadd.f32 q1, q1, q4\n" in Transform()
1584 "vadd.f32 q2, q2, q4\n" in Transform()
1585 "vadd.f32 q3, q3, q4\n" in Transform()
1586 "vmul.f32 q0, q0, q8\n" in Transform()
1587 "vmul.f32 q1, q1, q8\n" in Transform()
1588 "vmul.f32 q2, q2, q8\n" in Transform()
1589 "vmul.f32 q3, q3, q8\n" in Transform()
1590 "vcvt.s32.f32 q0, q0\n" in Transform()
1591 "vcvt.s32.f32 q1, q1\n" in Transform()
1592 "vcvt.s32.f32 q2, q2\n" in Transform()
1593 "vcvt.s32.f32 q3, q3\n" in Transform()
1594 "vqmovn.s32 d0, q0\n" in Transform()
1595 "vqmovn.s32 d1, q1\n" in Transform()
1596 "vqmovn.s32 d4, q2\n" in Transform()
1597 "vqmovn.s32 d5, q3\n" in Transform()
1598 "vqmovun.s16 d0, q0\n" in Transform()
1599 "vqmovun.s16 d1, q2\n" in Transform()
1601 "vst1.32 {d0}, [%[output]]!\n" in Transform()
1602 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
1603 "vst1.16 {d1[2]}, [%[output]]!\n" in Transform()
1604 "pld [%[output]]\n" in Transform()
1631 "vdup.32 q4, %[input_range_min]\n" in Transform()
1632 "vdup.32 q5, %[output_range_min]\n" in Transform()
1633 "vdup.32 q6, %[input_range_offset]\n" in Transform()
1634 "vdup.32 q7, %[input_range_scale]\n" in Transform()
1635 "vdup.32 q8, %[one_over_output_range_scale]\n" in Transform()
1636 "vsub.f32 q4, q4, q5\n" in Transform()
1639 "subs %[count], %[count], #15\n" in Transform()
1640 "beq 2f\n" in Transform()
1643 "subs %[count], %[count], #16\n" in Transform()
1646 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1647 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
1648 "pld [%[input], #64]\n" in Transform()
1649 "vcvt.f32.s32 q0, q0\n" in Transform()
1650 "vcvt.f32.s32 q1, q1\n" in Transform()
1651 "vcvt.f32.s32 q2, q2\n" in Transform()
1652 "vcvt.f32.s32 q3, q3\n" in Transform()
1653 "vsub.f32 q0, q0, q6\n" in Transform()
1654 "vsub.f32 q1, q1, q6\n" in Transform()
1655 "vsub.f32 q2, q2, q6\n" in Transform()
1656 "vsub.f32 q3, q3, q6\n" in Transform()
1657 "vmul.f32 q0, q0, q7\n" in Transform()
1658 "vmul.f32 q1, q1, q7\n" in Transform()
1659 "vmul.f32 q2, q2, q7\n" in Transform()
1660 "vmul.f32 q3, q3, q7\n" in Transform()
1661 "vadd.f32 q0, q0, q4\n" in Transform()
1662 "vadd.f32 q1, q1, q4\n" in Transform()
1663 "vadd.f32 q2, q2, q4\n" in Transform()
1664 "vadd.f32 q3, q3, q4\n" in Transform()
1665 "vmul.f32 q0, q0, q8\n" in Transform()
1666 "vmul.f32 q1, q1, q8\n" in Transform()
1667 "vmul.f32 q2, q2, q8\n" in Transform()
1668 "vmul.f32 q3, q3, q8\n" in Transform()
1669 "vcvt.s32.f32 q0, q0\n" in Transform()
1670 "vcvt.s32.f32 q1, q1\n" in Transform()
1671 "vcvt.s32.f32 q2, q2\n" in Transform()
1672 "vcvt.s32.f32 q3, q3\n" in Transform()
1673 "vqmovn.s32 d0, q0\n" in Transform()
1674 "vqmovn.s32 d1, q1\n" in Transform()
1675 "vqmovn.s32 d4, q2\n" in Transform()
1676 "vqmovn.s32 d5, q3\n" in Transform()
1677 "vqmovun.s16 d0, q0\n" in Transform()
1678 "vqmovun.s16 d1, q2\n" in Transform()
1680 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
1681 "pld [%[output]]\n" in Transform()
1683 "bne 1b\n" in Transform()
1689 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1690 "vld1.32 {d4, d5, d6}, [%[input]]!\n" in Transform()
1691 "vld1.32 {d7[0]}, [%[input]]!\n" in Transform()
1692 "pld [%[input], #64]\n" in Transform()
1693 "vcvt.f32.s32 q0, q0\n" in Transform()
1694 "vcvt.f32.s32 q1, q1\n" in Transform()
1695 "vcvt.f32.s32 q2, q2\n" in Transform()
1696 "vcvt.f32.s32 q3, q3\n" in Transform()
1697 "vsub.f32 q0, q0, q6\n" in Transform()
1698 "vsub.f32 q1, q1, q6\n" in Transform()
1699 "vsub.f32 q2, q2, q6\n" in Transform()
1700 "vsub.f32 q3, q3, q6\n" in Transform()
1701 "vmul.f32 q0, q0, q7\n" in Transform()
1702 "vmul.f32 q1, q1, q7\n" in Transform()
1703 "vmul.f32 q2, q2, q7\n" in Transform()
1704 "vmul.f32 q3, q3, q7\n" in Transform()
1705 "vadd.f32 q0, q0, q4\n" in Transform()
1706 "vadd.f32 q1, q1, q4\n" in Transform()
1707 "vadd.f32 q2, q2, q4\n" in Transform()
1708 "vadd.f32 q3, q3, q4\n" in Transform()
1709 "vmul.f32 q0, q0, q8\n" in Transform()
1710 "vmul.f32 q1, q1, q8\n" in Transform()
1711 "vmul.f32 q2, q2, q8\n" in Transform()
1712 "vmul.f32 q3, q3, q8\n" in Transform()
1713 "vcvt.s32.f32 q0, q0\n" in Transform()
1714 "vcvt.s32.f32 q1, q1\n" in Transform()
1715 "vcvt.s32.f32 q2, q2\n" in Transform()
1716 "vcvt.s32.f32 q3, q3\n" in Transform()
1717 "vqmovn.s32 d0, q0\n" in Transform()
1718 "vqmovn.s32 d1, q1\n" in Transform()
1719 "vqmovn.s32 d4, q2\n" in Transform()
1720 "vqmovn.s32 d5, q3\n" in Transform()
1721 "vqmovun.s16 d0, q0\n" in Transform()
1722 "vqmovun.s16 d1, q2\n" in Transform()
1724 "vst1.32 {d0}, [%[output]]!\n" in Transform()
1725 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
1726 "vst1.16 {d1[2]}, [%[output]]!\n" in Transform()
1727 "vst1.8 {d1[6]}, [%[output]]!\n" in Transform()
1728 "pld [%[output]]\n" in Transform()
1755 "vdup.32 q4, %[range_min]\n" in Transform()
1756 "vdup.32 q5, %[range_offset]\n" in Transform()
1757 "vdup.32 q6, %[range_scale]\n" in Transform()
1760 "subs %[count], %[count], #16\n" in Transform()
1763 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1764 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
1765 "pld [%[input], #64]\n" in Transform()
1766 "vsub.f32 q0, q0, q4\n" in Transform()
1767 "vsub.f32 q1, q1, q4\n" in Transform()
1768 "vsub.f32 q2, q2, q4\n" in Transform()
1769 "vsub.f32 q3, q3, q4\n" in Transform()
1770 "vmul.f32 q0, q0, q6\n" in Transform()
1771 "vmul.f32 q1, q1, q6\n" in Transform()
1772 "vmul.f32 q2, q2, q6\n" in Transform()
1773 "vmul.f32 q3, q3, q6\n" in Transform()
1774 "vadd.f32 q0, q0, q5\n" in Transform()
1775 "vadd.f32 q1, q1, q5\n" in Transform()
1776 "vadd.f32 q2, q2, q5\n" in Transform()
1777 "vadd.f32 q3, q3, q5\n" in Transform()
1778 "vcvt.s32.f32 q0, q0\n" in Transform()
1779 "vcvt.s32.f32 q1, q1\n" in Transform()
1780 "vcvt.s32.f32 q2, q2\n" in Transform()
1781 "vcvt.s32.f32 q3, q3\n" in Transform()
1782 "vqmovn.s32 d0, q0\n" in Transform()
1783 "vqmovn.s32 d1, q1\n" in Transform()
1784 "vqmovn.s32 d4, q2\n" in Transform()
1785 "vqmovn.s32 d5, q3\n" in Transform()
1786 "vqmovun.s16 d0, q0\n" in Transform()
1787 "vqmovun.s16 d1, q2\n" in Transform()
1789 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
1790 "pld [%[output]]\n" in Transform()
1792 "bne 1b\n" in Transform()
1816 "vdup.32 q4, %[range_min]\n" in Transform()
1817 "vdup.32 q5, %[range_offset]\n" in Transform()
1818 "vdup.32 q6, %[range_scale]\n" in Transform()
1821 "subs %[count], %[count], #1\n" in Transform()
1822 "beq 2f\n" in Transform()
1825 "subs %[count], %[count], #16\n" in Transform()
1828 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1829 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
1830 "pld [%[input], #64]\n" in Transform()
1831 "vsub.f32 q0, q0, q4\n" in Transform()
1832 "vsub.f32 q1, q1, q4\n" in Transform()
1833 "vsub.f32 q2, q2, q4\n" in Transform()
1834 "vsub.f32 q3, q3, q4\n" in Transform()
1835 "vmul.f32 q0, q0, q6\n" in Transform()
1836 "vmul.f32 q1, q1, q6\n" in Transform()
1837 "vmul.f32 q2, q2, q6\n" in Transform()
1838 "vmul.f32 q3, q3, q6\n" in Transform()
1839 "vadd.f32 q0, q0, q5\n" in Transform()
1840 "vadd.f32 q1, q1, q5\n" in Transform()
1841 "vadd.f32 q2, q2, q5\n" in Transform()
1842 "vadd.f32 q3, q3, q5\n" in Transform()
1843 "vcvt.s32.f32 q0, q0\n" in Transform()
1844 "vcvt.s32.f32 q1, q1\n" in Transform()
1845 "vcvt.s32.f32 q2, q2\n" in Transform()
1846 "vcvt.s32.f32 q3, q3\n" in Transform()
1847 "vqmovn.s32 d0, q0\n" in Transform()
1848 "vqmovn.s32 d1, q1\n" in Transform()
1849 "vqmovn.s32 d4, q2\n" in Transform()
1850 "vqmovn.s32 d5, q3\n" in Transform()
1851 "vqmovun.s16 d0, q0\n" in Transform()
1852 "vqmovun.s16 d1, q2\n" in Transform()
1854 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
1855 "pld [%[output]]\n" in Transform()
1857 "bne 1b\n" in Transform()
1863 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
1864 "pld [%[input], #64]\n" in Transform()
1865 "vsub.f32 q0, q0, q4\n" in Transform()
1866 "vmul.f32 q0, q0, q6\n" in Transform()
1867 "vadd.f32 q0, q0, q5\n" in Transform()
1868 "vcvt.s32.f32 q0, q0\n" in Transform()
1869 "vqmovn.s32 d0, q0\n" in Transform()
1870 "vqmovun.s16 d0, q0\n" in Transform()
1872 "vst1.8 {d0[0]}, [%[output]]!\n" in Transform()
1873 "pld [%[output]]\n" in Transform()
1897 "vdup.32 q4, %[range_min]\n" in Transform()
1898 "vdup.32 q5, %[range_offset]\n" in Transform()
1899 "vdup.32 q6, %[range_scale]\n" in Transform()
1902 "subs %[count], %[count], #2\n" in Transform()
1903 "beq 2f\n" in Transform()
1906 "subs %[count], %[count], #16\n" in Transform()
1909 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1910 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
1911 "pld [%[input], #64]\n" in Transform()
1912 "vsub.f32 q0, q0, q4\n" in Transform()
1913 "vsub.f32 q1, q1, q4\n" in Transform()
1914 "vsub.f32 q2, q2, q4\n" in Transform()
1915 "vsub.f32 q3, q3, q4\n" in Transform()
1916 "vmul.f32 q0, q0, q6\n" in Transform()
1917 "vmul.f32 q1, q1, q6\n" in Transform()
1918 "vmul.f32 q2, q2, q6\n" in Transform()
1919 "vmul.f32 q3, q3, q6\n" in Transform()
1920 "vadd.f32 q0, q0, q5\n" in Transform()
1921 "vadd.f32 q1, q1, q5\n" in Transform()
1922 "vadd.f32 q2, q2, q5\n" in Transform()
1923 "vadd.f32 q3, q3, q5\n" in Transform()
1924 "vcvt.s32.f32 q0, q0\n" in Transform()
1925 "vcvt.s32.f32 q1, q1\n" in Transform()
1926 "vcvt.s32.f32 q2, q2\n" in Transform()
1927 "vcvt.s32.f32 q3, q3\n" in Transform()
1928 "vqmovn.s32 d0, q0\n" in Transform()
1929 "vqmovn.s32 d1, q1\n" in Transform()
1930 "vqmovn.s32 d4, q2\n" in Transform()
1931 "vqmovn.s32 d5, q3\n" in Transform()
1932 "vqmovun.s16 d0, q0\n" in Transform()
1933 "vqmovun.s16 d1, q2\n" in Transform()
1935 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
1936 "pld [%[output]]\n" in Transform()
1938 "bne 1b\n" in Transform()
1944 "vld1.32 {d0}, [%[input]]!\n" in Transform()
1945 "pld [%[input], #64]\n" in Transform()
1946 "vsub.f32 q0, q0, q4\n" in Transform()
1947 "vmul.f32 q0, q0, q6\n" in Transform()
1948 "vadd.f32 q0, q0, q5\n" in Transform()
1949 "vcvt.s32.f32 q0, q0\n" in Transform()
1950 "vqmovn.s32 d0, q0\n" in Transform()
1951 "vqmovun.s16 d0, q0\n" in Transform()
1953 "vst1.16 {d0[0]}, [%[output]]!\n" in Transform()
1954 "pld [%[output]]\n" in Transform()
1978 "vdup.32 q4, %[range_min]\n" in Transform()
1979 "vdup.32 q5, %[range_offset]\n" in Transform()
1980 "vdup.32 q6, %[range_scale]\n" in Transform()
1983 "subs %[count], %[count], #3\n" in Transform()
1984 "beq 2f\n" in Transform()
1987 "subs %[count], %[count], #16\n" in Transform()
1990 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
1991 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
1992 "pld [%[input], #64]\n" in Transform()
1993 "vsub.f32 q0, q0, q4\n" in Transform()
1994 "vsub.f32 q1, q1, q4\n" in Transform()
1995 "vsub.f32 q2, q2, q4\n" in Transform()
1996 "vsub.f32 q3, q3, q4\n" in Transform()
1997 "vmul.f32 q0, q0, q6\n" in Transform()
1998 "vmul.f32 q1, q1, q6\n" in Transform()
1999 "vmul.f32 q2, q2, q6\n" in Transform()
2000 "vmul.f32 q3, q3, q6\n" in Transform()
2001 "vadd.f32 q0, q0, q5\n" in Transform()
2002 "vadd.f32 q1, q1, q5\n" in Transform()
2003 "vadd.f32 q2, q2, q5\n" in Transform()
2004 "vadd.f32 q3, q3, q5\n" in Transform()
2005 "vcvt.s32.f32 q0, q0\n" in Transform()
2006 "vcvt.s32.f32 q1, q1\n" in Transform()
2007 "vcvt.s32.f32 q2, q2\n" in Transform()
2008 "vcvt.s32.f32 q3, q3\n" in Transform()
2009 "vqmovn.s32 d0, q0\n" in Transform()
2010 "vqmovn.s32 d1, q1\n" in Transform()
2011 "vqmovn.s32 d4, q2\n" in Transform()
2012 "vqmovn.s32 d5, q3\n" in Transform()
2013 "vqmovun.s16 d0, q0\n" in Transform()
2014 "vqmovun.s16 d1, q2\n" in Transform()
2016 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
2017 "pld [%[output]]\n" in Transform()
2019 "bne 1b\n" in Transform()
2025 "vld1.32 {d0}, [%[input]]!\n" in Transform()
2026 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
2027 "pld [%[input], #64]\n" in Transform()
2028 "vsub.f32 q0, q0, q4\n" in Transform()
2029 "vmul.f32 q0, q0, q6\n" in Transform()
2030 "vadd.f32 q0, q0, q5\n" in Transform()
2031 "vcvt.s32.f32 q0, q0\n" in Transform()
2032 "vqmovn.s32 d0, q0\n" in Transform()
2033 "vqmovun.s16 d0, q0\n" in Transform()
2035 "vst1.16 {d0[0]}, [%[output]]!\n" in Transform()
2036 "vst1.8 {d0[2]}, [%[output]]!\n" in Transform()
2037 "pld [%[output]]\n" in Transform()
2061 "vdup.32 q4, %[range_min]\n" in Transform()
2062 "vdup.32 q5, %[range_offset]\n" in Transform()
2063 "vdup.32 q6, %[range_scale]\n" in Transform()
2066 "subs %[count], %[count], #4\n" in Transform()
2067 "beq 2f\n" in Transform()
2070 "subs %[count], %[count], #16\n" in Transform()
2073 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2074 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
2075 "pld [%[input], #64]\n" in Transform()
2076 "vsub.f32 q0, q0, q4\n" in Transform()
2077 "vsub.f32 q1, q1, q4\n" in Transform()
2078 "vsub.f32 q2, q2, q4\n" in Transform()
2079 "vsub.f32 q3, q3, q4\n" in Transform()
2080 "vmul.f32 q0, q0, q6\n" in Transform()
2081 "vmul.f32 q1, q1, q6\n" in Transform()
2082 "vmul.f32 q2, q2, q6\n" in Transform()
2083 "vmul.f32 q3, q3, q6\n" in Transform()
2084 "vadd.f32 q0, q0, q5\n" in Transform()
2085 "vadd.f32 q1, q1, q5\n" in Transform()
2086 "vadd.f32 q2, q2, q5\n" in Transform()
2087 "vadd.f32 q3, q3, q5\n" in Transform()
2088 "vcvt.s32.f32 q0, q0\n" in Transform()
2089 "vcvt.s32.f32 q1, q1\n" in Transform()
2090 "vcvt.s32.f32 q2, q2\n" in Transform()
2091 "vcvt.s32.f32 q3, q3\n" in Transform()
2092 "vqmovn.s32 d0, q0\n" in Transform()
2093 "vqmovn.s32 d1, q1\n" in Transform()
2094 "vqmovn.s32 d4, q2\n" in Transform()
2095 "vqmovn.s32 d5, q3\n" in Transform()
2096 "vqmovun.s16 d0, q0\n" in Transform()
2097 "vqmovun.s16 d1, q2\n" in Transform()
2099 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
2100 "pld [%[output]]\n" in Transform()
2102 "bne 1b\n" in Transform()
2108 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
2109 "pld [%[input], #64]\n" in Transform()
2110 "vsub.f32 q0, q0, q4\n" in Transform()
2111 "vmul.f32 q0, q0, q6\n" in Transform()
2112 "vadd.f32 q0, q0, q5\n" in Transform()
2113 "vcvt.s32.f32 q0, q0\n" in Transform()
2114 "vqmovn.s32 d0, q0\n" in Transform()
2115 "vqmovun.s16 d0, q0\n" in Transform()
2117 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
2118 "pld [%[output]]\n" in Transform()
2142 "vdup.32 q4, %[range_min]\n" in Transform()
2143 "vdup.32 q5, %[range_offset]\n" in Transform()
2144 "vdup.32 q6, %[range_scale]\n" in Transform()
2147 "subs %[count], %[count], #5\n" in Transform()
2148 "beq 2f\n" in Transform()
2151 "subs %[count], %[count], #16\n" in Transform()
2154 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2155 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
2156 "pld [%[input], #64]\n" in Transform()
2157 "vsub.f32 q0, q0, q4\n" in Transform()
2158 "vsub.f32 q1, q1, q4\n" in Transform()
2159 "vsub.f32 q2, q2, q4\n" in Transform()
2160 "vsub.f32 q3, q3, q4\n" in Transform()
2161 "vmul.f32 q0, q0, q6\n" in Transform()
2162 "vmul.f32 q1, q1, q6\n" in Transform()
2163 "vmul.f32 q2, q2, q6\n" in Transform()
2164 "vmul.f32 q3, q3, q6\n" in Transform()
2165 "vadd.f32 q0, q0, q5\n" in Transform()
2166 "vadd.f32 q1, q1, q5\n" in Transform()
2167 "vadd.f32 q2, q2, q5\n" in Transform()
2168 "vadd.f32 q3, q3, q5\n" in Transform()
2169 "vcvt.s32.f32 q0, q0\n" in Transform()
2170 "vcvt.s32.f32 q1, q1\n" in Transform()
2171 "vcvt.s32.f32 q2, q2\n" in Transform()
2172 "vcvt.s32.f32 q3, q3\n" in Transform()
2173 "vqmovn.s32 d0, q0\n" in Transform()
2174 "vqmovn.s32 d1, q1\n" in Transform()
2175 "vqmovn.s32 d4, q2\n" in Transform()
2176 "vqmovn.s32 d5, q3\n" in Transform()
2177 "vqmovun.s16 d0, q0\n" in Transform()
2178 "vqmovun.s16 d1, q2\n" in Transform()
2180 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
2181 "pld [%[output]]\n" in Transform()
2183 "bne 1b\n" in Transform()
2189 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
2190 "vld1.32 {d2[0]}, [%[input]]!\n" in Transform()
2191 "pld [%[input], #64]\n" in Transform()
2192 "vsub.f32 q0, q0, q4\n" in Transform()
2193 "vsub.f32 q1, q1, q4\n" in Transform()
2194 "vmul.f32 q0, q0, q6\n" in Transform()
2195 "vmul.f32 q1, q1, q6\n" in Transform()
2196 "vadd.f32 q0, q0, q5\n" in Transform()
2197 "vadd.f32 q1, q1, q5\n" in Transform()
2198 "vcvt.s32.f32 q0, q0\n" in Transform()
2199 "vcvt.s32.f32 q1, q1\n" in Transform()
2200 "vqmovn.s32 d0, q0\n" in Transform()
2201 "vqmovn.s32 d1, q1\n" in Transform()
2202 "vqmovun.s16 d0, q0\n" in Transform()
2204 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
2205 "vst1.8 {d0[4]}, [%[output]]!\n" in Transform()
2206 "pld [%[output]]\n" in Transform()
2230 "vdup.32 q4, %[range_min]\n" in Transform()
2231 "vdup.32 q5, %[range_offset]\n" in Transform()
2232 "vdup.32 q6, %[range_scale]\n" in Transform()
2235 "subs %[count], %[count], #6\n" in Transform()
2236 "beq 2f\n" in Transform()
2239 "subs %[count], %[count], #16\n" in Transform()
2242 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2243 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
2244 "pld [%[input], #64]\n" in Transform()
2245 "vsub.f32 q0, q0, q4\n" in Transform()
2246 "vsub.f32 q1, q1, q4\n" in Transform()
2247 "vsub.f32 q2, q2, q4\n" in Transform()
2248 "vsub.f32 q3, q3, q4\n" in Transform()
2249 "vmul.f32 q0, q0, q6\n" in Transform()
2250 "vmul.f32 q1, q1, q6\n" in Transform()
2251 "vmul.f32 q2, q2, q6\n" in Transform()
2252 "vmul.f32 q3, q3, q6\n" in Transform()
2253 "vadd.f32 q0, q0, q5\n" in Transform()
2254 "vadd.f32 q1, q1, q5\n" in Transform()
2255 "vadd.f32 q2, q2, q5\n" in Transform()
2256 "vadd.f32 q3, q3, q5\n" in Transform()
2257 "vcvt.s32.f32 q0, q0\n" in Transform()
2258 "vcvt.s32.f32 q1, q1\n" in Transform()
2259 "vcvt.s32.f32 q2, q2\n" in Transform()
2260 "vcvt.s32.f32 q3, q3\n" in Transform()
2261 "vqmovn.s32 d0, q0\n" in Transform()
2262 "vqmovn.s32 d1, q1\n" in Transform()
2263 "vqmovn.s32 d4, q2\n" in Transform()
2264 "vqmovn.s32 d5, q3\n" in Transform()
2265 "vqmovun.s16 d0, q0\n" in Transform()
2266 "vqmovun.s16 d1, q2\n" in Transform()
2268 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
2269 "pld [%[output]]\n" in Transform()
2271 "bne 1b\n" in Transform()
2277 "vld1.32 {d0, d1, d2}, [%[input]]!\n" in Transform()
2278 "pld [%[input], #64]\n" in Transform()
2279 "vsub.f32 q0, q0, q4\n" in Transform()
2280 "vsub.f32 q1, q1, q4\n" in Transform()
2281 "vmul.f32 q0, q0, q6\n" in Transform()
2282 "vmul.f32 q1, q1, q6\n" in Transform()
2283 "vadd.f32 q0, q0, q5\n" in Transform()
2284 "vadd.f32 q1, q1, q5\n" in Transform()
2285 "vcvt.s32.f32 q0, q0\n" in Transform()
2286 "vcvt.s32.f32 q1, q1\n" in Transform()
2287 "vqmovn.s32 d0, q0\n" in Transform()
2288 "vqmovn.s32 d1, q1\n" in Transform()
2289 "vqmovun.s16 d0, q0\n" in Transform()
2291 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
2292 "vst1.16 {d0[2]}, [%[output]]!\n" in Transform()
2293 "pld [%[output]]\n" in Transform()
2317 "vdup.32 q4, %[range_min]\n" in Transform()
2318 "vdup.32 q5, %[range_offset]\n" in Transform()
2319 "vdup.32 q6, %[range_scale]\n" in Transform()
2322 "subs %[count], %[count], #7\n" in Transform()
2323 "beq 2f\n" in Transform()
2326 "subs %[count], %[count], #16\n" in Transform()
2329 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2330 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
2331 "pld [%[input], #64]\n" in Transform()
2332 "vsub.f32 q0, q0, q4\n" in Transform()
2333 "vsub.f32 q1, q1, q4\n" in Transform()
2334 "vsub.f32 q2, q2, q4\n" in Transform()
2335 "vsub.f32 q3, q3, q4\n" in Transform()
2336 "vmul.f32 q0, q0, q6\n" in Transform()
2337 "vmul.f32 q1, q1, q6\n" in Transform()
2338 "vmul.f32 q2, q2, q6\n" in Transform()
2339 "vmul.f32 q3, q3, q6\n" in Transform()
2340 "vadd.f32 q0, q0, q5\n" in Transform()
2341 "vadd.f32 q1, q1, q5\n" in Transform()
2342 "vadd.f32 q2, q2, q5\n" in Transform()
2343 "vadd.f32 q3, q3, q5\n" in Transform()
2344 "vcvt.s32.f32 q0, q0\n" in Transform()
2345 "vcvt.s32.f32 q1, q1\n" in Transform()
2346 "vcvt.s32.f32 q2, q2\n" in Transform()
2347 "vcvt.s32.f32 q3, q3\n" in Transform()
2348 "vqmovn.s32 d0, q0\n" in Transform()
2349 "vqmovn.s32 d1, q1\n" in Transform()
2350 "vqmovn.s32 d4, q2\n" in Transform()
2351 "vqmovn.s32 d5, q3\n" in Transform()
2352 "vqmovun.s16 d0, q0\n" in Transform()
2353 "vqmovun.s16 d1, q2\n" in Transform()
2355 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
2356 "pld [%[output]]\n" in Transform()
2358 "bne 1b\n" in Transform()
2364 "vld1.32 {d0, d1, d2}, [%[input]]!\n" in Transform()
2365 "vld1.32 {d3[0]}, [%[input]]!\n" in Transform()
2366 "pld [%[input], #64]\n" in Transform()
2367 "vsub.f32 q0, q0, q4\n" in Transform()
2368 "vsub.f32 q1, q1, q4\n" in Transform()
2369 "vmul.f32 q0, q0, q6\n" in Transform()
2370 "vmul.f32 q1, q1, q6\n" in Transform()
2371 "vadd.f32 q0, q0, q5\n" in Transform()
2372 "vadd.f32 q1, q1, q5\n" in Transform()
2373 "vcvt.s32.f32 q0, q0\n" in Transform()
2374 "vcvt.s32.f32 q1, q1\n" in Transform()
2375 "vqmovn.s32 d0, q0\n" in Transform()
2376 "vqmovn.s32 d1, q1\n" in Transform()
2377 "vqmovun.s16 d0, q0\n" in Transform()
2379 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
2380 "vst1.16 {d0[2]}, [%[output]]!\n" in Transform()
2381 "vst1.8 {d0[6]}, [%[output]]!\n" in Transform()
2382 "pld [%[output]]\n" in Transform()
2406 "vdup.32 q4, %[range_min]\n" in Transform()
2407 "vdup.32 q5, %[range_offset]\n" in Transform()
2408 "vdup.32 q6, %[range_scale]\n" in Transform()
2411 "subs %[count], %[count], #8\n" in Transform()
2412 "beq 2f\n" in Transform()
2415 "subs %[count], %[count], #16\n" in Transform()
2418 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2419 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
2420 "pld [%[input], #64]\n" in Transform()
2421 "vsub.f32 q0, q0, q4\n" in Transform()
2422 "vsub.f32 q1, q1, q4\n" in Transform()
2423 "vsub.f32 q2, q2, q4\n" in Transform()
2424 "vsub.f32 q3, q3, q4\n" in Transform()
2425 "vmul.f32 q0, q0, q6\n" in Transform()
2426 "vmul.f32 q1, q1, q6\n" in Transform()
2427 "vmul.f32 q2, q2, q6\n" in Transform()
2428 "vmul.f32 q3, q3, q6\n" in Transform()
2429 "vadd.f32 q0, q0, q5\n" in Transform()
2430 "vadd.f32 q1, q1, q5\n" in Transform()
2431 "vadd.f32 q2, q2, q5\n" in Transform()
2432 "vadd.f32 q3, q3, q5\n" in Transform()
2433 "vcvt.s32.f32 q0, q0\n" in Transform()
2434 "vcvt.s32.f32 q1, q1\n" in Transform()
2435 "vcvt.s32.f32 q2, q2\n" in Transform()
2436 "vcvt.s32.f32 q3, q3\n" in Transform()
2437 "vqmovn.s32 d0, q0\n" in Transform()
2438 "vqmovn.s32 d1, q1\n" in Transform()
2439 "vqmovn.s32 d4, q2\n" in Transform()
2440 "vqmovn.s32 d5, q3\n" in Transform()
2441 "vqmovun.s16 d0, q0\n" in Transform()
2442 "vqmovun.s16 d1, q2\n" in Transform()
2444 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
2445 "pld [%[output]]\n" in Transform()
2447 "bne 1b\n" in Transform()
2453 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2454 "pld [%[input], #64]\n" in Transform()
2455 "vsub.f32 q0, q0, q4\n" in Transform()
2456 "vsub.f32 q1, q1, q4\n" in Transform()
2457 "vmul.f32 q0, q0, q6\n" in Transform()
2458 "vmul.f32 q1, q1, q6\n" in Transform()
2459 "vadd.f32 q0, q0, q5\n" in Transform()
2460 "vadd.f32 q1, q1, q5\n" in Transform()
2461 "vcvt.s32.f32 q0, q0\n" in Transform()
2462 "vcvt.s32.f32 q1, q1\n" in Transform()
2463 "vqmovn.s32 d0, q0\n" in Transform()
2464 "vqmovn.s32 d1, q1\n" in Transform()
2465 "vqmovun.s16 d0, q0\n" in Transform()
2467 "vst1.32 {d0}, [%[output]]!\n" in Transform()
2468 "pld [%[output]]\n" in Transform()
2492 "vdup.32 q4, %[range_min]\n" in Transform()
2493 "vdup.32 q5, %[range_offset]\n" in Transform()
2494 "vdup.32 q6, %[range_scale]\n" in Transform()
2497 "subs %[count], %[count], #9\n" in Transform()
2498 "beq 2f\n" in Transform()
2501 "subs %[count], %[count], #16\n" in Transform()
2504 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2505 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
2506 "pld [%[input], #64]\n" in Transform()
2507 "vsub.f32 q0, q0, q4\n" in Transform()
2508 "vsub.f32 q1, q1, q4\n" in Transform()
2509 "vsub.f32 q2, q2, q4\n" in Transform()
2510 "vsub.f32 q3, q3, q4\n" in Transform()
2511 "vmul.f32 q0, q0, q6\n" in Transform()
2512 "vmul.f32 q1, q1, q6\n" in Transform()
2513 "vmul.f32 q2, q2, q6\n" in Transform()
2514 "vmul.f32 q3, q3, q6\n" in Transform()
2515 "vadd.f32 q0, q0, q5\n" in Transform()
2516 "vadd.f32 q1, q1, q5\n" in Transform()
2517 "vadd.f32 q2, q2, q5\n" in Transform()
2518 "vadd.f32 q3, q3, q5\n" in Transform()
2519 "vcvt.s32.f32 q0, q0\n" in Transform()
2520 "vcvt.s32.f32 q1, q1\n" in Transform()
2521 "vcvt.s32.f32 q2, q2\n" in Transform()
2522 "vcvt.s32.f32 q3, q3\n" in Transform()
2523 "vqmovn.s32 d0, q0\n" in Transform()
2524 "vqmovn.s32 d1, q1\n" in Transform()
2525 "vqmovn.s32 d4, q2\n" in Transform()
2526 "vqmovn.s32 d5, q3\n" in Transform()
2527 "vqmovun.s16 d0, q0\n" in Transform()
2528 "vqmovun.s16 d1, q2\n" in Transform()
2530 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
2531 "pld [%[output]]\n" in Transform()
2533 "bne 1b\n" in Transform()
2539 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2540 "vld1.32 {d4[0]}, [%[input]]!\n" in Transform()
2541 "pld [%[input], #64]\n" in Transform()
2542 "vsub.f32 q0, q0, q4\n" in Transform()
2543 "vsub.f32 q1, q1, q4\n" in Transform()
2544 "vsub.f32 q2, q2, q4\n" in Transform()
2545 "vmul.f32 q0, q0, q6\n" in Transform()
2546 "vmul.f32 q1, q1, q6\n" in Transform()
2547 "vmul.f32 q2, q2, q6\n" in Transform()
2548 "vadd.f32 q0, q0, q5\n" in Transform()
2549 "vadd.f32 q1, q1, q5\n" in Transform()
2550 "vadd.f32 q2, q2, q5\n" in Transform()
2551 "vcvt.s32.f32 q0, q0\n" in Transform()
2552 "vcvt.s32.f32 q1, q1\n" in Transform()
2553 "vcvt.s32.f32 q2, q2\n" in Transform()
2554 "vqmovn.s32 d0, q0\n" in Transform()
2555 "vqmovn.s32 d1, q1\n" in Transform()
2556 "vqmovn.s32 d4, q2\n" in Transform()
2557 "vqmovun.s16 d0, q0\n" in Transform()
2558 "vqmovun.s16 d1, q2\n" in Transform()
2560 "vst1.32 {d0}, [%[output]]!\n" in Transform()
2561 "vst1.8 {d1[0]}, [%[output]]!\n" in Transform()
2562 "pld [%[output]]\n" in Transform()
2586 "vdup.32 q4, %[range_min]\n" in Transform()
2587 "vdup.32 q5, %[range_offset]\n" in Transform()
2588 "vdup.32 q6, %[range_scale]\n" in Transform()
2591 "subs %[count], %[count], #10\n" in Transform()
2592 "beq 2f\n" in Transform()
2595 "subs %[count], %[count], #16\n" in Transform()
2598 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2599 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
2600 "pld [%[input], #64]\n" in Transform()
2601 "vsub.f32 q0, q0, q4\n" in Transform()
2602 "vsub.f32 q1, q1, q4\n" in Transform()
2603 "vsub.f32 q2, q2, q4\n" in Transform()
2604 "vsub.f32 q3, q3, q4\n" in Transform()
2605 "vmul.f32 q0, q0, q6\n" in Transform()
2606 "vmul.f32 q1, q1, q6\n" in Transform()
2607 "vmul.f32 q2, q2, q6\n" in Transform()
2608 "vmul.f32 q3, q3, q6\n" in Transform()
2609 "vadd.f32 q0, q0, q5\n" in Transform()
2610 "vadd.f32 q1, q1, q5\n" in Transform()
2611 "vadd.f32 q2, q2, q5\n" in Transform()
2612 "vadd.f32 q3, q3, q5\n" in Transform()
2613 "vcvt.s32.f32 q0, q0\n" in Transform()
2614 "vcvt.s32.f32 q1, q1\n" in Transform()
2615 "vcvt.s32.f32 q2, q2\n" in Transform()
2616 "vcvt.s32.f32 q3, q3\n" in Transform()
2617 "vqmovn.s32 d0, q0\n" in Transform()
2618 "vqmovn.s32 d1, q1\n" in Transform()
2619 "vqmovn.s32 d4, q2\n" in Transform()
2620 "vqmovn.s32 d5, q3\n" in Transform()
2621 "vqmovun.s16 d0, q0\n" in Transform()
2622 "vqmovun.s16 d1, q2\n" in Transform()
2624 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
2625 "pld [%[output]]\n" in Transform()
2627 "bne 1b\n" in Transform()
2633 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2634 "vld1.32 {d4}, [%[input]]!\n" in Transform()
2635 "pld [%[input], #64]\n" in Transform()
2636 "vsub.f32 q0, q0, q4\n" in Transform()
2637 "vsub.f32 q1, q1, q4\n" in Transform()
2638 "vsub.f32 q2, q2, q4\n" in Transform()
2639 "vmul.f32 q0, q0, q6\n" in Transform()
2640 "vmul.f32 q1, q1, q6\n" in Transform()
2641 "vmul.f32 q2, q2, q6\n" in Transform()
2642 "vadd.f32 q0, q0, q5\n" in Transform()
2643 "vadd.f32 q1, q1, q5\n" in Transform()
2644 "vadd.f32 q2, q2, q5\n" in Transform()
2645 "vcvt.s32.f32 q0, q0\n" in Transform()
2646 "vcvt.s32.f32 q1, q1\n" in Transform()
2647 "vcvt.s32.f32 q2, q2\n" in Transform()
2648 "vqmovn.s32 d0, q0\n" in Transform()
2649 "vqmovn.s32 d1, q1\n" in Transform()
2650 "vqmovn.s32 d4, q2\n" in Transform()
2651 "vqmovun.s16 d0, q0\n" in Transform()
2652 "vqmovun.s16 d1, q2\n" in Transform()
2654 "vst1.32 {d0}, [%[output]]!\n" in Transform()
2655 "vst1.16 {d1[0]}, [%[output]]!\n" in Transform()
2656 "pld [%[output]]\n" in Transform()
2680 "vdup.32 q4, %[range_min]\n" in Transform()
2681 "vdup.32 q5, %[range_offset]\n" in Transform()
2682 "vdup.32 q6, %[range_scale]\n" in Transform()
2685 "subs %[count], %[count], #11\n" in Transform()
2686 "beq 2f\n" in Transform()
2689 "subs %[count], %[count], #16\n" in Transform()
2692 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2693 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
2694 "pld [%[input], #64]\n" in Transform()
2695 "vsub.f32 q0, q0, q4\n" in Transform()
2696 "vsub.f32 q1, q1, q4\n" in Transform()
2697 "vsub.f32 q2, q2, q4\n" in Transform()
2698 "vsub.f32 q3, q3, q4\n" in Transform()
2699 "vmul.f32 q0, q0, q6\n" in Transform()
2700 "vmul.f32 q1, q1, q6\n" in Transform()
2701 "vmul.f32 q2, q2, q6\n" in Transform()
2702 "vmul.f32 q3, q3, q6\n" in Transform()
2703 "vadd.f32 q0, q0, q5\n" in Transform()
2704 "vadd.f32 q1, q1, q5\n" in Transform()
2705 "vadd.f32 q2, q2, q5\n" in Transform()
2706 "vadd.f32 q3, q3, q5\n" in Transform()
2707 "vcvt.s32.f32 q0, q0\n" in Transform()
2708 "vcvt.s32.f32 q1, q1\n" in Transform()
2709 "vcvt.s32.f32 q2, q2\n" in Transform()
2710 "vcvt.s32.f32 q3, q3\n" in Transform()
2711 "vqmovn.s32 d0, q0\n" in Transform()
2712 "vqmovn.s32 d1, q1\n" in Transform()
2713 "vqmovn.s32 d4, q2\n" in Transform()
2714 "vqmovn.s32 d5, q3\n" in Transform()
2715 "vqmovun.s16 d0, q0\n" in Transform()
2716 "vqmovun.s16 d1, q2\n" in Transform()
2718 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
2719 "pld [%[output]]\n" in Transform()
2721 "bne 1b\n" in Transform()
2727 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2728 "vld1.32 {d4}, [%[input]]!\n" in Transform()
2729 "vld1.32 {d5[0]}, [%[input]]!\n" in Transform()
2730 "pld [%[input], #64]\n" in Transform()
2731 "vsub.f32 q0, q0, q4\n" in Transform()
2732 "vsub.f32 q1, q1, q4\n" in Transform()
2733 "vsub.f32 q2, q2, q4\n" in Transform()
2734 "vmul.f32 q0, q0, q6\n" in Transform()
2735 "vmul.f32 q1, q1, q6\n" in Transform()
2736 "vmul.f32 q2, q2, q6\n" in Transform()
2737 "vadd.f32 q0, q0, q5\n" in Transform()
2738 "vadd.f32 q1, q1, q5\n" in Transform()
2739 "vadd.f32 q2, q2, q5\n" in Transform()
2740 "vcvt.s32.f32 q0, q0\n" in Transform()
2741 "vcvt.s32.f32 q1, q1\n" in Transform()
2742 "vcvt.s32.f32 q2, q2\n" in Transform()
2743 "vqmovn.s32 d0, q0\n" in Transform()
2744 "vqmovn.s32 d1, q1\n" in Transform()
2745 "vqmovn.s32 d4, q2\n" in Transform()
2746 "vqmovun.s16 d0, q0\n" in Transform()
2747 "vqmovun.s16 d1, q2\n" in Transform()
2749 "vst1.32 {d0}, [%[output]]!\n" in Transform()
2750 "vst1.16 {d1[0]}, [%[output]]!\n" in Transform()
2751 "vst1.8 {d1[2]}, [%[output]]!\n" in Transform()
2752 "pld [%[output]]\n" in Transform()
2776 "vdup.32 q4, %[range_min]\n" in Transform()
2777 "vdup.32 q5, %[range_offset]\n" in Transform()
2778 "vdup.32 q6, %[range_scale]\n" in Transform()
2781 "subs %[count], %[count], #12\n" in Transform()
2782 "beq 2f\n" in Transform()
2785 "subs %[count], %[count], #16\n" in Transform()
2788 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2789 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
2790 "pld [%[input], #64]\n" in Transform()
2791 "vsub.f32 q0, q0, q4\n" in Transform()
2792 "vsub.f32 q1, q1, q4\n" in Transform()
2793 "vsub.f32 q2, q2, q4\n" in Transform()
2794 "vsub.f32 q3, q3, q4\n" in Transform()
2795 "vmul.f32 q0, q0, q6\n" in Transform()
2796 "vmul.f32 q1, q1, q6\n" in Transform()
2797 "vmul.f32 q2, q2, q6\n" in Transform()
2798 "vmul.f32 q3, q3, q6\n" in Transform()
2799 "vadd.f32 q0, q0, q5\n" in Transform()
2800 "vadd.f32 q1, q1, q5\n" in Transform()
2801 "vadd.f32 q2, q2, q5\n" in Transform()
2802 "vadd.f32 q3, q3, q5\n" in Transform()
2803 "vcvt.s32.f32 q0, q0\n" in Transform()
2804 "vcvt.s32.f32 q1, q1\n" in Transform()
2805 "vcvt.s32.f32 q2, q2\n" in Transform()
2806 "vcvt.s32.f32 q3, q3\n" in Transform()
2807 "vqmovn.s32 d0, q0\n" in Transform()
2808 "vqmovn.s32 d1, q1\n" in Transform()
2809 "vqmovn.s32 d4, q2\n" in Transform()
2810 "vqmovn.s32 d5, q3\n" in Transform()
2811 "vqmovun.s16 d0, q0\n" in Transform()
2812 "vqmovun.s16 d1, q2\n" in Transform()
2814 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
2815 "pld [%[output]]\n" in Transform()
2817 "bne 1b\n" in Transform()
2823 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2824 "vld1.32 {d4, d5}, [%[input]]!\n" in Transform()
2825 "pld [%[input], #64]\n" in Transform()
2826 "vsub.f32 q0, q0, q4\n" in Transform()
2827 "vsub.f32 q1, q1, q4\n" in Transform()
2828 "vsub.f32 q2, q2, q4\n" in Transform()
2829 "vmul.f32 q0, q0, q6\n" in Transform()
2830 "vmul.f32 q1, q1, q6\n" in Transform()
2831 "vmul.f32 q2, q2, q6\n" in Transform()
2832 "vadd.f32 q0, q0, q5\n" in Transform()
2833 "vadd.f32 q1, q1, q5\n" in Transform()
2834 "vadd.f32 q2, q2, q5\n" in Transform()
2835 "vcvt.s32.f32 q0, q0\n" in Transform()
2836 "vcvt.s32.f32 q1, q1\n" in Transform()
2837 "vcvt.s32.f32 q2, q2\n" in Transform()
2838 "vqmovn.s32 d0, q0\n" in Transform()
2839 "vqmovn.s32 d1, q1\n" in Transform()
2840 "vqmovn.s32 d4, q2\n" in Transform()
2841 "vqmovun.s16 d0, q0\n" in Transform()
2842 "vqmovun.s16 d1, q2\n" in Transform()
2844 "vst1.32 {d0}, [%[output]]!\n" in Transform()
2845 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
2846 "pld [%[output]]\n" in Transform()
2870 "vdup.32 q4, %[range_min]\n" in Transform()
2871 "vdup.32 q5, %[range_offset]\n" in Transform()
2872 "vdup.32 q6, %[range_scale]\n" in Transform()
2875 "subs %[count], %[count], #13\n" in Transform()
2876 "beq 2f\n" in Transform()
2879 "subs %[count], %[count], #16\n" in Transform()
2882 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2883 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
2884 "pld [%[input], #64]\n" in Transform()
2885 "vsub.f32 q0, q0, q4\n" in Transform()
2886 "vsub.f32 q1, q1, q4\n" in Transform()
2887 "vsub.f32 q2, q2, q4\n" in Transform()
2888 "vsub.f32 q3, q3, q4\n" in Transform()
2889 "vmul.f32 q0, q0, q6\n" in Transform()
2890 "vmul.f32 q1, q1, q6\n" in Transform()
2891 "vmul.f32 q2, q2, q6\n" in Transform()
2892 "vmul.f32 q3, q3, q6\n" in Transform()
2893 "vadd.f32 q0, q0, q5\n" in Transform()
2894 "vadd.f32 q1, q1, q5\n" in Transform()
2895 "vadd.f32 q2, q2, q5\n" in Transform()
2896 "vadd.f32 q3, q3, q5\n" in Transform()
2897 "vcvt.s32.f32 q0, q0\n" in Transform()
2898 "vcvt.s32.f32 q1, q1\n" in Transform()
2899 "vcvt.s32.f32 q2, q2\n" in Transform()
2900 "vcvt.s32.f32 q3, q3\n" in Transform()
2901 "vqmovn.s32 d0, q0\n" in Transform()
2902 "vqmovn.s32 d1, q1\n" in Transform()
2903 "vqmovn.s32 d4, q2\n" in Transform()
2904 "vqmovn.s32 d5, q3\n" in Transform()
2905 "vqmovun.s16 d0, q0\n" in Transform()
2906 "vqmovun.s16 d1, q2\n" in Transform()
2908 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
2909 "pld [%[output]]\n" in Transform()
2911 "bne 1b\n" in Transform()
2917 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2918 "vld1.32 {d4, d5}, [%[input]]!\n" in Transform()
2919 "vld1.32 {d6[0]}, [%[input]]!\n" in Transform()
2920 "pld [%[input], #64]\n" in Transform()
2921 "vsub.f32 q0, q0, q4\n" in Transform()
2922 "vsub.f32 q1, q1, q4\n" in Transform()
2923 "vsub.f32 q2, q2, q4\n" in Transform()
2924 "vsub.f32 q3, q3, q4\n" in Transform()
2925 "vmul.f32 q0, q0, q6\n" in Transform()
2926 "vmul.f32 q1, q1, q6\n" in Transform()
2927 "vmul.f32 q2, q2, q6\n" in Transform()
2928 "vmul.f32 q3, q3, q6\n" in Transform()
2929 "vadd.f32 q0, q0, q5\n" in Transform()
2930 "vadd.f32 q1, q1, q5\n" in Transform()
2931 "vadd.f32 q2, q2, q5\n" in Transform()
2932 "vadd.f32 q3, q3, q5\n" in Transform()
2933 "vcvt.s32.f32 q0, q0\n" in Transform()
2934 "vcvt.s32.f32 q1, q1\n" in Transform()
2935 "vcvt.s32.f32 q2, q2\n" in Transform()
2936 "vcvt.s32.f32 q3, q3\n" in Transform()
2937 "vqmovn.s32 d0, q0\n" in Transform()
2938 "vqmovn.s32 d1, q1\n" in Transform()
2939 "vqmovn.s32 d4, q2\n" in Transform()
2940 "vqmovn.s32 d5, q3\n" in Transform()
2941 "vqmovun.s16 d0, q0\n" in Transform()
2942 "vqmovun.s16 d1, q2\n" in Transform()
2944 "vst1.32 {d0}, [%[output]]!\n" in Transform()
2945 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
2946 "vst1.8 {d1[4]}, [%[output]]!\n" in Transform()
2947 "pld [%[output]]\n" in Transform()
2971 "vdup.32 q4, %[range_min]\n" in Transform()
2972 "vdup.32 q5, %[range_offset]\n" in Transform()
2973 "vdup.32 q6, %[range_scale]\n" in Transform()
2976 "subs %[count], %[count], #14\n" in Transform()
2977 "beq 2f\n" in Transform()
2980 "subs %[count], %[count], #16\n" in Transform()
2983 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
2984 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
2985 "pld [%[input], #64]\n" in Transform()
2986 "vsub.f32 q0, q0, q4\n" in Transform()
2987 "vsub.f32 q1, q1, q4\n" in Transform()
2988 "vsub.f32 q2, q2, q4\n" in Transform()
2989 "vsub.f32 q3, q3, q4\n" in Transform()
2990 "vmul.f32 q0, q0, q6\n" in Transform()
2991 "vmul.f32 q1, q1, q6\n" in Transform()
2992 "vmul.f32 q2, q2, q6\n" in Transform()
2993 "vmul.f32 q3, q3, q6\n" in Transform()
2994 "vadd.f32 q0, q0, q5\n" in Transform()
2995 "vadd.f32 q1, q1, q5\n" in Transform()
2996 "vadd.f32 q2, q2, q5\n" in Transform()
2997 "vadd.f32 q3, q3, q5\n" in Transform()
2998 "vcvt.s32.f32 q0, q0\n" in Transform()
2999 "vcvt.s32.f32 q1, q1\n" in Transform()
3000 "vcvt.s32.f32 q2, q2\n" in Transform()
3001 "vcvt.s32.f32 q3, q3\n" in Transform()
3002 "vqmovn.s32 d0, q0\n" in Transform()
3003 "vqmovn.s32 d1, q1\n" in Transform()
3004 "vqmovn.s32 d4, q2\n" in Transform()
3005 "vqmovn.s32 d5, q3\n" in Transform()
3006 "vqmovun.s16 d0, q0\n" in Transform()
3007 "vqmovun.s16 d1, q2\n" in Transform()
3009 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
3010 "pld [%[output]]\n" in Transform()
3012 "bne 1b\n" in Transform()
3018 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
3019 "vld1.32 {d4, d5, d6}, [%[input]]!\n" in Transform()
3020 "pld [%[input], #64]\n" in Transform()
3021 "vsub.f32 q0, q0, q4\n" in Transform()
3022 "vsub.f32 q1, q1, q4\n" in Transform()
3023 "vsub.f32 q2, q2, q4\n" in Transform()
3024 "vsub.f32 q3, q3, q4\n" in Transform()
3025 "vmul.f32 q0, q0, q6\n" in Transform()
3026 "vmul.f32 q1, q1, q6\n" in Transform()
3027 "vmul.f32 q2, q2, q6\n" in Transform()
3028 "vmul.f32 q3, q3, q6\n" in Transform()
3029 "vadd.f32 q0, q0, q5\n" in Transform()
3030 "vadd.f32 q1, q1, q5\n" in Transform()
3031 "vadd.f32 q2, q2, q5\n" in Transform()
3032 "vadd.f32 q3, q3, q5\n" in Transform()
3033 "vcvt.s32.f32 q0, q0\n" in Transform()
3034 "vcvt.s32.f32 q1, q1\n" in Transform()
3035 "vcvt.s32.f32 q2, q2\n" in Transform()
3036 "vcvt.s32.f32 q3, q3\n" in Transform()
3037 "vqmovn.s32 d0, q0\n" in Transform()
3038 "vqmovn.s32 d1, q1\n" in Transform()
3039 "vqmovn.s32 d4, q2\n" in Transform()
3040 "vqmovn.s32 d5, q3\n" in Transform()
3041 "vqmovun.s16 d0, q0\n" in Transform()
3042 "vqmovun.s16 d1, q2\n" in Transform()
3044 "vst1.32 {d0}, [%[output]]!\n" in Transform()
3045 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
3046 "vst1.16 {d1[2]}, [%[output]]!\n" in Transform()
3047 "pld [%[output]]\n" in Transform()
3071 "vdup.32 q4, %[range_min]\n" in Transform()
3072 "vdup.32 q5, %[range_offset]\n" in Transform()
3073 "vdup.32 q6, %[range_scale]\n" in Transform()
3076 "subs %[count], %[count], #15\n" in Transform()
3077 "beq 2f\n" in Transform()
3080 "subs %[count], %[count], #16\n" in Transform()
3083 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
3084 "vld1.32 {d4, d5, d6, d7}, [%[input]]!\n" in Transform()
3085 "pld [%[input], #64]\n" in Transform()
3086 "vsub.f32 q0, q0, q4\n" in Transform()
3087 "vsub.f32 q1, q1, q4\n" in Transform()
3088 "vsub.f32 q2, q2, q4\n" in Transform()
3089 "vsub.f32 q3, q3, q4\n" in Transform()
3090 "vmul.f32 q0, q0, q6\n" in Transform()
3091 "vmul.f32 q1, q1, q6\n" in Transform()
3092 "vmul.f32 q2, q2, q6\n" in Transform()
3093 "vmul.f32 q3, q3, q6\n" in Transform()
3094 "vadd.f32 q0, q0, q5\n" in Transform()
3095 "vadd.f32 q1, q1, q5\n" in Transform()
3096 "vadd.f32 q2, q2, q5\n" in Transform()
3097 "vadd.f32 q3, q3, q5\n" in Transform()
3098 "vcvt.s32.f32 q0, q0\n" in Transform()
3099 "vcvt.s32.f32 q1, q1\n" in Transform()
3100 "vcvt.s32.f32 q2, q2\n" in Transform()
3101 "vcvt.s32.f32 q3, q3\n" in Transform()
3102 "vqmovn.s32 d0, q0\n" in Transform()
3103 "vqmovn.s32 d1, q1\n" in Transform()
3104 "vqmovn.s32 d4, q2\n" in Transform()
3105 "vqmovn.s32 d5, q3\n" in Transform()
3106 "vqmovun.s16 d0, q0\n" in Transform()
3107 "vqmovun.s16 d1, q2\n" in Transform()
3109 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
3110 "pld [%[output]]\n" in Transform()
3112 "bne 1b\n" in Transform()
3118 "vld1.32 {d0, d1, d2, d3}, [%[input]]!\n" in Transform()
3119 "vld1.32 {d4, d5, d6}, [%[input]]!\n" in Transform()
3120 "vld1.32 {d7[0]}, [%[input]]!\n" in Transform()
3121 "pld [%[input], #64]\n" in Transform()
3122 "vsub.f32 q0, q0, q4\n" in Transform()
3123 "vsub.f32 q1, q1, q4\n" in Transform()
3124 "vsub.f32 q2, q2, q4\n" in Transform()
3125 "vsub.f32 q3, q3, q4\n" in Transform()
3126 "vmul.f32 q0, q0, q6\n" in Transform()
3127 "vmul.f32 q1, q1, q6\n" in Transform()
3128 "vmul.f32 q2, q2, q6\n" in Transform()
3129 "vmul.f32 q3, q3, q6\n" in Transform()
3130 "vadd.f32 q0, q0, q5\n" in Transform()
3131 "vadd.f32 q1, q1, q5\n" in Transform()
3132 "vadd.f32 q2, q2, q5\n" in Transform()
3133 "vadd.f32 q3, q3, q5\n" in Transform()
3134 "vcvt.s32.f32 q0, q0\n" in Transform()
3135 "vcvt.s32.f32 q1, q1\n" in Transform()
3136 "vcvt.s32.f32 q2, q2\n" in Transform()
3137 "vcvt.s32.f32 q3, q3\n" in Transform()
3138 "vqmovn.s32 d0, q0\n" in Transform()
3139 "vqmovn.s32 d1, q1\n" in Transform()
3140 "vqmovn.s32 d4, q2\n" in Transform()
3141 "vqmovn.s32 d5, q3\n" in Transform()
3142 "vqmovun.s16 d0, q0\n" in Transform()
3143 "vqmovun.s16 d1, q2\n" in Transform()
3145 "vst1.32 {d0}, [%[output]]!\n" in Transform()
3146 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
3147 "vst1.16 {d1[2]}, [%[output]]!\n" in Transform()
3148 "vst1.8 {d1[6]}, [%[output]]!\n" in Transform()
3149 "pld [%[output]]\n" in Transform()
3173 "vdup.32 q4, %[range_min]\n" in Transform()
3174 "vdup.32 q5, %[range_offset]\n" in Transform()
3175 "vdup.32 q6, %[range_scale]\n" in Transform()
3178 "subs %[count], %[count], #16\n" in Transform()
3181 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
3182 "pld [%[input], #32]\n" in Transform()
3183 "vmovl.u8 q1, d1\n" in Transform()
3184 "vmovl.u8 q0, d0\n" in Transform()
3185 "vmovl.s16 q3, d3\n" in Transform()
3186 "vmovl.s16 q2, d2\n" in Transform()
3187 "vmovl.s16 q1, d1\n" in Transform()
3188 "vmovl.s16 q0, d0\n" in Transform()
3189 "vcvt.f32.s32 q0, q0\n" in Transform()
3190 "vcvt.f32.s32 q1, q1\n" in Transform()
3191 "vcvt.f32.s32 q2, q2\n" in Transform()
3192 "vcvt.f32.s32 q3, q3\n" in Transform()
3193 "vsub.f32 q0, q0, q5\n" in Transform()
3194 "vsub.f32 q1, q1, q5\n" in Transform()
3195 "vsub.f32 q2, q2, q5\n" in Transform()
3196 "vsub.f32 q3, q3, q5\n" in Transform()
3197 "vmul.f32 q0, q0, q6\n" in Transform()
3198 "vmul.f32 q1, q1, q6\n" in Transform()
3199 "vmul.f32 q2, q2, q6\n" in Transform()
3200 "vmul.f32 q3, q3, q6\n" in Transform()
3201 "vadd.f32 q0, q0, q4\n" in Transform()
3202 "vadd.f32 q1, q1, q4\n" in Transform()
3203 "vadd.f32 q2, q2, q4\n" in Transform()
3204 "vadd.f32 q3, q3, q4\n" in Transform()
3206 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3207 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
3208 "pld [%[output]]\n" in Transform()
3210 "bne 1b\n" in Transform()
3234 "vdup.32 q4, %[range_min]\n" in Transform()
3235 "vdup.32 q5, %[range_offset]\n" in Transform()
3236 "vdup.32 q6, %[range_scale]\n" in Transform()
3239 "subs %[count], %[count], #1\n" in Transform()
3240 "beq 2f\n" in Transform()
3243 "subs %[count], %[count], #16\n" in Transform()
3246 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
3247 "pld [%[input], #32]\n" in Transform()
3248 "vmovl.u8 q1, d1\n" in Transform()
3249 "vmovl.u8 q0, d0\n" in Transform()
3250 "vmovl.s16 q3, d3\n" in Transform()
3251 "vmovl.s16 q2, d2\n" in Transform()
3252 "vmovl.s16 q1, d1\n" in Transform()
3253 "vmovl.s16 q0, d0\n" in Transform()
3254 "vcvt.f32.s32 q0, q0\n" in Transform()
3255 "vcvt.f32.s32 q1, q1\n" in Transform()
3256 "vcvt.f32.s32 q2, q2\n" in Transform()
3257 "vcvt.f32.s32 q3, q3\n" in Transform()
3258 "vsub.f32 q0, q0, q5\n" in Transform()
3259 "vsub.f32 q1, q1, q5\n" in Transform()
3260 "vsub.f32 q2, q2, q5\n" in Transform()
3261 "vsub.f32 q3, q3, q5\n" in Transform()
3262 "vmul.f32 q0, q0, q6\n" in Transform()
3263 "vmul.f32 q1, q1, q6\n" in Transform()
3264 "vmul.f32 q2, q2, q6\n" in Transform()
3265 "vmul.f32 q3, q3, q6\n" in Transform()
3266 "vadd.f32 q0, q0, q4\n" in Transform()
3267 "vadd.f32 q1, q1, q4\n" in Transform()
3268 "vadd.f32 q2, q2, q4\n" in Transform()
3269 "vadd.f32 q3, q3, q4\n" in Transform()
3271 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3272 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
3273 "pld [%[output]]\n" in Transform()
3275 "bne 1b\n" in Transform()
3281 "vld1.8 {d0[0]}, [%[input]]!\n" in Transform()
3282 "pld [%[input], #32]\n" in Transform()
3283 "vmovl.u8 q0, d0\n" in Transform()
3284 "vmovl.s16 q0, d0\n" in Transform()
3285 "vcvt.f32.s32 q0, q0\n" in Transform()
3286 "vsub.f32 q0, q0, q5\n" in Transform()
3287 "vmul.f32 q0, q0, q6\n" in Transform()
3288 "vadd.f32 q0, q0, q4\n" in Transform()
3290 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
3291 "pld [%[output]]\n" in Transform()
3315 "vdup.32 q4, %[range_min]\n" in Transform()
3316 "vdup.32 q5, %[range_offset]\n" in Transform()
3317 "vdup.32 q6, %[range_scale]\n" in Transform()
3320 "subs %[count], %[count], #2\n" in Transform()
3321 "beq 2f\n" in Transform()
3324 "subs %[count], %[count], #16\n" in Transform()
3327 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
3328 "pld [%[input], #32]\n" in Transform()
3329 "vmovl.u8 q1, d1\n" in Transform()
3330 "vmovl.u8 q0, d0\n" in Transform()
3331 "vmovl.s16 q3, d3\n" in Transform()
3332 "vmovl.s16 q2, d2\n" in Transform()
3333 "vmovl.s16 q1, d1\n" in Transform()
3334 "vmovl.s16 q0, d0\n" in Transform()
3335 "vcvt.f32.s32 q0, q0\n" in Transform()
3336 "vcvt.f32.s32 q1, q1\n" in Transform()
3337 "vcvt.f32.s32 q2, q2\n" in Transform()
3338 "vcvt.f32.s32 q3, q3\n" in Transform()
3339 "vsub.f32 q0, q0, q5\n" in Transform()
3340 "vsub.f32 q1, q1, q5\n" in Transform()
3341 "vsub.f32 q2, q2, q5\n" in Transform()
3342 "vsub.f32 q3, q3, q5\n" in Transform()
3343 "vmul.f32 q0, q0, q6\n" in Transform()
3344 "vmul.f32 q1, q1, q6\n" in Transform()
3345 "vmul.f32 q2, q2, q6\n" in Transform()
3346 "vmul.f32 q3, q3, q6\n" in Transform()
3347 "vadd.f32 q0, q0, q4\n" in Transform()
3348 "vadd.f32 q1, q1, q4\n" in Transform()
3349 "vadd.f32 q2, q2, q4\n" in Transform()
3350 "vadd.f32 q3, q3, q4\n" in Transform()
3352 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3353 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
3354 "pld [%[output]]\n" in Transform()
3356 "bne 1b\n" in Transform()
3362 "vld1.16 {d0[0]}, [%[input]]!\n" in Transform()
3363 "pld [%[input], #32]\n" in Transform()
3364 "vmovl.u8 q0, d0\n" in Transform()
3365 "vmovl.s16 q0, d0\n" in Transform()
3366 "vcvt.f32.s32 q0, q0\n" in Transform()
3367 "vsub.f32 q0, q0, q5\n" in Transform()
3368 "vmul.f32 q0, q0, q6\n" in Transform()
3369 "vadd.f32 q0, q0, q4\n" in Transform()
3371 "vst1.32 {d0}, [%[output]]!\n" in Transform()
3372 "pld [%[output]]\n" in Transform()
3396 "vdup.32 q4, %[range_min]\n" in Transform()
3397 "vdup.32 q5, %[range_offset]\n" in Transform()
3398 "vdup.32 q6, %[range_scale]\n" in Transform()
3401 "subs %[count], %[count], #3\n" in Transform()
3402 "beq 2f\n" in Transform()
3405 "subs %[count], %[count], #16\n" in Transform()
3408 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
3409 "pld [%[input], #32]\n" in Transform()
3410 "vmovl.u8 q1, d1\n" in Transform()
3411 "vmovl.u8 q0, d0\n" in Transform()
3412 "vmovl.s16 q3, d3\n" in Transform()
3413 "vmovl.s16 q2, d2\n" in Transform()
3414 "vmovl.s16 q1, d1\n" in Transform()
3415 "vmovl.s16 q0, d0\n" in Transform()
3416 "vcvt.f32.s32 q0, q0\n" in Transform()
3417 "vcvt.f32.s32 q1, q1\n" in Transform()
3418 "vcvt.f32.s32 q2, q2\n" in Transform()
3419 "vcvt.f32.s32 q3, q3\n" in Transform()
3420 "vsub.f32 q0, q0, q5\n" in Transform()
3421 "vsub.f32 q1, q1, q5\n" in Transform()
3422 "vsub.f32 q2, q2, q5\n" in Transform()
3423 "vsub.f32 q3, q3, q5\n" in Transform()
3424 "vmul.f32 q0, q0, q6\n" in Transform()
3425 "vmul.f32 q1, q1, q6\n" in Transform()
3426 "vmul.f32 q2, q2, q6\n" in Transform()
3427 "vmul.f32 q3, q3, q6\n" in Transform()
3428 "vadd.f32 q0, q0, q4\n" in Transform()
3429 "vadd.f32 q1, q1, q4\n" in Transform()
3430 "vadd.f32 q2, q2, q4\n" in Transform()
3431 "vadd.f32 q3, q3, q4\n" in Transform()
3433 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3434 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
3435 "pld [%[output]]\n" in Transform()
3437 "bne 1b\n" in Transform()
3443 "vld1.16 {d0[0]}, [%[input]]!\n" in Transform()
3444 "vld1.8 {d0[2]}, [%[input]]!\n" in Transform()
3445 "pld [%[input], #32]\n" in Transform()
3446 "vmovl.u8 q0, d0\n" in Transform()
3447 "vmovl.s16 q0, d0\n" in Transform()
3448 "vcvt.f32.s32 q0, q0\n" in Transform()
3449 "vsub.f32 q0, q0, q5\n" in Transform()
3450 "vmul.f32 q0, q0, q6\n" in Transform()
3451 "vadd.f32 q0, q0, q4\n" in Transform()
3453 "vst1.32 {d0}, [%[output]]!\n" in Transform()
3454 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
3455 "pld [%[output]]\n" in Transform()
3479 "vdup.32 q4, %[range_min]\n" in Transform()
3480 "vdup.32 q5, %[range_offset]\n" in Transform()
3481 "vdup.32 q6, %[range_scale]\n" in Transform()
3484 "subs %[count], %[count], #4\n" in Transform()
3485 "beq 2f\n" in Transform()
3488 "subs %[count], %[count], #16\n" in Transform()
3491 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
3492 "pld [%[input], #32]\n" in Transform()
3493 "vmovl.u8 q1, d1\n" in Transform()
3494 "vmovl.u8 q0, d0\n" in Transform()
3495 "vmovl.s16 q3, d3\n" in Transform()
3496 "vmovl.s16 q2, d2\n" in Transform()
3497 "vmovl.s16 q1, d1\n" in Transform()
3498 "vmovl.s16 q0, d0\n" in Transform()
3499 "vcvt.f32.s32 q0, q0\n" in Transform()
3500 "vcvt.f32.s32 q1, q1\n" in Transform()
3501 "vcvt.f32.s32 q2, q2\n" in Transform()
3502 "vcvt.f32.s32 q3, q3\n" in Transform()
3503 "vsub.f32 q0, q0, q5\n" in Transform()
3504 "vsub.f32 q1, q1, q5\n" in Transform()
3505 "vsub.f32 q2, q2, q5\n" in Transform()
3506 "vsub.f32 q3, q3, q5\n" in Transform()
3507 "vmul.f32 q0, q0, q6\n" in Transform()
3508 "vmul.f32 q1, q1, q6\n" in Transform()
3509 "vmul.f32 q2, q2, q6\n" in Transform()
3510 "vmul.f32 q3, q3, q6\n" in Transform()
3511 "vadd.f32 q0, q0, q4\n" in Transform()
3512 "vadd.f32 q1, q1, q4\n" in Transform()
3513 "vadd.f32 q2, q2, q4\n" in Transform()
3514 "vadd.f32 q3, q3, q4\n" in Transform()
3516 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3517 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
3518 "pld [%[output]]\n" in Transform()
3520 "bne 1b\n" in Transform()
3526 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
3527 "pld [%[input], #32]\n" in Transform()
3528 "vmovl.u8 q0, d0\n" in Transform()
3529 "vmovl.s16 q0, d0\n" in Transform()
3530 "vcvt.f32.s32 q0, q0\n" in Transform()
3531 "vsub.f32 q0, q0, q5\n" in Transform()
3532 "vmul.f32 q0, q0, q6\n" in Transform()
3533 "vadd.f32 q0, q0, q4\n" in Transform()
3535 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
3536 "pld [%[output]]\n" in Transform()
3560 "vdup.32 q4, %[range_min]\n" in Transform()
3561 "vdup.32 q5, %[range_offset]\n" in Transform()
3562 "vdup.32 q6, %[range_scale]\n" in Transform()
3565 "subs %[count], %[count], #5\n" in Transform()
3566 "beq 2f\n" in Transform()
3569 "subs %[count], %[count], #16\n" in Transform()
3572 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
3573 "pld [%[input], #32]\n" in Transform()
3574 "vmovl.u8 q1, d1\n" in Transform()
3575 "vmovl.u8 q0, d0\n" in Transform()
3576 "vmovl.s16 q3, d3\n" in Transform()
3577 "vmovl.s16 q2, d2\n" in Transform()
3578 "vmovl.s16 q1, d1\n" in Transform()
3579 "vmovl.s16 q0, d0\n" in Transform()
3580 "vcvt.f32.s32 q0, q0\n" in Transform()
3581 "vcvt.f32.s32 q1, q1\n" in Transform()
3582 "vcvt.f32.s32 q2, q2\n" in Transform()
3583 "vcvt.f32.s32 q3, q3\n" in Transform()
3584 "vsub.f32 q0, q0, q5\n" in Transform()
3585 "vsub.f32 q1, q1, q5\n" in Transform()
3586 "vsub.f32 q2, q2, q5\n" in Transform()
3587 "vsub.f32 q3, q3, q5\n" in Transform()
3588 "vmul.f32 q0, q0, q6\n" in Transform()
3589 "vmul.f32 q1, q1, q6\n" in Transform()
3590 "vmul.f32 q2, q2, q6\n" in Transform()
3591 "vmul.f32 q3, q3, q6\n" in Transform()
3592 "vadd.f32 q0, q0, q4\n" in Transform()
3593 "vadd.f32 q1, q1, q4\n" in Transform()
3594 "vadd.f32 q2, q2, q4\n" in Transform()
3595 "vadd.f32 q3, q3, q4\n" in Transform()
3597 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3598 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
3599 "pld [%[output]]\n" in Transform()
3601 "bne 1b\n" in Transform()
3607 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
3608 "vld1.8 {d0[4]}, [%[input]]!\n" in Transform()
3609 "pld [%[input], #32]\n" in Transform()
3610 "vmovl.u8 q0, d0\n" in Transform()
3611 "vmovl.s16 q1, d1\n" in Transform()
3612 "vmovl.s16 q0, d0\n" in Transform()
3613 "vcvt.f32.s32 q0, q0\n" in Transform()
3614 "vcvt.f32.s32 q1, q1\n" in Transform()
3615 "vsub.f32 q0, q0, q5\n" in Transform()
3616 "vsub.f32 q1, q1, q5\n" in Transform()
3617 "vmul.f32 q0, q0, q6\n" in Transform()
3618 "vmul.f32 q1, q1, q6\n" in Transform()
3619 "vadd.f32 q0, q0, q4\n" in Transform()
3620 "vadd.f32 q1, q1, q4\n" in Transform()
3622 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
3623 "vst1.32 {d2[0]}, [%[output]]!\n" in Transform()
3624 "pld [%[output]]\n" in Transform()
3648 "vdup.32 q4, %[range_min]\n" in Transform()
3649 "vdup.32 q5, %[range_offset]\n" in Transform()
3650 "vdup.32 q6, %[range_scale]\n" in Transform()
3653 "subs %[count], %[count], #6\n" in Transform()
3654 "beq 2f\n" in Transform()
3657 "subs %[count], %[count], #16\n" in Transform()
3660 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
3661 "pld [%[input], #32]\n" in Transform()
3662 "vmovl.u8 q1, d1\n" in Transform()
3663 "vmovl.u8 q0, d0\n" in Transform()
3664 "vmovl.s16 q3, d3\n" in Transform()
3665 "vmovl.s16 q2, d2\n" in Transform()
3666 "vmovl.s16 q1, d1\n" in Transform()
3667 "vmovl.s16 q0, d0\n" in Transform()
3668 "vcvt.f32.s32 q0, q0\n" in Transform()
3669 "vcvt.f32.s32 q1, q1\n" in Transform()
3670 "vcvt.f32.s32 q2, q2\n" in Transform()
3671 "vcvt.f32.s32 q3, q3\n" in Transform()
3672 "vsub.f32 q0, q0, q5\n" in Transform()
3673 "vsub.f32 q1, q1, q5\n" in Transform()
3674 "vsub.f32 q2, q2, q5\n" in Transform()
3675 "vsub.f32 q3, q3, q5\n" in Transform()
3676 "vmul.f32 q0, q0, q6\n" in Transform()
3677 "vmul.f32 q1, q1, q6\n" in Transform()
3678 "vmul.f32 q2, q2, q6\n" in Transform()
3679 "vmul.f32 q3, q3, q6\n" in Transform()
3680 "vadd.f32 q0, q0, q4\n" in Transform()
3681 "vadd.f32 q1, q1, q4\n" in Transform()
3682 "vadd.f32 q2, q2, q4\n" in Transform()
3683 "vadd.f32 q3, q3, q4\n" in Transform()
3685 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3686 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
3687 "pld [%[output]]\n" in Transform()
3689 "bne 1b\n" in Transform()
3695 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
3696 "vld1.16 {d0[2]}, [%[input]]!\n" in Transform()
3697 "pld [%[input], #32]\n" in Transform()
3698 "vmovl.u8 q0, d0\n" in Transform()
3699 "vmovl.s16 q1, d1\n" in Transform()
3700 "vmovl.s16 q0, d0\n" in Transform()
3701 "vcvt.f32.s32 q0, q0\n" in Transform()
3702 "vcvt.f32.s32 q1, q1\n" in Transform()
3703 "vsub.f32 q0, q0, q5\n" in Transform()
3704 "vsub.f32 q1, q1, q5\n" in Transform()
3705 "vmul.f32 q0, q0, q6\n" in Transform()
3706 "vmul.f32 q1, q1, q6\n" in Transform()
3707 "vadd.f32 q0, q0, q4\n" in Transform()
3708 "vadd.f32 q1, q1, q4\n" in Transform()
3710 "vst1.32 {d0, d1, d2}, [%[output]]!\n" in Transform()
3711 "pld [%[output]]\n" in Transform()
3735 "vdup.32 q4, %[range_min]\n" in Transform()
3736 "vdup.32 q5, %[range_offset]\n" in Transform()
3737 "vdup.32 q6, %[range_scale]\n" in Transform()
3740 "subs %[count], %[count], #7\n" in Transform()
3741 "beq 2f\n" in Transform()
3744 "subs %[count], %[count], #16\n" in Transform()
3747 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
3748 "pld [%[input], #32]\n" in Transform()
3749 "vmovl.u8 q1, d1\n" in Transform()
3750 "vmovl.u8 q0, d0\n" in Transform()
3751 "vmovl.s16 q3, d3\n" in Transform()
3752 "vmovl.s16 q2, d2\n" in Transform()
3753 "vmovl.s16 q1, d1\n" in Transform()
3754 "vmovl.s16 q0, d0\n" in Transform()
3755 "vcvt.f32.s32 q0, q0\n" in Transform()
3756 "vcvt.f32.s32 q1, q1\n" in Transform()
3757 "vcvt.f32.s32 q2, q2\n" in Transform()
3758 "vcvt.f32.s32 q3, q3\n" in Transform()
3759 "vsub.f32 q0, q0, q5\n" in Transform()
3760 "vsub.f32 q1, q1, q5\n" in Transform()
3761 "vsub.f32 q2, q2, q5\n" in Transform()
3762 "vsub.f32 q3, q3, q5\n" in Transform()
3763 "vmul.f32 q0, q0, q6\n" in Transform()
3764 "vmul.f32 q1, q1, q6\n" in Transform()
3765 "vmul.f32 q2, q2, q6\n" in Transform()
3766 "vmul.f32 q3, q3, q6\n" in Transform()
3767 "vadd.f32 q0, q0, q4\n" in Transform()
3768 "vadd.f32 q1, q1, q4\n" in Transform()
3769 "vadd.f32 q2, q2, q4\n" in Transform()
3770 "vadd.f32 q3, q3, q4\n" in Transform()
3772 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3773 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
3774 "pld [%[output]]\n" in Transform()
3776 "bne 1b\n" in Transform()
3782 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
3783 "vld1.16 {d0[2]}, [%[input]]!\n" in Transform()
3784 "vld1.8 {d0[6]}, [%[input]]!\n" in Transform()
3785 "pld [%[input], #32]\n" in Transform()
3786 "vmovl.u8 q0, d0\n" in Transform()
3787 "vmovl.s16 q1, d1\n" in Transform()
3788 "vmovl.s16 q0, d0\n" in Transform()
3789 "vcvt.f32.s32 q0, q0\n" in Transform()
3790 "vcvt.f32.s32 q1, q1\n" in Transform()
3791 "vsub.f32 q0, q0, q5\n" in Transform()
3792 "vsub.f32 q1, q1, q5\n" in Transform()
3793 "vmul.f32 q0, q0, q6\n" in Transform()
3794 "vmul.f32 q1, q1, q6\n" in Transform()
3795 "vadd.f32 q0, q0, q4\n" in Transform()
3796 "vadd.f32 q1, q1, q4\n" in Transform()
3798 "vst1.32 {d0, d1, d2}, [%[output]]!\n" in Transform()
3799 "vst1.32 {d3[0]}, [%[output]]!\n" in Transform()
3800 "pld [%[output]]\n" in Transform()
3824 "vdup.32 q4, %[range_min]\n" in Transform()
3825 "vdup.32 q5, %[range_offset]\n" in Transform()
3826 "vdup.32 q6, %[range_scale]\n" in Transform()
3829 "subs %[count], %[count], #8\n" in Transform()
3830 "beq 2f\n" in Transform()
3833 "subs %[count], %[count], #16\n" in Transform()
3836 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
3837 "pld [%[input], #32]\n" in Transform()
3838 "vmovl.u8 q1, d1\n" in Transform()
3839 "vmovl.u8 q0, d0\n" in Transform()
3840 "vmovl.s16 q3, d3\n" in Transform()
3841 "vmovl.s16 q2, d2\n" in Transform()
3842 "vmovl.s16 q1, d1\n" in Transform()
3843 "vmovl.s16 q0, d0\n" in Transform()
3844 "vcvt.f32.s32 q0, q0\n" in Transform()
3845 "vcvt.f32.s32 q1, q1\n" in Transform()
3846 "vcvt.f32.s32 q2, q2\n" in Transform()
3847 "vcvt.f32.s32 q3, q3\n" in Transform()
3848 "vsub.f32 q0, q0, q5\n" in Transform()
3849 "vsub.f32 q1, q1, q5\n" in Transform()
3850 "vsub.f32 q2, q2, q5\n" in Transform()
3851 "vsub.f32 q3, q3, q5\n" in Transform()
3852 "vmul.f32 q0, q0, q6\n" in Transform()
3853 "vmul.f32 q1, q1, q6\n" in Transform()
3854 "vmul.f32 q2, q2, q6\n" in Transform()
3855 "vmul.f32 q3, q3, q6\n" in Transform()
3856 "vadd.f32 q0, q0, q4\n" in Transform()
3857 "vadd.f32 q1, q1, q4\n" in Transform()
3858 "vadd.f32 q2, q2, q4\n" in Transform()
3859 "vadd.f32 q3, q3, q4\n" in Transform()
3861 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3862 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
3863 "pld [%[output]]\n" in Transform()
3865 "bne 1b\n" in Transform()
3871 "vld1.32 {d0}, [%[input]]!\n" in Transform()
3872 "pld [%[input], #32]\n" in Transform()
3873 "vmovl.u8 q0, d0\n" in Transform()
3874 "vmovl.s16 q1, d1\n" in Transform()
3875 "vmovl.s16 q0, d0\n" in Transform()
3876 "vcvt.f32.s32 q0, q0\n" in Transform()
3877 "vcvt.f32.s32 q1, q1\n" in Transform()
3878 "vsub.f32 q0, q0, q5\n" in Transform()
3879 "vsub.f32 q1, q1, q5\n" in Transform()
3880 "vmul.f32 q0, q0, q6\n" in Transform()
3881 "vmul.f32 q1, q1, q6\n" in Transform()
3882 "vadd.f32 q0, q0, q4\n" in Transform()
3883 "vadd.f32 q1, q1, q4\n" in Transform()
3885 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3886 "pld [%[output]]\n" in Transform()
3910 "vdup.32 q4, %[range_min]\n" in Transform()
3911 "vdup.32 q5, %[range_offset]\n" in Transform()
3912 "vdup.32 q6, %[range_scale]\n" in Transform()
3915 "subs %[count], %[count], #9\n" in Transform()
3916 "beq 2f\n" in Transform()
3919 "subs %[count], %[count], #16\n" in Transform()
3922 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
3923 "pld [%[input], #32]\n" in Transform()
3924 "vmovl.u8 q1, d1\n" in Transform()
3925 "vmovl.u8 q0, d0\n" in Transform()
3926 "vmovl.s16 q3, d3\n" in Transform()
3927 "vmovl.s16 q2, d2\n" in Transform()
3928 "vmovl.s16 q1, d1\n" in Transform()
3929 "vmovl.s16 q0, d0\n" in Transform()
3930 "vcvt.f32.s32 q0, q0\n" in Transform()
3931 "vcvt.f32.s32 q1, q1\n" in Transform()
3932 "vcvt.f32.s32 q2, q2\n" in Transform()
3933 "vcvt.f32.s32 q3, q3\n" in Transform()
3934 "vsub.f32 q0, q0, q5\n" in Transform()
3935 "vsub.f32 q1, q1, q5\n" in Transform()
3936 "vsub.f32 q2, q2, q5\n" in Transform()
3937 "vsub.f32 q3, q3, q5\n" in Transform()
3938 "vmul.f32 q0, q0, q6\n" in Transform()
3939 "vmul.f32 q1, q1, q6\n" in Transform()
3940 "vmul.f32 q2, q2, q6\n" in Transform()
3941 "vmul.f32 q3, q3, q6\n" in Transform()
3942 "vadd.f32 q0, q0, q4\n" in Transform()
3943 "vadd.f32 q1, q1, q4\n" in Transform()
3944 "vadd.f32 q2, q2, q4\n" in Transform()
3945 "vadd.f32 q3, q3, q4\n" in Transform()
3947 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3948 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
3949 "pld [%[output]]\n" in Transform()
3951 "bne 1b\n" in Transform()
3957 "vld1.32 {d0}, [%[input]]!\n" in Transform()
3958 "vld1.8 {d1[0]}, [%[input]]!\n" in Transform()
3959 "pld [%[input], #32]\n" in Transform()
3960 "vmovl.u8 q1, d1\n" in Transform()
3961 "vmovl.u8 q0, d0\n" in Transform()
3962 "vmovl.s16 q2, d2\n" in Transform()
3963 "vmovl.s16 q1, d1\n" in Transform()
3964 "vmovl.s16 q0, d0\n" in Transform()
3965 "vcvt.f32.s32 q0, q0\n" in Transform()
3966 "vcvt.f32.s32 q1, q1\n" in Transform()
3967 "vcvt.f32.s32 q2, q2\n" in Transform()
3968 "vsub.f32 q0, q0, q5\n" in Transform()
3969 "vsub.f32 q1, q1, q5\n" in Transform()
3970 "vsub.f32 q2, q2, q5\n" in Transform()
3971 "vmul.f32 q0, q0, q6\n" in Transform()
3972 "vmul.f32 q1, q1, q6\n" in Transform()
3973 "vmul.f32 q2, q2, q6\n" in Transform()
3974 "vadd.f32 q0, q0, q4\n" in Transform()
3975 "vadd.f32 q1, q1, q4\n" in Transform()
3976 "vadd.f32 q2, q2, q4\n" in Transform()
3978 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
3979 "vst1.32 {d4[0]}, [%[output]]!\n" in Transform()
3980 "pld [%[output]]\n" in Transform()
4004 "vdup.32 q4, %[range_min]\n" in Transform()
4005 "vdup.32 q5, %[range_offset]\n" in Transform()
4006 "vdup.32 q6, %[range_scale]\n" in Transform()
4009 "subs %[count], %[count], #10\n" in Transform()
4010 "beq 2f\n" in Transform()
4013 "subs %[count], %[count], #16\n" in Transform()
4016 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4017 "pld [%[input], #32]\n" in Transform()
4018 "vmovl.u8 q1, d1\n" in Transform()
4019 "vmovl.u8 q0, d0\n" in Transform()
4020 "vmovl.s16 q3, d3\n" in Transform()
4021 "vmovl.s16 q2, d2\n" in Transform()
4022 "vmovl.s16 q1, d1\n" in Transform()
4023 "vmovl.s16 q0, d0\n" in Transform()
4024 "vcvt.f32.s32 q0, q0\n" in Transform()
4025 "vcvt.f32.s32 q1, q1\n" in Transform()
4026 "vcvt.f32.s32 q2, q2\n" in Transform()
4027 "vcvt.f32.s32 q3, q3\n" in Transform()
4028 "vsub.f32 q0, q0, q5\n" in Transform()
4029 "vsub.f32 q1, q1, q5\n" in Transform()
4030 "vsub.f32 q2, q2, q5\n" in Transform()
4031 "vsub.f32 q3, q3, q5\n" in Transform()
4032 "vmul.f32 q0, q0, q6\n" in Transform()
4033 "vmul.f32 q1, q1, q6\n" in Transform()
4034 "vmul.f32 q2, q2, q6\n" in Transform()
4035 "vmul.f32 q3, q3, q6\n" in Transform()
4036 "vadd.f32 q0, q0, q4\n" in Transform()
4037 "vadd.f32 q1, q1, q4\n" in Transform()
4038 "vadd.f32 q2, q2, q4\n" in Transform()
4039 "vadd.f32 q3, q3, q4\n" in Transform()
4041 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4042 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
4043 "pld [%[output]]\n" in Transform()
4045 "bne 1b\n" in Transform()
4051 "vld1.32 {d0}, [%[input]]!\n" in Transform()
4052 "vld1.16 {d1[0]}, [%[input]]!\n" in Transform()
4053 "pld [%[input], #32]\n" in Transform()
4054 "vmovl.u8 q1, d1\n" in Transform()
4055 "vmovl.u8 q0, d0\n" in Transform()
4056 "vmovl.s16 q2, d2\n" in Transform()
4057 "vmovl.s16 q1, d1\n" in Transform()
4058 "vmovl.s16 q0, d0\n" in Transform()
4059 "vcvt.f32.s32 q0, q0\n" in Transform()
4060 "vcvt.f32.s32 q1, q1\n" in Transform()
4061 "vcvt.f32.s32 q2, q2\n" in Transform()
4062 "vsub.f32 q0, q0, q5\n" in Transform()
4063 "vsub.f32 q1, q1, q5\n" in Transform()
4064 "vsub.f32 q2, q2, q5\n" in Transform()
4065 "vmul.f32 q0, q0, q6\n" in Transform()
4066 "vmul.f32 q1, q1, q6\n" in Transform()
4067 "vmul.f32 q2, q2, q6\n" in Transform()
4068 "vadd.f32 q0, q0, q4\n" in Transform()
4069 "vadd.f32 q1, q1, q4\n" in Transform()
4070 "vadd.f32 q2, q2, q4\n" in Transform()
4072 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4073 "vst1.32 {d4}, [%[output]]!\n" in Transform()
4074 "pld [%[output]]\n" in Transform()
4098 "vdup.32 q4, %[range_min]\n" in Transform()
4099 "vdup.32 q5, %[range_offset]\n" in Transform()
4100 "vdup.32 q6, %[range_scale]\n" in Transform()
4103 "subs %[count], %[count], #11\n" in Transform()
4104 "beq 2f\n" in Transform()
4107 "subs %[count], %[count], #16\n" in Transform()
4110 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4111 "pld [%[input], #32]\n" in Transform()
4112 "vmovl.u8 q1, d1\n" in Transform()
4113 "vmovl.u8 q0, d0\n" in Transform()
4114 "vmovl.s16 q3, d3\n" in Transform()
4115 "vmovl.s16 q2, d2\n" in Transform()
4116 "vmovl.s16 q1, d1\n" in Transform()
4117 "vmovl.s16 q0, d0\n" in Transform()
4118 "vcvt.f32.s32 q0, q0\n" in Transform()
4119 "vcvt.f32.s32 q1, q1\n" in Transform()
4120 "vcvt.f32.s32 q2, q2\n" in Transform()
4121 "vcvt.f32.s32 q3, q3\n" in Transform()
4122 "vsub.f32 q0, q0, q5\n" in Transform()
4123 "vsub.f32 q1, q1, q5\n" in Transform()
4124 "vsub.f32 q2, q2, q5\n" in Transform()
4125 "vsub.f32 q3, q3, q5\n" in Transform()
4126 "vmul.f32 q0, q0, q6\n" in Transform()
4127 "vmul.f32 q1, q1, q6\n" in Transform()
4128 "vmul.f32 q2, q2, q6\n" in Transform()
4129 "vmul.f32 q3, q3, q6\n" in Transform()
4130 "vadd.f32 q0, q0, q4\n" in Transform()
4131 "vadd.f32 q1, q1, q4\n" in Transform()
4132 "vadd.f32 q2, q2, q4\n" in Transform()
4133 "vadd.f32 q3, q3, q4\n" in Transform()
4135 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4136 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
4137 "pld [%[output]]\n" in Transform()
4139 "bne 1b\n" in Transform()
4145 "vld1.32 {d0}, [%[input]]!\n" in Transform()
4146 "vld1.16 {d1[0]}, [%[input]]!\n" in Transform()
4147 "vld1.8 {d1[2]}, [%[input]]!\n" in Transform()
4148 "pld [%[input], #32]\n" in Transform()
4149 "vmovl.u8 q1, d1\n" in Transform()
4150 "vmovl.u8 q0, d0\n" in Transform()
4151 "vmovl.s16 q2, d2\n" in Transform()
4152 "vmovl.s16 q1, d1\n" in Transform()
4153 "vmovl.s16 q0, d0\n" in Transform()
4154 "vcvt.f32.s32 q0, q0\n" in Transform()
4155 "vcvt.f32.s32 q1, q1\n" in Transform()
4156 "vcvt.f32.s32 q2, q2\n" in Transform()
4157 "vsub.f32 q0, q0, q5\n" in Transform()
4158 "vsub.f32 q1, q1, q5\n" in Transform()
4159 "vsub.f32 q2, q2, q5\n" in Transform()
4160 "vmul.f32 q0, q0, q6\n" in Transform()
4161 "vmul.f32 q1, q1, q6\n" in Transform()
4162 "vmul.f32 q2, q2, q6\n" in Transform()
4163 "vadd.f32 q0, q0, q4\n" in Transform()
4164 "vadd.f32 q1, q1, q4\n" in Transform()
4165 "vadd.f32 q2, q2, q4\n" in Transform()
4167 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4168 "vst1.32 {d4}, [%[output]]!\n" in Transform()
4169 "vst1.32 {d5[0]}, [%[output]]!\n" in Transform()
4170 "pld [%[output]]\n" in Transform()
4194 "vdup.32 q4, %[range_min]\n" in Transform()
4195 "vdup.32 q5, %[range_offset]\n" in Transform()
4196 "vdup.32 q6, %[range_scale]\n" in Transform()
4199 "subs %[count], %[count], #12\n" in Transform()
4200 "beq 2f\n" in Transform()
4203 "subs %[count], %[count], #16\n" in Transform()
4206 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4207 "pld [%[input], #32]\n" in Transform()
4208 "vmovl.u8 q1, d1\n" in Transform()
4209 "vmovl.u8 q0, d0\n" in Transform()
4210 "vmovl.s16 q3, d3\n" in Transform()
4211 "vmovl.s16 q2, d2\n" in Transform()
4212 "vmovl.s16 q1, d1\n" in Transform()
4213 "vmovl.s16 q0, d0\n" in Transform()
4214 "vcvt.f32.s32 q0, q0\n" in Transform()
4215 "vcvt.f32.s32 q1, q1\n" in Transform()
4216 "vcvt.f32.s32 q2, q2\n" in Transform()
4217 "vcvt.f32.s32 q3, q3\n" in Transform()
4218 "vsub.f32 q0, q0, q5\n" in Transform()
4219 "vsub.f32 q1, q1, q5\n" in Transform()
4220 "vsub.f32 q2, q2, q5\n" in Transform()
4221 "vsub.f32 q3, q3, q5\n" in Transform()
4222 "vmul.f32 q0, q0, q6\n" in Transform()
4223 "vmul.f32 q1, q1, q6\n" in Transform()
4224 "vmul.f32 q2, q2, q6\n" in Transform()
4225 "vmul.f32 q3, q3, q6\n" in Transform()
4226 "vadd.f32 q0, q0, q4\n" in Transform()
4227 "vadd.f32 q1, q1, q4\n" in Transform()
4228 "vadd.f32 q2, q2, q4\n" in Transform()
4229 "vadd.f32 q3, q3, q4\n" in Transform()
4231 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4232 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
4233 "pld [%[output]]\n" in Transform()
4235 "bne 1b\n" in Transform()
4241 "vld1.32 {d0}, [%[input]]!\n" in Transform()
4242 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
4243 "pld [%[input], #32]\n" in Transform()
4244 "vmovl.u8 q1, d1\n" in Transform()
4245 "vmovl.u8 q0, d0\n" in Transform()
4246 "vmovl.s16 q2, d2\n" in Transform()
4247 "vmovl.s16 q1, d1\n" in Transform()
4248 "vmovl.s16 q0, d0\n" in Transform()
4249 "vcvt.f32.s32 q0, q0\n" in Transform()
4250 "vcvt.f32.s32 q1, q1\n" in Transform()
4251 "vcvt.f32.s32 q2, q2\n" in Transform()
4252 "vsub.f32 q0, q0, q5\n" in Transform()
4253 "vsub.f32 q1, q1, q5\n" in Transform()
4254 "vsub.f32 q2, q2, q5\n" in Transform()
4255 "vmul.f32 q0, q0, q6\n" in Transform()
4256 "vmul.f32 q1, q1, q6\n" in Transform()
4257 "vmul.f32 q2, q2, q6\n" in Transform()
4258 "vadd.f32 q0, q0, q4\n" in Transform()
4259 "vadd.f32 q1, q1, q4\n" in Transform()
4260 "vadd.f32 q2, q2, q4\n" in Transform()
4262 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4263 "vst1.32 {d4, d5}, [%[output]]!\n" in Transform()
4264 "pld [%[output]]\n" in Transform()
4288 "vdup.32 q4, %[range_min]\n" in Transform()
4289 "vdup.32 q5, %[range_offset]\n" in Transform()
4290 "vdup.32 q6, %[range_scale]\n" in Transform()
4293 "subs %[count], %[count], #13\n" in Transform()
4294 "beq 2f\n" in Transform()
4297 "subs %[count], %[count], #16\n" in Transform()
4300 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4301 "pld [%[input], #32]\n" in Transform()
4302 "vmovl.u8 q1, d1\n" in Transform()
4303 "vmovl.u8 q0, d0\n" in Transform()
4304 "vmovl.s16 q3, d3\n" in Transform()
4305 "vmovl.s16 q2, d2\n" in Transform()
4306 "vmovl.s16 q1, d1\n" in Transform()
4307 "vmovl.s16 q0, d0\n" in Transform()
4308 "vcvt.f32.s32 q0, q0\n" in Transform()
4309 "vcvt.f32.s32 q1, q1\n" in Transform()
4310 "vcvt.f32.s32 q2, q2\n" in Transform()
4311 "vcvt.f32.s32 q3, q3\n" in Transform()
4312 "vsub.f32 q0, q0, q5\n" in Transform()
4313 "vsub.f32 q1, q1, q5\n" in Transform()
4314 "vsub.f32 q2, q2, q5\n" in Transform()
4315 "vsub.f32 q3, q3, q5\n" in Transform()
4316 "vmul.f32 q0, q0, q6\n" in Transform()
4317 "vmul.f32 q1, q1, q6\n" in Transform()
4318 "vmul.f32 q2, q2, q6\n" in Transform()
4319 "vmul.f32 q3, q3, q6\n" in Transform()
4320 "vadd.f32 q0, q0, q4\n" in Transform()
4321 "vadd.f32 q1, q1, q4\n" in Transform()
4322 "vadd.f32 q2, q2, q4\n" in Transform()
4323 "vadd.f32 q3, q3, q4\n" in Transform()
4325 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4326 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
4327 "pld [%[output]]\n" in Transform()
4329 "bne 1b\n" in Transform()
4335 "vld1.32 {d0}, [%[input]]!\n" in Transform()
4336 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
4337 "vld1.8 {d1[4]}, [%[input]]!\n" in Transform()
4338 "pld [%[input], #32]\n" in Transform()
4339 "vmovl.u8 q1, d1\n" in Transform()
4340 "vmovl.u8 q0, d0\n" in Transform()
4341 "vmovl.s16 q3, d3\n" in Transform()
4342 "vmovl.s16 q2, d2\n" in Transform()
4343 "vmovl.s16 q1, d1\n" in Transform()
4344 "vmovl.s16 q0, d0\n" in Transform()
4345 "vcvt.f32.s32 q0, q0\n" in Transform()
4346 "vcvt.f32.s32 q1, q1\n" in Transform()
4347 "vcvt.f32.s32 q2, q2\n" in Transform()
4348 "vcvt.f32.s32 q3, q3\n" in Transform()
4349 "vsub.f32 q0, q0, q5\n" in Transform()
4350 "vsub.f32 q1, q1, q5\n" in Transform()
4351 "vsub.f32 q2, q2, q5\n" in Transform()
4352 "vsub.f32 q3, q3, q5\n" in Transform()
4353 "vmul.f32 q0, q0, q6\n" in Transform()
4354 "vmul.f32 q1, q1, q6\n" in Transform()
4355 "vmul.f32 q2, q2, q6\n" in Transform()
4356 "vmul.f32 q3, q3, q6\n" in Transform()
4357 "vadd.f32 q0, q0, q4\n" in Transform()
4358 "vadd.f32 q1, q1, q4\n" in Transform()
4359 "vadd.f32 q2, q2, q4\n" in Transform()
4360 "vadd.f32 q3, q3, q4\n" in Transform()
4362 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4363 "vst1.32 {d4, d5}, [%[output]]!\n" in Transform()
4364 "vst1.32 {d6[0]}, [%[output]]!\n" in Transform()
4365 "pld [%[output]]\n" in Transform()
4389 "vdup.32 q4, %[range_min]\n" in Transform()
4390 "vdup.32 q5, %[range_offset]\n" in Transform()
4391 "vdup.32 q6, %[range_scale]\n" in Transform()
4394 "subs %[count], %[count], #14\n" in Transform()
4395 "beq 2f\n" in Transform()
4398 "subs %[count], %[count], #16\n" in Transform()
4401 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4402 "pld [%[input], #32]\n" in Transform()
4403 "vmovl.u8 q1, d1\n" in Transform()
4404 "vmovl.u8 q0, d0\n" in Transform()
4405 "vmovl.s16 q3, d3\n" in Transform()
4406 "vmovl.s16 q2, d2\n" in Transform()
4407 "vmovl.s16 q1, d1\n" in Transform()
4408 "vmovl.s16 q0, d0\n" in Transform()
4409 "vcvt.f32.s32 q0, q0\n" in Transform()
4410 "vcvt.f32.s32 q1, q1\n" in Transform()
4411 "vcvt.f32.s32 q2, q2\n" in Transform()
4412 "vcvt.f32.s32 q3, q3\n" in Transform()
4413 "vsub.f32 q0, q0, q5\n" in Transform()
4414 "vsub.f32 q1, q1, q5\n" in Transform()
4415 "vsub.f32 q2, q2, q5\n" in Transform()
4416 "vsub.f32 q3, q3, q5\n" in Transform()
4417 "vmul.f32 q0, q0, q6\n" in Transform()
4418 "vmul.f32 q1, q1, q6\n" in Transform()
4419 "vmul.f32 q2, q2, q6\n" in Transform()
4420 "vmul.f32 q3, q3, q6\n" in Transform()
4421 "vadd.f32 q0, q0, q4\n" in Transform()
4422 "vadd.f32 q1, q1, q4\n" in Transform()
4423 "vadd.f32 q2, q2, q4\n" in Transform()
4424 "vadd.f32 q3, q3, q4\n" in Transform()
4426 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4427 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
4428 "pld [%[output]]\n" in Transform()
4430 "bne 1b\n" in Transform()
4436 "vld1.32 {d0}, [%[input]]!\n" in Transform()
4437 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
4438 "vld1.16 {d1[2]}, [%[input]]!\n" in Transform()
4439 "pld [%[input], #32]\n" in Transform()
4440 "vmovl.u8 q1, d1\n" in Transform()
4441 "vmovl.u8 q0, d0\n" in Transform()
4442 "vmovl.s16 q3, d3\n" in Transform()
4443 "vmovl.s16 q2, d2\n" in Transform()
4444 "vmovl.s16 q1, d1\n" in Transform()
4445 "vmovl.s16 q0, d0\n" in Transform()
4446 "vcvt.f32.s32 q0, q0\n" in Transform()
4447 "vcvt.f32.s32 q1, q1\n" in Transform()
4448 "vcvt.f32.s32 q2, q2\n" in Transform()
4449 "vcvt.f32.s32 q3, q3\n" in Transform()
4450 "vsub.f32 q0, q0, q5\n" in Transform()
4451 "vsub.f32 q1, q1, q5\n" in Transform()
4452 "vsub.f32 q2, q2, q5\n" in Transform()
4453 "vsub.f32 q3, q3, q5\n" in Transform()
4454 "vmul.f32 q0, q0, q6\n" in Transform()
4455 "vmul.f32 q1, q1, q6\n" in Transform()
4456 "vmul.f32 q2, q2, q6\n" in Transform()
4457 "vmul.f32 q3, q3, q6\n" in Transform()
4458 "vadd.f32 q0, q0, q4\n" in Transform()
4459 "vadd.f32 q1, q1, q4\n" in Transform()
4460 "vadd.f32 q2, q2, q4\n" in Transform()
4461 "vadd.f32 q3, q3, q4\n" in Transform()
4463 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4464 "vst1.32 {d4, d5, d6}, [%[output]]!\n" in Transform()
4465 "pld [%[output]]\n" in Transform()
4489 "vdup.32 q4, %[range_min]\n" in Transform()
4490 "vdup.32 q5, %[range_offset]\n" in Transform()
4491 "vdup.32 q6, %[range_scale]\n" in Transform()
4494 "subs %[count], %[count], #15\n" in Transform()
4495 "beq 2f\n" in Transform()
4498 "subs %[count], %[count], #16\n" in Transform()
4501 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4502 "pld [%[input], #32]\n" in Transform()
4503 "vmovl.u8 q1, d1\n" in Transform()
4504 "vmovl.u8 q0, d0\n" in Transform()
4505 "vmovl.s16 q3, d3\n" in Transform()
4506 "vmovl.s16 q2, d2\n" in Transform()
4507 "vmovl.s16 q1, d1\n" in Transform()
4508 "vmovl.s16 q0, d0\n" in Transform()
4509 "vcvt.f32.s32 q0, q0\n" in Transform()
4510 "vcvt.f32.s32 q1, q1\n" in Transform()
4511 "vcvt.f32.s32 q2, q2\n" in Transform()
4512 "vcvt.f32.s32 q3, q3\n" in Transform()
4513 "vsub.f32 q0, q0, q5\n" in Transform()
4514 "vsub.f32 q1, q1, q5\n" in Transform()
4515 "vsub.f32 q2, q2, q5\n" in Transform()
4516 "vsub.f32 q3, q3, q5\n" in Transform()
4517 "vmul.f32 q0, q0, q6\n" in Transform()
4518 "vmul.f32 q1, q1, q6\n" in Transform()
4519 "vmul.f32 q2, q2, q6\n" in Transform()
4520 "vmul.f32 q3, q3, q6\n" in Transform()
4521 "vadd.f32 q0, q0, q4\n" in Transform()
4522 "vadd.f32 q1, q1, q4\n" in Transform()
4523 "vadd.f32 q2, q2, q4\n" in Transform()
4524 "vadd.f32 q3, q3, q4\n" in Transform()
4526 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4527 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
4528 "pld [%[output]]\n" in Transform()
4530 "bne 1b\n" in Transform()
4536 "vld1.32 {d0}, [%[input]]!\n" in Transform()
4537 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
4538 "vld1.16 {d1[2]}, [%[input]]!\n" in Transform()
4539 "vld1.8 {d1[6]}, [%[input]]!\n" in Transform()
4540 "pld [%[input], #32]\n" in Transform()
4541 "vmovl.u8 q1, d1\n" in Transform()
4542 "vmovl.u8 q0, d0\n" in Transform()
4543 "vmovl.s16 q3, d3\n" in Transform()
4544 "vmovl.s16 q2, d2\n" in Transform()
4545 "vmovl.s16 q1, d1\n" in Transform()
4546 "vmovl.s16 q0, d0\n" in Transform()
4547 "vcvt.f32.s32 q0, q0\n" in Transform()
4548 "vcvt.f32.s32 q1, q1\n" in Transform()
4549 "vcvt.f32.s32 q2, q2\n" in Transform()
4550 "vcvt.f32.s32 q3, q3\n" in Transform()
4551 "vsub.f32 q0, q0, q5\n" in Transform()
4552 "vsub.f32 q1, q1, q5\n" in Transform()
4553 "vsub.f32 q2, q2, q5\n" in Transform()
4554 "vsub.f32 q3, q3, q5\n" in Transform()
4555 "vmul.f32 q0, q0, q6\n" in Transform()
4556 "vmul.f32 q1, q1, q6\n" in Transform()
4557 "vmul.f32 q2, q2, q6\n" in Transform()
4558 "vmul.f32 q3, q3, q6\n" in Transform()
4559 "vadd.f32 q0, q0, q4\n" in Transform()
4560 "vadd.f32 q1, q1, q4\n" in Transform()
4561 "vadd.f32 q2, q2, q4\n" in Transform()
4562 "vadd.f32 q3, q3, q4\n" in Transform()
4564 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
4565 "vst1.32 {d4, d5, d6}, [%[output]]!\n" in Transform()
4566 "vst1.32 {d7[0]}, [%[output]]!\n" in Transform()
4567 "pld [%[output]]\n" in Transform()
4594 "vdup.8 q4, %[min]\n" in Transform()
4595 "vdup.8 q5, %[max]\n" in Transform()
4598 "subs %[count], %[count], #16\n" in Transform()
4601 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4602 "pld [%[input], #16]\n" in Transform()
4603 "vmax.u8 q0, q0, q4\n" in Transform()
4604 "vmin.u8 q0, q0, q5\n" in Transform()
4606 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
4607 "pld [%[output]]\n" in Transform()
4609 "bne 1b\n" in Transform()
4634 "vdup.8 q4, %[min]\n" in Transform()
4635 "vdup.8 q5, %[max]\n" in Transform()
4638 "subs %[count], %[count], #1\n" in Transform()
4639 "beq 2f\n" in Transform()
4642 "subs %[count], %[count], #16\n" in Transform()
4645 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4646 "pld [%[input], #16]\n" in Transform()
4647 "vmax.u8 q0, q0, q4\n" in Transform()
4648 "vmin.u8 q0, q0, q5\n" in Transform()
4650 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
4651 "pld [%[output]]\n" in Transform()
4653 "bne 1b\n" in Transform()
4659 "vld1.8 {d0[0]}, [%[input]]!\n" in Transform()
4660 "pld [%[input], #16]\n" in Transform()
4661 "vmax.u8 q0, q0, q4\n" in Transform()
4662 "vmin.u8 q0, q0, q5\n" in Transform()
4664 "vst1.8 {d0[0]}, [%[output]]!\n" in Transform()
4665 "pld [%[output]]\n" in Transform()
4690 "vdup.8 q4, %[min]\n" in Transform()
4691 "vdup.8 q5, %[max]\n" in Transform()
4694 "subs %[count], %[count], #2\n" in Transform()
4695 "beq 2f\n" in Transform()
4698 "subs %[count], %[count], #16\n" in Transform()
4701 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4702 "pld [%[input], #16]\n" in Transform()
4703 "vmax.u8 q0, q0, q4\n" in Transform()
4704 "vmin.u8 q0, q0, q5\n" in Transform()
4706 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
4707 "pld [%[output]]\n" in Transform()
4709 "bne 1b\n" in Transform()
4715 "vld1.16 {d0[0]}, [%[input]]!\n" in Transform()
4716 "pld [%[input], #16]\n" in Transform()
4717 "vmax.u8 q0, q0, q4\n" in Transform()
4718 "vmin.u8 q0, q0, q5\n" in Transform()
4720 "vst1.16 {d0[0]}, [%[output]]!\n" in Transform()
4721 "pld [%[output]]\n" in Transform()
4746 "vdup.8 q4, %[min]\n" in Transform()
4747 "vdup.8 q5, %[max]\n" in Transform()
4750 "subs %[count], %[count], #3\n" in Transform()
4751 "beq 2f\n" in Transform()
4754 "subs %[count], %[count], #16\n" in Transform()
4757 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4758 "pld [%[input], #16]\n" in Transform()
4759 "vmax.u8 q0, q0, q4\n" in Transform()
4760 "vmin.u8 q0, q0, q5\n" in Transform()
4762 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
4763 "pld [%[output]]\n" in Transform()
4765 "bne 1b\n" in Transform()
4771 "vld1.16 {d0[0]}, [%[input]]!\n" in Transform()
4772 "vld1.8 {d0[2]}, [%[input]]!\n" in Transform()
4773 "pld [%[input], #16]\n" in Transform()
4774 "vmax.u8 q0, q0, q4\n" in Transform()
4775 "vmin.u8 q0, q0, q5\n" in Transform()
4777 "vst1.16 {d0[0]}, [%[output]]!\n" in Transform()
4778 "vst1.8 {d0[2]}, [%[output]]!\n" in Transform()
4779 "pld [%[output]]\n" in Transform()
4804 "vdup.8 q4, %[min]\n" in Transform()
4805 "vdup.8 q5, %[max]\n" in Transform()
4808 "subs %[count], %[count], #4\n" in Transform()
4809 "beq 2f\n" in Transform()
4812 "subs %[count], %[count], #16\n" in Transform()
4815 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4816 "pld [%[input], #16]\n" in Transform()
4817 "vmax.u8 q0, q0, q4\n" in Transform()
4818 "vmin.u8 q0, q0, q5\n" in Transform()
4820 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
4821 "pld [%[output]]\n" in Transform()
4823 "bne 1b\n" in Transform()
4829 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
4830 "pld [%[input], #16]\n" in Transform()
4831 "vmax.u8 q0, q0, q4\n" in Transform()
4832 "vmin.u8 q0, q0, q5\n" in Transform()
4834 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
4835 "pld [%[output]]\n" in Transform()
4860 "vdup.8 q4, %[min]\n" in Transform()
4861 "vdup.8 q5, %[max]\n" in Transform()
4864 "subs %[count], %[count], #5\n" in Transform()
4865 "beq 2f\n" in Transform()
4868 "subs %[count], %[count], #16\n" in Transform()
4871 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4872 "pld [%[input], #16]\n" in Transform()
4873 "vmax.u8 q0, q0, q4\n" in Transform()
4874 "vmin.u8 q0, q0, q5\n" in Transform()
4876 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
4877 "pld [%[output]]\n" in Transform()
4879 "bne 1b\n" in Transform()
4885 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
4886 "vld1.8 {d0[4]}, [%[input]]!\n" in Transform()
4887 "pld [%[input], #16]\n" in Transform()
4888 "vmax.u8 q0, q0, q4\n" in Transform()
4889 "vmin.u8 q0, q0, q5\n" in Transform()
4891 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
4892 "vst1.8 {d0[4]}, [%[output]]!\n" in Transform()
4893 "pld [%[output]]\n" in Transform()
4918 "vdup.8 q4, %[min]\n" in Transform()
4919 "vdup.8 q5, %[max]\n" in Transform()
4922 "subs %[count], %[count], #6\n" in Transform()
4923 "beq 2f\n" in Transform()
4926 "subs %[count], %[count], #16\n" in Transform()
4929 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4930 "pld [%[input], #16]\n" in Transform()
4931 "vmax.u8 q0, q0, q4\n" in Transform()
4932 "vmin.u8 q0, q0, q5\n" in Transform()
4934 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
4935 "pld [%[output]]\n" in Transform()
4937 "bne 1b\n" in Transform()
4943 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
4944 "vld1.16 {d0[2]}, [%[input]]!\n" in Transform()
4945 "pld [%[input], #16]\n" in Transform()
4946 "vmax.u8 q0, q0, q4\n" in Transform()
4947 "vmin.u8 q0, q0, q5\n" in Transform()
4949 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
4950 "vst1.16 {d0[2]}, [%[output]]!\n" in Transform()
4951 "pld [%[output]]\n" in Transform()
4976 "vdup.8 q4, %[min]\n" in Transform()
4977 "vdup.8 q5, %[max]\n" in Transform()
4980 "subs %[count], %[count], #7\n" in Transform()
4981 "beq 2f\n" in Transform()
4984 "subs %[count], %[count], #16\n" in Transform()
4987 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
4988 "pld [%[input], #16]\n" in Transform()
4989 "vmax.u8 q0, q0, q4\n" in Transform()
4990 "vmin.u8 q0, q0, q5\n" in Transform()
4992 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
4993 "pld [%[output]]\n" in Transform()
4995 "bne 1b\n" in Transform()
5001 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
5002 "vld1.16 {d0[2]}, [%[input]]!\n" in Transform()
5003 "vld1.8 {d0[6]}, [%[input]]!\n" in Transform()
5004 "pld [%[input], #16]\n" in Transform()
5005 "vmax.u8 q0, q0, q4\n" in Transform()
5006 "vmin.u8 q0, q0, q5\n" in Transform()
5008 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
5009 "vst1.16 {d0[2]}, [%[output]]!\n" in Transform()
5010 "vst1.8 {d0[6]}, [%[output]]!\n" in Transform()
5011 "pld [%[output]]\n" in Transform()
5036 "vdup.8 q4, %[min]\n" in Transform()
5037 "vdup.8 q5, %[max]\n" in Transform()
5040 "subs %[count], %[count], #8\n" in Transform()
5041 "beq 2f\n" in Transform()
5044 "subs %[count], %[count], #16\n" in Transform()
5047 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5048 "pld [%[input], #16]\n" in Transform()
5049 "vmax.u8 q0, q0, q4\n" in Transform()
5050 "vmin.u8 q0, q0, q5\n" in Transform()
5052 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
5053 "pld [%[output]]\n" in Transform()
5055 "bne 1b\n" in Transform()
5061 "vld1.32 {d0}, [%[input]]!\n" in Transform()
5062 "pld [%[input], #16]\n" in Transform()
5063 "vmax.u8 q0, q0, q4\n" in Transform()
5064 "vmin.u8 q0, q0, q5\n" in Transform()
5066 "vst1.32 {d0}, [%[output]]!\n" in Transform()
5067 "pld [%[output]]\n" in Transform()
5092 "vdup.8 q4, %[min]\n" in Transform()
5093 "vdup.8 q5, %[max]\n" in Transform()
5096 "subs %[count], %[count], #9\n" in Transform()
5097 "beq 2f\n" in Transform()
5100 "subs %[count], %[count], #16\n" in Transform()
5103 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5104 "pld [%[input], #16]\n" in Transform()
5105 "vmax.u8 q0, q0, q4\n" in Transform()
5106 "vmin.u8 q0, q0, q5\n" in Transform()
5108 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
5109 "pld [%[output]]\n" in Transform()
5111 "bne 1b\n" in Transform()
5117 "vld1.32 {d0}, [%[input]]!\n" in Transform()
5118 "vld1.8 {d1[0]}, [%[input]]!\n" in Transform()
5119 "pld [%[input], #16]\n" in Transform()
5120 "vmax.u8 q0, q0, q4\n" in Transform()
5121 "vmin.u8 q0, q0, q5\n" in Transform()
5123 "vst1.32 {d0}, [%[output]]!\n" in Transform()
5124 "vst1.8 {d1[0]}, [%[output]]!\n" in Transform()
5125 "pld [%[output]]\n" in Transform()
5150 "vdup.8 q4, %[min]\n" in Transform()
5151 "vdup.8 q5, %[max]\n" in Transform()
5154 "subs %[count], %[count], #10\n" in Transform()
5155 "beq 2f\n" in Transform()
5158 "subs %[count], %[count], #16\n" in Transform()
5161 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5162 "pld [%[input], #16]\n" in Transform()
5163 "vmax.u8 q0, q0, q4\n" in Transform()
5164 "vmin.u8 q0, q0, q5\n" in Transform()
5166 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
5167 "pld [%[output]]\n" in Transform()
5169 "bne 1b\n" in Transform()
5175 "vld1.32 {d0}, [%[input]]!\n" in Transform()
5176 "vld1.16 {d1[0]}, [%[input]]!\n" in Transform()
5177 "pld [%[input], #16]\n" in Transform()
5178 "vmax.u8 q0, q0, q4\n" in Transform()
5179 "vmin.u8 q0, q0, q5\n" in Transform()
5181 "vst1.32 {d0}, [%[output]]!\n" in Transform()
5182 "vst1.16 {d1[0]}, [%[output]]!\n" in Transform()
5183 "pld [%[output]]\n" in Transform()
5208 "vdup.8 q4, %[min]\n" in Transform()
5209 "vdup.8 q5, %[max]\n" in Transform()
5212 "subs %[count], %[count], #11\n" in Transform()
5213 "beq 2f\n" in Transform()
5216 "subs %[count], %[count], #16\n" in Transform()
5219 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5220 "pld [%[input], #16]\n" in Transform()
5221 "vmax.u8 q0, q0, q4\n" in Transform()
5222 "vmin.u8 q0, q0, q5\n" in Transform()
5224 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
5225 "pld [%[output]]\n" in Transform()
5227 "bne 1b\n" in Transform()
5233 "vld1.32 {d0}, [%[input]]!\n" in Transform()
5234 "vld1.16 {d1[0]}, [%[input]]!\n" in Transform()
5235 "vld1.8 {d1[2]}, [%[input]]!\n" in Transform()
5236 "pld [%[input], #16]\n" in Transform()
5237 "vmax.u8 q0, q0, q4\n" in Transform()
5238 "vmin.u8 q0, q0, q5\n" in Transform()
5240 "vst1.32 {d0}, [%[output]]!\n" in Transform()
5241 "vst1.16 {d1[0]}, [%[output]]!\n" in Transform()
5242 "vst1.8 {d1[2]}, [%[output]]!\n" in Transform()
5243 "pld [%[output]]\n" in Transform()
5268 "vdup.8 q4, %[min]\n" in Transform()
5269 "vdup.8 q5, %[max]\n" in Transform()
5272 "subs %[count], %[count], #12\n" in Transform()
5273 "beq 2f\n" in Transform()
5276 "subs %[count], %[count], #16\n" in Transform()
5279 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5280 "pld [%[input], #16]\n" in Transform()
5281 "vmax.u8 q0, q0, q4\n" in Transform()
5282 "vmin.u8 q0, q0, q5\n" in Transform()
5284 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
5285 "pld [%[output]]\n" in Transform()
5287 "bne 1b\n" in Transform()
5293 "vld1.32 {d0}, [%[input]]!\n" in Transform()
5294 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
5295 "pld [%[input], #16]\n" in Transform()
5296 "vmax.u8 q0, q0, q4\n" in Transform()
5297 "vmin.u8 q0, q0, q5\n" in Transform()
5299 "vst1.32 {d0}, [%[output]]!\n" in Transform()
5300 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
5301 "pld [%[output]]\n" in Transform()
5326 "vdup.8 q4, %[min]\n" in Transform()
5327 "vdup.8 q5, %[max]\n" in Transform()
5330 "subs %[count], %[count], #13\n" in Transform()
5331 "beq 2f\n" in Transform()
5334 "subs %[count], %[count], #16\n" in Transform()
5337 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5338 "pld [%[input], #16]\n" in Transform()
5339 "vmax.u8 q0, q0, q4\n" in Transform()
5340 "vmin.u8 q0, q0, q5\n" in Transform()
5342 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
5343 "pld [%[output]]\n" in Transform()
5345 "bne 1b\n" in Transform()
5351 "vld1.32 {d0}, [%[input]]!\n" in Transform()
5352 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
5353 "vld1.8 {d1[4]}, [%[input]]!\n" in Transform()
5354 "pld [%[input], #16]\n" in Transform()
5355 "vmax.u8 q0, q0, q4\n" in Transform()
5356 "vmin.u8 q0, q0, q5\n" in Transform()
5358 "vst1.32 {d0}, [%[output]]!\n" in Transform()
5359 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
5360 "vst1.8 {d1[4]}, [%[output]]!\n" in Transform()
5361 "pld [%[output]]\n" in Transform()
5386 "vdup.8 q4, %[min]\n" in Transform()
5387 "vdup.8 q5, %[max]\n" in Transform()
5390 "subs %[count], %[count], #14\n" in Transform()
5391 "beq 2f\n" in Transform()
5394 "subs %[count], %[count], #16\n" in Transform()
5397 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5398 "pld [%[input], #16]\n" in Transform()
5399 "vmax.u8 q0, q0, q4\n" in Transform()
5400 "vmin.u8 q0, q0, q5\n" in Transform()
5402 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
5403 "pld [%[output]]\n" in Transform()
5405 "bne 1b\n" in Transform()
5411 "vld1.32 {d0}, [%[input]]!\n" in Transform()
5412 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
5413 "vld1.16 {d1[2]}, [%[input]]!\n" in Transform()
5414 "pld [%[input], #16]\n" in Transform()
5415 "vmax.u8 q0, q0, q4\n" in Transform()
5416 "vmin.u8 q0, q0, q5\n" in Transform()
5418 "vst1.32 {d0}, [%[output]]!\n" in Transform()
5419 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
5420 "vst1.16 {d1[2]}, [%[output]]!\n" in Transform()
5421 "pld [%[output]]\n" in Transform()
5446 "vdup.8 q4, %[min]\n" in Transform()
5447 "vdup.8 q5, %[max]\n" in Transform()
5450 "subs %[count], %[count], #15\n" in Transform()
5451 "beq 2f\n" in Transform()
5454 "subs %[count], %[count], #16\n" in Transform()
5457 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5458 "pld [%[input], #16]\n" in Transform()
5459 "vmax.u8 q0, q0, q4\n" in Transform()
5460 "vmin.u8 q0, q0, q5\n" in Transform()
5462 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
5463 "pld [%[output]]\n" in Transform()
5465 "bne 1b\n" in Transform()
5471 "vld1.32 {d0}, [%[input]]!\n" in Transform()
5472 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
5473 "vld1.16 {d1[2]}, [%[input]]!\n" in Transform()
5474 "vld1.8 {d1[6]}, [%[input]]!\n" in Transform()
5475 "pld [%[input], #16]\n" in Transform()
5476 "vmax.u8 q0, q0, q4\n" in Transform()
5477 "vmin.u8 q0, q0, q5\n" in Transform()
5479 "vst1.32 {d0}, [%[output]]!\n" in Transform()
5480 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
5481 "vst1.16 {d1[2]}, [%[output]]!\n" in Transform()
5482 "vst1.8 {d1[6]}, [%[output]]!\n" in Transform()
5483 "pld [%[output]]\n" in Transform()
5506 "ldr r0, %[input_range_min]\n" in Transform()
5507 "vdup.32 q8, r0\n" in Transform()
5508 "ldr r0, %[input_range_scale]\n" in Transform()
5509 "vdup.32 q9, r0\n" in Transform()
5510 "ldr r0, %[bias_range_min]\n" in Transform()
5511 "vdup.32 q10, r0\n" in Transform()
5512 "ldr r0, %[bias_range_scale]\n" in Transform()
5513 "vdup.32 q11, r0\n" in Transform()
5514 "ldr r0, %[output_range_min]\n" in Transform()
5515 "vdup.32 q12, r0\n" in Transform()
5516 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
5517 "vdup.32 q13, r0\n" in Transform()
5518 "ldr r0, %[output_range_offset]\n" in Transform()
5519 "vdup.32 q14, r0\n" in Transform()
5521 "mov r0, %[count]\n" in Transform()
5522 "mov r1, %[bias]\n" in Transform()
5524 "subs r0, r0, #16\n" in Transform()
5527 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5528 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
5529 "pld [%[input], #32]\n" in Transform()
5530 "vmovl.u8 q1, d1\n" in Transform()
5531 "vmovl.u8 q0, d0\n" in Transform()
5532 "vmovl.u8 q5, d9\n" in Transform()
5533 "vmovl.u8 q4, d8\n" in Transform()
5534 "vmovl.s16 q3, d3\n" in Transform()
5535 "vmovl.s16 q2, d2\n" in Transform()
5536 "vmovl.s16 q7, d11\n" in Transform()
5537 "vmovl.s16 q6, d10\n" in Transform()
5538 "vmovl.s16 q1, d1\n" in Transform()
5539 "vmovl.s16 q0, d0\n" in Transform()
5540 "vmovl.s16 q5, d9\n" in Transform()
5541 "vmovl.s16 q4, d8\n" in Transform()
5542 "vcvt.f32.s32 q0, q0\n" in Transform()
5543 "vcvt.f32.s32 q1, q1\n" in Transform()
5544 "vcvt.f32.s32 q2, q2\n" in Transform()
5545 "vcvt.f32.s32 q3, q3\n" in Transform()
5546 "vcvt.f32.s32 q4, q4\n" in Transform()
5547 "vcvt.f32.s32 q5, q5\n" in Transform()
5548 "vcvt.f32.s32 q6, q6\n" in Transform()
5549 "vcvt.f32.s32 q7, q7\n" in Transform()
5550 "vmul.f32 q0, q0, q9\n" in Transform()
5551 "vmul.f32 q1, q1, q9\n" in Transform()
5552 "vmul.f32 q2, q2, q9\n" in Transform()
5553 "vmul.f32 q3, q3, q9\n" in Transform()
5554 "vmul.f32 q4, q4, q11\n" in Transform()
5555 "vmul.f32 q5, q5, q11\n" in Transform()
5556 "vmul.f32 q6, q6, q11\n" in Transform()
5557 "vmul.f32 q7, q7, q11\n" in Transform()
5558 "vadd.f32 q0, q0, q8\n" in Transform()
5559 "vadd.f32 q1, q1, q8\n" in Transform()
5560 "vadd.f32 q2, q2, q8\n" in Transform()
5561 "vadd.f32 q3, q3, q8\n" in Transform()
5562 "vadd.f32 q4, q4, q10\n" in Transform()
5563 "vadd.f32 q5, q5, q10\n" in Transform()
5564 "vadd.f32 q6, q6, q10\n" in Transform()
5565 "vadd.f32 q7, q7, q10\n" in Transform()
5566 "vadd.f32 q0, q0, q4\n" in Transform()
5567 "vadd.f32 q1, q1, q5\n" in Transform()
5568 "vadd.f32 q2, q2, q6\n" in Transform()
5569 "vadd.f32 q3, q3, q7\n" in Transform()
5570 "vsub.f32 q0, q0, q12\n" in Transform()
5571 "vsub.f32 q1, q1, q12\n" in Transform()
5572 "vsub.f32 q2, q2, q12\n" in Transform()
5573 "vsub.f32 q3, q3, q12\n" in Transform()
5574 "vmul.f32 q0, q0, q13\n" in Transform()
5575 "vmul.f32 q1, q1, q13\n" in Transform()
5576 "vmul.f32 q2, q2, q13\n" in Transform()
5577 "vmul.f32 q3, q3, q13\n" in Transform()
5578 "vadd.f32 q0, q0, q14\n" in Transform()
5579 "vadd.f32 q1, q1, q14\n" in Transform()
5580 "vadd.f32 q2, q2, q14\n" in Transform()
5581 "vadd.f32 q3, q3, q14\n" in Transform()
5582 "vcvt.s32.f32 q0, q0\n" in Transform()
5583 "vcvt.s32.f32 q1, q1\n" in Transform()
5584 "vcvt.s32.f32 q2, q2\n" in Transform()
5585 "vcvt.s32.f32 q3, q3\n" in Transform()
5587 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
5588 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
5589 "pld [%[output]]\n" in Transform()
5590 "bne 2b\n" in Transform()
5591 "subs %[rows], %[rows], #1\n" in Transform()
5592 "bne 1b\n" in Transform()
5624 "ldr r0, %[input_range_min]\n" in Transform()
5625 "vdup.32 q8, r0\n" in Transform()
5626 "ldr r0, %[input_range_scale]\n" in Transform()
5627 "vdup.32 q9, r0\n" in Transform()
5628 "ldr r0, %[bias_range_min]\n" in Transform()
5629 "vdup.32 q10, r0\n" in Transform()
5630 "ldr r0, %[bias_range_scale]\n" in Transform()
5631 "vdup.32 q11, r0\n" in Transform()
5632 "ldr r0, %[output_range_min]\n" in Transform()
5633 "vdup.32 q12, r0\n" in Transform()
5634 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
5635 "vdup.32 q13, r0\n" in Transform()
5636 "ldr r0, %[output_range_offset]\n" in Transform()
5637 "vdup.32 q14, r0\n" in Transform()
5639 "mov r0, %[count]\n" in Transform()
5640 "mov r1, %[bias]\n" in Transform()
5641 "subs r0, r0, #1\n" in Transform()
5642 "beq 3f\n" in Transform()
5644 "subs r0, r0, #16\n" in Transform()
5647 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5648 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
5649 "pld [%[input], #32]\n" in Transform()
5650 "vmovl.u8 q1, d1\n" in Transform()
5651 "vmovl.u8 q0, d0\n" in Transform()
5652 "vmovl.u8 q5, d9\n" in Transform()
5653 "vmovl.u8 q4, d8\n" in Transform()
5654 "vmovl.s16 q3, d3\n" in Transform()
5655 "vmovl.s16 q2, d2\n" in Transform()
5656 "vmovl.s16 q7, d11\n" in Transform()
5657 "vmovl.s16 q6, d10\n" in Transform()
5658 "vmovl.s16 q1, d1\n" in Transform()
5659 "vmovl.s16 q0, d0\n" in Transform()
5660 "vmovl.s16 q5, d9\n" in Transform()
5661 "vmovl.s16 q4, d8\n" in Transform()
5662 "vcvt.f32.s32 q0, q0\n" in Transform()
5663 "vcvt.f32.s32 q1, q1\n" in Transform()
5664 "vcvt.f32.s32 q2, q2\n" in Transform()
5665 "vcvt.f32.s32 q3, q3\n" in Transform()
5666 "vcvt.f32.s32 q4, q4\n" in Transform()
5667 "vcvt.f32.s32 q5, q5\n" in Transform()
5668 "vcvt.f32.s32 q6, q6\n" in Transform()
5669 "vcvt.f32.s32 q7, q7\n" in Transform()
5670 "vmul.f32 q0, q0, q9\n" in Transform()
5671 "vmul.f32 q1, q1, q9\n" in Transform()
5672 "vmul.f32 q2, q2, q9\n" in Transform()
5673 "vmul.f32 q3, q3, q9\n" in Transform()
5674 "vmul.f32 q4, q4, q11\n" in Transform()
5675 "vmul.f32 q5, q5, q11\n" in Transform()
5676 "vmul.f32 q6, q6, q11\n" in Transform()
5677 "vmul.f32 q7, q7, q11\n" in Transform()
5678 "vadd.f32 q0, q0, q8\n" in Transform()
5679 "vadd.f32 q1, q1, q8\n" in Transform()
5680 "vadd.f32 q2, q2, q8\n" in Transform()
5681 "vadd.f32 q3, q3, q8\n" in Transform()
5682 "vadd.f32 q4, q4, q10\n" in Transform()
5683 "vadd.f32 q5, q5, q10\n" in Transform()
5684 "vadd.f32 q6, q6, q10\n" in Transform()
5685 "vadd.f32 q7, q7, q10\n" in Transform()
5686 "vadd.f32 q0, q0, q4\n" in Transform()
5687 "vadd.f32 q1, q1, q5\n" in Transform()
5688 "vadd.f32 q2, q2, q6\n" in Transform()
5689 "vadd.f32 q3, q3, q7\n" in Transform()
5690 "vsub.f32 q0, q0, q12\n" in Transform()
5691 "vsub.f32 q1, q1, q12\n" in Transform()
5692 "vsub.f32 q2, q2, q12\n" in Transform()
5693 "vsub.f32 q3, q3, q12\n" in Transform()
5694 "vmul.f32 q0, q0, q13\n" in Transform()
5695 "vmul.f32 q1, q1, q13\n" in Transform()
5696 "vmul.f32 q2, q2, q13\n" in Transform()
5697 "vmul.f32 q3, q3, q13\n" in Transform()
5698 "vadd.f32 q0, q0, q14\n" in Transform()
5699 "vadd.f32 q1, q1, q14\n" in Transform()
5700 "vadd.f32 q2, q2, q14\n" in Transform()
5701 "vadd.f32 q3, q3, q14\n" in Transform()
5702 "vcvt.s32.f32 q0, q0\n" in Transform()
5703 "vcvt.s32.f32 q1, q1\n" in Transform()
5704 "vcvt.s32.f32 q2, q2\n" in Transform()
5705 "vcvt.s32.f32 q3, q3\n" in Transform()
5707 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
5708 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
5709 "pld [%[output]]\n" in Transform()
5710 "bne 2b\n" in Transform()
5714 "vld1.8 {d0[0]}, [%[input]]!\n" in Transform()
5715 "vld1.8 {d2[0]}, [r1]!\n" in Transform()
5716 "pld [%[input], #32]\n" in Transform()
5717 "vmovl.u8 q0, d0\n" in Transform()
5718 "vmovl.u8 q1, d2\n" in Transform()
5719 "vmovl.s16 q0, d0\n" in Transform()
5720 "vmovl.s16 q1, d2\n" in Transform()
5721 "vcvt.f32.s32 q0, q0\n" in Transform()
5722 "vcvt.f32.s32 q1, q1\n" in Transform()
5723 "vmul.f32 q0, q0, q9\n" in Transform()
5724 "vmul.f32 q1, q1, q11\n" in Transform()
5725 "vadd.f32 q0, q0, q8\n" in Transform()
5726 "vadd.f32 q1, q1, q10\n" in Transform()
5727 "vadd.f32 q0, q0, q1\n" in Transform()
5728 "vsub.f32 q0, q0, q12\n" in Transform()
5729 "vmul.f32 q0, q0, q13\n" in Transform()
5730 "vadd.f32 q0, q0, q14\n" in Transform()
5731 "vcvt.s32.f32 q0, q0\n" in Transform()
5733 "vst1.32 {d0[0]}, [%[output]]!\n" in Transform()
5734 "pld [%[output]]\n" in Transform()
5735 "subs %[rows], %[rows], #1\n" in Transform()
5736 "bne 1b\n" in Transform()
5768 "ldr r0, %[input_range_min]\n" in Transform()
5769 "vdup.32 q8, r0\n" in Transform()
5770 "ldr r0, %[input_range_scale]\n" in Transform()
5771 "vdup.32 q9, r0\n" in Transform()
5772 "ldr r0, %[bias_range_min]\n" in Transform()
5773 "vdup.32 q10, r0\n" in Transform()
5774 "ldr r0, %[bias_range_scale]\n" in Transform()
5775 "vdup.32 q11, r0\n" in Transform()
5776 "ldr r0, %[output_range_min]\n" in Transform()
5777 "vdup.32 q12, r0\n" in Transform()
5778 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
5779 "vdup.32 q13, r0\n" in Transform()
5780 "ldr r0, %[output_range_offset]\n" in Transform()
5781 "vdup.32 q14, r0\n" in Transform()
5783 "mov r0, %[count]\n" in Transform()
5784 "mov r1, %[bias]\n" in Transform()
5785 "subs r0, r0, #2\n" in Transform()
5786 "beq 3f\n" in Transform()
5788 "subs r0, r0, #16\n" in Transform()
5791 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5792 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
5793 "pld [%[input], #32]\n" in Transform()
5794 "vmovl.u8 q1, d1\n" in Transform()
5795 "vmovl.u8 q0, d0\n" in Transform()
5796 "vmovl.u8 q5, d9\n" in Transform()
5797 "vmovl.u8 q4, d8\n" in Transform()
5798 "vmovl.s16 q3, d3\n" in Transform()
5799 "vmovl.s16 q2, d2\n" in Transform()
5800 "vmovl.s16 q7, d11\n" in Transform()
5801 "vmovl.s16 q6, d10\n" in Transform()
5802 "vmovl.s16 q1, d1\n" in Transform()
5803 "vmovl.s16 q0, d0\n" in Transform()
5804 "vmovl.s16 q5, d9\n" in Transform()
5805 "vmovl.s16 q4, d8\n" in Transform()
5806 "vcvt.f32.s32 q0, q0\n" in Transform()
5807 "vcvt.f32.s32 q1, q1\n" in Transform()
5808 "vcvt.f32.s32 q2, q2\n" in Transform()
5809 "vcvt.f32.s32 q3, q3\n" in Transform()
5810 "vcvt.f32.s32 q4, q4\n" in Transform()
5811 "vcvt.f32.s32 q5, q5\n" in Transform()
5812 "vcvt.f32.s32 q6, q6\n" in Transform()
5813 "vcvt.f32.s32 q7, q7\n" in Transform()
5814 "vmul.f32 q0, q0, q9\n" in Transform()
5815 "vmul.f32 q1, q1, q9\n" in Transform()
5816 "vmul.f32 q2, q2, q9\n" in Transform()
5817 "vmul.f32 q3, q3, q9\n" in Transform()
5818 "vmul.f32 q4, q4, q11\n" in Transform()
5819 "vmul.f32 q5, q5, q11\n" in Transform()
5820 "vmul.f32 q6, q6, q11\n" in Transform()
5821 "vmul.f32 q7, q7, q11\n" in Transform()
5822 "vadd.f32 q0, q0, q8\n" in Transform()
5823 "vadd.f32 q1, q1, q8\n" in Transform()
5824 "vadd.f32 q2, q2, q8\n" in Transform()
5825 "vadd.f32 q3, q3, q8\n" in Transform()
5826 "vadd.f32 q4, q4, q10\n" in Transform()
5827 "vadd.f32 q5, q5, q10\n" in Transform()
5828 "vadd.f32 q6, q6, q10\n" in Transform()
5829 "vadd.f32 q7, q7, q10\n" in Transform()
5830 "vadd.f32 q0, q0, q4\n" in Transform()
5831 "vadd.f32 q1, q1, q5\n" in Transform()
5832 "vadd.f32 q2, q2, q6\n" in Transform()
5833 "vadd.f32 q3, q3, q7\n" in Transform()
5834 "vsub.f32 q0, q0, q12\n" in Transform()
5835 "vsub.f32 q1, q1, q12\n" in Transform()
5836 "vsub.f32 q2, q2, q12\n" in Transform()
5837 "vsub.f32 q3, q3, q12\n" in Transform()
5838 "vmul.f32 q0, q0, q13\n" in Transform()
5839 "vmul.f32 q1, q1, q13\n" in Transform()
5840 "vmul.f32 q2, q2, q13\n" in Transform()
5841 "vmul.f32 q3, q3, q13\n" in Transform()
5842 "vadd.f32 q0, q0, q14\n" in Transform()
5843 "vadd.f32 q1, q1, q14\n" in Transform()
5844 "vadd.f32 q2, q2, q14\n" in Transform()
5845 "vadd.f32 q3, q3, q14\n" in Transform()
5846 "vcvt.s32.f32 q0, q0\n" in Transform()
5847 "vcvt.s32.f32 q1, q1\n" in Transform()
5848 "vcvt.s32.f32 q2, q2\n" in Transform()
5849 "vcvt.s32.f32 q3, q3\n" in Transform()
5851 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
5852 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
5853 "pld [%[output]]\n" in Transform()
5854 "bne 2b\n" in Transform()
5858 "vld1.16 {d0[0]}, [%[input]]!\n" in Transform()
5859 "vld1.16 {d2[0]}, [r1]!\n" in Transform()
5860 "pld [%[input], #32]\n" in Transform()
5861 "vmovl.u8 q0, d0\n" in Transform()
5862 "vmovl.u8 q1, d2\n" in Transform()
5863 "vmovl.s16 q0, d0\n" in Transform()
5864 "vmovl.s16 q1, d2\n" in Transform()
5865 "vcvt.f32.s32 q0, q0\n" in Transform()
5866 "vcvt.f32.s32 q1, q1\n" in Transform()
5867 "vmul.f32 q0, q0, q9\n" in Transform()
5868 "vmul.f32 q1, q1, q11\n" in Transform()
5869 "vadd.f32 q0, q0, q8\n" in Transform()
5870 "vadd.f32 q1, q1, q10\n" in Transform()
5871 "vadd.f32 q0, q0, q1\n" in Transform()
5872 "vsub.f32 q0, q0, q12\n" in Transform()
5873 "vmul.f32 q0, q0, q13\n" in Transform()
5874 "vadd.f32 q0, q0, q14\n" in Transform()
5875 "vcvt.s32.f32 q0, q0\n" in Transform()
5877 "vst1.32 {d0}, [%[output]]!\n" in Transform()
5878 "pld [%[output]]\n" in Transform()
5879 "subs %[rows], %[rows], #1\n" in Transform()
5880 "bne 1b\n" in Transform()
5912 "ldr r0, %[input_range_min]\n" in Transform()
5913 "vdup.32 q8, r0\n" in Transform()
5914 "ldr r0, %[input_range_scale]\n" in Transform()
5915 "vdup.32 q9, r0\n" in Transform()
5916 "ldr r0, %[bias_range_min]\n" in Transform()
5917 "vdup.32 q10, r0\n" in Transform()
5918 "ldr r0, %[bias_range_scale]\n" in Transform()
5919 "vdup.32 q11, r0\n" in Transform()
5920 "ldr r0, %[output_range_min]\n" in Transform()
5921 "vdup.32 q12, r0\n" in Transform()
5922 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
5923 "vdup.32 q13, r0\n" in Transform()
5924 "ldr r0, %[output_range_offset]\n" in Transform()
5925 "vdup.32 q14, r0\n" in Transform()
5927 "mov r0, %[count]\n" in Transform()
5928 "mov r1, %[bias]\n" in Transform()
5929 "subs r0, r0, #3\n" in Transform()
5930 "beq 3f\n" in Transform()
5932 "subs r0, r0, #16\n" in Transform()
5935 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
5936 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
5937 "pld [%[input], #32]\n" in Transform()
5938 "vmovl.u8 q1, d1\n" in Transform()
5939 "vmovl.u8 q0, d0\n" in Transform()
5940 "vmovl.u8 q5, d9\n" in Transform()
5941 "vmovl.u8 q4, d8\n" in Transform()
5942 "vmovl.s16 q3, d3\n" in Transform()
5943 "vmovl.s16 q2, d2\n" in Transform()
5944 "vmovl.s16 q7, d11\n" in Transform()
5945 "vmovl.s16 q6, d10\n" in Transform()
5946 "vmovl.s16 q1, d1\n" in Transform()
5947 "vmovl.s16 q0, d0\n" in Transform()
5948 "vmovl.s16 q5, d9\n" in Transform()
5949 "vmovl.s16 q4, d8\n" in Transform()
5950 "vcvt.f32.s32 q0, q0\n" in Transform()
5951 "vcvt.f32.s32 q1, q1\n" in Transform()
5952 "vcvt.f32.s32 q2, q2\n" in Transform()
5953 "vcvt.f32.s32 q3, q3\n" in Transform()
5954 "vcvt.f32.s32 q4, q4\n" in Transform()
5955 "vcvt.f32.s32 q5, q5\n" in Transform()
5956 "vcvt.f32.s32 q6, q6\n" in Transform()
5957 "vcvt.f32.s32 q7, q7\n" in Transform()
5958 "vmul.f32 q0, q0, q9\n" in Transform()
5959 "vmul.f32 q1, q1, q9\n" in Transform()
5960 "vmul.f32 q2, q2, q9\n" in Transform()
5961 "vmul.f32 q3, q3, q9\n" in Transform()
5962 "vmul.f32 q4, q4, q11\n" in Transform()
5963 "vmul.f32 q5, q5, q11\n" in Transform()
5964 "vmul.f32 q6, q6, q11\n" in Transform()
5965 "vmul.f32 q7, q7, q11\n" in Transform()
5966 "vadd.f32 q0, q0, q8\n" in Transform()
5967 "vadd.f32 q1, q1, q8\n" in Transform()
5968 "vadd.f32 q2, q2, q8\n" in Transform()
5969 "vadd.f32 q3, q3, q8\n" in Transform()
5970 "vadd.f32 q4, q4, q10\n" in Transform()
5971 "vadd.f32 q5, q5, q10\n" in Transform()
5972 "vadd.f32 q6, q6, q10\n" in Transform()
5973 "vadd.f32 q7, q7, q10\n" in Transform()
5974 "vadd.f32 q0, q0, q4\n" in Transform()
5975 "vadd.f32 q1, q1, q5\n" in Transform()
5976 "vadd.f32 q2, q2, q6\n" in Transform()
5977 "vadd.f32 q3, q3, q7\n" in Transform()
5978 "vsub.f32 q0, q0, q12\n" in Transform()
5979 "vsub.f32 q1, q1, q12\n" in Transform()
5980 "vsub.f32 q2, q2, q12\n" in Transform()
5981 "vsub.f32 q3, q3, q12\n" in Transform()
5982 "vmul.f32 q0, q0, q13\n" in Transform()
5983 "vmul.f32 q1, q1, q13\n" in Transform()
5984 "vmul.f32 q2, q2, q13\n" in Transform()
5985 "vmul.f32 q3, q3, q13\n" in Transform()
5986 "vadd.f32 q0, q0, q14\n" in Transform()
5987 "vadd.f32 q1, q1, q14\n" in Transform()
5988 "vadd.f32 q2, q2, q14\n" in Transform()
5989 "vadd.f32 q3, q3, q14\n" in Transform()
5990 "vcvt.s32.f32 q0, q0\n" in Transform()
5991 "vcvt.s32.f32 q1, q1\n" in Transform()
5992 "vcvt.s32.f32 q2, q2\n" in Transform()
5993 "vcvt.s32.f32 q3, q3\n" in Transform()
5995 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
5996 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
5997 "pld [%[output]]\n" in Transform()
5998 "bne 2b\n" in Transform()
6002 "vld1.16 {d0[0]}, [%[input]]!\n" in Transform()
6003 "vld1.8 {d0[2]}, [%[input]]!\n" in Transform()
6004 "vld1.16 {d2[0]}, [r1]!\n" in Transform()
6005 "vld1.8 {d2[2]}, [r1]!\n" in Transform()
6006 "pld [%[input], #32]\n" in Transform()
6007 "vmovl.u8 q0, d0\n" in Transform()
6008 "vmovl.u8 q1, d2\n" in Transform()
6009 "vmovl.s16 q0, d0\n" in Transform()
6010 "vmovl.s16 q1, d2\n" in Transform()
6011 "vcvt.f32.s32 q0, q0\n" in Transform()
6012 "vcvt.f32.s32 q1, q1\n" in Transform()
6013 "vmul.f32 q0, q0, q9\n" in Transform()
6014 "vmul.f32 q1, q1, q11\n" in Transform()
6015 "vadd.f32 q0, q0, q8\n" in Transform()
6016 "vadd.f32 q1, q1, q10\n" in Transform()
6017 "vadd.f32 q0, q0, q1\n" in Transform()
6018 "vsub.f32 q0, q0, q12\n" in Transform()
6019 "vmul.f32 q0, q0, q13\n" in Transform()
6020 "vadd.f32 q0, q0, q14\n" in Transform()
6021 "vcvt.s32.f32 q0, q0\n" in Transform()
6023 "vst1.32 {d0}, [%[output]]!\n" in Transform()
6024 "vst1.32 {d1[0]}, [%[output]]!\n" in Transform()
6025 "pld [%[output]]\n" in Transform()
6026 "subs %[rows], %[rows], #1\n" in Transform()
6027 "bne 1b\n" in Transform()
6059 "ldr r0, %[input_range_min]\n" in Transform()
6060 "vdup.32 q8, r0\n" in Transform()
6061 "ldr r0, %[input_range_scale]\n" in Transform()
6062 "vdup.32 q9, r0\n" in Transform()
6063 "ldr r0, %[bias_range_min]\n" in Transform()
6064 "vdup.32 q10, r0\n" in Transform()
6065 "ldr r0, %[bias_range_scale]\n" in Transform()
6066 "vdup.32 q11, r0\n" in Transform()
6067 "ldr r0, %[output_range_min]\n" in Transform()
6068 "vdup.32 q12, r0\n" in Transform()
6069 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
6070 "vdup.32 q13, r0\n" in Transform()
6071 "ldr r0, %[output_range_offset]\n" in Transform()
6072 "vdup.32 q14, r0\n" in Transform()
6074 "mov r0, %[count]\n" in Transform()
6075 "mov r1, %[bias]\n" in Transform()
6076 "subs r0, r0, #4\n" in Transform()
6077 "beq 3f\n" in Transform()
6079 "subs r0, r0, #16\n" in Transform()
6082 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
6083 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
6084 "pld [%[input], #32]\n" in Transform()
6085 "vmovl.u8 q1, d1\n" in Transform()
6086 "vmovl.u8 q0, d0\n" in Transform()
6087 "vmovl.u8 q5, d9\n" in Transform()
6088 "vmovl.u8 q4, d8\n" in Transform()
6089 "vmovl.s16 q3, d3\n" in Transform()
6090 "vmovl.s16 q2, d2\n" in Transform()
6091 "vmovl.s16 q7, d11\n" in Transform()
6092 "vmovl.s16 q6, d10\n" in Transform()
6093 "vmovl.s16 q1, d1\n" in Transform()
6094 "vmovl.s16 q0, d0\n" in Transform()
6095 "vmovl.s16 q5, d9\n" in Transform()
6096 "vmovl.s16 q4, d8\n" in Transform()
6097 "vcvt.f32.s32 q0, q0\n" in Transform()
6098 "vcvt.f32.s32 q1, q1\n" in Transform()
6099 "vcvt.f32.s32 q2, q2\n" in Transform()
6100 "vcvt.f32.s32 q3, q3\n" in Transform()
6101 "vcvt.f32.s32 q4, q4\n" in Transform()
6102 "vcvt.f32.s32 q5, q5\n" in Transform()
6103 "vcvt.f32.s32 q6, q6\n" in Transform()
6104 "vcvt.f32.s32 q7, q7\n" in Transform()
6105 "vmul.f32 q0, q0, q9\n" in Transform()
6106 "vmul.f32 q1, q1, q9\n" in Transform()
6107 "vmul.f32 q2, q2, q9\n" in Transform()
6108 "vmul.f32 q3, q3, q9\n" in Transform()
6109 "vmul.f32 q4, q4, q11\n" in Transform()
6110 "vmul.f32 q5, q5, q11\n" in Transform()
6111 "vmul.f32 q6, q6, q11\n" in Transform()
6112 "vmul.f32 q7, q7, q11\n" in Transform()
6113 "vadd.f32 q0, q0, q8\n" in Transform()
6114 "vadd.f32 q1, q1, q8\n" in Transform()
6115 "vadd.f32 q2, q2, q8\n" in Transform()
6116 "vadd.f32 q3, q3, q8\n" in Transform()
6117 "vadd.f32 q4, q4, q10\n" in Transform()
6118 "vadd.f32 q5, q5, q10\n" in Transform()
6119 "vadd.f32 q6, q6, q10\n" in Transform()
6120 "vadd.f32 q7, q7, q10\n" in Transform()
6121 "vadd.f32 q0, q0, q4\n" in Transform()
6122 "vadd.f32 q1, q1, q5\n" in Transform()
6123 "vadd.f32 q2, q2, q6\n" in Transform()
6124 "vadd.f32 q3, q3, q7\n" in Transform()
6125 "vsub.f32 q0, q0, q12\n" in Transform()
6126 "vsub.f32 q1, q1, q12\n" in Transform()
6127 "vsub.f32 q2, q2, q12\n" in Transform()
6128 "vsub.f32 q3, q3, q12\n" in Transform()
6129 "vmul.f32 q0, q0, q13\n" in Transform()
6130 "vmul.f32 q1, q1, q13\n" in Transform()
6131 "vmul.f32 q2, q2, q13\n" in Transform()
6132 "vmul.f32 q3, q3, q13\n" in Transform()
6133 "vadd.f32 q0, q0, q14\n" in Transform()
6134 "vadd.f32 q1, q1, q14\n" in Transform()
6135 "vadd.f32 q2, q2, q14\n" in Transform()
6136 "vadd.f32 q3, q3, q14\n" in Transform()
6137 "vcvt.s32.f32 q0, q0\n" in Transform()
6138 "vcvt.s32.f32 q1, q1\n" in Transform()
6139 "vcvt.s32.f32 q2, q2\n" in Transform()
6140 "vcvt.s32.f32 q3, q3\n" in Transform()
6142 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
6143 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
6144 "pld [%[output]]\n" in Transform()
6145 "bne 2b\n" in Transform()
6149 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
6150 "vld1.32 {d2[0]}, [r1]!\n" in Transform()
6151 "pld [%[input], #32]\n" in Transform()
6152 "vmovl.u8 q0, d0\n" in Transform()
6153 "vmovl.u8 q1, d2\n" in Transform()
6154 "vmovl.s16 q0, d0\n" in Transform()
6155 "vmovl.s16 q1, d2\n" in Transform()
6156 "vcvt.f32.s32 q0, q0\n" in Transform()
6157 "vcvt.f32.s32 q1, q1\n" in Transform()
6158 "vmul.f32 q0, q0, q9\n" in Transform()
6159 "vmul.f32 q1, q1, q11\n" in Transform()
6160 "vadd.f32 q0, q0, q8\n" in Transform()
6161 "vadd.f32 q1, q1, q10\n" in Transform()
6162 "vadd.f32 q0, q0, q1\n" in Transform()
6163 "vsub.f32 q0, q0, q12\n" in Transform()
6164 "vmul.f32 q0, q0, q13\n" in Transform()
6165 "vadd.f32 q0, q0, q14\n" in Transform()
6166 "vcvt.s32.f32 q0, q0\n" in Transform()
6168 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
6169 "pld [%[output]]\n" in Transform()
6170 "subs %[rows], %[rows], #1\n" in Transform()
6171 "bne 1b\n" in Transform()
6203 "ldr r0, %[input_range_min]\n" in Transform()
6204 "vdup.32 q8, r0\n" in Transform()
6205 "ldr r0, %[input_range_scale]\n" in Transform()
6206 "vdup.32 q9, r0\n" in Transform()
6207 "ldr r0, %[bias_range_min]\n" in Transform()
6208 "vdup.32 q10, r0\n" in Transform()
6209 "ldr r0, %[bias_range_scale]\n" in Transform()
6210 "vdup.32 q11, r0\n" in Transform()
6211 "ldr r0, %[output_range_min]\n" in Transform()
6212 "vdup.32 q12, r0\n" in Transform()
6213 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
6214 "vdup.32 q13, r0\n" in Transform()
6215 "ldr r0, %[output_range_offset]\n" in Transform()
6216 "vdup.32 q14, r0\n" in Transform()
6218 "mov r0, %[count]\n" in Transform()
6219 "mov r1, %[bias]\n" in Transform()
6220 "subs r0, r0, #5\n" in Transform()
6221 "beq 3f\n" in Transform()
6223 "subs r0, r0, #16\n" in Transform()
6226 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
6227 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
6228 "pld [%[input], #32]\n" in Transform()
6229 "vmovl.u8 q1, d1\n" in Transform()
6230 "vmovl.u8 q0, d0\n" in Transform()
6231 "vmovl.u8 q5, d9\n" in Transform()
6232 "vmovl.u8 q4, d8\n" in Transform()
6233 "vmovl.s16 q3, d3\n" in Transform()
6234 "vmovl.s16 q2, d2\n" in Transform()
6235 "vmovl.s16 q7, d11\n" in Transform()
6236 "vmovl.s16 q6, d10\n" in Transform()
6237 "vmovl.s16 q1, d1\n" in Transform()
6238 "vmovl.s16 q0, d0\n" in Transform()
6239 "vmovl.s16 q5, d9\n" in Transform()
6240 "vmovl.s16 q4, d8\n" in Transform()
6241 "vcvt.f32.s32 q0, q0\n" in Transform()
6242 "vcvt.f32.s32 q1, q1\n" in Transform()
6243 "vcvt.f32.s32 q2, q2\n" in Transform()
6244 "vcvt.f32.s32 q3, q3\n" in Transform()
6245 "vcvt.f32.s32 q4, q4\n" in Transform()
6246 "vcvt.f32.s32 q5, q5\n" in Transform()
6247 "vcvt.f32.s32 q6, q6\n" in Transform()
6248 "vcvt.f32.s32 q7, q7\n" in Transform()
6249 "vmul.f32 q0, q0, q9\n" in Transform()
6250 "vmul.f32 q1, q1, q9\n" in Transform()
6251 "vmul.f32 q2, q2, q9\n" in Transform()
6252 "vmul.f32 q3, q3, q9\n" in Transform()
6253 "vmul.f32 q4, q4, q11\n" in Transform()
6254 "vmul.f32 q5, q5, q11\n" in Transform()
6255 "vmul.f32 q6, q6, q11\n" in Transform()
6256 "vmul.f32 q7, q7, q11\n" in Transform()
6257 "vadd.f32 q0, q0, q8\n" in Transform()
6258 "vadd.f32 q1, q1, q8\n" in Transform()
6259 "vadd.f32 q2, q2, q8\n" in Transform()
6260 "vadd.f32 q3, q3, q8\n" in Transform()
6261 "vadd.f32 q4, q4, q10\n" in Transform()
6262 "vadd.f32 q5, q5, q10\n" in Transform()
6263 "vadd.f32 q6, q6, q10\n" in Transform()
6264 "vadd.f32 q7, q7, q10\n" in Transform()
6265 "vadd.f32 q0, q0, q4\n" in Transform()
6266 "vadd.f32 q1, q1, q5\n" in Transform()
6267 "vadd.f32 q2, q2, q6\n" in Transform()
6268 "vadd.f32 q3, q3, q7\n" in Transform()
6269 "vsub.f32 q0, q0, q12\n" in Transform()
6270 "vsub.f32 q1, q1, q12\n" in Transform()
6271 "vsub.f32 q2, q2, q12\n" in Transform()
6272 "vsub.f32 q3, q3, q12\n" in Transform()
6273 "vmul.f32 q0, q0, q13\n" in Transform()
6274 "vmul.f32 q1, q1, q13\n" in Transform()
6275 "vmul.f32 q2, q2, q13\n" in Transform()
6276 "vmul.f32 q3, q3, q13\n" in Transform()
6277 "vadd.f32 q0, q0, q14\n" in Transform()
6278 "vadd.f32 q1, q1, q14\n" in Transform()
6279 "vadd.f32 q2, q2, q14\n" in Transform()
6280 "vadd.f32 q3, q3, q14\n" in Transform()
6281 "vcvt.s32.f32 q0, q0\n" in Transform()
6282 "vcvt.s32.f32 q1, q1\n" in Transform()
6283 "vcvt.s32.f32 q2, q2\n" in Transform()
6284 "vcvt.s32.f32 q3, q3\n" in Transform()
6286 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
6287 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
6288 "pld [%[output]]\n" in Transform()
6289 "bne 2b\n" in Transform()
6293 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
6294 "vld1.8 {d0[4]}, [%[input]]!\n" in Transform()
6295 "vld1.32 {d4[0]}, [r1]!\n" in Transform()
6296 "vld1.8 {d4[4]}, [r1]!\n" in Transform()
6297 "pld [%[input], #32]\n" in Transform()
6298 "vmovl.u8 q0, d0\n" in Transform()
6299 "vmovl.u8 q2, d4\n" in Transform()
6300 "vmovl.s16 q1, d1\n" in Transform()
6301 "vmovl.s16 q0, d0\n" in Transform()
6302 "vmovl.s16 q3, d5\n" in Transform()
6303 "vmovl.s16 q2, d4\n" in Transform()
6304 "vcvt.f32.s32 q0, q0\n" in Transform()
6305 "vcvt.f32.s32 q1, q1\n" in Transform()
6306 "vcvt.f32.s32 q2, q2\n" in Transform()
6307 "vcvt.f32.s32 q3, q3\n" in Transform()
6308 "vmul.f32 q0, q0, q9\n" in Transform()
6309 "vmul.f32 q1, q1, q9\n" in Transform()
6310 "vmul.f32 q2, q2, q11\n" in Transform()
6311 "vmul.f32 q3, q3, q11\n" in Transform()
6312 "vadd.f32 q0, q0, q8\n" in Transform()
6313 "vadd.f32 q1, q1, q8\n" in Transform()
6314 "vadd.f32 q2, q2, q10\n" in Transform()
6315 "vadd.f32 q3, q3, q10\n" in Transform()
6316 "vadd.f32 q0, q0, q2\n" in Transform()
6317 "vadd.f32 q1, q1, q3\n" in Transform()
6318 "vsub.f32 q0, q0, q12\n" in Transform()
6319 "vsub.f32 q1, q1, q12\n" in Transform()
6320 "vmul.f32 q0, q0, q13\n" in Transform()
6321 "vmul.f32 q1, q1, q13\n" in Transform()
6322 "vadd.f32 q0, q0, q14\n" in Transform()
6323 "vadd.f32 q1, q1, q14\n" in Transform()
6324 "vcvt.s32.f32 q0, q0\n" in Transform()
6325 "vcvt.s32.f32 q1, q1\n" in Transform()
6327 "vst1.32 {d0, d1}, [%[output]]!\n" in Transform()
6328 "vst1.32 {d2[0]}, [%[output]]!\n" in Transform()
6329 "pld [%[output]]\n" in Transform()
6330 "subs %[rows], %[rows], #1\n" in Transform()
6331 "bne 1b\n" in Transform()
6363 "ldr r0, %[input_range_min]\n" in Transform()
6364 "vdup.32 q8, r0\n" in Transform()
6365 "ldr r0, %[input_range_scale]\n" in Transform()
6366 "vdup.32 q9, r0\n" in Transform()
6367 "ldr r0, %[bias_range_min]\n" in Transform()
6368 "vdup.32 q10, r0\n" in Transform()
6369 "ldr r0, %[bias_range_scale]\n" in Transform()
6370 "vdup.32 q11, r0\n" in Transform()
6371 "ldr r0, %[output_range_min]\n" in Transform()
6372 "vdup.32 q12, r0\n" in Transform()
6373 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
6374 "vdup.32 q13, r0\n" in Transform()
6375 "ldr r0, %[output_range_offset]\n" in Transform()
6376 "vdup.32 q14, r0\n" in Transform()
6378 "mov r0, %[count]\n" in Transform()
6379 "mov r1, %[bias]\n" in Transform()
6380 "subs r0, r0, #6\n" in Transform()
6381 "beq 3f\n" in Transform()
6383 "subs r0, r0, #16\n" in Transform()
6386 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
6387 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
6388 "pld [%[input], #32]\n" in Transform()
6389 "vmovl.u8 q1, d1\n" in Transform()
6390 "vmovl.u8 q0, d0\n" in Transform()
6391 "vmovl.u8 q5, d9\n" in Transform()
6392 "vmovl.u8 q4, d8\n" in Transform()
6393 "vmovl.s16 q3, d3\n" in Transform()
6394 "vmovl.s16 q2, d2\n" in Transform()
6395 "vmovl.s16 q7, d11\n" in Transform()
6396 "vmovl.s16 q6, d10\n" in Transform()
6397 "vmovl.s16 q1, d1\n" in Transform()
6398 "vmovl.s16 q0, d0\n" in Transform()
6399 "vmovl.s16 q5, d9\n" in Transform()
6400 "vmovl.s16 q4, d8\n" in Transform()
6401 "vcvt.f32.s32 q0, q0\n" in Transform()
6402 "vcvt.f32.s32 q1, q1\n" in Transform()
6403 "vcvt.f32.s32 q2, q2\n" in Transform()
6404 "vcvt.f32.s32 q3, q3\n" in Transform()
6405 "vcvt.f32.s32 q4, q4\n" in Transform()
6406 "vcvt.f32.s32 q5, q5\n" in Transform()
6407 "vcvt.f32.s32 q6, q6\n" in Transform()
6408 "vcvt.f32.s32 q7, q7\n" in Transform()
6409 "vmul.f32 q0, q0, q9\n" in Transform()
6410 "vmul.f32 q1, q1, q9\n" in Transform()
6411 "vmul.f32 q2, q2, q9\n" in Transform()
6412 "vmul.f32 q3, q3, q9\n" in Transform()
6413 "vmul.f32 q4, q4, q11\n" in Transform()
6414 "vmul.f32 q5, q5, q11\n" in Transform()
6415 "vmul.f32 q6, q6, q11\n" in Transform()
6416 "vmul.f32 q7, q7, q11\n" in Transform()
6417 "vadd.f32 q0, q0, q8\n" in Transform()
6418 "vadd.f32 q1, q1, q8\n" in Transform()
6419 "vadd.f32 q2, q2, q8\n" in Transform()
6420 "vadd.f32 q3, q3, q8\n" in Transform()
6421 "vadd.f32 q4, q4, q10\n" in Transform()
6422 "vadd.f32 q5, q5, q10\n" in Transform()
6423 "vadd.f32 q6, q6, q10\n" in Transform()
6424 "vadd.f32 q7, q7, q10\n" in Transform()
6425 "vadd.f32 q0, q0, q4\n" in Transform()
6426 "vadd.f32 q1, q1, q5\n" in Transform()
6427 "vadd.f32 q2, q2, q6\n" in Transform()
6428 "vadd.f32 q3, q3, q7\n" in Transform()
6429 "vsub.f32 q0, q0, q12\n" in Transform()
6430 "vsub.f32 q1, q1, q12\n" in Transform()
6431 "vsub.f32 q2, q2, q12\n" in Transform()
6432 "vsub.f32 q3, q3, q12\n" in Transform()
6433 "vmul.f32 q0, q0, q13\n" in Transform()
6434 "vmul.f32 q1, q1, q13\n" in Transform()
6435 "vmul.f32 q2, q2, q13\n" in Transform()
6436 "vmul.f32 q3, q3, q13\n" in Transform()
6437 "vadd.f32 q0, q0, q14\n" in Transform()
6438 "vadd.f32 q1, q1, q14\n" in Transform()
6439 "vadd.f32 q2, q2, q14\n" in Transform()
6440 "vadd.f32 q3, q3, q14\n" in Transform()
6441 "vcvt.s32.f32 q0, q0\n" in Transform()
6442 "vcvt.s32.f32 q1, q1\n" in Transform()
6443 "vcvt.s32.f32 q2, q2\n" in Transform()
6444 "vcvt.s32.f32 q3, q3\n" in Transform()
6446 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
6447 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
6448 "pld [%[output]]\n" in Transform()
6449 "bne 2b\n" in Transform()
6453 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
6454 "vld1.16 {d0[2]}, [%[input]]!\n" in Transform()
6455 "vld1.32 {d4[0]}, [r1]!\n" in Transform()
6456 "vld1.16 {d4[2]}, [r1]!\n" in Transform()
6457 "pld [%[input], #32]\n" in Transform()
6458 "vmovl.u8 q0, d0\n" in Transform()
6459 "vmovl.u8 q2, d4\n" in Transform()
6460 "vmovl.s16 q1, d1\n" in Transform()
6461 "vmovl.s16 q0, d0\n" in Transform()
6462 "vmovl.s16 q3, d5\n" in Transform()
6463 "vmovl.s16 q2, d4\n" in Transform()
6464 "vcvt.f32.s32 q0, q0\n" in Transform()
6465 "vcvt.f32.s32 q1, q1\n" in Transform()
6466 "vcvt.f32.s32 q2, q2\n" in Transform()
6467 "vcvt.f32.s32 q3, q3\n" in Transform()
6468 "vmul.f32 q0, q0, q9\n" in Transform()
6469 "vmul.f32 q1, q1, q9\n" in Transform()
6470 "vmul.f32 q2, q2, q11\n" in Transform()
6471 "vmul.f32 q3, q3, q11\n" in Transform()
6472 "vadd.f32 q0, q0, q8\n" in Transform()
6473 "vadd.f32 q1, q1, q8\n" in Transform()
6474 "vadd.f32 q2, q2, q10\n" in Transform()
6475 "vadd.f32 q3, q3, q10\n" in Transform()
6476 "vadd.f32 q0, q0, q2\n" in Transform()
6477 "vadd.f32 q1, q1, q3\n" in Transform()
6478 "vsub.f32 q0, q0, q12\n" in Transform()
6479 "vsub.f32 q1, q1, q12\n" in Transform()
6480 "vmul.f32 q0, q0, q13\n" in Transform()
6481 "vmul.f32 q1, q1, q13\n" in Transform()
6482 "vadd.f32 q0, q0, q14\n" in Transform()
6483 "vadd.f32 q1, q1, q14\n" in Transform()
6484 "vcvt.s32.f32 q0, q0\n" in Transform()
6485 "vcvt.s32.f32 q1, q1\n" in Transform()
6487 "vst1.32 {d0, d1, d2}, [%[output]]!\n" in Transform()
6488 "pld [%[output]]\n" in Transform()
6489 "subs %[rows], %[rows], #1\n" in Transform()
6490 "bne 1b\n" in Transform()
6522 "ldr r0, %[input_range_min]\n" in Transform()
6523 "vdup.32 q8, r0\n" in Transform()
6524 "ldr r0, %[input_range_scale]\n" in Transform()
6525 "vdup.32 q9, r0\n" in Transform()
6526 "ldr r0, %[bias_range_min]\n" in Transform()
6527 "vdup.32 q10, r0\n" in Transform()
6528 "ldr r0, %[bias_range_scale]\n" in Transform()
6529 "vdup.32 q11, r0\n" in Transform()
6530 "ldr r0, %[output_range_min]\n" in Transform()
6531 "vdup.32 q12, r0\n" in Transform()
6532 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
6533 "vdup.32 q13, r0\n" in Transform()
6534 "ldr r0, %[output_range_offset]\n" in Transform()
6535 "vdup.32 q14, r0\n" in Transform()
6537 "mov r0, %[count]\n" in Transform()
6538 "mov r1, %[bias]\n" in Transform()
6539 "subs r0, r0, #7\n" in Transform()
6540 "beq 3f\n" in Transform()
6542 "subs r0, r0, #16\n" in Transform()
6545 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
6546 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
6547 "pld [%[input], #32]\n" in Transform()
6548 "vmovl.u8 q1, d1\n" in Transform()
6549 "vmovl.u8 q0, d0\n" in Transform()
6550 "vmovl.u8 q5, d9\n" in Transform()
6551 "vmovl.u8 q4, d8\n" in Transform()
6552 "vmovl.s16 q3, d3\n" in Transform()
6553 "vmovl.s16 q2, d2\n" in Transform()
6554 "vmovl.s16 q7, d11\n" in Transform()
6555 "vmovl.s16 q6, d10\n" in Transform()
6556 "vmovl.s16 q1, d1\n" in Transform()
6557 "vmovl.s16 q0, d0\n" in Transform()
6558 "vmovl.s16 q5, d9\n" in Transform()
6559 "vmovl.s16 q4, d8\n" in Transform()
6560 "vcvt.f32.s32 q0, q0\n" in Transform()
6561 "vcvt.f32.s32 q1, q1\n" in Transform()
6562 "vcvt.f32.s32 q2, q2\n" in Transform()
6563 "vcvt.f32.s32 q3, q3\n" in Transform()
6564 "vcvt.f32.s32 q4, q4\n" in Transform()
6565 "vcvt.f32.s32 q5, q5\n" in Transform()
6566 "vcvt.f32.s32 q6, q6\n" in Transform()
6567 "vcvt.f32.s32 q7, q7\n" in Transform()
6568 "vmul.f32 q0, q0, q9\n" in Transform()
6569 "vmul.f32 q1, q1, q9\n" in Transform()
6570 "vmul.f32 q2, q2, q9\n" in Transform()
6571 "vmul.f32 q3, q3, q9\n" in Transform()
6572 "vmul.f32 q4, q4, q11\n" in Transform()
6573 "vmul.f32 q5, q5, q11\n" in Transform()
6574 "vmul.f32 q6, q6, q11\n" in Transform()
6575 "vmul.f32 q7, q7, q11\n" in Transform()
6576 "vadd.f32 q0, q0, q8\n" in Transform()
6577 "vadd.f32 q1, q1, q8\n" in Transform()
6578 "vadd.f32 q2, q2, q8\n" in Transform()
6579 "vadd.f32 q3, q3, q8\n" in Transform()
6580 "vadd.f32 q4, q4, q10\n" in Transform()
6581 "vadd.f32 q5, q5, q10\n" in Transform()
6582 "vadd.f32 q6, q6, q10\n" in Transform()
6583 "vadd.f32 q7, q7, q10\n" in Transform()
6584 "vadd.f32 q0, q0, q4\n" in Transform()
6585 "vadd.f32 q1, q1, q5\n" in Transform()
6586 "vadd.f32 q2, q2, q6\n" in Transform()
6587 "vadd.f32 q3, q3, q7\n" in Transform()
6588 "vsub.f32 q0, q0, q12\n" in Transform()
6589 "vsub.f32 q1, q1, q12\n" in Transform()
6590 "vsub.f32 q2, q2, q12\n" in Transform()
6591 "vsub.f32 q3, q3, q12\n" in Transform()
6592 "vmul.f32 q0, q0, q13\n" in Transform()
6593 "vmul.f32 q1, q1, q13\n" in Transform()
6594 "vmul.f32 q2, q2, q13\n" in Transform()
6595 "vmul.f32 q3, q3, q13\n" in Transform()
6596 "vadd.f32 q0, q0, q14\n" in Transform()
6597 "vadd.f32 q1, q1, q14\n" in Transform()
6598 "vadd.f32 q2, q2, q14\n" in Transform()
6599 "vadd.f32 q3, q3, q14\n" in Transform()
6600 "vcvt.s32.f32 q0, q0\n" in Transform()
6601 "vcvt.s32.f32 q1, q1\n" in Transform()
6602 "vcvt.s32.f32 q2, q2\n" in Transform()
6603 "vcvt.s32.f32 q3, q3\n" in Transform()
6605 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
6606 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
6607 "pld [%[output]]\n" in Transform()
6608 "bne 2b\n" in Transform()
6612 "vld1.32 {d0[0]}, [%[input]]!\n" in Transform()
6613 "vld1.16 {d0[2]}, [%[input]]!\n" in Transform()
6614 "vld1.8 {d0[6]}, [%[input]]!\n" in Transform()
6615 "vld1.32 {d4[0]}, [r1]!\n" in Transform()
6616 "vld1.16 {d4[2]}, [r1]!\n" in Transform()
6617 "vld1.8 {d4[6]}, [r1]!\n" in Transform()
6618 "pld [%[input], #32]\n" in Transform()
6619 "vmovl.u8 q0, d0\n" in Transform()
6620 "vmovl.u8 q2, d4\n" in Transform()
6621 "vmovl.s16 q1, d1\n" in Transform()
6622 "vmovl.s16 q0, d0\n" in Transform()
6623 "vmovl.s16 q3, d5\n" in Transform()
6624 "vmovl.s16 q2, d4\n" in Transform()
6625 "vcvt.f32.s32 q0, q0\n" in Transform()
6626 "vcvt.f32.s32 q1, q1\n" in Transform()
6627 "vcvt.f32.s32 q2, q2\n" in Transform()
6628 "vcvt.f32.s32 q3, q3\n" in Transform()
6629 "vmul.f32 q0, q0, q9\n" in Transform()
6630 "vmul.f32 q1, q1, q9\n" in Transform()
6631 "vmul.f32 q2, q2, q11\n" in Transform()
6632 "vmul.f32 q3, q3, q11\n" in Transform()
6633 "vadd.f32 q0, q0, q8\n" in Transform()
6634 "vadd.f32 q1, q1, q8\n" in Transform()
6635 "vadd.f32 q2, q2, q10\n" in Transform()
6636 "vadd.f32 q3, q3, q10\n" in Transform()
6637 "vadd.f32 q0, q0, q2\n" in Transform()
6638 "vadd.f32 q1, q1, q3\n" in Transform()
6639 "vsub.f32 q0, q0, q12\n" in Transform()
6640 "vsub.f32 q1, q1, q12\n" in Transform()
6641 "vmul.f32 q0, q0, q13\n" in Transform()
6642 "vmul.f32 q1, q1, q13\n" in Transform()
6643 "vadd.f32 q0, q0, q14\n" in Transform()
6644 "vadd.f32 q1, q1, q14\n" in Transform()
6645 "vcvt.s32.f32 q0, q0\n" in Transform()
6646 "vcvt.s32.f32 q1, q1\n" in Transform()
6648 "vst1.32 {d0, d1, d2}, [%[output]]!\n" in Transform()
6649 "vst1.32 {d3[0]}, [%[output]]!\n" in Transform()
6650 "pld [%[output]]\n" in Transform()
6651 "subs %[rows], %[rows], #1\n" in Transform()
6652 "bne 1b\n" in Transform()
6684 "ldr r0, %[input_range_min]\n" in Transform()
6685 "vdup.32 q8, r0\n" in Transform()
6686 "ldr r0, %[input_range_scale]\n" in Transform()
6687 "vdup.32 q9, r0\n" in Transform()
6688 "ldr r0, %[bias_range_min]\n" in Transform()
6689 "vdup.32 q10, r0\n" in Transform()
6690 "ldr r0, %[bias_range_scale]\n" in Transform()
6691 "vdup.32 q11, r0\n" in Transform()
6692 "ldr r0, %[output_range_min]\n" in Transform()
6693 "vdup.32 q12, r0\n" in Transform()
6694 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
6695 "vdup.32 q13, r0\n" in Transform()
6696 "ldr r0, %[output_range_offset]\n" in Transform()
6697 "vdup.32 q14, r0\n" in Transform()
6699 "mov r0, %[count]\n" in Transform()
6700 "mov r1, %[bias]\n" in Transform()
6701 "subs r0, r0, #8\n" in Transform()
6702 "beq 3f\n" in Transform()
6704 "subs r0, r0, #16\n" in Transform()
6707 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
6708 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
6709 "pld [%[input], #32]\n" in Transform()
6710 "vmovl.u8 q1, d1\n" in Transform()
6711 "vmovl.u8 q0, d0\n" in Transform()
6712 "vmovl.u8 q5, d9\n" in Transform()
6713 "vmovl.u8 q4, d8\n" in Transform()
6714 "vmovl.s16 q3, d3\n" in Transform()
6715 "vmovl.s16 q2, d2\n" in Transform()
6716 "vmovl.s16 q7, d11\n" in Transform()
6717 "vmovl.s16 q6, d10\n" in Transform()
6718 "vmovl.s16 q1, d1\n" in Transform()
6719 "vmovl.s16 q0, d0\n" in Transform()
6720 "vmovl.s16 q5, d9\n" in Transform()
6721 "vmovl.s16 q4, d8\n" in Transform()
6722 "vcvt.f32.s32 q0, q0\n" in Transform()
6723 "vcvt.f32.s32 q1, q1\n" in Transform()
6724 "vcvt.f32.s32 q2, q2\n" in Transform()
6725 "vcvt.f32.s32 q3, q3\n" in Transform()
6726 "vcvt.f32.s32 q4, q4\n" in Transform()
6727 "vcvt.f32.s32 q5, q5\n" in Transform()
6728 "vcvt.f32.s32 q6, q6\n" in Transform()
6729 "vcvt.f32.s32 q7, q7\n" in Transform()
6730 "vmul.f32 q0, q0, q9\n" in Transform()
6731 "vmul.f32 q1, q1, q9\n" in Transform()
6732 "vmul.f32 q2, q2, q9\n" in Transform()
6733 "vmul.f32 q3, q3, q9\n" in Transform()
6734 "vmul.f32 q4, q4, q11\n" in Transform()
6735 "vmul.f32 q5, q5, q11\n" in Transform()
6736 "vmul.f32 q6, q6, q11\n" in Transform()
6737 "vmul.f32 q7, q7, q11\n" in Transform()
6738 "vadd.f32 q0, q0, q8\n" in Transform()
6739 "vadd.f32 q1, q1, q8\n" in Transform()
6740 "vadd.f32 q2, q2, q8\n" in Transform()
6741 "vadd.f32 q3, q3, q8\n" in Transform()
6742 "vadd.f32 q4, q4, q10\n" in Transform()
6743 "vadd.f32 q5, q5, q10\n" in Transform()
6744 "vadd.f32 q6, q6, q10\n" in Transform()
6745 "vadd.f32 q7, q7, q10\n" in Transform()
6746 "vadd.f32 q0, q0, q4\n" in Transform()
6747 "vadd.f32 q1, q1, q5\n" in Transform()
6748 "vadd.f32 q2, q2, q6\n" in Transform()
6749 "vadd.f32 q3, q3, q7\n" in Transform()
6750 "vsub.f32 q0, q0, q12\n" in Transform()
6751 "vsub.f32 q1, q1, q12\n" in Transform()
6752 "vsub.f32 q2, q2, q12\n" in Transform()
6753 "vsub.f32 q3, q3, q12\n" in Transform()
6754 "vmul.f32 q0, q0, q13\n" in Transform()
6755 "vmul.f32 q1, q1, q13\n" in Transform()
6756 "vmul.f32 q2, q2, q13\n" in Transform()
6757 "vmul.f32 q3, q3, q13\n" in Transform()
6758 "vadd.f32 q0, q0, q14\n" in Transform()
6759 "vadd.f32 q1, q1, q14\n" in Transform()
6760 "vadd.f32 q2, q2, q14\n" in Transform()
6761 "vadd.f32 q3, q3, q14\n" in Transform()
6762 "vcvt.s32.f32 q0, q0\n" in Transform()
6763 "vcvt.s32.f32 q1, q1\n" in Transform()
6764 "vcvt.s32.f32 q2, q2\n" in Transform()
6765 "vcvt.s32.f32 q3, q3\n" in Transform()
6767 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
6768 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
6769 "pld [%[output]]\n" in Transform()
6770 "bne 2b\n" in Transform()
6774 "vld1.32 {d0}, [%[input]]!\n" in Transform()
6775 "vld1.32 {d4}, [r1]!\n" in Transform()
6776 "pld [%[input], #32]\n" in Transform()
6777 "vmovl.u8 q0, d0\n" in Transform()
6778 "vmovl.u8 q2, d4\n" in Transform()
6779 "vmovl.s16 q1, d1\n" in Transform()
6780 "vmovl.s16 q0, d0\n" in Transform()
6781 "vmovl.s16 q3, d5\n" in Transform()
6782 "vmovl.s16 q2, d4\n" in Transform()
6783 "vcvt.f32.s32 q0, q0\n" in Transform()
6784 "vcvt.f32.s32 q1, q1\n" in Transform()
6785 "vcvt.f32.s32 q2, q2\n" in Transform()
6786 "vcvt.f32.s32 q3, q3\n" in Transform()
6787 "vmul.f32 q0, q0, q9\n" in Transform()
6788 "vmul.f32 q1, q1, q9\n" in Transform()
6789 "vmul.f32 q2, q2, q11\n" in Transform()
6790 "vmul.f32 q3, q3, q11\n" in Transform()
6791 "vadd.f32 q0, q0, q8\n" in Transform()
6792 "vadd.f32 q1, q1, q8\n" in Transform()
6793 "vadd.f32 q2, q2, q10\n" in Transform()
6794 "vadd.f32 q3, q3, q10\n" in Transform()
6795 "vadd.f32 q0, q0, q2\n" in Transform()
6796 "vadd.f32 q1, q1, q3\n" in Transform()
6797 "vsub.f32 q0, q0, q12\n" in Transform()
6798 "vsub.f32 q1, q1, q12\n" in Transform()
6799 "vmul.f32 q0, q0, q13\n" in Transform()
6800 "vmul.f32 q1, q1, q13\n" in Transform()
6801 "vadd.f32 q0, q0, q14\n" in Transform()
6802 "vadd.f32 q1, q1, q14\n" in Transform()
6803 "vcvt.s32.f32 q0, q0\n" in Transform()
6804 "vcvt.s32.f32 q1, q1\n" in Transform()
6806 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
6807 "pld [%[output]]\n" in Transform()
6808 "subs %[rows], %[rows], #1\n" in Transform()
6809 "bne 1b\n" in Transform()
6841 "ldr r0, %[input_range_min]\n" in Transform()
6842 "vdup.32 q8, r0\n" in Transform()
6843 "ldr r0, %[input_range_scale]\n" in Transform()
6844 "vdup.32 q9, r0\n" in Transform()
6845 "ldr r0, %[bias_range_min]\n" in Transform()
6846 "vdup.32 q10, r0\n" in Transform()
6847 "ldr r0, %[bias_range_scale]\n" in Transform()
6848 "vdup.32 q11, r0\n" in Transform()
6849 "ldr r0, %[output_range_min]\n" in Transform()
6850 "vdup.32 q12, r0\n" in Transform()
6851 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
6852 "vdup.32 q13, r0\n" in Transform()
6853 "ldr r0, %[output_range_offset]\n" in Transform()
6854 "vdup.32 q14, r0\n" in Transform()
6856 "mov r0, %[count]\n" in Transform()
6857 "mov r1, %[bias]\n" in Transform()
6858 "subs r0, r0, #9\n" in Transform()
6859 "beq 3f\n" in Transform()
6861 "subs r0, r0, #16\n" in Transform()
6864 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
6865 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
6866 "pld [%[input], #32]\n" in Transform()
6867 "vmovl.u8 q1, d1\n" in Transform()
6868 "vmovl.u8 q0, d0\n" in Transform()
6869 "vmovl.u8 q5, d9\n" in Transform()
6870 "vmovl.u8 q4, d8\n" in Transform()
6871 "vmovl.s16 q3, d3\n" in Transform()
6872 "vmovl.s16 q2, d2\n" in Transform()
6873 "vmovl.s16 q7, d11\n" in Transform()
6874 "vmovl.s16 q6, d10\n" in Transform()
6875 "vmovl.s16 q1, d1\n" in Transform()
6876 "vmovl.s16 q0, d0\n" in Transform()
6877 "vmovl.s16 q5, d9\n" in Transform()
6878 "vmovl.s16 q4, d8\n" in Transform()
6879 "vcvt.f32.s32 q0, q0\n" in Transform()
6880 "vcvt.f32.s32 q1, q1\n" in Transform()
6881 "vcvt.f32.s32 q2, q2\n" in Transform()
6882 "vcvt.f32.s32 q3, q3\n" in Transform()
6883 "vcvt.f32.s32 q4, q4\n" in Transform()
6884 "vcvt.f32.s32 q5, q5\n" in Transform()
6885 "vcvt.f32.s32 q6, q6\n" in Transform()
6886 "vcvt.f32.s32 q7, q7\n" in Transform()
6887 "vmul.f32 q0, q0, q9\n" in Transform()
6888 "vmul.f32 q1, q1, q9\n" in Transform()
6889 "vmul.f32 q2, q2, q9\n" in Transform()
6890 "vmul.f32 q3, q3, q9\n" in Transform()
6891 "vmul.f32 q4, q4, q11\n" in Transform()
6892 "vmul.f32 q5, q5, q11\n" in Transform()
6893 "vmul.f32 q6, q6, q11\n" in Transform()
6894 "vmul.f32 q7, q7, q11\n" in Transform()
6895 "vadd.f32 q0, q0, q8\n" in Transform()
6896 "vadd.f32 q1, q1, q8\n" in Transform()
6897 "vadd.f32 q2, q2, q8\n" in Transform()
6898 "vadd.f32 q3, q3, q8\n" in Transform()
6899 "vadd.f32 q4, q4, q10\n" in Transform()
6900 "vadd.f32 q5, q5, q10\n" in Transform()
6901 "vadd.f32 q6, q6, q10\n" in Transform()
6902 "vadd.f32 q7, q7, q10\n" in Transform()
6903 "vadd.f32 q0, q0, q4\n" in Transform()
6904 "vadd.f32 q1, q1, q5\n" in Transform()
6905 "vadd.f32 q2, q2, q6\n" in Transform()
6906 "vadd.f32 q3, q3, q7\n" in Transform()
6907 "vsub.f32 q0, q0, q12\n" in Transform()
6908 "vsub.f32 q1, q1, q12\n" in Transform()
6909 "vsub.f32 q2, q2, q12\n" in Transform()
6910 "vsub.f32 q3, q3, q12\n" in Transform()
6911 "vmul.f32 q0, q0, q13\n" in Transform()
6912 "vmul.f32 q1, q1, q13\n" in Transform()
6913 "vmul.f32 q2, q2, q13\n" in Transform()
6914 "vmul.f32 q3, q3, q13\n" in Transform()
6915 "vadd.f32 q0, q0, q14\n" in Transform()
6916 "vadd.f32 q1, q1, q14\n" in Transform()
6917 "vadd.f32 q2, q2, q14\n" in Transform()
6918 "vadd.f32 q3, q3, q14\n" in Transform()
6919 "vcvt.s32.f32 q0, q0\n" in Transform()
6920 "vcvt.s32.f32 q1, q1\n" in Transform()
6921 "vcvt.s32.f32 q2, q2\n" in Transform()
6922 "vcvt.s32.f32 q3, q3\n" in Transform()
6924 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
6925 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
6926 "pld [%[output]]\n" in Transform()
6927 "bne 2b\n" in Transform()
6931 "vld1.32 {d0}, [%[input]]!\n" in Transform()
6932 "vld1.8 {d1[0]}, [%[input]]!\n" in Transform()
6933 "vld1.32 {d6}, [r1]!\n" in Transform()
6934 "vld1.8 {d7[0]}, [r1]!\n" in Transform()
6935 "pld [%[input], #32]\n" in Transform()
6936 "vmovl.u8 q1, d1\n" in Transform()
6937 "vmovl.u8 q0, d0\n" in Transform()
6938 "vmovl.u8 q4, d7\n" in Transform()
6939 "vmovl.u8 q3, d6\n" in Transform()
6940 "vmovl.s16 q2, d2\n" in Transform()
6941 "vmovl.s16 q5, d8\n" in Transform()
6942 "vmovl.s16 q1, d1\n" in Transform()
6943 "vmovl.s16 q0, d0\n" in Transform()
6944 "vmovl.s16 q4, d7\n" in Transform()
6945 "vmovl.s16 q3, d6\n" in Transform()
6946 "vcvt.f32.s32 q0, q0\n" in Transform()
6947 "vcvt.f32.s32 q1, q1\n" in Transform()
6948 "vcvt.f32.s32 q2, q2\n" in Transform()
6949 "vcvt.f32.s32 q3, q3\n" in Transform()
6950 "vcvt.f32.s32 q4, q4\n" in Transform()
6951 "vcvt.f32.s32 q5, q5\n" in Transform()
6952 "vmul.f32 q0, q0, q9\n" in Transform()
6953 "vmul.f32 q1, q1, q9\n" in Transform()
6954 "vmul.f32 q2, q2, q9\n" in Transform()
6955 "vmul.f32 q3, q3, q11\n" in Transform()
6956 "vmul.f32 q4, q4, q11\n" in Transform()
6957 "vmul.f32 q5, q5, q11\n" in Transform()
6958 "vadd.f32 q0, q0, q8\n" in Transform()
6959 "vadd.f32 q1, q1, q8\n" in Transform()
6960 "vadd.f32 q2, q2, q8\n" in Transform()
6961 "vadd.f32 q3, q3, q10\n" in Transform()
6962 "vadd.f32 q4, q4, q10\n" in Transform()
6963 "vadd.f32 q5, q5, q10\n" in Transform()
6964 "vadd.f32 q0, q0, q3\n" in Transform()
6965 "vadd.f32 q1, q1, q4\n" in Transform()
6966 "vadd.f32 q2, q2, q5\n" in Transform()
6967 "vsub.f32 q0, q0, q12\n" in Transform()
6968 "vsub.f32 q1, q1, q12\n" in Transform()
6969 "vsub.f32 q2, q2, q12\n" in Transform()
6970 "vmul.f32 q0, q0, q13\n" in Transform()
6971 "vmul.f32 q1, q1, q13\n" in Transform()
6972 "vmul.f32 q2, q2, q13\n" in Transform()
6973 "vadd.f32 q0, q0, q14\n" in Transform()
6974 "vadd.f32 q1, q1, q14\n" in Transform()
6975 "vadd.f32 q2, q2, q14\n" in Transform()
6976 "vcvt.s32.f32 q0, q0\n" in Transform()
6977 "vcvt.s32.f32 q1, q1\n" in Transform()
6978 "vcvt.s32.f32 q2, q2\n" in Transform()
6980 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
6981 "vst1.32 {d4[0]}, [%[output]]!\n" in Transform()
6982 "pld [%[output]]\n" in Transform()
6983 "subs %[rows], %[rows], #1\n" in Transform()
6984 "bne 1b\n" in Transform()
7016 "ldr r0, %[input_range_min]\n" in Transform()
7017 "vdup.32 q8, r0\n" in Transform()
7018 "ldr r0, %[input_range_scale]\n" in Transform()
7019 "vdup.32 q9, r0\n" in Transform()
7020 "ldr r0, %[bias_range_min]\n" in Transform()
7021 "vdup.32 q10, r0\n" in Transform()
7022 "ldr r0, %[bias_range_scale]\n" in Transform()
7023 "vdup.32 q11, r0\n" in Transform()
7024 "ldr r0, %[output_range_min]\n" in Transform()
7025 "vdup.32 q12, r0\n" in Transform()
7026 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
7027 "vdup.32 q13, r0\n" in Transform()
7028 "ldr r0, %[output_range_offset]\n" in Transform()
7029 "vdup.32 q14, r0\n" in Transform()
7031 "mov r0, %[count]\n" in Transform()
7032 "mov r1, %[bias]\n" in Transform()
7033 "subs r0, r0, #10\n" in Transform()
7034 "beq 3f\n" in Transform()
7036 "subs r0, r0, #16\n" in Transform()
7039 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
7040 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
7041 "pld [%[input], #32]\n" in Transform()
7042 "vmovl.u8 q1, d1\n" in Transform()
7043 "vmovl.u8 q0, d0\n" in Transform()
7044 "vmovl.u8 q5, d9\n" in Transform()
7045 "vmovl.u8 q4, d8\n" in Transform()
7046 "vmovl.s16 q3, d3\n" in Transform()
7047 "vmovl.s16 q2, d2\n" in Transform()
7048 "vmovl.s16 q7, d11\n" in Transform()
7049 "vmovl.s16 q6, d10\n" in Transform()
7050 "vmovl.s16 q1, d1\n" in Transform()
7051 "vmovl.s16 q0, d0\n" in Transform()
7052 "vmovl.s16 q5, d9\n" in Transform()
7053 "vmovl.s16 q4, d8\n" in Transform()
7054 "vcvt.f32.s32 q0, q0\n" in Transform()
7055 "vcvt.f32.s32 q1, q1\n" in Transform()
7056 "vcvt.f32.s32 q2, q2\n" in Transform()
7057 "vcvt.f32.s32 q3, q3\n" in Transform()
7058 "vcvt.f32.s32 q4, q4\n" in Transform()
7059 "vcvt.f32.s32 q5, q5\n" in Transform()
7060 "vcvt.f32.s32 q6, q6\n" in Transform()
7061 "vcvt.f32.s32 q7, q7\n" in Transform()
7062 "vmul.f32 q0, q0, q9\n" in Transform()
7063 "vmul.f32 q1, q1, q9\n" in Transform()
7064 "vmul.f32 q2, q2, q9\n" in Transform()
7065 "vmul.f32 q3, q3, q9\n" in Transform()
7066 "vmul.f32 q4, q4, q11\n" in Transform()
7067 "vmul.f32 q5, q5, q11\n" in Transform()
7068 "vmul.f32 q6, q6, q11\n" in Transform()
7069 "vmul.f32 q7, q7, q11\n" in Transform()
7070 "vadd.f32 q0, q0, q8\n" in Transform()
7071 "vadd.f32 q1, q1, q8\n" in Transform()
7072 "vadd.f32 q2, q2, q8\n" in Transform()
7073 "vadd.f32 q3, q3, q8\n" in Transform()
7074 "vadd.f32 q4, q4, q10\n" in Transform()
7075 "vadd.f32 q5, q5, q10\n" in Transform()
7076 "vadd.f32 q6, q6, q10\n" in Transform()
7077 "vadd.f32 q7, q7, q10\n" in Transform()
7078 "vadd.f32 q0, q0, q4\n" in Transform()
7079 "vadd.f32 q1, q1, q5\n" in Transform()
7080 "vadd.f32 q2, q2, q6\n" in Transform()
7081 "vadd.f32 q3, q3, q7\n" in Transform()
7082 "vsub.f32 q0, q0, q12\n" in Transform()
7083 "vsub.f32 q1, q1, q12\n" in Transform()
7084 "vsub.f32 q2, q2, q12\n" in Transform()
7085 "vsub.f32 q3, q3, q12\n" in Transform()
7086 "vmul.f32 q0, q0, q13\n" in Transform()
7087 "vmul.f32 q1, q1, q13\n" in Transform()
7088 "vmul.f32 q2, q2, q13\n" in Transform()
7089 "vmul.f32 q3, q3, q13\n" in Transform()
7090 "vadd.f32 q0, q0, q14\n" in Transform()
7091 "vadd.f32 q1, q1, q14\n" in Transform()
7092 "vadd.f32 q2, q2, q14\n" in Transform()
7093 "vadd.f32 q3, q3, q14\n" in Transform()
7094 "vcvt.s32.f32 q0, q0\n" in Transform()
7095 "vcvt.s32.f32 q1, q1\n" in Transform()
7096 "vcvt.s32.f32 q2, q2\n" in Transform()
7097 "vcvt.s32.f32 q3, q3\n" in Transform()
7099 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
7100 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
7101 "pld [%[output]]\n" in Transform()
7102 "bne 2b\n" in Transform()
7106 "vld1.32 {d0}, [%[input]]!\n" in Transform()
7107 "vld1.16 {d1[0]}, [%[input]]!\n" in Transform()
7108 "vld1.32 {d6}, [r1]!\n" in Transform()
7109 "vld1.16 {d7[0]}, [r1]!\n" in Transform()
7110 "pld [%[input], #32]\n" in Transform()
7111 "vmovl.u8 q1, d1\n" in Transform()
7112 "vmovl.u8 q0, d0\n" in Transform()
7113 "vmovl.u8 q4, d7\n" in Transform()
7114 "vmovl.u8 q3, d6\n" in Transform()
7115 "vmovl.s16 q2, d2\n" in Transform()
7116 "vmovl.s16 q5, d8\n" in Transform()
7117 "vmovl.s16 q1, d1\n" in Transform()
7118 "vmovl.s16 q0, d0\n" in Transform()
7119 "vmovl.s16 q4, d7\n" in Transform()
7120 "vmovl.s16 q3, d6\n" in Transform()
7121 "vcvt.f32.s32 q0, q0\n" in Transform()
7122 "vcvt.f32.s32 q1, q1\n" in Transform()
7123 "vcvt.f32.s32 q2, q2\n" in Transform()
7124 "vcvt.f32.s32 q3, q3\n" in Transform()
7125 "vcvt.f32.s32 q4, q4\n" in Transform()
7126 "vcvt.f32.s32 q5, q5\n" in Transform()
7127 "vmul.f32 q0, q0, q9\n" in Transform()
7128 "vmul.f32 q1, q1, q9\n" in Transform()
7129 "vmul.f32 q2, q2, q9\n" in Transform()
7130 "vmul.f32 q3, q3, q11\n" in Transform()
7131 "vmul.f32 q4, q4, q11\n" in Transform()
7132 "vmul.f32 q5, q5, q11\n" in Transform()
7133 "vadd.f32 q0, q0, q8\n" in Transform()
7134 "vadd.f32 q1, q1, q8\n" in Transform()
7135 "vadd.f32 q2, q2, q8\n" in Transform()
7136 "vadd.f32 q3, q3, q10\n" in Transform()
7137 "vadd.f32 q4, q4, q10\n" in Transform()
7138 "vadd.f32 q5, q5, q10\n" in Transform()
7139 "vadd.f32 q0, q0, q3\n" in Transform()
7140 "vadd.f32 q1, q1, q4\n" in Transform()
7141 "vadd.f32 q2, q2, q5\n" in Transform()
7142 "vsub.f32 q0, q0, q12\n" in Transform()
7143 "vsub.f32 q1, q1, q12\n" in Transform()
7144 "vsub.f32 q2, q2, q12\n" in Transform()
7145 "vmul.f32 q0, q0, q13\n" in Transform()
7146 "vmul.f32 q1, q1, q13\n" in Transform()
7147 "vmul.f32 q2, q2, q13\n" in Transform()
7148 "vadd.f32 q0, q0, q14\n" in Transform()
7149 "vadd.f32 q1, q1, q14\n" in Transform()
7150 "vadd.f32 q2, q2, q14\n" in Transform()
7151 "vcvt.s32.f32 q0, q0\n" in Transform()
7152 "vcvt.s32.f32 q1, q1\n" in Transform()
7153 "vcvt.s32.f32 q2, q2\n" in Transform()
7155 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
7156 "vst1.32 {d4}, [%[output]]!\n" in Transform()
7157 "pld [%[output]]\n" in Transform()
7158 "subs %[rows], %[rows], #1\n" in Transform()
7159 "bne 1b\n" in Transform()
7191 "ldr r0, %[input_range_min]\n" in Transform()
7192 "vdup.32 q8, r0\n" in Transform()
7193 "ldr r0, %[input_range_scale]\n" in Transform()
7194 "vdup.32 q9, r0\n" in Transform()
7195 "ldr r0, %[bias_range_min]\n" in Transform()
7196 "vdup.32 q10, r0\n" in Transform()
7197 "ldr r0, %[bias_range_scale]\n" in Transform()
7198 "vdup.32 q11, r0\n" in Transform()
7199 "ldr r0, %[output_range_min]\n" in Transform()
7200 "vdup.32 q12, r0\n" in Transform()
7201 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
7202 "vdup.32 q13, r0\n" in Transform()
7203 "ldr r0, %[output_range_offset]\n" in Transform()
7204 "vdup.32 q14, r0\n" in Transform()
7206 "mov r0, %[count]\n" in Transform()
7207 "mov r1, %[bias]\n" in Transform()
7208 "subs r0, r0, #11\n" in Transform()
7209 "beq 3f\n" in Transform()
7211 "subs r0, r0, #16\n" in Transform()
7214 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
7215 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
7216 "pld [%[input], #32]\n" in Transform()
7217 "vmovl.u8 q1, d1\n" in Transform()
7218 "vmovl.u8 q0, d0\n" in Transform()
7219 "vmovl.u8 q5, d9\n" in Transform()
7220 "vmovl.u8 q4, d8\n" in Transform()
7221 "vmovl.s16 q3, d3\n" in Transform()
7222 "vmovl.s16 q2, d2\n" in Transform()
7223 "vmovl.s16 q7, d11\n" in Transform()
7224 "vmovl.s16 q6, d10\n" in Transform()
7225 "vmovl.s16 q1, d1\n" in Transform()
7226 "vmovl.s16 q0, d0\n" in Transform()
7227 "vmovl.s16 q5, d9\n" in Transform()
7228 "vmovl.s16 q4, d8\n" in Transform()
7229 "vcvt.f32.s32 q0, q0\n" in Transform()
7230 "vcvt.f32.s32 q1, q1\n" in Transform()
7231 "vcvt.f32.s32 q2, q2\n" in Transform()
7232 "vcvt.f32.s32 q3, q3\n" in Transform()
7233 "vcvt.f32.s32 q4, q4\n" in Transform()
7234 "vcvt.f32.s32 q5, q5\n" in Transform()
7235 "vcvt.f32.s32 q6, q6\n" in Transform()
7236 "vcvt.f32.s32 q7, q7\n" in Transform()
7237 "vmul.f32 q0, q0, q9\n" in Transform()
7238 "vmul.f32 q1, q1, q9\n" in Transform()
7239 "vmul.f32 q2, q2, q9\n" in Transform()
7240 "vmul.f32 q3, q3, q9\n" in Transform()
7241 "vmul.f32 q4, q4, q11\n" in Transform()
7242 "vmul.f32 q5, q5, q11\n" in Transform()
7243 "vmul.f32 q6, q6, q11\n" in Transform()
7244 "vmul.f32 q7, q7, q11\n" in Transform()
7245 "vadd.f32 q0, q0, q8\n" in Transform()
7246 "vadd.f32 q1, q1, q8\n" in Transform()
7247 "vadd.f32 q2, q2, q8\n" in Transform()
7248 "vadd.f32 q3, q3, q8\n" in Transform()
7249 "vadd.f32 q4, q4, q10\n" in Transform()
7250 "vadd.f32 q5, q5, q10\n" in Transform()
7251 "vadd.f32 q6, q6, q10\n" in Transform()
7252 "vadd.f32 q7, q7, q10\n" in Transform()
7253 "vadd.f32 q0, q0, q4\n" in Transform()
7254 "vadd.f32 q1, q1, q5\n" in Transform()
7255 "vadd.f32 q2, q2, q6\n" in Transform()
7256 "vadd.f32 q3, q3, q7\n" in Transform()
7257 "vsub.f32 q0, q0, q12\n" in Transform()
7258 "vsub.f32 q1, q1, q12\n" in Transform()
7259 "vsub.f32 q2, q2, q12\n" in Transform()
7260 "vsub.f32 q3, q3, q12\n" in Transform()
7261 "vmul.f32 q0, q0, q13\n" in Transform()
7262 "vmul.f32 q1, q1, q13\n" in Transform()
7263 "vmul.f32 q2, q2, q13\n" in Transform()
7264 "vmul.f32 q3, q3, q13\n" in Transform()
7265 "vadd.f32 q0, q0, q14\n" in Transform()
7266 "vadd.f32 q1, q1, q14\n" in Transform()
7267 "vadd.f32 q2, q2, q14\n" in Transform()
7268 "vadd.f32 q3, q3, q14\n" in Transform()
7269 "vcvt.s32.f32 q0, q0\n" in Transform()
7270 "vcvt.s32.f32 q1, q1\n" in Transform()
7271 "vcvt.s32.f32 q2, q2\n" in Transform()
7272 "vcvt.s32.f32 q3, q3\n" in Transform()
7274 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
7275 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
7276 "pld [%[output]]\n" in Transform()
7277 "bne 2b\n" in Transform()
7281 "vld1.32 {d0}, [%[input]]!\n" in Transform()
7282 "vld1.16 {d1[0]}, [%[input]]!\n" in Transform()
7283 "vld1.8 {d1[2]}, [%[input]]!\n" in Transform()
7284 "vld1.32 {d6}, [r1]!\n" in Transform()
7285 "vld1.16 {d7[0]}, [r1]!\n" in Transform()
7286 "vld1.8 {d7[2]}, [r1]!\n" in Transform()
7287 "pld [%[input], #32]\n" in Transform()
7288 "vmovl.u8 q1, d1\n" in Transform()
7289 "vmovl.u8 q0, d0\n" in Transform()
7290 "vmovl.u8 q4, d7\n" in Transform()
7291 "vmovl.u8 q3, d6\n" in Transform()
7292 "vmovl.s16 q2, d2\n" in Transform()
7293 "vmovl.s16 q5, d8\n" in Transform()
7294 "vmovl.s16 q1, d1\n" in Transform()
7295 "vmovl.s16 q0, d0\n" in Transform()
7296 "vmovl.s16 q4, d7\n" in Transform()
7297 "vmovl.s16 q3, d6\n" in Transform()
7298 "vcvt.f32.s32 q0, q0\n" in Transform()
7299 "vcvt.f32.s32 q1, q1\n" in Transform()
7300 "vcvt.f32.s32 q2, q2\n" in Transform()
7301 "vcvt.f32.s32 q3, q3\n" in Transform()
7302 "vcvt.f32.s32 q4, q4\n" in Transform()
7303 "vcvt.f32.s32 q5, q5\n" in Transform()
7304 "vmul.f32 q0, q0, q9\n" in Transform()
7305 "vmul.f32 q1, q1, q9\n" in Transform()
7306 "vmul.f32 q2, q2, q9\n" in Transform()
7307 "vmul.f32 q3, q3, q11\n" in Transform()
7308 "vmul.f32 q4, q4, q11\n" in Transform()
7309 "vmul.f32 q5, q5, q11\n" in Transform()
7310 "vadd.f32 q0, q0, q8\n" in Transform()
7311 "vadd.f32 q1, q1, q8\n" in Transform()
7312 "vadd.f32 q2, q2, q8\n" in Transform()
7313 "vadd.f32 q3, q3, q10\n" in Transform()
7314 "vadd.f32 q4, q4, q10\n" in Transform()
7315 "vadd.f32 q5, q5, q10\n" in Transform()
7316 "vadd.f32 q0, q0, q3\n" in Transform()
7317 "vadd.f32 q1, q1, q4\n" in Transform()
7318 "vadd.f32 q2, q2, q5\n" in Transform()
7319 "vsub.f32 q0, q0, q12\n" in Transform()
7320 "vsub.f32 q1, q1, q12\n" in Transform()
7321 "vsub.f32 q2, q2, q12\n" in Transform()
7322 "vmul.f32 q0, q0, q13\n" in Transform()
7323 "vmul.f32 q1, q1, q13\n" in Transform()
7324 "vmul.f32 q2, q2, q13\n" in Transform()
7325 "vadd.f32 q0, q0, q14\n" in Transform()
7326 "vadd.f32 q1, q1, q14\n" in Transform()
7327 "vadd.f32 q2, q2, q14\n" in Transform()
7328 "vcvt.s32.f32 q0, q0\n" in Transform()
7329 "vcvt.s32.f32 q1, q1\n" in Transform()
7330 "vcvt.s32.f32 q2, q2\n" in Transform()
7332 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
7333 "vst1.32 {d4}, [%[output]]!\n" in Transform()
7334 "vst1.32 {d5[0]}, [%[output]]!\n" in Transform()
7335 "pld [%[output]]\n" in Transform()
7336 "subs %[rows], %[rows], #1\n" in Transform()
7337 "bne 1b\n" in Transform()
7369 "ldr r0, %[input_range_min]\n" in Transform()
7370 "vdup.32 q8, r0\n" in Transform()
7371 "ldr r0, %[input_range_scale]\n" in Transform()
7372 "vdup.32 q9, r0\n" in Transform()
7373 "ldr r0, %[bias_range_min]\n" in Transform()
7374 "vdup.32 q10, r0\n" in Transform()
7375 "ldr r0, %[bias_range_scale]\n" in Transform()
7376 "vdup.32 q11, r0\n" in Transform()
7377 "ldr r0, %[output_range_min]\n" in Transform()
7378 "vdup.32 q12, r0\n" in Transform()
7379 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
7380 "vdup.32 q13, r0\n" in Transform()
7381 "ldr r0, %[output_range_offset]\n" in Transform()
7382 "vdup.32 q14, r0\n" in Transform()
7384 "mov r0, %[count]\n" in Transform()
7385 "mov r1, %[bias]\n" in Transform()
7386 "subs r0, r0, #12\n" in Transform()
7387 "beq 3f\n" in Transform()
7389 "subs r0, r0, #16\n" in Transform()
7392 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
7393 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
7394 "pld [%[input], #32]\n" in Transform()
7395 "vmovl.u8 q1, d1\n" in Transform()
7396 "vmovl.u8 q0, d0\n" in Transform()
7397 "vmovl.u8 q5, d9\n" in Transform()
7398 "vmovl.u8 q4, d8\n" in Transform()
7399 "vmovl.s16 q3, d3\n" in Transform()
7400 "vmovl.s16 q2, d2\n" in Transform()
7401 "vmovl.s16 q7, d11\n" in Transform()
7402 "vmovl.s16 q6, d10\n" in Transform()
7403 "vmovl.s16 q1, d1\n" in Transform()
7404 "vmovl.s16 q0, d0\n" in Transform()
7405 "vmovl.s16 q5, d9\n" in Transform()
7406 "vmovl.s16 q4, d8\n" in Transform()
7407 "vcvt.f32.s32 q0, q0\n" in Transform()
7408 "vcvt.f32.s32 q1, q1\n" in Transform()
7409 "vcvt.f32.s32 q2, q2\n" in Transform()
7410 "vcvt.f32.s32 q3, q3\n" in Transform()
7411 "vcvt.f32.s32 q4, q4\n" in Transform()
7412 "vcvt.f32.s32 q5, q5\n" in Transform()
7413 "vcvt.f32.s32 q6, q6\n" in Transform()
7414 "vcvt.f32.s32 q7, q7\n" in Transform()
7415 "vmul.f32 q0, q0, q9\n" in Transform()
7416 "vmul.f32 q1, q1, q9\n" in Transform()
7417 "vmul.f32 q2, q2, q9\n" in Transform()
7418 "vmul.f32 q3, q3, q9\n" in Transform()
7419 "vmul.f32 q4, q4, q11\n" in Transform()
7420 "vmul.f32 q5, q5, q11\n" in Transform()
7421 "vmul.f32 q6, q6, q11\n" in Transform()
7422 "vmul.f32 q7, q7, q11\n" in Transform()
7423 "vadd.f32 q0, q0, q8\n" in Transform()
7424 "vadd.f32 q1, q1, q8\n" in Transform()
7425 "vadd.f32 q2, q2, q8\n" in Transform()
7426 "vadd.f32 q3, q3, q8\n" in Transform()
7427 "vadd.f32 q4, q4, q10\n" in Transform()
7428 "vadd.f32 q5, q5, q10\n" in Transform()
7429 "vadd.f32 q6, q6, q10\n" in Transform()
7430 "vadd.f32 q7, q7, q10\n" in Transform()
7431 "vadd.f32 q0, q0, q4\n" in Transform()
7432 "vadd.f32 q1, q1, q5\n" in Transform()
7433 "vadd.f32 q2, q2, q6\n" in Transform()
7434 "vadd.f32 q3, q3, q7\n" in Transform()
7435 "vsub.f32 q0, q0, q12\n" in Transform()
7436 "vsub.f32 q1, q1, q12\n" in Transform()
7437 "vsub.f32 q2, q2, q12\n" in Transform()
7438 "vsub.f32 q3, q3, q12\n" in Transform()
7439 "vmul.f32 q0, q0, q13\n" in Transform()
7440 "vmul.f32 q1, q1, q13\n" in Transform()
7441 "vmul.f32 q2, q2, q13\n" in Transform()
7442 "vmul.f32 q3, q3, q13\n" in Transform()
7443 "vadd.f32 q0, q0, q14\n" in Transform()
7444 "vadd.f32 q1, q1, q14\n" in Transform()
7445 "vadd.f32 q2, q2, q14\n" in Transform()
7446 "vadd.f32 q3, q3, q14\n" in Transform()
7447 "vcvt.s32.f32 q0, q0\n" in Transform()
7448 "vcvt.s32.f32 q1, q1\n" in Transform()
7449 "vcvt.s32.f32 q2, q2\n" in Transform()
7450 "vcvt.s32.f32 q3, q3\n" in Transform()
7452 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
7453 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
7454 "pld [%[output]]\n" in Transform()
7455 "bne 2b\n" in Transform()
7459 "vld1.32 {d0}, [%[input]]!\n" in Transform()
7460 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
7461 "vld1.32 {d6}, [r1]!\n" in Transform()
7462 "vld1.32 {d7[0]}, [r1]!\n" in Transform()
7463 "pld [%[input], #32]\n" in Transform()
7464 "vmovl.u8 q1, d1\n" in Transform()
7465 "vmovl.u8 q0, d0\n" in Transform()
7466 "vmovl.u8 q4, d7\n" in Transform()
7467 "vmovl.u8 q3, d6\n" in Transform()
7468 "vmovl.s16 q2, d2\n" in Transform()
7469 "vmovl.s16 q5, d8\n" in Transform()
7470 "vmovl.s16 q1, d1\n" in Transform()
7471 "vmovl.s16 q0, d0\n" in Transform()
7472 "vmovl.s16 q4, d7\n" in Transform()
7473 "vmovl.s16 q3, d6\n" in Transform()
7474 "vcvt.f32.s32 q0, q0\n" in Transform()
7475 "vcvt.f32.s32 q1, q1\n" in Transform()
7476 "vcvt.f32.s32 q2, q2\n" in Transform()
7477 "vcvt.f32.s32 q3, q3\n" in Transform()
7478 "vcvt.f32.s32 q4, q4\n" in Transform()
7479 "vcvt.f32.s32 q5, q5\n" in Transform()
7480 "vmul.f32 q0, q0, q9\n" in Transform()
7481 "vmul.f32 q1, q1, q9\n" in Transform()
7482 "vmul.f32 q2, q2, q9\n" in Transform()
7483 "vmul.f32 q3, q3, q11\n" in Transform()
7484 "vmul.f32 q4, q4, q11\n" in Transform()
7485 "vmul.f32 q5, q5, q11\n" in Transform()
7486 "vadd.f32 q0, q0, q8\n" in Transform()
7487 "vadd.f32 q1, q1, q8\n" in Transform()
7488 "vadd.f32 q2, q2, q8\n" in Transform()
7489 "vadd.f32 q3, q3, q10\n" in Transform()
7490 "vadd.f32 q4, q4, q10\n" in Transform()
7491 "vadd.f32 q5, q5, q10\n" in Transform()
7492 "vadd.f32 q0, q0, q3\n" in Transform()
7493 "vadd.f32 q1, q1, q4\n" in Transform()
7494 "vadd.f32 q2, q2, q5\n" in Transform()
7495 "vsub.f32 q0, q0, q12\n" in Transform()
7496 "vsub.f32 q1, q1, q12\n" in Transform()
7497 "vsub.f32 q2, q2, q12\n" in Transform()
7498 "vmul.f32 q0, q0, q13\n" in Transform()
7499 "vmul.f32 q1, q1, q13\n" in Transform()
7500 "vmul.f32 q2, q2, q13\n" in Transform()
7501 "vadd.f32 q0, q0, q14\n" in Transform()
7502 "vadd.f32 q1, q1, q14\n" in Transform()
7503 "vadd.f32 q2, q2, q14\n" in Transform()
7504 "vcvt.s32.f32 q0, q0\n" in Transform()
7505 "vcvt.s32.f32 q1, q1\n" in Transform()
7506 "vcvt.s32.f32 q2, q2\n" in Transform()
7508 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
7509 "vst1.32 {d4, d5}, [%[output]]!\n" in Transform()
7510 "pld [%[output]]\n" in Transform()
7511 "subs %[rows], %[rows], #1\n" in Transform()
7512 "bne 1b\n" in Transform()
7544 "ldr r0, %[input_range_min]\n" in Transform()
7545 "vdup.32 q8, r0\n" in Transform()
7546 "ldr r0, %[input_range_scale]\n" in Transform()
7547 "vdup.32 q9, r0\n" in Transform()
7548 "ldr r0, %[bias_range_min]\n" in Transform()
7549 "vdup.32 q10, r0\n" in Transform()
7550 "ldr r0, %[bias_range_scale]\n" in Transform()
7551 "vdup.32 q11, r0\n" in Transform()
7552 "ldr r0, %[output_range_min]\n" in Transform()
7553 "vdup.32 q12, r0\n" in Transform()
7554 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
7555 "vdup.32 q13, r0\n" in Transform()
7556 "ldr r0, %[output_range_offset]\n" in Transform()
7557 "vdup.32 q14, r0\n" in Transform()
7559 "mov r0, %[count]\n" in Transform()
7560 "mov r1, %[bias]\n" in Transform()
7561 "subs r0, r0, #13\n" in Transform()
7562 "beq 3f\n" in Transform()
7564 "subs r0, r0, #16\n" in Transform()
7567 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
7568 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
7569 "pld [%[input], #32]\n" in Transform()
7570 "vmovl.u8 q1, d1\n" in Transform()
7571 "vmovl.u8 q0, d0\n" in Transform()
7572 "vmovl.u8 q5, d9\n" in Transform()
7573 "vmovl.u8 q4, d8\n" in Transform()
7574 "vmovl.s16 q3, d3\n" in Transform()
7575 "vmovl.s16 q2, d2\n" in Transform()
7576 "vmovl.s16 q7, d11\n" in Transform()
7577 "vmovl.s16 q6, d10\n" in Transform()
7578 "vmovl.s16 q1, d1\n" in Transform()
7579 "vmovl.s16 q0, d0\n" in Transform()
7580 "vmovl.s16 q5, d9\n" in Transform()
7581 "vmovl.s16 q4, d8\n" in Transform()
7582 "vcvt.f32.s32 q0, q0\n" in Transform()
7583 "vcvt.f32.s32 q1, q1\n" in Transform()
7584 "vcvt.f32.s32 q2, q2\n" in Transform()
7585 "vcvt.f32.s32 q3, q3\n" in Transform()
7586 "vcvt.f32.s32 q4, q4\n" in Transform()
7587 "vcvt.f32.s32 q5, q5\n" in Transform()
7588 "vcvt.f32.s32 q6, q6\n" in Transform()
7589 "vcvt.f32.s32 q7, q7\n" in Transform()
7590 "vmul.f32 q0, q0, q9\n" in Transform()
7591 "vmul.f32 q1, q1, q9\n" in Transform()
7592 "vmul.f32 q2, q2, q9\n" in Transform()
7593 "vmul.f32 q3, q3, q9\n" in Transform()
7594 "vmul.f32 q4, q4, q11\n" in Transform()
7595 "vmul.f32 q5, q5, q11\n" in Transform()
7596 "vmul.f32 q6, q6, q11\n" in Transform()
7597 "vmul.f32 q7, q7, q11\n" in Transform()
7598 "vadd.f32 q0, q0, q8\n" in Transform()
7599 "vadd.f32 q1, q1, q8\n" in Transform()
7600 "vadd.f32 q2, q2, q8\n" in Transform()
7601 "vadd.f32 q3, q3, q8\n" in Transform()
7602 "vadd.f32 q4, q4, q10\n" in Transform()
7603 "vadd.f32 q5, q5, q10\n" in Transform()
7604 "vadd.f32 q6, q6, q10\n" in Transform()
7605 "vadd.f32 q7, q7, q10\n" in Transform()
7606 "vadd.f32 q0, q0, q4\n" in Transform()
7607 "vadd.f32 q1, q1, q5\n" in Transform()
7608 "vadd.f32 q2, q2, q6\n" in Transform()
7609 "vadd.f32 q3, q3, q7\n" in Transform()
7610 "vsub.f32 q0, q0, q12\n" in Transform()
7611 "vsub.f32 q1, q1, q12\n" in Transform()
7612 "vsub.f32 q2, q2, q12\n" in Transform()
7613 "vsub.f32 q3, q3, q12\n" in Transform()
7614 "vmul.f32 q0, q0, q13\n" in Transform()
7615 "vmul.f32 q1, q1, q13\n" in Transform()
7616 "vmul.f32 q2, q2, q13\n" in Transform()
7617 "vmul.f32 q3, q3, q13\n" in Transform()
7618 "vadd.f32 q0, q0, q14\n" in Transform()
7619 "vadd.f32 q1, q1, q14\n" in Transform()
7620 "vadd.f32 q2, q2, q14\n" in Transform()
7621 "vadd.f32 q3, q3, q14\n" in Transform()
7622 "vcvt.s32.f32 q0, q0\n" in Transform()
7623 "vcvt.s32.f32 q1, q1\n" in Transform()
7624 "vcvt.s32.f32 q2, q2\n" in Transform()
7625 "vcvt.s32.f32 q3, q3\n" in Transform()
7627 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
7628 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
7629 "pld [%[output]]\n" in Transform()
7630 "bne 2b\n" in Transform()
7634 "vld1.32 {d0}, [%[input]]!\n" in Transform()
7635 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
7636 "vld1.8 {d1[4]}, [%[input]]!\n" in Transform()
7637 "vld1.32 {d8}, [r1]!\n" in Transform()
7638 "vld1.32 {d9[0]}, [r1]!\n" in Transform()
7639 "vld1.8 {d9[4]}, [r1]!\n" in Transform()
7640 "pld [%[input], #32]\n" in Transform()
7641 "vmovl.u8 q1, d1\n" in Transform()
7642 "vmovl.u8 q0, d0\n" in Transform()
7643 "vmovl.u8 q5, d9\n" in Transform()
7644 "vmovl.u8 q4, d8\n" in Transform()
7645 "vmovl.s16 q3, d3\n" in Transform()
7646 "vmovl.s16 q2, d2\n" in Transform()
7647 "vmovl.s16 q7, d11\n" in Transform()
7648 "vmovl.s16 q6, d10\n" in Transform()
7649 "vmovl.s16 q1, d1\n" in Transform()
7650 "vmovl.s16 q0, d0\n" in Transform()
7651 "vmovl.s16 q5, d9\n" in Transform()
7652 "vmovl.s16 q4, d8\n" in Transform()
7653 "vcvt.f32.s32 q0, q0\n" in Transform()
7654 "vcvt.f32.s32 q1, q1\n" in Transform()
7655 "vcvt.f32.s32 q2, q2\n" in Transform()
7656 "vcvt.f32.s32 q3, q3\n" in Transform()
7657 "vcvt.f32.s32 q4, q4\n" in Transform()
7658 "vcvt.f32.s32 q5, q5\n" in Transform()
7659 "vcvt.f32.s32 q6, q6\n" in Transform()
7660 "vcvt.f32.s32 q7, q7\n" in Transform()
7661 "vmul.f32 q0, q0, q9\n" in Transform()
7662 "vmul.f32 q1, q1, q9\n" in Transform()
7663 "vmul.f32 q2, q2, q9\n" in Transform()
7664 "vmul.f32 q3, q3, q9\n" in Transform()
7665 "vmul.f32 q4, q4, q11\n" in Transform()
7666 "vmul.f32 q5, q5, q11\n" in Transform()
7667 "vmul.f32 q6, q6, q11\n" in Transform()
7668 "vmul.f32 q7, q7, q11\n" in Transform()
7669 "vadd.f32 q0, q0, q8\n" in Transform()
7670 "vadd.f32 q1, q1, q8\n" in Transform()
7671 "vadd.f32 q2, q2, q8\n" in Transform()
7672 "vadd.f32 q3, q3, q8\n" in Transform()
7673 "vadd.f32 q4, q4, q10\n" in Transform()
7674 "vadd.f32 q5, q5, q10\n" in Transform()
7675 "vadd.f32 q6, q6, q10\n" in Transform()
7676 "vadd.f32 q7, q7, q10\n" in Transform()
7677 "vadd.f32 q0, q0, q4\n" in Transform()
7678 "vadd.f32 q1, q1, q5\n" in Transform()
7679 "vadd.f32 q2, q2, q6\n" in Transform()
7680 "vadd.f32 q3, q3, q7\n" in Transform()
7681 "vsub.f32 q0, q0, q12\n" in Transform()
7682 "vsub.f32 q1, q1, q12\n" in Transform()
7683 "vsub.f32 q2, q2, q12\n" in Transform()
7684 "vsub.f32 q3, q3, q12\n" in Transform()
7685 "vmul.f32 q0, q0, q13\n" in Transform()
7686 "vmul.f32 q1, q1, q13\n" in Transform()
7687 "vmul.f32 q2, q2, q13\n" in Transform()
7688 "vmul.f32 q3, q3, q13\n" in Transform()
7689 "vadd.f32 q0, q0, q14\n" in Transform()
7690 "vadd.f32 q1, q1, q14\n" in Transform()
7691 "vadd.f32 q2, q2, q14\n" in Transform()
7692 "vadd.f32 q3, q3, q14\n" in Transform()
7693 "vcvt.s32.f32 q0, q0\n" in Transform()
7694 "vcvt.s32.f32 q1, q1\n" in Transform()
7695 "vcvt.s32.f32 q2, q2\n" in Transform()
7696 "vcvt.s32.f32 q3, q3\n" in Transform()
7698 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
7699 "vst1.32 {d4, d5}, [%[output]]!\n" in Transform()
7700 "vst1.32 {d6[0]}, [%[output]]!\n" in Transform()
7701 "pld [%[output]]\n" in Transform()
7702 "subs %[rows], %[rows], #1\n" in Transform()
7703 "bne 1b\n" in Transform()
7735 "ldr r0, %[input_range_min]\n" in Transform()
7736 "vdup.32 q8, r0\n" in Transform()
7737 "ldr r0, %[input_range_scale]\n" in Transform()
7738 "vdup.32 q9, r0\n" in Transform()
7739 "ldr r0, %[bias_range_min]\n" in Transform()
7740 "vdup.32 q10, r0\n" in Transform()
7741 "ldr r0, %[bias_range_scale]\n" in Transform()
7742 "vdup.32 q11, r0\n" in Transform()
7743 "ldr r0, %[output_range_min]\n" in Transform()
7744 "vdup.32 q12, r0\n" in Transform()
7745 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
7746 "vdup.32 q13, r0\n" in Transform()
7747 "ldr r0, %[output_range_offset]\n" in Transform()
7748 "vdup.32 q14, r0\n" in Transform()
7750 "mov r0, %[count]\n" in Transform()
7751 "mov r1, %[bias]\n" in Transform()
7752 "subs r0, r0, #14\n" in Transform()
7753 "beq 3f\n" in Transform()
7755 "subs r0, r0, #16\n" in Transform()
7758 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
7759 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
7760 "pld [%[input], #32]\n" in Transform()
7761 "vmovl.u8 q1, d1\n" in Transform()
7762 "vmovl.u8 q0, d0\n" in Transform()
7763 "vmovl.u8 q5, d9\n" in Transform()
7764 "vmovl.u8 q4, d8\n" in Transform()
7765 "vmovl.s16 q3, d3\n" in Transform()
7766 "vmovl.s16 q2, d2\n" in Transform()
7767 "vmovl.s16 q7, d11\n" in Transform()
7768 "vmovl.s16 q6, d10\n" in Transform()
7769 "vmovl.s16 q1, d1\n" in Transform()
7770 "vmovl.s16 q0, d0\n" in Transform()
7771 "vmovl.s16 q5, d9\n" in Transform()
7772 "vmovl.s16 q4, d8\n" in Transform()
7773 "vcvt.f32.s32 q0, q0\n" in Transform()
7774 "vcvt.f32.s32 q1, q1\n" in Transform()
7775 "vcvt.f32.s32 q2, q2\n" in Transform()
7776 "vcvt.f32.s32 q3, q3\n" in Transform()
7777 "vcvt.f32.s32 q4, q4\n" in Transform()
7778 "vcvt.f32.s32 q5, q5\n" in Transform()
7779 "vcvt.f32.s32 q6, q6\n" in Transform()
7780 "vcvt.f32.s32 q7, q7\n" in Transform()
7781 "vmul.f32 q0, q0, q9\n" in Transform()
7782 "vmul.f32 q1, q1, q9\n" in Transform()
7783 "vmul.f32 q2, q2, q9\n" in Transform()
7784 "vmul.f32 q3, q3, q9\n" in Transform()
7785 "vmul.f32 q4, q4, q11\n" in Transform()
7786 "vmul.f32 q5, q5, q11\n" in Transform()
7787 "vmul.f32 q6, q6, q11\n" in Transform()
7788 "vmul.f32 q7, q7, q11\n" in Transform()
7789 "vadd.f32 q0, q0, q8\n" in Transform()
7790 "vadd.f32 q1, q1, q8\n" in Transform()
7791 "vadd.f32 q2, q2, q8\n" in Transform()
7792 "vadd.f32 q3, q3, q8\n" in Transform()
7793 "vadd.f32 q4, q4, q10\n" in Transform()
7794 "vadd.f32 q5, q5, q10\n" in Transform()
7795 "vadd.f32 q6, q6, q10\n" in Transform()
7796 "vadd.f32 q7, q7, q10\n" in Transform()
7797 "vadd.f32 q0, q0, q4\n" in Transform()
7798 "vadd.f32 q1, q1, q5\n" in Transform()
7799 "vadd.f32 q2, q2, q6\n" in Transform()
7800 "vadd.f32 q3, q3, q7\n" in Transform()
7801 "vsub.f32 q0, q0, q12\n" in Transform()
7802 "vsub.f32 q1, q1, q12\n" in Transform()
7803 "vsub.f32 q2, q2, q12\n" in Transform()
7804 "vsub.f32 q3, q3, q12\n" in Transform()
7805 "vmul.f32 q0, q0, q13\n" in Transform()
7806 "vmul.f32 q1, q1, q13\n" in Transform()
7807 "vmul.f32 q2, q2, q13\n" in Transform()
7808 "vmul.f32 q3, q3, q13\n" in Transform()
7809 "vadd.f32 q0, q0, q14\n" in Transform()
7810 "vadd.f32 q1, q1, q14\n" in Transform()
7811 "vadd.f32 q2, q2, q14\n" in Transform()
7812 "vadd.f32 q3, q3, q14\n" in Transform()
7813 "vcvt.s32.f32 q0, q0\n" in Transform()
7814 "vcvt.s32.f32 q1, q1\n" in Transform()
7815 "vcvt.s32.f32 q2, q2\n" in Transform()
7816 "vcvt.s32.f32 q3, q3\n" in Transform()
7818 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
7819 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
7820 "pld [%[output]]\n" in Transform()
7821 "bne 2b\n" in Transform()
7825 "vld1.32 {d0}, [%[input]]!\n" in Transform()
7826 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
7827 "vld1.16 {d1[2]}, [%[input]]!\n" in Transform()
7828 "vld1.32 {d8}, [r1]!\n" in Transform()
7829 "vld1.32 {d9[0]}, [r1]!\n" in Transform()
7830 "vld1.16 {d9[2]}, [r1]!\n" in Transform()
7831 "pld [%[input], #32]\n" in Transform()
7832 "vmovl.u8 q1, d1\n" in Transform()
7833 "vmovl.u8 q0, d0\n" in Transform()
7834 "vmovl.u8 q5, d9\n" in Transform()
7835 "vmovl.u8 q4, d8\n" in Transform()
7836 "vmovl.s16 q3, d3\n" in Transform()
7837 "vmovl.s16 q2, d2\n" in Transform()
7838 "vmovl.s16 q7, d11\n" in Transform()
7839 "vmovl.s16 q6, d10\n" in Transform()
7840 "vmovl.s16 q1, d1\n" in Transform()
7841 "vmovl.s16 q0, d0\n" in Transform()
7842 "vmovl.s16 q5, d9\n" in Transform()
7843 "vmovl.s16 q4, d8\n" in Transform()
7844 "vcvt.f32.s32 q0, q0\n" in Transform()
7845 "vcvt.f32.s32 q1, q1\n" in Transform()
7846 "vcvt.f32.s32 q2, q2\n" in Transform()
7847 "vcvt.f32.s32 q3, q3\n" in Transform()
7848 "vcvt.f32.s32 q4, q4\n" in Transform()
7849 "vcvt.f32.s32 q5, q5\n" in Transform()
7850 "vcvt.f32.s32 q6, q6\n" in Transform()
7851 "vcvt.f32.s32 q7, q7\n" in Transform()
7852 "vmul.f32 q0, q0, q9\n" in Transform()
7853 "vmul.f32 q1, q1, q9\n" in Transform()
7854 "vmul.f32 q2, q2, q9\n" in Transform()
7855 "vmul.f32 q3, q3, q9\n" in Transform()
7856 "vmul.f32 q4, q4, q11\n" in Transform()
7857 "vmul.f32 q5, q5, q11\n" in Transform()
7858 "vmul.f32 q6, q6, q11\n" in Transform()
7859 "vmul.f32 q7, q7, q11\n" in Transform()
7860 "vadd.f32 q0, q0, q8\n" in Transform()
7861 "vadd.f32 q1, q1, q8\n" in Transform()
7862 "vadd.f32 q2, q2, q8\n" in Transform()
7863 "vadd.f32 q3, q3, q8\n" in Transform()
7864 "vadd.f32 q4, q4, q10\n" in Transform()
7865 "vadd.f32 q5, q5, q10\n" in Transform()
7866 "vadd.f32 q6, q6, q10\n" in Transform()
7867 "vadd.f32 q7, q7, q10\n" in Transform()
7868 "vadd.f32 q0, q0, q4\n" in Transform()
7869 "vadd.f32 q1, q1, q5\n" in Transform()
7870 "vadd.f32 q2, q2, q6\n" in Transform()
7871 "vadd.f32 q3, q3, q7\n" in Transform()
7872 "vsub.f32 q0, q0, q12\n" in Transform()
7873 "vsub.f32 q1, q1, q12\n" in Transform()
7874 "vsub.f32 q2, q2, q12\n" in Transform()
7875 "vsub.f32 q3, q3, q12\n" in Transform()
7876 "vmul.f32 q0, q0, q13\n" in Transform()
7877 "vmul.f32 q1, q1, q13\n" in Transform()
7878 "vmul.f32 q2, q2, q13\n" in Transform()
7879 "vmul.f32 q3, q3, q13\n" in Transform()
7880 "vadd.f32 q0, q0, q14\n" in Transform()
7881 "vadd.f32 q1, q1, q14\n" in Transform()
7882 "vadd.f32 q2, q2, q14\n" in Transform()
7883 "vadd.f32 q3, q3, q14\n" in Transform()
7884 "vcvt.s32.f32 q0, q0\n" in Transform()
7885 "vcvt.s32.f32 q1, q1\n" in Transform()
7886 "vcvt.s32.f32 q2, q2\n" in Transform()
7887 "vcvt.s32.f32 q3, q3\n" in Transform()
7889 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
7890 "vst1.32 {d4, d5, d6}, [%[output]]!\n" in Transform()
7891 "pld [%[output]]\n" in Transform()
7892 "subs %[rows], %[rows], #1\n" in Transform()
7893 "bne 1b\n" in Transform()
7925 "ldr r0, %[input_range_min]\n" in Transform()
7926 "vdup.32 q8, r0\n" in Transform()
7927 "ldr r0, %[input_range_scale]\n" in Transform()
7928 "vdup.32 q9, r0\n" in Transform()
7929 "ldr r0, %[bias_range_min]\n" in Transform()
7930 "vdup.32 q10, r0\n" in Transform()
7931 "ldr r0, %[bias_range_scale]\n" in Transform()
7932 "vdup.32 q11, r0\n" in Transform()
7933 "ldr r0, %[output_range_min]\n" in Transform()
7934 "vdup.32 q12, r0\n" in Transform()
7935 "ldr r0, %[one_over_output_range_scale]\n" in Transform()
7936 "vdup.32 q13, r0\n" in Transform()
7937 "ldr r0, %[output_range_offset]\n" in Transform()
7938 "vdup.32 q14, r0\n" in Transform()
7940 "mov r0, %[count]\n" in Transform()
7941 "mov r1, %[bias]\n" in Transform()
7942 "subs r0, r0, #15\n" in Transform()
7943 "beq 3f\n" in Transform()
7945 "subs r0, r0, #16\n" in Transform()
7948 "vld1.32 {d0, d1}, [%[input]]!\n" in Transform()
7949 "vld1.32 {d8, d9}, [r1]!\n" in Transform()
7950 "pld [%[input], #32]\n" in Transform()
7951 "vmovl.u8 q1, d1\n" in Transform()
7952 "vmovl.u8 q0, d0\n" in Transform()
7953 "vmovl.u8 q5, d9\n" in Transform()
7954 "vmovl.u8 q4, d8\n" in Transform()
7955 "vmovl.s16 q3, d3\n" in Transform()
7956 "vmovl.s16 q2, d2\n" in Transform()
7957 "vmovl.s16 q7, d11\n" in Transform()
7958 "vmovl.s16 q6, d10\n" in Transform()
7959 "vmovl.s16 q1, d1\n" in Transform()
7960 "vmovl.s16 q0, d0\n" in Transform()
7961 "vmovl.s16 q5, d9\n" in Transform()
7962 "vmovl.s16 q4, d8\n" in Transform()
7963 "vcvt.f32.s32 q0, q0\n" in Transform()
7964 "vcvt.f32.s32 q1, q1\n" in Transform()
7965 "vcvt.f32.s32 q2, q2\n" in Transform()
7966 "vcvt.f32.s32 q3, q3\n" in Transform()
7967 "vcvt.f32.s32 q4, q4\n" in Transform()
7968 "vcvt.f32.s32 q5, q5\n" in Transform()
7969 "vcvt.f32.s32 q6, q6\n" in Transform()
7970 "vcvt.f32.s32 q7, q7\n" in Transform()
7971 "vmul.f32 q0, q0, q9\n" in Transform()
7972 "vmul.f32 q1, q1, q9\n" in Transform()
7973 "vmul.f32 q2, q2, q9\n" in Transform()
7974 "vmul.f32 q3, q3, q9\n" in Transform()
7975 "vmul.f32 q4, q4, q11\n" in Transform()
7976 "vmul.f32 q5, q5, q11\n" in Transform()
7977 "vmul.f32 q6, q6, q11\n" in Transform()
7978 "vmul.f32 q7, q7, q11\n" in Transform()
7979 "vadd.f32 q0, q0, q8\n" in Transform()
7980 "vadd.f32 q1, q1, q8\n" in Transform()
7981 "vadd.f32 q2, q2, q8\n" in Transform()
7982 "vadd.f32 q3, q3, q8\n" in Transform()
7983 "vadd.f32 q4, q4, q10\n" in Transform()
7984 "vadd.f32 q5, q5, q10\n" in Transform()
7985 "vadd.f32 q6, q6, q10\n" in Transform()
7986 "vadd.f32 q7, q7, q10\n" in Transform()
7987 "vadd.f32 q0, q0, q4\n" in Transform()
7988 "vadd.f32 q1, q1, q5\n" in Transform()
7989 "vadd.f32 q2, q2, q6\n" in Transform()
7990 "vadd.f32 q3, q3, q7\n" in Transform()
7991 "vsub.f32 q0, q0, q12\n" in Transform()
7992 "vsub.f32 q1, q1, q12\n" in Transform()
7993 "vsub.f32 q2, q2, q12\n" in Transform()
7994 "vsub.f32 q3, q3, q12\n" in Transform()
7995 "vmul.f32 q0, q0, q13\n" in Transform()
7996 "vmul.f32 q1, q1, q13\n" in Transform()
7997 "vmul.f32 q2, q2, q13\n" in Transform()
7998 "vmul.f32 q3, q3, q13\n" in Transform()
7999 "vadd.f32 q0, q0, q14\n" in Transform()
8000 "vadd.f32 q1, q1, q14\n" in Transform()
8001 "vadd.f32 q2, q2, q14\n" in Transform()
8002 "vadd.f32 q3, q3, q14\n" in Transform()
8003 "vcvt.s32.f32 q0, q0\n" in Transform()
8004 "vcvt.s32.f32 q1, q1\n" in Transform()
8005 "vcvt.s32.f32 q2, q2\n" in Transform()
8006 "vcvt.s32.f32 q3, q3\n" in Transform()
8008 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
8009 "vst1.32 {d4, d5, d6, d7}, [%[output]]!\n" in Transform()
8010 "pld [%[output]]\n" in Transform()
8011 "bne 2b\n" in Transform()
8015 "vld1.32 {d0}, [%[input]]!\n" in Transform()
8016 "vld1.32 {d1[0]}, [%[input]]!\n" in Transform()
8017 "vld1.16 {d1[2]}, [%[input]]!\n" in Transform()
8018 "vld1.8 {d1[6]}, [%[input]]!\n" in Transform()
8019 "vld1.32 {d8}, [r1]!\n" in Transform()
8020 "vld1.32 {d9[0]}, [r1]!\n" in Transform()
8021 "vld1.16 {d9[2]}, [r1]!\n" in Transform()
8022 "vld1.8 {d9[6]}, [r1]!\n" in Transform()
8023 "pld [%[input], #32]\n" in Transform()
8024 "vmovl.u8 q1, d1\n" in Transform()
8025 "vmovl.u8 q0, d0\n" in Transform()
8026 "vmovl.u8 q5, d9\n" in Transform()
8027 "vmovl.u8 q4, d8\n" in Transform()
8028 "vmovl.s16 q3, d3\n" in Transform()
8029 "vmovl.s16 q2, d2\n" in Transform()
8030 "vmovl.s16 q7, d11\n" in Transform()
8031 "vmovl.s16 q6, d10\n" in Transform()
8032 "vmovl.s16 q1, d1\n" in Transform()
8033 "vmovl.s16 q0, d0\n" in Transform()
8034 "vmovl.s16 q5, d9\n" in Transform()
8035 "vmovl.s16 q4, d8\n" in Transform()
8036 "vcvt.f32.s32 q0, q0\n" in Transform()
8037 "vcvt.f32.s32 q1, q1\n" in Transform()
8038 "vcvt.f32.s32 q2, q2\n" in Transform()
8039 "vcvt.f32.s32 q3, q3\n" in Transform()
8040 "vcvt.f32.s32 q4, q4\n" in Transform()
8041 "vcvt.f32.s32 q5, q5\n" in Transform()
8042 "vcvt.f32.s32 q6, q6\n" in Transform()
8043 "vcvt.f32.s32 q7, q7\n" in Transform()
8044 "vmul.f32 q0, q0, q9\n" in Transform()
8045 "vmul.f32 q1, q1, q9\n" in Transform()
8046 "vmul.f32 q2, q2, q9\n" in Transform()
8047 "vmul.f32 q3, q3, q9\n" in Transform()
8048 "vmul.f32 q4, q4, q11\n" in Transform()
8049 "vmul.f32 q5, q5, q11\n" in Transform()
8050 "vmul.f32 q6, q6, q11\n" in Transform()
8051 "vmul.f32 q7, q7, q11\n" in Transform()
8052 "vadd.f32 q0, q0, q8\n" in Transform()
8053 "vadd.f32 q1, q1, q8\n" in Transform()
8054 "vadd.f32 q2, q2, q8\n" in Transform()
8055 "vadd.f32 q3, q3, q8\n" in Transform()
8056 "vadd.f32 q4, q4, q10\n" in Transform()
8057 "vadd.f32 q5, q5, q10\n" in Transform()
8058 "vadd.f32 q6, q6, q10\n" in Transform()
8059 "vadd.f32 q7, q7, q10\n" in Transform()
8060 "vadd.f32 q0, q0, q4\n" in Transform()
8061 "vadd.f32 q1, q1, q5\n" in Transform()
8062 "vadd.f32 q2, q2, q6\n" in Transform()
8063 "vadd.f32 q3, q3, q7\n" in Transform()
8064 "vsub.f32 q0, q0, q12\n" in Transform()
8065 "vsub.f32 q1, q1, q12\n" in Transform()
8066 "vsub.f32 q2, q2, q12\n" in Transform()
8067 "vsub.f32 q3, q3, q12\n" in Transform()
8068 "vmul.f32 q0, q0, q13\n" in Transform()
8069 "vmul.f32 q1, q1, q13\n" in Transform()
8070 "vmul.f32 q2, q2, q13\n" in Transform()
8071 "vmul.f32 q3, q3, q13\n" in Transform()
8072 "vadd.f32 q0, q0, q14\n" in Transform()
8073 "vadd.f32 q1, q1, q14\n" in Transform()
8074 "vadd.f32 q2, q2, q14\n" in Transform()
8075 "vadd.f32 q3, q3, q14\n" in Transform()
8076 "vcvt.s32.f32 q0, q0\n" in Transform()
8077 "vcvt.s32.f32 q1, q1\n" in Transform()
8078 "vcvt.s32.f32 q2, q2\n" in Transform()
8079 "vcvt.s32.f32 q3, q3\n" in Transform()
8081 "vst1.32 {d0, d1, d2, d3}, [%[output]]!\n" in Transform()
8082 "vst1.32 {d4, d5, d6}, [%[output]]!\n" in Transform()
8083 "vst1.32 {d7[0]}, [%[output]]!\n" in Transform()
8084 "pld [%[output]]\n" in Transform()
8085 "subs %[rows], %[rows], #1\n" in Transform()
8086 "bne 1b\n" in Transform()