Lines Matching +full:d3 +full:- +full:time +full:- +full:format
4 * Use of this source code is governed by a BSD-style license
105 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 in ScaleRowDown4_NEON()
159 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 in ScaleRowDown34_NEON()
161 "vmov d2, d3 \n" // order d0, d1, d2 in ScaleRowDown34_NEON()
168 : "d0", "d1", "d2", "d3", "memory", "cc"); in ScaleRowDown34_NEON()
179 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 in ScaleRowDown34_0_Box_NEON()
195 "vmlal.u8 q11, d3, d24 \n" in ScaleRowDown34_0_Box_NEON()
201 "vqrshrn.u16 d3, q11, #2 \n" in ScaleRowDown34_0_Box_NEON()
213 "vmlal.u8 q8, d3, d24 \n" in ScaleRowDown34_0_Box_NEON()
236 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 in ScaleRowDown34_1_Box_NEON()
253 "vmlal.u8 q3, d3, d24 \n" in ScaleRowDown34_1_Box_NEON()
278 // 32 -> 12
287 "vld1.8 {d0, d1, d2, d3}, [%0]! \n" in ScaleRowDown38_NEON()
289 "vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n" in ScaleRowDown38_NEON()
290 "vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n" in ScaleRowDown38_NEON()
298 : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc"); in ScaleRowDown38_NEON()
301 // 32x3 -> 12x1
318 // d3 = 30 70 31 71 32 72 33 73 in ScaleRowDown38_3_Box_NEON()
319 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" in ScaleRowDown38_3_Box_NEON()
325 // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 in ScaleRowDown38_3_Box_NEON()
333 // d3 = 60 70 61 71 62 72 63 73 in ScaleRowDown38_3_Box_NEON()
334 "vtrn.u8 d2, d3 \n" in ScaleRowDown38_3_Box_NEON()
344 // d3 = 60+70 61+71 62+72 63+73 in ScaleRowDown38_3_Box_NEON()
345 "vpaddl.u8 d3, d3 \n" in ScaleRowDown38_3_Box_NEON()
352 "vadd.u16 d4, d3, d7 \n" in ScaleRowDown38_3_Box_NEON()
377 "vtrn.u32 d2, d3 \n" in ScaleRowDown38_3_Box_NEON()
381 "vtrn.u16 d2, d3 \n" in ScaleRowDown38_3_Box_NEON()
395 "vtbl.u8 d3, {d0, d1, d2}, d28 \n" in ScaleRowDown38_3_Box_NEON()
398 "vst1.8 {d3}, [%1]! \n" in ScaleRowDown38_3_Box_NEON()
413 // 32x2 -> 12x1
427 // d3 = 30 70 31 71 32 72 33 73 in ScaleRowDown38_2_Box_NEON()
428 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" in ScaleRowDown38_2_Box_NEON()
433 // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 in ScaleRowDown38_2_Box_NEON()
440 // d3 = 60 70 61 71 62 72 63 73 in ScaleRowDown38_2_Box_NEON()
441 "vtrn.u8 d2, d3 \n" in ScaleRowDown38_2_Box_NEON()
449 // d3 = 60+70 61+71 62+72 63+73 in ScaleRowDown38_2_Box_NEON()
450 "vpaddl.u8 d3, d3 \n" in ScaleRowDown38_2_Box_NEON()
455 "vadd.u16 d4, d3, d7 \n" in ScaleRowDown38_2_Box_NEON()
474 "vtrn.u32 d2, d3 \n" in ScaleRowDown38_2_Box_NEON()
478 "vtrn.u16 d2, d3 \n" in ScaleRowDown38_2_Box_NEON()
492 "vtbl.u8 d3, {d0, d1, d2}, d28 \n" in ScaleRowDown38_2_Box_NEON()
495 "vst1.8 {d3}, [%1]! \n" in ScaleRowDown38_2_Box_NEON()
527 "subs %2, %2, #16 \n" // 8 sample -> 16 sample in ScaleRowUp2_Linear_NEON()
585 "vrshrn.u16 d3, q0, #4 \n" // 2, odd in ScaleRowUp2_Bilinear_NEON()
590 "vst2.8 {d2, d3}, [%3]! \n" // store in ScaleRowUp2_Bilinear_NEON()
591 "subs %4, %4, #16 \n" // 8 sample -> 16 sample in ScaleRowUp2_Bilinear_NEON()
624 "vst2.16 {d0, d1, d2, d3}, [%1]! \n" // store in ScaleRowUp2_Linear_12_NEON()
625 "subs %2, %2, #16 \n" // 8 sample -> 16 sample in ScaleRowUp2_Linear_12_NEON()
676 "vst2.16 {d0, d1, d2, d3}, [%2]! \n" // store in ScaleRowUp2_Bilinear_12_NEON()
678 "subs %4, %4, #16 \n" // 8 sample -> 16 sample in ScaleRowUp2_Bilinear_12_NEON()
707 "vmovl.u16 q5, d3 \n" // 5678 (32b) in ScaleRowUp2_Linear_16_NEON()
710 "vmlal.u16 q3, d3, d31 \n" in ScaleRowUp2_Linear_16_NEON()
717 "vrshrn.u32 d3, q3, #2 \n" in ScaleRowUp2_Linear_16_NEON()
720 "subs %2, %2, #16 \n" // 8 sample -> 16 sample in ScaleRowUp2_Linear_16_NEON()
769 "vrshrn.u32 d3, q2, #4 \n" in ScaleRowUp2_Bilinear_16_NEON()
773 "vst2.16 {d2, d3}, [%3]! \n" // store in ScaleRowUp2_Bilinear_16_NEON()
774 "subs %4, %4, #8 \n" // 4 sample -> 8 sample in ScaleRowUp2_Bilinear_16_NEON()
809 "subs %2, %2, #8 \n" // 4 uv -> 8 uv in ScaleUVRowUp2_Linear_NEON()
867 "vrshrn.u16 d3, q0, #4 \n" // 2, odd in ScaleUVRowUp2_Bilinear_NEON()
872 "vst2.16 {d2, d3}, [%3]! \n" // store in ScaleUVRowUp2_Bilinear_NEON()
873 "subs %4, %4, #8 \n" // 4 uv -> 8 uv in ScaleUVRowUp2_Bilinear_NEON()
902 "vmovl.u16 q5, d3 \n" // 3344 (1u1v, 32b) in ScaleUVRowUp2_Linear_16_NEON()
905 "vmlal.u16 q4, d3, d30 \n" // 3*near+far (odd) in ScaleUVRowUp2_Linear_16_NEON()
910 "vrshrn.u32 d3, q4, #2 \n" // 3/4*near+1/4*far (odd) in ScaleUVRowUp2_Linear_16_NEON()
914 "vst2.32 {d2, d3}, [%1]! \n" // store in ScaleUVRowUp2_Linear_16_NEON()
915 "subs %2, %2, #8 \n" // 4 uv -> 8 uv in ScaleUVRowUp2_Linear_16_NEON()
965 "vrshrn.u32 d3, q2, #4 \n" // 2, odd in ScaleUVRowUp2_Bilinear_16_NEON()
969 "vst2.32 {d2, d3}, [%3]! \n" // store in ScaleUVRowUp2_Bilinear_16_NEON()
970 "subs %4, %4, #4 \n" // 2 uv -> 4 uv in ScaleUVRowUp2_Bilinear_16_NEON()
986 // Reads 16 bytes and accumulates to 16 shorts at a time.
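
The comment above describes the row accumulator: source bytes are widened and added into a 16-bit sum row, 16 at a time. A scalar sketch of one accumulate pass, with an illustrative helper name:

    #include <stdint.h>

    // Widen each source byte and add it into a 16-bit accumulator row.
    static void ScaleAddRow_Sketch(const uint8_t* src, uint16_t* dst,
                                   int width) {
      for (int x = 0; x < width; ++x) {
        dst[x] = (uint16_t)(dst[x] + src[x]);
      }
    }
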
1017 // ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16))
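
The fragment above is the tail of a 16.16 fixed-point blend: f is a 0..65536 fraction and the result is a + f*(b - a) with rounding. A minimal sketch of that formula (the exact casts and parenthesization of the full macro in the source may differ):

    #include <stdint.h>

    // 16.16 fixed-point linear interpolation between a and b by fraction f.
    static inline uint8_t BlendSketch(int a, int b, int f) {
      return (uint8_t)(a + ((f * (b - a) + 0x8000) >> 16));
    }
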
1083 // 16x2 -> 16x1
1111 "vmlal.u8 q14, d3, d5 \n" in ScaleFilterRows_NEON()
1150 // Blend 100 / 0 - Copy row unchanged. in ScaleFilterRows_NEON()
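
ScaleFilterRows_NEON mixes two source rows into one ("16x2 -> 16x1") using a per-call fraction in the vmull/vmlal pairs; the "Blend 100 / 0" case degenerates to a plain copy of the first row. A scalar sketch, assuming 256-based weights and a >>8 narrowing as suggested by the register usage:

    #include <stdint.h>

    // Weighted mix of two rows; fraction is the weight of row1 out of 256.
    // fraction == 0 copies row0 unchanged ("Blend 100 / 0" above).
    static void ScaleFilterRows_Sketch(uint8_t* dst, const uint8_t* row0,
                                       const uint8_t* row1, int width,
                                       int fraction) {
      const int w1 = fraction;
      const int w0 = 256 - fraction;
      for (int x = 0; x < width; ++x) {
        dst[x] = (uint8_t)((row0[x] * w0 + row1[x] * w1) >> 8);
      }
    }
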
1176 "vld4.32 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB in ScaleARGBRowDown2_NEON()
1193 // 54: f942 038d vst2.32 {d16-d19}, [r2]!
1204 "vld4.32 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB in ScaleARGBRowDown2Linear_NEON()
1227 "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB in ScaleARGBRowDown2Box_NEON()
1229 "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. in ScaleARGBRowDown2Box_NEON()
1230 "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. in ScaleARGBRowDown2Box_NEON()
1231 "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. in ScaleARGBRowDown2Box_NEON()
1232 "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts. in ScaleARGBRowDown2Box_NEON()
1235 "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts. in ScaleARGBRowDown2Box_NEON()
1236 "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts. in ScaleARGBRowDown2Box_NEON()
1237 "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts. in ScaleARGBRowDown2Box_NEON()
1238 "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts. in ScaleARGBRowDown2Box_NEON()
1242 "vrshrn.u16 d3, q3, #2 \n" in ScaleARGBRowDown2Box_NEON()
1243 "vst4.8 {d0, d1, d2, d3}, [%2]! \n" in ScaleARGBRowDown2Box_NEON()
1253 // Reads 4 pixels at a time.
1278 // Reads 4 pixels at a time.
1289 "vld1.8 {d0}, [%0], r12 \n" // 4 2x2 blocks -> 2x1 in ScaleARGBRowDownEvenBox_NEON()
1292 "vld1.8 {d3}, [%1], r12 \n" in ScaleARGBRowDownEvenBox_NEON()
1298 "vaddl.u8 q1, d2, d3 \n" in ScaleARGBRowDownEvenBox_NEON()
1301 "vswp.8 d1, d2 \n" // ab_cd -> ac_bd in ScaleARGBRowDownEvenBox_NEON()
1302 "vswp.8 d5, d6 \n" // ef_gh -> eg_fh in ScaleARGBRowDownEvenBox_NEON()
1335 // clang-format off in ScaleARGBCols_NEON()
1342 LOAD1_DATA32_LANE(d3, 0) in ScaleARGBCols_NEON()
1343 LOAD1_DATA32_LANE(d3, 1) in ScaleARGBCols_NEON()
1344 // clang-format on in ScaleARGBCols_NEON()
1389 // d2, d3: b in ScaleARGBFilterCols_NEON()
1392 LOAD2_DATA32_LANE(d1, d3, 0) in ScaleARGBFilterCols_NEON()
1393 LOAD2_DATA32_LANE(d1, d3, 1) in ScaleARGBFilterCols_NEON()
1406 "vmull.u8 q14, d3, d5 \n" in ScaleARGBFilterCols_NEON()
1439 "vld2.16 {d1, d3}, [%0]! \n" // load next 8 UV in ScaleUVRowDown2_NEON()
1458 "vld2.16 {d1, d3}, [%0]! \n" // load next 8 UV in ScaleUVRowDown2Linear_NEON()
1479 "vld2.8 {d1, d3}, [%0]! \n" // load next 8 UV in ScaleUVRowDown2Box_NEON()
1481 "vpaddl.u8 q0, q0 \n" // U 16 bytes -> 8 shorts. in ScaleUVRowDown2Box_NEON()
1482 "vpaddl.u8 q1, q1 \n" // V 16 bytes -> 8 shorts. in ScaleUVRowDown2Box_NEON()
1485 "vpadal.u8 q0, q8 \n" // U 16 bytes -> 8 shorts. in ScaleUVRowDown2Box_NEON()
1486 "vpadal.u8 q1, q9 \n" // V 16 bytes -> 8 shorts. in ScaleUVRowDown2Box_NEON()
1499 // Reads 4 pixels at a time.