Lines Matching refs:BPS
393 vst1_lane_u32((uint32_t*)(dst + 0 * BPS), vreinterpret_u32_u8(dst01_u8), 0); in SaturateAndStore4x4_NEON()
394 vst1_lane_u32((uint32_t*)(dst + 1 * BPS), vreinterpret_u32_u8(dst01_u8), 1); in SaturateAndStore4x4_NEON()
395 vst1_lane_u32((uint32_t*)(dst + 2 * BPS), vreinterpret_u32_u8(dst23_u8), 0); in SaturateAndStore4x4_NEON()
396 vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1); in SaturateAndStore4x4_NEON()
406 dst01 = vld1_lane_u32((uint32_t*)(dst + 0 * BPS), dst01, 0); in Add4x4_NEON()
407 dst23 = vld1_lane_u32((uint32_t*)(dst + 2 * BPS), dst23, 0); in Add4x4_NEON()
408 dst01 = vld1_lane_u32((uint32_t*)(dst + 1 * BPS), dst01, 1); in Add4x4_NEON()
409 dst23 = vld1_lane_u32((uint32_t*)(dst + 3 * BPS), dst23, 1); in Add4x4_NEON()
1054 const int kBPS = BPS; in TransformOne_NEON()
1283 const uint8x8_t A = vld1_u8(dst - BPS); // top row in DC4_NEON()
1286 const uint8x8_t L0 = vld1_u8(dst + 0 * BPS - 1); in DC4_NEON()
1287 const uint8x8_t L1 = vld1_u8(dst + 1 * BPS - 1); in DC4_NEON()
1288 const uint8x8_t L2 = vld1_u8(dst + 2 * BPS - 1); in DC4_NEON()
1289 const uint8x8_t L3 = vld1_u8(dst + 3 * BPS - 1); in DC4_NEON()
1298 vst1_lane_u32((uint32_t*)(dst + i * BPS), vreinterpret_u32_u8(dc), 0); in DC4_NEON()
1304 const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1); // top-left pixel 'A[-1]' in TrueMotion_NEON()
1305 const uint8x8_t T = vld1_u8(dst - BPS); // top row 'A[0..3]' in TrueMotion_NEON()
1310 const int16x8_t L0 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 0 * BPS - 1)); in TrueMotion_NEON()
1311 const int16x8_t L1 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 1 * BPS - 1)); in TrueMotion_NEON()
1312 const int16x8_t L2 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 2 * BPS - 1)); in TrueMotion_NEON()
1313 const int16x8_t L3 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 3 * BPS - 1)); in TrueMotion_NEON()
1324 vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0_u32, 0); in TrueMotion_NEON()
1325 vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1_u32, 0); in TrueMotion_NEON()
1326 vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2_u32, 0); in TrueMotion_NEON()
1327 vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3_u32, 0); in TrueMotion_NEON()
1329 vst1_u32((uint32_t*)(dst + 0 * BPS), r0_u32); in TrueMotion_NEON()
1330 vst1_u32((uint32_t*)(dst + 1 * BPS), r1_u32); in TrueMotion_NEON()
1331 vst1_u32((uint32_t*)(dst + 2 * BPS), r2_u32); in TrueMotion_NEON()
1332 vst1_u32((uint32_t*)(dst + 3 * BPS), r3_u32); in TrueMotion_NEON()
1334 dst += 4 * BPS; in TrueMotion_NEON()
1342 const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(dst - BPS - 1)); // top row in VE4_NEON()
1352 vst1_lane_u32((uint32_t*)(dst + i * BPS), vreinterpret_u32_u8(avg), 0); in VE4_NEON()
1357 const uint8x8_t XABCD_u8 = vld1_u8(dst - BPS - 1); in RD4_NEON()
1360 const uint32_t I = dst[-1 + 0 * BPS]; in RD4_NEON()
1361 const uint32_t J = dst[-1 + 1 * BPS]; in RD4_NEON()
1362 const uint32_t K = dst[-1 + 2 * BPS]; in RD4_NEON()
1363 const uint32_t L = dst[-1 + 3 * BPS]; in RD4_NEON()
1379 vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0, 0); in RD4_NEON()
1380 vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1, 0); in RD4_NEON()
1381 vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2, 0); in RD4_NEON()
1382 vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0); in RD4_NEON()
1387 const uint8x8_t ABCDEFGH = vld1_u8(dst - BPS + 0); in LD4_NEON()
1388 const uint8x8_t BCDEFGH0 = vld1_u8(dst - BPS + 1); in LD4_NEON()
1389 const uint8x8_t CDEFGH00 = vld1_u8(dst - BPS + 2); in LD4_NEON()
1390 const uint8x8_t CDEFGHH0 = vset_lane_u8(dst[-BPS + 7], CDEFGH00, 6); in LD4_NEON()
1398 vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0, 0); in LD4_NEON()
1399 vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1, 0); in LD4_NEON()
1400 vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2, 0); in LD4_NEON()
1401 vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0); in LD4_NEON()
1408 const uint8x8_t top = vld1_u8(dst - BPS); in VE8uv_NEON()
1411 vst1_u8(dst + j * BPS, top); in VE8uv_NEON()
1420 dst += BPS; in HE8uv_NEON()
1430 const uint8x8_t A = vld1_u8(dst - BPS); // top row in DC8_NEON()
1443 const uint8x8_t L0 = vld1_u8(dst + 0 * BPS - 1); in DC8_NEON()
1444 const uint8x8_t L1 = vld1_u8(dst + 1 * BPS - 1); in DC8_NEON()
1445 const uint8x8_t L2 = vld1_u8(dst + 2 * BPS - 1); in DC8_NEON()
1446 const uint8x8_t L3 = vld1_u8(dst + 3 * BPS - 1); in DC8_NEON()
1447 const uint8x8_t L4 = vld1_u8(dst + 4 * BPS - 1); in DC8_NEON()
1448 const uint8x8_t L5 = vld1_u8(dst + 5 * BPS - 1); in DC8_NEON()
1449 const uint8x8_t L6 = vld1_u8(dst + 6 * BPS - 1); in DC8_NEON()
1450 const uint8x8_t L7 = vld1_u8(dst + 7 * BPS - 1); in DC8_NEON()
1475 vst1_u32((uint32_t*)(dst + i * BPS), vreinterpret_u32_u8(dc)); in DC8_NEON()
1491 const uint8x16_t top = vld1q_u8(dst - BPS); in VE16_NEON()
1494 vst1q_u8(dst + j * BPS, top); in VE16_NEON()
1503 dst += BPS; in HE16_NEON()
1513 const uint8x16_t A = vld1q_u8(dst - BPS); // top row in DC16_NEON()
1530 const uint8x8_t L0 = vld1_u8(dst + (i + 0) * BPS - 1); in DC16_NEON()
1531 const uint8x8_t L1 = vld1_u8(dst + (i + 1) * BPS - 1); in DC16_NEON()
1532 const uint8x8_t L2 = vld1_u8(dst + (i + 2) * BPS - 1); in DC16_NEON()
1533 const uint8x8_t L3 = vld1_u8(dst + (i + 3) * BPS - 1); in DC16_NEON()
1534 const uint8x8_t L4 = vld1_u8(dst + (i + 4) * BPS - 1); in DC16_NEON()
1535 const uint8x8_t L5 = vld1_u8(dst + (i + 5) * BPS - 1); in DC16_NEON()
1536 const uint8x8_t L6 = vld1_u8(dst + (i + 6) * BPS - 1); in DC16_NEON()
1537 const uint8x8_t L7 = vld1_u8(dst + (i + 7) * BPS - 1); in DC16_NEON()
1564 vst1q_u8(dst + i * BPS, dc); in DC16_NEON()
1575 const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1); // top-left pixel 'A[-1]' in TM16_NEON()
1576 const uint8x16_t T = vld1q_u8(dst - BPS); // top row 'A[0..15]' in TM16_NEON()
1583 const int16x8_t L0 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 0 * BPS - 1)); in TM16_NEON()
1584 const int16x8_t L1 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 1 * BPS - 1)); in TM16_NEON()
1585 const int16x8_t L2 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 2 * BPS - 1)); in TM16_NEON()
1586 const int16x8_t L3 = ConvertU8ToS16_NEON(vld1_dup_u8(dst + 3 * BPS - 1)); in TM16_NEON()
1600 vst1q_u8(dst + 0 * BPS, row0); in TM16_NEON()
1601 vst1q_u8(dst + 1 * BPS, row1); in TM16_NEON()
1602 vst1q_u8(dst + 2 * BPS, row2); in TM16_NEON()
1603 vst1q_u8(dst + 3 * BPS, row3); in TM16_NEON()
1604 dst += 4 * BPS; in TM16_NEON()