Lines Matching refs:BPS
54 vst1_lane_u32((uint32_t*)(dst + 0 * BPS), vreinterpret_u32_u8(dst01_u8), 0); in SaturateAndStore4x4()
55 vst1_lane_u32((uint32_t*)(dst + 1 * BPS), vreinterpret_u32_u8(dst01_u8), 1); in SaturateAndStore4x4()
56 vst1_lane_u32((uint32_t*)(dst + 2 * BPS), vreinterpret_u32_u8(dst23_u8), 0); in SaturateAndStore4x4()
57 vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1); in SaturateAndStore4x4()
66 dst01 = vld1_lane_u32((uint32_t*)(ref + 0 * BPS), dst01, 0); in Add4x4()
67 dst23 = vld1_lane_u32((uint32_t*)(ref + 2 * BPS), dst23, 0); in Add4x4()
68 dst01 = vld1_lane_u32((uint32_t*)(ref + 1 * BPS), dst01, 1); in Add4x4()
69 dst23 = vld1_lane_u32((uint32_t*)(ref + 3 * BPS), dst23, 1); in Add4x4()
132 const int kBPS = BPS; in ITransformOne()
257 out = vld1q_lane_u32((const uint32_t*)(src + 0 * BPS), out, 0); in Load4x4()
258 out = vld1q_lane_u32((const uint32_t*)(src + 1 * BPS), out, 1); in Load4x4()
259 out = vld1q_lane_u32((const uint32_t*)(src + 2 * BPS), out, 2); in Load4x4()
260 out = vld1q_lane_u32((const uint32_t*)(src + 3 * BPS), out, 3); in Load4x4()
363 const int kBPS = BPS; in FTransform()
638 LOAD_LANE_32b(a + 0 * BPS, d0d1, 0); // a00 a01 a02 a03 in Disto4x4()
639 LOAD_LANE_32b(a + 1 * BPS, d0d1, 1); // a10 a11 a12 a13 in Disto4x4()
640 LOAD_LANE_32b(b + 0 * BPS, d0d1, 2); // b00 b01 b02 b03 in Disto4x4()
641 LOAD_LANE_32b(b + 1 * BPS, d0d1, 3); // b10 b11 b12 b13 in Disto4x4()
642 LOAD_LANE_32b(a + 2 * BPS, d2d3, 0); // a20 a21 a22 a23 in Disto4x4()
643 LOAD_LANE_32b(a + 3 * BPS, d2d3, 1); // a30 a31 a32 a33 in Disto4x4()
644 LOAD_LANE_32b(b + 2 * BPS, d2d3, 2); // b20 b21 b22 b23 in Disto4x4()
645 LOAD_LANE_32b(b + 3 * BPS, d2d3, 3); // b30 b31 b32 b33 in Disto4x4()
683 const int kBPS = BPS; in Disto4x4()
873 for (y = 0; y < 16 * BPS; y += 4 * BPS) { in Disto16x16()
935 AccumulateSSE16(a + y * BPS, b + y * BPS, &sum); in SSE16x16()
944 AccumulateSSE16(a + y * BPS, b + y * BPS, &sum); in SSE16x8()
953 const uint8x8_t a0 = vld1_u8(a + y * BPS); in SSE8x8()
954 const uint8x8_t b0 = vld1_u8(b + y * BPS); in SSE8x8()