/external/libgav1/libgav1/src/dsp/ |
D | loop_restoration.cc | 279 uint32_t* const* square_sum3, in BoxSum() argument 297 (*square_sum3)[x] = square_sum; in BoxSum() 306 ++square_sum3; in BoxSum() 388 const uint16_t* const sum3[3], const uint32_t* const square_sum3[3], in BoxFilterPreProcess3() 397 a += square_sum3[dy][x]; in BoxFilterPreProcess3() 495 uint32_t* const square_sum3[4], in BoxFilterPass2() 500 BoxSum<Pixel, 3>(src0, 0, 1, width + 2, sum3 + 2, square_sum3 + 2); in BoxFilterPass2() 501 BoxFilterPreProcess3<bitdepth>(sum3, square_sum3, width, scale, true, in BoxFilterPass2() 515 uint32_t* const square_sum3[4], in BoxFilter() 525 BoxFilterPreProcess3<bitdepth>(sum3, square_sum3, width, scales[1], true, in BoxFilter() [all …]
|
D | common.h | 52 alignas(kMaxAlignment) uint32_t square_sum3[4 * kSgrStride]; member
|
/external/libgav1/libgav1/src/dsp/arm/ |
D | loop_restoration_neon.cc | 908 uint32_t* square_sum3, uint32_t* square_sum5) { in BoxSum() argument 925 vst1q_u32(square_sum3 + 0, row_sq3.val[0]); in BoxSum() 926 vst1q_u32(square_sum3 + 4, row_sq3.val[1]); in BoxSum() 933 square_sum3 += 8; in BoxSum() 1296 uint16_t* const sum3[3], uint32_t* const square_sum3[3], uint16x8_t sq[2], in BoxFilterPreProcess3Lo() 1306 vst1q_u32(square_sum3[2] + 0, sq3[2].val[0]); in BoxFilterPreProcess3Lo() 1307 vst1q_u32(square_sum3[2] + 4, sq3[2].val[1]); in BoxFilterPreProcess3Lo() 1310 sq3[0].val[0] = vld1q_u32(square_sum3[0] + 0); in BoxFilterPreProcess3Lo() 1311 sq3[0].val[1] = vld1q_u32(square_sum3[0] + 4); in BoxFilterPreProcess3Lo() 1312 sq3[1].val[0] = vld1q_u32(square_sum3[1] + 0); in BoxFilterPreProcess3Lo() [all …]
|
/external/libgav1/libgav1/src/dsp/x86/ |
D | loop_restoration_10bit_sse4.cc | 897 uint32_t* square_sum3, uint32_t* square_sum5) { in BoxSum() argument 921 StoreAligned32U32(square_sum3 + 0, row_sq3); in BoxSum() 925 StoreAligned32U32(square_sum3 + 8, row_sq3); in BoxSum() 932 square_sum3 += 16; in BoxSum() 938 square_sum3 += sum_stride - sum_width; in BoxSum() 1396 uint32_t* const square_sum3[3], __m128i sq[4], __m128i* const ma, in BoxFilterPreProcess3Lo() 1403 StoreAligned32U32(square_sum3[2], sq3[2]); in BoxFilterPreProcess3Lo() 1405 LoadAligned32x2U32(square_sum3, 0, sq3); in BoxFilterPreProcess3Lo() 1412 uint32_t* const square_sum3[3], __m128i sq[8], __m128i ma[2], in BoxFilterPreProcess3() 1420 StoreAligned32U32(square_sum3[2] + x + 0, sq3[2]); in BoxFilterPreProcess3() [all …]
|
D | loop_restoration_sse4.cc | 1053 uint32_t* square_sum3, uint32_t* square_sum5) { in BoxSum() argument 1072 StoreAligned32U32(square_sum3 + 0, row_sq3); in BoxSum() 1075 StoreAligned32U32(square_sum3 + 8, row_sq3); in BoxSum() 1081 square_sum3 += 16; in BoxSum() 1087 square_sum3 += sum_stride - sum_width; in BoxSum() 1531 uint32_t* const square_sum3[3], __m128i sq[2], __m128i* const ma, in BoxFilterPreProcess3Lo() 1538 StoreAligned32U32(square_sum3[2], sq3[2]); in BoxFilterPreProcess3Lo() 1540 LoadAligned32x2U32(square_sum3, 0, sq3); in BoxFilterPreProcess3Lo() 1547 uint32_t* const square_sum3[3], __m128i sq[4], __m128i ma[2], in BoxFilterPreProcess3() 1554 StoreAligned32U32(square_sum3[2] + x + 0, sq3[2]); in BoxFilterPreProcess3() [all …]
|
D | loop_restoration_10bit_avx2.cc | 1137 uint32_t* square_sum3, uint32_t* square_sum5) { in BoxSum() argument 1154 StoreAligned32U32(square_sum3, sq3); in BoxSum() 1159 square_sum3 += 8; in BoxSum() 1184 StoreAligned64(square_sum3 + 0, row_sq3); in BoxSum() 1188 StoreAligned64(square_sum3 + 16, row_sq3); in BoxSum() 1195 square_sum3 += 32; in BoxSum() 1202 square_sum3 += sum_stride - sum_width - 8; in BoxSum() 1949 uint32_t* const square_sum3[3], __m128i sq[4], __m128i* const ma, in BoxFilterPreProcess3Lo() 1956 StoreAligned32U32(square_sum3[2], sq3[2]); in BoxFilterPreProcess3Lo() 1958 LoadAligned32x2U32(square_sum3, 0, sq3); in BoxFilterPreProcess3Lo() [all …]
|
D | loop_restoration_avx2.cc | 1245 uint32_t* square_sum3, uint32_t* square_sum5) { in BoxSum() argument 1258 StoreAligned32U32(square_sum3, sq3); in BoxSum() 1263 square_sum3 += 8; in BoxSum() 1279 StoreAligned64(square_sum3 + 0, row_sq3); in BoxSum() 1282 StoreAligned64(square_sum3 + 16, row_sq3); in BoxSum() 1288 square_sum3 += 32; in BoxSum() 1295 square_sum3 += sum_stride - sum_width - 8; in BoxSum() 1853 uint32_t* const square_sum3[3], __m128i sq[2], __m128i* const ma, in BoxFilterPreProcess3Lo() 1860 StoreAligned32U32(square_sum3[2], sq3[2]); in BoxFilterPreProcess3Lo() 1862 LoadAligned32x2U32(square_sum3, 0, sq3); in BoxFilterPreProcess3Lo() [all …]
|