/external/libgav1/libgav1/src/dsp/ |
D | loop_restoration.cc | 280 uint32_t* const* square_sum5) { in BoxSum() argument 301 (*square_sum5)[x] = square_sum + source0 * source0; in BoxSum() 307 ++square_sum5; in BoxSum() 365 const uint16_t* const sum5[5], const uint32_t* const square_sum5[5], in BoxFilterPreProcess5() 373 a += square_sum5[dy][x]; in BoxFilterPreProcess5() 474 uint32_t* const square_sum5[5], const int width, in BoxFilterPass1() 479 BoxFilterPreProcess5<bitdepth>(sum5, square_sum5, width, scale, sgr_buffer, in BoxFilterPass1() 516 uint32_t* const square_sum5[5], const int width, in BoxFilter() 523 BoxFilterPreProcess5<bitdepth>(sum5, square_sum5, width, scales[0], in BoxFilter() 563 uint32_t *square_sum3[4], *square_sum5[5], *b343[4], *b444[3], *b565[2]; in BoxFilterProcess() local [all …]
|
D | common.h | 53 alignas(kMaxAlignment) uint32_t square_sum5[5 * kSgrStride]; member
|
/external/libgav1/libgav1/src/dsp/arm/ |
D | loop_restoration_neon.cc | 908 uint32_t* square_sum3, uint32_t* square_sum5) { in BoxSum() argument 927 vst1q_u32(square_sum5 + 0, row_sq5.val[0]); in BoxSum() 928 vst1q_u32(square_sum5 + 4, row_sq5.val[1]); in BoxSum() 934 square_sum5 += 8; in BoxSum() 1147 uint8x16_t s[2][2], uint16_t* const sum5[5], uint32_t* const square_sum5[5], in BoxFilterPreProcess5Lo() 1163 vst1q_u32(square_sum5[3] + 0, sq5[3].val[0]); in BoxFilterPreProcess5Lo() 1164 vst1q_u32(square_sum5[3] + 4, sq5[3].val[1]); in BoxFilterPreProcess5Lo() 1165 vst1q_u32(square_sum5[4] + 0, sq5[4].val[0]); in BoxFilterPreProcess5Lo() 1166 vst1q_u32(square_sum5[4] + 4, sq5[4].val[1]); in BoxFilterPreProcess5Lo() 1170 sq5[0].val[0] = vld1q_u32(square_sum5[0] + 0); in BoxFilterPreProcess5Lo() [all …]
|
/external/libgav1/libgav1/src/dsp/x86/ |
D | loop_restoration_10bit_sse4.cc | 897 uint32_t* square_sum3, uint32_t* square_sum5) { in BoxSum() argument 922 StoreAligned32U32(square_sum5 + 0, row_sq5); in BoxSum() 926 StoreAligned32U32(square_sum5 + 8, row_sq5); in BoxSum() 933 square_sum5 += 16; in BoxSum() 939 square_sum5 += sum_stride - sum_width; in BoxSum() 1299 uint32_t* const square_sum5[5], __m128i sq[2][8], __m128i* const ma, in BoxFilterPreProcess5Lo() 1309 StoreAligned32U32(square_sum5[3], sq5[3]); in BoxFilterPreProcess5Lo() 1311 StoreAligned32U32(square_sum5[4], sq5[4]); in BoxFilterPreProcess5Lo() 1313 LoadAligned32x3U32(square_sum5, 0, sq5); in BoxFilterPreProcess5Lo() 1320 uint32_t* const square_sum5[5], __m128i sq[2][8], __m128i ma[2], in BoxFilterPreProcess5() [all …]
|
D | loop_restoration_sse4.cc | 1053 uint32_t* square_sum3, uint32_t* square_sum5) { in BoxSum() argument 1073 StoreAligned32U32(square_sum5 + 0, row_sq5); in BoxSum() 1076 StoreAligned32U32(square_sum5 + 8, row_sq5); in BoxSum() 1082 square_sum5 += 16; in BoxSum() 1088 square_sum5 += sum_stride - sum_width; in BoxSum() 1437 uint32_t* const square_sum5[5], __m128i sq[2][4], __m128i* const ma, in BoxFilterPreProcess5Lo() 1447 StoreAligned32U32(square_sum5[3], sq5[3]); in BoxFilterPreProcess5Lo() 1449 StoreAligned32U32(square_sum5[4], sq5[4]); in BoxFilterPreProcess5Lo() 1451 LoadAligned32x3U32(square_sum5, 0, sq5); in BoxFilterPreProcess5Lo() 1458 uint32_t* const square_sum5[5], __m128i sq[2][4], __m128i ma[2], in BoxFilterPreProcess5() [all …]
|
D | loop_restoration_10bit_avx2.cc | 1137 uint32_t* square_sum3, uint32_t* square_sum5) { in BoxSum() argument 1155 StoreAligned32U32(square_sum5, sq5); in BoxSum() 1160 square_sum5 += 8; in BoxSum() 1185 StoreAligned64(square_sum5 + 0, row_sq5); in BoxSum() 1189 StoreAligned64(square_sum5 + 16, row_sq5); in BoxSum() 1196 square_sum5 += 32; in BoxSum() 1203 square_sum5 += sum_stride - sum_width - 8; in BoxSum() 1829 uint32_t* const square_sum5[5], __m128i sq[2][4], __m128i* const ma, in BoxFilterPreProcess5Lo() 1839 StoreAligned32U32(square_sum5[3], sq5[3]); in BoxFilterPreProcess5Lo() 1841 StoreAligned32U32(square_sum5[4], sq5[4]); in BoxFilterPreProcess5Lo() [all …]
|
D | loop_restoration_avx2.cc | 1245 uint32_t* square_sum3, uint32_t* square_sum5) { in BoxSum() argument 1259 StoreAligned32U32(square_sum5, sq5); in BoxSum() 1264 square_sum5 += 8; in BoxSum() 1280 StoreAligned64(square_sum5 + 0, row_sq5); in BoxSum() 1283 StoreAligned64(square_sum5 + 16, row_sq5); in BoxSum() 1289 square_sum5 += 32; in BoxSum() 1296 square_sum5 += sum_stride - sum_width - 8; in BoxSum() 1748 uint32_t* const square_sum5[5], __m128i sq[2][2], __m128i* const ma, in BoxFilterPreProcess5Lo() 1758 StoreAligned32U32(square_sum5[3], sq5[3]); in BoxFilterPreProcess5Lo() 1760 StoreAligned32U32(square_sum5[4], sq5[4]); in BoxFilterPreProcess5Lo() [all …]
|