/external/libgav1/libgav1/src/dsp/x86/ |
D | intrapred_cfl_sse4.cc | 82 const __m128i ac_q3 = LoadUnaligned16(input); in CflPredictUnclipped() 286 const __m128i samples = LoadUnaligned16(luma_ptr); in CflSubsampler444_8xH_SSE4_1() 411 __m128i samples = LoadUnaligned16(&luma_ptr[x]); in CflSubsampler444_SSE4_1() 592 const __m128i final_fill = LoadUnaligned16(luma_ptr - kCflLumaBufferStride); in CflSubsampler420Impl_8xH_SSE4_1() 614 const __m128i samples = LoadUnaligned16(luma_ptr); in CflSubsampler420Impl_8xH_SSE4_1() 651 const __m128i samples_row0_lo = LoadUnaligned16(src); in CflSubsampler420Impl_WxH_SSE4_1() 656 const __m128i samples_row0_hi = LoadUnaligned16(src + 16); in CflSubsampler420Impl_WxH_SSE4_1() 663 const __m128i samples_row1_lo = LoadUnaligned16(src_next); in CflSubsampler420Impl_WxH_SSE4_1() 668 const __m128i samples_row1_hi = LoadUnaligned16(src_next + 16); in CflSubsampler420Impl_WxH_SSE4_1() 697 LoadUnaligned16(luma_ptr - kCflLumaBufferStride); in CflSubsampler420Impl_WxH_SSE4_1() [all …]
|
D | intrapred_sse4.cc | 170 const __m128i vals = LoadUnaligned16(ref); in DcSum16_SSE4_1() 177 const __m128i vals1 = LoadUnaligned16(ref); in DcSum32_SSE4_1() 178 const __m128i vals2 = LoadUnaligned16(static_cast<const uint8_t*>(ref) + 16); in DcSum32_SSE4_1() 188 const __m128i vals1 = LoadUnaligned16(ref_ptr); in DcSum64_SSE4_1() 189 const __m128i vals2 = LoadUnaligned16(ref_ptr + 16); in DcSum64_SSE4_1() 190 const __m128i vals3 = LoadUnaligned16(ref_ptr + 32); in DcSum64_SSE4_1() 191 const __m128i vals4 = LoadUnaligned16(ref_ptr + 48); in DcSum64_SSE4_1() 437 LoadUnaligned16(static_cast<const uint8_t*>(column) + y); in ColStore32_SSE4_1() 466 LoadUnaligned16(static_cast<const uint8_t*>(column) + y); in ColStore64_SSE4_1() 793 const __m128i left = LoadUnaligned16(left_column); in Paeth4x16_SSE4_1() [all …]
|
D | intrapred_smooth_sse4.cc | 145 pixels[1] = LoadUnaligned16(left); in LoadSmoothPixels4() 173 const __m128i y_weights = LoadUnaligned16(weight_array + 12); in LoadSmoothWeights4() 300 pixels[2] = LoadUnaligned16(left); in LoadSmoothPixels8() 302 pixels[2] = LoadUnaligned16(left); in LoadSmoothPixels8() 305 pixels[6] = LoadUnaligned16(left + 16); in LoadSmoothPixels8() 323 __m128i loaded_weights = LoadUnaligned16(&weight_array[offset]); in LoadSmoothWeights8() 341 loaded_weights = LoadUnaligned16(weight_array + 12); in LoadSmoothWeights8() 348 const __m128i weight_lo = LoadUnaligned16(weight_array + 28); in LoadSmoothWeights8() 353 const __m128i weight_hi = LoadUnaligned16(weight_array + 44); in LoadSmoothWeights8() 765 const __m128i weights = LoadUnaligned16(kSmoothWeights + 12); in SmoothHorizontal16x4_SSE4_1() [all …]
|
D | convolve_avx2.cc | 213 SetrM128i(LoadUnaligned16(&src[x]), LoadUnaligned16(&src[x + 8])); in FilterHorizontal() 216 const __m256i src_long2 = SetrM128i(LoadUnaligned16(&src[x + 16]), in FilterHorizontal() 217 LoadUnaligned16(&src[x + 24])); in FilterHorizontal() 252 SetrM128i(LoadUnaligned16(&src[0]), LoadUnaligned16(&src[8])); in FilterHorizontal() 256 SetrM128i(LoadUnaligned16(&src[src_stride]), in FilterHorizontal() 257 LoadUnaligned16(&src[8 + src_stride])); in FilterHorizontal() 269 const __m256i src_long = SetrM128i(LoadUnaligned16(&src[0]), in FilterHorizontal() 270 LoadUnaligned16(&src[src_stride])); in FilterHorizontal() 274 LoadUnaligned16(&src[8]), LoadUnaligned16(&src[8 + src_stride])); in FilterHorizontal() 292 SetrM128i(LoadUnaligned16(&src[0]), LoadUnaligned16(&src[8])); in FilterHorizontal() [all …]
|
D | warp_sse4.cc | 112 const __m128i intermediate_0 = LoadUnaligned16(intermediate_result[y + k]); in WriteVerticalFilter() 114 LoadUnaligned16(intermediate_result[y + k + 1]); in WriteVerticalFilter() 179 f = LoadUnaligned16(kWarpedFilters[offset]); in VerticalFilter() 200 f = LoadUnaligned16(kWarpedFilters[offset]); in VerticalFilter() 309 const __m128i src_row_v = LoadUnaligned16(&src_row[ix4 - 7]); in WarpRegion3() 346 const __m128i src_row_v = LoadUnaligned16(&src_row[ix4 - 7]); in WarpRegion4()
|
D | obmc_sse4.cc | 162 const __m128i mask_val = LoadUnaligned16(mask + x); in OverlapBlendFromLeft_SSE4_1() 170 const __m128i pred_val = LoadUnaligned16(pred); in OverlapBlendFromLeft_SSE4_1() 171 const __m128i obmc_pred_val = LoadUnaligned16(obmc_pred); in OverlapBlendFromLeft_SSE4_1() 287 const __m128i pred_val = LoadUnaligned16(pred + x); in OverlapBlendFromTop_SSE4_1() 288 const __m128i obmc_pred_val = LoadUnaligned16(obmc_pred + x); in OverlapBlendFromTop_SSE4_1() 423 const __m128i pred_val = LoadUnaligned16(pred); in OverlapBlendFromLeft10bpp_SSE4_1() 424 const __m128i obmc_pred_val = LoadUnaligned16(obmc_pred); in OverlapBlendFromLeft10bpp_SSE4_1() 562 const __m128i pred_val = LoadUnaligned16(pred + x); in OverlapBlendFromTop10bpp_SSE4_1() 563 const __m128i obmc_pred_val = LoadUnaligned16(obmc_pred + x); in OverlapBlendFromTop10bpp_SSE4_1()
|
D | mask_blend_sse4.cc | 67 const __m128i row_vals = LoadUnaligned16(mask); in GetMask8() 74 const __m128i next_row_vals = LoadUnaligned16(mask + stride); in GetMask8() 94 const __m128i row_vals = LoadUnaligned16(mask); in GetInterIntraMask8() 101 const __m128i next_row_vals = LoadUnaligned16(mask + stride); in GetInterIntraMask8() 487 const __m128i row_vals = LoadUnaligned16(mask); in GetMask8() 494 const __m128i mask_val_0 = LoadUnaligned16(mask); in GetMask8() 495 const __m128i mask_val_1 = LoadUnaligned16(mask + stride); in GetMask8() 510 const __m128i pred_val_0 = LoadUnaligned16(pred_0); in WriteMaskBlendLine10bpp4x2_SSE4_1() 683 const __m128i pred_val_0 = LoadUnaligned16(pred_0 + x); in MaskBlend10bpp_SSE4_1() 684 const __m128i pred_val_1 = LoadUnaligned16(pred_1 + x); in MaskBlend10bpp_SSE4_1() [all …]
|
D | convolve_sse4.cc | 43 const __m128i src_long = LoadUnaligned16(src); in SumHorizontalTaps() 498 const __m128i v_src = LoadUnaligned16(&src[x]); in ConvolveCompoundCopy_SSE4() 1151 s[i] = LoadUnaligned16(src_y + i * src_stride); in ConvolveVerticalScale() 1346 const __m128i left = LoadUnaligned16(src); in HalfAddHorizontal() 1347 const __m128i right = LoadUnaligned16(src + 1); in HalfAddHorizontal() 1474 row[0] = LoadUnaligned16(src); in IntraBlockCopyVertical() 1477 row[1] = LoadUnaligned16(src); in IntraBlockCopyVertical() 1480 row[2] = LoadUnaligned16(src); in IntraBlockCopyVertical() 1482 row[3] = LoadUnaligned16(src); in IntraBlockCopyVertical() 1485 row[4] = LoadUnaligned16(src); in IntraBlockCopyVertical() [all …]
|
D | intra_edge_sse4.cc | 45 const __m128i edge_lo = LoadUnaligned16(source); in ComputeKernel1Store12() 81 const __m128i edge_lo = LoadUnaligned16(source); in ComputeKernel2Store12() 119 const __m128i edge_lo = LoadUnaligned16(source); in ComputeKernel3Store8() 217 const __m128i data = LoadUnaligned16(temp); in IntraEdgeUpsampler_SSE4_1()
|
D | intrapred_directional_sse4.cc | 187 const __m128i top_vals = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_Large() 229 const __m128i top_vals = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_Large() 290 const __m128i top_vals = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_SSE4_1() 344 const __m128i top_vals = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_SSE4_1() 354 vals = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_SSE4_1() 446 vals = LoadUnaligned16(left_column + left_base_y); in DirectionalZone3_8xH() 448 const __m128i top_vals = LoadUnaligned16(left_column + left_base_y); in DirectionalZone3_8xH() 579 const __m128i src_vals = LoadUnaligned16(source); in DirectionalZone2FromSource_SSE4_1() 1193 const __m128i values = LoadUnaligned16(top + top_base_x); in DirectionalZone1_4xH() 1270 const __m128i top_vals_0 = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_Large() [all …]
|
D | cdef_sse4.cc | 411 const __m128i division_table[2] = {LoadUnaligned16(kCdefDivisionTable), in CdefDirection_SSE4_1() 412 LoadUnaligned16(kCdefDivisionTable + 4)}; in CdefDirection_SSE4_1() 458 output[0] = LoadUnaligned16(src - y_0 * stride - x_0); in LoadDirection() 459 output[1] = LoadUnaligned16(src + y_0 * stride + x_0); in LoadDirection() 460 output[2] = LoadUnaligned16(src - y_1 * stride - x_1); in LoadDirection() 461 output[3] = LoadUnaligned16(src + y_1 * stride + x_1); in LoadDirection() 552 pixel = LoadUnaligned16(src); in CdefFilter_SSE4_1()
|
D | inverse_transform_sse4.cc | 71 x[i] = LoadUnaligned16(&src[i * stride + idx]); in LoadSrc() 72 x[i + 1] = LoadUnaligned16(&src[(i + 1) * stride + idx]); in LoadSrc() 73 x[i + 2] = LoadUnaligned16(&src[(i + 2) * stride + idx]); in LoadSrc() 74 x[i + 3] = LoadUnaligned16(&src[(i + 3) * stride + idx]); in LoadSrc() 278 const __m128i v_src = LoadUnaligned16(&dst[i]); in DctDcOnlyColumn() 1049 LoadUnaligned16(kAdst4DcOnlyMultiplier); in Adst4DcOnly() 1564 const __m128i v_src = LoadUnaligned16(dst); in Adst16DcOnlyColumn() 1593 const __m128i v_src = LoadUnaligned16(&dst[i * step]); in Identity4_SSE4_1() 1606 const __m128i v_src = LoadUnaligned16(&dst[i * step]); in Identity4_SSE4_1() 1669 const __m128i v_src = LoadUnaligned16(&source[row + j]); in Identity4ColumnStoreToFrame() [all …]
|
D | film_grain_sse4.cc | 62 inline __m128i LoadSource(const int16_t* src) { return LoadUnaligned16(src); } in LoadSource() 65 inline __m128i LoadSource(const uint16_t* src) { return LoadUnaligned16(src); } in LoadSource() 76 const __m128i src = LoadUnaligned16(luma); in GetAverageLuma() 104 _mm_hadd_epi16(LoadUnaligned16(luma), LoadUnaligned16(luma + 8)), 1); in GetAverageLuma() 106 return LoadUnaligned16(luma); in GetAverageLuma()
|
D | cdef_avx2.cc | 520 output[0] = LoadUnaligned16(src - y_0 * stride - x_0); in LoadDirection() 521 output[1] = LoadUnaligned16(src + y_0 * stride + x_0); in LoadDirection() 522 output[2] = LoadUnaligned16(src - y_1 * stride - x_1); in LoadDirection() 523 output[3] = LoadUnaligned16(src + y_1 * stride + x_1); in LoadDirection() 617 pixel_128 = LoadUnaligned16(src); in CdefFilter_AVX2()
|
D | common_sse4.inc | 77 inline __m128i LoadUnaligned16(const void* a) { 123 return MaskOverreads(LoadUnaligned16(source), over_read_in_bytes); 205 return LoadUnaligned16(kMask + n);
|
D | loop_filter_sse4.cc | 1025 __m128i x0 = LoadUnaligned16(dst - 8 + 0 * stride); in Vertical14() 1026 __m128i x1 = LoadUnaligned16(dst - 8 + 1 * stride); in Vertical14() 1027 __m128i x2 = LoadUnaligned16(dst - 8 + 2 * stride); in Vertical14() 1028 __m128i x3 = LoadUnaligned16(dst - 8 + 3 * stride); in Vertical14() 1547 __m128i x0 = LoadUnaligned16(dst - 3 + 0 * stride); in Vertical6() 1548 __m128i x1 = LoadUnaligned16(dst - 3 + 1 * stride); in Vertical6() 1549 __m128i x2 = LoadUnaligned16(dst - 3 + 2 * stride); in Vertical6() 1550 __m128i x3 = LoadUnaligned16(dst - 3 + 3 * stride); in Vertical6() 1797 __m128i x0 = LoadUnaligned16(dst - 4 + 0 * stride); in Vertical8() 1798 __m128i x1 = LoadUnaligned16(dst - 4 + 1 * stride); in Vertical8() [all …]
|
D | common_avx2.h | 58 using avx2::LoadUnaligned16;
|
D | motion_field_projection_sse4.cc | 95 mvs[0] = LoadUnaligned16(mv_int + 0); in GetPosition() 96 mvs[1] = LoadUnaligned16(mv_int + 4); in GetPosition() 201 const __m128i division_table = LoadUnaligned16(projection_divisions); in MotionFieldProjectionKernel_SSE4_1()
|
D | common_sse4.h | 111 using sse4::LoadUnaligned16;
|
D | loop_restoration_10bit_sse4.cc | 68 s[0] = LoadUnaligned16(src + x + 0); in WienerHorizontalTap7() 69 s[1] = LoadUnaligned16(src + x + 1); in WienerHorizontalTap7() 70 s[2] = LoadUnaligned16(src + x + 2); in WienerHorizontalTap7() 71 s[3] = LoadUnaligned16(src + x + 3); in WienerHorizontalTap7() 72 s[4] = LoadUnaligned16(src + x + 4); in WienerHorizontalTap7() 73 s[5] = LoadUnaligned16(src + x + 5); in WienerHorizontalTap7() 74 s[6] = LoadUnaligned16(src + x + 6); in WienerHorizontalTap7() 107 s[0] = LoadUnaligned16(src + x + 0); in WienerHorizontalTap5() 108 s[1] = LoadUnaligned16(src + x + 1); in WienerHorizontalTap5() 109 s[2] = LoadUnaligned16(src + x + 2); in WienerHorizontalTap5() [all …]
|
D | loop_restoration_sse4.cc | 112 s[0] = LoadUnaligned16(src + x + 0); in WienerHorizontalTap7() 113 s[1] = LoadUnaligned16(src + x + 1); in WienerHorizontalTap7() 114 s[2] = LoadUnaligned16(src + x + 2); in WienerHorizontalTap7() 115 s[3] = LoadUnaligned16(src + x + 3); in WienerHorizontalTap7() 116 s[4] = LoadUnaligned16(src + x + 4); in WienerHorizontalTap7() 117 s[5] = LoadUnaligned16(src + x + 5); in WienerHorizontalTap7() 118 s[6] = LoadUnaligned16(src + x + 6); in WienerHorizontalTap7() 150 s[0] = LoadUnaligned16(src + x + 0); in WienerHorizontalTap5() 151 s[1] = LoadUnaligned16(src + x + 1); in WienerHorizontalTap5() 152 s[2] = LoadUnaligned16(src + x + 2); in WienerHorizontalTap5() [all …]
|
D | average_blend_sse4.cc | 158 const __m128i pred_0 = LoadUnaligned16(prediction_0 + offset); in AverageBlendRow() 159 const __m128i pred_1 = LoadUnaligned16(prediction_1 + offset); in AverageBlendRow()
|
D | super_res_sse4.cc | 252 LoadUnaligned16(&src[subpixel_x >> kSuperResScaleBits]); in SuperRes_SSE4_1()
|
/external/libgav1/libgav1/src/utils/ |
D | entropy_decoder.cc | 342 inline __m128i LoadUnaligned16(const void* a) { in LoadUnaligned16() function 384 __m128i cdf_vec = LoadUnaligned16(cdf); in UpdateCdf7To9() 416 __m128i cdf_vec = LoadUnaligned16(cdf + 2); in UpdateCdf11() 452 __m128i cdf_vec1 = LoadUnaligned16(cdf + 4); in UpdateCdf13() 482 __m128i cdf_vec0 = LoadUnaligned16(cdf); in UpdateCdf16() 499 __m128i cdf_vec1 = LoadUnaligned16(cdf + 8); in UpdateCdf16()
|