/external/libgav1/libgav1/src/dsp/x86/ |
D | intrapred_cfl_sse4.cc | 183 const __m128i samples = LoadLo8(luma_ptr); in CflSubsampler444_4xH_SSE4_1() 233 __m128i samples0 = LoadLo8(src); in CflSubsampler444_8xH_SSE4_1() 246 samples1 = LoadLo8(src); in CflSubsampler444_8xH_SSE4_1() 455 const __m128i samples_row0 = _mm_cvtepu8_epi16(LoadLo8(src)); in CflSubsampler420_4xH_SSE4_1() 457 const __m128i samples_row1 = _mm_cvtepu8_epi16(LoadLo8(src)); in CflSubsampler420_4xH_SSE4_1() 461 const __m128i samples_row2 = _mm_cvtepu8_epi16(LoadLo8(src)); in CflSubsampler420_4xH_SSE4_1() 463 const __m128i samples_row3 = _mm_cvtepu8_epi16(LoadLo8(src)); in CflSubsampler420_4xH_SSE4_1() 469 const __m128i samples_row4 = _mm_cvtepu8_epi16(LoadLo8(src)); in CflSubsampler420_4xH_SSE4_1() 471 const __m128i samples_row5 = _mm_cvtepu8_epi16(LoadLo8(src)); in CflSubsampler420_4xH_SSE4_1() 475 const __m128i samples_row6 = _mm_cvtepu8_epi16(LoadLo8(src)); in CflSubsampler420_4xH_SSE4_1() [all …]
|
D | obmc_sse4.cc | 115 const __m128i mask_val = LoadLo8(kObmcMask + 6); in OverlapBlendFromLeft8xH_SSE4_1() 121 const __m128i pred_val = LoadLo8(pred); in OverlapBlendFromLeft8xH_SSE4_1() 122 const __m128i obmc_pred_val = LoadLo8(obmc_pred); in OverlapBlendFromLeft8xH_SSE4_1() 244 const __m128i pred_val = LoadLo8(pred); in OverlapBlendFromTop8xH_SSE4_1() 245 const __m128i obmc_pred_val = LoadLo8(obmc_pred); in OverlapBlendFromTop8xH_SSE4_1() 370 const __m128i pred_val = LoadHi8(LoadLo8(pred), pred + pred_stride); in OverlapBlendFromLeft4xH_SSE4_1() 372 LoadHi8(LoadLo8(obmc_pred), obmc_pred + obmc_pred_stride); in OverlapBlendFromLeft4xH_SSE4_1() 415 const __m128i mask_val = LoadLo8(mask + x); in OverlapBlendFromLeft10bpp_SSE4_1() 464 const __m128i pred_val = LoadHi8(LoadLo8(pred), pred + pred_stride); in OverlapBlendFromTop2xH_SSE4_1() 466 LoadHi8(LoadLo8(obmc_pred), obmc_pred + obmc_pred_stride); in OverlapBlendFromTop2xH_SSE4_1() [all …]
|
D | distance_weighted_blend_sse4.cc | 64 const __m128i src_00 = LoadLo8(pred_0); in DistanceWeightedBlend4xH_SSE4_1() 65 const __m128i src_10 = LoadLo8(pred_1); in DistanceWeightedBlend4xH_SSE4_1() 74 const __m128i src_01 = LoadLo8(pred_0); in DistanceWeightedBlend4xH_SSE4_1() 75 const __m128i src_11 = LoadLo8(pred_1); in DistanceWeightedBlend4xH_SSE4_1() 268 const __m128i src_00 = LoadLo8(pred_0); in DistanceWeightedBlend4xH_SSE4_1() 269 const __m128i src_10 = LoadLo8(pred_1); in DistanceWeightedBlend4xH_SSE4_1() 279 const __m128i src_01 = LoadLo8(pred_0); in DistanceWeightedBlend4xH_SSE4_1() 280 const __m128i src_11 = LoadLo8(pred_1); in DistanceWeightedBlend4xH_SSE4_1()
|
D | convolve_avx2.cc | 170 const __m128i input = LoadLo8(&src[2]); in FilterHorizontal() 551 LoadLo8(kHalfSubPixelFilters[filter_index][filter_id]); in DoHorizontalPass2xH() 576 LoadLo8(kHalfSubPixelFilters[filter_index][filter_id]); in DoHorizontalPass() 643 LoadLo8(kHalfSubPixelFilters[vert_filter_index][vertical_filter_id]); in Convolve2D_AVX2() 973 srcs[0] = _mm256_castsi128_si256(LoadLo8(src_x)); in FilterVertical8xH() 976 srcs[1] = _mm256_castsi128_si256(LoadLo8(src_x)); in FilterVertical8xH() 980 srcs[2] = _mm256_castsi128_si256(LoadLo8(src_x)); in FilterVertical8xH() 985 srcs[3] = _mm256_castsi128_si256(LoadLo8(src_x)); in FilterVertical8xH() 989 srcs[4] = _mm256_castsi128_si256(LoadLo8(src_x)); in FilterVertical8xH() 994 srcs[5] = _mm256_castsi128_si256(LoadLo8(src_x)); in FilterVertical8xH() [all …]
|
D | mask_blend_sse4.cc | 41 const __m128i mask_val_0 = _mm_cvtepu8_epi16(LoadLo8(mask)); in GetMask4x2() 43 _mm_cvtepu8_epi16(LoadLo8(mask + (mask_stride << subsampling_y))); in GetMask4x2() 47 _mm_cvtepu8_epi16(LoadLo8(mask + mask_stride)); in GetMask4x2() 49 _mm_cvtepu8_epi16(LoadLo8(mask + mask_stride * 3)); in GetMask4x2() 84 const __m128i mask_val = LoadLo8(mask); in GetMask8() 115 const __m128i mask_val = LoadLo8(mask); in GetInterIntraMask8() 287 const __m128i pred_val_0 = LoadLo8(pred_0); in InterIntraWriteMaskBlendLine8bpp4x2() 386 const __m128i pred_val_0 = LoadLo8(prediction_0 + x); in InterIntraMaskBlend8bpp_SSE4() 387 const __m128i pred_val_1 = LoadLo8(prediction_1 + x); in InterIntraMaskBlend8bpp_SSE4() 460 const __m128i mask_val_0 = _mm_cvtepu8_epi16(LoadLo8(mask)); in GetMask4x2() [all …]
|
D | loop_filter_sse4.cc | 429 __m128i x0 = LoadLo8(dst - 3 + 0 * stride); in Vertical6() 430 __m128i x1 = LoadLo8(dst - 3 + 1 * stride); in Vertical6() 431 __m128i x2 = LoadLo8(dst - 3 + 2 * stride); in Vertical6() 432 __m128i x3 = LoadLo8(dst - 3 + 3 * stride); in Vertical6() 673 __m128i x0 = LoadLo8(dst - 4 + 0 * stride); in Vertical8() 674 __m128i x1 = LoadLo8(dst - 4 + 1 * stride); in Vertical8() 675 __m128i x2 = LoadLo8(dst - 4 + 2 * stride); in Vertical8() 676 __m128i x3 = LoadLo8(dst - 4 + 3 * stride); in Vertical8() 1280 const __m128i p1 = LoadLo8(dst - 2 * stride); in Horizontal4() 1281 const __m128i p0 = LoadLo8(dst - 1 * stride); in Horizontal4() [all …]
|
D | intrapred_smooth_sse4.cc | 143 pixels[1] = LoadLo8(left); in LoadSmoothPixels4() 168 const __m128i y_weights = LoadLo8(weight_array + 4); in LoadSmoothWeights4() 289 __m128i top_row = _mm_cvtepu8_epi16(LoadLo8(above)); in LoadSmoothPixels8() 298 pixels[2] = LoadLo8(left); in LoadSmoothPixels8() 499 const __m128i top_x = LoadLo8(top_ptr + x); in SmoothWxH() 500 const __m128i weights_x = LoadLo8(sm_weights_w + x); in SmoothWxH() 646 const __m128i weights = _mm_cvtepu8_epi16(LoadLo8(kSmoothWeights + 4)); in SmoothHorizontal8x4_SSE4_1() 674 const __m128i left = _mm_cvtepu8_epi16(LoadLo8(left_column)); in SmoothHorizontal8x8_SSE4_1() 675 const __m128i weights = _mm_cvtepu8_epi16(LoadLo8(kSmoothWeights + 4)); in SmoothHorizontal8x8_SSE4_1() 694 const __m128i weights = _mm_cvtepu8_epi16(LoadLo8(kSmoothWeights + 4)); in SmoothHorizontal8x16_SSE4_1() [all …]
|
D | convolve_sse4.cc | 182 const __m128i input = LoadLo8(&src[2]); in FilterHorizontal() 215 LoadLo8(kHalfSubPixelFilters[filter_index][filter_id]); in DoHorizontalPass() 277 LoadLo8(kHalfSubPixelFilters[vert_filter_index][vertical_filter_id]); in Convolve2D_SSE4_1() 345 srcs[0] = LoadLo8(src_x); in FilterVertical() 348 srcs[1] = LoadLo8(src_x); in FilterVertical() 350 srcs[2] = LoadLo8(src_x); in FilterVertical() 353 srcs[3] = LoadLo8(src_x); in FilterVertical() 355 srcs[4] = LoadLo8(src_x); in FilterVertical() 358 srcs[5] = LoadLo8(src_x); in FilterVertical() 360 srcs[6] = LoadLo8(src_x); in FilterVertical() [all …]
|
D | cdef_avx2.cc | 285 i = _mm256_castsi128_si256(LoadLo8(src)); in AddPartial() 534 output[0] = LoadHi8(LoadLo8(src - y_0 * stride - x_0), in LoadDirection4() 536 output[1] = LoadHi8(LoadLo8(src + y_0 * stride + x_0), in LoadDirection4() 538 output[2] = LoadHi8(LoadLo8(src - y_1 * stride - x_1), in LoadDirection4() 540 output[3] = LoadHi8(LoadLo8(src + y_1 * stride + x_1), in LoadDirection4() 619 pixel_128 = LoadHi8(LoadLo8(src), src + src_stride); in CdefFilter_AVX2()
|
D | cdef_sse4.cc | 258 i = LoadLo8(src); in AddPartial() 472 output[0] = LoadHi8(LoadLo8(src - y_0 * stride - x_0), in LoadDirection4() 474 output[1] = LoadHi8(LoadLo8(src + y_0 * stride + x_0), in LoadDirection4() 476 output[2] = LoadHi8(LoadLo8(src - y_1 * stride - x_1), in LoadDirection4() 478 output[3] = LoadHi8(LoadLo8(src + y_1 * stride + x_1), in LoadDirection4() 554 pixel = LoadHi8(LoadLo8(src), src + src_stride); in CdefFilter_SSE4_1()
|
D | motion_field_projection_sse4.cc | 199 const __m128i skip_reference = LoadLo8(skip_references); in MotionFieldProjectionKernel_SSE4_1() 200 const __m128i r_offsets = LoadLo8(reference_offsets); in MotionFieldProjectionKernel_SSE4_1() 213 LoadLo8(source_reference_types + x8); in MotionFieldProjectionKernel_SSE4_1() 272 LoadLo8(source_reference_types + x8); in MotionFieldProjectionKernel_SSE4_1()
|
D | inverse_transform_sse4.cc | 79 x[i] = LoadLo8(&src[i * stride + idx]); in LoadSrc() 80 x[i + 1] = LoadLo8(&src[(i + 1) * stride + idx]); in LoadSrc() 81 x[i + 2] = LoadLo8(&src[(i + 2) * stride + idx]); in LoadSrc() 82 x[i + 3] = LoadLo8(&src[(i + 3) * stride + idx]); in LoadSrc() 272 const __m128i v_src = LoadLo8(dst); in DctDcOnlyColumn() 1072 const __m128i v_src = _mm_cvtepi16_epi32(LoadLo8(&dst[i])); in Adst4DcOnlyColumn() 1265 const __m128i v_src = LoadLo8(dst); in Adst8DcOnlyColumn() 1652 const __m128i v_src = LoadLo8(&source[i * tx_width]); in Identity4ColumnStoreToFrame() 1672 const __m128i frame_data = LoadLo8(dst + j); in Identity4ColumnStoreToFrame() 1701 const __m128i v_src = LoadLo8(&source[i * tx_width]); in Identity4RowColumnStoreToFrame() [all …]
|
D | intrapred_filter_sse4.cc | 107 __m128i left = (height == 4 ? Load4(left_ptr) : LoadLo8(left_ptr)); in Filter4xH() 186 left = LoadLo8(left_ptr + 8); in Filter4xH() 311 __m128i pixels = LoadLo8(top_ptr - 1); in FilterIntraPredictor_SSE4_1()
|
D | common_avx2.h | 56 using avx2::LoadLo8;
|
D | motion_vector_search_sse4.cc | 71 const __m128i temporal_mv = LoadLo8(tmvs); in MvProjectionCompoundClip() 80 const __m128i offsets = LoadLo8(reference_offsets); in MvProjectionCompoundClip()
|
D | intra_edge_sse4.cc | 230 const __m128i src_hi_extra = _mm_cvtepu8_epi16(LoadLo8(temp + 16)); in IntraEdgeUpsampler_SSE4_1() 239 _mm_unpacklo_epi8(_mm_packus_epi16(sum_hi, sum_hi), LoadLo8(temp + 10)); in IntraEdgeUpsampler_SSE4_1()
|
D | common_sse4.h | 109 using sse4::LoadLo8;
|
D | film_grain_sse4.cc | 43 return _mm_cvtepi8_epi16(LoadLo8(src)); in LoadSource() 48 return _mm_cvtepu8_epi16(LoadLo8(src)); in LoadSource() 83 return _mm_cvtepu8_epi16(LoadLo8(luma)); in GetAverageLuma()
|
D | common_sse4.inc | 67 inline __m128i LoadLo8(const void* a) { 108 return MaskOverreads(LoadLo8(source), over_read_in_bytes + 8);
|
D | average_blend_sse4.cc | 40 const __m128i pred_0 = LoadLo8(prediction_0); in AverageBlend4Row() 41 const __m128i pred_1 = LoadLo8(prediction_1); in AverageBlend4Row()
|
D | intrapred_sse4.cc | 163 const __m128i vals = LoadLo8(ref); in DcSum8_SSE4_1() 399 const __m128i col_data = LoadLo8(column); in ColStore8_SSE4_1() 747 const __m128i left = LoadLo8(left_column); in Paeth4x8_SSE4_1() 868 const __m128i top = _mm_cvtepu8_epi16(LoadLo8(top_row)); in Paeth8x4_SSE4_1() 896 const __m128i left = _mm_cvtepu8_epi16(LoadLo8(left_column)); in Paeth8x8_SSE4_1() 897 const __m128i top = _mm_cvtepu8_epi16(LoadLo8(top_row)); in Paeth8x8_SSE4_1() 941 const __m128i top = _mm_cvtepu8_epi16(LoadLo8(top_row)); in Paeth8x16_SSE4_1() 1122 const __m128i left = LoadLo8(left_column); in Paeth16x8_SSE4_1() 1264 const __m128i left = LoadLo8(left_column); in Paeth32x8_SSE4_1() 1942 const __m128i col_data = LoadLo8(column); in ColStore4_SSE4_1()
|
D | super_res_sse4.cc | 82 LoadLo8(kNegativeUpscaleFilter[remainder >> kSuperResExtraBits]); in SuperResCoefficients_SSE4_1()
|
D | intrapred_directional_sse4.cc | 130 const __m128i values = LoadLo8(top + top_base_x); in DirectionalZone1_4xH() 356 const __m128i top_vals = LoadLo8(top_row + top_base_x); in DirectionalZone1_SSE4_1() 401 vals = LoadLo8(left_column + left_base_y); in DirectionalZone3_4x4() 403 const __m128i top_vals = LoadLo8(left_column + left_base_y); in DirectionalZone3_4x4() 567 const __m128i original_vals = _mm_cvtepu8_epi16(LoadLo8(dest)); in DirectionalBlend8_SSE4_1()
|
D | warp_sse4.cc | 68 f = LoadLo8(kWarpedFilters8[offset]); in HorizontalFilter()
|
/external/libgav1/libgav1/src/utils/ |
D | entropy_decoder.cc | 338 inline __m128i LoadLo8(const void* a) { in LoadLo8() function 355 __m128i cdf_vec = LoadLo8(cdf); in UpdateCdf5() 451 __m128i cdf_vec0 = LoadLo8(cdf); in UpdateCdf13() 862 __m128i cdf_vec = LoadLo8(cdf); in ReadSymbol3Or4() 946 __m128i cdf_vec = LoadLo8(cdf); in ReadSymbol3Or4()
|