Home
last modified time | relevance | path

Searched refs:LoadUnaligned16 (Results 1 – 24 of 24) sorted by relevance

/external/libgav1/libgav1/src/dsp/x86/
Dintrapred_cfl_sse4.cc82 const __m128i ac_q3 = LoadUnaligned16(input); in CflPredictUnclipped()
286 const __m128i samples = LoadUnaligned16(luma_ptr); in CflSubsampler444_8xH_SSE4_1()
411 __m128i samples = LoadUnaligned16(&luma_ptr[x]); in CflSubsampler444_SSE4_1()
592 const __m128i final_fill = LoadUnaligned16(luma_ptr - kCflLumaBufferStride); in CflSubsampler420Impl_8xH_SSE4_1()
614 const __m128i samples = LoadUnaligned16(luma_ptr); in CflSubsampler420Impl_8xH_SSE4_1()
651 const __m128i samples_row0_lo = LoadUnaligned16(src); in CflSubsampler420Impl_WxH_SSE4_1()
656 const __m128i samples_row0_hi = LoadUnaligned16(src + 16); in CflSubsampler420Impl_WxH_SSE4_1()
663 const __m128i samples_row1_lo = LoadUnaligned16(src_next); in CflSubsampler420Impl_WxH_SSE4_1()
668 const __m128i samples_row1_hi = LoadUnaligned16(src_next + 16); in CflSubsampler420Impl_WxH_SSE4_1()
697 LoadUnaligned16(luma_ptr - kCflLumaBufferStride); in CflSubsampler420Impl_WxH_SSE4_1()
[all …]
Dintrapred_sse4.cc170 const __m128i vals = LoadUnaligned16(ref); in DcSum16_SSE4_1()
177 const __m128i vals1 = LoadUnaligned16(ref); in DcSum32_SSE4_1()
178 const __m128i vals2 = LoadUnaligned16(static_cast<const uint8_t*>(ref) + 16); in DcSum32_SSE4_1()
188 const __m128i vals1 = LoadUnaligned16(ref_ptr); in DcSum64_SSE4_1()
189 const __m128i vals2 = LoadUnaligned16(ref_ptr + 16); in DcSum64_SSE4_1()
190 const __m128i vals3 = LoadUnaligned16(ref_ptr + 32); in DcSum64_SSE4_1()
191 const __m128i vals4 = LoadUnaligned16(ref_ptr + 48); in DcSum64_SSE4_1()
437 LoadUnaligned16(static_cast<const uint8_t*>(column) + y); in ColStore32_SSE4_1()
466 LoadUnaligned16(static_cast<const uint8_t*>(column) + y); in ColStore64_SSE4_1()
793 const __m128i left = LoadUnaligned16(left_column); in Paeth4x16_SSE4_1()
[all …]
Dintrapred_smooth_sse4.cc145 pixels[1] = LoadUnaligned16(left); in LoadSmoothPixels4()
173 const __m128i y_weights = LoadUnaligned16(weight_array + 12); in LoadSmoothWeights4()
300 pixels[2] = LoadUnaligned16(left); in LoadSmoothPixels8()
302 pixels[2] = LoadUnaligned16(left); in LoadSmoothPixels8()
305 pixels[6] = LoadUnaligned16(left + 16); in LoadSmoothPixels8()
323 __m128i loaded_weights = LoadUnaligned16(&weight_array[offset]); in LoadSmoothWeights8()
341 loaded_weights = LoadUnaligned16(weight_array + 12); in LoadSmoothWeights8()
348 const __m128i weight_lo = LoadUnaligned16(weight_array + 28); in LoadSmoothWeights8()
353 const __m128i weight_hi = LoadUnaligned16(weight_array + 44); in LoadSmoothWeights8()
765 const __m128i weights = LoadUnaligned16(kSmoothWeights + 12); in SmoothHorizontal16x4_SSE4_1()
[all …]
Dconvolve_avx2.cc213 SetrM128i(LoadUnaligned16(&src[x]), LoadUnaligned16(&src[x + 8])); in FilterHorizontal()
216 const __m256i src_long2 = SetrM128i(LoadUnaligned16(&src[x + 16]), in FilterHorizontal()
217 LoadUnaligned16(&src[x + 24])); in FilterHorizontal()
252 SetrM128i(LoadUnaligned16(&src[0]), LoadUnaligned16(&src[8])); in FilterHorizontal()
256 SetrM128i(LoadUnaligned16(&src[src_stride]), in FilterHorizontal()
257 LoadUnaligned16(&src[8 + src_stride])); in FilterHorizontal()
269 const __m256i src_long = SetrM128i(LoadUnaligned16(&src[0]), in FilterHorizontal()
270 LoadUnaligned16(&src[src_stride])); in FilterHorizontal()
274 LoadUnaligned16(&src[8]), LoadUnaligned16(&src[8 + src_stride])); in FilterHorizontal()
292 SetrM128i(LoadUnaligned16(&src[0]), LoadUnaligned16(&src[8])); in FilterHorizontal()
[all …]
Dwarp_sse4.cc112 const __m128i intermediate_0 = LoadUnaligned16(intermediate_result[y + k]); in WriteVerticalFilter()
114 LoadUnaligned16(intermediate_result[y + k + 1]); in WriteVerticalFilter()
179 f = LoadUnaligned16(kWarpedFilters[offset]); in VerticalFilter()
200 f = LoadUnaligned16(kWarpedFilters[offset]); in VerticalFilter()
309 const __m128i src_row_v = LoadUnaligned16(&src_row[ix4 - 7]); in WarpRegion3()
346 const __m128i src_row_v = LoadUnaligned16(&src_row[ix4 - 7]); in WarpRegion4()
Dobmc_sse4.cc162 const __m128i mask_val = LoadUnaligned16(mask + x); in OverlapBlendFromLeft_SSE4_1()
170 const __m128i pred_val = LoadUnaligned16(pred); in OverlapBlendFromLeft_SSE4_1()
171 const __m128i obmc_pred_val = LoadUnaligned16(obmc_pred); in OverlapBlendFromLeft_SSE4_1()
287 const __m128i pred_val = LoadUnaligned16(pred + x); in OverlapBlendFromTop_SSE4_1()
288 const __m128i obmc_pred_val = LoadUnaligned16(obmc_pred + x); in OverlapBlendFromTop_SSE4_1()
423 const __m128i pred_val = LoadUnaligned16(pred); in OverlapBlendFromLeft10bpp_SSE4_1()
424 const __m128i obmc_pred_val = LoadUnaligned16(obmc_pred); in OverlapBlendFromLeft10bpp_SSE4_1()
562 const __m128i pred_val = LoadUnaligned16(pred + x); in OverlapBlendFromTop10bpp_SSE4_1()
563 const __m128i obmc_pred_val = LoadUnaligned16(obmc_pred + x); in OverlapBlendFromTop10bpp_SSE4_1()
Dmask_blend_sse4.cc67 const __m128i row_vals = LoadUnaligned16(mask); in GetMask8()
74 const __m128i next_row_vals = LoadUnaligned16(mask + stride); in GetMask8()
94 const __m128i row_vals = LoadUnaligned16(mask); in GetInterIntraMask8()
101 const __m128i next_row_vals = LoadUnaligned16(mask + stride); in GetInterIntraMask8()
487 const __m128i row_vals = LoadUnaligned16(mask); in GetMask8()
494 const __m128i mask_val_0 = LoadUnaligned16(mask); in GetMask8()
495 const __m128i mask_val_1 = LoadUnaligned16(mask + stride); in GetMask8()
510 const __m128i pred_val_0 = LoadUnaligned16(pred_0); in WriteMaskBlendLine10bpp4x2_SSE4_1()
683 const __m128i pred_val_0 = LoadUnaligned16(pred_0 + x); in MaskBlend10bpp_SSE4_1()
684 const __m128i pred_val_1 = LoadUnaligned16(pred_1 + x); in MaskBlend10bpp_SSE4_1()
[all …]
Dconvolve_sse4.cc43 const __m128i src_long = LoadUnaligned16(src); in SumHorizontalTaps()
498 const __m128i v_src = LoadUnaligned16(&src[x]); in ConvolveCompoundCopy_SSE4()
1151 s[i] = LoadUnaligned16(src_y + i * src_stride); in ConvolveVerticalScale()
1346 const __m128i left = LoadUnaligned16(src); in HalfAddHorizontal()
1347 const __m128i right = LoadUnaligned16(src + 1); in HalfAddHorizontal()
1474 row[0] = LoadUnaligned16(src); in IntraBlockCopyVertical()
1477 row[1] = LoadUnaligned16(src); in IntraBlockCopyVertical()
1480 row[2] = LoadUnaligned16(src); in IntraBlockCopyVertical()
1482 row[3] = LoadUnaligned16(src); in IntraBlockCopyVertical()
1485 row[4] = LoadUnaligned16(src); in IntraBlockCopyVertical()
[all …]
Dintra_edge_sse4.cc45 const __m128i edge_lo = LoadUnaligned16(source); in ComputeKernel1Store12()
81 const __m128i edge_lo = LoadUnaligned16(source); in ComputeKernel2Store12()
119 const __m128i edge_lo = LoadUnaligned16(source); in ComputeKernel3Store8()
217 const __m128i data = LoadUnaligned16(temp); in IntraEdgeUpsampler_SSE4_1()
Dintrapred_directional_sse4.cc187 const __m128i top_vals = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_Large()
229 const __m128i top_vals = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_Large()
290 const __m128i top_vals = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_SSE4_1()
344 const __m128i top_vals = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_SSE4_1()
354 vals = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_SSE4_1()
446 vals = LoadUnaligned16(left_column + left_base_y); in DirectionalZone3_8xH()
448 const __m128i top_vals = LoadUnaligned16(left_column + left_base_y); in DirectionalZone3_8xH()
579 const __m128i src_vals = LoadUnaligned16(source); in DirectionalZone2FromSource_SSE4_1()
1193 const __m128i values = LoadUnaligned16(top + top_base_x); in DirectionalZone1_4xH()
1270 const __m128i top_vals_0 = LoadUnaligned16(top_row + top_base_x); in DirectionalZone1_Large()
[all …]
Dcdef_sse4.cc411 const __m128i division_table[2] = {LoadUnaligned16(kCdefDivisionTable), in CdefDirection_SSE4_1()
412 LoadUnaligned16(kCdefDivisionTable + 4)}; in CdefDirection_SSE4_1()
458 output[0] = LoadUnaligned16(src - y_0 * stride - x_0); in LoadDirection()
459 output[1] = LoadUnaligned16(src + y_0 * stride + x_0); in LoadDirection()
460 output[2] = LoadUnaligned16(src - y_1 * stride - x_1); in LoadDirection()
461 output[3] = LoadUnaligned16(src + y_1 * stride + x_1); in LoadDirection()
552 pixel = LoadUnaligned16(src); in CdefFilter_SSE4_1()
Dinverse_transform_sse4.cc71 x[i] = LoadUnaligned16(&src[i * stride + idx]); in LoadSrc()
72 x[i + 1] = LoadUnaligned16(&src[(i + 1) * stride + idx]); in LoadSrc()
73 x[i + 2] = LoadUnaligned16(&src[(i + 2) * stride + idx]); in LoadSrc()
74 x[i + 3] = LoadUnaligned16(&src[(i + 3) * stride + idx]); in LoadSrc()
278 const __m128i v_src = LoadUnaligned16(&dst[i]); in DctDcOnlyColumn()
1049 LoadUnaligned16(kAdst4DcOnlyMultiplier); in Adst4DcOnly()
1564 const __m128i v_src = LoadUnaligned16(dst); in Adst16DcOnlyColumn()
1593 const __m128i v_src = LoadUnaligned16(&dst[i * step]); in Identity4_SSE4_1()
1606 const __m128i v_src = LoadUnaligned16(&dst[i * step]); in Identity4_SSE4_1()
1669 const __m128i v_src = LoadUnaligned16(&source[row + j]); in Identity4ColumnStoreToFrame()
[all …]
Dfilm_grain_sse4.cc62 inline __m128i LoadSource(const int16_t* src) { return LoadUnaligned16(src); } in LoadSource()
65 inline __m128i LoadSource(const uint16_t* src) { return LoadUnaligned16(src); } in LoadSource()
76 const __m128i src = LoadUnaligned16(luma); in GetAverageLuma()
104 _mm_hadd_epi16(LoadUnaligned16(luma), LoadUnaligned16(luma + 8)), 1); in GetAverageLuma()
106 return LoadUnaligned16(luma); in GetAverageLuma()
Dcdef_avx2.cc520 output[0] = LoadUnaligned16(src - y_0 * stride - x_0); in LoadDirection()
521 output[1] = LoadUnaligned16(src + y_0 * stride + x_0); in LoadDirection()
522 output[2] = LoadUnaligned16(src - y_1 * stride - x_1); in LoadDirection()
523 output[3] = LoadUnaligned16(src + y_1 * stride + x_1); in LoadDirection()
617 pixel_128 = LoadUnaligned16(src); in CdefFilter_AVX2()
Dcommon_sse4.inc77 inline __m128i LoadUnaligned16(const void* a) {
123 return MaskOverreads(LoadUnaligned16(source), over_read_in_bytes);
205 return LoadUnaligned16(kMask + n);
Dloop_filter_sse4.cc1025 __m128i x0 = LoadUnaligned16(dst - 8 + 0 * stride); in Vertical14()
1026 __m128i x1 = LoadUnaligned16(dst - 8 + 1 * stride); in Vertical14()
1027 __m128i x2 = LoadUnaligned16(dst - 8 + 2 * stride); in Vertical14()
1028 __m128i x3 = LoadUnaligned16(dst - 8 + 3 * stride); in Vertical14()
1547 __m128i x0 = LoadUnaligned16(dst - 3 + 0 * stride); in Vertical6()
1548 __m128i x1 = LoadUnaligned16(dst - 3 + 1 * stride); in Vertical6()
1549 __m128i x2 = LoadUnaligned16(dst - 3 + 2 * stride); in Vertical6()
1550 __m128i x3 = LoadUnaligned16(dst - 3 + 3 * stride); in Vertical6()
1797 __m128i x0 = LoadUnaligned16(dst - 4 + 0 * stride); in Vertical8()
1798 __m128i x1 = LoadUnaligned16(dst - 4 + 1 * stride); in Vertical8()
[all …]
Dcommon_avx2.h58 using avx2::LoadUnaligned16;
Dmotion_field_projection_sse4.cc95 mvs[0] = LoadUnaligned16(mv_int + 0); in GetPosition()
96 mvs[1] = LoadUnaligned16(mv_int + 4); in GetPosition()
201 const __m128i division_table = LoadUnaligned16(projection_divisions); in MotionFieldProjectionKernel_SSE4_1()
Dcommon_sse4.h111 using sse4::LoadUnaligned16;
Dloop_restoration_10bit_sse4.cc68 s[0] = LoadUnaligned16(src + x + 0); in WienerHorizontalTap7()
69 s[1] = LoadUnaligned16(src + x + 1); in WienerHorizontalTap7()
70 s[2] = LoadUnaligned16(src + x + 2); in WienerHorizontalTap7()
71 s[3] = LoadUnaligned16(src + x + 3); in WienerHorizontalTap7()
72 s[4] = LoadUnaligned16(src + x + 4); in WienerHorizontalTap7()
73 s[5] = LoadUnaligned16(src + x + 5); in WienerHorizontalTap7()
74 s[6] = LoadUnaligned16(src + x + 6); in WienerHorizontalTap7()
107 s[0] = LoadUnaligned16(src + x + 0); in WienerHorizontalTap5()
108 s[1] = LoadUnaligned16(src + x + 1); in WienerHorizontalTap5()
109 s[2] = LoadUnaligned16(src + x + 2); in WienerHorizontalTap5()
[all …]
Dloop_restoration_sse4.cc112 s[0] = LoadUnaligned16(src + x + 0); in WienerHorizontalTap7()
113 s[1] = LoadUnaligned16(src + x + 1); in WienerHorizontalTap7()
114 s[2] = LoadUnaligned16(src + x + 2); in WienerHorizontalTap7()
115 s[3] = LoadUnaligned16(src + x + 3); in WienerHorizontalTap7()
116 s[4] = LoadUnaligned16(src + x + 4); in WienerHorizontalTap7()
117 s[5] = LoadUnaligned16(src + x + 5); in WienerHorizontalTap7()
118 s[6] = LoadUnaligned16(src + x + 6); in WienerHorizontalTap7()
150 s[0] = LoadUnaligned16(src + x + 0); in WienerHorizontalTap5()
151 s[1] = LoadUnaligned16(src + x + 1); in WienerHorizontalTap5()
152 s[2] = LoadUnaligned16(src + x + 2); in WienerHorizontalTap5()
[all …]
Daverage_blend_sse4.cc158 const __m128i pred_0 = LoadUnaligned16(prediction_0 + offset); in AverageBlendRow()
159 const __m128i pred_1 = LoadUnaligned16(prediction_1 + offset); in AverageBlendRow()
Dsuper_res_sse4.cc252 LoadUnaligned16(&src[subpixel_x >> kSuperResScaleBits]); in SuperRes_SSE4_1()
/external/libgav1/libgav1/src/utils/
Dentropy_decoder.cc342 inline __m128i LoadUnaligned16(const void* a) { in LoadUnaligned16() function
384 __m128i cdf_vec = LoadUnaligned16(cdf); in UpdateCdf7To9()
416 __m128i cdf_vec = LoadUnaligned16(cdf + 2); in UpdateCdf11()
452 __m128i cdf_vec1 = LoadUnaligned16(cdf + 4); in UpdateCdf13()
482 __m128i cdf_vec0 = LoadUnaligned16(cdf); in UpdateCdf16()
499 __m128i cdf_vec1 = LoadUnaligned16(cdf + 8); in UpdateCdf16()