/external/libvpx/libvpx/vpx_dsp/arm/ |
D | vpx_convolve8_neon.c | 87 tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); in vpx_convolve8_horiz_neon() 88 tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); in vpx_convolve8_horiz_neon() 89 tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); in vpx_convolve8_horiz_neon() 90 tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); in vpx_convolve8_horiz_neon() 107 tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); in vpx_convolve8_horiz_neon() 108 tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); in vpx_convolve8_horiz_neon() 109 tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); in vpx_convolve8_horiz_neon() 110 tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); in vpx_convolve8_horiz_neon() 161 s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); in vpx_convolve8_horiz_neon() 162 s1 = vreinterpretq_s16_u16(vmovl_u8(t1)); in vpx_convolve8_horiz_neon() [all …]
|
D | vpx_convolve8_neon.h | 125 ss[0] = vreinterpretq_s16_u16(vmovl_u8(s[0])); in scale_filter_8() 126 ss[1] = vreinterpretq_s16_u16(vmovl_u8(s[1])); in scale_filter_8() 127 ss[2] = vreinterpretq_s16_u16(vmovl_u8(s[2])); in scale_filter_8() 128 ss[3] = vreinterpretq_s16_u16(vmovl_u8(s[3])); in scale_filter_8() 129 ss[4] = vreinterpretq_s16_u16(vmovl_u8(s[4])); in scale_filter_8() 130 ss[5] = vreinterpretq_s16_u16(vmovl_u8(s[5])); in scale_filter_8() 131 ss[6] = vreinterpretq_s16_u16(vmovl_u8(s[6])); in scale_filter_8() 132 ss[7] = vreinterpretq_s16_u16(vmovl_u8(s[7])); in scale_filter_8()
|
D | vpx_scaled_convolve8_neon.c | 50 ss[0] = vreinterpretq_s16_u16(vmovl_u8(s[0])); in scaledconvolve_horiz_w4() 51 ss[1] = vreinterpretq_s16_u16(vmovl_u8(s[1])); in scaledconvolve_horiz_w4() 52 ss[2] = vreinterpretq_s16_u16(vmovl_u8(s[2])); in scaledconvolve_horiz_w4() 53 ss[3] = vreinterpretq_s16_u16(vmovl_u8(s[3])); in scaledconvolve_horiz_w4() 177 t[0] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[0]))); in scaledconvolve_vert_w4() 178 t[1] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[1]))); in scaledconvolve_vert_w4() 179 t[2] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[2]))); in scaledconvolve_vert_w4() 180 t[3] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[3]))); in scaledconvolve_vert_w4() 181 t[4] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[4]))); in scaledconvolve_vert_w4() 182 t[5] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[5]))); in scaledconvolve_vert_w4() [all …]
|
D | idct_neon.h | 250 c[0] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[0])), a[0], 6); in add_and_store_u8_s16() 251 c[1] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[1])), a[1], 6); in add_and_store_u8_s16() 252 c[2] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[2])), a[2], 6); in add_and_store_u8_s16() 253 c[3] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[3])), a[3], 6); in add_and_store_u8_s16() 254 c[4] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[4])), a[4], 6); in add_and_store_u8_s16() 255 c[5] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[5])), a[5], 6); in add_and_store_u8_s16() 256 c[6] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[6])), a[6], 6); in add_and_store_u8_s16() 257 c[7] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[7])), a[7], 6); in add_and_store_u8_s16() 852 const uint16x8_t d = vmovl_u8(vqmovun_s16(res)); in highbd_idct16x16_add8x1_bd8()
|
/external/libaom/libaom/av1/common/arm/ |
D | convolve_neon.c | 237 s0 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); in av1_convolve_x_sr_neon() 238 s1 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1))); in av1_convolve_x_sr_neon() 239 s2 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2))); in av1_convolve_x_sr_neon() 240 s3 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t3))); in av1_convolve_x_sr_neon() 241 s4 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); in av1_convolve_x_sr_neon() 242 s5 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t1))); in av1_convolve_x_sr_neon() 243 s6 = vget_high_s16(vreinterpretq_s16_u16(vmovl_u8(t2))); in av1_convolve_x_sr_neon() 254 s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); in av1_convolve_x_sr_neon() 255 s8 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t1))); in av1_convolve_x_sr_neon() 256 s9 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t2))); in av1_convolve_x_sr_neon() [all …]
|
D | jnt_convolve_neon.c | 355 tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); in dist_wtd_convolve_2d_horiz_neon() 356 tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); in dist_wtd_convolve_2d_horiz_neon() 357 tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); in dist_wtd_convolve_2d_horiz_neon() 358 tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); in dist_wtd_convolve_2d_horiz_neon() 374 tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); in dist_wtd_convolve_2d_horiz_neon() 375 tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); in dist_wtd_convolve_2d_horiz_neon() 376 tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); in dist_wtd_convolve_2d_horiz_neon() 377 tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); in dist_wtd_convolve_2d_horiz_neon() 404 tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); // a0 a1 a2 a3 a4 a5 a6 a7 in dist_wtd_convolve_2d_horiz_neon() 412 s7 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(t0))); in dist_wtd_convolve_2d_horiz_neon() [all …]
|
D | wiener_convolve_neon.c | 121 res3 = vreinterpretq_s16_u16(vmovl_u8(t3)); in av1_wiener_convolve_add_src_neon() 128 res3 = vreinterpretq_s16_u16(vmovl_u8(t4)); in av1_wiener_convolve_add_src_neon() 135 res3 = vreinterpretq_s16_u16(vmovl_u8(t5)); in av1_wiener_convolve_add_src_neon() 142 res3 = vreinterpretq_s16_u16(vmovl_u8(t6)); in av1_wiener_convolve_add_src_neon() 149 res3 = vreinterpretq_s16_u16(vmovl_u8(t7)); in av1_wiener_convolve_add_src_neon() 156 res3 = vreinterpretq_s16_u16(vmovl_u8(t8)); in av1_wiener_convolve_add_src_neon() 163 res3 = vreinterpretq_s16_u16(vmovl_u8(t9)); in av1_wiener_convolve_add_src_neon() 170 res3 = vreinterpretq_s16_u16(vmovl_u8(t10)); in av1_wiener_convolve_add_src_neon() 225 res3 = vreinterpretq_s16_u16(vmovl_u8(t3)); in av1_wiener_convolve_add_src_neon() 262 tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); in av1_wiener_convolve_add_src_neon() [all …]
|
/external/libhevc/common/arm/ |
D | ihevc_inter_pred_chroma_copy_w16out.s | 141 vmovl.u8 q0,d0 @vmovl_u8(vld1_u8(pu1_src_tmp) 149 vmovl.u8 q11,d22 @vmovl_u8(vld1_u8(pu1_src_tmp) 152 vmovl.u8 q12,d24 @vmovl_u8(vld1_u8(pu1_src_tmp) 157 vmovl.u8 q13,d26 @vmovl_u8(vld1_u8(pu1_src_tmp) 183 vmovl.u8 q0,d0 @vmovl_u8(vld1_u8(pu1_src_tmp) 191 vmovl.u8 q11,d22 @vmovl_u8(vld1_u8(pu1_src_tmp) 194 vmovl.u8 q12,d24 @vmovl_u8(vld1_u8(pu1_src_tmp) 220 vmovl.u8 q8,d8 @vmovl_u8(vld1_u8(pu1_src_tmp)) 221 vmovl.u8 q9,d10 @vmovl_u8(vld1_u8(pu1_src_tmp) 222 vmovl.u8 q10,d12 @vmovl_u8(vld1_u8(pu1_src_tmp) [all …]
|
D | ihevc_inter_pred_luma_copy_w16out.s | 109 vmovl.u8 q0,d0 @vmovl_u8(vld1_u8(pu1_src_tmp) 117 vmovl.u8 q11,d22 @vmovl_u8(vld1_u8(pu1_src_tmp) 120 vmovl.u8 q12,d24 @vmovl_u8(vld1_u8(pu1_src_tmp) 125 vmovl.u8 q13,d26 @vmovl_u8(vld1_u8(pu1_src_tmp) 158 vmovl.u8 q8,d8 @vmovl_u8(vld1_u8(pu1_src_tmp)) 159 vmovl.u8 q9,d10 @vmovl_u8(vld1_u8(pu1_src_tmp) 160 vmovl.u8 q10,d12 @vmovl_u8(vld1_u8(pu1_src_tmp) 161 vmovl.u8 q11,d14 @vmovl_u8(vld1_u8(pu1_src_tmp) 188 vmovl.u8 q8,d8 @vmovl_u8(vld1_u8(pu1_src_tmp)) 191 vmovl.u8 q9,d10 @vmovl_u8(vld1_u8(pu1_src_tmp) [all …]
|
D | ihevc_sao_edge_offset_class1_chroma.s | 170 … @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c… 173 … @II pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_… 192 …D10 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c… 204 …D11 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_… 223 … @II pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_… 264 …D10 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c… 270 …D11 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_… 325 … @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c… 349 …D10 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c… 400 …D10 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c…
|
D | ihevc_sao_edge_offset_class1.s | 166 … @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c… 169 … @II pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_… 188 …D10 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c… 197 …11 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_… 247 …D10 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c… 253 …D11 @pi2_tmp_cur_row.val[1] = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pu1_… 308 … @II pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c… 325 …D10 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c… 365 …D10 @pi2_tmp_cur_row.val[0] = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pu1_c…
|
/external/XNNPACK/src/q8-dwconv/ |
D | up8x9-neon.c | 73 const int16x8_t vxi00 = vreinterpretq_s16_u16(vmovl_u8(vi00)); in xnn_q8_dwconv_ukernel_up8x9__neon() 74 const int16x8_t vxi01 = vreinterpretq_s16_u16(vmovl_u8(vi01)); in xnn_q8_dwconv_ukernel_up8x9__neon() 75 const int16x8_t vxi02 = vreinterpretq_s16_u16(vmovl_u8(vi02)); in xnn_q8_dwconv_ukernel_up8x9__neon() 88 const int16x8_t vxi10 = vreinterpretq_s16_u16(vmovl_u8(vi10)); in xnn_q8_dwconv_ukernel_up8x9__neon() 89 const int16x8_t vxi11 = vreinterpretq_s16_u16(vmovl_u8(vi11)); in xnn_q8_dwconv_ukernel_up8x9__neon() 90 const int16x8_t vxi12 = vreinterpretq_s16_u16(vmovl_u8(vi12)); in xnn_q8_dwconv_ukernel_up8x9__neon() 103 const int16x8_t vxi20 = vreinterpretq_s16_u16(vmovl_u8(vi20)); in xnn_q8_dwconv_ukernel_up8x9__neon() 104 const int16x8_t vxi21 = vreinterpretq_s16_u16(vmovl_u8(vi21)); in xnn_q8_dwconv_ukernel_up8x9__neon() 105 const int16x8_t vxi22 = vreinterpretq_s16_u16(vmovl_u8(vi22)); in xnn_q8_dwconv_ukernel_up8x9__neon() 116 const int16x8_t vxi03 = vreinterpretq_s16_u16(vmovl_u8(vi03)); in xnn_q8_dwconv_ukernel_up8x9__neon() [all …]
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/ |
D | depthwiseconv_uint8.h | 48 filter[i] = vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(filter_u8.val[i])), 62 const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); 91 const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8)); 110 input[i] = vreinterpretq_s16_u16(vmovl_u8(input_u8[i])); 138 const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); 159 const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8)); 173 const int16x8_t input_s16 = vreinterpretq_s16_u16(vmovl_u8(input_u8)); 205 vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); 231 const int16x8_t filter_s16 = vreinterpretq_s16_u16(vmovl_u8(filter_u8)); 250 vreinterpret_s16_u16(vget_low_u16(vmovl_u8(input_u8))); [all …]
|
/external/skia/src/core/ |
D | SkBlitRow_D32.cpp | 165 vsrc_wide = vmovl_u8(vsrc); in blit_row_s32_blend() 187 vsrc_wide = vmovl_u8(vsrc); in blit_row_s32_blend() 213 vsrc_wide = vmovl_u8(vsrc); in blit_row_s32a_blend() 216 vdst_wide = vmovl_u8(vdst); in blit_row_s32a_blend() 246 vdst_scale = vmovl_u8(vsrc_alphas); in blit_row_s32a_blend() 256 vsrc_wide = vmovl_u8(vsrc); in blit_row_s32a_blend() 259 vdst_wide = vmovl_u8(vdst); in blit_row_s32a_blend()
|
/external/skqp/src/core/ |
D | SkBlitRow_D32.cpp | 166 vsrc_wide = vmovl_u8(vsrc); in blit_row_s32_blend() 188 vsrc_wide = vmovl_u8(vsrc); in blit_row_s32_blend() 214 vsrc_wide = vmovl_u8(vsrc); in blit_row_s32a_blend() 217 vdst_wide = vmovl_u8(vdst); in blit_row_s32a_blend() 247 vdst_scale = vmovl_u8(vsrc_alphas); in blit_row_s32a_blend() 257 vsrc_wide = vmovl_u8(vsrc); in blit_row_s32a_blend() 260 vdst_wide = vmovl_u8(vdst); in blit_row_s32a_blend()
|
/external/libhevc/encoder/arm/ |
D | ihevce_hme_utils_neon.c | 137 src0_8x16b = vreinterpretq_s16_u16(vmovl_u8(src0_8x8b)); in ihevce_get_wt_inp_4x8_neon() 138 src1_8x16b = vreinterpretq_s16_u16(vmovl_u8(src1_8x8b)); in ihevce_get_wt_inp_4x8_neon() 139 src2_8x16b = vreinterpretq_s16_u16(vmovl_u8(src2_8x8b)); in ihevce_get_wt_inp_4x8_neon() 140 src3_8x16b = vreinterpretq_s16_u16(vmovl_u8(src3_8x8b)); in ihevce_get_wt_inp_4x8_neon() 567 src0_8x16b = vreinterpretq_s16_u16(vmovl_u8(src0_8x8b)); in hme_get_wt_inp_ctb_neon() 568 src1_8x16b = vreinterpretq_s16_u16(vmovl_u8(src1_8x8b)); in hme_get_wt_inp_ctb_neon() 569 src2_8x16b = vreinterpretq_s16_u16(vmovl_u8(src2_8x8b)); in hme_get_wt_inp_ctb_neon() 570 src3_8x16b = vreinterpretq_s16_u16(vmovl_u8(src3_8x8b)); in hme_get_wt_inp_ctb_neon() 700 src0_8x16b = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(src0_16x8b))); in hme_get_wt_inp_ctb_neon() 701 src1_8x16b = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(src0_16x8b))); in hme_get_wt_inp_ctb_neon()
|
D | ihevce_itrans_recon_neon.c | 67 a1 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(src_u8))); in ihevce_itrans_recon_dc_4x4_luma_neon() 68 a2 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(src_u8))); in ihevce_itrans_recon_dc_4x4_luma_neon() 98 a1 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pu1_pred + i * pred_strd))); in ihevce_itrans_recon_dc_4x4_chroma_neon()
|
/external/skqp/src/opts/ |
D | Sk4px_NEON.h | 11 return Sk16h(vmovl_u8(vget_low_u8 (this->fVec)), in widen() 12 vmovl_u8(vget_high_u8(this->fVec))); in widen()
|
/external/skia/src/opts/ |
D | Sk4px_NEON.h | 11 return Sk16h(vmovl_u8(vget_low_u8 (this->fVec)), in widen() 12 vmovl_u8(vget_high_u8(this->fVec))); in widen()
|
/external/XNNPACK/src/q8-igemm/ |
D | 8x8-neon.c | 134 const int16x8_t vxa0 = vreinterpretq_s16_u16(vmovl_u8(va0)); in xnn_q8_igemm_ukernel_8x8__neon() 135 const int16x8_t vxa1 = vreinterpretq_s16_u16(vmovl_u8(va1)); in xnn_q8_igemm_ukernel_8x8__neon() 136 const int16x8_t vxa2 = vreinterpretq_s16_u16(vmovl_u8(va2)); in xnn_q8_igemm_ukernel_8x8__neon() 137 const int16x8_t vxa3 = vreinterpretq_s16_u16(vmovl_u8(va3)); in xnn_q8_igemm_ukernel_8x8__neon() 138 const int16x8_t vxa4 = vreinterpretq_s16_u16(vmovl_u8(va4)); in xnn_q8_igemm_ukernel_8x8__neon() 139 const int16x8_t vxa5 = vreinterpretq_s16_u16(vmovl_u8(va5)); in xnn_q8_igemm_ukernel_8x8__neon() 140 const int16x8_t vxa6 = vreinterpretq_s16_u16(vmovl_u8(va6)); in xnn_q8_igemm_ukernel_8x8__neon() 141 const int16x8_t vxa7 = vreinterpretq_s16_u16(vmovl_u8(va7)); in xnn_q8_igemm_ukernel_8x8__neon() 330 const int16x8_t vxa0 = vreinterpretq_s16_u16(vmovl_u8(va0)); in xnn_q8_igemm_ukernel_8x8__neon() 331 const int16x8_t vxa1 = vreinterpretq_s16_u16(vmovl_u8(va1)); in xnn_q8_igemm_ukernel_8x8__neon() [all …]
|
D | 4x8-neon.c | 90 const int16x8_t vxa0 = vreinterpretq_s16_u16(vmovl_u8(va0)); in xnn_q8_igemm_ukernel_4x8__neon() 91 const int16x8_t vxa1 = vreinterpretq_s16_u16(vmovl_u8(va1)); in xnn_q8_igemm_ukernel_4x8__neon() 92 const int16x8_t vxa2 = vreinterpretq_s16_u16(vmovl_u8(va2)); in xnn_q8_igemm_ukernel_4x8__neon() 93 const int16x8_t vxa3 = vreinterpretq_s16_u16(vmovl_u8(va3)); in xnn_q8_igemm_ukernel_4x8__neon() 214 const int16x8_t vxa0 = vreinterpretq_s16_u16(vmovl_u8(va0)); in xnn_q8_igemm_ukernel_4x8__neon() 215 const int16x8_t vxa1 = vreinterpretq_s16_u16(vmovl_u8(va1)); in xnn_q8_igemm_ukernel_4x8__neon() 216 const int16x8_t vxa2 = vreinterpretq_s16_u16(vmovl_u8(va2)); in xnn_q8_igemm_ukernel_4x8__neon() 217 const int16x8_t vxa3 = vreinterpretq_s16_u16(vmovl_u8(va3)); in xnn_q8_igemm_ukernel_4x8__neon()
|
/external/XNNPACK/src/q8-gemm/ |
D | 8x8-neon.c | 102 const int16x8_t vxa0 = vreinterpretq_s16_u16(vmovl_u8(va0)); a0 += 8; in xnn_q8_gemm_ukernel_8x8__neon() 104 const int16x8_t vxa1 = vreinterpretq_s16_u16(vmovl_u8(va1)); a1 += 8; in xnn_q8_gemm_ukernel_8x8__neon() 106 const int16x8_t vxa2 = vreinterpretq_s16_u16(vmovl_u8(va2)); a2 += 8; in xnn_q8_gemm_ukernel_8x8__neon() 108 const int16x8_t vxa3 = vreinterpretq_s16_u16(vmovl_u8(va3)); a3 += 8; in xnn_q8_gemm_ukernel_8x8__neon() 110 const int16x8_t vxa4 = vreinterpretq_s16_u16(vmovl_u8(va4)); a4 += 8; in xnn_q8_gemm_ukernel_8x8__neon() 112 const int16x8_t vxa5 = vreinterpretq_s16_u16(vmovl_u8(va5)); a5 += 8; in xnn_q8_gemm_ukernel_8x8__neon() 114 const int16x8_t vxa6 = vreinterpretq_s16_u16(vmovl_u8(va6)); a6 += 8; in xnn_q8_gemm_ukernel_8x8__neon() 116 const int16x8_t vxa7 = vreinterpretq_s16_u16(vmovl_u8(va7)); a7 += 8; in xnn_q8_gemm_ukernel_8x8__neon() 282 const int16x8_t vxa0 = vreinterpretq_s16_u16(vmovl_u8(va0)); in xnn_q8_gemm_ukernel_8x8__neon() 284 const int16x8_t vxa1 = vreinterpretq_s16_u16(vmovl_u8(va1)); in xnn_q8_gemm_ukernel_8x8__neon() [all …]
|
D | 4x8-neon.c | 70 const int16x8_t vxa0 = vreinterpretq_s16_u16(vmovl_u8(va0)); in xnn_q8_gemm_ukernel_4x8__neon() 72 const int16x8_t vxa1 = vreinterpretq_s16_u16(vmovl_u8(va1)); in xnn_q8_gemm_ukernel_4x8__neon() 74 const int16x8_t vxa2 = vreinterpretq_s16_u16(vmovl_u8(va2)); in xnn_q8_gemm_ukernel_4x8__neon() 76 const int16x8_t vxa3 = vreinterpretq_s16_u16(vmovl_u8(va3)); in xnn_q8_gemm_ukernel_4x8__neon() 178 const int16x8_t vxa0 = vreinterpretq_s16_u16(vmovl_u8(va0)); in xnn_q8_gemm_ukernel_4x8__neon() 180 const int16x8_t vxa1 = vreinterpretq_s16_u16(vmovl_u8(va1)); in xnn_q8_gemm_ukernel_4x8__neon() 182 const int16x8_t vxa2 = vreinterpretq_s16_u16(vmovl_u8(va2)); in xnn_q8_gemm_ukernel_4x8__neon() 184 const int16x8_t vxa3 = vreinterpretq_s16_u16(vmovl_u8(va3)); in xnn_q8_gemm_ukernel_4x8__neon()
|
/external/tensorflow/tensorflow/core/kernels/ |
D | quantized_instance_norm.cc | 71 const uint16x8_t v_high_u16 = vmovl_u8(v_high); in ColMeanAndVariance() 72 const uint16x8_t v_low_u16 = vmovl_u8(v_low); in ColMeanAndVariance() 165 const uint16x8_t v_high = vmovl_u8(vget_high_u8(v)); in MinAndMax() 166 const uint16x8_t v_low = vmovl_u8(vget_low_u8(v)); in MinAndMax() 221 const uint16x8_t v_high = vmovl_u8(vget_high_u8(v)); in InstanceNorm() 222 const uint16x8_t v_low = vmovl_u8(vget_low_u8(v)); in InstanceNorm()
|
/external/webp/src/dsp/ |
D | dec_neon.c | 380 return vreinterpretq_s16_u16(vmovl_u8(v)); in ConvertU8ToS16_NEON() 1286 const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1)); in DC4_NEON() 1287 const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1)); in DC4_NEON() 1288 const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1)); in DC4_NEON() 1289 const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1)); in DC4_NEON() 1432 const uint16x8_t B = vmovl_u8(A); in DC8_NEON() 1444 const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1)); in DC8_NEON() 1445 const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1)); in DC8_NEON() 1446 const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1)); in DC8_NEON() 1447 const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1)); in DC8_NEON() [all …]
|