Home
last modified time | relevance | path

Searched refs: vy_lo (Results 1 – 25 of 316) sorted by relevance

Pages: 1 2 3 4 5 6 7 8 9 10 >> … 13

/external/XNNPACK/src/x8-lut/gen/
Dlut-neon-tbx128x4-x16.c64 uint8x8_t vy_lo = vget_low_u8(vy); in xnn_x8_lut_ukernel__neon_tbx128x4_x16() local
66 vst1_u8(y, vy_lo); y += 8; in xnn_x8_lut_ukernel__neon_tbx128x4_x16()
67 vy_lo = vget_high_u8(vy); in xnn_x8_lut_ukernel__neon_tbx128x4_x16()
70 vst1_lane_u32((void*) y, vreinterpret_u32_u8(vy_lo), 0); y += 4; in xnn_x8_lut_ukernel__neon_tbx128x4_x16()
71 vy_lo = vext_u8(vy_lo, vy_lo, 4); in xnn_x8_lut_ukernel__neon_tbx128x4_x16()
74 vst1_lane_u16((void*) y, vreinterpret_u16_u8(vy_lo), 0); y += 2; in xnn_x8_lut_ukernel__neon_tbx128x4_x16()
75 vy_lo = vext_u8(vy_lo, vy_lo, 2); in xnn_x8_lut_ukernel__neon_tbx128x4_x16()
78 vst1_lane_u8(y, vy_lo, 0); in xnn_x8_lut_ukernel__neon_tbx128x4_x16()
Dlut-neon-tbx128x4-x32.c89 uint8x8_t vy_lo = vget_low_u8(vy); in xnn_x8_lut_ukernel__neon_tbx128x4_x32() local
91 vst1_u8(y, vy_lo); y += 8; in xnn_x8_lut_ukernel__neon_tbx128x4_x32()
92 vy_lo = vget_high_u8(vy); in xnn_x8_lut_ukernel__neon_tbx128x4_x32()
95 vst1_lane_u32((void*) y, vreinterpret_u32_u8(vy_lo), 0); y += 4; in xnn_x8_lut_ukernel__neon_tbx128x4_x32()
96 vy_lo = vext_u8(vy_lo, vy_lo, 4); in xnn_x8_lut_ukernel__neon_tbx128x4_x32()
99 vst1_lane_u16((void*) y, vreinterpret_u16_u8(vy_lo), 0); y += 2; in xnn_x8_lut_ukernel__neon_tbx128x4_x32()
100 vy_lo = vext_u8(vy_lo, vy_lo, 2); in xnn_x8_lut_ukernel__neon_tbx128x4_x32()
103 vst1_lane_u8(y, vy_lo, 0); in xnn_x8_lut_ukernel__neon_tbx128x4_x32()
Dlut-neon-tbx128x4-x48.c98 uint8x8_t vy_lo = vget_low_u8(vy); in xnn_x8_lut_ukernel__neon_tbx128x4_x48() local
100 vst1_u8(y, vy_lo); y += 8; in xnn_x8_lut_ukernel__neon_tbx128x4_x48()
101 vy_lo = vget_high_u8(vy); in xnn_x8_lut_ukernel__neon_tbx128x4_x48()
104 vst1_lane_u32((void*) y, vreinterpret_u32_u8(vy_lo), 0); y += 4; in xnn_x8_lut_ukernel__neon_tbx128x4_x48()
105 vy_lo = vext_u8(vy_lo, vy_lo, 4); in xnn_x8_lut_ukernel__neon_tbx128x4_x48()
108 vst1_lane_u16((void*) y, vreinterpret_u16_u8(vy_lo), 0); y += 2; in xnn_x8_lut_ukernel__neon_tbx128x4_x48()
109 vy_lo = vext_u8(vy_lo, vy_lo, 2); in xnn_x8_lut_ukernel__neon_tbx128x4_x48()
112 vst1_lane_u8(y, vy_lo, 0); in xnn_x8_lut_ukernel__neon_tbx128x4_x48()
/external/XNNPACK/src/f32-vsqrt/gen/
Davx-sqrt-x8.c42 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8() local
44 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8()
45 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8()
49 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8()
50 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8()
54 _mm_store_ss(y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8()
Davx-sqrt-x16.c54 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16() local
56 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16()
57 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16()
61 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16()
62 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16()
66 _mm_store_ss(y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16()
/external/XNNPACK/src/f32-vrnd/gen/
Dvrndu-avx-x8.c45 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndu_ukernel__avx_x8() local
47 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x8()
48 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndu_ukernel__avx_x8()
52 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x8()
53 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndu_ukernel__avx_x8()
57 _mm_store_ss(y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x8()
Dvrndd-avx-x8.c45 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndd_ukernel__avx_x8() local
47 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x8()
48 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndd_ukernel__avx_x8()
52 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x8()
53 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndd_ukernel__avx_x8()
57 _mm_store_ss(y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x8()
Dvrndne-avx-x8.c45 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndne_ukernel__avx_x8() local
47 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x8()
48 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndne_ukernel__avx_x8()
52 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x8()
53 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndne_ukernel__avx_x8()
57 _mm_store_ss(y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x8()
Dvrndz-avx-x8.c45 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndz_ukernel__avx_x8() local
47 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x8()
48 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndz_ukernel__avx_x8()
52 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x8()
53 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndz_ukernel__avx_x8()
57 _mm_store_ss(y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x8()
Dvrndd-avx-x16.c57 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndd_ukernel__avx_x16() local
59 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x16()
60 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndd_ukernel__avx_x16()
64 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x16()
65 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndd_ukernel__avx_x16()
69 _mm_store_ss(y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x16()
Dvrndu-avx-x16.c57 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndu_ukernel__avx_x16() local
59 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x16()
60 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndu_ukernel__avx_x16()
64 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x16()
65 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndu_ukernel__avx_x16()
69 _mm_store_ss(y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x16()
Dvrndne-avx-x16.c57 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndne_ukernel__avx_x16() local
59 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x16()
60 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndne_ukernel__avx_x16()
64 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x16()
65 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndne_ukernel__avx_x16()
69 _mm_store_ss(y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x16()
Dvrndz-avx-x16.c57 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndz_ukernel__avx_x16() local
59 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x16()
60 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndz_ukernel__avx_x16()
64 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x16()
65 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndz_ukernel__avx_x16()
69 _mm_store_ss(y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x16()
/external/XNNPACK/src/f32-vunary/gen/
Dvsqr-avx-x8.c46 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqr_ukernel__avx_x8() local
48 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x8()
49 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqr_ukernel__avx_x8()
53 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x8()
54 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqr_ukernel__avx_x8()
58 _mm_store_ss(y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x8()
Dvneg-avx-x8.c47 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vneg_ukernel__avx_x8() local
49 _mm_storeu_ps(y, vy_lo); in xnn_f32_vneg_ukernel__avx_x8()
50 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vneg_ukernel__avx_x8()
54 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vneg_ukernel__avx_x8()
55 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vneg_ukernel__avx_x8()
59 _mm_store_ss(y, vy_lo); in xnn_f32_vneg_ukernel__avx_x8()
Dvabs-avx-x8.c47 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vabs_ukernel__avx_x8() local
49 _mm_storeu_ps(y, vy_lo); in xnn_f32_vabs_ukernel__avx_x8()
50 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vabs_ukernel__avx_x8()
54 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vabs_ukernel__avx_x8()
55 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vabs_ukernel__avx_x8()
59 _mm_store_ss(y, vy_lo); in xnn_f32_vabs_ukernel__avx_x8()
Dvneg-avx-x16.c57 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vneg_ukernel__avx_x16() local
59 _mm_storeu_ps(y, vy_lo); in xnn_f32_vneg_ukernel__avx_x16()
60 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vneg_ukernel__avx_x16()
64 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vneg_ukernel__avx_x16()
65 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vneg_ukernel__avx_x16()
69 _mm_store_ss(y, vy_lo); in xnn_f32_vneg_ukernel__avx_x16()
Dvsqr-avx-x16.c56 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqr_ukernel__avx_x16() local
58 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x16()
59 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqr_ukernel__avx_x16()
63 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x16()
64 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqr_ukernel__avx_x16()
68 _mm_store_ss(y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x16()
Dvabs-avx-x16.c57 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vabs_ukernel__avx_x16() local
59 _mm_storeu_ps(y, vy_lo); in xnn_f32_vabs_ukernel__avx_x16()
60 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vabs_ukernel__avx_x16()
64 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vabs_ukernel__avx_x16()
65 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vabs_ukernel__avx_x16()
69 _mm_store_ss(y, vy_lo); in xnn_f32_vabs_ukernel__avx_x16()
/external/XNNPACK/src/f32-vbinary/gen/
Dvmaxc-avx-x8.c53 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vmaxc_ukernel__avx_x8() local
55 _mm_storeu_ps(y, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x8()
56 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vmaxc_ukernel__avx_x8()
60 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x8()
61 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x8()
65 _mm_store_ss(y, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x8()
Dvminc-avx-x8.c53 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vminc_ukernel__avx_x8() local
55 _mm_storeu_ps(y, vy_lo); in xnn_f32_vminc_ukernel__avx_x8()
56 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vminc_ukernel__avx_x8()
60 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vminc_ukernel__avx_x8()
61 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vminc_ukernel__avx_x8()
65 _mm_store_ss(y, vy_lo); in xnn_f32_vminc_ukernel__avx_x8()
Dvsqrdiffc-avx-x8.c55 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqrdiffc_ukernel__avx_x8() local
57 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqrdiffc_ukernel__avx_x8()
58 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqrdiffc_ukernel__avx_x8()
62 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqrdiffc_ukernel__avx_x8()
63 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqrdiffc_ukernel__avx_x8()
67 _mm_store_ss(y, vy_lo); in xnn_f32_vsqrdiffc_ukernel__avx_x8()
Dvmax-avx-x8.c56 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vmax_ukernel__avx_x8() local
58 _mm_storeu_ps(y, vy_lo); in xnn_f32_vmax_ukernel__avx_x8()
59 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vmax_ukernel__avx_x8()
63 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vmax_ukernel__avx_x8()
64 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vmax_ukernel__avx_x8()
68 _mm_store_ss(y, vy_lo); in xnn_f32_vmax_ukernel__avx_x8()
Dvmin-avx-x8.c56 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vmin_ukernel__avx_x8() local
58 _mm_storeu_ps(y, vy_lo); in xnn_f32_vmin_ukernel__avx_x8()
59 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vmin_ukernel__avx_x8()
63 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vmin_ukernel__avx_x8()
64 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vmin_ukernel__avx_x8()
68 _mm_store_ss(y, vy_lo); in xnn_f32_vmin_ukernel__avx_x8()
Dvsqrdiff-avx-x8.c58 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqrdiff_ukernel__avx_x8() local
60 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqrdiff_ukernel__avx_x8()
61 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqrdiff_ukernel__avx_x8()
65 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqrdiff_ukernel__avx_x8()
66 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqrdiff_ukernel__avx_x8()
70 _mm_store_ss(y, vy_lo); in xnn_f32_vsqrdiff_ukernel__avx_x8()

Pages: 1 2 3 4 5 6 7 8 9 10 >> … 13