/external/XNNPACK/src/x8-lut/gen/ |
D | lut-neon-tbx128x4-x16.c | in xnn_x8_lut_ukernel__neon_tbx128x4_x16():
      64  uint8x8_t vy_lo = vget_low_u8(vy);  (local)
      66  vst1_u8(y, vy_lo); y += 8;
      67  vy_lo = vget_high_u8(vy);
      70  vst1_lane_u32((void*) y, vreinterpret_u32_u8(vy_lo), 0); y += 4;
      71  vy_lo = vext_u8(vy_lo, vy_lo, 4);
      74  vst1_lane_u16((void*) y, vreinterpret_u16_u8(vy_lo), 0); y += 2;
      75  vy_lo = vext_u8(vy_lo, vy_lo, 2);
      78  vst1_lane_u8(y, vy_lo, 0);
|
D | lut-neon-tbx128x4-x32.c | in xnn_x8_lut_ukernel__neon_tbx128x4_x32():
      89  uint8x8_t vy_lo = vget_low_u8(vy);  (local)
      91  vst1_u8(y, vy_lo); y += 8;
      92  vy_lo = vget_high_u8(vy);
      95  vst1_lane_u32((void*) y, vreinterpret_u32_u8(vy_lo), 0); y += 4;
      96  vy_lo = vext_u8(vy_lo, vy_lo, 4);
      99  vst1_lane_u16((void*) y, vreinterpret_u16_u8(vy_lo), 0); y += 2;
     100  vy_lo = vext_u8(vy_lo, vy_lo, 2);
     103  vst1_lane_u8(y, vy_lo, 0);
|
D | lut-neon-tbx128x4-x48.c | in xnn_x8_lut_ukernel__neon_tbx128x4_x48():
      98  uint8x8_t vy_lo = vget_low_u8(vy);  (local)
     100  vst1_u8(y, vy_lo); y += 8;
     101  vy_lo = vget_high_u8(vy);
     104  vst1_lane_u32((void*) y, vreinterpret_u32_u8(vy_lo), 0); y += 4;
     105  vy_lo = vext_u8(vy_lo, vy_lo, 4);
     108  vst1_lane_u16((void*) y, vreinterpret_u16_u8(vy_lo), 0); y += 2;
     109  vy_lo = vext_u8(vy_lo, vy_lo, 2);
     112  vst1_lane_u8(y, vy_lo, 0);
|
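All three lut-neon-tbx128x4 hits above are the same tail-store sequence: the final n < 16 bytes of a uint8x16_t lookup result are written out in halving chunks (8, 4, 2, 1 bytes), rotating consumed bytes out of vy_lo with vext_u8 between steps. The gaps in the listed line numbers are the surrounding size checks, reconstructed below as if (n & k) guards. A minimal standalone sketch of the pattern — the helper name store_tail_u8x16 and the explicit n parameter are illustrative, not XNNPACK API:

    #include <arm_neon.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical helper: write the final n (1..15) bytes of vy to y. */
    static void store_tail_u8x16(uint8_t* y, uint8x16_t vy, size_t n) {
      uint8x8_t vy_lo = vget_low_u8(vy);
      if (n & 8) {
        vst1_u8(y, vy_lo); y += 8;         /* store the low 8 bytes */
        vy_lo = vget_high_u8(vy);          /* continue with the high half */
      }
      if (n & 4) {
        vst1_lane_u32((void*) y, vreinterpret_u32_u8(vy_lo), 0); y += 4;
        vy_lo = vext_u8(vy_lo, vy_lo, 4);  /* rotate consumed bytes out */
      }
      if (n & 2) {
        vst1_lane_u16((void*) y, vreinterpret_u16_u8(vy_lo), 0); y += 2;
        vy_lo = vext_u8(vy_lo, vy_lo, 2);
      }
      if (n & 1) {
        vst1_lane_u8(y, vy_lo, 0);         /* final byte */
      }
    }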
/external/XNNPACK/src/f32-vsqrt/gen/ |
D | avx-sqrt-x8.c | in xnn_f32_vsqrt_ukernel__avx_sqrt_x8():
      42  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      44  _mm_storeu_ps(y, vy_lo);
      45  vy_lo = _mm256_extractf128_ps(vy, 1);
      49  _mm_storel_pi((__m64*) y, vy_lo);
      50  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      54  _mm_store_ss(y, vy_lo);
|
D | avx-sqrt-x16.c | in xnn_f32_vsqrt_ukernel__avx_sqrt_x16():
      54  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      56  _mm_storeu_ps(y, vy_lo);
      57  vy_lo = _mm256_extractf128_ps(vy, 1);
      61  _mm_storel_pi((__m64*) y, vy_lo);
      62  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      66  _mm_store_ss(y, vy_lo);
|
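The two avx-sqrt hits are the AVX counterpart of the same idiom: the final n < 8 floats of a __m256 result (presumably produced by _mm256_sqrt_ps, given the kernel names; only the tail is shown in the hits) are stored in halving chunks of 4, 2, 1. A sketch, again with a hypothetical helper name and an explicit n parameter:

    #include <immintrin.h>
    #include <stddef.h>

    /* Hypothetical helper: write the final n (1..7) floats of vy to y. */
    static void store_tail_f32x8(float* y, __m256 vy, size_t n) {
      __m128 vy_lo = _mm256_castps256_ps128(vy);  /* low 4 floats; no instruction emitted */
      if (n & 4) {
        _mm_storeu_ps(y, vy_lo); y += 4;
        vy_lo = _mm256_extractf128_ps(vy, 1);     /* switch to the high 4 floats */
      }
      if (n & 2) {
        _mm_storel_pi((__m64*) y, vy_lo); y += 2; /* store the low 2 floats */
        vy_lo = _mm_movehl_ps(vy_lo, vy_lo);      /* move the high pair into the low half */
      }
      if (n & 1) {
        _mm_store_ss(y, vy_lo);                   /* final float */
      }
    }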
/external/XNNPACK/src/f32-vrnd/gen/ |
D | vrndu-avx-x8.c | in xnn_f32_vrndu_ukernel__avx_x8():
      45  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      47  _mm_storeu_ps(y, vy_lo);
      48  vy_lo = _mm256_extractf128_ps(vy, 1);
      52  _mm_storel_pi((__m64*) y, vy_lo);
      53  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      57  _mm_store_ss(y, vy_lo);
|
D | vrndd-avx-x8.c | in xnn_f32_vrndd_ukernel__avx_x8():
      45  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      47  _mm_storeu_ps(y, vy_lo);
      48  vy_lo = _mm256_extractf128_ps(vy, 1);
      52  _mm_storel_pi((__m64*) y, vy_lo);
      53  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      57  _mm_store_ss(y, vy_lo);
|
D | vrndne-avx-x8.c | in xnn_f32_vrndne_ukernel__avx_x8():
      45  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      47  _mm_storeu_ps(y, vy_lo);
      48  vy_lo = _mm256_extractf128_ps(vy, 1);
      52  _mm_storel_pi((__m64*) y, vy_lo);
      53  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      57  _mm_store_ss(y, vy_lo);
|
D | vrndz-avx-x8.c | in xnn_f32_vrndz_ukernel__avx_x8():
      45  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      47  _mm_storeu_ps(y, vy_lo);
      48  vy_lo = _mm256_extractf128_ps(vy, 1);
      52  _mm_storel_pi((__m64*) y, vy_lo);
      53  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      57  _mm_store_ss(y, vy_lo);
|
D | vrndd-avx-x16.c | in xnn_f32_vrndd_ukernel__avx_x16():
      57  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      59  _mm_storeu_ps(y, vy_lo);
      60  vy_lo = _mm256_extractf128_ps(vy, 1);
      64  _mm_storel_pi((__m64*) y, vy_lo);
      65  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      69  _mm_store_ss(y, vy_lo);
|
D | vrndu-avx-x16.c | in xnn_f32_vrndu_ukernel__avx_x16():
      57  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      59  _mm_storeu_ps(y, vy_lo);
      60  vy_lo = _mm256_extractf128_ps(vy, 1);
      64  _mm_storel_pi((__m64*) y, vy_lo);
      65  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      69  _mm_store_ss(y, vy_lo);
|
D | vrndne-avx-x16.c | in xnn_f32_vrndne_ukernel__avx_x16():
      57  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      59  _mm_storeu_ps(y, vy_lo);
      60  vy_lo = _mm256_extractf128_ps(vy, 1);
      64  _mm_storel_pi((__m64*) y, vy_lo);
      65  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      69  _mm_store_ss(y, vy_lo);
|
D | vrndz-avx-x16.c | in xnn_f32_vrndz_ukernel__avx_x16():
      57  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      59  _mm_storeu_ps(y, vy_lo);
      60  vy_lo = _mm256_extractf128_ps(vy, 1);
      64  _mm_storel_pi((__m64*) y, vy_lo);
      65  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      69  _mm_store_ss(y, vy_lo);
|
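The eight vrnd* hits are byte-for-byte the same tail store; the kernels differ only in how vy is computed. Judging by the kernel names (the hits show only the tail, so this mapping is inferred, not taken from the listing), each variant corresponds to a different rounding-mode immediate of _mm256_round_ps:

    #include <immintrin.h>

    /* Assumed per-variant rounding step; only the immediate changes. */
    static __m256 round_up(__m256 vx)   { return _mm256_round_ps(vx, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); }      /* vrndu  */
    static __m256 round_down(__m256 vx) { return _mm256_round_ps(vx, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); }      /* vrndd  */
    static __m256 round_near(__m256 vx) { return _mm256_round_ps(vx, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); }  /* vrndne */
    static __m256 round_zero(__m256 vx) { return _mm256_round_ps(vx, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); }         /* vrndz  */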
/external/XNNPACK/src/f32-vunary/gen/ |
D | vsqr-avx-x8.c | in xnn_f32_vsqr_ukernel__avx_x8():
      46  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      48  _mm_storeu_ps(y, vy_lo);
      49  vy_lo = _mm256_extractf128_ps(vy, 1);
      53  _mm_storel_pi((__m64*) y, vy_lo);
      54  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      58  _mm_store_ss(y, vy_lo);
|
D | vneg-avx-x8.c | in xnn_f32_vneg_ukernel__avx_x8():
      47  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      49  _mm_storeu_ps(y, vy_lo);
      50  vy_lo = _mm256_extractf128_ps(vy, 1);
      54  _mm_storel_pi((__m64*) y, vy_lo);
      55  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      59  _mm_store_ss(y, vy_lo);
|
D | vabs-avx-x8.c | in xnn_f32_vabs_ukernel__avx_x8():
      47  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      49  _mm_storeu_ps(y, vy_lo);
      50  vy_lo = _mm256_extractf128_ps(vy, 1);
      54  _mm_storel_pi((__m64*) y, vy_lo);
      55  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      59  _mm_store_ss(y, vy_lo);
|
D | vneg-avx-x16.c | in xnn_f32_vneg_ukernel__avx_x16():
      57  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      59  _mm_storeu_ps(y, vy_lo);
      60  vy_lo = _mm256_extractf128_ps(vy, 1);
      64  _mm_storel_pi((__m64*) y, vy_lo);
      65  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      69  _mm_store_ss(y, vy_lo);
|
D | vsqr-avx-x16.c | in xnn_f32_vsqr_ukernel__avx_x16():
      56  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      58  _mm_storeu_ps(y, vy_lo);
      59  vy_lo = _mm256_extractf128_ps(vy, 1);
      63  _mm_storel_pi((__m64*) y, vy_lo);
      64  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      68  _mm_store_ss(y, vy_lo);
|
D | vabs-avx-x16.c | in xnn_f32_vabs_ukernel__avx_x16():
      57  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      59  _mm_storeu_ps(y, vy_lo);
      60  vy_lo = _mm256_extractf128_ps(vy, 1);
      64  _mm_storel_pi((__m64*) y, vy_lo);
      65  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      69  _mm_store_ss(y, vy_lo);
|
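The vunary hits (vabs, vneg, vsqr) again share an identical tail; what differs is the element-wise op that produces vy. The sketches below use the standard AVX bit-manipulation idioms for abs and neg; they are inferred from the kernel names, not copied from XNNPACK (which keeps its mask constants in kernel parameters):

    #include <immintrin.h>

    static __m256 vabs_f32x8(__m256 vx) {
      const __m256 vsign_mask = _mm256_set1_ps(-0.0f);  /* sign bit only */
      return _mm256_andnot_ps(vsign_mask, vx);          /* clear the sign bit */
    }
    static __m256 vneg_f32x8(__m256 vx) {
      const __m256 vsign_mask = _mm256_set1_ps(-0.0f);
      return _mm256_xor_ps(vx, vsign_mask);             /* flip the sign bit */
    }
    static __m256 vsqr_f32x8(__m256 vx) {
      return _mm256_mul_ps(vx, vx);                     /* x * x */
    }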
/external/XNNPACK/src/f32-vbinary/gen/ |
D | vmaxc-avx-x8.c | in xnn_f32_vmaxc_ukernel__avx_x8():
      53  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      55  _mm_storeu_ps(y, vy_lo);
      56  vy_lo = _mm256_extractf128_ps(vy, 1);
      60  _mm_storel_pi((__m64*) y, vy_lo);
      61  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      65  _mm_store_ss(y, vy_lo);
|
D | vminc-avx-x8.c | in xnn_f32_vminc_ukernel__avx_x8():
      53  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      55  _mm_storeu_ps(y, vy_lo);
      56  vy_lo = _mm256_extractf128_ps(vy, 1);
      60  _mm_storel_pi((__m64*) y, vy_lo);
      61  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      65  _mm_store_ss(y, vy_lo);
|
D | vsqrdiffc-avx-x8.c | in xnn_f32_vsqrdiffc_ukernel__avx_x8():
      55  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      57  _mm_storeu_ps(y, vy_lo);
      58  vy_lo = _mm256_extractf128_ps(vy, 1);
      62  _mm_storel_pi((__m64*) y, vy_lo);
      63  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      67  _mm_store_ss(y, vy_lo);
|
D | vmax-avx-x8.c | in xnn_f32_vmax_ukernel__avx_x8():
      56  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      58  _mm_storeu_ps(y, vy_lo);
      59  vy_lo = _mm256_extractf128_ps(vy, 1);
      63  _mm_storel_pi((__m64*) y, vy_lo);
      64  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      68  _mm_store_ss(y, vy_lo);
|
D | vmin-avx-x8.c | in xnn_f32_vmin_ukernel__avx_x8():
      56  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      58  _mm_storeu_ps(y, vy_lo);
      59  vy_lo = _mm256_extractf128_ps(vy, 1);
      63  _mm_storel_pi((__m64*) y, vy_lo);
      64  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      68  _mm_store_ss(y, vy_lo);
|
D | vsqrdiff-avx-x8.c | in xnn_f32_vsqrdiff_ukernel__avx_x8():
      58  __m128 vy_lo = _mm256_castps256_ps128(vy);  (local)
      60  _mm_storeu_ps(y, vy_lo);
      61  vy_lo = _mm256_extractf128_ps(vy, 1);
      65  _mm_storel_pi((__m64*) y, vy_lo);
      66  vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
      70  _mm_store_ss(y, vy_lo);
|
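The vbinary hits cover both two-operand kernels (vmax, vmin, vsqrdiff) and their "c" variants (vmaxc, vminc, vsqrdiffc), where the second operand is a single constant broadcast once outside the loop; all six end in the identical vy_lo tail. A sketch of a vmaxc-style kernel, under the assumption that the main loop is 8 floats wide and the remainder is produced by a masked load before the listed tail store; the function name, local mask table, and loop shape are illustrative (the real kernels take their mask from kernel parameters):

    #include <immintrin.h>
    #include <stddef.h>
    #include <stdint.h>

    void vmaxc_f32_avx_sketch(size_t n, const float* x, const float* b, float* y) {
      const __m256 vb = _mm256_broadcast_ss(b);  /* splat the constant operand */
      for (; n >= 8; n -= 8) {
        const __m256 vx = _mm256_loadu_ps(x); x += 8;
        _mm256_storeu_ps(y, _mm256_max_ps(vx, vb)); y += 8;
      }
      if (n != 0) {
        /* Masked load: first n lanes active, the rest zeroed. */
        static const int32_t mask_table[14] = {-1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0};
        const __m256i vmask = _mm256_loadu_si256((const __m256i*) (mask_table + 7 - n));
        const __m256 vx = _mm256_maskload_ps(x, vmask);
        const __m256 vy = _mm256_max_ps(vx, vb);
        /* Same tail store as in the hits above. */
        __m128 vy_lo = _mm256_castps256_ps128(vy);
        if (n & 4) {
          _mm_storeu_ps(y, vy_lo); y += 4;
          vy_lo = _mm256_extractf128_ps(vy, 1);
        }
        if (n & 2) {
          _mm_storel_pi((__m64*) y, vy_lo); y += 2;
          vy_lo = _mm_movehl_ps(vy_lo, vy_lo);
        }
        if (n & 1) {
          _mm_store_ss(y, vy_lo);
        }
      }
    }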