/external/XNNPACK/src/f32-vsqrt/gen/ |
D | avx-sqrt-x8.c | 45 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8() local 47 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8() 48 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8() 52 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8() 53 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8() 57 _mm_store_ss(y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x8()
|
D | avx-sqrt-x16.c | 57 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16() local 59 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16() 60 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16() 64 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16() 65 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16() 69 _mm_store_ss(y, vy_lo); in xnn_f32_vsqrt_ukernel__avx_sqrt_x16()
|
/external/XNNPACK/src/f32-vrnd/gen/ |
D | vrndu-avx-x8.c | 48 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndu_ukernel__avx_x8() local 50 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x8() 51 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndu_ukernel__avx_x8() 55 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x8() 56 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndu_ukernel__avx_x8() 60 _mm_store_ss(y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x8()
|
D | vrndne-avx-x8.c | 48 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndne_ukernel__avx_x8() local 50 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x8() 51 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndne_ukernel__avx_x8() 55 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x8() 56 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndne_ukernel__avx_x8() 60 _mm_store_ss(y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x8()
|
D | vrndd-avx-x8.c | 48 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndd_ukernel__avx_x8() local 50 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x8() 51 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndd_ukernel__avx_x8() 55 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x8() 56 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndd_ukernel__avx_x8() 60 _mm_store_ss(y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x8()
|
D | vrndz-avx-x8.c | 48 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndz_ukernel__avx_x8() local 50 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x8() 51 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndz_ukernel__avx_x8() 55 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x8() 56 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndz_ukernel__avx_x8() 60 _mm_store_ss(y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x8()
|
D | vrndz-avx-x16.c | 60 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndz_ukernel__avx_x16() local 62 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x16() 63 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndz_ukernel__avx_x16() 67 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x16() 68 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndz_ukernel__avx_x16() 72 _mm_store_ss(y, vy_lo); in xnn_f32_vrndz_ukernel__avx_x16()
|
D | vrndd-avx-x16.c | 60 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndd_ukernel__avx_x16() local 62 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x16() 63 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndd_ukernel__avx_x16() 67 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x16() 68 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndd_ukernel__avx_x16() 72 _mm_store_ss(y, vy_lo); in xnn_f32_vrndd_ukernel__avx_x16()
|
D | vrndne-avx-x16.c | 60 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndne_ukernel__avx_x16() local 62 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x16() 63 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndne_ukernel__avx_x16() 67 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x16() 68 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndne_ukernel__avx_x16() 72 _mm_store_ss(y, vy_lo); in xnn_f32_vrndne_ukernel__avx_x16()
|
D | vrndu-avx-x16.c | 60 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vrndu_ukernel__avx_x16() local 62 _mm_storeu_ps(y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x16() 63 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vrndu_ukernel__avx_x16() 67 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x16() 68 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vrndu_ukernel__avx_x16() 72 _mm_store_ss(y, vy_lo); in xnn_f32_vrndu_ukernel__avx_x16()
|
/external/XNNPACK/src/f32-vunary/gen/ |
D | vsqr-avx-x8.c | 49 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqr_ukernel__avx_x8() local 51 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x8() 52 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqr_ukernel__avx_x8() 56 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x8() 57 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqr_ukernel__avx_x8() 61 _mm_store_ss(y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x8()
|
D | vabs-avx-x8.c | 50 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vabs_ukernel__avx_x8() local 52 _mm_storeu_ps(y, vy_lo); in xnn_f32_vabs_ukernel__avx_x8() 53 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vabs_ukernel__avx_x8() 57 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vabs_ukernel__avx_x8() 58 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vabs_ukernel__avx_x8() 62 _mm_store_ss(y, vy_lo); in xnn_f32_vabs_ukernel__avx_x8()
|
D | vneg-avx-x8.c | 50 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vneg_ukernel__avx_x8() local 52 _mm_storeu_ps(y, vy_lo); in xnn_f32_vneg_ukernel__avx_x8() 53 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vneg_ukernel__avx_x8() 57 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vneg_ukernel__avx_x8() 58 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vneg_ukernel__avx_x8() 62 _mm_store_ss(y, vy_lo); in xnn_f32_vneg_ukernel__avx_x8()
|
D | vsqr-avx-x16.c | 59 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqr_ukernel__avx_x16() local 61 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x16() 62 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqr_ukernel__avx_x16() 66 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x16() 67 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqr_ukernel__avx_x16() 71 _mm_store_ss(y, vy_lo); in xnn_f32_vsqr_ukernel__avx_x16()
|
D | vabs-avx-x16.c | 60 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vabs_ukernel__avx_x16() local 62 _mm_storeu_ps(y, vy_lo); in xnn_f32_vabs_ukernel__avx_x16() 63 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vabs_ukernel__avx_x16() 67 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vabs_ukernel__avx_x16() 68 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vabs_ukernel__avx_x16() 72 _mm_store_ss(y, vy_lo); in xnn_f32_vabs_ukernel__avx_x16()
|
/external/XNNPACK/src/f32-vbinary/gen/ |
D | vminc-avx-x8.c | 56 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vminc_ukernel__avx_x8() local 58 _mm_storeu_ps(y, vy_lo); in xnn_f32_vminc_ukernel__avx_x8() 59 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vminc_ukernel__avx_x8() 63 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vminc_ukernel__avx_x8() 64 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vminc_ukernel__avx_x8() 68 _mm_store_ss(y, vy_lo); in xnn_f32_vminc_ukernel__avx_x8()
|
D | vmaxc-avx-x8.c | 56 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vmaxc_ukernel__avx_x8() local 58 _mm_storeu_ps(y, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x8() 59 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vmaxc_ukernel__avx_x8() 63 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x8() 64 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x8() 68 _mm_store_ss(y, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x8()
|
D | vmin-avx-x8.c | 59 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vmin_ukernel__avx_x8() local 61 _mm_storeu_ps(y, vy_lo); in xnn_f32_vmin_ukernel__avx_x8() 62 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vmin_ukernel__avx_x8() 66 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vmin_ukernel__avx_x8() 67 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vmin_ukernel__avx_x8() 71 _mm_store_ss(y, vy_lo); in xnn_f32_vmin_ukernel__avx_x8()
|
D | vmax-avx-x8.c | 59 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vmax_ukernel__avx_x8() local 61 _mm_storeu_ps(y, vy_lo); in xnn_f32_vmax_ukernel__avx_x8() 62 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vmax_ukernel__avx_x8() 66 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vmax_ukernel__avx_x8() 67 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vmax_ukernel__avx_x8() 71 _mm_store_ss(y, vy_lo); in xnn_f32_vmax_ukernel__avx_x8()
|
D | vsqrdiffc-avx-x8.c | 58 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqrdiffc_ukernel__avx_x8() local 60 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqrdiffc_ukernel__avx_x8() 61 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqrdiffc_ukernel__avx_x8() 65 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqrdiffc_ukernel__avx_x8() 66 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqrdiffc_ukernel__avx_x8() 70 _mm_store_ss(y, vy_lo); in xnn_f32_vsqrdiffc_ukernel__avx_x8()
|
D | vsqrdiff-avx-x8.c | 61 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsqrdiff_ukernel__avx_x8() local 63 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsqrdiff_ukernel__avx_x8() 64 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsqrdiff_ukernel__avx_x8() 68 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsqrdiff_ukernel__avx_x8() 69 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsqrdiff_ukernel__avx_x8() 73 _mm_store_ss(y, vy_lo); in xnn_f32_vsqrdiff_ukernel__avx_x8()
|
D | vsubc-minmax-avx-x8.c | 63 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vsubc_minmax_ukernel__avx_x8() local 65 _mm_storeu_ps(y, vy_lo); in xnn_f32_vsubc_minmax_ukernel__avx_x8() 66 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vsubc_minmax_ukernel__avx_x8() 70 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vsubc_minmax_ukernel__avx_x8() 71 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vsubc_minmax_ukernel__avx_x8() 75 _mm_store_ss(y, vy_lo); in xnn_f32_vsubc_minmax_ukernel__avx_x8()
|
D | vmaxc-avx-x16.c | 67 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vmaxc_ukernel__avx_x16() local 69 _mm_storeu_ps(y, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x16() 70 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vmaxc_ukernel__avx_x16() 74 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x16() 75 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x16() 79 _mm_store_ss(y, vy_lo); in xnn_f32_vmaxc_ukernel__avx_x16()
|
D | vdivc-minmax-avx-x8.c | 63 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vdivc_minmax_ukernel__avx_x8() local 65 _mm_storeu_ps(y, vy_lo); in xnn_f32_vdivc_minmax_ukernel__avx_x8() 66 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vdivc_minmax_ukernel__avx_x8() 70 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vdivc_minmax_ukernel__avx_x8() 71 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vdivc_minmax_ukernel__avx_x8() 75 _mm_store_ss(y, vy_lo); in xnn_f32_vdivc_minmax_ukernel__avx_x8()
|
D | vminc-avx-x16.c | 67 __m128 vy_lo = _mm256_castps256_ps128(vy); in xnn_f32_vminc_ukernel__avx_x16() local 69 _mm_storeu_ps(y, vy_lo); in xnn_f32_vminc_ukernel__avx_x16() 70 vy_lo = _mm256_extractf128_ps(vy, 1); in xnn_f32_vminc_ukernel__avx_x16() 74 _mm_storel_pi((__m64*) y, vy_lo); in xnn_f32_vminc_ukernel__avx_x16() 75 vy_lo = _mm_movehl_ps(vy_lo, vy_lo); in xnn_f32_vminc_ukernel__avx_x16() 79 _mm_store_ss(y, vy_lo); in xnn_f32_vminc_ukernel__avx_x16()
|