/external/XNNPACK/src/f32-vbinary/gen/ |
D | vrdivc-minmax-sse-x8.c | 41 __m128 vy0123 = _mm_div_ps(vb, va0123); in xnn_f32_vrdivc_minmax_ukernel__sse_x8() local 45 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrdivc_minmax_ukernel__sse_x8() 48 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vrdivc_minmax_ukernel__sse_x8() 51 _mm_storeu_ps(y, vy0123); in xnn_f32_vrdivc_minmax_ukernel__sse_x8() 59 __m128 vy0123 = _mm_div_ps(vb, va0123); in xnn_f32_vrdivc_minmax_ukernel__sse_x8() local 60 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrdivc_minmax_ukernel__sse_x8() 61 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vrdivc_minmax_ukernel__sse_x8() 62 _mm_storeu_ps(y, vy0123); in xnn_f32_vrdivc_minmax_ukernel__sse_x8() 68 __m128 vy0123 = _mm_div_ps(vb, va0123); in xnn_f32_vrdivc_minmax_ukernel__sse_x8() local 69 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrdivc_minmax_ukernel__sse_x8() [all …]
|
D | vmulc-minmax-sse-x8.c | 41 __m128 vy0123 = _mm_mul_ps(va0123, vb); in xnn_f32_vmulc_minmax_ukernel__sse_x8() local 45 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmulc_minmax_ukernel__sse_x8() 48 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vmulc_minmax_ukernel__sse_x8() 51 _mm_storeu_ps(y, vy0123); in xnn_f32_vmulc_minmax_ukernel__sse_x8() 59 __m128 vy0123 = _mm_mul_ps(va0123, vb); in xnn_f32_vmulc_minmax_ukernel__sse_x8() local 60 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmulc_minmax_ukernel__sse_x8() 61 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vmulc_minmax_ukernel__sse_x8() 62 _mm_storeu_ps(y, vy0123); in xnn_f32_vmulc_minmax_ukernel__sse_x8() 68 __m128 vy0123 = _mm_mul_ps(va0123, vb); in xnn_f32_vmulc_minmax_ukernel__sse_x8() local 69 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmulc_minmax_ukernel__sse_x8() [all …]
|
D | vaddc-minmax-sse-x8.c | 41 __m128 vy0123 = _mm_add_ps(va0123, vb); in xnn_f32_vaddc_minmax_ukernel__sse_x8() local 45 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vaddc_minmax_ukernel__sse_x8() 48 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vaddc_minmax_ukernel__sse_x8() 51 _mm_storeu_ps(y, vy0123); in xnn_f32_vaddc_minmax_ukernel__sse_x8() 59 __m128 vy0123 = _mm_add_ps(va0123, vb); in xnn_f32_vaddc_minmax_ukernel__sse_x8() local 60 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vaddc_minmax_ukernel__sse_x8() 61 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vaddc_minmax_ukernel__sse_x8() 62 _mm_storeu_ps(y, vy0123); in xnn_f32_vaddc_minmax_ukernel__sse_x8() 68 __m128 vy0123 = _mm_add_ps(va0123, vb); in xnn_f32_vaddc_minmax_ukernel__sse_x8() local 69 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vaddc_minmax_ukernel__sse_x8() [all …]
|
D | vdivc-minmax-sse-x8.c | 41 __m128 vy0123 = _mm_div_ps(va0123, vb); in xnn_f32_vdivc_minmax_ukernel__sse_x8() local 45 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdivc_minmax_ukernel__sse_x8() 48 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vdivc_minmax_ukernel__sse_x8() 51 _mm_storeu_ps(y, vy0123); in xnn_f32_vdivc_minmax_ukernel__sse_x8() 59 __m128 vy0123 = _mm_div_ps(va0123, vb); in xnn_f32_vdivc_minmax_ukernel__sse_x8() local 60 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdivc_minmax_ukernel__sse_x8() 61 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vdivc_minmax_ukernel__sse_x8() 62 _mm_storeu_ps(y, vy0123); in xnn_f32_vdivc_minmax_ukernel__sse_x8() 68 __m128 vy0123 = _mm_div_ps(va0123, vb); in xnn_f32_vdivc_minmax_ukernel__sse_x8() local 69 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdivc_minmax_ukernel__sse_x8() [all …]
|
D | vrsubc-minmax-sse-x8.c | 41 __m128 vy0123 = _mm_sub_ps(vb, va0123); in xnn_f32_vrsubc_minmax_ukernel__sse_x8() local 45 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrsubc_minmax_ukernel__sse_x8() 48 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vrsubc_minmax_ukernel__sse_x8() 51 _mm_storeu_ps(y, vy0123); in xnn_f32_vrsubc_minmax_ukernel__sse_x8() 59 __m128 vy0123 = _mm_sub_ps(vb, va0123); in xnn_f32_vrsubc_minmax_ukernel__sse_x8() local 60 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrsubc_minmax_ukernel__sse_x8() 61 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vrsubc_minmax_ukernel__sse_x8() 62 _mm_storeu_ps(y, vy0123); in xnn_f32_vrsubc_minmax_ukernel__sse_x8() 68 __m128 vy0123 = _mm_sub_ps(vb, va0123); in xnn_f32_vrsubc_minmax_ukernel__sse_x8() local 69 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrsubc_minmax_ukernel__sse_x8() [all …]
|
D | vsubc-minmax-sse-x8.c | 41 __m128 vy0123 = _mm_sub_ps(va0123, vb); in xnn_f32_vsubc_minmax_ukernel__sse_x8() local 45 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsubc_minmax_ukernel__sse_x8() 48 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vsubc_minmax_ukernel__sse_x8() 51 _mm_storeu_ps(y, vy0123); in xnn_f32_vsubc_minmax_ukernel__sse_x8() 59 __m128 vy0123 = _mm_sub_ps(va0123, vb); in xnn_f32_vsubc_minmax_ukernel__sse_x8() local 60 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsubc_minmax_ukernel__sse_x8() 61 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vsubc_minmax_ukernel__sse_x8() 62 _mm_storeu_ps(y, vy0123); in xnn_f32_vsubc_minmax_ukernel__sse_x8() 68 __m128 vy0123 = _mm_sub_ps(va0123, vb); in xnn_f32_vsubc_minmax_ukernel__sse_x8() local 69 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsubc_minmax_ukernel__sse_x8() [all …]
|
D | vmul-minmax-sse-x8.c | 44 __m128 vy0123 = _mm_mul_ps(va0123, vb0123); in xnn_f32_vmul_minmax_ukernel__sse_x8() local 48 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmul_minmax_ukernel__sse_x8() 51 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vmul_minmax_ukernel__sse_x8() 54 _mm_storeu_ps(y, vy0123); in xnn_f32_vmul_minmax_ukernel__sse_x8() 65 __m128 vy0123 = _mm_mul_ps(va0123, vb0123); in xnn_f32_vmul_minmax_ukernel__sse_x8() local 66 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmul_minmax_ukernel__sse_x8() 67 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vmul_minmax_ukernel__sse_x8() 68 _mm_storeu_ps(y, vy0123); in xnn_f32_vmul_minmax_ukernel__sse_x8() 75 __m128 vy0123 = _mm_mul_ps(va0123, vb0123); in xnn_f32_vmul_minmax_ukernel__sse_x8() local 76 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmul_minmax_ukernel__sse_x8() [all …]
|
D | vsub-minmax-sse-x8.c | 44 __m128 vy0123 = _mm_sub_ps(va0123, vb0123); in xnn_f32_vsub_minmax_ukernel__sse_x8() local 48 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsub_minmax_ukernel__sse_x8() 51 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vsub_minmax_ukernel__sse_x8() 54 _mm_storeu_ps(y, vy0123); in xnn_f32_vsub_minmax_ukernel__sse_x8() 65 __m128 vy0123 = _mm_sub_ps(va0123, vb0123); in xnn_f32_vsub_minmax_ukernel__sse_x8() local 66 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsub_minmax_ukernel__sse_x8() 67 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vsub_minmax_ukernel__sse_x8() 68 _mm_storeu_ps(y, vy0123); in xnn_f32_vsub_minmax_ukernel__sse_x8() 75 __m128 vy0123 = _mm_sub_ps(va0123, vb0123); in xnn_f32_vsub_minmax_ukernel__sse_x8() local 76 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsub_minmax_ukernel__sse_x8() [all …]
|
D | vdiv-minmax-sse-x8.c | 44 __m128 vy0123 = _mm_div_ps(va0123, vb0123); in xnn_f32_vdiv_minmax_ukernel__sse_x8() local 48 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdiv_minmax_ukernel__sse_x8() 51 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vdiv_minmax_ukernel__sse_x8() 54 _mm_storeu_ps(y, vy0123); in xnn_f32_vdiv_minmax_ukernel__sse_x8() 65 __m128 vy0123 = _mm_div_ps(va0123, vb0123); in xnn_f32_vdiv_minmax_ukernel__sse_x8() local 66 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdiv_minmax_ukernel__sse_x8() 67 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vdiv_minmax_ukernel__sse_x8() 68 _mm_storeu_ps(y, vy0123); in xnn_f32_vdiv_minmax_ukernel__sse_x8() 75 __m128 vy0123 = _mm_div_ps(va0123, vb0123); in xnn_f32_vdiv_minmax_ukernel__sse_x8() local 76 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdiv_minmax_ukernel__sse_x8() [all …]
|
D | vadd-minmax-sse-x8.c | 44 __m128 vy0123 = _mm_add_ps(va0123, vb0123); in xnn_f32_vadd_minmax_ukernel__sse_x8() local 48 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vadd_minmax_ukernel__sse_x8() 51 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vadd_minmax_ukernel__sse_x8() 54 _mm_storeu_ps(y, vy0123); in xnn_f32_vadd_minmax_ukernel__sse_x8() 65 __m128 vy0123 = _mm_add_ps(va0123, vb0123); in xnn_f32_vadd_minmax_ukernel__sse_x8() local 66 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vadd_minmax_ukernel__sse_x8() 67 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vadd_minmax_ukernel__sse_x8() 68 _mm_storeu_ps(y, vy0123); in xnn_f32_vadd_minmax_ukernel__sse_x8() 75 __m128 vy0123 = _mm_add_ps(va0123, vb0123); in xnn_f32_vadd_minmax_ukernel__sse_x8() local 76 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vadd_minmax_ukernel__sse_x8() [all …]
|
D | vsqrdiffc-sse-x8.c | 39 __m128 vy0123 = _mm_sub_ps(va0123, vb); in xnn_f32_vsqrdiffc_ukernel__sse_x8() local 42 vy0123 = _mm_mul_ps(vy0123, vy0123); in xnn_f32_vsqrdiffc_ukernel__sse_x8() 46 _mm_storeu_ps(y, vy0123); in xnn_f32_vsqrdiffc_ukernel__sse_x8() 54 __m128 vy0123 = _mm_sub_ps(va0123, vb); in xnn_f32_vsqrdiffc_ukernel__sse_x8() local 55 vy0123 = _mm_mul_ps(vy0123, vy0123); in xnn_f32_vsqrdiffc_ukernel__sse_x8() 56 _mm_storeu_ps(y, vy0123); in xnn_f32_vsqrdiffc_ukernel__sse_x8() 62 __m128 vy0123 = _mm_sub_ps(va0123, vb); in xnn_f32_vsqrdiffc_ukernel__sse_x8() local 63 vy0123 = _mm_mul_ps(vy0123, vy0123); in xnn_f32_vsqrdiffc_ukernel__sse_x8() 65 _mm_storel_pi((__m64*) y, vy0123); in xnn_f32_vsqrdiffc_ukernel__sse_x8() 66 vy0123 = _mm_movehl_ps(vy0123, vy0123); in xnn_f32_vsqrdiffc_ukernel__sse_x8() [all …]
|
D | vsqrdiff-sse-x8.c | 42 __m128 vy0123 = _mm_sub_ps(va0123, vb0123); in xnn_f32_vsqrdiff_ukernel__sse_x8() local 45 vy0123 = _mm_mul_ps(vy0123, vy0123); in xnn_f32_vsqrdiff_ukernel__sse_x8() 49 _mm_storeu_ps(y, vy0123); in xnn_f32_vsqrdiff_ukernel__sse_x8() 60 __m128 vy0123 = _mm_sub_ps(va0123, vb0123); in xnn_f32_vsqrdiff_ukernel__sse_x8() local 61 vy0123 = _mm_mul_ps(vy0123, vy0123); in xnn_f32_vsqrdiff_ukernel__sse_x8() 62 _mm_storeu_ps(y, vy0123); in xnn_f32_vsqrdiff_ukernel__sse_x8() 69 __m128 vy0123 = _mm_sub_ps(va0123, vb0123); in xnn_f32_vsqrdiff_ukernel__sse_x8() local 70 vy0123 = _mm_mul_ps(vy0123, vy0123); in xnn_f32_vsqrdiff_ukernel__sse_x8() 72 _mm_storel_pi((__m64*) y, vy0123); in xnn_f32_vsqrdiff_ukernel__sse_x8() 73 vy0123 = _mm_movehl_ps(vy0123, vy0123); in xnn_f32_vsqrdiff_ukernel__sse_x8() [all …]
|
D | vrdivc-minmax-neon-x8.c | 39 float32x4_t vy0123 = vdivq_f32(vb, va0123); in xnn_f32_vrdivc_minmax_ukernel__neon_x8() local 43 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vrdivc_minmax_ukernel__neon_x8() 46 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vrdivc_minmax_ukernel__neon_x8() 49 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vrdivc_minmax_ukernel__neon_x8() 55 float32x4_t vy0123 = vdivq_f32(vb, va0123); in xnn_f32_vrdivc_minmax_ukernel__neon_x8() local 56 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vrdivc_minmax_ukernel__neon_x8() 57 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vrdivc_minmax_ukernel__neon_x8() 58 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vrdivc_minmax_ukernel__neon_x8() 63 float32x4_t vy0123 = vdivq_f32(vb, va0123); in xnn_f32_vrdivc_minmax_ukernel__neon_x8() local 64 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vrdivc_minmax_ukernel__neon_x8() [all …]
|
D | vaddc-minmax-neon-x8.c | 39 float32x4_t vy0123 = vaddq_f32(va0123, vb); in xnn_f32_vaddc_minmax_ukernel__neon_x8() local 43 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vaddc_minmax_ukernel__neon_x8() 46 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vaddc_minmax_ukernel__neon_x8() 49 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vaddc_minmax_ukernel__neon_x8() 55 float32x4_t vy0123 = vaddq_f32(va0123, vb); in xnn_f32_vaddc_minmax_ukernel__neon_x8() local 56 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vaddc_minmax_ukernel__neon_x8() 57 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vaddc_minmax_ukernel__neon_x8() 58 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vaddc_minmax_ukernel__neon_x8() 63 float32x4_t vy0123 = vaddq_f32(va0123, vb); in xnn_f32_vaddc_minmax_ukernel__neon_x8() local 64 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vaddc_minmax_ukernel__neon_x8() [all …]
|
D | vdivc-minmax-neon-x8.c | 39 float32x4_t vy0123 = vdivq_f32(va0123, vb); in xnn_f32_vdivc_minmax_ukernel__neon_x8() local 43 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vdivc_minmax_ukernel__neon_x8() 46 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vdivc_minmax_ukernel__neon_x8() 49 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vdivc_minmax_ukernel__neon_x8() 55 float32x4_t vy0123 = vdivq_f32(va0123, vb); in xnn_f32_vdivc_minmax_ukernel__neon_x8() local 56 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vdivc_minmax_ukernel__neon_x8() 57 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vdivc_minmax_ukernel__neon_x8() 58 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vdivc_minmax_ukernel__neon_x8() 63 float32x4_t vy0123 = vdivq_f32(va0123, vb); in xnn_f32_vdivc_minmax_ukernel__neon_x8() local 64 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vdivc_minmax_ukernel__neon_x8() [all …]
|
D | vsubc-minmax-neon-x8.c | 39 float32x4_t vy0123 = vsubq_f32(va0123, vb); in xnn_f32_vsubc_minmax_ukernel__neon_x8() local 43 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vsubc_minmax_ukernel__neon_x8() 46 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vsubc_minmax_ukernel__neon_x8() 49 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vsubc_minmax_ukernel__neon_x8() 55 float32x4_t vy0123 = vsubq_f32(va0123, vb); in xnn_f32_vsubc_minmax_ukernel__neon_x8() local 56 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vsubc_minmax_ukernel__neon_x8() 57 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vsubc_minmax_ukernel__neon_x8() 58 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vsubc_minmax_ukernel__neon_x8() 63 float32x4_t vy0123 = vsubq_f32(va0123, vb); in xnn_f32_vsubc_minmax_ukernel__neon_x8() local 64 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vsubc_minmax_ukernel__neon_x8() [all …]
|
D | vmulc-minmax-neon-x8.c | 39 float32x4_t vy0123 = vmulq_f32(va0123, vb); in xnn_f32_vmulc_minmax_ukernel__neon_x8() local 43 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vmulc_minmax_ukernel__neon_x8() 46 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vmulc_minmax_ukernel__neon_x8() 49 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vmulc_minmax_ukernel__neon_x8() 55 float32x4_t vy0123 = vmulq_f32(va0123, vb); in xnn_f32_vmulc_minmax_ukernel__neon_x8() local 56 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vmulc_minmax_ukernel__neon_x8() 57 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vmulc_minmax_ukernel__neon_x8() 58 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vmulc_minmax_ukernel__neon_x8() 63 float32x4_t vy0123 = vmulq_f32(va0123, vb); in xnn_f32_vmulc_minmax_ukernel__neon_x8() local 64 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vmulc_minmax_ukernel__neon_x8() [all …]
|
D | vrsubc-minmax-neon-x8.c | 39 float32x4_t vy0123 = vsubq_f32(vb, va0123); in xnn_f32_vrsubc_minmax_ukernel__neon_x8() local 43 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vrsubc_minmax_ukernel__neon_x8() 46 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vrsubc_minmax_ukernel__neon_x8() 49 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vrsubc_minmax_ukernel__neon_x8() 55 float32x4_t vy0123 = vsubq_f32(vb, va0123); in xnn_f32_vrsubc_minmax_ukernel__neon_x8() local 56 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vrsubc_minmax_ukernel__neon_x8() 57 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vrsubc_minmax_ukernel__neon_x8() 58 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vrsubc_minmax_ukernel__neon_x8() 63 float32x4_t vy0123 = vsubq_f32(vb, va0123); in xnn_f32_vrsubc_minmax_ukernel__neon_x8() local 64 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vrsubc_minmax_ukernel__neon_x8() [all …]
|
D | vdiv-minmax-neon-x8.c | 40 float32x4_t vy0123 = vdivq_f32(va0123, vb0123); in xnn_f32_vdiv_minmax_ukernel__neon_x8() local 44 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vdiv_minmax_ukernel__neon_x8() 47 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vdiv_minmax_ukernel__neon_x8() 50 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vdiv_minmax_ukernel__neon_x8() 57 float32x4_t vy0123 = vdivq_f32(va0123, vb0123); in xnn_f32_vdiv_minmax_ukernel__neon_x8() local 58 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vdiv_minmax_ukernel__neon_x8() 59 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vdiv_minmax_ukernel__neon_x8() 60 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vdiv_minmax_ukernel__neon_x8() 66 float32x4_t vy0123 = vdivq_f32(va0123, vb0123); in xnn_f32_vdiv_minmax_ukernel__neon_x8() local 67 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vdiv_minmax_ukernel__neon_x8() [all …]
|
D | vsub-minmax-neon-x8.c | 40 float32x4_t vy0123 = vsubq_f32(va0123, vb0123); in xnn_f32_vsub_minmax_ukernel__neon_x8() local 44 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vsub_minmax_ukernel__neon_x8() 47 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vsub_minmax_ukernel__neon_x8() 50 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vsub_minmax_ukernel__neon_x8() 57 float32x4_t vy0123 = vsubq_f32(va0123, vb0123); in xnn_f32_vsub_minmax_ukernel__neon_x8() local 58 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vsub_minmax_ukernel__neon_x8() 59 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vsub_minmax_ukernel__neon_x8() 60 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vsub_minmax_ukernel__neon_x8() 66 float32x4_t vy0123 = vsubq_f32(va0123, vb0123); in xnn_f32_vsub_minmax_ukernel__neon_x8() local 67 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vsub_minmax_ukernel__neon_x8() [all …]
|
D | vadd-minmax-neon-x8.c | 40 float32x4_t vy0123 = vaddq_f32(va0123, vb0123); in xnn_f32_vadd_minmax_ukernel__neon_x8() local 44 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vadd_minmax_ukernel__neon_x8() 47 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vadd_minmax_ukernel__neon_x8() 50 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vadd_minmax_ukernel__neon_x8() 57 float32x4_t vy0123 = vaddq_f32(va0123, vb0123); in xnn_f32_vadd_minmax_ukernel__neon_x8() local 58 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vadd_minmax_ukernel__neon_x8() 59 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vadd_minmax_ukernel__neon_x8() 60 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vadd_minmax_ukernel__neon_x8() 66 float32x4_t vy0123 = vaddq_f32(va0123, vb0123); in xnn_f32_vadd_minmax_ukernel__neon_x8() local 67 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vadd_minmax_ukernel__neon_x8() [all …]
|
D | vmul-minmax-neon-x8.c | 40 float32x4_t vy0123 = vmulq_f32(va0123, vb0123); in xnn_f32_vmul_minmax_ukernel__neon_x8() local 44 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vmul_minmax_ukernel__neon_x8() 47 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vmul_minmax_ukernel__neon_x8() 50 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vmul_minmax_ukernel__neon_x8() 57 float32x4_t vy0123 = vmulq_f32(va0123, vb0123); in xnn_f32_vmul_minmax_ukernel__neon_x8() local 58 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vmul_minmax_ukernel__neon_x8() 59 vy0123 = vminq_f32(vy0123, vy_max); in xnn_f32_vmul_minmax_ukernel__neon_x8() 60 vst1q_f32(y, vy0123); y += 4; in xnn_f32_vmul_minmax_ukernel__neon_x8() 66 float32x4_t vy0123 = vmulq_f32(va0123, vb0123); in xnn_f32_vmul_minmax_ukernel__neon_x8() local 67 vy0123 = vmaxq_f32(vy0123, vy_min); in xnn_f32_vmul_minmax_ukernel__neon_x8() [all …]
|
D | vrsubc-minmax-sse-x4.c | 40 __m128 vy0123 = _mm_sub_ps(vb, va0123); in xnn_f32_vrsubc_minmax_ukernel__sse_x4() local 43 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrsubc_minmax_ukernel__sse_x4() 45 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vrsubc_minmax_ukernel__sse_x4() 47 _mm_storeu_ps(y, vy0123); in xnn_f32_vrsubc_minmax_ukernel__sse_x4() 53 __m128 vy0123 = _mm_sub_ps(vb, va0123); in xnn_f32_vrsubc_minmax_ukernel__sse_x4() local 54 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrsubc_minmax_ukernel__sse_x4() 55 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vrsubc_minmax_ukernel__sse_x4() 57 _mm_storel_pi((__m64*) y, vy0123); in xnn_f32_vrsubc_minmax_ukernel__sse_x4() 58 vy0123 = _mm_movehl_ps(vy0123, vy0123); in xnn_f32_vrsubc_minmax_ukernel__sse_x4() 62 _mm_store_ss(y, vy0123); in xnn_f32_vrsubc_minmax_ukernel__sse_x4()
|
D | vdivc-minmax-sse-x4.c | 40 __m128 vy0123 = _mm_div_ps(va0123, vb); in xnn_f32_vdivc_minmax_ukernel__sse_x4() local 43 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdivc_minmax_ukernel__sse_x4() 45 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vdivc_minmax_ukernel__sse_x4() 47 _mm_storeu_ps(y, vy0123); in xnn_f32_vdivc_minmax_ukernel__sse_x4() 53 __m128 vy0123 = _mm_div_ps(va0123, vb); in xnn_f32_vdivc_minmax_ukernel__sse_x4() local 54 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdivc_minmax_ukernel__sse_x4() 55 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vdivc_minmax_ukernel__sse_x4() 57 _mm_storel_pi((__m64*) y, vy0123); in xnn_f32_vdivc_minmax_ukernel__sse_x4() 58 vy0123 = _mm_movehl_ps(vy0123, vy0123); in xnn_f32_vdivc_minmax_ukernel__sse_x4() 62 _mm_store_ss(y, vy0123); in xnn_f32_vdivc_minmax_ukernel__sse_x4()
|
D | vmulc-minmax-sse-x4.c | 40 __m128 vy0123 = _mm_mul_ps(va0123, vb); in xnn_f32_vmulc_minmax_ukernel__sse_x4() local 43 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmulc_minmax_ukernel__sse_x4() 45 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vmulc_minmax_ukernel__sse_x4() 47 _mm_storeu_ps(y, vy0123); in xnn_f32_vmulc_minmax_ukernel__sse_x4() 53 __m128 vy0123 = _mm_mul_ps(va0123, vb); in xnn_f32_vmulc_minmax_ukernel__sse_x4() local 54 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmulc_minmax_ukernel__sse_x4() 55 vy0123 = _mm_min_ps(vy0123, vy_max); in xnn_f32_vmulc_minmax_ukernel__sse_x4() 57 _mm_storel_pi((__m64*) y, vy0123); in xnn_f32_vmulc_minmax_ukernel__sse_x4() 58 vy0123 = _mm_movehl_ps(vy0123, vy0123); in xnn_f32_vmulc_minmax_ukernel__sse_x4() 62 _mm_store_ss(y, vy0123); in xnn_f32_vmulc_minmax_ukernel__sse_x4()
|