/external/XNNPACK/src/f32-velu/gen/
D | velu-wasmsimd-x86-rr2-p6-x20.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20():
     68  v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23);  (local)
     96  vsGHIJ = wasm_v128_andnot(vsGHIJ, vsatmGHIJ);
    141  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
    142  vsGHIJ = wasm_f32x4_sub(vsGHIJ, vone);
    154  const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha);

D | velu-wasmsimd-x86-rr2-p6-x24.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24():
     71  v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23);  (local)
    103  vsGHIJ = wasm_v128_andnot(vsGHIJ, vsatmGHIJ);
    156  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
    157  vsGHIJ = wasm_f32x4_sub(vsGHIJ, vone);
    172  const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha);

D | velu-wasmsimd-x86-rr2-lut16-p3-x20.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20():
    120  v128_t vsGHIJ = wasm_i32x4_add(vlGHIJ, venGHIJ);  (local)
    142  vsGHIJ = wasm_v128_andnot(vsGHIJ, vsatmGHIJ);
    169  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
    170  vsGHIJ = wasm_f32x4_sub(vsGHIJ, vone);
    182  const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha);

D | velu-wasmsimd-x86-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24():
    132  v128_t vsGHIJ = wasm_i32x4_add(vlGHIJ, venGHIJ);  (local)
    158  vsGHIJ = wasm_v128_andnot(vsGHIJ, vsatmGHIJ);
    190  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
    191  vsGHIJ = wasm_f32x4_sub(vsGHIJ, vone);
    206  const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha);

D | velu-sse41-rr2-p6-x20.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x20():
     68  __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));  (local)
    126  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
    127  vsGHIJ = _mm_sub_ps(vsGHIJ, vone);
    139  const __m128 veGHIJ = _mm_mul_ps(_mm_add_ps(vpGHIJ, vsGHIJ), valpha);

D | velu-neonfma-rr1-p6-x20.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20():
     70  float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));  (local)
    117  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    118  vsGHIJ = vsubq_f32(vsGHIJ, vone);
    130  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);

D | velu-neon-rr2-p6-x20.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x20():
     71  float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));  (local)
    124  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    125  vsGHIJ = vsubq_f32(vsGHIJ, vone);
    137  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);

D | velu-neonfma-rr1-p6-x24.c | in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24():
     73  float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));  (local)
    128  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    129  vsGHIJ = vsubq_f32(vsGHIJ, vone);
    144  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);

D | velu-wasmsimd-arm-rr2-p6-x20.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20():
     68  v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23);  (local)
    126  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
    127  vsGHIJ = wasm_f32x4_sub(vsGHIJ, vone);
    139  const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha);

D | velu-neon-rr2-p6-x24.c | in xnn_f32_velu_ukernel__neon_rr2_p6_x24():
     74  float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));  (local)
    136  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    137  vsGHIJ = vsubq_f32(vsGHIJ, vone);
    152  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);

D | velu-sse2-rr2-p6-x20.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x20():
     68  __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));  (local)
    126  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
    127  vsGHIJ = _mm_sub_ps(vsGHIJ, vone);
    139  const __m128 veGHIJ = _mm_mul_ps(_mm_add_ps(vpGHIJ, vsGHIJ), valpha);

D | velu-sse41-rr2-p6-x24.c | in xnn_f32_velu_ukernel__sse41_rr2_p6_x24():
     71  __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));  (local)
    138  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
    139  vsGHIJ = _mm_sub_ps(vsGHIJ, vone);
    154  const __m128 veGHIJ = _mm_mul_ps(_mm_add_ps(vpGHIJ, vsGHIJ), valpha);

D | velu-wasmsimd-arm-rr2-p6-x24.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24():
     71  v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23);  (local)
    138  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
    139  vsGHIJ = wasm_f32x4_sub(vsGHIJ, vone);
    154  const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha);

D | velu-sse2-rr2-p6-x24.c | in xnn_f32_velu_ukernel__sse2_rr2_p6_x24():
     71  __m128 vsGHIJ = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(vnGHIJ), 23));  (local)
    138  vtGHIJ = _mm_mul_ps(vtGHIJ, vsGHIJ);
    139  vsGHIJ = _mm_sub_ps(vsGHIJ, vone);
    154  const __m128 veGHIJ = _mm_mul_ps(_mm_add_ps(vpGHIJ, vsGHIJ), valpha);

D | velu-neonfma-rr1-lut16-p3-x20.c | in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20():
    118  float32x4_t vsGHIJ = vreinterpretq_f32_s32(vaddq_s32(vlGHIJ, venGHIJ));  (local)
    146  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    147  vsGHIJ = vsubq_f32(vsGHIJ, vone);
    159  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);

D | velu-neon-rr2-lut16-p3-x20.c | in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20():
    119  float32x4_t vsGHIJ = vreinterpretq_f32_s32(vaddq_s32(vlGHIJ, venGHIJ));  (local)
    153  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    154  vsGHIJ = vsubq_f32(vsGHIJ, vone);
    166  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);

D | velu-wasmsimd-arm-rr2-lut16-p3-x20.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20():
    120  v128_t vsGHIJ = wasm_i32x4_add(vlGHIJ, venGHIJ);  (local)
    154  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
    155  vsGHIJ = wasm_f32x4_sub(vsGHIJ, vone);
    167  const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha);

D | velu-neon-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24():
    131  float32x4_t vsGHIJ = vreinterpretq_f32_s32(vaddq_s32(vlGHIJ, venGHIJ));  (local)
    171  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    172  vsGHIJ = vsubq_f32(vsGHIJ, vone);
    187  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);

D | velu-neonfma-rr1-lut16-p3-x24.c | in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24():
    130  float32x4_t vsGHIJ = vreinterpretq_f32_s32(vaddq_s32(vlGHIJ, venGHIJ));  (local)
    163  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    164  vsGHIJ = vsubq_f32(vsGHIJ, vone);
    179  const float32x4_t veGHIJ = vmulq_f32(vaddq_f32(vpGHIJ, vsGHIJ), valpha);

D | velu-wasmsimd-arm-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24():
    132  v128_t vsGHIJ = wasm_i32x4_add(vlGHIJ, venGHIJ);  (local)
    172  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
    173  vsGHIJ = wasm_f32x4_sub(vsGHIJ, vone);
    188  const v128_t veGHIJ = wasm_f32x4_mul(wasm_f32x4_add(vpGHIJ, vsGHIJ), valpha);

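Every velu match above is the same negative-branch ELU reconstruction: build s = 2^n (the p6 variants shift a biased float into the exponent field with a left shift by 23; the lut16 variants add a table mantissa vlGHIJ to the exponent word venGHIJ), scale the reduced argument t by s, subtract one from s, and finish with alpha * (p + s). The wasmsimd-x86 variants additionally clear s in saturated lanes first (the wasm_v128_andnot lines). Below is a minimal scalar sketch of the shared sequence; the names (elu_negative_branch, n, t, p) are illustrative, not XNNPACK API, and the polynomial fold, which the vsGHIJ matches do not show, is filled in as an assumption about the surrounding kernel structure:

    #include <stdint.h>
    #include <string.h>

    /* Illustrative scalar model of the vectorized sequence; not XNNPACK API.
       n: integer exponent from range reduction, t: reduced argument,
       p: partially evaluated expm1 polynomial, alpha: ELU parameter. */
    static float elu_negative_branch(int32_t n, float t, float p, float alpha) {
      /* s = 2^n: place n + 127 in the exponent field of an IEEE-754 single;
         the kernels get the same effect from the shift-by-23 lines
         (wasm_i32x4_shl / _mm_slli_epi32 / vshlq_n_s32). */
      int32_t sbits = (n + 127) << 23;
      float s;
      memcpy(&s, &sbits, sizeof s);

      t *= s;                  /* vtGHIJ = mul(vtGHIJ, vsGHIJ)              */
      s -= 1.0f;               /* vsGHIJ = sub(vsGHIJ, vone)                */
      p = p * t + t;           /* fold: p is now ~ s * expm1(reduced t)     */
      return (p + s) * alpha;  /* veGHIJ = mul(add(vpGHIJ, vsGHIJ), valpha) */
    }

The return value is alpha * (s * expm1(t) + s - 1) = alpha * (exp(x) - 1), i.e. the ELU tail, computed without ever calling expf().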
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
D | neonfma-rr1-p5-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc5():
     67  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));  (local)
    109  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    115  float32x4_t vfGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);

D | neonfma-rr1-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20():
     63  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));  (local)
    105  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    111  float32x4_t vfGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);

D | neon-rr2-p5-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_p5_x20():
     64  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));  (local)
    112  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    118  float32x4_t vfGHIJ = vmlaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);

D | neonfma-rr1-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_p5_x20_acc2():
     64  … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23));  (local)
    106  vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ);
    112  float32x4_t vfGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ);

D | wasmsimd-rr2-p5-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_rr2_p5_x20_acc2():
     71  const v128_t vsGHIJ = wasm_i32x4_shl(vnGHIJ, 23);  (local)
    127  vtGHIJ = wasm_f32x4_mul(vtGHIJ, vsGHIJ);
    133  v128_t vfGHIJ = wasm_f32x4_add(vsGHIJ, wasm_f32x4_mul(vtGHIJ, vpGHIJ));

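The raddstoreexpminusmax matches share the first half of that recipe and differ only in the last step: after s = 2^n and t *= s, a single fused multiply-add (vfmaq_f32), non-fused multiply-add (vmlaq_f32), or explicit add + mul (wasmsimd) forms f = s + p * (t * s) = s * (1 + p * t) ~ s * exp(t) = exp(x - max). A minimal scalar sketch, with the same caveat that s, t, p are illustrative names rather than XNNPACK API:

    /* Illustrative scalar model; s = 2^n comes from the same
       shift-into-exponent trick sketched after the velu listing. */
    static float exp_reconstruct(float s, float t, float p) {
      t *= s;            /* vtGHIJ = vmulq_f32(vtGHIJ, vsGHIJ)         */
      return s + p * t;  /* vfGHIJ = vfmaq_f32(vsGHIJ, vpGHIJ, vtGHIJ) */
    }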