Home
last modified time | relevance | path

Searched refs:viGHIJ (Results 1 – 25 of 37) sorted by relevance

12

/external/XNNPACK/src/f32-spmm/gen/
D32x1-minmax-wasmsimd-arm-pipelined-x2.c45 v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() local
67 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
78 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
86 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
97 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
109 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
121 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
D32x1-minmax-wasmsimd-x86-pipelined-x2.c45 v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() local
67 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
78 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
86 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
97 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
109 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
121 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
D32x1-minmax-wasmsimd-arm-pipelined.c45 v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() local
69 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined()
81 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined()
D32x1-minmax-neon-pipelined.c45 float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() local
67 vaccGHIJ = vmlaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined()
81 viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined()
D32x1-minmax-neonfma-pipelined.c45 float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() local
67 vaccGHIJ = vfmaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined()
81 viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined()
D32x1-minmax-wasmsimd-x86-pipelined.c45 v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() local
69 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
81 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
D32x4-minmax-neonfma.c81 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local
94 vaccGHIJn0 = vfmaq_laneq_f32(vaccGHIJn0, viGHIJ, vw, 0); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
102 vaccGHIJn1 = vfmaq_laneq_f32(vaccGHIJn1, viGHIJ, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
110 vaccGHIJn2 = vfmaq_laneq_f32(vaccGHIJn2, viGHIJ, vw, 2); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
118 vaccGHIJn3 = vfmaq_laneq_f32(vaccGHIJn3, viGHIJ, vw, 3); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
248 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local
261 vaccGHIJ = vfmaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
D32x2-minmax-neonfma.c65 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local
78 vaccGHIJn0 = vfmaq_lane_f32(vaccGHIJn0, viGHIJ, vw, 0); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
86 vaccGHIJn1 = vfmaq_lane_f32(vaccGHIJn1, viGHIJ, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
166 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local
179 vaccGHIJ = vfmaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
D32x1-minmax-neon.c57 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neon() local
70 vaccGHIJ = vmlaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
D32x1-minmax-neonfma.c57 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() local
70 vaccGHIJ = vfmaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
D32x1-minmax-wasmsimd-arm.c57 const v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm() local
67 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm()
D32x1-minmax-sse.c57 const __m128 viGHIJ = _mm_loadu_ps(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__sse() local
67 vaccGHIJ = _mm_add_ps(vaccGHIJ, _mm_mul_ps(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__sse()
D32x1-minmax-wasmsimd-x86.c57 const v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() local
67 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dneonfma-p5-x20.c49 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() local
56 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20()
Dwasmsimd-p5-x20-acc5.c54 const v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5() local
62 const v128_t vxGHIJ = wasm_f32x4_sub(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5()
Dneon-p5-x20-acc5.c54 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() local
61 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
Dneonfma-p5-x20-acc5.c53 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() local
60 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5()
Dsse2-p5-x20.c50 const __m128 viGHIJ = _mm_loadu_ps(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() local
58 const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
Dneon-p5-x20.c50 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() local
57 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
Dneon-p5-x20-acc2.c51 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() local
58 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
Dsse2-p5-x20-acc2.c51 const __m128 viGHIJ = _mm_loadu_ps(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() local
59 const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
Dneonfma-p5-x20-acc2.c50 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() local
57 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2()
Dwasmsimd-p5-x20-acc2.c51 const v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc2() local
59 const v128_t vxGHIJ = wasm_f32x4_sub(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc2()
Dwasmsimd-p5-x20.c50 const v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20() local
58 const v128_t vxGHIJ = wasm_f32x4_sub(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20()
Dsse2-p5-x20-acc5.c54 const __m128 viGHIJ = _mm_loadu_ps(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() local
62 const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()

12