/external/XNNPACK/src/f32-spmm/gen/ |
D | 32x1-minmax-wasmsimd-arm-pipelined-x2.c | 45 v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() local 67 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 78 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 86 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 97 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 109 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2() 121 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined_x2()
|
D | 32x1-minmax-wasmsimd-x86-pipelined-x2.c | 45 v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() local 67 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 78 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 86 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 97 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 109 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2() 121 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined_x2()
|
D | 32x1-minmax-wasmsimd-arm-pipelined.c | 45 v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() local 69 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined() 81 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm_pipelined()
|
D | 32x1-minmax-neon-pipelined.c | 45 float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() local 67 vaccGHIJ = vmlaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined() 81 viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neon_pipelined()
|
D | 32x1-minmax-neonfma-pipelined.c | 45 float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() local 67 vaccGHIJ = vfmaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined() 81 viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma_pipelined()
|
D | 32x1-minmax-wasmsimd-x86-pipelined.c | 45 v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() local 69 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined() 81 viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86_pipelined()
|
D | 32x4-minmax-neonfma.c | 81 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local 94 vaccGHIJn0 = vfmaq_laneq_f32(vaccGHIJn0, viGHIJ, vw, 0); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 102 vaccGHIJn1 = vfmaq_laneq_f32(vaccGHIJn1, viGHIJ, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 110 vaccGHIJn2 = vfmaq_laneq_f32(vaccGHIJn2, viGHIJ, vw, 2); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 118 vaccGHIJn3 = vfmaq_laneq_f32(vaccGHIJn3, viGHIJ, vw, 3); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() 248 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma() local 261 vaccGHIJ = vfmaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x4__neonfma()
|
D | 32x2-minmax-neonfma.c | 65 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local 78 vaccGHIJn0 = vfmaq_lane_f32(vaccGHIJn0, viGHIJ, vw, 0); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 86 vaccGHIJn1 = vfmaq_lane_f32(vaccGHIJn1, viGHIJ, vw, 1); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() 166 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma() local 179 vaccGHIJ = vfmaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x2__neonfma()
|
D | 32x1-minmax-neon.c | 57 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neon() local 70 vaccGHIJ = vmlaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neon()
|
D | 32x1-minmax-neonfma.c | 57 const float32x4_t viGHIJ = vld1q_f32(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma() local 70 vaccGHIJ = vfmaq_f32(vaccGHIJ, viGHIJ, vw); in xnn_f32_spmm_minmax_ukernel_32x1__neonfma()
|
D | 32x1-minmax-wasmsimd-arm.c | 57 const v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm() local 67 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_arm()
|
D | 32x1-minmax-sse.c | 57 const __m128 viGHIJ = _mm_loadu_ps(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__sse() local 67 vaccGHIJ = _mm_add_ps(vaccGHIJ, _mm_mul_ps(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__sse()
|
D | 32x1-minmax-wasmsimd-x86.c | 57 const v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86() local 67 vaccGHIJ = wasm_f32x4_add(vaccGHIJ, wasm_f32x4_mul(viGHIJ, vw)); in xnn_f32_spmm_minmax_ukernel_32x1__wasmsimd_x86()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neonfma-p5-x20.c | 49 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() local 56 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20()
|
D | wasmsimd-p5-x20-acc5.c | 54 const v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5() local 62 const v128_t vxGHIJ = wasm_f32x4_sub(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc5()
|
D | neon-p5-x20-acc5.c | 54 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() local 61 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
|
D | neonfma-p5-x20-acc5.c | 53 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() local 60 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5()
|
D | sse2-p5-x20.c | 50 const __m128 viGHIJ = _mm_loadu_ps(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20() local 58 const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
|
D | neon-p5-x20.c | 50 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() local 57 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
|
D | neon-p5-x20-acc2.c | 51 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() local 58 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
|
D | sse2-p5-x20-acc2.c | 51 const __m128 viGHIJ = _mm_loadu_ps(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2() local 59 const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
|
D | neonfma-p5-x20-acc2.c | 50 const float32x4_t viGHIJ = vld1q_f32(input); input += 4; in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() local 57 const float32x4_t vxGHIJ = vsubq_f32(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2()
|
D | wasmsimd-p5-x20-acc2.c | 51 const v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc2() local 59 const v128_t vxGHIJ = wasm_f32x4_sub(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20_acc2()
|
D | wasmsimd-p5-x20.c | 50 const v128_t viGHIJ = wasm_v128_load(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20() local 58 const v128_t vxGHIJ = wasm_f32x4_sub(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__wasmsimd_p5_x20()
|
D | sse2-p5-x20-acc5.c | 54 const __m128 viGHIJ = _mm_loadu_ps(input + 16); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5() local 62 const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
|