/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 1x8inc-minmax-neon-dup-ld64.c |
    55  const float32x4_t va0c0 = vdupq_lane_f32(va0, 0);  in xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64() local
    56  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0);  in xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64()
    57  vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0);  in xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64()
|
D | 1x8inc-minmax-neonfma-dup-ld64.c |
    55  const float32x4_t va0c0 = vdupq_lane_f32(va0, 0);  in xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64() local
    56  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c0, vb0123c0);  in xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64()
    57  vacc0x4567 = vfmaq_f32(vacc0x4567, va0c0, vb4567c0);  in xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64()
|
D | 1x8inc-minmax-wasmsimd-arm-splat.c |
    55  const v128_t va0c0 = wasm_v32x4_shuffle(va0, va0, 0, 0, 0, 0);  in xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat() local
    60  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c0, vb0123c0));  in xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat()
    61  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));  in xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_arm_splat()
|
D | 1x8inc-minmax-wasmsimd-x86-splat.c |
    53  const v128_t va0c0 = wasm_v32x4_shuffle(va0, va0, 0, 0, 0, 0);  in xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat() local
    58  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c0, vb0123c0));  in xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat()
    59  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));  in xnn_f32_gemminc_minmax_ukernel_1x8__wasmsimd_x86_splat()
|
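The NEON dup-ld64 hits above (and the matching gen/ and igemm entries below) share one broadcast multiply-accumulate shape: a 64-bit load pulls two A values, vdupq_lane_f32 broadcasts each lane across a quad register, and the product with the packed B block is folded into the 1x8 accumulator tile. A minimal sketch of that inner loop, not XNNPACK source; the function name is hypothetical, and remainder and min/max handling are omitted:

    #include <stddef.h>
    #include <arm_neon.h>

    /* Sketch only: assumes k is a multiple of 2 and w points at B packed
     * as 8 floats per k step. Not an XNNPACK symbol. */
    static void gemm_1x8_dup_ld64_sketch(size_t k, const float* a0, const float* w,
                                         float32x4_t* acc_lo, float32x4_t* acc_hi) {
      float32x4_t vacc0x0123 = *acc_lo;
      float32x4_t vacc0x4567 = *acc_hi;
      for (; k >= 2; k -= 2) {
        const float32x2_t va0 = vld1_f32(a0); a0 += 2;        /* 64-bit load: a[k], a[k+1] */

        const float32x4_t vb0123c0 = vld1q_f32(w); w += 4;    /* B cols 0..3 at step k */
        const float32x4_t vb4567c0 = vld1q_f32(w); w += 4;    /* B cols 4..7 at step k */
        const float32x4_t va0c0 = vdupq_lane_f32(va0, 0);     /* broadcast a[k] to all lanes */
        vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0);  /* vfmaq_f32 in the neonfma kernels */
        vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0);

        const float32x4_t vb0123c1 = vld1q_f32(w); w += 4;    /* B cols 0..3 at step k+1 */
        const float32x4_t vb4567c1 = vld1q_f32(w); w += 4;    /* B cols 4..7 at step k+1 */
        const float32x4_t va0c1 = vdupq_lane_f32(va0, 1);     /* broadcast a[k+1] */
        vacc0x0123 = vmlaq_f32(vacc0x0123, va0c1, vb0123c1);
        vacc0x4567 = vmlaq_f32(vacc0x4567, va0c1, vb4567c1);
      }
      *acc_lo = vacc0x0123;
      *acc_hi = vacc0x4567;
    }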
/external/XNNPACK/src/f32-gemm/gen/ |
D | 1x8-minmax-neonfma-dup-ld64.c |
    53  const float32x4_t va0c0 = vdupq_lane_f32(va0, 0);  in xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64() local
    54  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c0, vb0123c0);  in xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64()
    55  vacc0x4567 = vfmaq_f32(vacc0x4567, va0c0, vb4567c0);  in xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64()
|
D | 1x8-minmax-neon-dup-ld64.c |
    53  const float32x4_t va0c0 = vdupq_lane_f32(va0, 0);  in xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64() local
    54  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0);  in xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64()
    55  vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0);  in xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64()
|
D | 1x8-wasmsimd-splat.c |
    51  const v128_t va0c0 = wasm_v32x4_shuffle(va0, va0, 0, 0, 0, 0);  in xnn_f32_gemm_ukernel_1x8__wasmsimd_splat() local
    56  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c0, vb0123c0));  in xnn_f32_gemm_ukernel_1x8__wasmsimd_splat()
    57  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));  in xnn_f32_gemm_ukernel_1x8__wasmsimd_splat()
|
D | 1x8-relu-wasmsimd-splat.c |
    51  const v128_t va0c0 = wasm_v32x4_shuffle(va0, va0, 0, 0, 0, 0);  in xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat() local
    56  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c0, vb0123c0));  in xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat()
    57  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));  in xnn_f32_gemm_relu_ukernel_1x8__wasmsimd_splat()
|
D | 1x8-minmax-wasmsimd-arm-splat.c |
    53  const v128_t va0c0 = wasm_v32x4_shuffle(va0, va0, 0, 0, 0, 0);  in xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat() local
    58  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c0, vb0123c0));  in xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat()
    59  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));  in xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat()
|
D | 1x8-minmax-wasmsimd-x86-splat.c |
    51  const v128_t va0c0 = wasm_v32x4_shuffle(va0, va0, 0, 0, 0, 0);  in xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat() local
    56  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c0, vb0123c0));  in xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat()
    57  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));  in xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat()
|
D | 4x8-minmax-neonfma-dup-ld64.c |
    80  const float32x4_t va0c0 = vdupq_lane_f32(va0, 0);  in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64() local
    84  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c0, vb0123c0);  in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64()
    88  vacc0x4567 = vfmaq_f32(vacc0x4567, va0c0, vb4567c0);  in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64()
|
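The wasmsimd splat entries use the same broadcast-MAC structure, but the lane broadcast is a wasm_v32x4_shuffle and the accumulate is an explicit multiply plus add, since wasm_simd128.h has no fused multiply-add intrinsic. A sketch under the same assumptions (hypothetical names, k a multiple of 4, remainder and min/max clamping omitted):

    #include <stddef.h>
    #include <wasm_simd128.h>

    /* Sketch only, not XNNPACK source: one A row, packed B (8 floats per k step). */
    static void gemm_1x8_wasmsimd_splat_sketch(size_t k, const float* a0, const float* w,
                                               v128_t* acc_lo, v128_t* acc_hi) {
      v128_t vacc0x0123 = *acc_lo;
      v128_t vacc0x4567 = *acc_hi;
      for (; k >= 4; k -= 4) {
        const v128_t va0 = wasm_v128_load(a0); a0 += 4;                 /* a[k..k+3] */

        const v128_t va0c0 = wasm_v32x4_shuffle(va0, va0, 0, 0, 0, 0);  /* broadcast a[k] */
        vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c0, wasm_v128_load(w + 0)));
        vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, wasm_v128_load(w + 4)));

        const v128_t va0c1 = wasm_v32x4_shuffle(va0, va0, 1, 1, 1, 1);  /* broadcast a[k+1] */
        vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c1, wasm_v128_load(w + 8)));
        vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c1, wasm_v128_load(w + 12)));

        const v128_t va0c2 = wasm_v32x4_shuffle(va0, va0, 2, 2, 2, 2);  /* broadcast a[k+2] */
        vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c2, wasm_v128_load(w + 16)));
        vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c2, wasm_v128_load(w + 20)));

        const v128_t va0c3 = wasm_v32x4_shuffle(va0, va0, 3, 3, 3, 3);  /* broadcast a[k+3] */
        vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c3, wasm_v128_load(w + 24)));
        vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c3, wasm_v128_load(w + 28)));

        w += 32;  /* 4 k steps x 8 packed B values per step */
      }
      *acc_lo = vacc0x0123;
      *acc_hi = vacc0x4567;
    }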
/external/XNNPACK/src/f32-igemm/gen/ |
D | 1x8-minmax-neonfma-dup-ld64.c |
    66  const float32x4_t va0c0 = vdupq_lane_f32(va0, 0);  in xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64() local
    67  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c0, vb0123c0);  in xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64()
    68  vacc0x4567 = vfmaq_f32(vacc0x4567, va0c0, vb4567c0);  in xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64()
|
D | 1x8-minmax-neon-dup-ld64.c |
    66  const float32x4_t va0c0 = vdupq_lane_f32(va0, 0);  in xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64() local
    67  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0);  in xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64()
    68  vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0);  in xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64()
|
D | 1x8-wasmsimd-splat.c |
    64  const v128_t va0c0 = wasm_v32x4_shuffle(va0, va0, 0, 0, 0, 0);  in xnn_f32_igemm_ukernel_1x8__wasmsimd_splat() local
    69  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c0, vb0123c0));  in xnn_f32_igemm_ukernel_1x8__wasmsimd_splat()
    70  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));  in xnn_f32_igemm_ukernel_1x8__wasmsimd_splat()
|
D | 1x8-relu-wasmsimd-splat.c |
    64  const v128_t va0c0 = wasm_v32x4_shuffle(va0, va0, 0, 0, 0, 0);  in xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat() local
    69  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c0, vb0123c0));  in xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat()
    70  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));  in xnn_f32_igemm_relu_ukernel_1x8__wasmsimd_splat()
|
D | 1x8-minmax-wasmsimd-x86-splat.c |
    64  const v128_t va0c0 = wasm_v32x4_shuffle(va0, va0, 0, 0, 0, 0);  in xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat() local
    69  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c0, vb0123c0));  in xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat()
    70  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));  in xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat()
|
D | 1x8-minmax-wasmsimd-arm-splat.c |
    66  const v128_t va0c0 = wasm_v32x4_shuffle(va0, va0, 0, 0, 0, 0);  in xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat() local
    71  vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c0, vb0123c0));  in xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat()
    72  vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0c0, vb4567c0));  in xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat()
|
D | 4x8-minmax-neon-dup-ld64.c |
    102  const float32x4_t va0c0 = vdupq_lane_f32(va0, 0);  in xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64() local
    106  vacc0x0123 = vmlaq_f32(vacc0x0123, va0c0, vb0123c0);  in xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64()
    110  vacc0x4567 = vmlaq_f32(vacc0x4567, va0c0, vb4567c0);  in xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64()
|
D | 4x8-minmax-neonfma-dup-ld64.c |
    102  const float32x4_t va0c0 = vdupq_lane_f32(va0, 0);  in xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64() local
    106  vacc0x0123 = vfmaq_f32(vacc0x0123, va0c0, vb0123c0);  in xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64()
    110  vacc0x4567 = vfmaq_f32(vacc0x4567, va0c0, vb4567c0);  in xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64()
|
/external/XNNPACK/src/f16-gemm/gen/ |
D | 1x16-minmax-neonfp16arith-ld64.c |
    59  const float16x8_t va0c0 = vdupq_lane_f16(va0, 0);  in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64() local
    61  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);  in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
    62  vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0);  in xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64()
|
D | 1x8-minmax-neonfp16arith-ld64.c |
    56  const float16x8_t va0c0 = vdupq_lane_f16(va0, 0);  in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
    58  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);  in xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64()
|
/external/XNNPACK/src/f16-gemm/gen-inc/ |
D | 1x16inc-minmax-neonfp16arith-ld64.c |
    61  const float16x8_t va0c0 = vdupq_lane_f16(va0, 0);  in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64() local
    63  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);  in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
    64  vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0);  in xnn_f16_gemminc_minmax_ukernel_1x16__neonfp16arith_ld64()
|
D | 1x8inc-minmax-neonfp16arith-ld64.c |
    58  const float16x8_t va0c0 = vdupq_lane_f16(va0, 0);  in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64() local
    60  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);  in xnn_f16_gemminc_minmax_ukernel_1x8__neonfp16arith_ld64()
|
/external/XNNPACK/src/f16-igemm/gen/ |
D | 1x16-minmax-neonfp16arith-ld64.c |
    70  const float16x8_t va0c0 = vdupq_lane_f16(va0, 0);  in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64() local
    72  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);  in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
    73  vacc0x89ABCDEF = vfmaq_f16(vacc0x89ABCDEF, va0c0, vb89ABCDEFc0);  in xnn_f16_igemm_minmax_ukernel_1x16__neonfp16arith_ld64()
|
D | 1x8-minmax-neonfp16arith-ld64.c |
    67  const float16x8_t va0c0 = vdupq_lane_f16(va0, 0);  in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64() local
    69  vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);  in xnn_f16_igemm_minmax_ukernel_1x8__neonfp16arith_ld64()
|
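The f16 neonfp16arith-ld64 entries are the half-precision counterpart of the same pattern: the 64-bit A load now carries four fp16 values, the broadcast is vdupq_lane_f16, and the accumulate is vfmaq_f16. A sketch of the 1x8 inner loop, assuming a target with fp16 vector arithmetic (ARMv8.2-A +fp16), hypothetical names, k a multiple of 4, and remainder/clamp handling omitted:

    #include <stddef.h>
    #include <arm_neon.h>  /* needs __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

    /* Sketch only, not XNNPACK source. */
    static float16x8_t gemm_1x8_f16_ld64_sketch(size_t k, const float16_t* a0, const float16_t* w,
                                                float16x8_t vacc0x01234567) {
      for (; k >= 4; k -= 4) {
        const float16x4_t va0 = vld1_f16(a0); a0 += 4;       /* 64-bit load: a[k..k+3] */

        const float16x8_t vb01234567c0 = vld1q_f16(w); w += 8;
        const float16x8_t va0c0 = vdupq_lane_f16(va0, 0);    /* broadcast a[k] */
        vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c0, vb01234567c0);

        const float16x8_t vb01234567c1 = vld1q_f16(w); w += 8;
        const float16x8_t va0c1 = vdupq_lane_f16(va0, 1);    /* broadcast a[k+1] */
        vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c1, vb01234567c1);

        const float16x8_t vb01234567c2 = vld1q_f16(w); w += 8;
        const float16x8_t va0c2 = vdupq_lane_f16(va0, 2);    /* broadcast a[k+2] */
        vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c2, vb01234567c2);

        const float16x8_t vb01234567c3 = vld1q_f16(w); w += 8;
        const float16x8_t va0c3 = vdupq_lane_f16(va0, 3);    /* broadcast a[k+3] */
        vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3);
      }
      return vacc0x01234567;
    }

The 1x16 variants listed above keep a second accumulator (vacc0x89ABCDEF) and a second packed B block (vb89ABCDEFc0, ...) per k step, but the broadcast-and-fma structure is the same.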