Home
last modified time | relevance | path

Searched refs: vb4567 (results 1 – 25 of 422), sorted by relevance

Pages: 1 2 3 4 5 6 7 8 9 10 >> … 17

/external/XNNPACK/src/f32-ppmm/gen/
8x8-minmax-neonfma.c:89 const float32x4_t vb4567 = vld1q_f32(w); w += 4; in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma() local
100 vacc0x4567 = vfmaq_laneq_f32(vacc0x4567, vb4567, va0123, 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
101 vacc1x4567 = vfmaq_laneq_f32(vacc1x4567, vb4567, va0123, 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
102 vacc2x4567 = vfmaq_laneq_f32(vacc2x4567, vb4567, va0123, 2); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
103 vacc3x4567 = vfmaq_laneq_f32(vacc3x4567, vb4567, va0123, 3); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
104 vacc4x4567 = vfmaq_laneq_f32(vacc4x4567, vb4567, va4567, 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
105 vacc5x4567 = vfmaq_laneq_f32(vacc5x4567, vb4567, va4567, 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
106 vacc6x4567 = vfmaq_laneq_f32(vacc6x4567, vb4567, va4567, 2); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
107 vacc7x4567 = vfmaq_laneq_f32(vacc7x4567, vb4567, va4567, 3); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
126 vacc0x4567 = vfmaq_f32(vacc0x4567, va0000, vb4567); in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma()
[all …]
4x8-minmax-neonfma.c:64 const float32x4_t vb4567 = vld1q_f32(w); w += 4; in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma() local
71 vacc0x4567 = vfmaq_laneq_f32(vacc0x4567, vb4567, va0123, 0); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
72 vacc1x4567 = vfmaq_laneq_f32(vacc1x4567, vb4567, va0123, 1); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
73 vacc2x4567 = vfmaq_laneq_f32(vacc2x4567, vb4567, va0123, 2); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
74 vacc3x4567 = vfmaq_laneq_f32(vacc3x4567, vb4567, va0123, 3); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
85 vacc0x4567 = vfmaq_f32(vacc0x4567, va0000, vb4567); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
86 vacc1x4567 = vfmaq_f32(vacc1x4567, va1111, vb4567); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
87 vacc2x4567 = vfmaq_f32(vacc2x4567, va2222, vb4567); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
88 vacc3x4567 = vfmaq_f32(vacc3x4567, va3333, vb4567); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
8x8-minmax-neon.c:89 const float32x4_t vb4567 = vld1q_f32(w); w += 4; in xnn_f32_ppmm_minmax_ukernel_8x8__neon() local
99 vacc0x4567 = vmlaq_lane_f32(vacc0x4567, vb4567, vget_low_f32(va0123), 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
100 vacc1x4567 = vmlaq_lane_f32(vacc1x4567, vb4567, vget_low_f32(va0123), 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
101 vacc2x4567 = vmlaq_lane_f32(vacc2x4567, vb4567, vget_high_f32(va0123), 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
102 vacc3x4567 = vmlaq_lane_f32(vacc3x4567, vb4567, vget_high_f32(va0123), 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
103 vacc4x4567 = vmlaq_lane_f32(vacc4x4567, vb4567, vget_low_f32(va4567), 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
104 vacc5x4567 = vmlaq_lane_f32(vacc5x4567, vb4567, vget_low_f32(va4567), 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
105 vacc6x4567 = vmlaq_lane_f32(vacc6x4567, vb4567, vget_high_f32(va4567), 0); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
106 vacc7x4567 = vmlaq_lane_f32(vacc7x4567, vb4567, vget_high_f32(va4567), 1); in xnn_f32_ppmm_minmax_ukernel_8x8__neon()
/external/XNNPACK/src/f32-gemm/gen/
6x8-wasmsimd-loadsplat.c:102 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat() local
111 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat()
112 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat()
113 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat()
114 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat()
115 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat()
116 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemm_ukernel_6x8__wasmsimd_loadsplat()
6x8-wasmrelaxedsimd-fma-loadsplat.c:102 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat() local
111 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567); in xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
112 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567); in xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
113 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567); in xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
114 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567); in xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
115 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567); in xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
116 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567); in xnn_f32_gemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
6x8-relu-wasmrelaxedsimd-fma-loadsplat.c:102 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat() local
111 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567); in xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
112 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567); in xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
113 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567); in xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
114 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567); in xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
115 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567); in xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
116 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567); in xnn_f32_gemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
5x8-wasmrelaxedsimd-fma-loadsplat.c:92 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat() local
100 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567); in xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
101 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567); in xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
102 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567); in xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
103 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567); in xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
104 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567); in xnn_f32_gemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
5x8-wasmsimd-loadsplat.c:92 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat() local
100 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat()
101 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat()
102 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat()
103 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat()
104 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_ukernel_5x8__wasmsimd_loadsplat()
6x8-relu-wasmsimd-loadsplat.c:102 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat() local
111 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat()
112 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat()
113 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat()
114 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat()
115 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat()
116 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemm_relu_ukernel_6x8__wasmsimd_loadsplat()
5x8-relu-wasmrelaxedsimd-fma-loadsplat.c:92 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat() local
100 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567); in xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
101 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567); in xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
102 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567); in xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
103 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567); in xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
104 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567); in xnn_f32_gemm_relu_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
5x8-relu-wasmsimd-loadsplat.c:92 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat() local
100 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat()
101 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat()
102 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat()
103 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat()
104 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_loadsplat()
6x8-minmax-wasmsimd-arm-loadsplat.c:104 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat() local
113 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
114 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
115 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
116 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
117 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
118 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
6x8-minmax-wasmsimd-x86-loadsplat.c:104 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat() local
113 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
114 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
115 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
116 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
117 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
118 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
6x8-minmax-wasmrelaxedsimd-fma-loadsplat.c:104 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat() local
113 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567); in xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
114 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567); in xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
115 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567); in xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
116 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567); in xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
117 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567); in xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
118 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567); in xnn_f32_gemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
/external/XNNPACK/src/f32-igemm/gen/
6x8-wasmsimd-loadsplat.c:117 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat() local
134 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat()
136 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat()
138 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat()
140 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat()
142 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat()
144 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_igemm_ukernel_6x8__wasmsimd_loadsplat()
6x8-wasmrelaxedsimd-fma-loadsplat.c:117 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat() local
134 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567); in xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
136 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567); in xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
138 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567); in xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
140 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567); in xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
142 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567); in xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
144 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567); in xnn_f32_igemm_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
6x8-relu-wasmrelaxedsimd-fma-loadsplat.c:117 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat() local
134 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567); in xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
136 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567); in xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
138 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567); in xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
140 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567); in xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
142 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567); in xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
144 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567); in xnn_f32_igemm_relu_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
5x8-wasmsimd-loadsplat.c:106 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat() local
121 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat()
123 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat()
125 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat()
127 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat()
129 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_igemm_ukernel_5x8__wasmsimd_loadsplat()
5x8-wasmrelaxedsimd-fma-loadsplat.c:106 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat() local
121 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567); in xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
123 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567); in xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
125 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567); in xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
127 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567); in xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
129 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567); in xnn_f32_igemm_ukernel_5x8__wasmrelaxedsimd_fma_loadsplat()
6x8-minmax-wasmrelaxedsimd-fma-loadsplat.c:119 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat() local
136 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567); in xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
138 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567); in xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
140 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567); in xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
142 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567); in xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
144 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567); in xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
146 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567); in xnn_f32_igemm_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
/external/XNNPACK/src/f32-gemm/gen-inc/
6x8inc-minmax-wasmrelaxedsimd-loadsplat.c:106 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat() local
115 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat()
116 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat()
117 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat()
118 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat()
119 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat()
120 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_loadsplat()
6x8inc-minmax-wasmrelaxedsimd-fma-loadsplat.c:106 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat() local
115 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
116 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
117 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
118 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
119 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
120 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmrelaxedsimd_fma_loadsplat()
6x8inc-minmax-wasmsimd-arm-loadsplat.c:106 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat() local
115 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
116 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
117 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
118 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
119 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
120 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_arm_loadsplat()
6x8inc-minmax-relaxedwasmsimd-loadsplat.c:106 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemminc_minmax_ukernel_6x8__relaxedwasmsimd_loadsplat() local
115 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__relaxedwasmsimd_loadsplat()
116 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__relaxedwasmsimd_loadsplat()
117 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__relaxedwasmsimd_loadsplat()
118 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__relaxedwasmsimd_loadsplat()
119 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__relaxedwasmsimd_loadsplat()
120 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__relaxedwasmsimd_loadsplat()
6x8inc-minmax-wasmsimd-x86-loadsplat.c:106 const v128_t vb4567 = wasm_v128_load(w + 4); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat() local
115 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
116 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
117 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
118 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
119 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()
120 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567)); in xnn_f32_gemminc_minmax_ukernel_6x8__wasmsimd_x86_loadsplat()

Pages: 1 2 3 4 5 6 7 8 9 10 >> … 17