/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasmsimd-x86-rr2-p6-x24.c | 96 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 98 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 100 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 102 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 104 vsGHIJ = wasm_v128_andnot(vsGHIJ, vsatmGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 106 vsKLMN = wasm_v128_andnot(vsKLMN, vsatmKLMN); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 108 vt0123 = wasm_v128_andnot(vt0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 110 vt4567 = wasm_v128_andnot(vt4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 112 vt89AB = wasm_v128_andnot(vt89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() 114 vtCDEF = wasm_v128_andnot(vtCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24() [all …]
|
D | velu-wasmsimd-x86-rr2-p6-x20.c | 89 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 91 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 93 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 95 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 97 vsGHIJ = wasm_v128_andnot(vsGHIJ, vsatmGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 99 vt0123 = wasm_v128_andnot(vt0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 101 vt4567 = wasm_v128_andnot(vt4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 103 vt89AB = wasm_v128_andnot(vt89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 105 vtCDEF = wasm_v128_andnot(vtCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() 107 vtGHIJ = wasm_v128_andnot(vtGHIJ, vsatmGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20() [all …]
|
D | velu-wasmsimd-x86-rr2-p6-x16.c | 82 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 84 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 86 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 88 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 90 vt0123 = wasm_v128_andnot(vt0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 92 vt4567 = wasm_v128_andnot(vt4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 94 vt89AB = wasm_v128_andnot(vt89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 96 vtCDEF = wasm_v128_andnot(vtCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 171 vs = wasm_v128_andnot(vs, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() 172 vt = wasm_v128_andnot(vt, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16() [all …]
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x24.c | 151 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 153 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 155 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 157 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 159 vsGHIJ = wasm_v128_andnot(vsGHIJ, vsatmGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 161 vsKLMN = wasm_v128_andnot(vsKLMN, vsatmKLMN); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 163 vt0123 = wasm_v128_andnot(vt0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 165 vt4567 = wasm_v128_andnot(vt4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 167 vt89AB = wasm_v128_andnot(vt89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() 169 vtCDEF = wasm_v128_andnot(vtCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24() [all …]
|
D | velu-wasmsimd-x86-rr2-p6-x12.c | 75 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 77 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 79 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 81 vt0123 = wasm_v128_andnot(vt0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 83 vt4567 = wasm_v128_andnot(vt4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 85 vt89AB = wasm_v128_andnot(vt89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 148 vs = wasm_v128_andnot(vs, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 149 vt = wasm_v128_andnot(vt, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 181 vs = wasm_v128_andnot(vs, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12() 182 vt = wasm_v128_andnot(vt, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x20.c | 135 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 137 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 139 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 141 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 143 vsGHIJ = wasm_v128_andnot(vsGHIJ, vsatmGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 145 vt0123 = wasm_v128_andnot(vt0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 147 vt4567 = wasm_v128_andnot(vt4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 149 vt89AB = wasm_v128_andnot(vt89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 151 vtCDEF = wasm_v128_andnot(vtCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() 153 vtGHIJ = wasm_v128_andnot(vtGHIJ, vsatmGHIJ); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20() [all …]
|
D | velu-wasmsimd-x86-rr2-p6-x8.c | 68 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 70 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 72 vt0123 = wasm_v128_andnot(vt0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 74 vt4567 = wasm_v128_andnot(vt4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 125 vs = wasm_v128_andnot(vs, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 126 vt = wasm_v128_andnot(vt, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 158 vs = wasm_v128_andnot(vs, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8() 159 vt = wasm_v128_andnot(vt, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x16.c | 119 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 121 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 123 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 125 vsCDEF = wasm_v128_andnot(vsCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 127 vt0123 = wasm_v128_andnot(vt0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 129 vt4567 = wasm_v128_andnot(vt4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 131 vt89AB = wasm_v128_andnot(vt89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 133 vtCDEF = wasm_v128_andnot(vtCDEF, vsatmCDEF); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 204 vs = wasm_v128_andnot(vs, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() 205 vt = wasm_v128_andnot(vt, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16() [all …]
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x12.c | 103 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 105 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 107 vs89AB = wasm_v128_andnot(vs89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 109 vt0123 = wasm_v128_andnot(vt0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 111 vt4567 = wasm_v128_andnot(vt4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 113 vt89AB = wasm_v128_andnot(vt89AB, vsatm89AB); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 175 vs = wasm_v128_andnot(vs, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 176 vt = wasm_v128_andnot(vt, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 216 vs = wasm_v128_andnot(vs, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12() 217 vt = wasm_v128_andnot(vt, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x8.c | 87 vs0123 = wasm_v128_andnot(vs0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 89 vs4567 = wasm_v128_andnot(vs4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 91 vt0123 = wasm_v128_andnot(vt0123, vsatm0123); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 93 vt4567 = wasm_v128_andnot(vt4567, vsatm4567); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 146 vs = wasm_v128_andnot(vs, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 147 vt = wasm_v128_andnot(vt, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 187 vs = wasm_v128_andnot(vs, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 188 vt = wasm_v128_andnot(vt, vsatm); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8()
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x2c4-wasmsimd.c | 112 vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0)); in xnn_f32_gemm_ukernel_4x2c4__wasmsimd() 113 vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1)); in xnn_f32_gemm_ukernel_4x2c4__wasmsimd() 114 vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0)); in xnn_f32_gemm_ukernel_4x2c4__wasmsimd() 115 vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1)); in xnn_f32_gemm_ukernel_4x2c4__wasmsimd() 116 vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0)); in xnn_f32_gemm_ukernel_4x2c4__wasmsimd() 117 vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1)); in xnn_f32_gemm_ukernel_4x2c4__wasmsimd() 118 vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0)); in xnn_f32_gemm_ukernel_4x2c4__wasmsimd() 119 vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1)); in xnn_f32_gemm_ukernel_4x2c4__wasmsimd()
|
D | 4x2c4-relu-wasmsimd.c | 112 vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0)); in xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd() 113 vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1)); in xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd() 114 vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0)); in xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd() 115 vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1)); in xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd() 116 vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0)); in xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd() 117 vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1)); in xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd() 118 vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0)); in xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd() 119 vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1)); in xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd()
|
D | 4x2c4-minmax-wasmsimd-arm.c | 114 vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm() 115 vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm() 116 vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm() 117 vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm() 118 vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm() 119 vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm() 120 vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm() 121 vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm()
|
D | 4x2c4-minmax-wasmsimd-x86.c | 112 vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86() 113 vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86() 114 vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86() 115 vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86() 116 vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86() 117 vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86() 118 vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86() 119 vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1)); in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x2c4-relu-wasmsimd.c | 130 vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0)); in xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd() 131 vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1)); in xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd() 132 vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0)); in xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd() 133 vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1)); in xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd() 134 vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0)); in xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd() 135 vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1)); in xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd() 136 vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0)); in xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd() 137 vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1)); in xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd()
|
D | 4x2c4-wasmsimd.c | 130 vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0)); in xnn_f32_igemm_ukernel_4x2c4__wasmsimd() 131 vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1)); in xnn_f32_igemm_ukernel_4x2c4__wasmsimd() 132 vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0)); in xnn_f32_igemm_ukernel_4x2c4__wasmsimd() 133 vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1)); in xnn_f32_igemm_ukernel_4x2c4__wasmsimd() 134 vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0)); in xnn_f32_igemm_ukernel_4x2c4__wasmsimd() 135 vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1)); in xnn_f32_igemm_ukernel_4x2c4__wasmsimd() 136 vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0)); in xnn_f32_igemm_ukernel_4x2c4__wasmsimd() 137 vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1)); in xnn_f32_igemm_ukernel_4x2c4__wasmsimd()
|
D | 4x2c4-minmax-wasmsimd-x86.c | 130 vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86() 131 vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86() 132 vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86() 133 vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86() 134 vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86() 135 vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86() 136 vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86() 137 vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86()
|
D | 4x2c4-minmax-wasmsimd-arm.c | 132 vacc0x0c4 = wasm_f32x4_add(vacc0x0c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask0), vb0)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm() 133 vacc0x1c4 = wasm_f32x4_add(vacc0x1c4, wasm_f32x4_mul(wasm_v128_andnot(va0, vmask1), vb1)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm() 134 vacc1x0c4 = wasm_f32x4_add(vacc1x0c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask0), vb0)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm() 135 vacc1x1c4 = wasm_f32x4_add(vacc1x1c4, wasm_f32x4_mul(wasm_v128_andnot(va1, vmask1), vb1)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm() 136 vacc2x0c4 = wasm_f32x4_add(vacc2x0c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask0), vb0)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm() 137 vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm() 138 vacc3x0c4 = wasm_f32x4_add(vacc3x0c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask0), vb0)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm() 139 vacc3x1c4 = wasm_f32x4_add(vacc3x1c4, wasm_f32x4_mul(wasm_v128_andnot(va3, vmask1), vb1)); in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm()
|
/external/XNNPACK/src/f32-vrnd/gen/ |
D | vrndne-wasmsimd-addsub-x8.c | 35 const v128_t vabsx0123 = wasm_v128_andnot(vx0123, vsign_mask); in xnn_f32_vrndne_ukernel__wasmsimd_addsub_x8() 36 const v128_t vabsx4567 = wasm_v128_andnot(vx4567, vsign_mask); in xnn_f32_vrndne_ukernel__wasmsimd_addsub_x8() 55 const v128_t vabsx = wasm_v128_andnot(vx, vsign_mask); in xnn_f32_vrndne_ukernel__wasmsimd_addsub_x8() 66 const v128_t vabsx = wasm_v128_andnot(vx, vsign_mask); in xnn_f32_vrndne_ukernel__wasmsimd_addsub_x8()
|
D | vrndz-wasmsimd-cvt-x8.c | 41 … const v128_t vrndmask0123 = wasm_v128_andnot(wasm_f32x4_lt(vabsx0123, vmagic_number), vsign_mask); in xnn_f32_vrndz_ukernel__wasmsimd_cvt_x8() 43 … const v128_t vrndmask4567 = wasm_v128_andnot(wasm_f32x4_lt(vabsx4567, vmagic_number), vsign_mask); in xnn_f32_vrndz_ukernel__wasmsimd_cvt_x8() 59 const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask); in xnn_f32_vrndz_ukernel__wasmsimd_cvt_x8() 71 const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask); in xnn_f32_vrndz_ukernel__wasmsimd_cvt_x8()
|
D | vrndd-wasmsimd-addsub-x8.c | 36 const v128_t vabsx0123 = wasm_v128_andnot(vx0123, vsign_mask); in xnn_f32_vrndd_ukernel__wasmsimd_addsub_x8() 37 const v128_t vabsx4567 = wasm_v128_andnot(vx4567, vsign_mask); in xnn_f32_vrndd_ukernel__wasmsimd_addsub_x8() 59 const v128_t vabsx = wasm_v128_andnot(vx, vsign_mask); in xnn_f32_vrndd_ukernel__wasmsimd_addsub_x8() 71 const v128_t vabsx = wasm_v128_andnot(vx, vsign_mask); in xnn_f32_vrndd_ukernel__wasmsimd_addsub_x8()
|
D | vrndz-wasmsimd-addsub-x8.c | 36 const v128_t vabsx0123 = wasm_v128_andnot(vx0123, vsign_mask); in xnn_f32_vrndz_ukernel__wasmsimd_addsub_x8() 37 const v128_t vabsx4567 = wasm_v128_andnot(vx4567, vsign_mask); in xnn_f32_vrndz_ukernel__wasmsimd_addsub_x8() 62 const v128_t vabsx = wasm_v128_andnot(vx, vsign_mask); in xnn_f32_vrndz_ukernel__wasmsimd_addsub_x8() 75 const v128_t vabsx = wasm_v128_andnot(vx, vsign_mask); in xnn_f32_vrndz_ukernel__wasmsimd_addsub_x8()
|
D | vrndd-wasmsimd-cvt-x8.c | 42 … const v128_t vrndmask0123 = wasm_v128_andnot(wasm_f32x4_lt(vabsx0123, vmagic_number), vsign_mask); in xnn_f32_vrndd_ukernel__wasmsimd_cvt_x8() 44 … const v128_t vrndmask4567 = wasm_v128_andnot(wasm_f32x4_lt(vabsx4567, vmagic_number), vsign_mask); in xnn_f32_vrndd_ukernel__wasmsimd_cvt_x8() 66 const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask); in xnn_f32_vrndd_ukernel__wasmsimd_cvt_x8() 80 const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask); in xnn_f32_vrndd_ukernel__wasmsimd_cvt_x8()
|
D | vrndu-wasmsimd-cvt-x8.c | 42 … const v128_t vrndmask0123 = wasm_v128_andnot(wasm_f32x4_lt(vabsx0123, vmagic_number), vsign_mask); in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x8() 44 … const v128_t vrndmask4567 = wasm_v128_andnot(wasm_f32x4_lt(vabsx4567, vmagic_number), vsign_mask); in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x8() 69 const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask); in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x8() 84 const v128_t vrndmask = wasm_v128_andnot(wasm_f32x4_lt(vabsx, vmagic_number), vsign_mask); in xnn_f32_vrndu_ukernel__wasmsimd_cvt_x8()
|
D | vrndu-wasmsimd-addsub-x8.c | 36 const v128_t vabsx0123 = wasm_v128_andnot(vx0123, vsign_mask); in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x8() 37 const v128_t vabsx4567 = wasm_v128_andnot(vx4567, vsign_mask); in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x8() 65 const v128_t vabsx = wasm_v128_andnot(vx, vsign_mask); in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x8() 79 const v128_t vabsx = wasm_v128_andnot(vx, vsign_mask); in xnn_f32_vrndu_ukernel__wasmsimd_addsub_x8()
|