/external/XNNPACK/src/f32-gemm/gen/ |
D | 1x4-wasm.c | 41 float vacc00 = w[0]; in xnn_f32_gemm_ukernel_1x4__wasm() local 57 vacc00 += va0 * vb0; in xnn_f32_gemm_ukernel_1x4__wasm() 66 vacc00 = __builtin_wasm_max_f32(vacc00, vmin); in xnn_f32_gemm_ukernel_1x4__wasm() 72 vacc00 = __builtin_wasm_min_f32(vacc00, vmax); in xnn_f32_gemm_ukernel_1x4__wasm() 78 c0[0] = vacc00; in xnn_f32_gemm_ukernel_1x4__wasm() 89 c0[0] = vacc00; in xnn_f32_gemm_ukernel_1x4__wasm() 91 vacc00 = vacc02; in xnn_f32_gemm_ukernel_1x4__wasm() 95 c0[0] = vacc00; in xnn_f32_gemm_ukernel_1x4__wasm()
|
D | 1x4-scalar.c | 41 float vacc00 = w[0]; in xnn_f32_gemm_ukernel_1x4__scalar() local 57 vacc00 += va0 * vb0; in xnn_f32_gemm_ukernel_1x4__scalar() 66 vacc00 = math_max_f32(vacc00, vmin); in xnn_f32_gemm_ukernel_1x4__scalar() 72 vacc00 = math_min_f32(vacc00, vmax); in xnn_f32_gemm_ukernel_1x4__scalar() 78 c0[0] = vacc00; in xnn_f32_gemm_ukernel_1x4__scalar() 89 c0[0] = vacc00; in xnn_f32_gemm_ukernel_1x4__scalar() 91 vacc00 = vacc02; in xnn_f32_gemm_ukernel_1x4__scalar() 95 c0[0] = vacc00; in xnn_f32_gemm_ukernel_1x4__scalar()
|
D | 2x4-wasm.c | 47 float vacc00 = w[0]; in xnn_f32_gemm_ukernel_2x4__wasm() local 52 float vacc10 = vacc00; in xnn_f32_gemm_ukernel_2x4__wasm() 68 vacc00 += va0 * vb0; in xnn_f32_gemm_ukernel_2x4__wasm() 81 vacc00 = __builtin_wasm_max_f32(vacc00, vmin); in xnn_f32_gemm_ukernel_2x4__wasm() 91 vacc00 = __builtin_wasm_min_f32(vacc00, vmax); in xnn_f32_gemm_ukernel_2x4__wasm() 106 c0[0] = vacc00; in xnn_f32_gemm_ukernel_2x4__wasm() 122 c0[0] = vacc00; in xnn_f32_gemm_ukernel_2x4__wasm() 124 vacc00 = vacc02; in xnn_f32_gemm_ukernel_2x4__wasm() 129 c0[0] = vacc00; in xnn_f32_gemm_ukernel_2x4__wasm()
|
D | 2x4-scalar.c | 47 float vacc00 = w[0]; in xnn_f32_gemm_ukernel_2x4__scalar() local 52 float vacc10 = vacc00; in xnn_f32_gemm_ukernel_2x4__scalar() 68 vacc00 += va0 * vb0; in xnn_f32_gemm_ukernel_2x4__scalar() 81 vacc00 = math_max_f32(vacc00, vmin); in xnn_f32_gemm_ukernel_2x4__scalar() 91 vacc00 = math_min_f32(vacc00, vmax); in xnn_f32_gemm_ukernel_2x4__scalar() 106 c0[0] = vacc00; in xnn_f32_gemm_ukernel_2x4__scalar() 122 c0[0] = vacc00; in xnn_f32_gemm_ukernel_2x4__scalar() 124 vacc00 = vacc02; in xnn_f32_gemm_ukernel_2x4__scalar() 129 c0[0] = vacc00; in xnn_f32_gemm_ukernel_2x4__scalar()
|
D | 4x2-scalar.c | 59 float vacc00 = w[0]; in xnn_f32_gemm_ukernel_4x2__scalar() local 62 float vacc10 = vacc00; in xnn_f32_gemm_ukernel_4x2__scalar() 64 float vacc20 = vacc00; in xnn_f32_gemm_ukernel_4x2__scalar() 66 float vacc30 = vacc00; in xnn_f32_gemm_ukernel_4x2__scalar() 80 vacc00 += va0 * vb0; in xnn_f32_gemm_ukernel_4x2__scalar() 93 vacc00 = math_max_f32(vacc00, vmin); in xnn_f32_gemm_ukernel_4x2__scalar() 103 vacc00 = math_min_f32(vacc00, vmax); in xnn_f32_gemm_ukernel_4x2__scalar() 122 c0[0] = vacc00; in xnn_f32_gemm_ukernel_4x2__scalar() 137 c0[0] = vacc00; in xnn_f32_gemm_ukernel_4x2__scalar()
|
D | 4x2-wasm.c | 59 float vacc00 = w[0]; in xnn_f32_gemm_ukernel_4x2__wasm() local 62 float vacc10 = vacc00; in xnn_f32_gemm_ukernel_4x2__wasm() 64 float vacc20 = vacc00; in xnn_f32_gemm_ukernel_4x2__wasm() 66 float vacc30 = vacc00; in xnn_f32_gemm_ukernel_4x2__wasm() 80 vacc00 += va0 * vb0; in xnn_f32_gemm_ukernel_4x2__wasm() 93 vacc00 = __builtin_wasm_max_f32(vacc00, vmin); in xnn_f32_gemm_ukernel_4x2__wasm() 103 vacc00 = __builtin_wasm_min_f32(vacc00, vmax); in xnn_f32_gemm_ukernel_4x2__wasm() 122 c0[0] = vacc00; in xnn_f32_gemm_ukernel_4x2__wasm() 137 c0[0] = vacc00; in xnn_f32_gemm_ukernel_4x2__wasm()
|
D | 4x4-scalar.c | 59 float vacc00 = w[0]; in xnn_f32_gemm_ukernel_4x4__scalar() local 64 float vacc10 = vacc00; in xnn_f32_gemm_ukernel_4x4__scalar() 68 float vacc20 = vacc00; in xnn_f32_gemm_ukernel_4x4__scalar() 72 float vacc30 = vacc00; in xnn_f32_gemm_ukernel_4x4__scalar() 90 vacc00 += va0 * vb0; in xnn_f32_gemm_ukernel_4x4__scalar() 111 vacc00 = math_max_f32(vacc00, vmin); in xnn_f32_gemm_ukernel_4x4__scalar() 129 vacc00 = math_min_f32(vacc00, vmax); in xnn_f32_gemm_ukernel_4x4__scalar() 162 c0[0] = vacc00; in xnn_f32_gemm_ukernel_4x4__scalar() 188 c0[0] = vacc00; in xnn_f32_gemm_ukernel_4x4__scalar() 190 vacc00 = vacc02; in xnn_f32_gemm_ukernel_4x4__scalar() [all …]
|
D | 4x4-wasm.c | 59 float vacc00 = w[0]; in xnn_f32_gemm_ukernel_4x4__wasm() local 64 float vacc10 = vacc00; in xnn_f32_gemm_ukernel_4x4__wasm() 68 float vacc20 = vacc00; in xnn_f32_gemm_ukernel_4x4__wasm() 72 float vacc30 = vacc00; in xnn_f32_gemm_ukernel_4x4__wasm() 90 vacc00 += va0 * vb0; in xnn_f32_gemm_ukernel_4x4__wasm() 111 vacc00 = __builtin_wasm_max_f32(vacc00, vmin); in xnn_f32_gemm_ukernel_4x4__wasm() 129 vacc00 = __builtin_wasm_min_f32(vacc00, vmax); in xnn_f32_gemm_ukernel_4x4__wasm() 162 c0[0] = vacc00; in xnn_f32_gemm_ukernel_4x4__wasm() 188 c0[0] = vacc00; in xnn_f32_gemm_ukernel_4x4__wasm() 190 vacc00 = vacc02; in xnn_f32_gemm_ukernel_4x4__wasm() [all …]
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 1x4-wasm.c | 43 float vacc00 = acc[0]; in xnn_f32_gemminc_ukernel_1x4__wasm() local 59 vacc00 += va0 * vb0; in xnn_f32_gemminc_ukernel_1x4__wasm() 68 vacc00 = __builtin_wasm_max_f32(vacc00, vmin); in xnn_f32_gemminc_ukernel_1x4__wasm() 74 vacc00 = __builtin_wasm_min_f32(vacc00, vmax); in xnn_f32_gemminc_ukernel_1x4__wasm() 80 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_1x4__wasm() 91 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_1x4__wasm() 93 vacc00 = vacc02; in xnn_f32_gemminc_ukernel_1x4__wasm() 97 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_1x4__wasm()
|
D | 1x4-scalar.c | 43 float vacc00 = acc[0]; in xnn_f32_gemminc_ukernel_1x4__scalar() local 59 vacc00 += va0 * vb0; in xnn_f32_gemminc_ukernel_1x4__scalar() 68 vacc00 = math_max_f32(vacc00, vmin); in xnn_f32_gemminc_ukernel_1x4__scalar() 74 vacc00 = math_min_f32(vacc00, vmax); in xnn_f32_gemminc_ukernel_1x4__scalar() 80 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_1x4__scalar() 91 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_1x4__scalar() 93 vacc00 = vacc02; in xnn_f32_gemminc_ukernel_1x4__scalar() 97 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_1x4__scalar()
|
D | 2x4-scalar.c | 49 float vacc00 = acc[0]; in xnn_f32_gemminc_ukernel_2x4__scalar() local 70 vacc00 += va0 * vb0; in xnn_f32_gemminc_ukernel_2x4__scalar() 83 vacc00 = math_max_f32(vacc00, vmin); in xnn_f32_gemminc_ukernel_2x4__scalar() 93 vacc00 = math_min_f32(vacc00, vmax); in xnn_f32_gemminc_ukernel_2x4__scalar() 108 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_2x4__scalar() 124 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_2x4__scalar() 126 vacc00 = vacc02; in xnn_f32_gemminc_ukernel_2x4__scalar() 131 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_2x4__scalar()
|
D | 2x4-wasm.c | 49 float vacc00 = acc[0]; in xnn_f32_gemminc_ukernel_2x4__wasm() local 70 vacc00 += va0 * vb0; in xnn_f32_gemminc_ukernel_2x4__wasm() 83 vacc00 = __builtin_wasm_max_f32(vacc00, vmin); in xnn_f32_gemminc_ukernel_2x4__wasm() 93 vacc00 = __builtin_wasm_min_f32(vacc00, vmax); in xnn_f32_gemminc_ukernel_2x4__wasm() 108 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_2x4__wasm() 124 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_2x4__wasm() 126 vacc00 = vacc02; in xnn_f32_gemminc_ukernel_2x4__wasm() 131 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_2x4__wasm()
|
D | 4x4-wasm.c | 61 float vacc00 = acc[0]; in xnn_f32_gemminc_ukernel_4x4__wasm() local 92 vacc00 += va0 * vb0; in xnn_f32_gemminc_ukernel_4x4__wasm() 113 vacc00 = __builtin_wasm_max_f32(vacc00, vmin); in xnn_f32_gemminc_ukernel_4x4__wasm() 131 vacc00 = __builtin_wasm_min_f32(vacc00, vmax); in xnn_f32_gemminc_ukernel_4x4__wasm() 164 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_4x4__wasm() 190 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_4x4__wasm() 192 vacc00 = vacc02; in xnn_f32_gemminc_ukernel_4x4__wasm() 199 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_4x4__wasm()
|
D | 4x4-scalar.c | 61 float vacc00 = acc[0]; in xnn_f32_gemminc_ukernel_4x4__scalar() local 92 vacc00 += va0 * vb0; in xnn_f32_gemminc_ukernel_4x4__scalar() 113 vacc00 = math_max_f32(vacc00, vmin); in xnn_f32_gemminc_ukernel_4x4__scalar() 131 vacc00 = math_min_f32(vacc00, vmax); in xnn_f32_gemminc_ukernel_4x4__scalar() 164 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_4x4__scalar() 190 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_4x4__scalar() 192 vacc00 = vacc02; in xnn_f32_gemminc_ukernel_4x4__scalar() 199 c0[0] = vacc00; in xnn_f32_gemminc_ukernel_4x4__scalar()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 1x4-wasm.c | 45 float vacc00 = w[0]; in xnn_f32_igemm_ukernel_1x4__wasm() local 70 vacc00 += va0 * vb0; in xnn_f32_igemm_ukernel_1x4__wasm() 81 vacc00 = __builtin_wasm_max_f32(vacc00, vmin); in xnn_f32_igemm_ukernel_1x4__wasm() 87 vacc00 = __builtin_wasm_min_f32(vacc00, vmax); in xnn_f32_igemm_ukernel_1x4__wasm() 93 c0[0] = vacc00; in xnn_f32_igemm_ukernel_1x4__wasm() 103 c0[0] = vacc00; in xnn_f32_igemm_ukernel_1x4__wasm() 105 vacc00 = vacc02; in xnn_f32_igemm_ukernel_1x4__wasm() 109 c0[0] = vacc00; in xnn_f32_igemm_ukernel_1x4__wasm()
|
D | 1x4-scalar.c | 45 float vacc00 = w[0]; in xnn_f32_igemm_ukernel_1x4__scalar() local 70 vacc00 += va0 * vb0; in xnn_f32_igemm_ukernel_1x4__scalar() 81 vacc00 = math_max_f32(vacc00, vmin); in xnn_f32_igemm_ukernel_1x4__scalar() 87 vacc00 = math_min_f32(vacc00, vmax); in xnn_f32_igemm_ukernel_1x4__scalar() 93 c0[0] = vacc00; in xnn_f32_igemm_ukernel_1x4__scalar() 103 c0[0] = vacc00; in xnn_f32_igemm_ukernel_1x4__scalar() 105 vacc00 = vacc02; in xnn_f32_igemm_ukernel_1x4__scalar() 109 c0[0] = vacc00; in xnn_f32_igemm_ukernel_1x4__scalar()
|
D | 2x4-wasm.c | 49 float vacc00 = w[0]; in xnn_f32_igemm_ukernel_2x4__wasm() local 53 float vacc10 = vacc00; in xnn_f32_igemm_ukernel_2x4__wasm() 84 vacc00 += va0 * vb0; in xnn_f32_igemm_ukernel_2x4__wasm() 99 vacc00 = __builtin_wasm_max_f32(vacc00, vmin); in xnn_f32_igemm_ukernel_2x4__wasm() 109 vacc00 = __builtin_wasm_min_f32(vacc00, vmax); in xnn_f32_igemm_ukernel_2x4__wasm() 124 c0[0] = vacc00; in xnn_f32_igemm_ukernel_2x4__wasm() 138 c0[0] = vacc00; in xnn_f32_igemm_ukernel_2x4__wasm() 140 vacc00 = vacc02; in xnn_f32_igemm_ukernel_2x4__wasm() 145 c0[0] = vacc00; in xnn_f32_igemm_ukernel_2x4__wasm()
|
D | 2x4-scalar.c | 49 float vacc00 = w[0]; in xnn_f32_igemm_ukernel_2x4__scalar() local 53 float vacc10 = vacc00; in xnn_f32_igemm_ukernel_2x4__scalar() 84 vacc00 += va0 * vb0; in xnn_f32_igemm_ukernel_2x4__scalar() 99 vacc00 = math_max_f32(vacc00, vmin); in xnn_f32_igemm_ukernel_2x4__scalar() 109 vacc00 = math_min_f32(vacc00, vmax); in xnn_f32_igemm_ukernel_2x4__scalar() 124 c0[0] = vacc00; in xnn_f32_igemm_ukernel_2x4__scalar() 138 c0[0] = vacc00; in xnn_f32_igemm_ukernel_2x4__scalar() 140 vacc00 = vacc02; in xnn_f32_igemm_ukernel_2x4__scalar() 145 c0[0] = vacc00; in xnn_f32_igemm_ukernel_2x4__scalar()
|
D | 4x2-scalar.c | 57 float vacc00 = w[0]; in xnn_f32_igemm_ukernel_4x2__scalar() local 59 float vacc10 = vacc00; in xnn_f32_igemm_ukernel_4x2__scalar() 61 float vacc20 = vacc00; in xnn_f32_igemm_ukernel_4x2__scalar() 63 float vacc30 = vacc00; in xnn_f32_igemm_ukernel_4x2__scalar() 102 vacc00 += va0 * vb0; in xnn_f32_igemm_ukernel_4x2__scalar() 117 vacc00 = math_max_f32(vacc00, vmin); in xnn_f32_igemm_ukernel_4x2__scalar() 127 vacc00 = math_min_f32(vacc00, vmax); in xnn_f32_igemm_ukernel_4x2__scalar() 146 c0[0] = vacc00; in xnn_f32_igemm_ukernel_4x2__scalar() 157 c0[0] = vacc00; in xnn_f32_igemm_ukernel_4x2__scalar()
|
D | 4x2-wasm.c | 57 float vacc00 = w[0]; in xnn_f32_igemm_ukernel_4x2__wasm() local 59 float vacc10 = vacc00; in xnn_f32_igemm_ukernel_4x2__wasm() 61 float vacc20 = vacc00; in xnn_f32_igemm_ukernel_4x2__wasm() 63 float vacc30 = vacc00; in xnn_f32_igemm_ukernel_4x2__wasm() 102 vacc00 += va0 * vb0; in xnn_f32_igemm_ukernel_4x2__wasm() 117 vacc00 = __builtin_wasm_max_f32(vacc00, vmin); in xnn_f32_igemm_ukernel_4x2__wasm() 127 vacc00 = __builtin_wasm_min_f32(vacc00, vmax); in xnn_f32_igemm_ukernel_4x2__wasm() 146 c0[0] = vacc00; in xnn_f32_igemm_ukernel_4x2__wasm() 157 c0[0] = vacc00; in xnn_f32_igemm_ukernel_4x2__wasm()
|
D | 4x4-scalar.c | 57 float vacc00 = w[0]; in xnn_f32_igemm_ukernel_4x4__scalar() local 61 float vacc10 = vacc00; in xnn_f32_igemm_ukernel_4x4__scalar() 65 float vacc20 = vacc00; in xnn_f32_igemm_ukernel_4x4__scalar() 69 float vacc30 = vacc00; in xnn_f32_igemm_ukernel_4x4__scalar() 112 vacc00 += va0 * vb0; in xnn_f32_igemm_ukernel_4x4__scalar() 135 vacc00 = math_max_f32(vacc00, vmin); in xnn_f32_igemm_ukernel_4x4__scalar() 153 vacc00 = math_min_f32(vacc00, vmax); in xnn_f32_igemm_ukernel_4x4__scalar() 186 c0[0] = vacc00; in xnn_f32_igemm_ukernel_4x4__scalar() 208 c0[0] = vacc00; in xnn_f32_igemm_ukernel_4x4__scalar() 210 vacc00 = vacc02; in xnn_f32_igemm_ukernel_4x4__scalar() [all …]
|
D | 4x4-wasm.c | 57 float vacc00 = w[0]; in xnn_f32_igemm_ukernel_4x4__wasm() local 61 float vacc10 = vacc00; in xnn_f32_igemm_ukernel_4x4__wasm() 65 float vacc20 = vacc00; in xnn_f32_igemm_ukernel_4x4__wasm() 69 float vacc30 = vacc00; in xnn_f32_igemm_ukernel_4x4__wasm() 112 vacc00 += va0 * vb0; in xnn_f32_igemm_ukernel_4x4__wasm() 135 vacc00 = __builtin_wasm_max_f32(vacc00, vmin); in xnn_f32_igemm_ukernel_4x4__wasm() 153 vacc00 = __builtin_wasm_min_f32(vacc00, vmax); in xnn_f32_igemm_ukernel_4x4__wasm() 186 c0[0] = vacc00; in xnn_f32_igemm_ukernel_4x4__wasm() 208 c0[0] = vacc00; in xnn_f32_igemm_ukernel_4x4__wasm() 210 vacc00 = vacc02; in xnn_f32_igemm_ukernel_4x4__wasm() [all …]
|
/external/XNNPACK/src/q8-gemm/ |
D | 2x4c8-sse2.c | 65 __m128i vacc00 = _mm_cvtsi32_si128((int) ((const int32_t*) w)[0]); in xnn_q8_gemm_ukernel_2x4c8__sse2() local 69 __m128i vacc10 = vacc00; in xnn_q8_gemm_ukernel_2x4c8__sse2() 94 vacc00 = _mm_add_epi32(vacc00, _mm_madd_epi16(vxa0, vxb0)); in xnn_q8_gemm_ukernel_2x4c8__sse2() 104 __m128i vacc0x0123 = sse_reduce4_i32(vacc00, vacc01, vacc02, vacc03); in xnn_q8_gemm_ukernel_2x4c8__sse2()
|