/external/XNNPACK/src/f32-vbinary/gen/ |
D | vmax-wasmsimd-x86-x16.c |
      41  const v128_t vb89AB = wasm_v128_load(b + 8);  in xnn_f32_vmax_ukernel__wasmsimd_x86_x16() local
      47  const v128_t vm89AB = wasm_f32x4_le(va89AB, vb89AB);  in xnn_f32_vmax_ukernel__wasmsimd_x86_x16()
      52  v128_t vy89AB = wasm_v128_bitselect(vb89AB, va89AB, vm89AB);  in xnn_f32_vmax_ukernel__wasmsimd_x86_x16()
|
D | vmin-wasmsimd-x86-x16.c |
      41  const v128_t vb89AB = wasm_v128_load(b + 8);  in xnn_f32_vmin_ukernel__wasmsimd_x86_x16() local
      47  const v128_t vm89AB = wasm_f32x4_lt(va89AB, vb89AB);  in xnn_f32_vmin_ukernel__wasmsimd_x86_x16()
      52  v128_t vy89AB = wasm_v128_bitselect(va89AB, vb89AB, vm89AB);  in xnn_f32_vmin_ukernel__wasmsimd_x86_x16()
|
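The two *_x86 kernels above do not call wasm_f32x4_max/min directly: they build the result from a lane-wise comparison plus wasm_v128_bitselect, presumably because a compare-and-select lowers more cheaply to x86 blend instructions than the NaN- and zero-sign-aware native min/max, while the *_arm variants further down use the native instructions. A minimal, self-contained sketch of the select-based maximum on one 4-lane register; the helper name is illustrative, not from XNNPACK:

```c
#include <wasm_simd128.h>

// Select-based f32x4 maximum, as in the *_wasmsimd_x86 kernels above:
// where a <= b keep b, otherwise keep a.  If either lane is NaN the compare
// is false and the lane from a is kept, unlike the NaN-propagating
// wasm_f32x4_max used by the *_wasmsimd_arm kernels.
static inline v128_t f32x4_max_select(v128_t va, v128_t vb) {
  const v128_t vm = wasm_f32x4_le(va, vb);   // per-lane all-ones mask where a <= b
  return wasm_v128_bitselect(vb, va, vm);    // take b under the mask, a elsewhere
}
```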
D | vsub-wasmsimd-x16.c |
      41  const v128_t vb89AB = wasm_v128_load(b + 8);  in xnn_f32_vsub_ukernel__wasmsimd_x16() local
      47  v128_t vy89AB = wasm_f32x4_sub(va89AB, vb89AB);  in xnn_f32_vsub_ukernel__wasmsimd_x16()
|
D | vmul-wasmsimd-x16.c |
      41  const v128_t vb89AB = wasm_v128_load(b + 8);  in xnn_f32_vmul_ukernel__wasmsimd_x16() local
      47  v128_t vy89AB = wasm_f32x4_mul(va89AB, vb89AB);  in xnn_f32_vmul_ukernel__wasmsimd_x16()
|
D | vmax-wasmsimd-arm-x16.c |
      41  const v128_t vb89AB = wasm_v128_load(b + 8);  in xnn_f32_vmax_ukernel__wasmsimd_arm_x16() local
      47  v128_t vy89AB = wasm_f32x4_max(va89AB, vb89AB);  in xnn_f32_vmax_ukernel__wasmsimd_arm_x16()
|
D | vadd-wasmsimd-x16.c |
      41  const v128_t vb89AB = wasm_v128_load(b + 8);  in xnn_f32_vadd_ukernel__wasmsimd_x16() local
      47  v128_t vy89AB = wasm_f32x4_add(va89AB, vb89AB);  in xnn_f32_vadd_ukernel__wasmsimd_x16()
|
D | vdiv-wasmsimd-x16.c |
      41  const v128_t vb89AB = wasm_v128_load(b + 8);  in xnn_f32_vdiv_ukernel__wasmsimd_x16() local
      47  v128_t vy89AB = wasm_f32x4_div(va89AB, vb89AB);  in xnn_f32_vdiv_ukernel__wasmsimd_x16()
|
D | vmin-wasmsimd-arm-x16.c |
      41  const v128_t vb89AB = wasm_v128_load(b + 8);  in xnn_f32_vmin_ukernel__wasmsimd_arm_x16() local
      47  v128_t vy89AB = wasm_f32x4_min(va89AB, vb89AB);  in xnn_f32_vmin_ukernel__wasmsimd_arm_x16()
|
D | vsqrdiff-wasmsimd-x16.c |
      41  const v128_t vb89AB = wasm_v128_load(b + 8);  in xnn_f32_vsqrdiff_ukernel__wasmsimd_x16() local
      47  v128_t vy89AB = wasm_f32x4_sub(va89AB, vb89AB);  in xnn_f32_vsqrdiff_ukernel__wasmsimd_x16()
|
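All the arithmetic kernels in this group share the same x16 tiling: each main-loop iteration loads 16 floats from each operand as four v128 registers (lanes 0123, 4567, 89AB, CDEF; the vb89AB load from b + 8 matched above is the third of them), applies one wasm_f32x4_* operation per register, and stores 16 results. For vsqrdiff the matched wasm_f32x4_sub is presumably followed by squaring the difference on a later, unmatched line. A hedged sketch of such a loop body for the add case, main loop only (the real kernels also handle x8/x4/scalar remainders); the function name is hypothetical:

```c
#include <stddef.h>
#include <wasm_simd128.h>

// Illustrative 16-floats-per-iteration elementwise add, mirroring the
// vadd-wasmsimd-x16 structure referenced above (remainder handling omitted).
void f32_vadd_x16_sketch(size_t n, const float* a, const float* b, float* y) {
  for (; n >= 16; n -= 16) {
    const v128_t va0123 = wasm_v128_load(a);
    const v128_t va4567 = wasm_v128_load(a + 4);
    const v128_t va89AB = wasm_v128_load(a + 8);
    const v128_t vaCDEF = wasm_v128_load(a + 12);
    const v128_t vb0123 = wasm_v128_load(b);
    const v128_t vb4567 = wasm_v128_load(b + 4);
    const v128_t vb89AB = wasm_v128_load(b + 8);   // the load matched in the listing
    const v128_t vbCDEF = wasm_v128_load(b + 12);
    a += 16;
    b += 16;

    wasm_v128_store(y,      wasm_f32x4_add(va0123, vb0123));
    wasm_v128_store(y + 4,  wasm_f32x4_add(va4567, vb4567));
    wasm_v128_store(y + 8,  wasm_f32x4_add(va89AB, vb89AB));
    wasm_v128_store(y + 12, wasm_f32x4_add(vaCDEF, vbCDEF));
    y += 16;
  }
}
```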
D | vadd-relu-wasmsimd-x16.c |
      42  const v128_t vb89AB = wasm_v128_load(b + 8);  in xnn_f32_vadd_relu_ukernel__wasmsimd_x16() local
      48  v128_t vy89AB = wasm_f32x4_add(va89AB, vb89AB);  in xnn_f32_vadd_relu_ukernel__wasmsimd_x16()
|
D | vsub-relu-wasmsimd-x16.c |
      42  const v128_t vb89AB = wasm_v128_load(b + 8);  in xnn_f32_vsub_relu_ukernel__wasmsimd_x16() local
      48  v128_t vy89AB = wasm_f32x4_sub(va89AB, vb89AB);  in xnn_f32_vsub_relu_ukernel__wasmsimd_x16()
|
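The *-relu variants match one line later in their files (42/48 instead of 41/47), consistent with one extra line of setup, and differ from the plain kernels only by clamping the result at zero before it is stored. A sketch of that clamp on one 4-lane block; the exact intrinsic in the generated code may differ (some kernels use an integer-max trick), so treat this as illustrative:

```c
#include <wasm_simd128.h>

// ReLU on one 4-lane block after the binary op (illustrative, not verbatim).
static inline v128_t relu_f32x4(v128_t vy) {
  return wasm_f32x4_max(vy, wasm_f32x4_const_splat(0.0f));
}
```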
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c |
      109  …const __m512i vb89AB = _mm512_cvtepi8_epi16(_mm256_load_si256((const __m256i*) ((const int8_t*) w …  in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
      111  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      112  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      113  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      114  vacc3x89AB = _mm512_add_epi32(vacc3x89AB, _mm512_madd_epi16(va3, vb89AB));  in xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
D | 3x16c8-minmax-fp32-avx512skx.c |
      95  …const __m512i vb89AB = _mm512_cvtepi8_epi16(_mm256_load_si256((const __m256i*) ((const int8_t*) w …  in xnn_qs8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx() local
      97  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qs8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      98  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qs8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      99  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qs8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx()
|
D | 2x16c8-minmax-fp32-avx512skx.c |
      81  …const __m512i vb89AB = _mm512_cvtepi8_epi16(_mm256_load_si256((const __m256i*) ((const int8_t*) w …  in xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx() local
      83  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx()
      84  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx()
|
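In these qs8 kernels vb89AB is one 32-byte slice of the packed signed 8-bit weights, sign-extended to 16-bit lanes; _mm512_madd_epi16 then multiplies adjacent int16 pairs from the (already widened) activations and the weights and sums each pair into an int32, which is added to the running per-row accumulators. A minimal sketch of one such accumulation step, with hypothetical names standing in for the kernel's local state:

```c
#include <immintrin.h>  // AVX-512F + AVX-512BW (the SKX feature set)

// One accumulation step of the 16c8 inner loop, for a single output row.
//   w    : pointer into the packed int8 weights (32 bytes for this slice)
//   va0  : 32 int16 activation values for row 0, laid out to pair with vb
//   vacc : 16 running int32 accumulators for row 0
static inline __m512i qs8_madd_step(const int8_t* w, __m512i va0, __m512i vacc) {
  // Load 32 signed 8-bit weights and sign-extend them to 16 bits.
  const __m512i vb = _mm512_cvtepi8_epi16(_mm256_load_si256((const __m256i*) w));
  // Multiply int16 pairs, horizontally add each pair into int32, accumulate.
  return _mm512_add_epi32(vacc, _mm512_madd_epi16(va0, vb));
}
```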
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c |
      108  …const __m512i vb89AB = _mm512_cvtepi8_epi16(_mm256_load_si256((const __m256i*) ((const int8_t*) w …  in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
      110  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      111  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      112  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      113  vacc3x89AB = _mm512_add_epi32(vacc3x89AB, _mm512_madd_epi16(va3, vb89AB));  in xnn_qc8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
D | 3x16c8-minmax-fp32-avx512skx.c |
      94  …const __m512i vb89AB = _mm512_cvtepi8_epi16(_mm256_load_si256((const __m256i*) ((const int8_t*) w …  in xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx() local
      96  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      97  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      98  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qc8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx()
|
D | 2x16c8-minmax-fp32-avx512skx.c |
      80  …const __m512i vb89AB = _mm512_cvtepi8_epi16(_mm256_load_si256((const __m256i*) ((const int8_t*) w …  in xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx() local
      82  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx()
      83  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qc8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx()
|
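The qc8 kernels above use the same sign-extend-and-madd inner product as their qs8 counterparts; the per-channel (qc8) versus per-tensor (qs8) distinction only shows up later, when the int32 accumulators are converted to float and scaled before requantization, so the sketch after the qs8 group applies here unchanged.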
/external/XNNPACK/src/qu8-igemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c |
      125  …const __m512i vb89AB = _mm512_sub_epi16(_mm512_cvtepu8_epi16(_mm256_load_si256((const __m256i*) ((…  in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
      127  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      128  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      129  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      130  vacc3x89AB = _mm512_add_epi32(vacc3x89AB, _mm512_madd_epi16(va3, vb89AB));  in xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
D | 3x16c8-minmax-fp32-avx512skx.c |
      109  …const __m512i vb89AB = _mm512_sub_epi16(_mm512_cvtepu8_epi16(_mm256_load_si256((const __m256i*) ((…  in xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx() local
      111  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      112  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      113  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx()
|
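The qu8 kernels (both the igemm variants here and the gemm variants further down) differ from the qs8 ones only in how vb89AB is produced: the weights are unsigned 8-bit, so they are zero-extended with _mm512_cvtepu8_epi16 and the kernel zero point is subtracted in 16-bit arithmetic (the _mm512_sub_epi16 visible in the truncated lines) before the same madd/add accumulation. A hedged sketch of that weight-load step; the parameter names are illustrative:

```c
#include <immintrin.h>
#include <stdint.h>

// Load one 32-byte slice of unsigned 8-bit weights, widen to int16 and
// re-center it around the kernel zero point, as in the qu8 kernels above.
//   vb_zero_point: kernel zero point broadcast to all 16-bit lanes, prepared
//                  once outside the loop, e.g. _mm512_set1_epi16(zero_point).
static inline __m512i qu8_load_weights(const uint8_t* w, __m512i vb_zero_point) {
  const __m512i vbx = _mm512_cvtepu8_epi16(_mm256_load_si256((const __m256i*) w));
  return _mm512_sub_epi16(vbx, vb_zero_point);
}
```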
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c |
      124  …const __m512i vb89AB = _mm512_cvtepi8_epi16(_mm256_load_si256((const __m256i*) ((const int8_t*) w …  in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
      126  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      127  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      128  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      129  vacc3x89AB = _mm512_add_epi32(vacc3x89AB, _mm512_madd_epi16(va3, vb89AB));  in xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
D | 3x16c8-minmax-fp32-avx512skx.c |
      108  …const __m512i vb89AB = _mm512_cvtepi8_epi16(_mm256_load_si256((const __m256i*) ((const int8_t*) w …  in xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx() local
      110  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      111  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      112  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx()
|
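The igemm kernels here (and the qu8/qc8 igemm kernels in the neighbouring directories) handle vb89AB exactly like the corresponding gemm kernels; being indirect GEMMs, they obtain the activation rows through an indirection buffer of row pointers rather than a contiguous A matrix, which is consistent with the matches sitting somewhat deeper in these files.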
/external/XNNPACK/src/qu8-gemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c |
      110  …const __m512i vb89AB = _mm512_sub_epi16(_mm512_cvtepu8_epi16(_mm256_load_si256((const __m256i*) ((…  in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
      112  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      113  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      114  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      115  vacc3x89AB = _mm512_add_epi32(vacc3x89AB, _mm512_madd_epi16(va3, vb89AB));  in xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
D | 3x16c8-minmax-fp32-avx512skx.c |
      96  …const __m512i vb89AB = _mm512_sub_epi16(_mm512_cvtepu8_epi16(_mm256_load_si256((const __m256i*) ((…  in xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx() local
      98  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      99  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      100  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx()
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 4x16c8-minmax-fp32-avx512skx.c |
      123  …const __m512i vb89AB = _mm512_cvtepi8_epi16(_mm256_load_si256((const __m256i*) ((const int8_t*) w …  in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx() local
      125  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      126  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      127  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
      128  vacc3x89AB = _mm512_add_epi32(vacc3x89AB, _mm512_madd_epi16(va3, vb89AB));  in xnn_qc8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx()
|
D | 3x16c8-minmax-fp32-avx512skx.c |
      107  …const __m512i vb89AB = _mm512_cvtepi8_epi16(_mm256_load_si256((const __m256i*) ((const int8_t*) w …  in xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx() local
      109  vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB));  in xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      110  vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB));  in xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx()
      111  vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB));  in xnn_qc8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx()
|