/external/XNNPACK/src/f32-gemm/gen/

D | 6x2-minmax-neon-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x2__neon_lane_ld64():
    114  const float32x2_t vb01 = vld1_f32(w); w += 2;
    116  vacc0x01 = vmla_f32(vacc0x01, va0, vb01);
    117  vacc1x01 = vmla_f32(vacc1x01, va1, vb01);
    118  vacc2x01 = vmla_f32(vacc2x01, va2, vb01);
    119  vacc3x01 = vmla_f32(vacc3x01, va3, vb01);
    120  vacc4x01 = vmla_f32(vacc4x01, va4, vb01);
    121  vacc5x01 = vmla_f32(vacc5x01, va5, vb01);

D | 6x2-minmax-neonfma-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x2__neonfma_lane_ld64():
    144  const float32x2_t vb01 = vld1_f32(w); w += 2;
    146  vacc0x01 = vfma_f32(vacc0x01, va0, vb01);
    147  vacc1x01 = vfma_f32(vacc1x01, va1, vb01);
    148  vacc2x01 = vfma_f32(vacc2x01, va2, vb01);
    149  vacc3x01 = vfma_f32(vacc3x01, va3, vb01);
    150  vacc4x01 = vfma_f32(vacc4x01, va4, vb01);
    151  vacc5x01 = vfma_f32(vacc5x01, va5, vb01);

D | 4x2-minmax-neon-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64():
     92  const float32x2_t vb01 = vld1_f32(w); w += 2;
     94  vacc0x01 = vmla_f32(vacc0x01, va0, vb01);
     95  vacc1x01 = vmla_f32(vacc1x01, va1, vb01);
     96  vacc2x01 = vmla_f32(vacc2x01, va2, vb01);
     97  vacc3x01 = vmla_f32(vacc3x01, va3, vb01);
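
All three matches are the same NR=2 inner-loop pattern: vb01 holds two columns of the packed weight panel w, and each row's two-wide accumulator vaccMx01 accumulates that row's A operand times the shared vb01, via vmla_f32 on plain NEON and the fused vfma_f32 (no intermediate rounding; needs ARMv8, or ARMv7 with VFPv4) in the neonfma variants. Below is a minimal, self-contained sketch of the pattern under assumed names (gemm_4x2_sketch, cm_stride) and an assumed packed-B layout of kc rows of 2 floats; it is illustrative, not the XNNPACK source, which also handles edge cases and the min/max clamp:

    #include <arm_neon.h>
    #include <stddef.h>

    /* MR=4, NR=2 f32 microkernel body in the style matched above.
     * C is updated in place; cm_stride is the row stride of C. */
    void gemm_4x2_sketch(size_t kc,
                         const float* a0, const float* a1,
                         const float* a2, const float* a3,
                         const float* w, float* c, size_t cm_stride)
    {
      float32x2_t vacc0x01 = vld1_f32(c + 0 * cm_stride);
      float32x2_t vacc1x01 = vld1_f32(c + 1 * cm_stride);
      float32x2_t vacc2x01 = vld1_f32(c + 2 * cm_stride);
      float32x2_t vacc3x01 = vld1_f32(c + 3 * cm_stride);
      for (size_t k = 0; k < kc; k++) {
        const float32x2_t vb01 = vld1_f32(w); w += 2;  /* two B columns */
        const float32x2_t va0 = vld1_dup_f32(a0 + k);  /* broadcast A[m][k] */
        const float32x2_t va1 = vld1_dup_f32(a1 + k);
        const float32x2_t va2 = vld1_dup_f32(a2 + k);
        const float32x2_t va3 = vld1_dup_f32(a3 + k);
        vacc0x01 = vmla_f32(vacc0x01, va0, vb01);      /* C[m][0..1] += a*b */
        vacc1x01 = vmla_f32(vacc1x01, va1, vb01);
        vacc2x01 = vmla_f32(vacc2x01, va2, vb01);
        vacc3x01 = vmla_f32(vacc3x01, va3, vb01);
      }
      vst1_f32(c + 0 * cm_stride, vacc0x01);
      vst1_f32(c + 1 * cm_stride, vacc1x01);
      vst1_f32(c + 2 * cm_stride, vacc2x01);
      vst1_f32(c + 3 * cm_stride, vacc3x01);
    }

On NEON+FMA targets the four vmla_f32 calls become vfma_f32; that substitution is the only difference between the neon and neonfma matches above.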
/external/XNNPACK/src/f32-igemm/gen/

D | 6x2-minmax-neon-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64():
    142  const float32x2_t vb01 = vld1_f32(w); w += 2;
    144  vacc0x01 = vmla_f32(vacc0x01, va0, vb01);
    145  vacc1x01 = vmla_f32(vacc1x01, va1, vb01);
    146  vacc2x01 = vmla_f32(vacc2x01, va2, vb01);
    147  vacc3x01 = vmla_f32(vacc3x01, va3, vb01);
    148  vacc4x01 = vmla_f32(vacc4x01, va4, vb01);
    149  vacc5x01 = vmla_f32(vacc5x01, va5, vb01);

D | 6x2-minmax-neonfma-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64():
    172  const float32x2_t vb01 = vld1_f32(w); w += 2;
    174  vacc0x01 = vfma_f32(vacc0x01, va0, vb01);
    175  vacc1x01 = vfma_f32(vacc1x01, va1, vb01);
    176  vacc2x01 = vfma_f32(vacc2x01, va2, vb01);
    177  vacc3x01 = vfma_f32(vacc3x01, va3, vb01);
    178  vacc4x01 = vfma_f32(vacc4x01, va4, vb01);
    179  vacc5x01 = vfma_f32(vacc5x01, va5, vb01);

D | 4x2-minmax-neon-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64():
    114  const float32x2_t vb01 = vld1_f32(w); w += 2;
    116  vacc0x01 = vmla_f32(vacc0x01, va0, vb01);
    117  vacc1x01 = vmla_f32(vacc1x01, va1, vb01);
    118  vacc2x01 = vmla_f32(vacc2x01, va2, vb01);
    119  vacc3x01 = vmla_f32(vacc3x01, va3, vb01);
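
The igemm matches are line-for-line the same multiply-accumulate loop as the gemm ones above; what differs in the indirect-GEMM kernels is that the A rows are fetched through an indirection buffer of row pointers, which is also why the same code sits at higher line numbers. A rough sketch of that framing, with assumed names and calling convention (igemm_4x2_sketch, ks, kc), not the real XNNPACK kernel ABI:

    #include <arm_neon.h>
    #include <stddef.h>

    /* Same 4x2 inner loop, but A comes through an indirection buffer:
     * ks groups of 4 row pointers, with the weight pointer advancing
     * continuously across all groups. */
    void igemm_4x2_sketch(size_t ks, size_t kc, const float** a,
                          const float* w, float32x2_t vacc01[4])
    {
      do {
        const float* a0 = a[0];  /* one input row per indirection entry */
        const float* a1 = a[1];
        const float* a2 = a[2];
        const float* a3 = a[3];
        a += 4;
        for (size_t k = 0; k < kc; k++) {
          const float32x2_t vb01 = vld1_f32(w); w += 2;
          vacc01[0] = vmla_f32(vacc01[0], vld1_dup_f32(a0 + k), vb01);
          vacc01[1] = vmla_f32(vacc01[1], vld1_dup_f32(a1 + k), vb01);
          vacc01[2] = vmla_f32(vacc01[2], vld1_dup_f32(a2 + k), vb01);
          vacc01[3] = vmla_f32(vacc01[3], vld1_dup_f32(a3 + k), vb01);
        }
      } while (--ks != 0);
    }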
/external/XNNPACK/src/qc8-gemm/gen/

D | 1x4c8-minmax-fp32-sse41-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128():
    57  const __m128i vb01 = _mm_load_si128((const __m128i*) w);
    58  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    59  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c8-minmax-fp32-avx-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128():
    57  const __m128i vb01 = _mm_load_si128((const __m128i*) w);
    58  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    59  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c2s4-minmax-fp32-sse41-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128():
    55  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    56  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    57  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c8-minmax-fp32-xop-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128():
    62  const __m128i vb01 = _mm_load_si128((const __m128i*) w);
    63  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    64  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c2s4-minmax-fp32-avx-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128():
    55  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    56  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    57  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c2s4-minmax-fp32-xop-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128():
    60  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    61  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    62  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c2s4-minmax-fp32-sse2-ld128.c | in xnn_qc8_gemm_minmax_fp32_ukernel_1x4c2s4__sse2_ld128():
    55  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    56  const __m128i vsb01 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb01);
    57  const __m128i vxb0 = _mm_unpacklo_epi8(vb01, vsb01);
    58  const __m128i vxb1 = _mm_unpackhi_epi8(vb01, vsb01);
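
Every qc8 match above is one idiom: widen 16 signed 8-bit weights into two vectors of eight int16 lanes. The c8 kernels use an aligned _mm_load_si128 and the c2s4 kernels an unaligned _mm_loadu_si128, but the widening is identical on SSE4.1/AVX/XOP: _mm_cvtepi8_epi16 sign-extends the low 8 bytes directly, and for the high 8 bytes _mm_unpackhi_epi8(vb01, vb01) duplicates each byte into both halves of a 16-bit lane so that an arithmetic shift right by 8 leaves the sign-extended value. A compact sketch of just this widening, under an assumed helper name (widen_i8x16_sse41):

    #include <smmintrin.h>  /* SSE4.1 */

    /* Widen 16 packed signed bytes at w into two vectors of 8 int16. */
    static inline void widen_i8x16_sse41(const void* w,
                                         __m128i* vxb0, __m128i* vxb1)
    {
      const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
      /* Bytes 0..7: direct sign extension. */
      *vxb0 = _mm_cvtepi8_epi16(vb01);
      /* Bytes 8..15: duplicate each byte into both halves of a 16-bit
       * lane, then shift right arithmetically so the sign bit fills
       * the high byte. */
      *vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);
    }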
/external/XNNPACK/src/qs8-gemm/gen/

D | 1x4c2s4-minmax-fp32-sse41-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128():
    55  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    56  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    57  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c2s4-minmax-fp32-avx-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128():
    55  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    56  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    57  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c8-minmax-fp32-sse41-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128():
    57  const __m128i vb01 = _mm_load_si128((const __m128i*) w);
    58  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    59  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c8-minmax-fp32-xop-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128():
    62  const __m128i vb01 = _mm_load_si128((const __m128i*) w);
    63  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    64  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c8-minmax-fp32-avx-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128():
    57  const __m128i vb01 = _mm_load_si128((const __m128i*) w);
    58  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    59  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c2s4-minmax-fp32-xop-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128():
    60  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    61  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    62  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c8-minmax-fp32-ssse3-ld128.c | in xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128():
    57  const __m128i vb01 = _mm_load_si128((const __m128i*) w);
    58  const __m128i vsb01 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb01);
    59  const __m128i vxb0 = _mm_unpacklo_epi8(vb01, vsb01);
    60  const __m128i vxb1 = _mm_unpackhi_epi8(vb01, vsb01);
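
The ssse3 entry here, like the sse2 one in the qc8 group, has no _mm_cvtepi8_epi16 available, so it derives a per-byte sign mask with _mm_cmpgt_epi8 and interleaves it in as the high byte of each widened lane; interleaving a value byte with its sign byte is exactly a sign extension to 16 bits. A sketch, again under an assumed helper name (widen_i8x16_sse2):

    #include <emmintrin.h>  /* SSE2 */

    /* Pre-SSE4.1 fallback: widen via sign mask + byte interleave. */
    static inline void widen_i8x16_sse2(const void* w,
                                        __m128i* vxb0, __m128i* vxb1)
    {
      const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
      /* 0xFF where the byte is negative, 0x00 otherwise. */
      const __m128i vsb01 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb01);
      *vxb0 = _mm_unpacklo_epi8(vb01, vsb01);  /* bytes 0..7  -> int16 */
      *vxb1 = _mm_unpackhi_epi8(vb01, vsb01);  /* bytes 8..15 -> int16 */
    }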
/external/XNNPACK/src/qs8-igemm/gen/

D | 1x4c2s4-minmax-fp32-xop-ld128.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128():
    70  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    71  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    72  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c2s4-minmax-fp32-sse41-ld128.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__sse41_ld128():
    65  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    66  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    67  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c2s4-minmax-fp32-avx-ld128.c | in xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128():
    65  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    66  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    67  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);
/external/XNNPACK/src/qc8-igemm/gen/

D | 1x4c2s4-minmax-fp32-avx-ld128.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__avx_ld128():
    65  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    66  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    67  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

D | 1x4c2s4-minmax-fp32-xop-ld128.c | in xnn_qc8_igemm_minmax_fp32_ukernel_1x4c2s4__xop_ld128():
    70  const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);
    71  const __m128i vxb0 = _mm_cvtepi8_epi16(vb01);
    72  const __m128i vxb1 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);
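
The two widening schemes (cvtepi8/srai on SSE4.1+, cmpgt/unpack on SSE2/SSSE3) should agree lane for lane, which is easy to confirm in isolation. A throwaway check, illustrative and not part of XNNPACK; build with -msse4.1:

    #include <smmintrin.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
      int8_t w[16];
      for (int i = 0; i < 16; i++) w[i] = (int8_t) (i * 15 - 100);

      const __m128i vb01 = _mm_loadu_si128((const __m128i*) w);

      /* SSE4.1 path. */
      const __m128i lo41 = _mm_cvtepi8_epi16(vb01);
      const __m128i hi41 = _mm_srai_epi16(_mm_unpackhi_epi8(vb01, vb01), 8);

      /* SSE2 path. */
      const __m128i vsb01 = _mm_cmpgt_epi8(_mm_setzero_si128(), vb01);
      const __m128i lo2 = _mm_unpacklo_epi8(vb01, vsb01);
      const __m128i hi2 = _mm_unpackhi_epi8(vb01, vsb01);

      int16_t x[8], y[8];
      int ok = 1;
      _mm_storeu_si128((__m128i*) x, lo41);
      _mm_storeu_si128((__m128i*) y, lo2);
      ok &= memcmp(x, y, sizeof(x)) == 0;
      _mm_storeu_si128((__m128i*) x, hi41);
      _mm_storeu_si128((__m128i*) y, hi2);
      ok &= memcmp(x, y, sizeof(x)) == 0;
      printf("%s\n", ok ? "match" : "MISMATCH");
      return !ok;
    }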