/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-sse41-mul32-ld32-x32.c | 36 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 37 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 39 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 40 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 41 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 42 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 43 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 44 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() 45 const __m128i vyGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 16)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() [all …]
|
D | minmax-xop-mul32-ld32-x32.c | 41 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 42 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 44 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 45 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 46 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 47 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 48 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 49 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() 50 const __m128i vyGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 16)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() [all …]
|
D | minmax-sse41-mul32-ld32-x24.c | 36 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 37 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 39 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 40 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 41 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 42 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 43 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 44 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() 45 const __m128i vyGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 16)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24() [all …]
|
D | minmax-xop-mul32-ld32-x24.c | 41 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 42 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 44 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 45 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 46 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 47 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 48 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 49 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() 50 const __m128i vyGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 16)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24() [all …]
|
D | minmax-xop-mul32-ld32-x16.c | 41 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 42 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 44 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 45 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 46 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 47 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 48 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 88 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() 89 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16() [all …]
|
D | minmax-sse41-mul32-ld32-x16.c | 36 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 37 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 39 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 40 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 41 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 42 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 43 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 83 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() 84 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16() [all …]
|
D | minmax-xop-mul32-ld32-x8.c | 41 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 42 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 44 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 73 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 74 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 75 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8() 76 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
|
D | minmax-sse41-mul32-ld32-x8.c | 36 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 37 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 39 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 68 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 69 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 70 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8() 71 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-sse41-mul32-ld32-x32.c | 37 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 39 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 40 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 41 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 42 const __m128i vxKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 20)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 43 const __m128i vxOPQR = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 24)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 44 const __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 28)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 99 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() 100 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
|
D | minmax-xop-mul32-ld32-x32.c | 42 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 44 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 45 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 46 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 47 const __m128i vxKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 20)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 48 const __m128i vxOPQR = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 24)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 49 const __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 28)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 104 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() 105 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
|
D | minmax-xop-mul32-ld32-x24.c | 42 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 44 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 45 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 46 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 47 const __m128i vxKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 20)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 93 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24() 94 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
|
D | minmax-sse41-mul32-ld32-x24.c | 37 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 39 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 40 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 41 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 42 const __m128i vxKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 20)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 88 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24() 89 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
|
D | minmax-sse41-mul32-ld32-x16.c | 37 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 39 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 40 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 75 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() 76 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
|
D | minmax-xop-mul32-ld32-x16.c | 42 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 44 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 45 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 80 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() 81 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
|
D | minmax-sse41-mul32-ld32-x8.c | 37 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 64 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8() 65 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
|
D | minmax-xop-mul32-ld32-x8.c | 42 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() 43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() 69 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8() 70 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
|
/external/XNNPACK/src/qs8-vadd/ |
D | sse-mul32-ld32.c.in | 44 const __m128i vx${ABC[0:4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); 45 const __m128i vy${ABC[0:4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); 47 const __m128i vx${ABC[N:N+4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + ${N})); 48 const __m128i vy${ABC[N:N+4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + ${N})); 99 const __m128i vx${ABC[0:4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); 100 const __m128i vy${ABC[0:4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); 101 const __m128i vx${ABC[4:8]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); 102 const __m128i vy${ABC[4:8]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4));
|
/external/libopus/celt/x86/ |
D | x86cpu.h | 79 (_mm_cvtepi8_epi32(_mm_cvtsi32_si128(*(int *)(x)))) 82 (_mm_cvtepi8_epi32(*(__m128i *)(x)))
|
/external/XNNPACK/src/qs8-vaddc/ |
D | sse-mul32-ld32.c.in | 45 const __m128i vx${ABC[0:4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); 47 const __m128i vx${ABC[N:N+4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + ${N})); 92 const __m128i vx${ABC[0:4]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); 93 const __m128i vx${ABC[4:8]} = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4));
|
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/ |
D | sse_tensor_utils.cc | 142 const __m128i vec_32x4 = _mm_cvtepi8_epi32( in SseMatrixBatchVectorMultiplyAccumulateImpl() 144 const __m128i row_32x4 = _mm_cvtepi8_epi32( in SseMatrixBatchVectorMultiplyAccumulateImpl()
|
/external/clang/test/CodeGen/ |
D | sse41-builtins.c | 89 return _mm_cvtepi8_epi32(a); in test_mm_cvtepi8_epi32()
|
/external/llvm-project/clang/test/CodeGen/X86/ |
D | sse41-builtins.c | 87 return _mm_cvtepi8_epi32(a); in test_mm_cvtepi8_epi32()
|
/external/libaom/libaom/av1/common/x86/ |
D | selfguided_sse4.c | 188 mask[idx] = _mm_cvtepi8_epi32(_mm_srl_epi64(ones32, shift)); in calc_ab() 329 mask[idx] = _mm_cvtepi8_epi32(_mm_srl_epi64(ones32, shift)); in calc_ab_fast()
|
/external/llvm-project/clang/lib/Headers/ |
D | smmintrin.h | 1251 _mm_cvtepi8_epi32(__m128i __V) in _mm_cvtepi8_epi32() function
|
/external/clang/lib/Headers/ |
D | smmintrin.h | 290 _mm_cvtepi8_epi32(__m128i __V) in _mm_cvtepi8_epi32() function
|