Home
last modified time | relevance | path

Searched refs:_mm_loadu_si32 (Results 1 – 25 of 42) sorted by relevance

12

/external/XNNPACK/src/f32-velu/gen/
Dvelu-avx-rr2-lut16-p3-x40.c74 …__m128i vl0_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
75 …__m128i vl0_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
76 …__m128i vl0_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
77 …__m128i vl0_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
83 …__m128i vl0_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
84 …__m128i vl0_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
85 …__m128i vl0_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
86 …__m128i vl0_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
103 …__m128i vl1_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
104 …__m128i vl1_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40()
[all …]
Dvelu-avx-rr2-lut16-p3-x32.c71 …__m128i vl0_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
72 …__m128i vl0_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
73 …__m128i vl0_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
74 …__m128i vl0_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
80 …__m128i vl0_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
81 …__m128i vl0_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
82 …__m128i vl0_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
83 …__m128i vl0_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
100 …__m128i vl1_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
101 …__m128i vl1_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32()
[all …]
Dvelu-avx-rr2-lut16-p3-x48.c77 …__m128i vl0_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
78 …__m128i vl0_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
79 …__m128i vl0_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
80 …__m128i vl0_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
86 …__m128i vl0_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
87 …__m128i vl0_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
88 …__m128i vl0_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
89 …__m128i vl0_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
106 …__m128i vl1_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
107 …__m128i vl1_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48()
[all …]
Dvelu-avx-rr2-lut16-p3-x24.c68 …__m128i vl0_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
69 …__m128i vl0_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
70 …__m128i vl0_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
71 …__m128i vl0_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
77 …__m128i vl0_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
78 …__m128i vl0_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
79 …__m128i vl0_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
80 …__m128i vl0_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
97 …__m128i vl1_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
98 …__m128i vl1_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24()
[all …]
Dvelu-avx-rr2-lut16-p3-x16.c65 …__m128i vl0_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
66 …__m128i vl0_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
67 …__m128i vl0_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
68 …__m128i vl0_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
74 …__m128i vl0_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
75 …__m128i vl0_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
76 …__m128i vl0_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
77 …__m128i vl0_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
94 …__m128i vl1_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
95 …__m128i vl1_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16()
[all …]
Dvelu-avx-rr2-lut16-p3-x8.c62 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
63 …__m128i vl_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
64 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
65 …__m128i vl_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
71 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
72 …__m128i vl_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
73 …__m128i vl_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
74 …__m128i vl_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
128 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
129 …__m128i vl_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_… in xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8()
[all …]
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-sse41-mul32-ld32-x32.c36 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
37 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
39 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
40 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
41 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
42 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
43 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
44 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
45 const __m128i vyGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 16)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
[all …]
Dminmax-xop-mul32-ld32-x32.c41 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
42 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
44 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
45 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
46 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
47 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
48 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
49 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
50 const __m128i vyGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 16)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
[all …]
Dminmax-sse41-mul32-ld32-x24.c36 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
37 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
39 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
40 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
41 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
42 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
43 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
44 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
45 const __m128i vyGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 16)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x24()
[all …]
Dminmax-xop-mul32-ld32-x24.c41 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
42 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
44 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
45 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
46 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
47 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
48 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
49 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
50 const __m128i vyGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 16)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x24()
[all …]
Dminmax-xop-mul32-ld32-x16.c41 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
42 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
44 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
45 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
46 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
47 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
48 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
88 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
89 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x16()
[all …]
Dminmax-sse41-mul32-ld32-x16.c36 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
37 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
39 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
40 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
41 const __m128i vy89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 8)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
42 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
43 const __m128i vyCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 12)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
83 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
84 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x16()
[all …]
Dminmax-xop-mul32-ld32-x8.c41 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
42 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
44 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
73 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
74 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
75 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
76 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x8()
Dminmax-sse41-mul32-ld32-x8.c36 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
37 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
39 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
68 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
69 const __m128i vy0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
70 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
71 const __m128i vy4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_y + 4)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x8()
/external/XNNPACK/src/f32-velu/
Davx-rr2-lut16-p3.c.in67 …__m128i vl${N}_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin…
68 …__m128i vl${N}_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin…
69 …__m128i vl${N}_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin…
70 …__m128i vl${N}_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin…
76 …__m128i vl${N}_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin…
77 …__m128i vl${N}_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin…
78 …__m128i vl${N}_hl = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin…
79 …__m128i vl${N}_hh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uin…
144 …__m128i vl_ll = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_…
145 …__m128i vl_lh = _mm_loadu_si32((const void*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_…
[all …]
/external/XNNPACK/src/qs8-vaddc/gen/
Dminmax-sse41-mul32-ld32-x32.c37 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
39 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
40 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
41 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
42 const __m128i vxKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 20)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
43 const __m128i vxOPQR = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 24)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
44 const __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 28)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
99 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
100 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
Dminmax-xop-mul32-ld32-x32.c42 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
44 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
45 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
46 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
47 const __m128i vxKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 20)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
48 const __m128i vxOPQR = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 24)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
49 const __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 28)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
104 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
105 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
Dminmax-xop-mul32-ld32-x24.c42 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
44 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
45 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
46 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
47 const __m128i vxKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 20)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
93 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
94 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x24()
Dminmax-sse41-mul32-ld32-x24.c37 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
39 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
40 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
41 const __m128i vxGHIJ = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 16)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
42 const __m128i vxKLMN = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 20)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
88 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
89 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x24()
Dminmax-sse41-mul32-ld32-x16.c37 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
38 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
39 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
40 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
75 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
76 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
Dminmax-xop-mul32-ld32-x16.c42 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
43 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
44 const __m128i vx89AB = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 8)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
45 const __m128i vxCDEF = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 12)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
80 const __m128i vx0123 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
81 const __m128i vx4567 = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 4)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
/external/XNNPACK/src/qs8-gemm/gen/
D1x8c8-xw-minmax-avx2.c45 const __m128i vbias0x0 = _mm_loadu_si32(w); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
46 const __m128i vbias0x1 = _mm_loadu_si32((const void*) ((uintptr_t) w + sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
48 const __m128i vbias0x2 = _mm_loadu_si32((const void*) ((uintptr_t) w + 2 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
49 const __m128i vbias0x3 = _mm_loadu_si32((const void*) ((uintptr_t) w + 3 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
51 const __m128i vbias0x4 = _mm_loadu_si32((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
52 const __m128i vbias0x5 = _mm_loadu_si32((const void*) ((uintptr_t) w + 5 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
54 const __m128i vbias0x6 = _mm_loadu_si32((const void*) ((uintptr_t) w + 6 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
55 const __m128i vbias0x7 = _mm_loadu_si32((const void*) ((uintptr_t) w + 7 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2()
D1x8c8-minmax-avx2.c45 const __m128i vbias0x0 = _mm_loadu_si32(w); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
46 const __m128i vbias0x1 = _mm_loadu_si32((const void*) ((uintptr_t) w + sizeof(int32_t))); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
48 const __m128i vbias0x2 = _mm_loadu_si32((const void*) ((uintptr_t) w + 2 * sizeof(int32_t))); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
49 const __m128i vbias0x3 = _mm_loadu_si32((const void*) ((uintptr_t) w + 3 * sizeof(int32_t))); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
51 const __m128i vbias0x4 = _mm_loadu_si32((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
52 const __m128i vbias0x5 = _mm_loadu_si32((const void*) ((uintptr_t) w + 5 * sizeof(int32_t))); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
54 const __m128i vbias0x6 = _mm_loadu_si32((const void*) ((uintptr_t) w + 6 * sizeof(int32_t))); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
55 const __m128i vbias0x7 = _mm_loadu_si32((const void*) ((uintptr_t) w + 7 * sizeof(int32_t))); in xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2()
D2x8c8-xw-minmax-avx2.c51 const __m128i vbias0x0 = _mm_loadu_si32(w); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
52 const __m128i vbias0x1 = _mm_loadu_si32((const void*) ((uintptr_t) w + sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
54 const __m128i vbias0x2 = _mm_loadu_si32((const void*) ((uintptr_t) w + 2 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
55 const __m128i vbias0x3 = _mm_loadu_si32((const void*) ((uintptr_t) w + 3 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
57 const __m128i vbias0x4 = _mm_loadu_si32((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
58 const __m128i vbias0x5 = _mm_loadu_si32((const void*) ((uintptr_t) w + 5 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
60 const __m128i vbias0x6 = _mm_loadu_si32((const void*) ((uintptr_t) w + 6 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
61 const __m128i vbias0x7 = _mm_loadu_si32((const void*) ((uintptr_t) w + 7 * sizeof(int32_t))); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
/external/XNNPACK/src/qs8-igemm/gen/
D1x8c8-minmax-avx2.c48 const __m128i vbias0x0 = _mm_loadu_si32(w); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
49 const __m128i vbias0x1 = _mm_loadu_si32((const void*) ((uintptr_t) w + sizeof(int32_t))); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
51 const __m128i vbias0x2 = _mm_loadu_si32((const void*) ((uintptr_t) w + 2 * sizeof(int32_t))); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
52 const __m128i vbias0x3 = _mm_loadu_si32((const void*) ((uintptr_t) w + 3 * sizeof(int32_t))); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
54 const __m128i vbias0x4 = _mm_loadu_si32((const void*) ((uintptr_t) w + 4 * sizeof(int32_t))); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
55 const __m128i vbias0x5 = _mm_loadu_si32((const void*) ((uintptr_t) w + 5 * sizeof(int32_t))); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
57 const __m128i vbias0x6 = _mm_loadu_si32((const void*) ((uintptr_t) w + 6 * sizeof(int32_t))); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()
58 const __m128i vbias0x7 = _mm_loadu_si32((const void*) ((uintptr_t) w + 7 * sizeof(int32_t))); in xnn_qs8_igemm_minmax_ukernel_1x8c8__avx2()

12