Home
last modified time | relevance | path

Searched refs:vld1q_s8 (Results 1 – 25 of 59) sorted by relevance

123

/external/XNNPACK/src/qs8-igemm/gen/
D1x16c4-minmax-neondot.c68 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
69 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
70 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
71 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
72 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
73 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
74 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
75 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
95 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
96 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
[all …]
D1x16c16-minmax-neon-mlal-padal.c75 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
77 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
78 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
79 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
80 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
81 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
82 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
83 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
84 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
85 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
[all …]
D1x8c4-minmax-neondot.c66 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
67 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
68 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
69 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
85 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
86 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_1x8c4__neondot()
D1x8c16-minmax-neon-mlal-padal.c67 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
69 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
70 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
71 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
72 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
73 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
74 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
75 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
76 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_1x8c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c99 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
100 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
102 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
103 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
104 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
105 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
106 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
107 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
108 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
109 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x16c16__neon_mlal_padal()
[all …]
D4x16c4-minmax-neondot.c107 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
108 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
109 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
110 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
111 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
112 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
113 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
114 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
161 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
162 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
[all …]
D2x8c16-minmax-neon-mlal-padal.c83 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
84 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
86 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
87 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
88 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
89 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
90 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
91 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
92 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
93 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c123 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
124 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
125 const int8x16_t va2 = vld1q_s8(a2); a2 += 16; in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
127 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
128 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
129 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
130 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
131 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
132 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
133 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D3x8c16-minmax-neon-mlal-padal.c99 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
100 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
101 const int8x16_t va2 = vld1q_s8(a2); a2 += 16; in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
103 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
104 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
105 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
106 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
107 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
108 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
109 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c16__neon_mlal_padal()
[all …]
D4x8c4-minmax-neondot.c99 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
100 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
101 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
102 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
133 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
134 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_4x8c4__neondot()
D6x16c4-minmax-neondot.c133 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
134 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
135 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
136 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
137 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
138 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
139 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
140 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
205 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
206 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_igemm_minmax_ukernel_6x16c4__neondot()
[all …]
D4x8c16-minmax-neon-mlal-padal.c115 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
116 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
117 const int8x16_t va2 = vld1q_s8(a2); a2 += 16; in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
118 const int8x16_t va3 = vld1q_s8(a3); a3 += 16; in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
120 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
121 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
122 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
123 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
124 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
125 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c16__neon_mlal_padal()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D1x16c4-minmax-neondot.c59 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
60 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
61 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
62 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
63 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
64 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
65 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
66 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
86 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
87 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
[all …]
D1x16c16-minmax-neon-mlal-padal.c64 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
66 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
67 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
68 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
69 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
70 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
71 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
72 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
73 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
74 const int8x16_t vb8 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
[all …]
D1x8c4-minmax-neondot.c57 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
58 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
59 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
60 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
76 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
77 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot()
D1x8c16-minmax-neon-mlal-padal.c56 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
58 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
59 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
60 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
61 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
62 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
63 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
64 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
65 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal()
D2x16c16-minmax-neon-mlal-padal.c86 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
87 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
89 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
90 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
91 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
92 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
93 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
94 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
95 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
96 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal()
[all …]
D4x16c4-minmax-neondot.c92 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
93 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
94 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
95 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
96 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
97 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
98 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
99 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
146 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
147 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
[all …]
D2x8c16-minmax-neon-mlal-padal.c70 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
71 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
73 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
74 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
75 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
76 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
77 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
78 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
79 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
80 const int8x16_t vb7 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal()
D3x16c16-minmax-neon-mlal-padal.c108 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
109 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
110 const int8x16_t va2 = vld1q_s8(a2); a2 += 16; in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
112 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
113 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
114 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
115 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
116 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
117 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
118 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal()
[all …]
D3x8c16-minmax-neon-mlal-padal.c84 const int8x16_t va0 = vld1q_s8(a0); a0 += 16; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
85 const int8x16_t va1 = vld1q_s8(a1); a1 += 16; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
86 const int8x16_t va2 = vld1q_s8(a2); a2 += 16; in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
88 const int8x16_t vb0 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
89 const int8x16_t vb1 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
90 const int8x16_t vb2 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
91 const int8x16_t vb3 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
92 const int8x16_t vb4 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
93 const int8x16_t vb5 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
94 const int8x16_t vb6 = vld1q_s8(w); w = (const void*) ((uintptr_t) w + 16 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal()
[all …]
D4x8c4-minmax-neondot.c84 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
85 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
86 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
87 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
118 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
119 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot()
D6x16c4-minmax-neondot.c114 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
115 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
116 const int8x16_t vb0123x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
117 const int8x16_t vb0123xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
118 const int8x16_t vb4567x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
119 const int8x16_t vb4567x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
120 const int8x16_t vb4567x89AB = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
121 const int8x16_t vb4567xCDEF = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
186 const int8x16_t vb0123x0123 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
187 const int8x16_t vb0123x4567 = vld1q_s8(w); w = (const void*) ((const int8_t*) w + 16); in xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot()
[all …]
/external/tensorflow/tensorflow/lite/kernels/internal/optimized/
Ddepthwiseconv_uint8_transitional.h3323 filter_reg_0_a = vld1q_s8(filter_workspace);
3325 filter_reg_0_b = vld1q_s8(filter_workspace);
3327 filter_reg_1_a = vld1q_s8(filter_workspace);
3329 filter_reg_1_b = vld1q_s8(filter_workspace);
3331 filter_reg_2_a = vld1q_s8(filter_workspace);
3333 filter_reg_2_b = vld1q_s8(filter_workspace);
3355 int8x16_t left_bank_0_reg = vld1q_s8(next_input_data);
3357 vld1q_s8(next_input_data + workspace_height_stride);
3359 vld1q_s8(next_input_data + 2 * workspace_height_stride);
3361 vld1q_s8(next_input_data + 3 * workspace_height_stride);
[all …]
/external/libjpeg-turbo/simd/arm/aarch64/
Djchuff-neon.c99 vld1q_s8((int8_t *)(block + 0 * DCTSIZE)), in jsimd_huff_encode_one_block_neon()
100 vld1q_s8((int8_t *)(block + 1 * DCTSIZE)), in jsimd_huff_encode_one_block_neon()
101 vld1q_s8((int8_t *)(block + 2 * DCTSIZE)), in jsimd_huff_encode_one_block_neon()
102 vld1q_s8((int8_t *)(block + 3 * DCTSIZE)) in jsimd_huff_encode_one_block_neon()
105 vld1q_s8((int8_t *)(block + 4 * DCTSIZE)), in jsimd_huff_encode_one_block_neon()
106 vld1q_s8((int8_t *)(block + 5 * DCTSIZE)), in jsimd_huff_encode_one_block_neon()
107 vld1q_s8((int8_t *)(block + 6 * DCTSIZE)), in jsimd_huff_encode_one_block_neon()
108 vld1q_s8((int8_t *)(block + 7 * DCTSIZE)) in jsimd_huff_encode_one_block_neon()

123