• Home
  • Raw
  • Download

Lines Matching +full:- +full:- +full:output

6 // This source code is licensed under the BSD-style license found in the
19 uint8_t* output) in xnn_x8_zip_xm_ukernel__sse2() argument
23 const size_t output_increment = 4 - m * n; in xnn_x8_zip_xm_ukernel__sse2()
24 const uint8_t* last_input = w + n * (m - 1); in xnn_x8_zip_xm_ukernel__sse2()
25 uint8_t* last_output = (uint8_t*) ((uintptr_t) output + (m - 4)); in xnn_x8_zip_xm_ukernel__sse2()
34 const uint8_t* z = (const uint8_t*) ((uintptr_t) w - n); in xnn_x8_zip_xm_ukernel__sse2()
35 const uint8_t* y = (const uint8_t*) ((uintptr_t) z - n); in xnn_x8_zip_xm_ukernel__sse2()
36 const uint8_t* x = (const uint8_t*) ((uintptr_t) y - n); in xnn_x8_zip_xm_ukernel__sse2()
55 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
56 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
58 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
59 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
61 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
62 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
64 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
65 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
67 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1)); in xnn_x8_zip_xm_ukernel__sse2()
68 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
70 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1)); in xnn_x8_zip_xm_ukernel__sse2()
71 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
73 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1)); in xnn_x8_zip_xm_ukernel__sse2()
74 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
76 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1)); in xnn_x8_zip_xm_ukernel__sse2()
77 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
79 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw2)); in xnn_x8_zip_xm_ukernel__sse2()
80 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
82 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw2)); in xnn_x8_zip_xm_ukernel__sse2()
83 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
85 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw2)); in xnn_x8_zip_xm_ukernel__sse2()
86 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
88 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw2)); in xnn_x8_zip_xm_ukernel__sse2()
89 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
91 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw3)); in xnn_x8_zip_xm_ukernel__sse2()
92 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
94 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw3)); in xnn_x8_zip_xm_ukernel__sse2()
95 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
97 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw3)); in xnn_x8_zip_xm_ukernel__sse2()
98 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
100 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw3)); in xnn_x8_zip_xm_ukernel__sse2()
101 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
102 k -= 16; in xnn_x8_zip_xm_ukernel__sse2()
118 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
119 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
121 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
122 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
124 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
125 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
127 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
128 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
130 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1)); in xnn_x8_zip_xm_ukernel__sse2()
131 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
133 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1)); in xnn_x8_zip_xm_ukernel__sse2()
134 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
136 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1)); in xnn_x8_zip_xm_ukernel__sse2()
137 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
139 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1)); in xnn_x8_zip_xm_ukernel__sse2()
140 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
141 k -= 8; in xnn_x8_zip_xm_ukernel__sse2()
144 const size_t address_decrement = 8 - k; in xnn_x8_zip_xm_ukernel__sse2()
145 x -= address_decrement; in xnn_x8_zip_xm_ukernel__sse2()
146 y -= address_decrement; in xnn_x8_zip_xm_ukernel__sse2()
147 z -= address_decrement; in xnn_x8_zip_xm_ukernel__sse2()
148 w -= address_decrement; in xnn_x8_zip_xm_ukernel__sse2()
162 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
163 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
165 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
166 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
168 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
169 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
171 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
172 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
177 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
178 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
180 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
181 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
185 unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw0)); in xnn_x8_zip_xm_ukernel__sse2()
186 output = (uint8_t*) ((uintptr_t) output + m); in xnn_x8_zip_xm_ukernel__sse2()
189 output = (uint8_t*) ((uintptr_t) output + output_increment); in xnn_x8_zip_xm_ukernel__sse2()
190 if (output > last_output) { in xnn_x8_zip_xm_ukernel__sse2()
191 output = last_output; in xnn_x8_zip_xm_ukernel__sse2()
196 uint8_t* o = output; in xnn_x8_zip_xm_ukernel__sse2()
204 } while (--l != 0); in xnn_x8_zip_xm_ukernel__sse2()
205 } while (--k != 0); in xnn_x8_zip_xm_ukernel__sse2()