• Home
  • Raw
  • Download

Lines Matching +full:- +full:t

7 //     http://www.apache.org/licenses/LICENSE-2.0
51 // A 4x4 block of accumulators is stored in 32bit in xmm4--xmm7. in Run()
53 // +-------+-------+-------+-------+ in Run()
55 // Rhs +-------+---------------+-------+ in Run()
57 // +-------+-------+-------+-------+ in Run()
63 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
68 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
75 "pxor %%xmm4 , %%xmm4 \n\t" in Run()
76 "pxor %%xmm5 , %%xmm5 \n\t" in Run()
77 "pxor %%xmm6 , %%xmm6 \n\t" in Run()
78 "pxor %%xmm7 , %%xmm7 \n\t" in Run()
80 "movl %[run_depth_cells], %%eax\n\t" in Run()
81 "subl $2, %%eax\n\t" in Run()
82 "js outerLoop1%=\n\t" in Run()
85 "outerLoop2%=:\n\t" in Run()
89 "pmovzxbw (%[rhs_ptr]), %%xmm1\n\t" in Run()
92 "pmovzxbw 0x00(%[lhs_ptr]), %%xmm0\n\t" in Run()
93 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
94 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
95 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
96 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
97 "paddd %%xmm2, %%xmm4 \n\t" in Run()
98 "paddd %%xmm3, %%xmm5 \n\t" in Run()
100 "prefetcht0 0x80(%[lhs_ptr]) \n\t" in Run()
102 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
103 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
104 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
105 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
107 "prefetcht0 0x80(%[rhs_ptr]) \n\t" in Run()
111 "pmovzxbw 0x08(%[rhs_ptr]), %%xmm1\n\t" in Run()
113 "paddd %%xmm2, %%xmm6 \n\t" in Run()
114 "paddd %%xmm3, %%xmm7 \n\t" in Run()
117 "pmovzxbw 0x08(%[lhs_ptr]), %%xmm0\n\t" in Run()
118 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
119 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
120 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
121 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
122 "paddd %%xmm2, %%xmm4 \n\t" in Run()
123 "paddd %%xmm3, %%xmm5 \n\t" in Run()
124 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
125 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
127 "addl $0x10, %[lhs_ptr] \n\t" in Run()
128 "addl $0x10, %[rhs_ptr] \n\t" in Run()
130 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
131 "paddd %%xmm3, %%xmm7 \n\t" in Run()
132 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
133 "paddd %%xmm2, %%xmm6 \n\t" in Run()
135 "subl $2, %[run_depth_cells]\n\t" in Run()
136 "ja outerLoop2%=\n\t" in Run()
138 "movl %[run_depth_cells], %%eax\n\t" in Run()
139 "decl %%eax\n\t" in Run()
140 "js finish%=\n\t" in Run()
143 "outerLoop1%=:\n\t" in Run()
146 "pmovzxbw (%[rhs_ptr]), %%xmm1\n\t" in Run()
149 "pmovzxbw 0x00(%[lhs_ptr]), %%xmm0\n\t" in Run()
150 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
151 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
152 "paddd %%xmm2, %%xmm4 \n\t" in Run()
153 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
154 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
155 "paddd %%xmm3, %%xmm5 \n\t" in Run()
157 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
158 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
159 "paddd %%xmm2, %%xmm6 \n\t" in Run()
160 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
161 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
162 "paddd %%xmm3, %%xmm7 \n\t" in Run()
164 "addl $0x08, %[lhs_ptr]\n\t" in Run()
165 "addl $0x08, %[rhs_ptr]\n\t" in Run()
167 "decl %[run_depth_cells]\n\t" in Run()
168 "jnz outerLoop1%=\n\t" in Run()
170 "finish%=:\n\t" in Run()
172 "movl %[dst_col_stride], %%eax\n\t" in Run()
173 "shll $2, %%eax\n\t" in Run()
175 "movl %[start_depth], %%ecx\n\t" in Run()
176 "test %%ecx, %%ecx\n\t" in Run()
177 "jz storeDst%=\n\t" in Run()
179 "leal (%%eax,%%eax,0x2), %%ecx\n\t" in Run()
180 "paddd 0x00(%[dst_ptr]) , %%xmm4 \n\t" in Run()
181 "paddd 0x00(%[dst_ptr], %%eax, 1) , %%xmm5 \n\t" in Run()
182 "paddd 0x00(%[dst_ptr], %%eax, 2) , %%xmm6 \n\t" in Run()
183 "paddd 0x00(%[dst_ptr], %%ecx, 1) , %%xmm7 \n\t" in Run()
185 "storeDst%=:\n\t" in Run()
187 "leal (%%eax,%%eax,0x2), %%ecx\n\t" in Run()
188 "movdqu %%xmm4 , 0x00(%[dst_ptr]) \n\t" in Run()
189 "movdqu %%xmm5 , 0x00(%[dst_ptr], %%eax, 1)\n\t" in Run()
190 "movdqu %%xmm6 , 0x00(%[dst_ptr], %%eax, 2)\n\t" in Run()
191 "movdqu %%xmm7 , 0x00(%[dst_ptr], %%ecx, 1)\n\t" in Run()
228 // A 12x4 block of accumulators is stored in 32bit in xmm4--xmm15. in Run()
230 // +-------+-------+-------+-------+ in Run()
232 // Rhs +-------+---------------+-------+ in Run()
234 // +-------+-------+-------+-------+ in Run()
240 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
245 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
250 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
255 // +--+--+ - - - - +-------+-------+-------+-------+ in Run()
262 "movq %[dst_col_stride_q], %%r12\n\t" in Run()
263 "shlq $2, %%r12\n\t" in Run()
264 "leaq (%%r12,%%r12,0x2), %%r13\n\t" in Run()
267 "pxor %%xmm4 , %%xmm4 \n\t" in Run()
268 "pxor %%xmm5 , %%xmm5 \n\t" in Run()
269 "pxor %%xmm6 , %%xmm6 \n\t" in Run()
270 "pxor %%xmm7 , %%xmm7 \n\t" in Run()
271 "pxor %%xmm8 , %%xmm8 \n\t" in Run()
272 "pxor %%xmm9 , %%xmm9 \n\t" in Run()
273 "pxor %%xmm10 , %%xmm10\n\t" in Run()
274 "pxor %%xmm11 , %%xmm11\n\t" in Run()
275 "pxor %%xmm12 , %%xmm12\n\t" in Run()
276 "pxor %%xmm13 , %%xmm13\n\t" in Run()
277 "pxor %%xmm14 , %%xmm14\n\t" in Run()
278 "pxor %%xmm15 , %%xmm15\n\t" in Run()
280 "movq %[run_depth_cells], %%r14\n\t" in Run()
281 "subq $2, %%r14\n\t" in Run()
282 "js outerLoop1%=\n\t" in Run()
285 "outerLoop2%=:\n\t" in Run()
290 "pmovzxbw (%[rhs_ptr]), %%xmm1\n\t" in Run()
293 "pmovzxbw 0x00(%[lhs_ptr]), %%xmm0\n\t" in Run()
294 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
295 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
296 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
297 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
298 "paddd %%xmm2, %%xmm4 \n\t" in Run()
299 "paddd %%xmm3, %%xmm5 \n\t" in Run()
301 "prefetcht0 0x80(%[lhs_ptr]) \n\t" in Run()
303 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
304 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
305 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
306 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
309 "pmovzxbw 0x08(%[lhs_ptr]), %%xmm0\n\t" in Run()
311 "paddd %%xmm2, %%xmm6 \n\t" in Run()
312 "paddd %%xmm3, %%xmm7 \n\t" in Run()
314 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
315 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
316 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
317 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
318 "paddd %%xmm2, %%xmm8 \n\t" in Run()
319 "paddd %%xmm3, %%xmm9 \n\t" in Run()
321 "prefetcht0 0x80(%[rhs_ptr]) \n\t" in Run()
323 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
324 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
325 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
326 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
327 "paddd %%xmm2, %%xmm10 \n\t" in Run()
328 "paddd %%xmm3, %%xmm11 \n\t" in Run()
331 "pmovzxbw 0x10(%[lhs_ptr]), %%xmm0\n\t" in Run()
332 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
333 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
334 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
335 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
336 "paddd %%xmm2, %%xmm12 \n\t" in Run()
337 "paddd %%xmm3, %%xmm13 \n\t" in Run()
339 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
340 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
341 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
342 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
343 "paddd %%xmm2, %%xmm14 \n\t" in Run()
344 "paddd %%xmm3, %%xmm15 \n\t" in Run()
348 "pmovzxbw 0x08(%[rhs_ptr]), %%xmm1\n\t" in Run()
351 "pmovzxbw 0x18(%[lhs_ptr]), %%xmm0\n\t" in Run()
352 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
353 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
354 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
355 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
356 "paddd %%xmm2, %%xmm4 \n\t" in Run()
357 "paddd %%xmm3, %%xmm5 \n\t" in Run()
359 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
360 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
361 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
362 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
363 "paddd %%xmm2, %%xmm6 \n\t" in Run()
364 "paddd %%xmm3, %%xmm7 \n\t" in Run()
367 "pmovzxbw 0x20(%[lhs_ptr]), %%xmm0\n\t" in Run()
368 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
369 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
370 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
371 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
372 "paddd %%xmm2, %%xmm8 \n\t" in Run()
373 "paddd %%xmm3, %%xmm9 \n\t" in Run()
375 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
376 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
377 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
378 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
379 "paddd %%xmm2, %%xmm10 \n\t" in Run()
380 "paddd %%xmm3, %%xmm11 \n\t" in Run()
383 "pmovzxbw 0x28(%[lhs_ptr]), %%xmm0\n\t" in Run()
385 "addq $0x30, %[lhs_ptr] \n\t" in Run()
386 "addq $0x10, %[rhs_ptr] \n\t" in Run()
388 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
389 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
390 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
391 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
392 "paddd %%xmm2, %%xmm12 \n\t" in Run()
393 "paddd %%xmm3, %%xmm13 \n\t" in Run()
395 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
396 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
397 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
398 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
399 "paddd %%xmm2, %%xmm14 \n\t" in Run()
400 "paddd %%xmm3, %%xmm15 \n\t" in Run()
402 "subq $2, %[run_depth_cells]\n\t" in Run()
403 "ja outerLoop2%=\n\t" in Run()
405 "movq %[run_depth_cells], %%r14\n\t" in Run()
406 "decq %%r14\n\t" in Run()
407 "js finish%=\n\t" in Run()
410 "outerLoop1%=:\n\t" in Run()
413 "pmovzxbw (%[rhs_ptr]), %%xmm1\n\t" in Run()
416 "pmovzxbw 0x00(%[lhs_ptr]), %%xmm0\n\t" in Run()
417 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
418 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
419 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
420 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
421 "paddd %%xmm2, %%xmm4 \n\t" in Run()
422 "paddd %%xmm3, %%xmm5 \n\t" in Run()
423 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
424 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
425 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
426 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
427 "paddd %%xmm2, %%xmm6 \n\t" in Run()
428 "paddd %%xmm3, %%xmm7 \n\t" in Run()
431 "pmovzxbw 0x08(%[lhs_ptr]), %%xmm0\n\t" in Run()
432 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
433 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
434 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
435 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
436 "paddd %%xmm2, %%xmm8 \n\t" in Run()
437 "paddd %%xmm3, %%xmm9 \n\t" in Run()
438 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
439 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
440 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
441 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
442 "paddd %%xmm2, %%xmm10 \n\t" in Run()
443 "paddd %%xmm3, %%xmm11 \n\t" in Run()
446 "pmovzxbw 0x10(%[lhs_ptr]), %%xmm0\n\t" in Run()
448 "addq $0x18, %[lhs_ptr] \n\t" in Run()
449 "addq $0x08, %[rhs_ptr] \n\t" in Run()
451 "pshufd $0x00,%%xmm1,%%xmm2 \n\t" in Run()
452 "pshufd $0x55,%%xmm1,%%xmm3 \n\t" in Run()
453 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
454 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
455 "paddd %%xmm2, %%xmm12 \n\t" in Run()
456 "paddd %%xmm3, %%xmm13 \n\t" in Run()
457 "pshufd $0xaa,%%xmm1,%%xmm2 \n\t" in Run()
458 "pshufd $0xff,%%xmm1,%%xmm3 \n\t" in Run()
459 "pmaddwd %%xmm0, %%xmm2 \n\t" in Run()
460 "pmaddwd %%xmm0, %%xmm3 \n\t" in Run()
461 "paddd %%xmm2, %%xmm14 \n\t" in Run()
462 "paddd %%xmm3, %%xmm15 \n\t" in Run()
464 "decq %[run_depth_cells]\n\t" in Run()
465 "jnz outerLoop1%=\n\t" in Run()
467 "finish%=:\n\t" in Run()
469 "test %[start_depth], %[start_depth]\n\t" in Run()
470 "jz storeDst%=\n\t" in Run()
472 "paddd 0x00(%[dst_ptr]) , %%xmm4 \n\t" in Run()
473 "paddd 0x10(%[dst_ptr]) , %%xmm8 \n\t" in Run()
474 "paddd 0x20(%[dst_ptr]) , %%xmm12\n\t" in Run()
475 "paddd 0x00(%[dst_ptr], %%r12, 1) , %%xmm5 \n\t" in Run()
476 "paddd 0x10(%[dst_ptr], %%r12, 1) , %%xmm9 \n\t" in Run()
477 "paddd 0x20(%[dst_ptr], %%r12, 1) , %%xmm13\n\t" in Run()
478 "paddd 0x00(%[dst_ptr], %%r12, 2) , %%xmm6 \n\t" in Run()
479 "paddd 0x10(%[dst_ptr], %%r12, 2) , %%xmm10\n\t" in Run()
480 "paddd 0x20(%[dst_ptr], %%r12, 2) , %%xmm14\n\t" in Run()
481 "paddd 0x00(%[dst_ptr], %%r13, 1) , %%xmm7 \n\t" in Run()
482 "paddd 0x10(%[dst_ptr], %%r13, 1) , %%xmm11\n\t" in Run()
483 "paddd 0x20(%[dst_ptr], %%r13, 1) , %%xmm15\n\t" in Run()
485 "storeDst%=:\n\t" in Run()
487 "movdqu %%xmm4 , 0x00(%[dst_ptr]) \n\t" in Run()
488 "movdqu %%xmm8 , 0x10(%[dst_ptr]) \n\t" in Run()
489 "movdqu %%xmm12 , 0x20(%[dst_ptr]) \n\t" in Run()
490 "movdqu %%xmm5 , 0x00(%[dst_ptr], %%r12, 1)\n\t" in Run()
491 "movdqu %%xmm9 , 0x10(%[dst_ptr], %%r12, 1)\n\t" in Run()
492 "movdqu %%xmm13 , 0x20(%[dst_ptr], %%r12, 1)\n\t" in Run()
493 "movdqu %%xmm6 , 0x00(%[dst_ptr], %%r12, 2)\n\t" in Run()
494 "movdqu %%xmm10 , 0x10(%[dst_ptr], %%r12, 2)\n\t" in Run()
495 "movdqu %%xmm14 , 0x20(%[dst_ptr], %%r12, 2)\n\t" in Run()
496 "movdqu %%xmm7 , 0x00(%[dst_ptr], %%r13, 1)\n\t" in Run()
497 "movdqu %%xmm11 , 0x10(%[dst_ptr], %%r13, 1)\n\t" in Run()
498 "movdqu %%xmm15 , 0x20(%[dst_ptr], %%r13, 1)\n\t" in Run()