Lines Matching +full:32 +full:k
162 addl \disp(%rsp, SRND), h # h = k + w + h # --
176 add h, d # d = k + w + h + d # --
190 vpslld $(32-7), XTMP1, XTMP3
192 add y1, h # h = k + w + h + S0 # --
194 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
198 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
210 addl offset(%rsp, SRND), h # h = k + w + h # --
225 add h, d # d = k + w + h + d # --
227 vpslld $(32-18), XTMP1, XTMP1
244 add y1, h # h = k + w + h + S0 # --
247 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
248 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
261 addl offset(%rsp, SRND), h # h = k + w + h # --
276 add h, d # d = k + w + h + d # --
296 add y1, h # h = k + w + h + S0 # --
297 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
298 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
311 addl offset(%rsp, SRND), h # h = k + w + h # --
325 add h, d # d = k + w + h + d # --
338 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
349 add y1, h # h = k + w + h + S0 # --
350 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
377 addl \disp(%rsp, SRND), h # h = k + w + h # --
387 add h, d # d = k + w + h + d # --
389 add y1, h # h = k + w + h + S0 # --
390 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
396 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
416 addl offset(%rsp, SRND), h # h = k + w + h # --
426 add h, d # d = k + w + h + d # --
428 add y1, h # h = k + w + h + S0 # --
430 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
436 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
456 addl offset(%rsp, SRND), h # h = k + w + h # --
466 add h, d # d = k + w + h + d # --
468 add y1, h # h = k + w + h + S0 # --
470 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
476 add y2, old_h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
496 addl offset(%rsp, SRND), h # h = k + w + h # --
506 add h, d # d = k + w + h + d # --
508 add y1, h # h = k + w + h + S0 # --
510 add y2, d # d = k + w + h + d + S1 + CH = d + t1 # --
513 add y2, h # h = k + w + h + S0 + S1 + CH = t1 + S0 # --
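
The add/addl lines above are the scalar half of the FOUR_ROUNDS_AND_SCHED and DO_4ROUNDS macros: each round pulls the precomputed k + w value out of the stack transfer area, folds it into h, and then accumulates S0, S1 and CH into h and d exactly as the trailing comments spell out (MAJ is folded in one round later by the macro). The vpslld $(32-7) and $(32-18) lines are the shift-left halves of rotate-by-7 and rotate-by-18, since AVX2 has no vector rotate. A minimal scalar sketch of the same accumulation, in C with hypothetical names (ror32, big_sigma0/1, ch, maj and sha256_round are not identifiers from this file):

    #include <stdint.h>

    /* Right-rotate by n; vpslld $(32-n) paired with a right shift by n is the
       vector form of the same identity. */
    static inline uint32_t ror32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    static inline uint32_t big_sigma1(uint32_t e) { return ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25); }
    static inline uint32_t big_sigma0(uint32_t a) { return ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22); }
    static inline uint32_t ch(uint32_t e, uint32_t f, uint32_t g)  { return (e & f) ^ (~e & g); }
    static inline uint32_t maj(uint32_t a, uint32_t b, uint32_t c) { return (a & b) ^ (a & c) ^ (b & c); }

    /* One round, written to mirror the comment trail in the listing. */
    static void sha256_round(uint32_t s[8], uint32_t k_plus_w)
    {
        uint32_t a = s[0], b = s[1], c = s[2], d = s[3];
        uint32_t e = s[4], f = s[5], g = s[6], h = s[7];

        h += k_plus_w;                       /* h = k + w + h                    */
        h += big_sigma1(e) + ch(e, f, g);    /* + S1 + CH, i.e. h now holds t1   */
        d += h;                              /* d = d + t1                       */
        h += big_sigma0(a) + maj(a, b, c);   /* h = t1 + S0 (+ MAJ)              */

        s[0] = h; s[1] = a; s[2] = b; s[3] = c;
        s[4] = d; s[5] = e; s[6] = f; s[7] = g;
    }

Written this way the comment trail falls out directly: after the first two additions h holds t1, d becomes d + t1, and the new h is t1 + S0 plus the deferred MAJ term.
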
529 .align 32
538 and $-32, %rsp # align rsp to 32 byte boundary
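
and $-32, %rsp clears the low five bits of the stack pointer (-32 is 0xffffffe0), giving the 32-byte alignment that the vmovdqa spills into the _XFER area below require. The same mask in C, with an illustrative helper name:

    #include <stdint.h>

    /* Round an address down to a 32-byte boundary, as `and $-32, %rsp` does. */
    static inline uintptr_t align_down_32(uintptr_t p)
    {
        return p & ~(uintptr_t)31;   /* ~31 == -32 in two's complement */
    }
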
568 VMOVDQ 0*32(INP), XTMP0
569 VMOVDQ 1*32(INP), XTMP1
570 VMOVDQ 2*32(INP), XTMP2
571 VMOVDQ 3*32(INP), XTMP3
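
The four VMOVDQ loads bring in 4*32 = 128 bytes, i.e. two consecutive 64-byte SHA-256 blocks that this implementation schedules side by side; the words are still big-endian here and are byte-swapped with the PSHUFFLE_BYTE_FLIP_MASK defined in the rodata sections at the end of this listing. An intrinsics sketch of just the loads (load_two_blocks is a made-up helper, not a symbol from the file):

    #include <immintrin.h>
    #include <stdint.h>

    /* Read one 128-byte chunk (two 64-byte message blocks) as four 32-byte
       vectors, mirroring the four VMOVDQ loads from INP. */
    static inline void load_two_blocks(const uint8_t *inp, __m256i w[4])
    {
        w[0] = _mm256_loadu_si256((const __m256i *)(inp + 0 * 32));
        w[1] = _mm256_loadu_si256((const __m256i *)(inp + 1 * 32));
        w[2] = _mm256_loadu_si256((const __m256i *)(inp + 2 * 32));
        w[3] = _mm256_loadu_si256((const __m256i *)(inp + 3 * 32));
    }
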
594 vpaddd K256+0*32(SRND), X0, XFER
595 vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
596 FOUR_ROUNDS_AND_SCHED _XFER + 0*32
598 vpaddd K256+1*32(SRND), X0, XFER
599 vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
600 FOUR_ROUNDS_AND_SCHED _XFER + 1*32
602 vpaddd K256+2*32(SRND), X0, XFER
603 vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
604 FOUR_ROUNDS_AND_SCHED _XFER + 2*32
606 vpaddd K256+3*32(SRND), X0, XFER
607 vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
608 FOUR_ROUNDS_AND_SCHED _XFER + 3*32
610 add $4*32, SRND
611 cmp $3*4*32, SRND
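
Each group of four rounds first adds the round constants to the current schedule words (vpaddd K256+n*32(SRND)), spills the w + k sums into the _XFER stack area indexed by SRND, and then FOUR_ROUNDS_AND_SCHED consumes them while extending the schedule for later rounds. SRND advances by 4*32 bytes per 16 rounds, and the cmp against 3*4*32 = 384 ends this loop after the 48 rounds that still need scheduling; the final 16 rounds go through DO_4ROUNDS below. A scalar sketch of what gets precomputed, with hypothetical names (small_sigma0/1 and schedule_plus_k are mine; k points at the standard FIPS 180-4 round constants):

    #include <stdint.h>

    static inline uint32_t ror32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    /* Message-schedule sigmas; the vpslld $(32-7) and $(32-18) lines earlier in
       the listing are the shift-left halves of these two rotations. */
    static inline uint32_t small_sigma0(uint32_t x) { return ror32(x, 7)  ^ ror32(x, 18) ^ (x >> 3);  }
    static inline uint32_t small_sigma1(uint32_t x) { return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); }

    /* Precompute w[t] + k[t] for all 64 rounds, which is what the paired
       vpaddd / vmovdqa-to-_XFER lines do four words at a time. */
    static void schedule_plus_k(const uint32_t w0[16], const uint32_t k[64], uint32_t xfer[64])
    {
        uint32_t w[64];

        for (int t = 0; t < 16; t++)
            w[t] = w0[t];
        for (int t = 16; t < 64; t++)
            w[t] = small_sigma1(w[t - 2]) + w[t - 7] + small_sigma0(w[t - 15]) + w[t - 16];
        for (int t = 0; t < 64; t++)
            xfer[t] = w[t] + k[t];
    }

In the assembly the K256 table is simply indexed in 32-byte steps by SRND, so each vpaddd grabs the constants for one four-round group.
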
616 vpaddd K256+0*32(SRND), X0, XFER
617 vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
618 DO_4ROUNDS _XFER + 0*32
620 vpaddd K256+1*32(SRND), X1, XFER
621 vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
622 DO_4ROUNDS _XFER + 1*32
623 add $2*32, SRND
628 cmp $4*4*32, SRND
650 DO_4ROUNDS _XFER + 0*32 + 16
651 DO_4ROUNDS _XFER + 1*32 + 16
652 add $2*32, SRND
653 cmp $4*4*32, SRND
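
The first DO_4ROUNDS loop (also bounded by cmp $4*4*32 = 512) finishes rounds 48 to 63 of the first block from the already-scheduled words, while the second loop, entered after SRND is reset in the full file, replays all 64 rounds for the second block by reading the same _XFER slots at a +16 byte offset, i.e. the high half of each 32-byte spill. An illustrative layout and replay loop (the struct and function names are mine, and sha256_round is the scalar sketch from earlier):

    #include <stdint.h>

    /* Each 32-byte _XFER slot holds four w+k dwords for the first block
       followed by four for the second, so the second block reads at +16. */
    struct xfer_slot {
        uint32_t blk_a[4];   /* offset 0  within the slot */
        uint32_t blk_b[4];   /* offset 16 within the slot */
    };

    void sha256_round(uint32_t s[8], uint32_t k_plus_w);   /* scalar round sketch above */

    /* Second block: 16 groups of four rounds, DO_4ROUNDS _XFER + n*32 + 16. */
    void second_block_rounds(uint32_t state_b[8], const struct xfer_slot xfer[16])
    {
        for (int grp = 0; grp < 16; grp++)          /* SRND steps by 2*32 per pair of groups */
            for (int i = 0; i < 4; i++)
                sha256_round(state_b, xfer[grp].blk_b[i]);
    }
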
753 .section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
754 .align 32
759 .section .rodata.cst32._SHUF_00BA, "aM", @progbits, 32
760 .align 32
765 .section .rodata.cst32._SHUF_DC00, "aM", @progbits, 32
766 .align 32
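
The .rodata.cst32 sections place the three 32-byte shuffle masks in read-only, mergeable constant sections ("aM", @progbits, 32 marks them allocatable and mergeable with a 32-byte entity size). PSHUFFLE_BYTE_FLIP_MASK feeds vpshufb to turn the big-endian message words into host order, while _SHUF_00BA and _SHUF_DC00 rearrange freshly scheduled words inside FOUR_ROUNDS_AND_SCHED. An illustrative C counterpart of the byte-flip constant and its use (the names and the wrapper are mine, not the file's):

    #include <immintrin.h>
    #include <stdint.h>

    /* 32-byte aligned constant, the C analogue of a .rodata.cst32 entry:
       reverse the bytes inside every 32-bit word of each 128-bit lane. */
    static const uint8_t byte_flip_mask[32] __attribute__((aligned(32))) = {
         3,  2,  1,  0,  7,  6,  5,  4, 11, 10,  9,  8, 15, 14, 13, 12,
         3,  2,  1,  0,  7,  6,  5,  4, 11, 10,  9,  8, 15, 14, 13, 12,
    };

    /* vpshufb works per 128-bit lane, so repeating the 16-byte pattern is enough. */
    static inline __m256i bswap_dwords(__m256i v)
    {
        const __m256i mask = _mm256_load_si256((const __m256i *)byte_flip_mask);
        return _mm256_shuffle_epi8(v, mask);
    }
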