Lines Matching refs:tptr
1095 my ($i,$j,$tptr)=("%rbp","%rcx",$rptr);
1327 lea 48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
1329 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
1337 mov $A0[0],-24($tptr,$i) # t[1]
1343 mov $A0[1],-16($tptr,$i) # t[2]
1361 mov $A0[0],-8($tptr,$j) # t[3]
1386 mov $A0[1],($tptr,$j) # t[4]
1402 mov $A0[0],8($tptr,$j) # t[5]
1419 mov $A0[1],16($tptr,$j) # t[6]
1431 mov $A0[0],-8($tptr,$j) # t[7]
1443 mov $A1[1],($tptr) # t[8]
1445 mov %rdx,8($tptr) # t[9]
1451 lea 48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
1453 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
1458 mov -24($tptr,$i),$A0[0] # t[1]
1462 mov $A0[0],-24($tptr,$i) # t[1]
1469 add -16($tptr,$i),$A0[1] # a[2]*a[0]+t[2]
1472 mov $A0[1],-16($tptr,$i) # t[2]
1481 add -8($tptr,$i),$A1[0]
1492 mov $A0[0],-8($tptr,$i) # t[3]
1505 add ($tptr,$j),$A1[1]
1520 mov $A0[1],($tptr,$j) # t[4]
1524 add 8($tptr,$j),$A1[0]
1535 mov $A0[0],-8($tptr,$j) # t[5], "preloaded t[1]" below
1547 mov $A1[1],($tptr) # t[6], "preloaded t[2]" below
1549 mov %rdx,8($tptr) # t[7], "preloaded t[3]" below
1556 lea 48+8(%rsp,$num,2),$tptr # end of tp[] buffer, &tp[2*$num]
1558 lea -32($tptr,$i),$tptr # end of tp[] window, &tp[2*$num-"$i"]
1571 mov $A0[0],-24($tptr) # t[1]
1581 mov $A0[1],-16($tptr) # t[2]
1592 mov $A0[0],-8($tptr) # t[3]
1601 mov $A1[1],($tptr) # t[4]
1603 mov %rdx,8($tptr) # t[5]
1618 mov %rax,8($tptr) # t[5]
1619 mov %rdx,16($tptr) # t[6]
1620 mov $carry,24($tptr) # t[7]
1623 lea 48+8(%rsp),$tptr
1625 mov 8($tptr),$A0[1] # t[1]
1632 mov 16($tptr),$A0[0] # t[2*i+2] # prefetch
1636 mov 24($tptr),$A0[1] # t[2*i+2+1] # prefetch
1639 mov $S[0],($tptr)
1643 mov $S[1],8($tptr)
1649 mov 32($tptr),$A0[0] # t[2*i+2] # prefetch
1653 mov 40($tptr),$A0[1] # t[2*i+2+1] # prefetch
1656 mov $S[2],16($tptr)
1659 mov $S[3],24($tptr)
1661 lea 64($tptr),$tptr
1671 mov -16($tptr),$A0[0] # t[2*i+2] # prefetch
1675 mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch
1678 mov $S[0],-32($tptr)
1682 mov $S[1],-24($tptr)
1688 mov 0($tptr),$A0[0] # t[2*i+2] # prefetch
1692 mov 8($tptr),$A0[1] # t[2*i+2+1] # prefetch
1695 mov $S[2],-16($tptr)
1699 mov $S[3],-8($tptr)
1705 mov 16($tptr),$A0[0] # t[2*i+2] # prefetch
1709 mov 24($tptr),$A0[1] # t[2*i+2+1] # prefetch
1712 mov $S[0],0($tptr)
1716 mov $S[1],8($tptr)
1722 mov 32($tptr),$A0[0] # t[2*i+2] # prefetch
1726 mov 40($tptr),$A0[1] # t[2*i+2+1] # prefetch
1729 mov $S[2],16($tptr)
1731 mov $S[3],24($tptr)
1733 lea 64($tptr),$tptr
1743 mov -16($tptr),$A0[0] # t[2*i+2] # prefetch
1747 mov -8($tptr),$A0[1] # t[2*i+2+1] # prefetch
1750 mov $S[0],-32($tptr)
1754 mov $S[1],-24($tptr)
1764 mov $S[2],-16($tptr)
1765 mov $S[3],-8($tptr)
1775 my ($nptr,$tptr,$carry,$m0)=("%rbp","%rdi","%rsi","%rbx");
1784 lea 48+8(%rsp,$num),$tptr # end of initial t[] window
1791 lea ($tptr,$num),$tptr # start of current t[] window
1793 mov 8*0($tptr),$m0
1794 mov 8*1($tptr),%r9
1795 mov 8*2($tptr),%r10
1796 mov 8*3($tptr),%r11
1797 mov 8*4($tptr),%r12
1798 mov 8*5($tptr),%r13
1799 mov 8*6($tptr),%r14
1800 mov 8*7($tptr),%r15
1802 lea 8*8($tptr),$tptr
1889 add 8*0($tptr),%r8
1890 adc 8*1($tptr),%r9
1891 adc 8*2($tptr),%r10
1892 adc 8*3($tptr),%r11
1893 adc 8*4($tptr),%r12
1894 adc 8*5($tptr),%r13
1895 adc 8*6($tptr),%r14
1896 adc 8*7($tptr),%r15
1909 mov %r8,($tptr) # save result
1918 lea 8($tptr),$tptr # $tptr++
1982 adc 8*0($tptr),%r8
1983 adc 8*1($tptr),%r9
1984 adc 8*2($tptr),%r10
1985 adc 8*3($tptr),%r11
1986 adc 8*4($tptr),%r12
1987 adc 8*5($tptr),%r13
1988 adc 8*6($tptr),%r14
1989 adc 8*7($tptr),%r15
2010 adc 8*0($tptr),%r8
2011 adc 8*1($tptr),%r9
2012 adc 8*2($tptr),%r10
2013 adc 8*3($tptr),%r11
2014 adc 8*4($tptr),%r12
2015 adc 8*5($tptr),%r13
2016 adc 8*6($tptr),%r14
2017 adc 8*7($tptr),%r15
2024 mov %r8,8*0($tptr) # store top 512 bits
2025 mov %r9,8*1($tptr)
2027 mov %r10,8*2($tptr)
2028 mov %r11,8*3($tptr)
2029 mov %r12,8*4($tptr)
2030 mov %r13,8*5($tptr)
2031 mov %r14,8*6($tptr)
2032 mov %r15,8*7($tptr)
2033 lea 8*8($tptr),$tptr
2035 cmp %rdx,$tptr # end of t[]?
2046 my ($tptr,$nptr)=("%rbx","%rbp");
2053 lea (%rdi,$num),$tptr # %rdi was $tptr above
2084 adc 8*0($tptr),%r12
2085 adc 8*1($tptr),%r13
2086 adc 8*2($tptr),%r14
2087 adc 8*3($tptr),%r15
2089 lea 8*4($tptr),$tptr
2241 my ($aptr, $bptr, $nptr, $tptr, $mi, $bi, $zero, $num)=
2332 lea 64+8*4+8(%rsp),$tptr
2362 mov %r10,-8*4($tptr)
2367 mov %r11,-8*3($tptr)
2371 mov %r12,-8*2($tptr)
2389 lea 4*8($tptr),$tptr
2399 mov %r10,-5*8($tptr)
2401 mov %r11,-4*8($tptr)
2405 mov %r12,-3*8($tptr)
2409 mov %r13,-2*8($tptr)
2420 mov %r14,-1*8($tptr)
2425 lea 16-256($tptr),%r10 # where 256-byte mask is (+density control)
2453 mov $zero,($tptr) # save top-most carry
2454 lea 4*8($tptr,$num),$tptr # rewind $tptr
2459 adox -4*8($tptr),$mi # +t[0]
2462 adox -3*8($tptr),%r11
2465 adox -2*8($tptr),%r12
2469 adox -1*8($tptr),%r13
2492 mov %r10,-8*4($tptr)
2494 mov %r11,-8*3($tptr)
2496 mov %r12,-8*2($tptr)
2506 adcx 0*8($tptr),%r10
2509 adcx 1*8($tptr),%r11
2513 adcx 2*8($tptr),%r12
2515 adcx 3*8($tptr),%r13
2518 lea 4*8($tptr),$tptr
2529 mov %r10,-5*8($tptr)
2532 mov %r11,-4*8($tptr)
2536 mov %r12,-3*8($tptr)
2539 mov %r13,-2*8($tptr)
2546 sub 0*8($tptr),$bptr # pull top-most carry to %cf
2552 mov %r14,-1*8($tptr)
2562 lea ($tptr,$num),%rdi # rewind $tptr
2591 my ($i,$j,$tptr)=("%rbp","%rcx",$rptr);
2784 lea 48+8(%rsp),$tptr
2794 movdqa %xmm0,0*8($tptr)
2795 movdqa %xmm0,2*8($tptr)
2796 movdqa %xmm0,4*8($tptr)
2797 movdqa %xmm0,6*8($tptr)
2799 movdqa %xmm0,8*8($tptr)
2800 movdqa %xmm0,10*8($tptr)
2801 movdqa %xmm0,12*8($tptr)
2802 movdqa %xmm0,14*8($tptr)
2803 lea 16*8($tptr),$tptr
2815 lea 48+8(%rsp),$tptr
2843 adc 8*8($tptr),%r15
2844 mov %r8,1*8($tptr) # t[1]
2845 mov %r9,2*8($tptr) # t[2]
2871 mov %r8,3*8($tptr) # t[3]
2872 mov %r9,4*8($tptr) # t[4]
2890 mov %r8,5*8($tptr) # t[5]
2891 mov %r9,6*8($tptr) # t[6]
2912 mov %r8,7*8($tptr) # t[7]
2913 mov %r9,8*8($tptr) # t[8]
2948 mov 8*8($tptr),%r8
2949 adcx 9*8($tptr),%r9 # +=t[9]
2950 adcx 10*8($tptr),%r10 # ...
2951 adcx 11*8($tptr),%r11
2952 adc 12*8($tptr),%r12
2953 adc 13*8($tptr),%r13
2954 adc 14*8($tptr),%r14
2955 adc 15*8($tptr),%r15
2957 lea 2*64($tptr),$tptr
2962 mov $tptr,24+8(%rsp)
2964 #lea 8*8($tptr),$tptr # see 2*8*8($tptr) above
2996 mov %rbx,($tptr,%rcx,8) # store t[8+i]
3019 adcx 0*8($tptr),%r8
3020 adcx 1*8($tptr),%r9
3021 adc 2*8($tptr),%r10
3022 adc 3*8($tptr),%r11
3023 adc 4*8($tptr),%r12
3024 adc 5*8($tptr),%r13
3025 adc 6*8($tptr),%r14
3026 adc 7*8($tptr),%r15
3027 lea 8*8($tptr),$tptr
3039 mov 24+8(%rsp),$carry # initial $tptr, borrow $carry
3043 mov %r8,0*8($tptr)
3049 cmp $carry,$tptr # cf=0, of=0
3052 mov %r9,1*8($tptr)
3054 mov %r10,2*8($tptr)
3056 mov %r11,3*8($tptr)
3058 mov %r12,4*8($tptr)
3060 mov %r13,5*8($tptr)
3062 mov %r14,6*8($tptr)
3064 mov %r15,7*8($tptr)
3066 mov $carry,$tptr
3071 mov %r9,9*8($tptr) # t[9]
3073 mov %r10,10*8($tptr) # ...
3074 mov %r11,11*8($tptr)
3075 mov %r12,12*8($tptr)
3076 mov %r13,13*8($tptr)
3077 mov %r14,14*8($tptr)
3082 lea 48+8(%rsp),$tptr
3085 mov 8($tptr),$A0[1] # t[1]
3089 mov 16($tptr),$A1[0] # t[2] # prefetch
3090 mov 24($tptr),$A1[1] # t[3] # prefetch
3099 .byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 # mov 32($tptr),$A0[0] # t[2*i+4] # prefetch
3102 mov 40($tptr),$A0[1] # t[2*i+4+1] # prefetch
3103 mov %rax,0($tptr)
3104 mov %rbx,8($tptr)
3110 mov 48($tptr),$A1[0] # t[2*i+6] # prefetch
3113 mov 56($tptr),$A1[1] # t[2*i+6+1] # prefetch
3114 mov %rax,16($tptr)
3115 mov %rbx,24($tptr)
3122 mov 64($tptr),$A0[0] # t[2*i+8] # prefetch
3125 mov 72($tptr),$A0[1] # t[2*i+8+1] # prefetch
3126 mov %rax,32($tptr)
3127 mov %rbx,40($tptr)
3136 mov 80($tptr),$A1[0] # t[2*i+10] # prefetch
3137 mov 88($tptr),$A1[1] # t[2*i+10+1] # prefetch
3138 mov %rax,48($tptr)
3139 mov %rbx,56($tptr)
3140 lea 64($tptr),$tptr
3147 mov %rax,48($tptr)
3148 mov %rbx,56($tptr)
3149 lea 64($tptr),$tptr # end of t[] buffer
3166 mov 48+8(%rsp),%rdx # "%r8", 8*0($tptr)
3168 #lea 48+8(%rsp,$num,2),$tptr # end of t[] buffer
3170 mov $tptr,8+8(%rsp) # save end of t[]
3172 lea 48+8(%rsp),$tptr # initial t[] window
3177 mov 8*1($tptr),%r9
3178 mov 8*2($tptr),%r10
3179 mov 8*3($tptr),%r11
3180 mov 8*4($tptr),%r12
3183 mov 8*5($tptr),%r13
3184 mov 8*6($tptr),%r14
3185 mov 8*7($tptr),%r15
3188 lea 8*8($tptr),$tptr
3245 add 8*0($tptr),%r8
3248 adcx 8*1($tptr),%r9
3249 adcx 8*2($tptr),%r10
3250 adc 8*3($tptr),%r11
3251 adc 8*4($tptr),%r12
3252 adc 8*5($tptr),%r13
3253 adc 8*6($tptr),%r14
3254 adc 8*7($tptr),%r15
3255 lea 8*8($tptr),$tptr
3297 mov %rbx,($tptr,%rcx,8) # save result
3310 adc 8*0($tptr),%r8
3311 adc 8*1($tptr),%r9
3312 adc 8*2($tptr),%r10
3313 adc 8*3($tptr),%r11
3314 adc 8*4($tptr),%r12
3315 adc 8*5($tptr),%r13
3316 adc 8*6($tptr),%r14
3317 adc 8*7($tptr),%r15
3318 lea 8*8($tptr),$tptr
3341 adc 8*0($tptr),%r8
3343 adc 8*1($tptr),%r9
3346 adc 8*2($tptr),%r10
3347 adc 8*3($tptr),%r11
3348 adc 8*4($tptr),%r12
3349 adc 8*5($tptr),%r13
3350 adc 8*6($tptr),%r14
3351 adc 8*7($tptr),%r15
3355 mov 8*8($tptr,%rcx),%rdx # modulo-scheduled "%r8"
3357 mov %r8,8*0($tptr) # store top 512 bits
3358 lea 8*8($tptr),%r8 # borrow %r8
3359 mov %r9,8*1($tptr)
3360 mov %r10,8*2($tptr)
3361 mov %r11,8*3($tptr)
3362 mov %r12,8*4($tptr)
3363 mov %r13,8*5($tptr)
3364 mov %r14,8*6($tptr)
3365 mov %r15,8*7($tptr)
3367 lea 8*8($tptr,%rcx),$tptr # start of current t[] window
3389 #lea 48+8(%rsp,%r9),$tptr
3413 adc 8*0($tptr),%r12
3414 adc 8*1($tptr),%r13
3415 adc 8*2($tptr),%r14
3416 adc 8*3($tptr),%r15
3418 lea 8*4($tptr),$tptr