; XzCrc64Opt.asm -- CRC64 calculation : optimized version
; 2021-02-06 : Igor Pavlov : Public domain

include 7zAsm.asm

MY_ASM_START

ifdef x64

; ---- x64 build: working-register roles ----
; The register names used on the right-hand side (r5, r8, r9, r10) and the
; x?/x?_L/x?_H views used further down are not defined in this file;
; presumably they come from 7zAsm.asm.
;   rD      : data pointer; MY_PROLOG later turns it into a negative byte
;             offset that the main loop counts up towards zero
;   rN      : number of bytes left; MY_PROLOG later turns it into the base
;             address that rD is relative to
;   rT      : base address of the lookup tables (see CRC_XOR)
;   num_VAR : keeps the end-of-buffer address while the main loop runs
rD      equ  r9
rN      equ  r10
rT      equ  r5
num_VAR equ  r8

; SRCDAT4: the 4 data bytes located at address rD + rN.
SRCDAT4 equ  dword ptr [rD + rN * 1]

; CRC_XOR dest, src, t   (x64)
;   dest ^= 64-bit entry number `src` of lookup table number `t`.
;   rT is the base of the tables; consecutive tables are 0800h bytes apart
;   (256 entries * 8 bytes) and `src` is scaled by 8 to select one entry.
;   dest : 64-bit register that receives the XOR
;   src  : 64-bit register holding the entry index
;   t    : table number (assembly-time constant)
CRC_XOR macro dest:req, src:req, t:req
    xor     dest, QWORD PTR [rT + src * 8 + 0800h * t]
endm

; CRC1b   (x64) -- fold one input byte into the CRC.
;   In:       r0 = running CRC, rD = address of the next byte, rN = bytes left
;   Out:      r0 updated, rD advanced by 1, rN decremented by 1
;   Clobbers: r6 and r3 (written through x6 / x3)
;   Flags:    `dec rN` is the last instruction, so ZF on exit tells the user
;             of the macro whether rN reached zero (MY_PROLOG branches on it).
;   Assumes x0_L is the low byte of r0 and x3/x6 are the 32-bit views of
;   r3/r6 (aliases defined outside this file) -- TODO confirm in 7zAsm.asm.
CRC1b macro
    movzx   x6, BYTE PTR [rD]       ; x6 = next input byte
    inc     rD
    movzx   x3, x0_L                ; x3 = lowest byte of the CRC
    xor     x6, x3                  ; table index = input byte ^ CRC low byte
    shr     r0, 8                   ; drop the CRC byte that was just used
    CRC_XOR r0, r6, 0               ; r0 ^= table 0 [index]
    dec     rN                      ; ZF = 1 when no bytes remain
endm

; MY_PROLOG crc_end   (x64)
;   Entry code of the CRC routine.
;   crc_end : label of the byte-wise tail loop (the label itself is emitted by
;             MY_EPILOG); jumped to when the size is zero or when fewer than
;             8 bytes are left after alignment.
;
;   Steps:
;     1. save registers (MY_PUSH_*_REGS, defined outside this file) and copy
;        the ABI parameters into the working registers:
;          REG_ABI_PARAM_0 -> r0 (CRC), _1 -> rD (data),
;          _2 -> rN (size),              _3 -> rT (table)
;     2. process single bytes with CRC1b until rD is a multiple of 4
;     3. prepare the 4-bytes-per-iteration main loop
;
;   State on fall-through (start of the main loop), with
;   limit = (end - 4) AND NOT 3:
;     num_VAR = end = data + size
;     rN      = limit + 4
;     rD      = data - limit   (negative; the main loop adds 4 until it is 0)
;     r0      = CRC with the first 4 data bytes already XORed in
;     r1      = clobbered
MY_PROLOG macro crc_end:req
  ifdef ABI_LINUX
    MY_PUSH_2_REGS
  else
    MY_PUSH_4_REGS
  endif
    mov     r0, REG_ABI_PARAM_0
    mov     rN, REG_ABI_PARAM_2
    mov     rT, REG_ABI_PARAM_3
    mov     rD, REG_ABI_PARAM_1
    test    rN, rN
    jz      crc_end                 ; empty buffer: nothing to align or loop over
  @@:
    test    rD, 3
    jz      @F                      ; rD is 4-byte aligned
    CRC1b
    jnz     @B                      ; ZF comes from `dec rN` at the end of CRC1b
  @@:
    cmp     rN, 8
    jb      crc_end                 ; fewer than 8 bytes left: tail loop only
    add     rN, rD                  ; rN = end of buffer
    mov     num_VAR, rN
    sub     rN, 4
    and     rN, NOT 3               ; rN = limit
    sub     rD, rN                  ; rD = data - limit, so rD + rN = data
    mov     x1, SRCDAT4             ; first 4 data bytes
    xor     r0, r1                  ; XOR them into the CRC ahead of the loop
    add     rN, 4                   ; from now on SRCDAT4 is the dword AFTER the current one
endm

; MY_EPILOG crc_end   (x64)
;   Exit code of the CRC routine; placed directly after the main loop.
;   crc_end : name given to the tail-loop label emitted here (MY_PROLOG jumps
;             to it when the main loop is skipped).
;
;   On fall-through from the main loop rD is 0 and rN = limit + 4 (see
;   MY_PROLOG).  The loop's last iteration XORed the dword at `limit` into r0
;   although those bytes have not been processed yet, so that XOR is undone
;   here and the remaining bytes are handled one at a time.
;   At crc_end: rD = address of the next byte, rN = number of bytes left.
MY_EPILOG macro crc_end:req
    sub     rN, 4                   ; rN = limit
    mov     x1, SRCDAT4             ; rD is 0 here, so this is the dword at limit
    xor     r0, r1                  ; cancel the look-ahead XOR of that dword
    mov     rD, rN                  ; rD = limit (a real pointer again)
    mov     rN, num_VAR             ; end of buffer
    sub     rN, rD                  ; rN = bytes still to process
  crc_end:
    test    rN, rN
    jz      @F
    CRC1b
    jmp     crc_end                 ; rN is re-tested at the top of the loop
  @@:
  ifdef ABI_LINUX
    MY_POP_2_REGS
  else
    MY_POP_4_REGS
  endif
endm

; XzCrc64UpdateT4   (x64)
;   Updates a 64-bit CRC over a byte buffer, 4 bytes per main-loop iteration,
;   using four lookup tables (table numbers 0..3 of CRC_XOR).
;   Parameters, in the order MY_PROLOG reads them from REG_ABI_PARAM_0..3:
;     crc, data pointer, size in bytes, table base.
;   The updated CRC is left in r0 (presumably the return register; MY_PROC and
;   MY_ENDP are defined outside this file -- TODO confirm).
;   The `4` passed to MY_PROC is presumably the parameter count.
;
;   Main loop invariant: r0 already contains (CRC ^ current 4 data bytes);
;   rD is a negative offset that reaches 0 at the aligned limit set up by
;   MY_PROLOG; SRCDAT4 addresses the dword after the current one.
;   Assumes x0_L / x0_H are the two lowest bytes of r0 -- TODO confirm.
MY_PROC XzCrc64UpdateT4, 4
    MY_PROLOG crc_end_4
    align 16
  main_loop_4:
    mov     x1, SRCDAT4             ; look-ahead: the NEXT 4 data bytes
    movzx   x2, x0_L                ; byte 0 of r0
    movzx   x3, x0_H                ; byte 1
    shr     r0, 16
    movzx   x6, x0_L                ; byte 2
    movzx   x7, x0_H                ; byte 3
    shr     r0, 16                  ; r0 = former bits 32..63 of the CRC
    CRC_XOR r1, r2, 3               ; the four table XORs are split over two
    CRC_XOR r0, r3, 2               ; accumulators (r1 and r0) ...
    CRC_XOR r1, r6, 1
    CRC_XOR r0, r7, 0
    xor     r0, r1                  ; ... and merged here

    add     rD, 4                   ; ZF = 1 once the aligned limit is reached
    jnz     main_loop_4

    MY_EPILOG crc_end_4
MY_ENDP

else
; x86 (32-bit)

; Working-register roles in the 32-bit build (the CRC itself is kept in the
; register pair r2:r0, high half in r2 -- see MY_PROLOG):
;   rD : data pointer, later a negative byte offset counted up to zero
;   rN : bytes left, later the base address that rD is relative to
;   rT : base address of the lookup tables
rD      equ  r1
rN      equ  r7
rT      equ  r5

; Offset from r4 to the crc argument once MY_PROLOG has pushed its registers:
; five REG_SIZE slots (presumably the 4 registers saved by MY_PUSH_4_REGS plus
; the return address -- TODO confirm against 7zAsm.asm).
crc_OFFS  equ  (REG_SIZE * 5)

if (IS_CDECL gt 0) or (IS_LINUX gt 0)
    ; cdecl or (GNU fastcall) stack:
    ;   (UInt32 *) table
    ;   size_t     size
    ;   void *     data
    ;   (UInt64)   crc
    ;   ret-ip <-(r4)
    data_OFFS   equ  (8 + crc_OFFS)
    size_OFFS   equ  (REG_SIZE + data_OFFS)
    table_OFFS  equ  (REG_SIZE + size_OFFS)
    ; num_VAR reuses the stack slot of the `size` argument; MY_PROLOG loads
    ; size into rN before it stores the end-of-buffer address there.
    num_VAR     equ  [r4 + size_OFFS]
    table_VAR   equ  [r4 + table_OFFS]
else
    ; Windows fastcall:
    ;   r1 = data, r2 = size
    ; stack:
    ;   (UInt32 *) table
    ;   (UInt64)   crc
    ;   ret-ip <-(r4)
    table_OFFS  equ  (8 + crc_OFFS)
    table_VAR   equ  [r4 + table_OFFS]
    ; num_VAR reuses the stack slot of the `table` argument; MY_PROLOG loads
    ; the table pointer into rT before it stores the end-of-buffer address there.
    num_VAR     equ  table_VAR
endif

; SRCDAT4: the 4 data bytes located at address rD + rN.
SRCDAT4 equ  dword ptr [rD + rN * 1]

; CRC op0, op1, dest0, dest1, src, t   (x86)
;   Applies one instruction to each 32-bit half of a 64-bit table entry:
;     op0 dest0, <low  dword of entry `src` in table `t`>
;     op1 dest1, <high dword of the same entry>
;   rT is the table base; tables are 0800h bytes apart, entries 8 bytes apart.
;   op0/op1 are instruction mnemonics (the file uses `xor` and `mov`).
CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
    op0     dest0, DWORD PTR [rT + src * 8 + 0800h * t]
    op1     dest1, DWORD PTR [rT + src * 8 + 0800h * t + 4]
endm

; CRC_XOR dest0, dest1, src, t   (x86)
;   dest1:dest0 ^= 64-bit entry `src` of table `t`
;   (dest0 receives the low dword, dest1 the high dword).
CRC_XOR macro dest0:req, dest1:req, src:req, t:req
    CRC xor, xor, dest0, dest1, src, t
endm


; CRC1b   (x86) -- fold one input byte into the CRC held in r2:r0.
;   In:       r2:r0 = running CRC (r2 = high half), rD = address of the next
;             byte, rN = bytes left
;   Out:      r2:r0 updated, rD advanced by 1, rN decremented by 1
;   Clobbers: r6 and r3 (written through x6 / x3)
;   Flags:    `dec rN` is the last instruction, so ZF on exit tells the user
;             of the macro whether rN reached zero (MY_PROLOG branches on it).
;   Assumes x0_L is the low byte of r0 (alias defined outside this file)
;   -- TODO confirm in 7zAsm.asm.
CRC1b macro
    movzx   x6, BYTE PTR [rD]       ; x6 = next input byte
    inc     rD
    movzx   x3, x0_L                ; x3 = lowest byte of the CRC
    xor     x6, x3                  ; table index = input byte ^ CRC low byte
    shrd    r0, r2, 8               ; 64-bit shift right by 8 across r2:r0 ...
    shr     r2, 8                   ; ... high half last
    CRC_XOR r0, r2, r6, 0           ; r2:r0 ^= table 0 [index]
    dec     rN                      ; ZF = 1 when no bytes remain
endm

; MY_PROLOG crc_end   (x86)
;   Entry code of the 32-bit CRC routine.
;   crc_end : label of the byte-wise tail loop (the label itself is emitted by
;             MY_EPILOG); jumped to when the size is zero or when fewer than
;             8 bytes are left after alignment.
;
;   Steps:
;     1. save registers (MY_PUSH_4_REGS, defined outside this file)
;     2. fetch the arguments: size -> rN, data -> rD, crc -> r2:r0, table -> rT
;     3. process single bytes with CRC1b until rD is a multiple of 4
;     4. prepare the 4-bytes-per-iteration main loop
;
;   State on fall-through (start of the main loop), with
;   limit = (end - 4) AND NOT 3:
;     num_VAR = end = data + size   (stored in a reused argument stack slot)
;     rN      = limit + 4
;     rD      = data - limit   (negative; the main loop adds 4 until it is 0)
;     r0      = CRC low half with the first 4 data bytes already XORed in
MY_PROLOG macro crc_end:req
    MY_PUSH_4_REGS

  if (IS_CDECL gt 0) or (IS_LINUX gt 0)
    ; assembly-time adjustment; proc_numParams is presumably consumed by
    ; MY_ENDP (defined outside this file) -- TODO confirm
    proc_numParams = proc_numParams + 2 ; for ABI_LINUX
    mov     rN, [r4 + size_OFFS]
    mov     rD, [r4 + data_OFFS]
  else
    mov     rN, r2                  ; size arrives in r2; copy it before r2 is reused for the CRC
  endif

    mov     x0, [r4 + crc_OFFS]     ; CRC low half
    mov     x2, [r4 + crc_OFFS + 4] ; CRC high half
    mov     rT, table_VAR
    test    rN, rN
    jz      crc_end                 ; empty buffer: nothing to align or loop over
  @@:
    test    rD, 3
    jz      @F                      ; rD is 4-byte aligned
    CRC1b
    jnz     @B                      ; ZF comes from `dec rN` at the end of CRC1b
  @@:
    cmp     rN, 8
    jb      crc_end                 ; fewer than 8 bytes left: tail loop only
    add     rN, rD                  ; rN = end of buffer

    mov     num_VAR, rN

    sub     rN, 4
    and     rN, NOT 3               ; rN = limit
    sub     rD, rN                  ; rD = data - limit, so rD + rN = data
    xor     r0, SRCDAT4             ; XOR the first 4 data bytes into the CRC low half
    add     rN, 4                   ; from now on SRCDAT4 is the dword AFTER the current one
endm

; MY_EPILOG crc_end   (x86)
;   Exit code of the 32-bit CRC routine; placed directly after the main loop.
;   crc_end : name given to the tail-loop label emitted here (MY_PROLOG jumps
;             to it when the main loop is skipped).
;
;   On fall-through from the main loop rD is 0 and rN = limit + 4 (see
;   MY_PROLOG).  The loop's last iteration XORed the dword at `limit` into the
;   CRC low half although those bytes have not been processed yet, so that XOR
;   is undone here and the remaining bytes are handled one at a time.
;   At crc_end: rD = address of the next byte, rN = number of bytes left.
MY_EPILOG macro crc_end:req
    sub     rN, 4                   ; rN = limit
    xor     r0, SRCDAT4             ; rD is 0 here: cancel the look-ahead XOR of the dword at limit

    mov     rD, rN                  ; rD = limit (a real pointer again)
    mov     rN, num_VAR             ; end of buffer
    sub     rN, rD                  ; rN = bytes still to process
  crc_end:
    test    rN, rN
    jz      @F
    CRC1b
    jmp     crc_end                 ; rN is re-tested at the top of the loop
  @@:
    MY_POP_4_REGS
endm

; XzCrc64UpdateT4   (x86, 32-bit)
;   Updates a 64-bit CRC over a byte buffer, 4 bytes per main-loop iteration,
;   using four lookup tables (table numbers 0..3 of the CRC macro).
;   Arguments: crc (UInt64), data pointer, size, table pointer; where they
;   arrive (stack / r1, r2) depends on the calling convention selected by
;   IS_CDECL / IS_LINUX, as handled in MY_PROLOG.
;   The updated CRC is left in r2:r0 (r2 = high half).
;   The `5` passed to MY_PROC is presumably the parameter count in 32-bit
;   slots (MY_PROC is defined outside this file -- TODO confirm).
;
;   Main loop invariant: r0 already contains (CRC low half ^ current 4 data
;   bytes), r2 is the CRC high half, x6 holds byte 0 of r0, rD is a negative
;   offset that reaches 0 at the aligned limit set up by MY_PROLOG, and
;   SRCDAT4 addresses the dword after the current one.
;   r3 accumulates the new low half, r2 the new high half.
;   Assumes x0_L / x0_H are the two lowest bytes of r0 and x3_L the low byte
;   of r3 -- TODO confirm in 7zAsm.asm.
MY_PROC XzCrc64UpdateT4, 5
    MY_PROLOG crc_end_4
    movzx   x6, x0_L                ; byte 0 of r0 for the first iteration
    align 16
  main_loop_4:
    mov     r3, SRCDAT4             ; look-ahead: the NEXT 4 data bytes
    xor     r3, r2                  ; ^ old high half, which becomes the low half after 32 bits are consumed

    CRC xor, mov, r3, r2, r6, 3     ; low: r3 ^= entry; high: r2 = entry (mov starts the new high half)
    movzx   x6, x0_H                ; byte 1
    shr     r0, 16
    CRC_XOR r3, r2, r6, 2

    movzx   x6, x0_L                ; byte 2
    movzx   x0, x0_H                ; byte 3; the rest of r0 is no longer needed
    CRC_XOR r3, r2, r6, 1
    CRC_XOR r3, r2, r0, 0
    movzx   x6, x3_L                ; byte 0 of the new low half, ready for the next iteration
    mov     r0, r3                  ; r2:r0 = updated CRC (with the next dword folded in)

    add     rD, 4                   ; ZF = 1 once the aligned limit is reached
    jnz     main_loop_4

    MY_EPILOG crc_end_4
MY_ENDP

endif ; ! x64

; end of the assembly source
end
