; (removed: web source-viewer navigation artifacts — not part of the source file)
; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
; 2022-04-17 : Igor Pavlov : Public domain

include 7zAsm.asm

MY_ASM_START

; .data
; public K

; we can use external SHA256_K_ARRAY defined in Sha256.c
; but we must guarantee that SHA256_K_ARRAY is aligned for 16-bytes

; NOTE(review): this COMMENT block is deliberately inactive; it documents the
; alternative of importing the K array from the C side instead of defining a
; local copy below.
COMMENT @
ifdef x64
K_CONST equ SHA256_K_ARRAY
else
K_CONST equ _SHA256_K_ARRAY
endif
EXTRN   K_CONST:xmmword
@
22
CONST   SEGMENT

; pshufb mask: reverses the bytes inside each 32-bit lane, converting
; between the big-endian SHA-256 message words and x86 little-endian loads.
align 16
Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12

; COMMENT @
; SHA-256 round constants K[0..63]; kept local (instead of EXTRN, see above)
; so the required 16-byte alignment for movdqa loads is guaranteed.
align 16
K_CONST \
DD 0428a2f98H, 071374491H, 0b5c0fbcfH, 0e9b5dba5H
DD 03956c25bH, 059f111f1H, 0923f82a4H, 0ab1c5ed5H
DD 0d807aa98H, 012835b01H, 0243185beH, 0550c7dc3H
DD 072be5d74H, 080deb1feH, 09bdc06a7H, 0c19bf174H
DD 0e49b69c1H, 0efbe4786H, 00fc19dc6H, 0240ca1ccH
DD 02de92c6fH, 04a7484aaH, 05cb0a9dcH, 076f988daH
DD 0983e5152H, 0a831c66dH, 0b00327c8H, 0bf597fc7H
DD 0c6e00bf3H, 0d5a79147H, 006ca6351H, 014292967H
DD 027b70a85H, 02e1b2138H, 04d2c6dfcH, 053380d13H
DD 0650a7354H, 0766a0abbH, 081c2c92eH, 092722c85H
DD 0a2bfe8a1H, 0a81a664bH, 0c24b8b70H, 0c76c51a3H
DD 0d192e819H, 0d6990624H, 0f40e3585H, 0106aa070H
DD 019a4c116H, 01e376c08H, 02748774cH, 034b0bcb5H
DD 0391c0cb3H, 04ed8aa4aH, 05b9cca4fH, 0682e6ff3H
DD 0748f82eeH, 078a5636fH, 084c87814H, 08cc70208H
DD 090befffaH, 0a4506cebH, 0bef9a3f7H, 0c67178f2H
; @

CONST   ENDS
50
; _TEXT$SHA256OPT SEGMENT 'CODE'

ifndef x64
    .686
    .xmm
endif

; jwasm-based assemblers for linux and linker from new versions of binutils
; can generate incorrect code for load [ARRAY + offset] instructions.
; 22.00: we load K_CONST offset to (rTable) register to avoid jwasm+binutils problem
        rTable  equ r0
        ; rTable  equ K_CONST

; rNum = remaining 64-byte block count; LOCAL_SIZE = stack scratch area:
;   Win64 needs 2 xmm slots (xmm8/xmm9 spill), x86 needs 1 (state0 spill).
; (x64/Linux needs no locals, so LOCAL_SIZE stays undefined there.)
ifdef x64
        rNum    equ REG_ABI_PARAM_2
    if (IS_LINUX eq 0)
        LOCAL_SIZE equ (16 * 2)
    endif
else
        rNum    equ r3
        LOCAL_SIZE equ (16 * 1)
endif

rState equ REG_ABI_PARAM_0
rData  equ REG_ABI_PARAM_1
76
77
78
79
80
81
; Emit a SHA-NI instruction as raw bytes (0F 38 <op> ModRM), so the file
; assembles even with assemblers that lack the SHA extension mnemonics.
; cmd = third opcode byte; a1/a2 = xmm register numbers (dest, src);
; 0c0H + a1*8 + a2 builds the register-register ModRM byte.
MY_SHA_INSTR macro cmd, a1, a2
        db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
endm

; opcode bytes for the three SHA-256 extension instructions
cmd_sha256rnds2 equ 0cbH
cmd_sha256msg1  equ 0ccH
cmd_sha256msg2  equ 0cdH

; two rounds of SHA-256 (implicit xmm0 holds the wk values)
MY_sha256rnds2 macro a1, a2
        MY_SHA_INSTR  cmd_sha256rnds2, a1, a2
endm

; first part of message schedule update
MY_sha256msg1 macro a1, a2
        MY_SHA_INSTR  cmd_sha256msg1, a1, a2
endm

; second part of message schedule update
MY_sha256msg2 macro a1, a2
        MY_SHA_INSTR  cmd_sha256msg2, a1, a2
endm
101
; Function prologue.
; Win64: xmm6-xmm9 are callee-saved, so spill them (xmm6/7 into the caller's
;        shadow space above the return address, xmm8/9 into fresh locals);
;        the +8 in the sub restores 16-byte stack alignment after the call.
; x86:   save callee-saved r3/r5, keep the original stack pointer in r5,
;        fetch the stack-passed arguments, then align the stack to 16 for
;        movdqa access to the [r4] local (state0_save).
; x64/Linux needs neither spills nor locals, so no prologue code at all.
MY_PROLOG macro
    ifdef x64
      if (IS_LINUX eq 0)
        movdqa  [r4 + 8], xmm6
        movdqa  [r4 + 8 + 16], xmm7
        sub     r4, LOCAL_SIZE + 8
        movdqa  [r4     ], xmm8
        movdqa  [r4 + 16], xmm9
      endif
    else ; x86
        push    r3
        push    r5
        mov     r5, r4
        NUM_PUSH_REGS   equ 2
        PARAM_OFFSET    equ (REG_SIZE * (1 + NUM_PUSH_REGS))
      if (IS_CDECL gt 0)
        mov     rState, [r4 + PARAM_OFFSET]
        mov     rData,  [r4 + PARAM_OFFSET + REG_SIZE * 1]
        mov     rNum,   [r4 + PARAM_OFFSET + REG_SIZE * 2]
      else ; fastcall
        mov     rNum,   [r4 + PARAM_OFFSET]
      endif
        and     r4, -16
        sub     r4, LOCAL_SIZE
    endif
endm
128
; Function epilogue: exact inverse of MY_PROLOG.
; Win64: restore xmm8/9 from locals, pop the frame, restore xmm6/7 from the
;        shadow space.  x86: restore the saved stack pointer and registers.
MY_EPILOG macro
    ifdef x64
      if (IS_LINUX eq 0)
        movdqa  xmm8, [r4]
        movdqa  xmm9, [r4 + 16]
        add     r4, LOCAL_SIZE + 8
        movdqa  xmm6, [r4 + 8]
        movdqa  xmm7, [r4 + 8 + 16]
      endif
    else ; x86
        mov     r4, r5
        pop     r5
        pop     r3
    endif
    MY_ENDP
endm
145
146
; xmm register roles.  msg/tmp share xmm0 (also the implicit wk operand of
; sha256rnds2); state lives in xmm2/xmm3; the 4 message-schedule registers
; start at xmm4 (w_regs..w_regs+3, used as a rolling window of 16 words).
msg        equ xmm0
tmp        equ xmm0
state0_N   equ 2
state1_N   equ 3
w_regs     equ 4


state1_save equ xmm1
state0  equ @CatStr(xmm, %state0_N)
state1  equ @CatStr(xmm, %state1_N)


; x64 has xmm8/9 spare for state0_save and a persistent byte-swap mask;
; x86 (8 xmm regs only) spills state0 to the stack and reloads the mask
; each block, since mask2 aliases xmm0 and gets clobbered by msg/tmp.
ifdef x64
        state0_save  equ  xmm8
        mask2        equ  xmm9
else
        state0_save  equ  [r4]
        mask2        equ  xmm0
endif

LOAD_MASK macro
        movdqa  mask2, XMMWORD PTR Reverse_Endian_Mask
endm

; load message words 16*k..16*k+15 of the block and byte-swap each word
LOAD_W macro k:req
        movdqu  @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
        pshufb  @CatStr(xmm, %(w_regs + k)), mask2
endm


; software-pipelining distances for the message-schedule updates inside RND4:
; pre1 <= 4 && pre2 >= 1 && pre1 > pre2 && (pre1 - pre2) <= 1
pre1 equ 3
pre2 equ 2
180
181
182
; RND4 k : perform 4 SHA-256 rounds (rounds 4k..4k+3) and, pipelined pre1/pre2
; quadruples ahead, advance the message schedule for a later group.
; msg = K[4k..4k+3] + W[4k..4k+3]; sha256rnds2 consumes the low two dwords,
; then pshufd moves the high two dwords down for the second rnds2.
RND4 macro k
        movdqa  msg, xmmword ptr [rTable + (k) * 16]
        paddd   msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4)))
        MY_sha256rnds2 state0_N, state1_N
        pshufd   msg, msg, 0eH

    if (k GE (4 - pre1)) AND (k LT (16 - pre1))
        ; w4[0] = msg1(w4[-4], w4[-3])
        MY_sha256msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
    endif

        MY_sha256rnds2 state1_N, state0_N

    if (k GE (4 - pre2)) AND (k LT (16 - pre2))
        movdqa  tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 1) mod 4)))
        palignr tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4))), 4
        paddd   @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), tmp
        ; w4[0] = msg2(w4[0], w4[-1])
        MY_sha256msg2 %(w_regs + ((k + pre2) mod 4)), %(w_regs + ((k + pre2 - 1) mod 4))
    endif
endm
204
205
206
207
208
; Convert between the linear hash layout in memory (a,b,c,d / e,f,g,h) and
; the interleaved layout the sha256rnds2 instruction expects
; (state0 = cdgh, state1 = abef).  The transform is self-inverse, so the
; same macro is used on entry and on exit.
REVERSE_STATE macro
                               ; state0 ; dcba
                               ; state1 ; hgfe
        pshufd      tmp, state0, 01bH   ; abcd
        pshufd   state0, state1, 01bH   ; efgh
        movdqa   state1, state0         ; efgh
        punpcklqdq  state0, tmp         ; cdgh
        punpckhqdq  state1, tmp         ; abef
endm
218
219
;-----------------------------------------------------------------------
; void Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
; In:    rState = hash state (8 dwords), rData = message, rNum = block count
; Uses the SHA-NI instructions; processes rNum 64-byte blocks.
; Returns immediately (state untouched) when rNum == 0.
;-----------------------------------------------------------------------
MY_PROC Sha256_UpdateBlocks_HW, 3
    MY_PROLOG

        lea     rTable, [K_CONST]       ; see jwasm+binutils note above

        cmp     rNum, 0
        je      end_c

        movdqu   state0, [rState]       ; dcba
        movdqu   state1, [rState + 16]  ; hgfe

        REVERSE_STATE                   ; -> cdgh / abef for sha256rnds2

        ifdef x64
        LOAD_MASK                       ; mask survives in xmm9 across blocks
        endif

    align 16
    nextBlock:
        movdqa  state0_save, state0
        movdqa  state1_save, state1

        ifndef x64
        LOAD_MASK                       ; x86: mask2 aliases xmm0, reload per block
        endif

        LOAD_W 0
        LOAD_W 1
        LOAD_W 2
        LOAD_W 3


        ; 64 rounds = 16 pipelined groups of 4
        k = 0
        rept 16
          RND4 k
          k = k + 1
        endm

        ; feed-forward: state += saved input state
        paddd   state0, state0_save
        paddd   state1, state1_save

        add     rData, 64
        sub     rNum, 1
        jnz     nextBlock

        REVERSE_STATE                   ; back to linear dcba / hgfe layout

        movdqu  [rState], state0
        movdqu  [rState + 16], state1

  end_c:
MY_EPILOG

; _TEXT$SHA256OPT ENDS

end
276