• Home
  • Raw
  • Download

Lines Matching +full:a +full:- +full:h

2 # Implement fast SHA-256 with AVX1 instructions. (x86_64)
11 # This software is available to you under a choice of one of two
21 # - Redistributions of source code must retain the above
25 # - Redistributions in binary form must reproduce the above
32 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
40 # This code is described in an Intel White-Paper:
41 # "Fast SHA-256 Implementations on Intel Architecture Processors"
47 # This code schedules 1 block at a time, with 4 lanes per block
50 #include <linux/linkage.h>
51 #include <linux/cfi_types.h>
59 # Add reg to mem using reg-mem add and store
67 shld $(32-(\p1)), \p2, \p2
94 SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
95 SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00
107 a = %eax define
112 h = %r11d define
141 # Rotate values of symbols a...h
143 TMP_ = h
144 h = g define
150 b = a
151 a = TMP_ define
155 ## compute s0 four at a time and s1 two at a time
156 ## compute W[-16] + W[-7] 4 at a time
159 MY_ROR (25-11), y0 # y0 = e >> (25-11)
160 mov a, y1 # y1 = a
161 vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7]
162 MY_ROR (22-13), y1 # y1 = a >> (22-13)
163 xor e, y0 # y0 = e ^ (e >> (25-11))
165 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
166 xor a, y1 # y1 = a ^ (a >> (22-13))
168 vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16]
169 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
171 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
173 vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15]
174 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
177 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
180 mov a, y0 # y0 = a
181 add y2, h # h = h + S1 + CH + k + w
182 mov a, y2 # y2 = a
184 or c, y0 # y0 = a|c
185 add h, d # d = d + h + S1 + CH + k + w
186 and c, y2 # y2 = a&c
187 vpslld $(32-7), XTMP1, XTMP3
188 and b, y0 # y0 = (a|c)&b
189 add y1, h # h = h + S1 + CH + k + w + S0
190 vpor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] MY_ROR 7
191 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
192 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
195 mov a, y1 # y1 = a
196 MY_ROR (25-11), y0 # y0 = e >> (25-11)
197 xor e, y0 # y0 = e ^ (e >> (25-11))
199 MY_ROR (22-13), y1 # y1 = a >> (22-13)
201 xor a, y1 # y1 = a ^ (a >> (22-13))
202 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
204 vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3
205 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
206 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
209 vpslld $(32-18), XTMP1, XTMP1
210 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
215 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
216 vpxor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18
217 mov a, y0 # y0 = a
218 add y2, h # h = h + S1 + CH + k + w
219 mov a, y2 # y2 = a
221 or c, y0 # y0 = a|c
222 add h, d # d = d + h + S1 + CH + k + w
223 and c, y2 # y2 = a&c
225 vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
226 and b, y0 # y0 = (a|c)&b
227 add y1, h # h = h + S1 + CH + k + w + S0
228 vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
229 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
230 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
233 mov a, y1 # y1 = a
234 MY_ROR (25-11), y0 # y0 = e >> (25-11)
235 xor e, y0 # y0 = e ^ (e >> (25-11))
236 MY_ROR (22-13), y1 # y1 = a >> (22-13)
238 xor a, y1 # y1 = a ^ (a >> (22-13))
239 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
240 vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
242 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xBxA}
243 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
245 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xBxA}
246 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
247 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
252 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
255 mov a, y0 # y0 = a
256 add y2, h # h = h + S1 + CH + k + w
257 mov a, y2 # y2 = a
259 or c, y0 # y0 = a|c
260 add h, d # d = d + h + S1 + CH + k + w
261 and c, y2 # y2 = a&c
263 and b, y0 # y0 = (a|c)&b
264 add y1, h # h = h + S1 + CH + k + w + S0
266 vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
267 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
268 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
271 MY_ROR (25-11), y0 # y0 = e >> (25-11)
272 mov a, y1 # y1 = a
273 MY_ROR (22-13), y1 # y1 = a >> (22-13)
274 xor e, y0 # y0 = e ^ (e >> (25-11))
276 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
277 vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC}
278 xor a, y1 # y1 = a ^ (a >> (22-13))
280 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xDxC}
281 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
283 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
284 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xDxC}
285 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
289 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
293 mov a, y0 # y0 = a
294 add y2, h # h = h + S1 + CH + k + w
295 mov a, y2 # y2 = a
297 or c, y0 # y0 = a|c
298 add h, d # d = d + h + S1 + CH + k + w
299 and c, y2 # y2 = a&c
301 and b, y0 # y0 = (a|c)&b
302 add y1, h # h = h + S1 + CH + k + w + S0
303 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
304 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
312 MY_ROR (25-11), y0 # y0 = e >> (25-11)
313 mov a, y1 # y1 = a
314 xor e, y0 # y0 = e ^ (e >> (25-11))
315 MY_ROR (22-13), y1 # y1 = a >> (22-13)
317 xor a, y1 # y1 = a ^ (a >> (22-13))
318 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
320 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
321 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
323 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
327 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
330 mov a, y0 # y0 = a
331 add y2, h # h = h + S1 + CH + k + w
332 mov a, y2 # y2 = a
333 or c, y0 # y0 = a|c
334 add h, d # d = d + h + S1 + CH + k + w
335 and c, y2 # y2 = a&c
336 and b, y0 # y0 = (a|c)&b
337 add y1, h # h = h + S1 + CH + k + w + S0
338 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
339 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
368 mov 4*0(CTX), a
375 mov 4*7(CTX), h
438 addm (4*0)(CTX),a
445 addm (4*7)(CTX),h
491 # shuffle xBxA -> 00BA
497 # shuffle xDxC -> DC00