• Home
  • Raw
  • Download

Lines Matching +full:a +full:- +full:h

2 # Implement fast SHA-256 with AVX1 instructions. (x86_64)
11 # This software is available to you under a choice of one of two
21 # - Redistributions of source code must retain the above
25 # - Redistributions in binary form must reproduce the above
32 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
40 # This code is described in an Intel White-Paper:
41 # "Fast SHA-256 Implementations on Intel Architecture Processors"
47 # This code schedules 1 block at a time, with 4 lanes per block
50 #include <linux/linkage.h>
58 # Add reg to mem using reg-mem add and store
66 shld $(32-(\p1)), \p2, \p2
93 SHUF_00BA = %xmm10 # shuffle xBxA -> 00BA
94 SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00
106 a = %eax define
111 h = %r11d define
140 # Rotate values of symbols a...h
142 TMP_ = h
143 h = g define
149 b = a
150 a = TMP_ define
154 ## compute s0 four at a time and s1 two at a time
155 ## compute W[-16] + W[-7] 4 at a time
158 MY_ROR (25-11), y0 # y0 = e >> (25-11)
159 mov a, y1 # y1 = a
160 vpalignr $4, X2, X3, XTMP0 # XTMP0 = W[-7]
161 MY_ROR (22-13), y1 # y1 = a >> (22-13)
162 xor e, y0 # y0 = e ^ (e >> (25-11))
164 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
165 xor a, y1 # y1 = a ^ (a >> (22-13))
167 vpaddd X0, XTMP0, XTMP0 # XTMP0 = W[-7] + W[-16]
168 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
170 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
172 vpalignr $4, X0, X1, XTMP1 # XTMP1 = W[-15]
173 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
176 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
179 mov a, y0 # y0 = a
180 add y2, h # h = h + S1 + CH + k + w
181 mov a, y2 # y2 = a
183 or c, y0 # y0 = a|c
184 add h, d # d = d + h + S1 + CH + k + w
185 and c, y2 # y2 = a&c
186 vpslld $(32-7), XTMP1, XTMP3
187 and b, y0 # y0 = (a|c)&b
188 add y1, h # h = h + S1 + CH + k + w + S0
189 vpor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] MY_ROR 7
190 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
191 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
194 mov a, y1 # y1 = a
195 MY_ROR (25-11), y0 # y0 = e >> (25-11)
196 xor e, y0 # y0 = e ^ (e >> (25-11))
198 MY_ROR (22-13), y1 # y1 = a >> (22-13)
200 xor a, y1 # y1 = a ^ (a >> (22-13))
201 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
203 vpsrld $3, XTMP1, XTMP4 # XTMP4 = W[-15] >> 3
204 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
205 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
208 vpslld $(32-18), XTMP1, XTMP1
209 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
214 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
215 vpxor XTMP2, XTMP3, XTMP3 # XTMP3 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR 18
216 mov a, y0 # y0 = a
217 add y2, h # h = h + S1 + CH + k + w
218 mov a, y2 # y2 = a
220 or c, y0 # y0 = a|c
221 add h, d # d = d + h + S1 + CH + k + w
222 and c, y2 # y2 = a&c
224 vpshufd $0b11111010, X3, XTMP2 # XTMP2 = W[-2] {BBAA}
225 and b, y0 # y0 = (a|c)&b
226 add y1, h # h = h + S1 + CH + k + w + S0
227 vpaddd XTMP1, XTMP0, XTMP0 # XTMP0 = W[-16] + W[-7] + s0
228 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
229 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
232 mov a, y1 # y1 = a
233 MY_ROR (25-11), y0 # y0 = e >> (25-11)
234 xor e, y0 # y0 = e ^ (e >> (25-11))
235 MY_ROR (22-13), y1 # y1 = a >> (22-13)
237 xor a, y1 # y1 = a ^ (a >> (22-13))
238 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
239 vpsrld $10, XTMP2, XTMP4 # XTMP4 = W[-2] >> 10 {BBAA}
241 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xBxA}
242 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
244 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xBxA}
245 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
246 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
251 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
254 mov a, y0 # y0 = a
255 add y2, h # h = h + S1 + CH + k + w
256 mov a, y2 # y2 = a
258 or c, y0 # y0 = a|c
259 add h, d # d = d + h + S1 + CH + k + w
260 and c, y2 # y2 = a&c
262 and b, y0 # y0 = (a|c)&b
263 add y1, h # h = h + S1 + CH + k + w + S0
265 vpshufd $0b01010000, XTMP0, XTMP2 # XTMP2 = W[-2] {DDCC}
266 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
267 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
270 MY_ROR (25-11), y0 # y0 = e >> (25-11)
271 mov a, y1 # y1 = a
272 MY_ROR (22-13), y1 # y1 = a >> (22-13)
273 xor e, y0 # y0 = e ^ (e >> (25-11))
275 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
276 vpsrld $10, XTMP2, XTMP5 # XTMP5 = W[-2] >> 10 {DDCC}
277 xor a, y1 # y1 = a ^ (a >> (22-13))
279 vpsrlq $19, XTMP2, XTMP3 # XTMP3 = W[-2] MY_ROR 19 {xDxC}
280 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
282 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
283 vpsrlq $17, XTMP2, XTMP2 # XTMP2 = W[-2] MY_ROR 17 {xDxC}
284 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
288 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
292 mov a, y0 # y0 = a
293 add y2, h # h = h + S1 + CH + k + w
294 mov a, y2 # y2 = a
296 or c, y0 # y0 = a|c
297 add h, d # d = d + h + S1 + CH + k + w
298 and c, y2 # y2 = a&c
300 and b, y0 # y0 = (a|c)&b
301 add y1, h # h = h + S1 + CH + k + w + S0
302 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
303 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
311 MY_ROR (25-11), y0 # y0 = e >> (25-11)
312 mov a, y1 # y1 = a
313 xor e, y0 # y0 = e ^ (e >> (25-11))
314 MY_ROR (22-13), y1 # y1 = a >> (22-13)
316 xor a, y1 # y1 = a ^ (a >> (22-13))
317 MY_ROR (11-6), y0 # y0 = (e >> (11-6)) ^ (e >> (25-6))
319 xor e, y0 # y0 = e ^ (e >> (11-6)) ^ (e >> (25-6))
320 MY_ROR (13-2), y1 # y1 = (a >> (13-2)) ^ (a >> (22-2))
322 xor a, y1 # y1 = a ^ (a >> (13-2)) ^ (a >> (22-2))
326 MY_ROR 2, y1 # y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22)
329 mov a, y0 # y0 = a
330 add y2, h # h = h + S1 + CH + k + w
331 mov a, y2 # y2 = a
332 or c, y0 # y0 = a|c
333 add h, d # d = d + h + S1 + CH + k + w
334 and c, y2 # y2 = a&c
335 and b, y0 # y0 = (a|c)&b
336 add y1, h # h = h + S1 + CH + k + w + S0
337 or y2, y0 # y0 = MAJ = ((a|c)&b)|(a&c)
338 add y0, h # h = h + S1 + CH + k + w + S0 + MAJ
368 mov 4*0(CTX), a
375 mov 4*7(CTX), h
438 addm (4*0)(CTX),a
445 addm (4*7)(CTX),h
491 # shuffle xBxA -> 00BA
497 # shuffle xDxC -> DC00