/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	/*
	 * Register roles
	 *
	 *   k0-k3     (v0-v3)   round constants, one per vector, replicated
	 *                       into all four lanes
	 *   t0, t1    (v4, v5)  schedule words + round constant; used
	 *                       alternately by even and odd steps
	 *   dga, dgb  (v6, v7)  digest state carried across blocks: four
	 *                       words in dga, the fifth word in dgb
	 *   v8-v11              the 16 message words of the current block
	 *   dg0       (v12)     working copy of dga for the current block
	 *   dg1, dg2  (v13,v14) output of sha1h; written alternately by odd
	 *                       and even steps
	 */
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	t0		.req	v4
	t1		.req	v5

	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14

	/*
	 * add_only - one sha1c/sha1p/sha1m step without schedule update
	 *
	 *   op   suffix selecting the hash instruction (c, p or m)
	 *   ev   the literal string "ev" selects the even variant, anything
	 *        else selects the odd variant
	 *   rc   vector holding the round constant to add for the NEXT step
	 *   s0   number of the v register holding the schedule words for
	 *        the NEXT step (may be blank only in the odd variant)
	 *   dg1  optional override for the second sha1\op operand in the
	 *        even variant; defaults to dg1s
	 *
	 * Even steps consume t0 and dg1 (or \dg1) and produce t1 and dg2;
	 * odd steps consume t1 and dg2 and produce t0 and dg1. The two
	 * variants must therefore strictly alternate, and t0 must be
	 * primed before the first even step.
	 */
	.macro		add_only, op, ev, rc, s0, dg1
	.ifc		\ev, ev
	add		t1.4s, v\s0\().4s, \rc\().4s
	sha1h		dg2s, dg0s
	.ifnb		\dg1
	sha1\op		dg0q, \dg1, t0.4s
	.else
	sha1\op		dg0q, dg1s, t0.4s
	.endif
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s
	.endif
	sha1h		dg1s, dg0s
	sha1\op		dg0q, dg2s, t1.4s
	.endif
	.endm

	/*
	 * add_update - add_only step wrapped in a message schedule update
	 *
	 * v\s0 is rewritten in place by sha1su0/sha1su1 from v\s1, v\s2 and
	 * v\s3. Note that add_only receives \s1, not \s0, as the source of
	 * the next step's schedule words.
	 */
	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
	add_only	\op, \ev, \rc, \s1, \dg1
	sha1su1		v\s0\().4s, v\s3\().4s
	.endm

	/*
	 * loadrc - replicate the 32-bit constant \val into all lanes of \k
	 *
	 * The constant is assembled in scratch register \tmp from two
	 * 16-bit immediates, so that no literal data has to be placed in
	 * the executable .text section.
	 */
	.macro		loadrc, k, val, tmp
	movz		\tmp, #((\val) & 0xffff)
	movk		\tmp, #(((\val) >> 16) & 0xffff), lsl #16
	dup		\k, \tmp
	.endm

	/*
	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
	 *			  int blocks)
	 *
	 * In:      x0 = sst, x1 = src, w2 = blocks
	 * Out:     the first 20 bytes at [x0] are updated in place
	 * Clobber: x1, w2, x4, w6, x7, x8, v0-v14
	 *
	 * NOTE(review): the block loop decrements w2 before testing it, so
	 * this presumably relies on the caller passing blocks >= 1 - confirm.
	 * NOTE(review): v8-v14 are modified without being saved here;
	 * presumably the caller preserves the SIMD register state around
	 * the call - confirm.
	 */
ENTRY(sha1_ce_transform)
	/* load the SHA1 round constants */
	loadrc		k0.4s, 0x5a827999, w6
	loadrc		k1.4s, 0x6ed9eba1, w6
	loadrc		k2.4s, 0x8f1bbcdc, w6
	loadrc		k3.4s, 0xca62c1d6, w6

	/* load state: four words into dga, the fifth into dgb */
	ld1		{dgav.4s}, [x0]
	ldr		dgb, [x0, #16]

	/*
	 * load sha1_ce_state::finalize
	 * (the field offset is read from a symbol defined outside this
	 * file; ldr_l is presumably a symbol-load helper provided by
	 * <asm/assembler.h> - confirm)
	 */
	ldr_l		w4, sha1_ce_offsetof_finalize, x4
	ldr		w4, [x0, x4]

	/* load input: one 64-byte block per iteration */
0:	ld1		{v8.4s-v11.4s}, [x1], #64
	sub		w2, w2, #1

	/* byte-swap each 32-bit message word (emitted via CPU_LE() only) */
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

	/*
	 * Entry point for a block whose words are already in v8-v11 in
	 * their final form; also used for the padding block built below.
	 * Prime t0 for the first even step and take a working copy of
	 * the state.
	 */
1:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b

	/* only the very first step takes its second operand from dgb */
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	/* the schedule is complete; the remaining steps only hash */
	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od

	/* update state */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	/* more input blocks to process? */
	cbnz		w2, 0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size,
	 * the padding is handled by the C code in that case.
	 */
	cbz		x4, 3f
	ldr_l		w4, sha1_ce_offsetof_count, x4
	ldr		x4, [x0, x4]
	movi		v9.2d, #0
	mov		x8, #0x80000000		// padding marker for word 0
	movi		v10.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8			// v8 = { 0x80000000, 0, 0, 0 }
	mov		x4, #0			// do not build the padding block again
	mov		v11.d[0], xzr
	mov		v11.d[1], x7		// count from x7 in the last two words
	b		1b			// words are final: skip load and rev32

	/* store new state */
3:	st1		{dgav.4s}, [x0]
	str		dgb, [x0, #16]
	ret
ENDPROC(sha1_ce_transform)