#! /usr/bin/env perl
# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# sha1_block for Thumb.
#
# January 2007.
#
# The code does not present direct interest to OpenSSL, because of low
# performance. Its purpose is to establish _size_ benchmark. Pretty
# useless one I must say, because 30% or 88 bytes larger ARMv4 code
# [available on demand] is almost _twice_ as fast. It should also be
# noted that in-lining of .Lcommon and .Lrotate improves performance
# by over 40%, while code increases by only 10% or 32 bytes. But once
# again, the goal was to establish _size_ benchmark, not performance.

use strict;
use warnings;

# Emit the generated assembly to the file named on the command line,
# or to the terminal when no argument is given (matches the original
# behaviour, where a failed redirect left STDOUT untouched — but now
# a genuine open failure is reported instead of being ignored).
my $output = shift;
if (defined $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

my $inline = 0;             # 1 = inline .Lcommon/.Lrotate bodies: ~40% faster, ~10% larger
my $cheat_on_binutils = 0;  # 1 = emit an ARM-mode stub that switches to Thumb via "bx"

# Register assignment for the Thumb implementation.
my $t0  = "r0";
my $t1  = "r1";
my $t2  = "r2";
my $a   = "r3";
my $b   = "r4";
my $c   = "r5";
my $d   = "r6";
my $e   = "r7";
my $K   = "r8";     # "upper" registers can be used in add/sub and mov insns
my $ctx = "r9";
my $inp = "r10";
my $len = "r11";
my $Xi  = "r12";

my $code = '';      # accumulated assembly text, printed at the end

# Shared prologue of every round: fetch next X[i], add K and ROR(A,27)
# into the working value in $t2. Returns the assembly as a string.
sub common {
<<___;
	sub	$t0,#4
	ldr	$t1,[$t0]
	add	$e,$K			@ E+=K_xx_xx
	lsl	$t2,$a,#5
	add	$t2,$e
	lsr	$e,$a,#27
	add	$t2,$e			@ E+=ROR(A,27)
	add	$t2,$t1			@ E+=X[i]
___
}

# Shared epilogue of every round: rotate the five working variables.
sub rotate {
<<___;
	mov	$e,$d			@ E=D
	mov	$d,$c			@ D=C
	lsl	$c,$b,#30
	lsr	$b,$b,#2
	orr	$c,$b			@ C=ROR(B,2)
	mov	$b,$a			@ B=A
	add	$a,$t2,$t1		@ A=E+F_xx_xx(B,C,D)
___
}

# Round body for rounds 0..19: F = (B & (C ^ D)) ^ D  (the "Ch" function).
sub BODY_00_19 {
$code .= $inline ? common() : "\tbl	.Lcommon\n";
$code .= <<___;
	mov	$t1,$c
	eor	$t1,$d
	and	$t1,$b
	eor	$t1,$d			@ F_00_19(B,C,D)
___
$code .= $inline ? rotate() : "\tbl	.Lrotate\n";
}

# Round body for rounds 20..39 (also reused for 60..79): F = B ^ C ^ D.
sub BODY_20_39 {
$code .= $inline ? common() : "\tbl	.Lcommon\n";
$code .= <<___;
	mov	$t1,$b
	eor	$t1,$c
	eor	$t1,$d			@ F_20_39(B,C,D)
___
$code .= $inline ? rotate() : "\tbl	.Lrotate\n";
}

# Round body for rounds 40..59: F = (B & C) | ((B | C) & D)  (the "Maj" function).
sub BODY_40_59 {
$code .= $inline ? common() : "\tbl	.Lcommon\n";
$code .= <<___;
	mov	$t1,$b
	and	$t1,$c
	mov	$e,$b
	orr	$e,$c
	and	$e,$d
	orr	$t1,$e			@ F_40_59(B,C,D)
___
$code .= $inline ? rotate() : "\tbl	.Lrotate\n";
}

$code = <<___;
.text
.code	16

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
sha1_block_data_order:
___
if ($cheat_on_binutils) {
$code .= <<___;
.code	32
	add	r3,pc,#1
	bx	r3			@ switch to Thumb ISA
.code	16
___
}
$code .= <<___;
	push	{r4-r7}
	mov	r3,r8
	mov	r4,r9
	mov	r5,r10
	mov	r6,r11
	mov	r7,r12
	push	{r3-r7,lr}
	lsl	r2,#6
	mov	$ctx,r0			@ save context
	mov	$inp,r1			@ save inp
	mov	$len,r2			@ save len
	add	$len,$inp		@ $len to point at inp end

.Lloop:
	mov	$Xi,sp
	mov	$t2,sp
	sub	$t2,#16*4		@ [3]
.LXload:
	ldrb	$a,[$t1,#0]		@ $t1 is r1 and holds inp
	ldrb	$b,[$t1,#1]
	ldrb	$c,[$t1,#2]
	ldrb	$d,[$t1,#3]
	lsl	$a,#24
	lsl	$b,#16
	lsl	$c,#8
	orr	$a,$b
	orr	$a,$c
	orr	$a,$d
	add	$t1,#4
	push	{$a}
	cmp	sp,$t2
	bne	.LXload			@ [+14*16]

	mov	$inp,$t1		@ update $inp
	sub	$t2,#32*4
	sub	$t2,#32*4
	mov	$e,#31			@ [+4]
.LXupdate:
	ldr	$a,[sp,#15*4]
	ldr	$b,[sp,#13*4]
	ldr	$c,[sp,#7*4]
	ldr	$d,[sp,#2*4]
	eor	$a,$b
	eor	$a,$c
	eor	$a,$d
	ror	$a,$e
	push	{$a}
	cmp	sp,$t2
	bne	.LXupdate		@ [+(11+1)*64]

	ldmia	$t0!,{$a,$b,$c,$d,$e}	@ $t0 is r0 and holds ctx
	mov	$t0,$Xi

	ldr	$t2,.LK_00_19
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+7+4]
.L_00_19:
___
	BODY_00_19();
$code .= <<___;
	cmp	$Xi,$t0
	bne	.L_00_19		@ [+(2+9+4+2+8+2)*20]

	ldr	$t2,.LK_20_39
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_20_39_or_60_79:
___
	BODY_20_39();
$code .= <<___;
	cmp	$Xi,$t0
	bne	.L_20_39_or_60_79	@ [+(2+9+3+2+8+2)*20*2]
	cmp	sp,$t0
	beq	.Ldone			@ [+2]

	ldr	$t2,.LK_40_59
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_40_59:
___
	BODY_40_59();
$code .= <<___;
	cmp	$Xi,$t0
	bne	.L_40_59		@ [+(2+9+6+2+8+2)*20]

	ldr	$t2,.LK_60_79
	mov	$Xi,sp
	mov	$K,$t2
	b	.L_20_39_or_60_79	@ [+4]
.Ldone:
	mov	$t0,$ctx
	ldr	$t1,[$t0,#0]
	ldr	$t2,[$t0,#4]
	add	$a,$t1
	ldr	$t1,[$t0,#8]
	add	$b,$t2
	ldr	$t2,[$t0,#12]
	add	$c,$t1
	ldr	$t1,[$t0,#16]
	add	$d,$t2
	add	$e,$t1
	stmia	$t0!,{$a,$b,$c,$d,$e}	@ [+20]

	add	sp,#80*4		@ deallocate stack frame
	mov	$t0,$ctx		@ restore ctx
	mov	$t1,$inp		@ restore inp
	cmp	$t1,$len
	beq	.Lexit
	b	.Lloop			@ [+6] total 3212 cycles
.Lexit:
	pop	{r2-r7}
	mov	r8,r2
	mov	r9,r3
	mov	r10,r4
	mov	r11,r5
	mov	r12,r6
	mov	lr,r7
	pop	{r4-r7}
	bx	lr
.align	2
___
# Out-of-line subroutine variants of the shared round fragments,
# emitted only when not inlining them into every round.
$code .= ".Lcommon:\n" . common() . "\tmov	pc,lr\n" if (!$inline);
$code .= ".Lrotate:\n" . rotate() . "\tmov	pc,lr\n" if (!$inline);
$code .= <<___;
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for Thumb, CRYPTOGAMS by <appro\@openssl.org>"
___

print $code;
close STDOUT or die "error closing STDOUT: $!";	# enforce flush