#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for MIPS.

# Performance improvement is 30% on unaligned input. The "secret" is
# to deploy lwl/lwr pair to load unaligned input. One could have
# vectorized Xupdate on MIPSIII/IV, but the goal was to code MIPS32-
# compatible subroutine. There is room for minor optimization on
# little-endian platforms...

######################################################################
# There are a number of MIPS ABIs in use; O32 and N32/64 are the most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. The following coding rules
# facilitate interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
#   old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
$flavour = shift;	# supported flavours are o32,n32,64,nubi32,nubi64

# Select pointer-sized arithmetic and register save/restore mnemonics
# according to the requested ABI flavour.
if ($flavour =~ /64|n32/i) {
	$PTR_ADD="dadd";	# incidentally works even on n32
	$PTR_SUB="dsub";	# incidentally works even on n32
	$REG_S="sd";
	$REG_L="ld";
	$PTR_SLL="dsll";	# incidentally works even on n32
	$SZREG=8;
} else {
	$PTR_ADD="add";
	$PTR_SUB="sub";
	$REG_S="sw";
	$REG_L="lw";
	$PTR_SLL="sll";
	$SZREG=4;
}
#
# <appro@openssl.org>
#
######################################################################

# Ask the target compiler's preprocessor whether MIPSEL is predefined:
# if the token survives preprocessing unchanged, the target is
# big-endian. The probe is attempted only when CC is set; otherwise
# $big_endian stays undefined and the host-endianness fallback below
# is used (previously the probe always ran, which made the fallback
# unreachable and silently selected little-endian when CC was unset).
if ($ENV{CC}) {
	$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0;
}

# The output file is the first argument that looks like "name.ext".
# Without such an argument the generated code goes to the inherited
# STDOUT.
for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
if (defined($output)) {
	open STDOUT,'>',$output or die "can't open $output: $!";
}

if (!defined($big_endian))
	{ $big_endian=(unpack('L',pack('N',1))==1); }

# offsets of the Most and Least Significant Bytes
$MSB=$big_endian?0:3;
$LSB=3&~$MSB;

@X=map("\$$_",(8..23));	# a4-a7,s0-s11

$ctx=$a0;
$inp=$a1;
$num=$a2;
$A="\$1";
$B="\$2";
$C="\$3";
$D="\$7";
$E="\$24";	@V=($A,$B,$C,$D,$E);
$t0="\$25";
$t1=$num;	# $num is offloaded to stack
$t2="\$30";	# fp
$K="\$31";	# ra

# BODY_00_14($i,$a,$b,$c,$d,$e)
#
# Appends the code for round $i (0..14) to $code. On little-endian
# targets X[$i] is byte-swapped first. The next input word X[$i+1] is
# fetched with an lwl/lwr pair, interleaved with the round itself:
#   e += K + (a<<5) + (a>>27) + (((c^d)&b)^d) + X[i];  b = (b<<30)|(b>>2)
sub BODY_00_14 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___	if (!$big_endian);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	$t1,$t2
	or	@X[$i],$t1
___
$code.=<<___;
	 lwl	@X[$j],$j*4+$MSB($inp)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	 lwr	@X[$j],$j*4+$LSB($inp)
	srl	$t1,$a,27
	addu	$e,$t0
	xor	$t0,$c,$d
	addu	$e,$t1
	sll	$t2,$b,30
	and	$t0,$b
	srl	$b,$b,2
	xor	$t0,$d
	addu	$e,@X[$i]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_15_19($i,$a,$b,$c,$d,$e)
#
# Appends the code for round $i (15..19) to $code. Uses the same
# round function as BODY_00_14, but instead of loading input it
# updates the message schedule in place:
#   X[j%16] ^= X[(j+2)%16] ^ X[(j+8)%16] ^ X[(j+13)%16], rotated left
#   by one bit, where j = i+1.
# The byte swap is emitted only for $i==15 on little-endian targets.
sub BODY_15_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

$code.=<<___	if (!$big_endian && $i==15);
	srl	$t0,@X[$i],24	# byte swap($i)
	srl	$t1,@X[$i],8
	andi	$t2,@X[$i],0xFF00
	sll	@X[$i],@X[$i],24
	andi	$t1,0xFF00
	sll	$t2,$t2,8
	or	@X[$i],$t0
	or	@X[$i],$t1
	or	@X[$i],$t2
___
$code.=<<___;
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	and	$t0,$b
	 srl	$t1,@X[$j%16],31
	 addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	xor	$t0,$d
	 or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_20_39($i,$a,$b,$c,$d,$e)
#
# Appends the code for a round using the parity function b^c^d; it is
# called for rounds 20..39 and again for rounds 60..79. For $i<79 the
# message schedule is updated as in BODY_15_19. For the final round
# ($i==79) no schedule update is emitted; instead the five context
# words are loaded into X[0..4] for the closing accumulation.
sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	xor	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	xor	$t0,$b
	 srl	$t1,@X[$j%16],31
	 addu	@X[$j%16],@X[$j%16]
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	 or	@X[$j%16],$t1
	or	$b,$t2
	addu	$e,$t0
___
$code.=<<___ if ($i==79);
	 lw	@X[0],0($ctx)
	sll	$t0,$a,5	# $i
	addu	$e,$K
	 lw	@X[1],4($ctx)
	srl	$t1,$a,27
	addu	$e,$t0
	 lw	@X[2],8($ctx)
	xor	$t0,$c,$d
	addu	$e,$t1
	 lw	@X[3],12($ctx)
	sll	$t2,$b,30
	xor	$t0,$b
	 lw	@X[4],16($ctx)
	srl	$b,$b,2
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

# BODY_40_59($i,$a,$b,$c,$d,$e)
#
# Appends the code for round $i (40..59) to $code. The round function
# is accumulated into e in two parts, (c&d) and ((c^d)&b), and the
# message schedule is updated as in BODY_15_19. Nothing is emitted
# unless $i<79.
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);
	 xor	@X[$j%16],@X[($j+2)%16]
	sll	$t0,$a,5	# $i
	addu	$e,$K
	srl	$t1,$a,27
	addu	$e,$t0
	 xor	@X[$j%16],@X[($j+8)%16]
	and	$t0,$c,$d
	addu	$e,$t1
	 xor	@X[$j%16],@X[($j+13)%16]
	sll	$t2,$b,30
	addu	$e,$t0
	 srl	$t1,@X[$j%16],31
	xor	$t0,$c,$d
	 addu	@X[$j%16],@X[$j%16]
	and	$t0,$b
	srl	$b,$b,2
	 or	@X[$j%16],$t1
	addu	$e,@X[$i%16]
	or	$b,$t2
	addu	$e,$t0
___
}

$FRAMESIZE=16;	# large enough to accommodate NUBI saved registers
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;

# Function prologue: allocate the frame and save the registers that
# the round code reuses ($ra and $fp double as $K and $t2).
$code=<<___;
#ifdef OPENSSL_FIPSCANISTER
# include <openssl/fipssyms.h>
#endif

.text

.set	noat
.set	noreorder
.align	5
.globl	sha1_block_data_order
.ent	sha1_block_data_order
sha1_block_data_order:
	.frame	$sp,$FRAMESIZE*$SZREG,$ra
	.mask	$SAVED_REGS_MASK,-$SZREG
	.set	noreorder
	$PTR_SUB $sp,$FRAMESIZE*$SZREG
	$REG_S	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_S	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_S	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_S	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_S	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_S	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_S	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_S	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_S	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_S	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);	# optimize non-nubi prologue
	$REG_S	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_S	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_S	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_S	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_S	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
# Convert the block count into an end-of-input pointer (num*64+inp),
# park it at 0($sp) because its register is reused as $t1, then load
# the five state words. The last load sits in the branch delay slot.
$code.=<<___;
	$PTR_SLL $num,6
	$PTR_ADD $num,$inp
	$REG_S	$num,0($sp)
	lw	$A,0($ctx)
	lw	$B,4($ctx)
	lw	$C,8($ctx)
	lw	$D,12($ctx)
	b	.Loop
	lw	$E,16($ctx)
.align	4
.Loop:
	.set	reorder
	lwl	@X[0],$MSB($inp)
	lui	$K,0x5a82
	lwr	@X[0],$LSB($inp)
	ori	$K,0x7999	# K_00_19
___
# Emit the 80 rounds. After each round the working-variable list is
# rotated so that the roles of a..e shift by one position. $i is
# deliberately carried over from one loop to the next.
for ($i=0;$i<15;$i++)	{ BODY_00_14($i,@V); unshift(@V,pop(@V)); }
for (;$i<20;$i++)	{ BODY_15_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x6ed9
	ori	$K,0xeba1	# K_20_39
___
for (;$i<40;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0x8f1b
	ori	$K,0xbcdc	# K_40_59
___
for (;$i<60;$i++)	{ BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	lui	$K,0xca62
	ori	$K,0xc1d6	# K_60_79
___
for (;$i<80;$i++)	{ BODY_20_39($i,@V); unshift(@V,pop(@V)); }
# Advance the input pointer, reload the end pointer from the stack,
# fold the previous state (loaded into X[0..4] by round 79) into the
# working variables, store the result and loop until the end pointer
# is reached. Then restore the saved registers and return; the stack
# adjustment sits in the delay slot of the return jump.
$code.=<<___;
	$PTR_ADD $inp,64
	$REG_L	$num,0($sp)

	addu	$A,$X[0]
	addu	$B,$X[1]
	sw	$A,0($ctx)
	addu	$C,$X[2]
	addu	$D,$X[3]
	sw	$B,4($ctx)
	addu	$E,$X[4]
	sw	$C,8($ctx)
	sw	$D,12($ctx)
	sw	$E,16($ctx)
	.set	noreorder
	bne	$inp,$num,.Loop
	nop

	.set	noreorder
	$REG_L	$ra,($FRAMESIZE-1)*$SZREG($sp)
	$REG_L	$fp,($FRAMESIZE-2)*$SZREG($sp)
	$REG_L	$s11,($FRAMESIZE-3)*$SZREG($sp)
	$REG_L	$s10,($FRAMESIZE-4)*$SZREG($sp)
	$REG_L	$s9,($FRAMESIZE-5)*$SZREG($sp)
	$REG_L	$s8,($FRAMESIZE-6)*$SZREG($sp)
	$REG_L	$s7,($FRAMESIZE-7)*$SZREG($sp)
	$REG_L	$s6,($FRAMESIZE-8)*$SZREG($sp)
	$REG_L	$s5,($FRAMESIZE-9)*$SZREG($sp)
	$REG_L	$s4,($FRAMESIZE-10)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
	$REG_L	$s3,($FRAMESIZE-11)*$SZREG($sp)
	$REG_L	$s2,($FRAMESIZE-12)*$SZREG($sp)
	$REG_L	$s1,($FRAMESIZE-13)*$SZREG($sp)
	$REG_L	$s0,($FRAMESIZE-14)*$SZREG($sp)
	$REG_L	$gp,($FRAMESIZE-15)*$SZREG($sp)
___
$code.=<<___;
	jr	$ra
	$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end	sha1_block_data_order
.rdata
.asciiz	"SHA1 for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___
print $code;
# Buffered write errors surface at close; do not leave a truncated
# assembly file behind unnoticed.
close STDOUT or die "error closing STDOUT: $!";