1#! /usr/bin/env perl 2# Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the OpenSSL license (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html 8 9 10# ==================================================================== 11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL 12# project. The module is, however, dual licensed under OpenSSL and 13# CRYPTOGAMS licenses depending on where you obtain it. For further 14# details see http://www.openssl.org/~appro/cryptogams/. 15# ==================================================================== 16 17# I let hardware handle unaligned input(*), except on page boundaries 18# (see below for details). Otherwise straightforward implementation 19# with X vector in register bank. 20# 21# (*) this means that this module is inappropriate for PPC403? Does 22# anybody know if pre-POWER3 can sustain unaligned load? 23 24# -m64 -m32 25# ---------------------------------- 26# PPC970,gcc-4.0.0 +76% +59% 27# Power6,xlc-7 +68% +33% 28 29$flavour = shift; 30 31if ($flavour =~ /64/) { 32 $SIZE_T =8; 33 $LRSAVE =2*$SIZE_T; 34 $UCMP ="cmpld"; 35 $STU ="stdu"; 36 $POP ="ld"; 37 $PUSH ="std"; 38} elsif ($flavour =~ /32/) { 39 $SIZE_T =4; 40 $LRSAVE =$SIZE_T; 41 $UCMP ="cmplw"; 42 $STU ="stwu"; 43 $POP ="lwz"; 44 $PUSH ="stw"; 45} else { die "nonsense $flavour"; } 46 47# Define endianness based on flavour 48# i.e.: linux64le 49$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; 50 51$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 52( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or 53( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or 54die "can't locate ppc-xlate.pl"; 55 56open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; 57 58$FRAME=24*$SIZE_T+64; 59$LOCALS=6*$SIZE_T; 60 61$K ="r0"; 62$sp ="r1"; 63$toc="r2"; 64$ctx="r3"; 65$inp="r4"; 66$num="r5"; 67$t0 ="r15"; 68$t1 ="r6"; 69 70$A ="r7"; 71$B ="r8"; 72$C ="r9"; 73$D ="r10"; 74$E ="r11"; 75$T ="r12"; 76 77@V=($A,$B,$C,$D,$E,$T); 78@X=("r16","r17","r18","r19","r20","r21","r22","r23", 79 "r24","r25","r26","r27","r28","r29","r30","r31"); 80 81sub loadbe { 82my ($dst, $src, $temp_reg) = @_; 83$code.=<<___ if (!$LITTLE_ENDIAN); 84 lwz $dst,$src 85___ 86$code.=<<___ if ($LITTLE_ENDIAN); 87 lwz $temp_reg,$src 88 rotlwi $dst,$temp_reg,8 89 rlwimi $dst,$temp_reg,24,0,7 90 rlwimi $dst,$temp_reg,24,16,23 91___ 92} 93 94sub BODY_00_19 { 95my ($i,$a,$b,$c,$d,$e,$f)=@_; 96my $j=$i+1; 97 98 # Since the last value of $f is discarded, we can use 99 # it as a temp reg to swap byte-order when needed. 100 loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0); 101 loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15); 102$code.=<<___ if ($i<15); 103 add $f,$K,$e 104 rotlwi $e,$a,5 105 add $f,$f,@X[$i] 106 and $t0,$c,$b 107 add $f,$f,$e 108 andc $t1,$d,$b 109 rotlwi $b,$b,30 110 or $t0,$t0,$t1 111 add $f,$f,$t0 112___ 113$code.=<<___ if ($i>=15); 114 add $f,$K,$e 115 rotlwi $e,$a,5 116 xor @X[$j%16],@X[$j%16],@X[($j+2)%16] 117 add $f,$f,@X[$i%16] 118 and $t0,$c,$b 119 xor @X[$j%16],@X[$j%16],@X[($j+8)%16] 120 add $f,$f,$e 121 andc $t1,$d,$b 122 rotlwi $b,$b,30 123 or $t0,$t0,$t1 124 xor @X[$j%16],@X[$j%16],@X[($j+13)%16] 125 add $f,$f,$t0 126 rotlwi @X[$j%16],@X[$j%16],1 127___ 128} 129 130sub BODY_20_39 { 131my ($i,$a,$b,$c,$d,$e,$f)=@_; 132my $j=$i+1; 133$code.=<<___ if ($i<79); 134 add $f,$K,$e 135 xor $t0,$b,$d 136 rotlwi $e,$a,5 137 xor @X[$j%16],@X[$j%16],@X[($j+2)%16] 138 add $f,$f,@X[$i%16] 139 xor $t0,$t0,$c 140 xor @X[$j%16],@X[$j%16],@X[($j+8)%16] 141 add $f,$f,$t0 142 rotlwi $b,$b,30 143 xor @X[$j%16],@X[$j%16],@X[($j+13)%16] 144 add $f,$f,$e 145 rotlwi @X[$j%16],@X[$j%16],1 146___ 147$code.=<<___ if ($i==79); 148 add $f,$K,$e 149 xor $t0,$b,$d 150 rotlwi $e,$a,5 151 lwz r16,0($ctx) 152 add $f,$f,@X[$i%16] 153 xor $t0,$t0,$c 154 lwz r17,4($ctx) 155 add $f,$f,$t0 156 rotlwi $b,$b,30 157 lwz r18,8($ctx) 158 lwz r19,12($ctx) 159 add $f,$f,$e 160 lwz r20,16($ctx) 161___ 162} 163 164sub BODY_40_59 { 165my ($i,$a,$b,$c,$d,$e,$f)=@_; 166my $j=$i+1; 167$code.=<<___; 168 add $f,$K,$e 169 rotlwi $e,$a,5 170 xor @X[$j%16],@X[$j%16],@X[($j+2)%16] 171 add $f,$f,@X[$i%16] 172 and $t0,$b,$c 173 xor @X[$j%16],@X[$j%16],@X[($j+8)%16] 174 add $f,$f,$e 175 or $t1,$b,$c 176 rotlwi $b,$b,30 177 xor @X[$j%16],@X[$j%16],@X[($j+13)%16] 178 and $t1,$t1,$d 179 or $t0,$t0,$t1 180 rotlwi @X[$j%16],@X[$j%16],1 181 add $f,$f,$t0 182___ 183} 184 185$code=<<___; 186.machine "any" 187.text 188 189.globl .sha1_block_data_order 190.align 4 191.sha1_block_data_order: 192 $STU $sp,-$FRAME($sp) 193 mflr r0 194 $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 195 $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 196 $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 197 $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 198 $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 199 $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 200 $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 201 $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 202 $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 203 $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 204 $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 205 $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 206 $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 207 $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 208 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 209 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 210 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 211 $PUSH r0,`$FRAME+$LRSAVE`($sp) 212 lwz $A,0($ctx) 213 lwz $B,4($ctx) 214 lwz $C,8($ctx) 215 lwz $D,12($ctx) 216 lwz $E,16($ctx) 217 andi. r0,$inp,3 218 bne Lunaligned 219Laligned: 220 mtctr $num 221 bl Lsha1_block_private 222 b Ldone 223 224; PowerPC specification allows an implementation to be ill-behaved 225; upon unaligned access which crosses page boundary. "Better safe 226; than sorry" principle makes me treat it specially. But I don't 227; look for particular offending word, but rather for 64-byte input 228; block which crosses the boundary. Once found that block is aligned 229; and hashed separately... 230.align 4 231Lunaligned: 232 subfic $t1,$inp,4096 233 andi. $t1,$t1,4095 ; distance to closest page boundary 234 srwi. $t1,$t1,6 ; t1/=64 235 beq Lcross_page 236 $UCMP $num,$t1 237 ble Laligned ; didn't cross the page boundary 238 mtctr $t1 239 subfc $num,$t1,$num 240 bl Lsha1_block_private 241Lcross_page: 242 li $t1,16 243 mtctr $t1 244 addi r20,$sp,$LOCALS ; spot within the frame 245Lmemcpy: 246 lbz r16,0($inp) 247 lbz r17,1($inp) 248 lbz r18,2($inp) 249 lbz r19,3($inp) 250 addi $inp,$inp,4 251 stb r16,0(r20) 252 stb r17,1(r20) 253 stb r18,2(r20) 254 stb r19,3(r20) 255 addi r20,r20,4 256 bdnz Lmemcpy 257 258 $PUSH $inp,`$FRAME-$SIZE_T*18`($sp) 259 li $t1,1 260 addi $inp,$sp,$LOCALS 261 mtctr $t1 262 bl Lsha1_block_private 263 $POP $inp,`$FRAME-$SIZE_T*18`($sp) 264 addic. $num,$num,-1 265 bne Lunaligned 266 267Ldone: 268 $POP r0,`$FRAME+$LRSAVE`($sp) 269 $POP r15,`$FRAME-$SIZE_T*17`($sp) 270 $POP r16,`$FRAME-$SIZE_T*16`($sp) 271 $POP r17,`$FRAME-$SIZE_T*15`($sp) 272 $POP r18,`$FRAME-$SIZE_T*14`($sp) 273 $POP r19,`$FRAME-$SIZE_T*13`($sp) 274 $POP r20,`$FRAME-$SIZE_T*12`($sp) 275 $POP r21,`$FRAME-$SIZE_T*11`($sp) 276 $POP r22,`$FRAME-$SIZE_T*10`($sp) 277 $POP r23,`$FRAME-$SIZE_T*9`($sp) 278 $POP r24,`$FRAME-$SIZE_T*8`($sp) 279 $POP r25,`$FRAME-$SIZE_T*7`($sp) 280 $POP r26,`$FRAME-$SIZE_T*6`($sp) 281 $POP r27,`$FRAME-$SIZE_T*5`($sp) 282 $POP r28,`$FRAME-$SIZE_T*4`($sp) 283 $POP r29,`$FRAME-$SIZE_T*3`($sp) 284 $POP r30,`$FRAME-$SIZE_T*2`($sp) 285 $POP r31,`$FRAME-$SIZE_T*1`($sp) 286 mtlr r0 287 addi $sp,$sp,$FRAME 288 blr 289 .long 0 290 .byte 0,12,4,1,0x80,18,3,0 291 .long 0 292___ 293 294# This is private block function, which uses tailored calling 295# interface, namely upon entry SHA_CTX is pre-loaded to given 296# registers and counter register contains amount of chunks to 297# digest... 298$code.=<<___; 299.align 4 300Lsha1_block_private: 301___ 302$code.=<<___; # load K_00_19 303 lis $K,0x5a82 304 ori $K,$K,0x7999 305___ 306for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } 307$code.=<<___; # load K_20_39 308 lis $K,0x6ed9 309 ori $K,$K,0xeba1 310___ 311for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } 312$code.=<<___; # load K_40_59 313 lis $K,0x8f1b 314 ori $K,$K,0xbcdc 315___ 316for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } 317$code.=<<___; # load K_60_79 318 lis $K,0xca62 319 ori $K,$K,0xc1d6 320___ 321for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } 322$code.=<<___; 323 add r16,r16,$E 324 add r17,r17,$T 325 add r18,r18,$A 326 add r19,r19,$B 327 add r20,r20,$C 328 stw r16,0($ctx) 329 mr $A,r16 330 stw r17,4($ctx) 331 mr $B,r17 332 stw r18,8($ctx) 333 mr $C,r18 334 stw r19,12($ctx) 335 mr $D,r19 336 stw r20,16($ctx) 337 mr $E,r20 338 addi $inp,$inp,`16*4` 339 bdnz Lsha1_block_private 340 blr 341 .long 0 342 .byte 0,12,0x14,0,0,0,0,0 343.size .sha1_block_data_order,.-.sha1_block_data_order 344___ 345$code.=<<___; 346.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>" 347___ 348 349$code =~ s/\`([^\`]*)\`/eval $1/gem; 350print $code; 351close STDOUT or die "error closing STDOUT: $!"; 352