#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# This module doesn't present direct interest for OpenSSL, because it
# doesn't provide better performance for longer keys. While 512-bit
# RSA private key operations are 40% faster, 1024-bit ones are hardly
# faster at all, while longer key operations are slower by up to 20%.
# It might be of interest to embedded system developers though, as
# it's smaller than 1KB, yet offers ~3x improvement over compiler
# generated code.
#
# The module targets N32 and N64 MIPS ABIs and currently is a bit
# IRIX-centric, i.e. is likely to require adaptation for other OSes.

use strict;
use warnings;

# This script takes no input: it interpolates the symbolic register
# names below into one assembly template and writes the result to
# STDOUT.  Every name is a lexical so that, under "use strict", a
# misspelt register variable in the template is a compile-time error
# instead of silently expanding to an empty string.

# int bn_mul_mont(
my $rp="a0";	# BN_ULONG *rp,
my $ap="a1";	# const BN_ULONG *ap,
my $bp="a2";	# const BN_ULONG *bp,
my $np="a3";	# const BN_ULONG *np,
my $n0="a4";	# const BN_ULONG *n0,
my $num="a5";	# int num);

# Working registers.  lo/hi pairs receive the two halves of a dmultu
# product; index 0 tracks the ap[]*bp[i] chain, index 1 the np[]*m1
# chain.
my $lo0="a6";
my $hi0="a7";
my $lo1="v0";
my $hi1="v1";
my $aj="t0";	# ap[j], also scratch for the address of ap[j]
my $bi="t1";	# bp[i], also scratch for the address of bp[i]
my $nj="t2";	# np[j], also scratch for the address of np[j]
my $tp="t3";	# cursor into the temporary vector kept on the stack
my $alo="s0";	# product ap[j]*bp[i] fetched one iteration ahead
my $ahi="s1";
my $nlo="s2";	# product np[j]*m1 fetched one iteration ahead
my $nhi="s3";
my $tj="s4";	# tp[j] value; later reused as the end pointer &tp[num]
my $i="s5";	# outer-loop byte offset into bp[]
my $j="s6";	# inner-loop byte offset into ap[]/np[]
my $fp="t8";	# copy of sp taken after the 64-byte save area is carved
my $m1="t9";	# low word of lo0*n0, the per-row multiplier for np[]

# NOTE(review): $FRAME is not referenced anywhere in this file; the
# prologue and epilogue below hard-code a 64-byte save area (8 slots
# for s0-s7).  It is kept only so the value is not lost; confirm
# whether it can be dropped.
my $FRAME=8*(2+8);

# Layout of the generated bn_mul_mont routine:
#
#   entry     Reserve 64 bytes and remember that sp in $fp.  If
#             num < 4, return 0 right away with sp restored.
#   .Lproceed Load *n0 and the first words of ap/bp/np, then carve
#             num*8 + 16 bytes off the stack and round sp down to a
#             4096-byte boundary; this area is the temporary vector
#             tp[].  s0-s7 are saved at 0..56($fp).  From here on $num
#             holds the length in bytes (num << 3).
#   .L1st     First row: walks ap[] and np[] with bp[0] and m1,
#             storing the running words into tp[].  The two topmost
#             words (the final sum and its carry) are stored after
#             the loop.
#   .Louter/  Remaining rows: for each further bp[i], recompute m1
#   .Linner   from tp[0] and accumulate both product chains on top of
#             the existing tp[] contents.
#   .Lsub     rp[] = tp[] - np[] with borrow propagation.
#   .Lcopy    The borrow combined with the topmost word forms an
#             all-ones/all-zeros mask selecting tp or rp as the copy
#             source ("ap=borrow?tp:rp"); the chosen words are copied
#             to rp[] while tp[] is overwritten with zeros.
#   exit      Restore s0-s7, return 1 with sp = $fp + 64.
#
# The template text is emitted verbatim, so nothing may be added
# inside the here-document without changing the generated assembly.
# Instruction order inside the ".set noreorder" regions is significant
# (the instruction after each branch/jump sits in its delay slot).
my $code=<<___;
#include <asm.h>
#include <regdef.h>

.text

.set	noat
.set	reorder

.align	5
.globl	bn_mul_mont
.ent	bn_mul_mont
bn_mul_mont:
	.set	noreorder
	PTR_SUB	sp,64
	move	$fp,sp
	.frame	$fp,64,ra
	slt	AT,$num,4
	li	v0,0
	beqzl	AT,.Lproceed
	nop
	jr	ra
	PTR_ADD	sp,$fp,64
	.set	reorder
.align	5
.Lproceed:
	ld	$n0,0($n0)
	ld	$bi,0($bp)	# bp[0]
	ld	$aj,0($ap)	# ap[0]
	ld	$nj,0($np)	# np[0]
	PTR_SUB	sp,16		# place for two extra words
	sll	$num,3
	li	AT,-4096
	PTR_SUB	sp,$num
	and	sp,AT

	sd	s0,0($fp)
	sd	s1,8($fp)
	sd	s2,16($fp)
	sd	s3,24($fp)
	sd	s4,32($fp)
	sd	s5,40($fp)
	sd	s6,48($fp)
	sd	s7,56($fp)

	dmultu	$aj,$bi
	ld	$alo,8($ap)
	ld	$nlo,8($np)
	mflo	$lo0
	mfhi	$hi0
	dmultu	$lo0,$n0
	mflo	$m1

	dmultu	$alo,$bi
	mflo	$alo
	mfhi	$ahi

	dmultu	$nj,$m1
	mflo	$lo1
	mfhi	$hi1
	dmultu	$nlo,$m1
	daddu	$lo1,$lo0
	sltu	AT,$lo1,$lo0
	daddu	$hi1,AT
	mflo	$nlo
	mfhi	$nhi

	move	$tp,sp
	li	$j,16
.align	4
.L1st:
	.set	noreorder
	PTR_ADD	$aj,$ap,$j
	ld	$aj,($aj)
	PTR_ADD	$nj,$np,$j
	ld	$nj,($nj)

	dmultu	$aj,$bi
	daddu	$lo0,$alo,$hi0
	daddu	$lo1,$nlo,$hi1
	sltu	AT,$lo0,$hi0
	sltu	s7,$lo1,$hi1
	daddu	$hi0,$ahi,AT
	daddu	$hi1,$nhi,s7
	mflo	$alo
	mfhi	$ahi

	daddu	$lo1,$lo0
	sltu	AT,$lo1,$lo0
	dmultu	$nj,$m1
	daddu	$hi1,AT
	addu	$j,8
	sd	$lo1,($tp)
	sltu	s7,$j,$num
	mflo	$nlo
	mfhi	$nhi

	bnez	s7,.L1st
	PTR_ADD	$tp,8
	.set	reorder

	daddu	$lo0,$alo,$hi0
	sltu	AT,$lo0,$hi0
	daddu	$hi0,$ahi,AT

	daddu	$lo1,$nlo,$hi1
	sltu	s7,$lo1,$hi1
	daddu	$hi1,$nhi,s7
	daddu	$lo1,$lo0
	sltu	AT,$lo1,$lo0
	daddu	$hi1,AT

	sd	$lo1,($tp)

	daddu	$hi1,$hi0
	sltu	AT,$hi1,$hi0
	sd	$hi1,8($tp)
	sd	AT,16($tp)

	li	$i,8
.align	4
.Louter:
	PTR_ADD	$bi,$bp,$i
	ld	$bi,($bi)
	ld	$aj,($ap)
	ld	$alo,8($ap)
	ld	$tj,(sp)

	dmultu	$aj,$bi
	ld	$nj,($np)
	ld	$nlo,8($np)
	mflo	$lo0
	mfhi	$hi0
	daddu	$lo0,$tj
	dmultu	$lo0,$n0
	sltu	AT,$lo0,$tj
	daddu	$hi0,AT
	mflo	$m1

	dmultu	$alo,$bi
	mflo	$alo
	mfhi	$ahi

	dmultu	$nj,$m1
	mflo	$lo1
	mfhi	$hi1

	dmultu	$nlo,$m1
	daddu	$lo1,$lo0
	sltu	AT,$lo1,$lo0
	daddu	$hi1,AT
	mflo	$nlo
	mfhi	$nhi

	move	$tp,sp
	li	$j,16
	ld	$tj,8($tp)
.align	4
.Linner:
	.set	noreorder
	PTR_ADD	$aj,$ap,$j
	ld	$aj,($aj)
	PTR_ADD	$nj,$np,$j
	ld	$nj,($nj)

	dmultu	$aj,$bi
	daddu	$lo0,$alo,$hi0
	daddu	$lo1,$nlo,$hi1
	sltu	AT,$lo0,$hi0
	sltu	s7,$lo1,$hi1
	daddu	$hi0,$ahi,AT
	daddu	$hi1,$nhi,s7
	mflo	$alo
	mfhi	$ahi

	daddu	$lo0,$tj
	addu	$j,8
	dmultu	$nj,$m1
	sltu	AT,$lo0,$tj
	daddu	$lo1,$lo0
	daddu	$hi0,AT
	sltu	s7,$lo1,$lo0
	ld	$tj,16($tp)
	daddu	$hi1,s7
	sltu	AT,$j,$num
	mflo	$nlo
	mfhi	$nhi
	sd	$lo1,($tp)
	bnez	AT,.Linner
	PTR_ADD	$tp,8
	.set	reorder

	daddu	$lo0,$alo,$hi0
	sltu	AT,$lo0,$hi0
	daddu	$hi0,$ahi,AT
	daddu	$lo0,$tj
	sltu	s7,$lo0,$tj
	daddu	$hi0,s7

	ld	$tj,16($tp)
	daddu	$lo1,$nlo,$hi1
	sltu	AT,$lo1,$hi1
	daddu	$hi1,$nhi,AT
	daddu	$lo1,$lo0
	sltu	s7,$lo1,$lo0
	daddu	$hi1,s7
	sd	$lo1,($tp)

	daddu	$lo1,$hi1,$hi0
	sltu	$hi1,$lo1,$hi0
	daddu	$lo1,$tj
	sltu	AT,$lo1,$tj
	daddu	$hi1,AT
	sd	$lo1,8($tp)
	sd	$hi1,16($tp)

	addu	$i,8
	sltu	s7,$i,$num
	bnez	s7,.Louter

	.set	noreorder
	PTR_ADD	$tj,sp,$num	# &tp[num]
	move	$tp,sp
	move	$ap,sp
	li	$hi0,0		# clear borrow bit

.align	4
.Lsub:	ld	$lo0,($tp)
	ld	$lo1,($np)
	PTR_ADD	$tp,8
	PTR_ADD	$np,8
	dsubu	$lo1,$lo0,$lo1	# tp[i]-np[i]
	sgtu	AT,$lo1,$lo0
	dsubu	$lo0,$lo1,$hi0
	sgtu	$hi0,$lo0,$lo1
	sd	$lo0,($rp)
	or	$hi0,AT
	sltu	AT,$tp,$tj
	bnez	AT,.Lsub
	PTR_ADD	$rp,8

	dsubu	$hi0,$hi1,$hi0	# handle upmost overflow bit
	move	$tp,sp
	PTR_SUB	$rp,$num	# restore rp
	not	$hi1,$hi0

	and	$ap,$hi0,sp
	and	$bp,$hi1,$rp
	or	$ap,$ap,$bp	# ap=borrow?tp:rp

.align	4
.Lcopy:	ld	$aj,($ap)
	PTR_ADD	$ap,8
	PTR_ADD	$tp,8
	sd	zero,-8($tp)
	sltu	AT,$tp,$tj
	sd	$aj,($rp)
	bnez	AT,.Lcopy
	PTR_ADD	$rp,8

	ld	s0,0($fp)
	ld	s1,8($fp)
	ld	s2,16($fp)
	ld	s3,24($fp)
	ld	s4,32($fp)
	ld	s5,40($fp)
	ld	s6,48($fp)
	ld	s7,56($fp)
	li	v0,1
	jr	ra
	PTR_ADD	sp,$fp,64
	.set	reorder
END(bn_mul_mont)
.rdata
.asciiz	"Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>"
___

# STDOUT is normally redirected into the .s file by the build.  Output
# is buffered, so a write failure (full disk, closed pipe) may only be
# reported by close; check both calls so a truncated assembly file
# makes the script exit non-zero instead of looking like a success.
print $code or die "error writing to STDOUT: $!";
close STDOUT or die "error closing STDOUT: $!";