#! /usr/bin/env perl
# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# April 2010
#
# The module implements "4-bit" GCM GHASH function and underlying
# single multiplication operation in GF(2^128). "4-bit" means that it
# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
# it processes one byte in 19.6 cycles, which is more than twice as
# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
# processed byte. This is ~2.2x faster than 64-bit code generated by
# vendor compiler (which used to be very hard to beat:-).
#
# Special thanks to polarhome.com for providing HP-UX account.

# Command line: <flavour> <output file>.  A flavour containing "64" selects
# the 64-bit ABI settings below; anything else selects the 32-bit ones.
$flavour = shift;
$output = shift;

# Send the generated assembly to the requested file.  A failed open used to
# be ignored, which silently left the output on the inherited STDOUT; now it
# is fatal.  With no file name given STDOUT is left untouched, as before.
if (defined($output) && length($output)) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# ABI-dependent parameters: assembler .LEVEL, pointer size, frame layout,
# the store/load mnemonics used by prologue/epilogue, and the number of the
# highest callee-saved general register that the prologue stores.
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
	$NREGS		=6;
} else {
	$LEVEL		="1.0";	#"\n\t.ALLOW\t2.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
	$NREGS		=11;
}

$FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker
				#                 [+ argument transfer]

################# volatile registers
$Xi="%r26";	# argument block
$Htbl="%r25";
$inp="%r24";
$len="%r23";
$Hhh=$Htbl;	# variables
$Hll="%r22";
$Zhh="%r21";
$Zll="%r20";
$cnt="%r19";
$rem_4bit="%r28";
$rem="%r29";
$mask0xf0="%r31";

################# preserved registers
$Thh="%r1";
$Tll="%r2";
$nlo="%r3";
$nhi="%r4";
$byte="%r5";
if ($SIZE_T==4) {
	# The 32-bit code path keeps each 128-bit value in four 32-bit
	# registers, hence the additional "hl"/"lh" quarters.
	$Zhl="%r6";
	$Zlh="%r7";
	$Hhl="%r8";
	$Hlh="%r9";
	$Thl="%r10";
	$Tlh="%r11";
}
$rem2="%r6";	# used in PA-RISC 2.0 code

# The assembly below is accumulated in $code.  Text between back-ticks is
# evaluated as a Perl expression by the output loop at the end of the file.
# Chunks guarded by "if ($SIZE_T==4)" are emitted for the 32-bit flavour
# only.

# ---- gcm_gmult_4bit: prologue -----------------------------------------
$code.=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
	.ALIGN	64
gcm_gmult_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
$code.=<<___ if ($SIZE_T==4);
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Position-independent computation of the address of the L$rem_4bit table.
$code.=<<___;
	blr	%r0,$rem_4bit
	ldi	3,$rem
L\$pic_gmult
	andcm	$rem_4bit,$rem,$rem_4bit
	addl	$inp,$len,$len
	ldo	L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
	ldi	0xf0,$mask0xf0
___
# 32-bit flavour only: run-time selection between the PA-RISC 2.0 code that
# follows and the L$parisc1_gmult code further down.
# NOTE(review): presumably the extrd nullifies the branch on 2.0 CPUs -
# confirm against the architecture manual.
$code.=<<___ if ($SIZE_T==4);
	ldi	31,$rem
	mtctl	$rem,%cr11
	extrd,u,*=	$rem,%sar,1,$rem	; executes on PA-RISC 1.0
	b	L\$parisc1_gmult
	nop
___

# ---- gcm_gmult_4bit: PA-RISC 2.0 (64-bit register) code ---------------
$code.=<<___;
	ldb	15($Xi),$nlo
	ldo	8($Htbl),$Hll

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	ldd	$nlo($Hll),$Zll
	ldd	$nlo($Hhh),$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldb	14($Xi),$nlo

	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem
	b	L\$oop_gmult_pa2
	ldi	13,$cnt

	.ALIGN	8
L\$oop_gmult_pa2
	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem
	ldbx	$cnt($Xi),$nlo

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo
	ldd	$rem($rem_4bit),$rem

	xor	$Tll,$Zll,$Zll
	addib,uv	-1,$cnt,L\$oop_gmult_pa2
	xor	$Thh,$Zhh,$Zhh

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	std	$Zll,8($Xi)
	std	$Zhh,0($Xi)
___

# ---- gcm_gmult_4bit: PA-RISC 1.x (32-bit register) code ---------------
$code.=<<___ if ($SIZE_T==4);
	b	L\$done_gmult
	nop

L\$parisc1_gmult
	ldb	15($Xi),$nlo
	ldo	12($Htbl),$Hll
	ldo	8($Htbl),$Hlh
	ldo	4($Htbl),$Hhl

	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	ldwx	$nlo($Hll),$Zll
	ldwx	$nlo($Hlh),$Zlh
	ldwx	$nlo($Hhl),$Zhl
	ldwx	$nlo($Hhh),$Zhh
	zdep	$Zll,28,4,$rem
	ldb	14($Xi),$nlo
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldwx	$nhi($Hhl),$Thl
	extru	$Zhh,27,28,$Zhh
	ldwx	$nhi($Hhh),$Thh
	xor	$rem,$Zhh,$Zhh
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$Thl,$Zhl,$Zhl
	b	L\$oop_gmult_pa1
	ldi	13,$cnt

	.ALIGN	8
L\$oop_gmult_pa1
	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldbx	$cnt($Xi),$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$rem,$Zhh,$Zhh
	zdep	$Zll,28,4,$rem
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	and	$mask0xf0,$nlo,$nhi
	extru	$Zhh,27,28,$Zhh
	zdep	$nlo,27,4,$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$rem,$Zhh,$Zhh
	addib,uv	-1,$cnt,L\$oop_gmult_pa1
	xor	$Thl,$Zhl,$Zhl

	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$rem,$Zhh,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	zdep	$Zll,28,4,$rem
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	extru	$Zhh,27,28,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Tlh,$Zlh,$Zlh
	xor	$rem,$Zhh,$Zhh
	stw	$Zll,12($Xi)
	xor	$Thl,$Zhl,$Zhl
	stw	$Zlh,8($Xi)
	xor	$Thh,$Zhh,$Zhh
	stw	$Zhl,4($Xi)
	stw	$Zhh,0($Xi)
___
# ---- gcm_gmult_4bit: epilogue -----------------------------------------
$code.=<<___;
L\$done_gmult
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
___
$code.=<<___ if ($SIZE_T==4);
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
___
# ---- gcm_ghash_4bit: prologue -----------------------------------------
# ENTRY_GR uses $NREGS, exactly as gcm_gmult_4bit does: both prologues store
# the same set of registers, so the previously hard-coded 11 only matched
# the 32-bit flavour.
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.EXPORT	gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
	.ALIGN	64
gcm_ghash_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
$code.=<<___ if ($SIZE_T==4);
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
___
# Same table-address computation as in gcm_gmult_4bit; here the addl also
# turns $len into the end-of-input pointer tested by the outer loops.
$code.=<<___;
	blr	%r0,$rem_4bit
	ldi	3,$rem
L\$pic_ghash
	andcm	$rem_4bit,$rem,$rem_4bit
	addl	$inp,$len,$len
	ldo	L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
	ldi	0xf0,$mask0xf0
___
$code.=<<___ if ($SIZE_T==4);
	ldi	31,$rem
	mtctl	$rem,%cr11
	extrd,u,*=	$rem,%sar,1,$rem	; executes on PA-RISC 1.0
	b	L\$parisc1_ghash
	nop
___

# ---- gcm_ghash_4bit: PA-RISC 2.0 (64-bit register) code ---------------
$code.=<<___;
	ldb	15($Xi),$nlo
	ldo	8($Htbl),$Hll

L\$outer_ghash_pa2
	ldb	15($inp),$nhi
	xor	$nhi,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	ldd	$nlo($Hll),$Zll
	ldd	$nlo($Hhh),$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldb	14($Xi),$nlo
	ldb	14($inp),$byte

	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh
	xor	$byte,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem
	b	L\$oop_ghash_pa2
	ldi	13,$cnt

	.ALIGN	8
L\$oop_ghash_pa2
	xor	$rem,$Zhh,$Zhh		; moved here to work around gas bug
	depd,z	$Zll,60,4,$rem2

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldbx	$cnt($Xi),$nlo
	ldbx	$cnt($inp),$byte

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	ldd	$rem2($rem_4bit),$rem2

	xor	$rem2,$Zhh,$Zhh
	xor	$byte,$nlo,$nlo
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	and	$mask0xf0,$nlo,$nhi
	depd,z	$nlo,59,4,$nlo

	extrd,u	$Zhh,59,60,$Zhh
	xor	$Tll,$Zll,$Zll

	ldd	$rem($rem_4bit),$rem
	addib,uv	-1,$cnt,L\$oop_ghash_pa2
	xor	$Thh,$Zhh,$Zhh

	xor	$rem,$Zhh,$Zhh
	depd,z	$Zll,60,4,$rem2

	shrpd	$Zhh,$Zll,4,$Zll
	extrd,u	$Zhh,59,60,$Zhh
	ldd	$nlo($Hll),$Tll
	ldd	$nlo($Hhh),$Thh

	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh

	depd,z	$Zll,60,4,$rem
	shrpd	$Zhh,$Zll,4,$Zll
	ldd	$rem2($rem_4bit),$rem2

	xor	$rem2,$Zhh,$Zhh
	ldd	$nhi($Hll),$Tll
	ldd	$nhi($Hhh),$Thh

	extrd,u	$Zhh,59,60,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Thh,$Zhh,$Zhh
	ldd	$rem($rem_4bit),$rem

	xor	$rem,$Zhh,$Zhh
	std	$Zll,8($Xi)
	ldo	16($inp),$inp
	std	$Zhh,0($Xi)
	cmpb,*<>	$inp,$len,L\$outer_ghash_pa2
	copy	$Zll,$nlo
___

# ---- gcm_ghash_4bit: PA-RISC 1.x (32-bit register) code ---------------
$code.=<<___ if ($SIZE_T==4);
	b	L\$done_ghash
	nop

L\$parisc1_ghash
	ldb	15($Xi),$nlo
	ldo	12($Htbl),$Hll
	ldo	8($Htbl),$Hlh
	ldo	4($Htbl),$Hhl

L\$outer_ghash_pa1
	ldb	15($inp),$byte
	xor	$byte,$nlo,$nlo
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	ldwx	$nlo($Hll),$Zll
	ldwx	$nlo($Hlh),$Zlh
	ldwx	$nlo($Hhl),$Zhl
	ldwx	$nlo($Hhh),$Zhh
	zdep	$Zll,28,4,$rem
	ldb	14($Xi),$nlo
	ldb	14($inp),$byte
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldwx	$nhi($Hhl),$Thl
	extru	$Zhh,27,28,$Zhh
	ldwx	$nhi($Hhh),$Thh
	xor	$byte,$nlo,$nlo
	xor	$rem,$Zhh,$Zhh
	and	$mask0xf0,$nlo,$nhi
	zdep	$nlo,27,4,$nlo

	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$Thl,$Zhl,$Zhl
	b	L\$oop_ghash_pa1
	ldi	13,$cnt

	.ALIGN	8
L\$oop_ghash_pa1
	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	ldbx	$cnt($Xi),$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	ldbx	$cnt($inp),$byte
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$rem,$Zhh,$Zhh
	zdep	$Zll,28,4,$rem
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$byte,$nlo,$nlo
	shrpw	$Zhh,$Zhl,4,$Zhl
	and	$mask0xf0,$nlo,$nhi
	extru	$Zhh,27,28,$Zhh
	zdep	$nlo,27,4,$nlo
	xor	$Tll,$Zll,$Zll
	ldwx	$nlo($Hll),$Tll
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nlo($Hlh),$Tlh
	xor	$rem,$Zhh,$Zhh
	addib,uv	-1,$cnt,L\$oop_ghash_pa1
	xor	$Thl,$Zhl,$Zhl

	zdep	$Zll,28,4,$rem
	ldwx	$nlo($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	ldwx	$nlo($Hhh),$Thh
	shrpw	$Zhl,$Zlh,4,$Zlh
	xor	$Tll,$Zll,$Zll
	ldwx	$nhi($Hll),$Tll
	shrpw	$Zhh,$Zhl,4,$Zhl
	xor	$Tlh,$Zlh,$Zlh
	ldwx	$nhi($Hlh),$Tlh
	extru	$Zhh,27,28,$Zhh
	xor	$rem,$Zhh,$Zhh
	xor	$Thl,$Zhl,$Zhl
	ldwx	$nhi($Hhl),$Thl
	xor	$Thh,$Zhh,$Zhh
	ldwx	$nhi($Hhh),$Thh
	zdep	$Zll,28,4,$rem
	ldwx	$rem($rem_4bit),$rem
	shrpw	$Zlh,$Zll,4,$Zll
	shrpw	$Zhl,$Zlh,4,$Zlh
	shrpw	$Zhh,$Zhl,4,$Zhl
	extru	$Zhh,27,28,$Zhh
	xor	$Tll,$Zll,$Zll
	xor	$Tlh,$Zlh,$Zlh
	xor	$rem,$Zhh,$Zhh
	stw	$Zll,12($Xi)
	xor	$Thl,$Zhl,$Zhl
	stw	$Zlh,8($Xi)
	xor	$Thh,$Zhh,$Zhh
	stw	$Zhl,4($Xi)
	ldo	16($inp),$inp
	stw	$Zhh,0($Xi)
	comb,<>	$inp,$len,L\$outer_ghash_pa1
	copy	$Zll,$nlo
___
# ---- gcm_ghash_4bit: epilogue -----------------------------------------
$code.=<<___;
L\$done_ghash
	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2		; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
___
$code.=<<___ if ($SIZE_T==4);
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
___
# Return sequence plus the shared L$rem_4bit table: sixteen entries of two
# 32-bit words each, the first word being a 16-bit constant shifted left by
# 16 and the second word zero.
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.ALIGN	64
L\$rem_4bit
	.WORD	`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
	.WORD	`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
	.WORD	`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
	.WORD	`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
	.STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>"
	.ALIGN	64
___

# Explicitly encode PA-RISC 2.0 instructions used in this module, so
# that it can be compiled with .LEVEL 1.0.
# It should be noted that I wouldn't have to do this, if GNU assembler
# understood .ALLOW 2.0 directive...

# Hand encoders for the PA-RISC 2.0 instructions used by this module, keyed
# by mnemonic.  Every encoder receives the completer string ($mod, e.g. ""
# or ",u") and the operand string ($args).  When the operands match a form
# the encoder knows, it returns a ".WORD 0x........" line carrying the
# original instruction as a trailing comment; otherwise it returns the
# instruction text unchanged (tab-indented).
my %encoder_for = (
    ldd => sub {
        my ($mod, $args) = @_;
        my $orig = "ldd$mod\t$args";

        if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {           # format 4
            my $opcode = (0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3;
            return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
        }
        if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {          # format 5
            my $opcode = (0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3;
            $opcode |= (($1&0xF)<<17)|(($1&0x10)<<12);                  # encode offset
            $opcode |= (1<<5)  if ($mod =~ /^,m/);
            $opcode |= (1<<13) if ($mod =~ /^,mb/);
            return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
        }
        return "\t".$orig;
    },

    std => sub {
        my ($mod, $args) = @_;
        my $orig = "std$mod\t$args";

        if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) {          # format 3 suffices
            my $opcode = (0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
            return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
        }
        return "\t".$orig;
    },

    extrd => sub {
        my ($mod, $args) = @_;
        my $orig = "extrd$mod\t$args";

        # I only have ",u" completer, it's implicitly encoded...
        if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {       # format 15
            my $opcode = (0x36<<26)|($1<<21)|($4<<16);
            my $len = 32-$3;
            $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5);                   # encode pos
            $opcode |= (($len&0x20)<<7)|($len&0x1f);                    # encode len
            return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
        }
        if ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {           # format 12
            my $opcode = (0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
            my $len = 32-$2;
            $opcode |= (($len&0x20)<<3)|($len&0x1f);                    # encode len
            $opcode |= (1<<13) if ($mod =~ /,\**=/);
            return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
        }
        return "\t".$orig;
    },

    shrpd => sub {
        my ($mod, $args) = @_;
        my $orig = "shrpd$mod\t$args";

        if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {     # format 14
            my $opcode = (0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
            my $cpos = 63-$3;
            $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);             # encode sa
            return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
        }
        if ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {         # format 11
            return sprintf "\t.WORD\t0x%08x\t; %s",
                           (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3, $orig;
        }
        return "\t".$orig;
    },

    depd => sub {
        my ($mod, $args) = @_;
        my $orig = "depd$mod\t$args";

        # I only have ",z" completer, it's implicitly encoded...
        if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {       # format 16
            my $opcode = (0x3c<<26)|($4<<21)|($1<<16);
            my $cpos = 63-$2;
            my $len = 32-$3;
            $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);             # encode pos
            $opcode |= (($len&0x20)<<7)|($len&0x1f);                    # encode len
            return sprintf "\t.WORD\t0x%08x\t; %s", $opcode, $orig;
        }
        return "\t".$orig;
    },
);

# assemble($mnemonic, $mod, $args)
#
# Returns the text to emit for one instruction: the hand-encoded form if an
# encoder is registered for $mnemonic, otherwise the instruction itself as
# "\t<mnemonic><mod>\t<args>".  The encoder is found through an explicit
# table rather than by string-eval'ing a variable name built from the
# mnemonic.
sub assemble {
    my ($mnemonic, $mod, $args) = @_;
    my $encoder = $encoder_for{$mnemonic};

    return $encoder ? $encoder->($mod, $args)
                    : "\t$mnemonic$mod\t$args";
}

# Ask the compiler driver named by $ENV{CC} which assembler it uses.  With CC
# unset there is nothing to ask, so the probe is skipped and the assembler
# is treated as non-GNU (the same outcome the failed shell command gave).
my $gnuas = 0;
if (defined($ENV{CC}) && length($ENV{CC})) {
    my $probe = `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`;
    $gnuas = 1 if ($probe =~ /GNU assembler/);
}

# Post-process the accumulated template line by line and print it.
foreach my $line (split("\n", $code)) {
    # Replace every `expr` with the value of the Perl expression.
    $line =~ s/\`([^\`]*)\`/eval $1/ge;

    if ($SIZE_T==4) {
        # 32-bit flavour: hand-encode 2.0 instructions and strip the
        # 64-bit-only "*" condition completers.
        $line =~ s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        $line =~ s/cmpb,\*/comb,/;
        $line =~ s/,\*/,/;
    }

    # 64-bit flavour: directive spellings for GNU as, and bve for returns.
    if ($SIZE_T==8) {
        if ($gnuas) {
            $line =~ s/(\.LEVEL\s+2\.0)W/$1w/;
            $line =~ s/\.SPACE\s+\$TEXT\$/.text/;
            $line =~ s/\.SUBSPA.*//;
        }
        $line =~ s/\bbv\b/bve/;
    }

    print $line, "\n";
}

close STDOUT or die "error closing STDOUT: $!";