1! des_enc.m4 2! des_enc.S (generated from des_enc.m4) 3! 4! UltraSPARC assembler version of the LibDES/SSLeay/OpenSSL des_enc.c file. 5! 6! Version 1.0. 32-bit version. 7! 8! June 8, 2000. 9! 10! Version 2.0. 32/64-bit, PIC-ification, blended CPU adaptation 11! by Andy Polyakov. 12! 13! January 1, 2003. 14! 15! Assembler version: Copyright Svend Olaf Mikkelsen. 16! 17! Original C code: Copyright Eric A. Young. 18! 19! This code can be freely used by LibDES/SSLeay/OpenSSL users. 20! 21! The LibDES/SSLeay/OpenSSL copyright notices must be respected. 22! 23! This version can be redistributed. 24! 25! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S 26! 27! Global registers 1 to 5 are used. This is the same as done by the 28! cc compiler. The UltraSPARC load/store little endian feature is used. 29! 30! Instruction grouping often refers to one CPU cycle. 31! 32! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S 33! 34! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S 35! 36! Performance improvement according to './apps/openssl speed des' 37! 38! 32-bit build: 39! 23% faster than cc-5.2 -xarch=v8plus -xO5 40! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5 41! 64-bit build: 42! 50% faster than cc-5.2 -xarch=v9 -xO5 43! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5 44! 
.ident "des_enc.m4 2.1"
.file  "des_enc-sparc.S"

#if defined(__SUNPRO_C) && defined(__sparcv9)
# define ABI64  /* They've said -xarch=v9 at command line */
#elif defined(__GNUC__) && defined(__arch64__)
# define ABI64  /* They've said -m64 at command line */
#endif

#ifdef ABI64
  .register	%g2,#scratch
  .register	%g3,#scratch
# define	FRAME	-192	/* frame size passed to save */
# define	BIAS	2047	/* added to %sp when addressing argument slots */
# define	LDPTR	ldx	/* pointer-sized load */
# define	STPTR	stx	/* pointer-sized store */
# define	ARG0	128	/* offset of argument slot 0 from %sp+BIAS */
# define	ARGSZ	8	/* size of one argument slot */
# ifndef OPENSSL_SYSNAME_ULTRASPARC
# define OPENSSL_SYSNAME_ULTRASPARC
# endif
#else
# define	FRAME	-96	/* frame size passed to save */
# define	BIAS	0	/* added to %sp when addressing argument slots */
# define	LDPTR	ld	/* pointer-sized load */
# define	STPTR	st	/* pointer-sized store */
# define	ARG0	68	/* offset of argument slot 0 from %sp+BIAS */
# define	ARGSZ	4	/* size of one argument slot */
#endif

#define LOOPS 7		/* initial loop counter: 7 passes of two rounds, two more rounds follow the loop */

#define global0 %g0
#define global1 %g1
#define global2 %g2
#define global3 %g3
#define global4 %g4
#define global5 %g5

/* NOTE: local7 names %l6 and local6 names %l7 (the two are crossed). */
#define local0 %l0
#define local1 %l1
#define local2 %l2
#define local3 %l3
#define local4 %l4
#define local5 %l5
#define local7 %l6
#define local6 %l7

#define in0 %i0
#define in1 %i1
#define in2 %i2
#define in3 %i3
#define in4 %i4
#define in5 %i5
#define in6 %i6
#define in7 %i7

#define out0 %o0
#define out1 %o1
#define out2 %o2
#define out3 %o3
#define out4 %o4
#define out5 %o5
#define out6 %o6
#define out7 %o7

#define stub stb	/* store byte is spelled stub in this file */

changequote({,})


! Macro definitions:
!
! From here on m4 quotes are braces. Macro names in comments are written
! in braces so that m4 does not expand them.


! {ip_macro}
!
! The logic used in initial and final permutations is the same as in
! the C code. The permutations are done with a clever shift/xor/and
! technique.
!
! The address of sbox 1 is expected in global1 on entry (the sethi/or
! that once loaded it are reduced to nops below). The macro derives the
! addresses of sbox 2 to 5 into global2 to global5, address sbox 6 into
! local6, and address sbox 8 into out3.
!
! Rotates the halves 3 left to bring the sbox bits in convenient positions.
!
! Loads key first round from address in parameter 5 to out0, out1.
!
! After the original LibDES initial permutation, the resulting left
! is in the variable initially used for right and vice versa. The macro
! implements the possibility to keep the halves in the original registers.
!
! Words read relative to out2: +256, +260, +264 and +272 are used as
! and-masks for the permutation steps, +280 is the loop counter and +284
! is 0x0000FC00. The table itself is defined outside this part of the
! file; presumably it is the one labelled .des_and.
!
! parameter 1   left
! parameter 2   right
! parameter 3   result left (modify in first round)
! parameter 4   result right (use in first round)
! parameter 5   key address
! parameter 6   1/2 for include encryption/decryption
!               (any other value: no call is emitted)
! parameter 7   1 for move in1 to in3
! parameter 8   1 for move in3 to in4, 2 for move in4 to in3
! parameter 9   1 for load ks3 and ks2 to in4 and in3

define(ip_macro, {

! {ip_macro}
! $1 $2 $4 $3 $5 $6 $7 $8 $9

	ld	[out2+256], local1
	srl	$2, 4, local4

	xor	local4, $1, local4
	ifelse($7,1,{mov in1, in3},{nop})

	ld	[out2+260], local2
	and	local4, local1, local4
	ifelse($8,1,{mov in3, in4},{})
	ifelse($8,2,{mov in4, in3},{})

	ld	[out2+280], out4	! loop counter
	sll	local4, 4, local1
	xor	$1, local4, $1

	ld	[out2+264], local3
	srl	$1, 16, local4
	xor	$2, local1, $2

	ifelse($9,1,{LDPTR KS3, in4},{})
	xor	local4, $2, local4
	nop	!sethi	%hi(DES_SPtrans), global1 ! sbox addr

	ifelse($9,1,{LDPTR KS2, in3},{})
	and	local4, local2, local4
	nop	!or	global1, %lo(DES_SPtrans), global1   ! sbox addr

	sll	local4, 16, local1
	xor	$2, local4, $2

	srl	$2, 2, local4
	xor	$1, local1, $1

	sethi	%hi(16711680), local5	! 16711680 is 0x00ff0000
	xor	local4, $1, local4

	and	local4, local3, local4
	or	local5, 255, local5	! local5 is now the mask 0x00ff00ff

	sll	local4, 2, local2
	xor	$1, local4, $1

	srl	$1, 8, local4
	xor	$2, local2, $2

	xor	local4, $2, local4
	add	global1, 768, global4	! address sbox 4

	and	local4, local5, local4
	add	global1, 1024, global5	! address sbox 5

	ld	[out2+272], local7
	sll	local4, 8, local1
	xor	$2, local4, $2

	srl	$2, 1, local4
	xor	$1, local1, $1

	ld	[$5], out0		! key 7531
	xor	local4, $1, local4
	add	global1, 256, global2	! address sbox 2

	ld	[$5+4], out1		! key 8642
	and	local4, local7, local4
	add	global1, 512, global3	! address sbox 3

	sll	local4, 1, local1
	xor	$1, local4, $1

	sll	$1, 3, local3		! rotate 3 left, low part
	xor	$2, local1, $2

	sll	$2, 3, local2		! rotate 3 left, low part
	add	global1, 1280, local6	! address sbox 6

	srl	$1, 29, local4		! rotate 3 left, wrapped bits
	add	global1, 1792, out3	! address sbox 8

	srl	$2, 29, local1		! rotate 3 left, wrapped bits
	or	local4, local3, $4

	or	local2, local1, $3

	ifelse($6, 1, {

		ld	[out2+284], local5	! 0x0000FC00 used in the rounds
		or	local2, local1, $3	! repeats the or just above
		xor	$4, out0, local1

		call .des_enc.1
		and	local1, 252, local1	! delay slot: sbox 1 offset first round

	},{})

	ifelse($6, 2, {

		ld	[out2+284], local5	! 0x0000FC00 used in the rounds
		or	local2, local1, $3	! repeats the or just above
		xor	$4, out0, local1

		call .des_dec.1
		and	local1, 252, local1	! delay slot: sbox 1 offset first round

	},{})
})


! {rounds_macro}
!
! The logic used in the DES rounds is the same as in the C code,
! except that calculations for sbox 1 and sbox 5 begin before
! the previous round is finished.
!
! In each round one half (work) is modified based on key and the
! other half (use).
!
! In this version we do two rounds in a loop repeated 7 times
! and two rounds separately.
!
! One half has the bits for the sboxes in the following positions:
!
!	777777xx555555xx333333xx111111xx
!
!	88xx666666xx444444xx222222xx8888
!
! The bits for each sbox are xor-ed with the key bits for that box.
! The above xx bits are cleared, and the result used for lookup in
! the sbox table. Each sbox entry contains the 4 output bits permuted
! into 32 bits according to the P permutation.
!
! In the description of DES, left and right are switched after
! each round, except after last round. In this code the original
! left and right are kept in the same register in all rounds, meaning
! that after the 16 rounds the result for right is in the register
! originally used for left.
!
!
! parameter 1  first work (left in first round)
! parameter 2  first use (right in first round)
! parameter 3  enc/dec  1/-1
! parameter 4  loop label
! parameter 5  key address register
! parameter 6  optional address for key next encryption/decryption
! parameter 7  not empty for include retl
!
! also compares in2 to 8
!
! On entry out0/out1 hold the first round key, out4 the loop counter,
! global1 to global5, local6 and out3 the sbox addresses, and out2 the
! address of the constant table. On exit the first work half is also
! left split in local3 (shifted 3 right) and local4 (shifted 29 left).

define(rounds_macro, {

! {rounds_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	xor	$2, out0, local1

	ld	[out2+284], local5	! 0x0000FC00
	ba	$4			! skip the alignment padding
	and	local1, 252, local1	! delay slot: sbox 1 offset

	.align 32

$4:
	! local6 is address sbox 6
	! out3   is address sbox 8
	! out4   is loop counter

	ld	[global1+local1], local1
	xor	$2, out1, out1		! 8642
	xor	$2, out0, out0		! 7531
	! fmovs	%f0, %f0		! fxor used for alignment

	srl	out1, 4, local0		! rotate 4 right
	and	out0, local5, local3	! 3
	! fmovs	%f0, %f0

	ld	[$5+$3*8], local7	! key 7531 next round
	srl	local3, 8, local3	! 3
	and	local0, 252, local2	! 2
	! fmovs	%f0, %f0

	ld	[global3+local3],local3	! 3
	sll	out1, 28, out1		! rotate
	xor	$1, local1, $1		! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2 ! 2
	srl	out0, 24, local1	! 7
	or	out1, local0, out1	! rotate

	ldub	[out2+local1], local1	! 7 (and 0xFC)
	srl	out1, 24, local0	! 8
	and	out1, local5, local4	! 4

	ldub	[out2+local0], local0	! 8 (and 0xFC)
	srl	local4, 8, local4	! 4
	xor	$1, local2, $1		! 2 finished local2 now sbox 6

	ld	[global4+local4],local4	! 4
	srl	out1, 16, local2	! 6
	xor	$1, local3, $1		! 3 finished local3 now sbox 5

	ld	[out3+local0],local0	! 8
	and	local2, 252, local2	! 6
	add	global1, 1536, local5	! address sbox 7

	ld	[local6+local2], local2	! 6
	srl	out0, 16, local3	! 5
	xor	$1, local4, $1		! 4 finished

	ld	[local5+local1],local1	! 7
	and	local3, 252, local3	! 5
	xor	$1, local0, $1		! 8 finished

	ld	[global5+local3],local3	! 5
	xor	$1, local2, $1		! 6 finished
	subcc	out4, 1, out4		! loop counter, sets icc for the bne below

	ld	[$5+$3*8+4], out0	! key 8642 next round
	xor	$1, local7, local2	! sbox 5 next round
	xor	$1, local1, $1		! 7 finished

	srl	local2, 16, local2	! sbox 5 next round
	xor	$1, local3, $1		! 5 finished

	ld	[$5+$3*16+4], out1	! key 8642 next round again
	and	local2, 252, local2	! sbox5 next round
! next round
	xor	$1, local7, local7	! 7531

	ld	[global5+local2], local2 ! 5
	srl	local7, 24, local3	! 7
	xor	$1, out0, out0		! 8642

	ldub	[out2+local3], local3	! 7 (and 0xFC)
	srl	out0, 4, local0		! rotate 4 right
	and	local7, 252, local1	! 1

	sll	out0, 28, out0		! rotate
	xor	$2, local2, $2		! 5 finished local2 used

	srl	local0, 8, local4	! 4
	and	local0, 252, local2	! 2
	ld	[local5+local3], local3	! 7

	srl	local0, 16, local5	! 6
	or	out0, local0, out0	! rotate
	ld	[global2+local2], local2 ! 2

	srl	out0, 24, local0
	ld	[$5+$3*16], out0	! key 7531 next round
	and	local4, 252, local4	! 4

	and	local5, 252, local5	! 6
	ld	[global4+local4], local4 ! 4
	xor	$2, local3, $2		! 7 finished local3 used

	and	local0, 252, local0	! 8
	ld	[local6+local5], local5	! 6
	xor	$2, local2, $2		! 2 finished local2 now sbox 3

	srl	local7, 8, local2	! 3 start
	ld	[out3+local0], local0	! 8
	xor	$2, local4, $2		! 4 finished

	and	local2, 252, local2	! 3
	ld	[global1+local1], local1 ! 1
	xor	$2, local5, $2		! 6 finished local5 used

	ld	[global3+local2], local2 ! 3
	xor	$2, local0, $2		! 8 finished
	add	$5, $3*16, $5		! enc add 16, dec add -16 to key pointer (two 8 byte round keys)

	ld	[out2+284], local5	! 0x0000FC00
	xor	$2, out0, local4	! sbox 1 next round
	xor	$2, local1, $2		! 1 finished

	xor	$2, local2, $2		! 3 finished
#ifdef OPENSSL_SYSNAME_ULTRASPARC
	bne,pt	%icc, $4
#else
	bne	$4
#endif
	and	local4, 252, local1	! sbox 1 next round

! two rounds more:

	ld	[global1+local1], local1
	xor	$2, out1, out1
	xor	$2, out0, out0

	srl	out1, 4, local0		! rotate
	and	out0, local5, local3

	ld	[$5+$3*8], local7	! key 7531
	srl	local3, 8, local3
	and	local0, 252, local2

	ld	[global3+local3],local3
	sll	out1, 28, out1		! rotate
	xor	$1, local1, $1		! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2
	srl	out0, 24, local1
	or	out1, local0, out1	! rotate

	ldub	[out2+local1], local1
	srl	out1, 24, local0
	and	out1, local5, local4

	ldub	[out2+local0], local0
	srl	local4, 8, local4
	xor	$1, local2, $1		! 2 finished local2 now sbox 6

	ld	[global4+local4],local4
	srl	out1, 16, local2
	xor	$1, local3, $1		! 3 finished local3 now sbox 5

	ld	[out3+local0],local0
	and	local2, 252, local2
	add	global1, 1536, local5	! address sbox 7

	ld	[local6+local2], local2
	srl	out0, 16, local3
	xor	$1, local4, $1		! 4 finished

	ld	[local5+local1],local1
	and	local3, 252, local3
	xor	$1, local0, $1

	ld	[global5+local3],local3
	xor	$1, local2, $1		! 6 finished
	cmp	in2, 8			! icc is left for the code after the macro

	ifelse($6,{}, {}, {ld	[out2+280], out4})	! loop counter
	xor	$1, local7, local2	! sbox 5 next round
	xor	$1, local1, $1		! 7 finished

	ld	[$5+$3*8+4], out0
	srl	local2, 16, local2	! sbox 5 next round
	xor	$1, local3, $1		! 5 finished

	and	local2, 252, local2
! next round (two rounds more)
	xor	$1, local7, local7	! 7531

	ld	[global5+local2], local2
	srl	local7, 24, local3
	xor	$1, out0, out0		! 8642

	ldub	[out2+local3], local3
	srl	out0, 4, local0		! rotate
	and	local7, 252, local1

	sll	out0, 28, out0		! rotate
	xor	$2, local2, $2		! 5 finished local2 used

	srl	local0, 8, local4
	and	local0, 252, local2
	ld	[local5+local3], local3

	srl	local0, 16, local5
	or	out0, local0, out0	! rotate
	ld	[global2+local2], local2

	srl	out0, 24, local0
	ifelse($6,{}, {}, {ld	[$6], out0})	! key next encryption/decryption
	and	local4, 252, local4

	and	local5, 252, local5
	ld	[global4+local4], local4
	xor	$2, local3, $2		! 7 finished local3 used

	and	local0, 252, local0
	ld	[local6+local5], local5
	xor	$2, local2, $2		! 2 finished local2 now sbox 3

	srl	local7, 8, local2	! 3 start
	ld	[out3+local0], local0
	xor	$2, local4, $2

	and	local2, 252, local2
	ld	[global1+local1], local1
	xor	$2, local5, $2		! 6 finished local5 used

	ld	[global3+local2], local2
	srl	$1, 3, local3		! first work half shifted 3 right, for the final permutation
	xor	$2, local0, $2

	ifelse($6,{}, {}, {ld	[$6+4], out1})	! key next encryption/decryption
	sll	$1, 29, local4		! first work half shifted 29 left, for the final permutation
	xor	$2, local1, $2

	ifelse($7,{}, {}, {retl})
	xor	$2, local2, $2		! delay slot of retl when parameter 7 is given
})


! {fp_macro}
!
!  parameter 1   right (original left)
!  parameter 2   left (original right)
!  parameter 3   1 for optional store to [in0]
!  parameter 4   1 for load input/output address to local5/7
!
!  The final permutation logic switches the halves, meaning that
!  left and right end up in the registers originally used.
!
!  Expects parameter 1 split in local3/local4 as left by the rounds.
!  Uses local1 to local4 as scratch.

define(fp_macro, {

! {fp_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! initially undo the rotate 3 left done after initial permutation
	! original left is received shifted 3 right and 29 left in local3/4

	sll	$2, 29, local1
	or	local3, local4, $1

	srl	$2, 3, $2
	sethi	%hi(0x55555555), local2

	or	$2, local1, $2
	or	local2, %lo(0x55555555), local2

	srl	$2, 1, local3
	sethi	%hi(0x00ff00ff), local1
	xor	local3, $1, local3
	or	local1, %lo(0x00ff00ff), local1
	and	local3, local2, local3
	sethi	%hi(0x33333333), local4
	sll	local3, 1, local2

	xor	$1, local3, $1

	srl	$1, 8, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x33333333), local4
	and	local3, local1, local3
	sethi	%hi(0x0000ffff), local1
	sll	local3, 8, local2

	xor	$2, local3, $2

	srl	$2, 2, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	or	local1, %lo(0x0000ffff), local1
	and	local3, local4, local3
	sethi	%hi(0x0f0f0f0f), local4
	sll	local3, 2, local2

	ifelse($4,1, {LDPTR INPUT, local5})
	xor	$1, local3, $1

	ifelse($4,1, {LDPTR OUTPUT, local7})
	srl	$1, 16, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x0f0f0f0f), local4
	and	local3, local1, local3
	sll	local3, 16, local2

	xor	$2, local3, local1

	srl	local1, 4, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	and	local3, local4, local3
	sll	local3, 4, local2

	xor	$1, local3, $1

	! optional store:

	ifelse($3,1, {st $1, [in0]})

	xor	local1, local2, $2

	ifelse($3,1, {st $2, [in0+4]})

})


! {fp_ip_macro}
!
! Does initial permutation for next block mixed with
! final permutation for current block.
!
! parameter 1   original left
! parameter 2   original right
! parameter 3   left ip
! parameter 4   right ip
! parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
!                2: mov in4 to in3
!
! also adds -8 to length in2 and loads loop counter to out4
!
! The current block is taken from local3/local4 and out5. The macro
! uses out4 and local3 as temporaries and reloads out4 at the end.

define(fp_ip_macro, {

! {fp_ip_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	define({temp1},{out4})
	define({temp2},{local3})

	define({ip1},{local1})
	define({ip2},{local2})
	define({ip4},{local4})
	define({ip5},{local5})

	! $1 in local3, local4

	ld	[out2+256], ip1
	sll	out5, 29, temp1
	or	local3, local4, $1

	srl	out5, 3, $2
	ifelse($5,2,{mov in4, in3})

	ld	[out2+272], ip5
	srl	$4, 4, local0
	or	$2, temp1, $2

	srl	$2, 1, temp1
	xor	temp1, $1, temp1

	and	temp1, ip5, temp1
	xor	local0, $3, local0

	sll	temp1, 1, temp2
	xor	$1, temp1, $1

	and	local0, ip1, local0
	add	in2, -8, in2

	sll	local0, 4, local7
	xor	$3, local0, $3

	ld	[out2+268], ip4
	srl	$1, 8, temp1
	xor	$2, temp2, $2
	ld	[out2+260], ip2
	srl	$3, 16, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip4, temp1
	and	local0, ip2, local0
	sll	temp1, 8, temp2
	xor	$2, temp1, $2
	sll	local0, 16, local7
	xor	$4, local0, $4

	srl	$2, 2, temp1
	xor	$1, temp2, $1

	ld	[out2+264], temp2 ! ip3
	srl	$4, 2, local0
	xor	$3, local7, $3
	xor	temp1, $1, temp1
	xor	local0, $3, local0
	and	temp1, temp2, temp1
	and	local0, temp2, local0
	sll	temp1, 2, temp2
	xor	$1, temp1, $1
	sll	local0, 2, local7
	xor	$3, local0, $3

	srl	$1, 16, temp1
	xor	$2, temp2, $2
	srl	$3, 8, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip2, temp1
	and	local0, ip4, local0
	sll	temp1, 16, temp2
	xor	$2, temp1, local4
	sll	local0, 8, local7
	xor	$4, local0, $4

	srl	$4, 1, local0
	xor	$3, local7, $3

	srl	local4, 4, temp1
	xor	local0, $3, local0

	xor	$1, temp2, $1
	and	local0, ip5, local0

	sll	local0, 1, local7
	xor	temp1, $1, temp1

	xor	$3, local0, $3
	xor	$4, local7, $4

	sll	$3, 3, local5
	and	temp1, ip1, temp1

	sll	temp1, 4, temp2
	xor	$1, temp1, $1

	ifelse($5,1,{LDPTR KS2, in4})
	sll	$4, 3, local2
	xor	local4, temp2, $2

	! reload since used as temporary:

	ld	[out2+280], out4          ! loop counter

	srl	$3, 29, local0
	ifelse($5,1,{add in4, 120, in4})

	ifelse($5,1,{LDPTR KS1, in3})
	srl	$4, 29, local7

	or	local0, local5, $4
	or	local2, local7, $3

})



! {load_little_endian}
!
! Loads 8 bytes from the address as two little endian 32-bit words.
! With OPENSSL_SYSNAME_ULTRASPARC defined, a 4-byte aligned address is
! read with two lda through ASI 0x88; otherwise, or when the address
! is not aligned, the words are assembled byte by byte.
!
! parameter 1  address
! parameter 2  destination left
! parameter 3  destination right
! parameter 4  temporary
! parameter 5  label (also used with an a appended as exit label)

define(load_little_endian, {

! {load_little_endian}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	andcc	$1, 3, global0
	bne,pn	%icc, $5
	nop

	lda	[$1] 0x88, $2
	add	$1, 4, $4

	ba,pt	%icc, $5a
	lda	[$4] 0x88, $3
#endif

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2


	ldub	[$1+3+4], $3

	ldub	[$1+2+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+0+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3
$5a:

})


! {load_little_endian_inc}
!
! parameter 1  address
! parameter 2  destination left
! parameter 3  destination right
! parameter 4  temporary
! parameter 5  label (also used with an a appended as exit label)
!
! adds 8 to address

define(load_little_endian_inc, {

! {load_little_endian_inc}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	andcc	$1, 3, global0
	bne,pn	%icc, $5
	nop

	lda	[$1] 0x88, $2
	add	$1, 4, $1

	lda	[$1] 0x88, $3
	ba,pt	%icc, $5a
	add	$1, 4, $1
#endif

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+3+4], $3
	add	$1, 8, $1		! address advanced here, later offsets compensate with -8

	ldub	[$1+2+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+0+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3
$5a:

})


! {load_n_bytes}
!
! Loads 1 to 7 bytes little endian
! Remaining bytes are zeroed.
!
! parameter 1  address
! parameter 2  length
! parameter 3  destination register left
! parameter 4  destination register right
! parameter 5  temp
!
! parameter 6  temp2
! parameter 7  label
! parameter 8  return label
!
! As coded, bytes 0 to 3 are gathered in parameter 4 and bytes 4 to 6
! in parameter 3.
!
! The length selects an entry of the jump table and the code falls
! through from the highest requested byte down to byte 0. Entry 0 of
! the table is offset 0, which would jump back to the call itself, so
! the length must be 1 to 7. The call overwrites o7.

define(load_n_bytes, {

! {load_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9

$7.0:	call	.+8			! o7 becomes the address of this call
	sll	$2, 2, $6		! delay slot: length times 4, table offset

	add	%o7,$7.jmp.table-$7.0,$5

	add	$5, $6, $5
	mov	0, $4

	ld	[$5], $5

	jmp	%o7+$5
	mov	0, $3

$7.7:
	ldub	[$1+6], $5
	sll	$5, 16, $5
	or	$3, $5, $3
$7.6:
	ldub	[$1+5], $5
	sll	$5, 8, $5
	or	$3, $5, $3
$7.5:
	ldub	[$1+4], $5
	or	$3, $5, $3
$7.4:
	ldub	[$1+3], $5
	sll	$5, 24, $5
	or	$4, $5, $4
$7.3:
	ldub	[$1+2], $5
	sll	$5, 16, $5
	or	$4, $5, $4
$7.2:
	ldub	[$1+1], $5
	sll	$5, 8, $5
	or	$4, $5, $4
$7.1:
	ldub	[$1+0], $5
	ba	$8
	or	$4, $5, $4

	.align 4

$7.jmp.table:
	.word	0
	.word	$7.1-$7.0
	.word	$7.2-$7.0
	.word	$7.3-$7.0
	.word	$7.4-$7.0
	.word	$7.5-$7.0
	.word	$7.6-$7.0
	.word	$7.7-$7.0
})


! {store_little_endian}
!
! Stores two 32-bit words as 8 little endian bytes at the address.
! With OPENSSL_SYSNAME_ULTRASPARC defined, a 4-byte aligned address is
! written with two sta through ASI 0x88; otherwise, or when the address
! is not aligned, the bytes are stored one at a time.
!
! parameter 1  address
! parameter 2  source left
! parameter 3  source right
! parameter 4  temporary
! parameter 5  label (also used with an a appended as exit label)

define(store_little_endian, {

! {store_little_endian}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! rightmost in register to first in memory

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	andcc	$1, 3, global0
	bne,pn	%icc, $5
	nop

	sta	$2, [$1] 0x88
	add	$1, 4, $4

	ba,pt	%icc, $5a
	sta	$3, [$4] 0x88
#endif

$5:
	and	$2, 255, $4
	stub	$4, [$1+0]

	srl	$2, 8, $4
	and	$4, 255, $4
	stub	$4, [$1+1]

	srl	$2, 16, $4
	and	$4, 255, $4
	stub	$4, [$1+2]

	srl	$2, 24, $4
	stub	$4, [$1+3]


	and	$3, 255, $4
	stub	$4, [$1+0+4]

	srl	$3, 8, $4
	and	$4, 255, $4
	stub	$4, [$1+1+4]

	srl	$3, 16, $4
	and	$4, 255, $4
	stub	$4, [$1+2+4]

	srl	$3, 24, $4
	stub	$4, [$1+3+4]

$5a:

})


! {store_n_bytes}
!
!
! Stores 1 to 7 bytes little endian
!
! parameter 1  address
! parameter 2  length
! parameter 3  source register left
! parameter 4  source register right
! parameter 5  temp
! parameter 6  temp2
! parameter 7  label
! parameter 8  return label
!
! As coded, bytes 0 to 3 are taken from parameter 4 and bytes 4 to 6
! from parameter 3. Entry 0 of the jump table is offset 0, which would
! jump back to the call itself, so the length must be 1 to 7.
! The call overwrites o7.

define(store_n_bytes, {

! {store_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9

$7.0:	call	.+8			! o7 becomes the address of this call
	sll	$2, 2, $6		! delay slot: length times 4, table offset

	add	%o7,$7.jmp.table-$7.0,$5

	add	$5, $6, $5

	ld	[$5], $5

	jmp	%o7+$5
	nop

$7.7:
	srl	$3, 16, $5
	and	$5, 0xff, $5
	stub	$5, [$1+6]
$7.6:
	srl	$3, 8, $5
	and	$5, 0xff, $5
	stub	$5, [$1+5]
$7.5:
	and	$3, 0xff, $5
	stub	$5, [$1+4]
$7.4:
	srl	$4, 24, $5
	stub	$5, [$1+3]
$7.3:
	srl	$4, 16, $5
	and	$5, 0xff, $5
	stub	$5, [$1+2]
$7.2:
	srl	$4, 8, $5
	and	$5, 0xff, $5
	stub	$5, [$1+1]
$7.1:
	and	$4, 0xff, $5


	ba	$8
	stub	$5, [$1]		! delay slot: byte 0

	.align 4

$7.jmp.table:

	.word	0
	.word	$7.1-$7.0
	.word	$7.2-$7.0
	.word	$7.3-$7.0
	.word	$7.4-$7.0
	.word	$7.5-$7.0
	.word	$7.6-$7.0
	.word	$7.7-$7.0
})


! Value written to the registers by the test macro below.
define(testvalue,{1})

! Sets the registers given as parameters 1 to 8, and then every working
! register used by this file, to the test value. No use of this macro
! is visible in this part of the file.

define(register_init, {

! For test purposes:

	sethi	%hi(testvalue), local0
	or	local0, %lo(testvalue), local0

	ifelse($1,{},{}, {mov	local0, $1})
	ifelse($2,{},{}, {mov	local0, $2})
	ifelse($3,{},{}, {mov	local0, $3})
	ifelse($4,{},{}, {mov	local0, $4})
	ifelse($5,{},{}, {mov	local0, $5})
	ifelse($6,{},{}, {mov	local0, $6})
	ifelse($7,{},{}, {mov	local0, $7})
	ifelse($8,{},{}, {mov	local0, $8})

	mov	local0, local1
	mov	local0, local2
	mov	local0, local3
	mov	local0, local4
	mov	local0, local5
	mov	local0, local7
	mov	local0, local6
	mov	local0, out0
	mov	local0, out1
	mov	local0, out2
	mov	local0, out3
	mov	local0, out4
	mov	local0, out5
	mov	local0, global1
	mov	local0, global2
	mov	local0, global3
	mov	local0, global4
	mov	local0, global5

})

.section	".text"

	.align 32

! Internal subroutine: the 16 encryption rounds, ending in retl.
! Entered with call, either at .des_enc or at the loop label .des_enc.1.
! Works on the register window of the calling function.

.des_enc:

	! key address in3
	! loads key next encryption/decryption first round from [in4]

	rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)


	.align 32

! Internal subroutine: the 16 decryption rounds, ending in retl.
! Entered with call, either at .des_dec or at the loop label .des_dec.1.
! The key pointer moves downwards (parameter 3 is -1).

.des_dec:

	! implemented with out5 as first parameter to avoid
	! register exchange in ede modes

	! key address in4
	! loads key next encryption/decryption first round from [in3]

	rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)



! void DES_encrypt1(data, ks, enc)
! *******************************
!
! in0 is data (two 32-bit words, processed in place), in1 is ks,
! in2 is enc (0 selects decryption).

	.align 32
	.global DES_encrypt1
	.type	 DES_encrypt1,#function

DES_encrypt1:

	save	%sp, FRAME, %sp

	! Position independent address of .PIC.DES_SPtrans to global1:
	! call .+8 leaves the address of label 1 in o7.
	! out2 is then set to the address of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5		! left
	cmp	in2, 0			! enc

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	be,pn	%icc, .encrypt.dec	! enc/dec
#else
	be	.encrypt.dec
#endif
	ld	[in0+4], out5		! right

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for move in1 to in3
	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3

	ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)

	rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used

	fp_macro(in5, out5, 1)		! 1 for store to [in0]

	ret
	restore

.encrypt.dec:

	add	in1, 120, in3		! use last subkey for first round

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for move in1 to in3
	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec,  ks in4

	fp_macro(out5, in5, 1)		! 1 for store to [in0]

	ret
	restore

.DES_encrypt1.end:
	.size	 DES_encrypt1,.DES_encrypt1.end-DES_encrypt1


! void DES_encrypt2(data, ks, enc)
!*********************************
!
! in0 is data (two 32-bit words, processed in place), in1 is ks,
! in2 is enc (0 selects decryption).

	! encrypts/decrypts without initial/final permutation

	.align 32
	.global DES_encrypt2
	.type	 DES_encrypt2,#function

DES_encrypt2:

	save	%sp, FRAME, %sp

	! Position independent address of .PIC.DES_SPtrans to global1,
	! address of .des_and to out2.

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	! Set sbox address 1 to 6 and rotate halves 3 left
	! Errors caught by destest? Yes. Still? *NO*

	!sethi	%hi(DES_SPtrans), global1	! address sbox 1

	!or	global1, %lo(DES_SPtrans), global1	! sbox 1

	add	global1, 256, global2		! sbox 2
	add	global1, 512, global3		! sbox 3

	ld	[in0], out5			! right
	add	global1, 768, global4		! sbox 4
	add	global1, 1024, global5		! sbox 5

	ld	[in0+4], in5			! left
	add	global1, 1280, local6		! sbox 6
	add	global1, 1792, out3		! sbox 8

	! rotate

	sll	in5, 3, local5
	mov	in1, in3			! key address to in3

	sll	out5, 3, local7
	srl	in5, 29, in5

	srl	out5, 29, out5
	add	in5, local5, in5

	add	out5, local7, out5
	cmp	in2, 0

	! we use our own stackframe
	! (in0 is saved there because it is used as scratch after the rounds)

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	be,pn	%icc, .encrypt2.dec		! decryption
#else
	be	.encrypt2.dec
#endif
	STPTR	in0, [%sp+BIAS+ARG0+0*ARGSZ]

	ld	[in3], out0			! key 7531 first round
	mov	LOOPS, out4			! loop counter

	ld	[in3+4], out1			! key 8642 first round
	sethi	%hi(0x0000FC00), local5

	call .des_enc
	mov	in3, in4

	! rotate
	sll	in5, 29, in0
	srl	in5, 3, in5
	sll	out5, 29, in1
	add	in5, in0, in5
	srl	out5, 3, out5
	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0
	add	out5, in1, out5
	st	in5, [in0]
	st	out5, [in0+4]

	ret
	restore


.encrypt2.dec:

	add	in3, 120, in4

	ld	[in4], out0			! key 7531 first round
	mov	LOOPS, out4			! loop counter

	ld	[in4+4], out1			! key 8642 first round
	sethi	%hi(0x0000FC00), local5

	mov	in5, local1			! left expected in out5
	mov	out5, in5

	call .des_dec
	mov	local1, out5

.encrypt2.finish:

	! rotate
	sll	in5, 29, in0
	srl	in5, 3, in5
	sll	out5, 29, in1
	add	in5, in0, in5
	srl	out5, 3, out5
	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0
	add	out5, in1, out5
	st	out5, [in0]
	st	in5, [in0+4]

	ret
	restore

.DES_encrypt2.end:
	.size	 DES_encrypt2, .DES_encrypt2.end-DES_encrypt2


! void DES_encrypt3(data, ks1, ks2, ks3)
!
! **************************************
!
! in0 is data (two 32-bit words, processed in place), in1 is ks1,
! in2 is ks2, in3 is ks3. Runs the encryption rounds, then the
! decryption rounds, then the encryption rounds again, between one
! initial and one final permutation.

	.align 32
	.global DES_encrypt3
	.type	 DES_encrypt3,#function

DES_encrypt3:

	save	%sp, FRAME, %sp

	! Position independent address of .PIC.DES_SPtrans to global1,
	! address of .des_and to out2.

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5		! left
	add	in2, 120, in4		! ks2

	ld	[in0+4], out5		! right
	mov	in3, in2		! save ks3

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)

	call	.des_dec
	mov	in2, in3		! preload ks3

	call	.des_enc
	nop

	fp_macro(in5, out5, 1)

	ret
	restore

.DES_encrypt3.end:
	.size	 DES_encrypt3,.DES_encrypt3.end-DES_encrypt3


! void DES_decrypt3(data, ks1, ks2, ks3)
! **************************************
!
! in0 is data (two 32-bit words, processed in place), in1 is ks1,
! in2 is ks2, in3 is ks3. Runs the decryption rounds, then the
! encryption rounds, then the decryption rounds again, between one
! initial and one final permutation.

	.align 32
	.global DES_decrypt3
	.type	 DES_decrypt3,#function

DES_decrypt3:

	save	%sp, FRAME, %sp

	! Position independent address of .PIC.DES_SPtrans to global1,
	! address of .des_and to out2.

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5		! left
	add	in3, 120, in4		! ks3

	ld	[in0+4], out5		! right
	mov	in2, in3		! ks2

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)

	call	.des_enc
	add	in1, 120, in4		! preload ks1

	call	.des_dec
	nop

	fp_macro(out5, in5, 1)

	ret
	restore

.DES_decrypt3.end:
	.size	 DES_decrypt3,.DES_decrypt3.end-DES_decrypt3

! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
! *****************************************************************


	.align 32
	.global DES_ncbc_encrypt
	.type	 DES_ncbc_encrypt,#function

DES_ncbc_encrypt:

	save	%sp, FRAME, %sp

	define({INPUT},  { [%sp+BIAS+ARG0+0*ARGSZ] })
	define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
	define({IVEC},   { [%sp+BIAS+ARG0+4*ARGSZ] })

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	cmp	in5, 0			! enc

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	be,pn	%icc, .ncbc.dec
#else
	be	.ncbc.dec
#endif
	STPTR	in4, IVEC

	! addr  left  right  temp  label
	load_little_endian(in4, in5, out5, local3, .LLE1)  ! iv

	addcc	in2, -8, in2		! bytes missing when first block done

#ifdef OPENSSL_SYSNAME_ULTRASPARC
	bl,pn	%icc, .ncbc.enc.seven.or.less
#else
	bl	.ncbc.enc.seven.or.less
#endif
	mov	in3, in4		! schedule

.ncbc.enc.next.block:

	load_little_endian(in0, out4, global4, local3, .LLE2)  ! block

.ncbc.enc.next.block_1:

	xor	in5, out4, in5		! iv xor
	xor	out5, global4, out5	! iv xor

	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
	ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)

.ncbc.enc.next.block_2:

!//	call .des_enc		! compares in2 to 8
!	rounds inlined for alignment purposes

	add	global1, 768, global4	! address sbox 4 since register used below

	rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4)  ! 
include encryption ks in3 1503 1504#ifdef OPENSSL_SYSNAME_ULTRASPARC 1505 bl,pn %icc, .ncbc.enc.next.block_fp 1506#else 1507 bl .ncbc.enc.next.block_fp 1508#endif 1509 add in0, 8, in0 ! input address 1510 1511 ! If 8 or more bytes are to be encrypted after this block, 1512 ! we combine final permutation for this block with initial 1513 ! permutation for next block. Load next block: 1514 1515 load_little_endian(in0, global3, global4, local5, .LLE12) 1516 1517 ! parameter 1 original left 1518 ! parameter 2 original right 1519 ! parameter 3 left ip 1520 ! parameter 4 right ip 1521 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 1522 ! 2: mov in4 to in3 1523 ! 1524 ! also adds -8 to length in2 and loads loop counter to out4 1525 1526 fp_ip_macro(out0, out1, global3, global4, 2) 1527 1528 store_little_endian(in1, out0, out1, local3, .SLE10) ! block 1529 1530 ld [in3], out0 ! key 7531 first round next block 1531 mov in5, local1 1532 xor global3, out5, in5 ! iv xor next block 1533 1534 ld [in3+4], out1 ! key 8642 1535 add global1, 512, global3 ! address sbox 3 since register used 1536 xor global4, local1, out5 ! iv xor next block 1537 1538 ba .ncbc.enc.next.block_2 1539 add in1, 8, in1 ! output adress 1540 1541.ncbc.enc.next.block_fp: 1542 1543 fp_macro(in5, out5) 1544 1545 store_little_endian(in1, in5, out5, local3, .SLE1) ! block 1546 1547 addcc in2, -8, in2 ! bytes missing when next block done 1548 1549#ifdef OPENSSL_SYSNAME_ULTRASPARC 1550 bpos,pt %icc, .ncbc.enc.next.block ! also jumps if 0 1551#else 1552 bpos .ncbc.enc.next.block 1553#endif 1554 add in1, 8, in1 1555 1556.ncbc.enc.seven.or.less: 1557 1558 cmp in2, -8 1559 1560#ifdef OPENSSL_SYSNAME_ULTRASPARC 1561 ble,pt %icc, .ncbc.enc.finish 1562#else 1563 ble .ncbc.enc.finish 1564#endif 1565 nop 1566 1567 add in2, 8, local1 ! bytes to load 1568 1569 ! 
addr, length, dest left, dest right, temp, temp2, label, ret label 1570 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1) 1571 1572 ! Loads 1 to 7 bytes little endian to global4, out4 1573 1574 1575.ncbc.enc.finish: 1576 1577 LDPTR IVEC, local4 1578 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec 1579 1580 ret 1581 restore 1582 1583 1584.ncbc.dec: 1585 1586 STPTR in0, INPUT 1587 cmp in2, 0 ! length 1588 add in3, 120, in3 1589 1590 LDPTR IVEC, local7 ! ivec 1591#ifdef OPENSSL_SYSNAME_ULTRASPARC 1592 ble,pn %icc, .ncbc.dec.finish 1593#else 1594 ble .ncbc.dec.finish 1595#endif 1596 mov in3, in4 ! schedule 1597 1598 STPTR in1, OUTPUT 1599 mov in0, local5 ! input 1600 1601 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec 1602 1603.ncbc.dec.next.block: 1604 1605 load_little_endian(local5, in5, out5, local3, .LLE4) ! block 1606 1607 ! parameter 6 1/2 for include encryption/decryption 1608 ! parameter 7 1 for mov in1 to in3 1609 ! parameter 8 1 for mov in3 to in4 1610 1611 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryprion ks in4 1612 1613 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7 1614 1615 ! in2 is bytes left to be stored 1616 ! in2 is compared to 8 in the rounds 1617 1618 xor out5, in0, out4 ! iv xor 1619#ifdef OPENSSL_SYSNAME_ULTRASPARC 1620 bl,pn %icc, .ncbc.dec.seven.or.less 1621#else 1622 bl .ncbc.dec.seven.or.less 1623#endif 1624 xor in5, in1, global4 ! iv xor 1625 1626 ! Load ivec next block now, since input and output address might be the same. 1627 1628 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! 
iv 1629 1630 store_little_endian(local7, out4, global4, local3, .SLE3) 1631 1632 STPTR local5, INPUT 1633 add local7, 8, local7 1634 addcc in2, -8, in2 1635 1636#ifdef OPENSSL_SYSNAME_ULTRASPARC 1637 bg,pt %icc, .ncbc.dec.next.block 1638#else 1639 bg .ncbc.dec.next.block 1640#endif 1641 STPTR local7, OUTPUT 1642 1643 1644.ncbc.dec.store.iv: 1645 1646 LDPTR IVEC, local4 ! ivec 1647 store_little_endian(local4, in0, in1, local5, .SLE4) 1648 1649.ncbc.dec.finish: 1650 1651 ret 1652 restore 1653 1654.ncbc.dec.seven.or.less: 1655 1656 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec 1657 1658 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv) 1659 1660 1661.DES_ncbc_encrypt.end: 1662 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt 1663 1664 1665! void DES_ede3_cbc_encrypt(input, output, lenght, ks1, ks2, ks3, ivec, enc) 1666! ************************************************************************** 1667 1668 1669 .align 32 1670 .global DES_ede3_cbc_encrypt 1671 .type DES_ede3_cbc_encrypt,#function 1672 1673DES_ede3_cbc_encrypt: 1674 1675 save %sp, FRAME, %sp 1676 1677 define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] }) 1678 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] }) 1679 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] }) 1680 1681 sethi %hi(.PIC.DES_SPtrans-1f),global1 1682 or global1,%lo(.PIC.DES_SPtrans-1f),global1 16831: call .+8 1684 add %o7,global1,global1 1685 sub global1,.PIC.DES_SPtrans-.des_and,out2 1686 1687 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc 1688 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec 1689 cmp local3, 0 ! enc 1690 1691#ifdef OPENSSL_SYSNAME_ULTRASPARC 1692 be,pn %icc, .ede3.dec 1693#else 1694 be .ede3.dec 1695#endif 1696 STPTR in4, KS2 1697 1698 STPTR in5, KS3 1699 1700 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec 1701 1702 addcc in2, -8, in2 ! 
bytes missing after next block 1703 1704#ifdef OPENSSL_SYSNAME_ULTRASPARC 1705 bl,pn %icc, .ede3.enc.seven.or.less 1706#else 1707 bl .ede3.enc.seven.or.less 1708#endif 1709 STPTR in3, KS1 1710 1711.ede3.enc.next.block: 1712 1713 load_little_endian(in0, out4, global4, local3, .LLE7) 1714 1715.ede3.enc.next.block_1: 1716 1717 LDPTR KS2, in4 1718 xor in5, out4, in5 ! iv xor 1719 xor out5, global4, out5 ! iv xor 1720 1721 LDPTR KS1, in3 1722 add in4, 120, in4 ! for decryption we use last subkey first 1723 nop 1724 1725 ip_macro(in5, out5, in5, out5, in3) 1726 1727.ede3.enc.next.block_2: 1728 1729 call .des_enc ! ks1 in3 1730 nop 1731 1732 call .des_dec ! ks2 in4 1733 LDPTR KS3, in3 1734 1735 call .des_enc ! ks3 in3 compares in2 to 8 1736 nop 1737 1738#ifdef OPENSSL_SYSNAME_ULTRASPARC 1739 bl,pn %icc, .ede3.enc.next.block_fp 1740#else 1741 bl .ede3.enc.next.block_fp 1742#endif 1743 add in0, 8, in0 1744 1745 ! If 8 or more bytes are to be encrypted after this block, 1746 ! we combine final permutation for this block with initial 1747 ! permutation for next block. Load next block: 1748 1749 load_little_endian(in0, global3, global4, local5, .LLE11) 1750 1751 ! parameter 1 original left 1752 ! parameter 2 original right 1753 ! parameter 3 left ip 1754 ! parameter 4 right ip 1755 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 1756 ! 2: mov in4 to in3 1757 ! 1758 ! also adds -8 to length in2 and loads loop counter to out4 1759 1760 fp_ip_macro(out0, out1, global3, global4, 1) 1761 1762 store_little_endian(in1, out0, out1, local3, .SLE9) ! block 1763 1764 mov in5, local1 1765 xor global3, out5, in5 ! iv xor next block 1766 1767 ld [in3], out0 ! key 7531 1768 add global1, 512, global3 ! address sbox 3 1769 xor global4, local1, out5 ! iv xor next block 1770 1771 ld [in3+4], out1 ! key 8642 1772 add global1, 768, global4 ! 
address sbox 4 1773 ba .ede3.enc.next.block_2 1774 add in1, 8, in1 1775 1776.ede3.enc.next.block_fp: 1777 1778 fp_macro(in5, out5) 1779 1780 store_little_endian(in1, in5, out5, local3, .SLE5) ! block 1781 1782 addcc in2, -8, in2 ! bytes missing when next block done 1783 1784#ifdef OPENSSL_SYSNAME_ULTRASPARC 1785 bpos,pt %icc, .ede3.enc.next.block 1786#else 1787 bpos .ede3.enc.next.block 1788#endif 1789 add in1, 8, in1 1790 1791.ede3.enc.seven.or.less: 1792 1793 cmp in2, -8 1794 1795#ifdef OPENSSL_SYSNAME_ULTRASPARC 1796 ble,pt %icc, .ede3.enc.finish 1797#else 1798 ble .ede3.enc.finish 1799#endif 1800 nop 1801 1802 add in2, 8, local1 ! bytes to load 1803 1804 ! addr, length, dest left, dest right, temp, temp2, label, ret label 1805 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1) 1806 1807.ede3.enc.finish: 1808 1809 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec 1810 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec 1811 1812 ret 1813 restore 1814 1815.ede3.dec: 1816 1817 STPTR in0, INPUT 1818 add in5, 120, in5 1819 1820 STPTR in1, OUTPUT 1821 mov in0, local5 1822 add in3, 120, in3 1823 1824 STPTR in3, KS1 1825 cmp in2, 0 1826 1827#ifdef OPENSSL_SYSNAME_ULTRASPARC 1828 ble %icc, .ede3.dec.finish 1829#else 1830 ble .ede3.dec.finish 1831#endif 1832 STPTR in5, KS3 1833 1834 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv 1835 load_little_endian(local7, in0, in1, local3, .LLE8) 1836 1837.ede3.dec.next.block: 1838 1839 load_little_endian(local5, in5, out5, local3, .LLE9) 1840 1841 ! parameter 6 1/2 for include encryption/decryption 1842 ! parameter 7 1 for mov in1 to in3 1843 ! parameter 8 1 for mov in3 to in4 1844 ! parameter 9 1 for load ks3 and ks2 to in4 and in3 1845 1846 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4 1847 1848 call .des_enc ! ks2 in3 1849 LDPTR KS1, in4 1850 1851 call .des_dec ! ks1 in4 1852 nop 1853 1854 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7 1855 1856 ! 
in2 is bytes left to be stored 1857 ! in2 is compared to 8 in the rounds 1858 1859 xor out5, in0, out4 1860#ifdef OPENSSL_SYSNAME_ULTRASPARC 1861 bl,pn %icc, .ede3.dec.seven.or.less 1862#else 1863 bl .ede3.dec.seven.or.less 1864#endif 1865 xor in5, in1, global4 1866 1867 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block 1868 1869 store_little_endian(local7, out4, global4, local3, .SLE7) ! block 1870 1871 STPTR local5, INPUT 1872 addcc in2, -8, in2 1873 add local7, 8, local7 1874 1875#ifdef OPENSSL_SYSNAME_ULTRASPARC 1876 bg,pt %icc, .ede3.dec.next.block 1877#else 1878 bg .ede3.dec.next.block 1879#endif 1880 STPTR local7, OUTPUT 1881 1882.ede3.dec.store.iv: 1883 1884 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec 1885 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec 1886 1887.ede3.dec.finish: 1888 1889 ret 1890 restore 1891 1892.ede3.dec.seven.or.less: 1893 1894 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv 1895 1896 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv) 1897 1898 1899.DES_ede3_cbc_encrypt.end: 1900 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt 1901 1902 .align 256 1903 .type .des_and,#object 1904 .size .des_and,284 1905 1906.des_and: 1907 1908! This table is used for AND 0xFC when it is known that register 1909! bits 8-31 are zero. Makes it possible to do three arithmetic 1910! operations in one cycle. 
! Rows of the table labelled .des_and directly above this point:
! 256 bytes, where the byte at offset i holds i with its two low bits
! cleared (i AND 0xFC). Each value 0, 4, ... 252 therefore appears
! four times in a row.

	.byte	0, 0, 0, 0, 4, 4, 4, 4
	.byte	8, 8, 8, 8, 12, 12, 12, 12
	.byte	16, 16, 16, 16, 20, 20, 20, 20
	.byte	24, 24, 24, 24, 28, 28, 28, 28
	.byte	32, 32, 32, 32, 36, 36, 36, 36
	.byte	40, 40, 40, 40, 44, 44, 44, 44
	.byte	48, 48, 48, 48, 52, 52, 52, 52
	.byte	56, 56, 56, 56, 60, 60, 60, 60
	.byte	64, 64, 64, 64, 68, 68, 68, 68
	.byte	72, 72, 72, 72, 76, 76, 76, 76
	.byte	80, 80, 80, 80, 84, 84, 84, 84
	.byte	88, 88, 88, 88, 92, 92, 92, 92
	.byte	96, 96, 96, 96, 100, 100, 100, 100
	.byte	104, 104, 104, 104, 108, 108, 108, 108
	.byte	112, 112, 112, 112, 116, 116, 116, 116
	.byte	120, 120, 120, 120, 124, 124, 124, 124
	.byte	128, 128, 128, 128, 132, 132, 132, 132
	.byte	136, 136, 136, 136, 140, 140, 140, 140
	.byte	144, 144, 144, 144, 148, 148, 148, 148
	.byte	152, 152, 152, 152, 156, 156, 156, 156
	.byte	160, 160, 160, 160, 164, 164, 164, 164
	.byte	168, 168, 168, 168, 172, 172, 172, 172
	.byte	176, 176, 176, 176, 180, 180, 180, 180
	.byte	184, 184, 184, 184, 188, 188, 188, 188
	.byte	192, 192, 192, 192, 196, 196, 196, 196
	.byte	200, 200, 200, 200, 204, 204, 204, 204
	.byte	208, 208, 208, 208, 212, 212, 212, 212
	.byte	216, 216, 216, 216, 220, 220, 220, 220
	.byte	224, 224, 224, 224, 228, 228, 228, 228
	.byte	232, 232, 232, 232, 236, 236, 236, 236
	.byte	240, 240, 240, 240, 244, 244, 244, 244
	.byte	248, 248, 248, 248, 252, 252, 252, 252

	! 5 numbers for initial/final permutation
	! The trailing comments give the byte offset of each word from
	! the start of the table; the words follow the 256 bytes above.

	.word	0x0f0f0f0f                ! offset 256
	.word	0x0000ffff                ! 260
	.word	0x33333333                ! 264
	.word	0x00ff00ff                ! 268
	.word	0x55555555                ! 272

	! Three more words; the last one ends at offset 288, which is the
	! full extent of the table.
	.word	0                         ! 276
	.word	LOOPS                     ! 280
	.word	0x0000FC00                ! 284

	! Eight tables of 64 words each (256 bytes per table, 2048 bytes in
	! total, matching the size stated below). The symbol is exported,
	! and it is also reachable through the second label placed on the
	! same address, which is the name the routines use when they
	! compute the table address relative to the program counter.
	! The code addresses the tables as base plus a multiple of 256:
	! base+512 and base+768 are commented there as sbox 3 and sbox 4,
	! which are the tables marked nibble 2 and nibble 3 here.

	.global	DES_SPtrans
	.type	DES_SPtrans,#object
	.size	DES_SPtrans,2048
.align 64
DES_SPtrans:
.PIC.DES_SPtrans:
	! nibble 0
	.word	0x02080800, 0x00080000, 0x02000002, 0x02080802
	.word	0x02000000, 0x00080802, 0x00080002, 0x02000002
	.word	0x00080802, 0x02080800, 0x02080000, 0x00000802
	.word	0x02000802, 0x02000000, 0x00000000, 0x00080002
	.word	0x00080000, 0x00000002, 0x02000800, 0x00080800
	.word	0x02080802, 0x02080000, 0x00000802, 0x02000800
	.word	0x00000002, 0x00000800, 0x00080800, 0x02080002
	.word	0x00000800, 0x02000802, 0x02080002, 0x00000000
	.word	0x00000000, 0x02080802, 0x02000800, 0x00080002
	.word	0x02080800, 0x00080000, 0x00000802, 0x02000800
	.word	0x02080002, 0x00000800, 0x00080800, 0x02000002
	.word	0x00080802, 0x00000002, 0x02000002, 0x02080000
	.word	0x02080802, 0x00080800, 0x02080000, 0x02000802
	.word	0x02000000, 0x00000802, 0x00080002, 0x00000000
	.word	0x00080000, 0x02000000, 0x02000802, 0x02080800
	.word	0x00000002, 0x02080002, 0x00000800, 0x00080802
	! nibble 1
	.word	0x40108010, 0x00000000, 0x00108000, 0x40100000
	.word	0x40000010, 0x00008010, 0x40008000, 0x00108000
	.word	0x00008000, 0x40100010, 0x00000010, 0x40008000
	.word	0x00100010, 0x40108000, 0x40100000, 0x00000010
	.word	0x00100000, 0x40008010, 0x40100010, 0x00008000
	.word	0x00108010, 0x40000000, 0x00000000, 0x00100010
	.word	0x40008010, 0x00108010, 0x40108000, 0x40000010
	.word	0x40000000, 0x00100000, 0x00008010, 0x40108010
	.word	0x00100010, 0x40108000, 0x40008000, 0x00108010
	.word	0x40108010, 0x00100010, 0x40000010, 0x00000000
	.word	0x40000000, 0x00008010, 0x00100000, 0x40100010
	.word	0x00008000, 0x40000000, 0x00108010, 0x40008010
	.word	0x40108000, 0x00008000, 0x00000000, 0x40000010
	.word	0x00000010, 0x40108010, 0x00108000, 0x40100000
	.word	0x40100010, 0x00100000, 0x00008010, 0x40008000
	.word	0x40008010, 0x00000010, 0x40100000, 0x00108000
	! nibble 2
	.word	0x04000001, 0x04040100, 0x00000100, 0x04000101
	.word	0x00040001, 0x04000000, 0x04000101, 0x00040100
	.word	0x04000100, 0x00040000, 0x04040000, 0x00000001
	.word	0x04040101, 0x00000101, 0x00000001, 0x04040001
	.word	0x00000000, 0x00040001, 0x04040100, 0x00000100
	.word	0x00000101, 0x04040101, 0x00040000, 0x04000001
	.word	0x04040001, 0x04000100, 0x00040101, 0x04040000
	.word	0x00040100, 0x00000000, 0x04000000, 0x00040101
	.word	0x04040100, 0x00000100, 0x00000001, 0x00040000
	.word	0x00000101, 0x00040001, 0x04040000, 0x04000101
	.word	0x00000000, 0x04040100, 0x00040100, 0x04040001
	.word	0x00040001, 0x04000000, 0x04040101, 0x00000001
	.word	0x00040101, 0x04000001, 0x04000000, 0x04040101
	.word	0x00040000, 0x04000100, 0x04000101, 0x00040100
	.word	0x04000100, 0x00000000, 0x04040001, 0x00000101
	.word	0x04000001, 0x00040101, 0x00000100, 0x04040000
	! nibble 3
	.word	0x00401008, 0x10001000, 0x00000008, 0x10401008
	.word	0x00000000, 0x10400000, 0x10001008, 0x00400008
	.word	0x10401000, 0x10000008, 0x10000000, 0x00001008
	.word	0x10000008, 0x00401008, 0x00400000, 0x10000000
	.word	0x10400008, 0x00401000, 0x00001000, 0x00000008
	.word	0x00401000, 0x10001008, 0x10400000, 0x00001000
	.word	0x00001008, 0x00000000, 0x00400008, 0x10401000
	.word	0x10001000, 0x10400008, 0x10401008, 0x00400000
	.word	0x10400008, 0x00001008, 0x00400000, 0x10000008
	.word	0x00401000, 0x10001000, 0x00000008, 0x10400000
	.word	0x10001008, 0x00000000, 0x00001000, 0x00400008
	.word	0x00000000, 0x10400008, 0x10401000, 0x00001000
	.word	0x10000000, 0x10401008, 0x00401008, 0x00400000
	.word	0x10401008, 0x00000008, 0x10001000, 0x00401008
	.word	0x00400008, 0x00401000, 0x10400000, 0x10001008
	.word	0x00001008, 0x10000000, 0x10000008, 0x10401000
	! nibble 4
	.word	0x08000000, 0x00010000, 0x00000400, 0x08010420
	.word	0x08010020, 0x08000400, 0x00010420, 0x08010000
	.word	0x00010000, 0x00000020, 0x08000020, 0x00010400
	.word	0x08000420, 0x08010020, 0x08010400, 0x00000000
	.word	0x00010400, 0x08000000, 0x00010020, 0x00000420
	.word	0x08000400, 0x00010420, 0x00000000, 0x08000020
	.word	0x00000020, 0x08000420, 0x08010420, 0x00010020
	.word	0x08010000, 0x00000400, 0x00000420, 0x08010400
	.word	0x08010400, 0x08000420, 0x00010020, 0x08010000
	.word	0x00010000, 0x00000020, 0x08000020, 0x08000400
	.word	0x08000000, 0x00010400, 0x08010420, 0x00000000
	.word	0x00010420, 0x08000000, 0x00000400, 0x00010020
	.word	0x08000420, 0x00000400, 0x00000000, 0x08010420
	.word	0x08010020, 0x08010400, 0x00000420, 0x00010000
	.word	0x00010400, 0x08010020, 0x08000400, 0x00000420
	.word	0x00000020, 0x00010420, 0x08010000, 0x08000020
	! nibble 5
	.word	0x80000040, 0x00200040, 0x00000000, 0x80202000
	.word	0x00200040, 0x00002000, 0x80002040, 0x00200000
	.word	0x00002040, 0x80202040, 0x00202000, 0x80000000
	.word	0x80002000, 0x80000040, 0x80200000, 0x00202040
	.word	0x00200000, 0x80002040, 0x80200040, 0x00000000
	.word	0x00002000, 0x00000040, 0x80202000, 0x80200040
	.word	0x80202040, 0x80200000, 0x80000000, 0x00002040
	.word	0x00000040, 0x00202000, 0x00202040, 0x80002000
	.word	0x00002040, 0x80000000, 0x80002000, 0x00202040
	.word	0x80202000, 0x00200040, 0x00000000, 0x80002000
	.word	0x80000000, 0x00002000, 0x80200040, 0x00200000
	.word	0x00200040, 0x80202040, 0x00202000, 0x00000040
	.word	0x80202040, 0x00202000, 0x00200000, 0x80002040
	.word	0x80000040, 0x80200000, 0x00202040, 0x00000000
	.word	0x00002000, 0x80000040, 0x80002040, 0x80202000
	.word	0x80200000, 0x00002040, 0x00000040, 0x80200040
	! nibble 6
	.word	0x00004000, 0x00000200, 0x01000200, 0x01000004
	.word	0x01004204, 0x00004004, 0x00004200, 0x00000000
	.word	0x01000000, 0x01000204, 0x00000204, 0x01004000
	.word	0x00000004, 0x01004200, 0x01004000, 0x00000204
	.word	0x01000204, 0x00004000, 0x00004004, 0x01004204
	.word	0x00000000, 0x01000200, 0x01000004, 0x00004200
	.word	0x01004004, 0x00004204, 0x01004200, 0x00000004
	.word	0x00004204, 0x01004004, 0x00000200, 0x01000000
	.word	0x00004204, 0x01004000, 0x01004004, 0x00000204
	.word	0x00004000, 0x00000200, 0x01000000, 0x01004004
	.word	0x01000204, 0x00004204, 0x00004200, 0x00000000
	.word	0x00000200, 0x01000004, 0x00000004, 0x01000200
	.word	0x00000000, 0x01000204, 0x01000200, 0x00004200
	.word	0x00000204, 0x00004000, 0x01004204, 0x01000000
	.word	0x01004200, 0x00000004, 0x00004004, 0x01004204
	.word	0x01000004, 0x01004200, 0x01004000, 0x00004004
	! nibble 7
	.word	0x20800080, 0x20820000, 0x00020080, 0x00000000
	.word	0x20020000, 0x00800080, 0x20800000, 0x20820080
	.word	0x00000080, 0x20000000, 0x00820000, 0x00020080
	.word	0x00820080, 0x20020080, 0x20000080, 0x20800000
	.word	0x00020000, 0x00820080, 0x00800080, 0x20020000
	.word	0x20820080, 0x20000080, 0x00000000, 0x00820000
	.word	0x20000000, 0x00800000, 0x20020080, 0x20800080
	.word	0x00800000, 0x00020000, 0x20820000, 0x00000080
	.word	0x00800000, 0x00020000, 0x20000080, 0x20820080
	.word	0x00020080, 0x20000000, 0x00000000, 0x00820000
	.word	0x20800080, 0x20020080, 0x20020000, 0x00800080
	.word	0x20820000, 0x00000080, 0x00800080, 0x20020000
	.word	0x20820080, 0x00800000, 0x20800000, 0x20000080
	.word	0x00820000, 0x00020080, 0x20020080, 0x20800000
	.word	0x00000080, 0x20820000, 0x00820080, 0x00000000
	.word	0x20000000, 0x20800080, 0x00020000, 0x00820080
