1! Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved. 2! 3! Licensed under the OpenSSL license (the "License"). You may not use 4! this file except in compliance with the License. You can obtain a copy 5! in the file LICENSE in the source distribution or at 6! https://www.openssl.org/source/license.html 7! 8! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S 9! 10! Global registers 1 to 5 are used. This is the same as done by the 11! cc compiler. The UltraSPARC load/store little endian feature is used. 12! 13! Instruction grouping often refers to one CPU cycle. 14! 15! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S 16! 17! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S 18! 19! Performance improvement according to './apps/openssl speed des' 20! 21! 32-bit build: 22! 23% faster than cc-5.2 -xarch=v8plus -xO5 23! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5 24! 64-bit build: 25! 50% faster than cc-5.2 -xarch=v9 -xO5 26! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5 27! 
! Assembler identification, ABI selection and register aliases.

.ident "des_enc.m4 2.1"
.file "des_enc-sparc.S"

#include <openssl/opensslconf.h>

#if defined(__SUNPRO_C) && defined(__sparcv9)
# define ABI64 /* They've said -xarch=v9 at command line */
#elif defined(__GNUC__) && defined(__arch64__)
# define ABI64 /* They've said -m64 at command line */
#endif

! Frame size, stack bias, pointer load/store mnemonics and the offset and
! size of the argument save slots. The 64-bit values are selected when
! ABI64 was defined above, the 32-bit values otherwise.

#ifdef ABI64
	.register %g2,#scratch
	.register %g3,#scratch
# define FRAME -192
# define BIAS 2047
# define LDPTR ldx
# define STPTR stx
# define ARG0 128
# define ARGSZ 8
#else
# define FRAME -96
# define BIAS 0
# define LDPTR ld
# define STPTR st
# define ARG0 68
# define ARGSZ 4
#endif

! Initial value of the round loop counter used by DES_encrypt2.

#define LOOPS 7

#define global0 %g0
#define global1 %g1
#define global2 %g2
#define global3 %g3
#define global4 %g4
#define global5 %g5

! Note that the last two aliases below are crossed: the name ending
! in 7 is register l6 and the name ending in 6 is register l7.

#define local0 %l0
#define local1 %l1
#define local2 %l2
#define local3 %l3
#define local4 %l4
#define local5 %l5
#define local7 %l6
#define local6 %l7

#define in0 %i0
#define in1 %i1
#define in2 %i2
#define in3 %i3
#define in4 %i4
#define in5 %i5
#define in6 %i6
#define in7 %i7

#define out0 %o0
#define out1 %o1
#define out2 %o2
#define out3 %o3
#define out4 %o4
#define out5 %o5
#define out6 %o6
#define out7 %o7

! Alias used by the byte store macros.

#define stub stb

changequote({,})


! Macro definitions:


! {ip_macro}
!
! The logic used in initial and final permutations is the same as in
! the C code. The permutations are done with a clever shift, xor and
! AND-mask technique.
!
! The macro also loads address sbox 1 to 5 to global 1 to 5, address
! sbox 6 to local6, and address sbox 8 to out3.
!
! Rotates the halves 3 left to bring the sbox bits in convenient positions.
!
! Loads key first round from address in parameter 5 to out0, out1.
!
! After the original LibDES initial permutation, the resulting left
! is in the variable initially used for right and vice versa. The macro
!
! implements the possibility to keep the halves in the original registers.
!
! parameter 1 left
! parameter 2 right
! parameter 3 result left (modify in first round)
! parameter 4 result right (use in first round)
! parameter 5 key address
! parameter 6 1/2 for include encryption/decryption
! parameter 7 1 for move in1 to in3
! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
! parameter 9 1 for load ks3 and ks2 to in4 and in3
!
! Register contract visible in the body:
!   on entry  global1 holds the address of sbox 1 and out2 addresses a
!             constant table; the permutation masks are read from out2
!             offsets 256, 260, 264 and 272
!   on exit   out4 holds the loop counter read from out2 offset 280
!   clobbers  local1 to local5 and local7
! When parameter 6 is 1 or 2 the macro also loads the 0x0000FC00 mask from
! out2 offset 284 and calls .des_enc.1 or .des_dec.1, computing the sbox 1
! index for the first round in the delay slot of the call.

define(ip_macro, {

! {ip_macro}
! $1 $2 $4 $3 $5 $6 $7 $8 $9

	ld	[out2+256], local1
	srl	$2, 4, local4

	xor	local4, $1, local4
	ifelse($7,1,{mov in1, in3},{nop})

	ld	[out2+260], local2
	and	local4, local1, local4
	ifelse($8,1,{mov in3, in4},{})
	ifelse($8,2,{mov in4, in3},{})

	ld	[out2+280], out4	! loop counter
	sll	local4, 4, local1
	xor	$1, local4, $1

	ld	[out2+264], local3
	srl	$1, 16, local4
	xor	$2, local1, $2

	ifelse($9,1,{LDPTR KS3, in4},{})
	xor	local4, $2, local4
	nop	!sethi %hi(DES_SPtrans), global1 ! sbox addr

	ifelse($9,1,{LDPTR KS2, in3},{})
	and	local4, local2, local4
	nop	!or global1, %lo(DES_SPtrans), global1 ! sbox addr

	sll	local4, 16, local1
	xor	$2, local4, $2

	srl	$2, 2, local4
	xor	$1, local1, $1

	sethi	%hi(16711680), local5
	xor	local4, $1, local4

	and	local4, local3, local4
	or	local5, 255, local5	! local5 is now 0x00ff00ff

	sll	local4, 2, local2
	xor	$1, local4, $1

	srl	$1, 8, local4
	xor	$2, local2, $2

	xor	local4, $2, local4
	add	global1, 768, global4	! address sbox 4

	and	local4, local5, local4
	add	global1, 1024, global5	! address sbox 5

	ld	[out2+272], local7
	sll	local4, 8, local1
	xor	$2, local4, $2

	srl	$2, 1, local4
	xor	$1, local1, $1

	ld	[$5], out0	! key 7531
	xor	local4, $1, local4
	add	global1, 256, global2	! address sbox 2

	ld	[$5+4], out1	! key 8642
	and	local4, local7, local4
	add	global1, 512, global3	! address sbox 3

	sll	local4, 1, local1
	xor	$1, local4, $1

	sll	$1, 3, local3
	xor	$2, local1, $2

	sll	$2, 3, local2
	add	global1, 1280, local6	! address sbox 6

	srl	$1, 29, local4
	add	global1, 1792, out3	! address sbox 8

	srl	$2, 29, local1
	or	local4, local3, $4	! parameter 1 rotated 3 left

	or	local2, local1, $3	! parameter 2 rotated 3 left

	ifelse($6, 1, {

		ld	[out2+284], local5	! 0x0000FC00 used in the rounds
		or	local2, local1, $3
		xor	$4, out0, local1

		call	.des_enc.1
		and	local1, 252, local1

	},{})

	ifelse($6, 2, {

		ld	[out2+284], local5	! 0x0000FC00 used in the rounds
		or	local2, local1, $3
		xor	$4, out0, local1

		call	.des_dec.1
		and	local1, 252, local1

	},{})
})


! {rounds_macro}
!
! The logic used in the DES rounds is the same as in the C code,
! except that calculations for sbox 1 and sbox 5 begin before
! the previous round is finished.
!
! In each round one half (work) is modified based on key and the
! other half (use).
!
! In this version we do two rounds in a loop repeated 7 times
! and two rounds separately.
!
! One half has the bits for the sboxes in the following positions:
!
!	777777xx555555xx333333xx111111xx
!
!	88xx666666xx444444xx222222xx8888
!
! The bits for each sbox are xor-ed with the key bits for that box.
! The above xx bits are cleared, and the result used for lookup in
! the sbox table. Each sbox entry contains the 4 output bits permuted
! into 32 bits according to the P permutation.
!
! In the description of DES, left and right are switched after
! each round, except after last round. In this code the original
! left and right are kept in the same register in all rounds, meaning
! that after the 16 rounds the result for right is in the register
! originally used for left.
!
!
! parameter 1 first work (left in first round)
! parameter 2 first use (right in first round)
! parameter 3 enc/dec 1/-1
! parameter 4 loop label
! parameter 5 key address register
! parameter 6 optional address for key next encryption/decryption
! parameter 7 not empty for include retl
!
! also compares in2 to 8
!
! Expects out0/out1 to hold the key words of the first round, out4 the
! loop counter, global1 to global5, local6 and out3 the sbox addresses,
! and out2 the address of the constant table (offset 284 holds 0x0000FC00;
! byte loads indexed from out2 are used to mask with 0xFC).
! On exit local3 and local4 hold parameter 1 shifted 3 right and 29 left,
! which is what the final permutation macros consume.

define(rounds_macro, {

! {rounds_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	xor	$2, out0, local1

	ld	[out2+284], local5	! 0x0000FC00
	ba	$4
	and	local1, 252, local1

	.align 32

$4:
	! local6 is address sbox 6
	! out3 is address sbox 8
	! out4 is loop counter

	ld	[global1+local1], local1
	xor	$2, out1, out1	! 8642
	xor	$2, out0, out0	! 7531
	! fmovs %f0, %f0 ! fxor used for alignment

	srl	out1, 4, local0	! rotate 4 right
	and	out0, local5, local3	! 3
	! fmovs %f0, %f0

	ld	[$5+$3*8], local7	! key 7531 next round
	srl	local3, 8, local3	! 3
	and	local0, 252, local2	! 2
	! fmovs %f0, %f0

	ld	[global3+local3],local3	! 3
	sll	out1, 28, out1	! rotate
	xor	$1, local1, $1	! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2	! 2
	srl	out0, 24, local1	! 7
	or	out1, local0, out1	! rotate

	ldub	[out2+local1], local1	! 7 (and 0xFC)
	srl	out1, 24, local0	! 8
	and	out1, local5, local4	! 4

	ldub	[out2+local0], local0	! 8 (and 0xFC)
	srl	local4, 8, local4	! 4
	xor	$1, local2, $1	! 2 finished local2 now sbox 6

	ld	[global4+local4],local4	! 4
	srl	out1, 16, local2	! 6
	xor	$1, local3, $1	! 3 finished local3 now sbox 5

	ld	[out3+local0],local0	! 8
	and	local2, 252, local2	! 6
	add	global1, 1536, local5	! address sbox 7

	ld	[local6+local2], local2	! 6
	srl	out0, 16, local3	! 5
	xor	$1, local4, $1	! 4 finished

	ld	[local5+local1],local1	! 7
	and	local3, 252, local3	! 5
	xor	$1, local0, $1	! 8 finished

	ld	[global5+local3],local3	! 5
	xor	$1, local2, $1	! 6 finished
	subcc	out4, 1, out4	! sets the condition codes tested by bne below

	ld	[$5+$3*8+4], out0	! key 8642 next round
	xor	$1, local7, local2	! sbox 5 next round
	xor	$1, local1, $1	! 7 finished

	srl	local2, 16, local2	! sbox 5 next round
	xor	$1, local3, $1	! 5 finished

	ld	[$5+$3*16+4], out1	! key 8642 next round again
	and	local2, 252, local2	! sbox5 next round
! next round
	xor	$1, local7, local7	! 7531

	ld	[global5+local2], local2	! 5
	srl	local7, 24, local3	! 7
	xor	$1, out0, out0	! 8642

	ldub	[out2+local3], local3	! 7 (and 0xFC)
	srl	out0, 4, local0	! rotate 4 right
	and	local7, 252, local1	! 1

	sll	out0, 28, out0	! rotate
	xor	$2, local2, $2	! 5 finished local2 used

	srl	local0, 8, local4	! 4
	and	local0, 252, local2	! 2
	ld	[local5+local3], local3	! 7

	srl	local0, 16, local5	! 6
	or	out0, local0, out0	! rotate
	ld	[global2+local2], local2	! 2

	srl	out0, 24, local0
	ld	[$5+$3*16], out0	! key 7531 next round
	and	local4, 252, local4	! 4

	and	local5, 252, local5	! 6
	ld	[global4+local4], local4	! 4
	xor	$2, local3, $2	! 7 finished local3 used

	and	local0, 252, local0	! 8
	ld	[local6+local5], local5	! 6
	xor	$2, local2, $2	! 2 finished local2 now sbox 3

	srl	local7, 8, local2	! 3 start
	ld	[out3+local0], local0	! 8
	xor	$2, local4, $2	! 4 finished

	and	local2, 252, local2	! 3
	ld	[global1+local1], local1	! 1
	xor	$2, local5, $2	! 6 finished local5 used

	ld	[global3+local2], local2	! 3
	xor	$2, local0, $2	! 8 finished
	add	$5, $3*16, $5	! enc add 16, dec add -16 to key pointer (two 8-byte round keys)

	ld	[out2+284], local5	! 0x0000FC00
	xor	$2, out0, local4	! sbox 1 next round
	xor	$2, local1, $2	! 1 finished

	xor	$2, local2, $2	! 3 finished
	bne	$4
	and	local4, 252, local1	! sbox 1 next round

! two rounds more:

	ld	[global1+local1], local1
	xor	$2, out1, out1
	xor	$2, out0, out0

	srl	out1, 4, local0	! rotate
	and	out0, local5, local3

	ld	[$5+$3*8], local7	! key 7531
	srl	local3, 8, local3
	and	local0, 252, local2

	ld	[global3+local3],local3
	sll	out1, 28, out1	! rotate
	xor	$1, local1, $1	! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2
	srl	out0, 24, local1
	or	out1, local0, out1	! rotate

	ldub	[out2+local1], local1
	srl	out1, 24, local0
	and	out1, local5, local4

	ldub	[out2+local0], local0
	srl	local4, 8, local4
	xor	$1, local2, $1	! 2 finished local2 now sbox 6

	ld	[global4+local4],local4
	srl	out1, 16, local2
	xor	$1, local3, $1	! 3 finished local3 now sbox 5

	ld	[out3+local0],local0
	and	local2, 252, local2
	add	global1, 1536, local5	! address sbox 7

	ld	[local6+local2], local2
	srl	out0, 16, local3
	xor	$1, local4, $1	! 4 finished

	ld	[local5+local1],local1
	and	local3, 252, local3
	xor	$1, local0, $1

	ld	[global5+local3],local3
	xor	$1, local2, $1	! 6 finished
	cmp	in2, 8	! result is tested by the code following the rounds

	ifelse($6,{}, {}, {ld [out2+280], out4})	! loop counter
	xor	$1, local7, local2	! sbox 5 next round
	xor	$1, local1, $1	! 7 finished

	ld	[$5+$3*8+4], out0
	srl	local2, 16, local2	! sbox 5 next round
	xor	$1, local3, $1	! 5 finished

	and	local2, 252, local2
! next round (two rounds more)
	xor	$1, local7, local7	! 7531

	ld	[global5+local2], local2
	srl	local7, 24, local3
	xor	$1, out0, out0	! 8642

	ldub	[out2+local3], local3
	srl	out0, 4, local0	! rotate
	and	local7, 252, local1

	sll	out0, 28, out0	! rotate
	xor	$2, local2, $2	! 5 finished local2 used

	srl	local0, 8, local4
	and	local0, 252, local2
	ld	[local5+local3], local3

	srl	local0, 16, local5
	or	out0, local0, out0	! rotate
	ld	[global2+local2], local2

	srl	out0, 24, local0
	ifelse($6,{}, {}, {ld [$6], out0})	! key next encryption/decryption
	and	local4, 252, local4

	and	local5, 252, local5
	ld	[global4+local4], local4
	xor	$2, local3, $2	! 7 finished local3 used

	and	local0, 252, local0
	ld	[local6+local5], local5
	xor	$2, local2, $2	! 2 finished local2 now sbox 3

	srl	local7, 8, local2	! 3 start
	ld	[out3+local0], local0
	xor	$2, local4, $2

	and	local2, 252, local2
	ld	[global1+local1], local1
	xor	$2, local5, $2	! 6 finished local5 used

	ld	[global3+local2], local2
	srl	$1, 3, local3	! first half of undoing the rotate 3 left
	xor	$2, local0, $2

	ifelse($6,{}, {}, {ld [$6+4], out1})	! key next encryption/decryption
	sll	$1, 29, local4	! second half, combined later with local3
	xor	$2, local1, $2

	ifelse($7,{}, {}, {retl})
	xor	$2, local2, $2	! executes in the delay slot when retl is included
})


! {fp_macro}
!
! parameter 1 right (original left)
! parameter 2 left (original right)
! parameter 3 1 for optional store to [in0]
! parameter 4 1 for load input/output address to local5/7
!
! The final permutation logic switches the halves, meaning that
! left and right end up in the registers originally used.
!
! Clobbers local1 to local4, and local5/local7 when parameter 4 is 1.

define(fp_macro, {

! {fp_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! initially undo the rotate 3 left done after initial permutation
	! original left is received shifted 3 right and 29 left in local3/4

	sll	$2, 29, local1
	or	local3, local4, $1

	srl	$2, 3, $2
	sethi	%hi(0x55555555), local2

	or	$2, local1, $2
	or	local2, %lo(0x55555555), local2

	srl	$2, 1, local3
	sethi	%hi(0x00ff00ff), local1
	xor	local3, $1, local3
	or	local1, %lo(0x00ff00ff), local1
	and	local3, local2, local3
	sethi	%hi(0x33333333), local4
	sll	local3, 1, local2

	xor	$1, local3, $1

	srl	$1, 8, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x33333333), local4
	and	local3, local1, local3
	sethi	%hi(0x0000ffff), local1
	sll	local3, 8, local2

	xor	$2, local3, $2

	srl	$2, 2, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	or	local1, %lo(0x0000ffff), local1
	and	local3, local4, local3
	sethi	%hi(0x0f0f0f0f), local4
	sll	local3, 2, local2

	ifelse($4,1, {LDPTR INPUT, local5})
	xor	$1, local3, $1

	ifelse($4,1, {LDPTR OUTPUT, local7})
	srl	$1, 16, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x0f0f0f0f), local4
	and	local3, local1, local3
	sll	local3, 16, local2

	xor	$2, local3, local1

	srl	local1, 4, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	and	local3, local4, local3
	sll	local3, 4, local2

	xor	$1, local3, $1

	! optional store:

	ifelse($3,1, {st $1, [in0]})

	xor	local1, local2, $2

	ifelse($3,1, {st $2, [in0+4]})

})


! {fp_ip_macro}
!
! Does initial permutation for next block mixed with
! final permutation for current block.
!
! parameter 1 original left
! parameter 2 original right
! parameter 3 left ip
! parameter 4 right ip
! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
!             2: mov in4 to in3
!
! also adds -8 to length in2 and loads loop counter to out4
!
! The current block is taken from local3/local4 and out5; the masks are
! read from the table addressed by out2 (offsets 256 to 272).

define(fp_ip_macro, {

! {fp_ip_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	define({temp1},{out4})
	define({temp2},{local3})

	define({ip1},{local1})
	define({ip2},{local2})
	define({ip4},{local4})
	define({ip5},{local5})

	! $1 in local3, local4

	ld	[out2+256], ip1
	sll	out5, 29, temp1
	or	local3, local4, $1

	srl	out5, 3, $2
	ifelse($5,2,{mov in4, in3})

	ld	[out2+272], ip5
	srl	$4, 4, local0
	or	$2, temp1, $2

	srl	$2, 1, temp1
	xor	temp1, $1, temp1

	and	temp1, ip5, temp1
	xor	local0, $3, local0

	sll	temp1, 1, temp2
	xor	$1, temp1, $1

	and	local0, ip1, local0
	add	in2, -8, in2

	sll	local0, 4, local7
	xor	$3, local0, $3

	ld	[out2+268], ip4
	srl	$1, 8, temp1
	xor	$2, temp2, $2
	ld	[out2+260], ip2
	srl	$3, 16, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip4, temp1
	and	local0, ip2, local0
	sll	temp1, 8, temp2
	xor	$2, temp1, $2
	sll	local0, 16, local7
	xor	$4, local0, $4

	srl	$2, 2, temp1
	xor	$1, temp2, $1

	ld	[out2+264], temp2	! ip3
	srl	$4, 2, local0
	xor	$3, local7, $3
	xor	temp1, $1, temp1
	xor	local0, $3, local0
	and	temp1, temp2, temp1
	and	local0, temp2, local0
	sll	temp1, 2, temp2
	xor	$1, temp1, $1
	sll	local0, 2, local7
	xor	$3, local0, $3

	srl	$1, 16, temp1
	xor	$2, temp2, $2
	srl	$3, 8, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip2, temp1
	and	local0, ip4, local0
	sll	temp1, 16, temp2
	xor	$2, temp1, local4
	sll	local0, 8, local7
	xor	$4, local0, $4

	srl	$4, 1, local0
	xor	$3, local7, $3

	srl	local4, 4, temp1
	xor	local0, $3, local0

	xor	$1, temp2, $1
	and	local0, ip5, local0

	sll	local0, 1, local7
	xor	temp1, $1, temp1

	xor	$3, local0, $3
	xor	$4, local7, $4

	sll	$3, 3, local5
	and	temp1, ip1, temp1

	sll	temp1, 4, temp2
	xor	$1, temp1, $1

	ifelse($5,1,{LDPTR KS2, in4})
	sll	$4, 3, local2
	xor	local4, temp2, $2

	! reload since used as temporary:

	ld	[out2+280], out4	! loop counter

	srl	$3, 29, local0
	ifelse($5,1,{add in4, 120, in4})

	ifelse($5,1,{LDPTR KS1, in3})
	srl	$4, 29, local7

	or	local0, local5, $4
	or	local2, local7, $3

})



! {load_little_endian}
!
! Loads two 32-bit words byte by byte, so the address needs no alignment.
!
! parameter 1 address
! parameter 2 destination left
! parameter 3 destination right
! parameter 4 temporary
! parameter 5 label

define(load_little_endian, {

! {load_little_endian}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2


	ldub	[$1+3+4], $3

	ldub	[$1+2+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+0+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3
$5a:

})


! {load_little_endian_inc}
!
! parameter 1 address
! parameter 2 destination left
! parameter 3 destination right
! parameter 4 temporary
! parameter 5 label
!
! adds 8 to address

define(load_little_endian_inc, {

! {load_little_endian_inc}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+3+4], $3
	add	$1, 8, $1	! address advanced here, hence the -8 in the offsets below

	ldub	[$1+2+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+0+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3
$5a:

})


! {load_n_bytes}
!
! Loads 1 to 7 bytes little endian
! Remaining bytes are zeroed.
!
! parameter 1 address
! parameter 2 length
! parameter 3 destination register left
! parameter 4 destination register right
! parameter 5 temp
! parameter 6 temp2
! parameter 7 label
! parameter 8 return label
!
! Dispatches through a table of offsets relative to the first label, using
! the address that call leaves in o7; the entry points fall through so that
! entry n loads bytes n-1 down to 0. Clobbers o7.

define(load_n_bytes, {

! {load_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9

$7.0:	call	.+8
	sll	$2, 2, $6	! table index: length times 4

	add	%o7,$7.jmp.table-$7.0,$5

	add	$5, $6, $5
	mov	0, $4

	ld	[$5], $5

	jmp	%o7+$5
	mov	0, $3

$7.7:
	ldub	[$1+6], $5
	sll	$5, 16, $5
	or	$3, $5, $3
$7.6:
	ldub	[$1+5], $5
	sll	$5, 8, $5
	or	$3, $5, $3
$7.5:
	ldub	[$1+4], $5
	or	$3, $5, $3
$7.4:
	ldub	[$1+3], $5
	sll	$5, 24, $5
	or	$4, $5, $4
$7.3:
	ldub	[$1+2], $5
	sll	$5, 16, $5
	or	$4, $5, $4
$7.2:
	ldub	[$1+1], $5
	sll	$5, 8, $5
	or	$4, $5, $4
$7.1:
	ldub	[$1+0], $5
	ba	$8
	or	$4, $5, $4

	.align 4

$7.jmp.table:
	.word	0
	.word	$7.1-$7.0
	.word	$7.2-$7.0
	.word	$7.3-$7.0
	.word	$7.4-$7.0
	.word	$7.5-$7.0
	.word	$7.6-$7.0
	.word	$7.7-$7.0
})


! {store_little_endian}
!
! Stores two 32-bit words byte by byte, so the address needs no alignment.
!
! parameter 1 address
! parameter 2 source left
! parameter 3 source right
! parameter 4 temporary
! parameter 5 label

define(store_little_endian, {

! {store_little_endian}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! rightmost in register to first in memory

$5:
	and	$2, 255, $4
	stub	$4, [$1+0]

	srl	$2, 8, $4
	and	$4, 255, $4
	stub	$4, [$1+1]

	srl	$2, 16, $4
	and	$4, 255, $4
	stub	$4, [$1+2]

	srl	$2, 24, $4
	stub	$4, [$1+3]


	and	$3, 255, $4
	stub	$4, [$1+0+4]

	srl	$3, 8, $4
	and	$4, 255, $4
	stub	$4, [$1+1+4]

	srl	$3, 16, $4
	and	$4, 255, $4
	stub	$4, [$1+2+4]

	srl	$3, 24, $4
	stub	$4, [$1+3+4]

$5a:

})


! {store_n_bytes}
!
! Stores 1 to 7 bytes little endian
!
! parameter 1 address
! parameter 2 length
! parameter 3 source register left
! parameter 4 source register right
! parameter 5 temp
! parameter 6 temp2
! parameter 7 label
! parameter 8 return label
!
! Uses the same offset table dispatch as the n-byte load macro; entry n
! stores bytes n-1 down to 0. Clobbers o7.

define(store_n_bytes, {

! {store_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9

$7.0:	call	.+8
	sll	$2, 2, $6	! table index: length times 4

	add	%o7,$7.jmp.table-$7.0,$5

	add	$5, $6, $5

	ld	[$5], $5

	jmp	%o7+$5
	nop

$7.7:
	srl	$3, 16, $5
	and	$5, 0xff, $5
	stub	$5, [$1+6]
$7.6:
	srl	$3, 8, $5
	and	$5, 0xff, $5
	stub	$5, [$1+5]
$7.5:
	and	$3, 0xff, $5
	stub	$5, [$1+4]
$7.4:
	srl	$4, 24, $5
	stub	$5, [$1+3]
$7.3:
	srl	$4, 16, $5
	and	$5, 0xff, $5
	stub	$5, [$1+2]
$7.2:
	srl	$4, 8, $5
	and	$5, 0xff, $5
	stub	$5, [$1+1]
$7.1:
	and	$4, 0xff, $5


	ba	$8
	stub	$5, [$1]	! last byte is stored in the delay slot

	.align 4

$7.jmp.table:

	.word	0
	.word	$7.1-$7.0
	.word	$7.2-$7.0
	.word	$7.3-$7.0
	.word	$7.4-$7.0
	.word	$7.5-$7.0
	.word	$7.6-$7.0
	.word	$7.7-$7.0
})


! Value written to the registers by the test macro below.

define(testvalue,{1})

! Test helper: fills local0, the up to eight registers given as
! parameters, and the working registers listed in the body with the
! test value. It is not invoked in the part of the file shown here.

define(register_init, {

! For test purposes:

	sethi	%hi(testvalue), local0
	or	local0, %lo(testvalue), local0

	ifelse($1,{},{}, {mov local0, $1})
	ifelse($2,{},{}, {mov local0, $2})
	ifelse($3,{},{}, {mov local0, $3})
	ifelse($4,{},{}, {mov local0, $4})
	ifelse($5,{},{}, {mov local0, $5})
	ifelse($6,{},{}, {mov local0, $6})
	ifelse($7,{},{}, {mov local0, $7})
	ifelse($8,{},{}, {mov local0, $8})

	mov	local0, local1
	mov	local0, local2
	mov	local0, local3
	mov	local0, local4
	mov	local0, local5
	mov	local0, local7
	mov	local0, local6
	mov	local0, out0
	mov	local0, out1
	mov	local0, out2
	mov	local0, out3
	mov	local0, out4
	mov	local0, out5
	mov	local0, global1
	mov	local0, global2
	mov	local0, global3
	mov	local0, global4
	mov	local0, global5

})

.section ".text"

	.align 32

! Internal encryption rounds. Entered with call and left with retl, so it
! runs in the register window of its caller. The label .des_enc.1 inside
! is the loop entry that the initial permutation macro calls directly.

.des_enc:

	! key address in3
	! loads key next encryption/decryption first round from [in4]

	rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)


	.align 32

! Internal decryption rounds, the counterpart of .des_enc with the key
! pointer stepping backwards (parameter 3 is -1).

.des_dec:

	! implemented with out5 as first parameter to avoid
	! register exchange in ede modes

	! key address in4
	! loads key next encryption/decryption first round from [in3]

	rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)



! void DES_encrypt1(data, ks, enc)
! *******************************
!
! in0 data (two 32-bit words, read and then overwritten with the result)
! in1 ks
! in2 enc, 0 selects the decryption path

	.align 32
	.global DES_encrypt1
	.type DES_encrypt1,#function

DES_encrypt1:

	save	%sp, FRAME, %sp

	! position independent address of the sbox table to global1,
	! address of the constant table .des_and to out2

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5	! left
	cmp	in2, 0	! enc

	be	.encrypt.dec
	ld	[in0+4], out5	! right

	! parameter 6 1/2 for include encryption/decryption
	! parameter 7 1 for move in1 to in3
	! parameter 8 1 for move in3 to in4, 2 for move in4 to in3

	ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)

	rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used

	fp_macro(in5, out5, 1) ! 1 for store to [in0]

	ret
	restore

.encrypt.dec:

	add	in1, 120, in3	! use last subkey for first round

	! parameter 6 1/2 for include encryption/decryption
	! parameter 7 1 for move in1 to in3
	! parameter 8 1 for move in3 to in4, 2 for move in4 to in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4

	fp_macro(out5, in5, 1) ! 1 for store to [in0]

	ret
	restore

.DES_encrypt1.end:
	.size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1


! void DES_encrypt2(data, ks, enc)
!*********************************

	!
! encrypts/decrypts without initial/final permutation
!
! in0 data (two 32-bit words, read and then overwritten with the result)
! in1 ks
! in2 enc, 0 selects the decryption path

	.align 32
	.global DES_encrypt2
	.type DES_encrypt2,#function

DES_encrypt2:

	save	%sp, FRAME, %sp

	! position independent address of the sbox table to global1,
	! address of the constant table .des_and to out2

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	! Set sbox address 1 to 6 and rotate halves 3 left
	! Errors caught by destest? Yes. Still? *NO*

	!sethi %hi(DES_SPtrans), global1 ! address sbox 1

	!or global1, %lo(DES_SPtrans), global1 ! sbox 1

	add	global1, 256, global2	! sbox 2
	add	global1, 512, global3	! sbox 3

	ld	[in0], out5	! right
	add	global1, 768, global4	! sbox 4
	add	global1, 1024, global5	! sbox 5

	ld	[in0+4], in5	! left
	add	global1, 1280, local6	! sbox 6
	add	global1, 1792, out3	! sbox 8

	! rotate

	sll	in5, 3, local5
	mov	in1, in3	! key address to in3

	sll	out5, 3, local7
	srl	in5, 29, in5

	srl	out5, 29, out5
	add	in5, local5, in5

	add	out5, local7, out5
	cmp	in2, 0

	! we use our own stackframe

	be	.encrypt2.dec
	STPTR	in0, [%sp+BIAS+ARG0+0*ARGSZ]	! save data pointer, in0 is reused below

	ld	[in3], out0	! key 7531 first round
	mov	LOOPS, out4	! loop counter

	ld	[in3+4], out1	! key 8642 first round
	sethi	%hi(0x0000FC00), local5

	call	.des_enc
	mov	in3, in4

	! rotate
	sll	in5, 29, in0
	srl	in5, 3, in5
	sll	out5, 29, in1
	add	in5, in0, in5
	srl	out5, 3, out5
	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0	! restore data pointer
	add	out5, in1, out5
	st	in5, [in0]
	st	out5, [in0+4]

	ret
	restore


.encrypt2.dec:

	add	in3, 120, in4	! decryption starts at offset 120 of the schedule

	ld	[in4], out0	! key 7531 first round
	mov	LOOPS, out4	! loop counter

	ld	[in4+4], out1	! key 8642 first round
	sethi	%hi(0x0000FC00), local5

	mov	in5, local1	! left expected in out5
	mov	out5, in5

	call	.des_dec
	mov	local1, out5

.encrypt2.finish:

	! rotate
	sll	in5, 29, in0
	srl	in5, 3, in5
	sll	out5, 29, in1
	add	in5, in0, in5
	srl	out5, 3, out5
	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0	! restore data pointer
	add	out5, in1, out5
	st	out5, [in0]
	st	in5, [in0+4]

	ret
	restore

.DES_encrypt2.end:
	.size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2


! void DES_encrypt3(data, ks1, ks2, ks3)
! **************************************
!
! in0 data, in1 ks1, in2 ks2, in3 ks3
! Runs encryption rounds with ks1, decryption rounds with ks2 and
! encryption rounds with ks3 between one initial and one final permutation.

	.align 32
	.global DES_encrypt3
	.type DES_encrypt3,#function

DES_encrypt3:

	save	%sp, FRAME, %sp

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5	! left
	add	in2, 120, in4	! ks2

	ld	[in0+4], out5	! right
	mov	in3, in2	! save ks3

	! parameter 6 1/2 for include encryption/decryption
	! parameter 7 1 for mov in1 to in3
	! parameter 8 1 for mov in3 to in4
	! parameter 9 1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)

	call	.des_dec
	mov	in2, in3	! preload ks3

	call	.des_enc
	nop

	fp_macro(in5, out5, 1)

	ret
	restore

.DES_encrypt3.end:
	.size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3


! void DES_decrypt3(data, ks1, ks2, ks3)
! **************************************
!
! in0 data, in1 ks1, in2 ks2, in3 ks3
! Runs decryption rounds with ks3, encryption rounds with ks2 and
! decryption rounds with ks1 between one initial and one final permutation.

	.align 32
	.global DES_decrypt3
	.type DES_decrypt3,#function

DES_decrypt3:

	save	%sp, FRAME, %sp

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5	! left
	add	in3, 120, in4	! ks3

	ld	[in0+4], out5	! right
	mov	in2, in3	! ks2

	! parameter 6 1/2 for include encryption/decryption
	! parameter 7 1 for mov in1 to in3
	! parameter 8 1 for mov in3 to in4
	! parameter 9 1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)

	call	.des_enc
	add	in1, 120, in4	! preload ks1

	call	.des_dec
	nop

	fp_macro(out5, in5, 1)

	ret
	restore

.DES_decrypt3.end:
	.size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3

! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
! *****************************************************************
!
! in0 input, in1 output, in2 length, in3 schedule, in4 ivec, in5 enc
! enc == 0 selects the decryption path. The chaining value is written
! back to ivec when encrypting, and when decrypting a length above 0.


	.align 32
	.global DES_ncbc_encrypt
	.type DES_ncbc_encrypt,#function

DES_ncbc_encrypt:

	save	%sp, FRAME, %sp

	! stack slots used to save the three pointers

	define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
	define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
	define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	cmp	in5, 0	! enc

	be	.ncbc.dec
	STPTR	in4, IVEC

	! addr left right temp label
	load_little_endian(in4, in5, out5, local3, .LLE1) ! iv

	addcc	in2, -8, in2	! bytes missing when first block done

	bl	.ncbc.enc.seven.or.less
	mov	in3, in4	! schedule

.ncbc.enc.next.block:

	load_little_endian(in0, out4, global4, local3, .LLE2) ! block

.ncbc.enc.next.block_1:

	xor	in5, out4, in5	! iv xor
	xor	out5, global4, out5	! iv xor

	! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
	ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)

.ncbc.enc.next.block_2:

!//	call .des_enc ! compares in2 to 8
!	rounds inlined for alignment purposes

	add	global1, 768, global4	! address sbox 4 since register used below

	rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3

	bl	.ncbc.enc.next.block_fp
	add	in0, 8, in0	! input address

	! If 8 or more bytes are to be encrypted after this block,
	! we combine final permutation for this block with initial
	! permutation for next block. Load next block:

	load_little_endian(in0, global3, global4, local5, .LLE12)

	! parameter 1 original left
	! parameter 2 original right
	! parameter 3 left ip
	! parameter 4 right ip
	! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
	!             2: mov in4 to in3
	!
	! also adds -8 to length in2 and loads loop counter to out4

	fp_ip_macro(out0, out1, global3, global4, 2)

	store_little_endian(in1, out0, out1, local3, .SLE10) ! block

	ld	[in3], out0	! key 7531 first round next block
	mov	in5, local1
	xor	global3, out5, in5	! iv xor next block

	ld	[in3+4], out1	! key 8642
	add	global1, 512, global3	! address sbox 3 since register used
	xor	global4, local1, out5	! iv xor next block

	ba	.ncbc.enc.next.block_2
	add	in1, 8, in1	! output address

.ncbc.enc.next.block_fp:

	fp_macro(in5, out5)

	store_little_endian(in1, in5, out5, local3, .SLE1) ! block

	addcc	in2, -8, in2	! bytes missing when next block done

	bpos	.ncbc.enc.next.block
	add	in1, 8, in1

.ncbc.enc.seven.or.less:

	cmp	in2, -8

	ble	.ncbc.enc.finish
	nop

	add	in2, 8, local1	! bytes to load

	! addr, length, dest left, dest right, temp, temp2, label, ret label
	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)

	! Loads 1 to 7 bytes little endian to global4, out4


.ncbc.enc.finish:

	LDPTR	IVEC, local4
	store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec

	ret
	restore


.ncbc.dec:

	STPTR	in0, INPUT
	cmp	in2, 0	! length
	add	in3, 120, in3

	LDPTR	IVEC, local7	! ivec
	ble	.ncbc.dec.finish
	mov	in3, in4	! schedule

	STPTR	in1, OUTPUT
	mov	in0, local5	! input

	load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec

.ncbc.dec.next.block:

	load_little_endian(local5, in5, out5, local3, .LLE4) ! block

	! parameter 6 1/2 for include encryption/decryption
	! parameter 7 1 for mov in1 to in3
	! parameter 8 1 for mov in3 to in4

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4

	fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7

	! in2 is bytes left to be stored
	! in2 is compared to 8 in the rounds

	xor	out5, in0, out4	! iv xor
	bl	.ncbc.dec.seven.or.less
	xor	in5, in1, global4	! iv xor

	! Load ivec next block now, since input and output address might be the same.

	load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv

	store_little_endian(local7, out4, global4, local3, .SLE3)

	STPTR	local5, INPUT
	add	local7, 8, local7
	addcc	in2, -8, in2

	bg	.ncbc.dec.next.block
	STPTR	local7, OUTPUT


.ncbc.dec.store.iv:

	LDPTR	IVEC, local4	! ivec
	store_little_endian(local4, in0, in1, local5, .SLE4)

.ncbc.dec.finish:

	ret
	restore

.ncbc.dec.seven.or.less:

	load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec

	! stores the last in2 bytes, then branches to .ncbc.dec.store.iv
	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)


.DES_ncbc_encrypt.end:
	.size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt


! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
!
! **************************************************************************
!
! DES with three key schedules in CBC mode.  Arguments after the save:
!   in0 input, in1 output, in2 length in bytes,
!   in3 ks1, in4 ks2, in5 ks3
! ivec and enc do not arrive in registers; they are read from the
! caller frame at %fp+BIAS+ARG0+6*ARGSZ and %fp+BIAS+ARG0+7*ARGSZ.
! enc equal to 0 takes the .ede3.dec path, any other value encrypts.
!
! Encryption runs .des_enc with ks1, .des_dec with ks2 and .des_enc
! with ks3 per block; decryption runs .des_dec with ks3, .des_enc with
! ks2 and .des_dec with ks1.
!
! The {INPUT} and {OUTPUT} stack slots used on the decrypt path are the
! m4 definitions made inside DES_ncbc_encrypt.
!
! NOTE(review): the length tests rely on the plain bl/ble/bg/bpos
! branches; confirm for the ABI64 build that lengths wider than 32 bits
! are not expected here.


	.align 32
	.global DES_ede3_cbc_encrypt
	.type	DES_ede3_cbc_encrypt,#function

DES_ede3_cbc_encrypt:

	save	%sp, FRAME, %sp

	! Stack slots addressed from the new %sp that keep the three key
	! schedule pointers while in3, in4 and in5 are reused.
	define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
	define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
	define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })

	! Position independent address computation: call .+8 leaves the
	! address of label 1 in %o7, so after the add in its delay slot
	! global1 holds the address of .PIC.DES_SPtrans.  out2 then
	! receives the address of .des_and.
	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	LDPTR	[%fp+BIAS+ARG0+7*ARGSZ], local3          ! enc
	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
	cmp	local3, 0                 ! enc

	be	.ede3.dec
	STPTR	in4, KS2                  ! delay slot, runs on both paths

	STPTR	in5, KS3

	! From here on in5/out5 carry the chaining value.

	load_little_endian(local4, in5, out5, local3, .LLE6)  ! ivec

	addcc	in2, -8, in2              ! bytes missing after next block

	bl	.ede3.enc.seven.or.less
	STPTR	in3, KS1

.ede3.enc.next.block:

	load_little_endian(in0, out4, global4, local3, .LLE7)

	! Also entered from {load_n_bytes} below with a short last block
	! already placed in global4, out4.

.ede3.enc.next.block_1:

	LDPTR	KS2, in4
	xor	in5, out4, in5            ! iv xor
	xor	out5, global4, out5       ! iv xor

	LDPTR	KS1, in3
	add	in4, 120, in4             ! for decryption we use last subkey first
	nop

	ip_macro(in5, out5, in5, out5, in3)

.ede3.enc.next.block_2:

	call .des_enc                     ! ks1 in3
	nop

	call .des_dec                     ! ks2 in4
	LDPTR	KS3, in3

	call .des_enc                     ! ks3 in3  compares in2 to 8
	nop

	! The branch below uses the flags of that in2 versus 8 compare.

	bl	.ede3.enc.next.block_fp
	add	in0, 8, in0

	! If 8 or more bytes are to be encrypted after this block,
	! we combine final permutation for this block with initial
	! permutation for next block. Load next block:

	load_little_endian(in0, global3, global4, local5, .LLE11)

	!  parameter 1   original left
	!  parameter 2   original right
	!  parameter 3   left ip
	!  parameter 4   right ip
	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
	!                2: mov in4 to in3
	!
	! also adds -8 to length in2 and loads loop counter to out4

	fp_ip_macro(out0, out1, global3, global4, 1)

	store_little_endian(in1, out0, out1, local3, .SLE9)  ! block

	! local1 keeps the old in5 so that it is still available for the
	! second xor after in5 has been overwritten.

	mov	in5, local1
	xor	global3, out5, in5        ! iv xor next block

	ld	[in3], out0               ! key 7531
	add	global1, 512, global3     ! address sbox 3
	xor	global4, local1, out5     ! iv xor next block

	ld	[in3+4], out1             ! key 8642
	add	global1, 768, global4     ! address sbox 4
	ba	.ede3.enc.next.block_2
	add	in1, 8, in1

.ede3.enc.next.block_fp:

	fp_macro(in5, out5)

	store_little_endian(in1, in5, out5, local3, .SLE5)  ! block

	addcc	in2, -8, in2              ! bytes missing when next block done

	! Keep looping while in2 is still non-negative after the subtract.

	bpos	.ede3.enc.next.block
	add	in1, 8, in1

.ede3.enc.seven.or.less:

	! in2 is the remaining byte count minus 8 at this point, so a
	! value of -8 or less means nothing is left to encrypt.

	cmp	in2, -8

	ble	.ede3.enc.finish
	nop

	add	in2, 8, local1            ! bytes to load

	! addr, length, dest left, dest right, temp, temp2, label, ret label
	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)

.ede3.enc.finish:

	! Write the chaining value back through the ivec pointer.

	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
	store_little_endian(local4, in5, out5, local5, .SLE6)  ! ivec

	ret
	restore

.ede3.dec:

	! ks1 and ks3 are advanced by 120 before being saved; ks2 was
	! saved unchanged in the delay slot of the branch that led here.
	! in0/in1 are reused for the iv below, so the input pointer moves
	! to local5 and both pointers are parked on the stack.

	STPTR	in0, INPUT
	add	in5, 120, in5

	STPTR	in1, OUTPUT
	mov	in0, local5
	add	in3, 120, in3

	STPTR	in3, KS1
	cmp	in2, 0

	ble	.ede3.dec.finish
	STPTR	in5, KS3

	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local7          ! iv
	load_little_endian(local7, in0, in1, local3, .LLE8)

.ede3.dec.next.block:

	load_little_endian(local5, in5, out5, local3, .LLE9)

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4

	call .des_enc                     ! ks2 in3
	LDPTR	KS1, in4

	call .des_dec                     ! ks1 in4
	nop

	fp_macro(out5, in5, 0, 1)   ! 1 for input and output address local5/7

	! in2 is bytes left to be stored
	! in2 is compared to 8 in the rounds

	xor	out5, in0, out4
	bl	.ede3.dec.seven.or.less
	xor	in5, in1, global4

	! NOTE(review): {load_little_endian_inc} presumably also advances
	! local5 past the block, since local5 is stored back to the input
	! slot without an explicit add; confirm against the macro.

	load_little_endian_inc(local5, in0, in1, local3, .LLE10)   ! iv next block

	store_little_endian(local7, out4, global4, local3, .SLE7)  ! block

	STPTR	local5, INPUT
	addcc	in2, -8, in2
	add	local7, 8, local7

	bg	.ede3.dec.next.block
	STPTR	local7, OUTPUT

.ede3.dec.store.iv:

	! in0/in1 hold the most recently loaded input block, which is
	! written back as the new iv.

	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
	store_little_endian(local4, in0, in1, local5, .SLE8)  ! ivec

.ede3.dec.finish:

	ret
	restore

.ede3.dec.seven.or.less:

	! Short last block: pick up the next iv, then store only in2
	! bytes; .ede3.dec.store.iv is handed over as the last macro
	! parameter.

	load_little_endian_inc(local5, in0, in1, local3, .LLE14)     ! iv

	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)


.DES_ede3_cbc_encrypt.end:
	.size	DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt

	.align 256
	.type	.des_and,#object
	! The object is 256 table bytes followed by eight 32-bit words,
	! the last of them at offset 284, which makes 288 bytes in total.
	.size	.des_and,288

.des_and:

! This table is used for AND 0xFC when it is known that register
! bits 8-31 are zero. Makes it possible to do three arithmetic
! operations in one cycle.
! Entry i of the 256 byte table below holds i with its two low bits
! cleared, that is i AND 0xFC.

	.byte  0, 0, 0, 0, 4, 4, 4, 4
	.byte  8, 8, 8, 8, 12, 12, 12, 12
	.byte  16, 16, 16, 16, 20, 20, 20, 20
	.byte  24, 24, 24, 24, 28, 28, 28, 28
	.byte  32, 32, 32, 32, 36, 36, 36, 36
	.byte  40, 40, 40, 40, 44, 44, 44, 44
	.byte  48, 48, 48, 48, 52, 52, 52, 52
	.byte  56, 56, 56, 56, 60, 60, 60, 60
	.byte  64, 64, 64, 64, 68, 68, 68, 68
	.byte  72, 72, 72, 72, 76, 76, 76, 76
	.byte  80, 80, 80, 80, 84, 84, 84, 84
	.byte  88, 88, 88, 88, 92, 92, 92, 92
	.byte  96, 96, 96, 96, 100, 100, 100, 100
	.byte  104, 104, 104, 104, 108, 108, 108, 108
	.byte  112, 112, 112, 112, 116, 116, 116, 116
	.byte  120, 120, 120, 120, 124, 124, 124, 124
	.byte  128, 128, 128, 128, 132, 132, 132, 132
	.byte  136, 136, 136, 136, 140, 140, 140, 140
	.byte  144, 144, 144, 144, 148, 148, 148, 148
	.byte  152, 152, 152, 152, 156, 156, 156, 156
	.byte  160, 160, 160, 160, 164, 164, 164, 164
	.byte  168, 168, 168, 168, 172, 172, 172, 172
	.byte  176, 176, 176, 176, 180, 180, 180, 180
	.byte  184, 184, 184, 184, 188, 188, 188, 188
	.byte  192, 192, 192, 192, 196, 196, 196, 196
	.byte  200, 200, 200, 200, 204, 204, 204, 204
	.byte  208, 208, 208, 208, 212, 212, 212, 212
	.byte  216, 216, 216, 216, 220, 220, 220, 220
	.byte  224, 224, 224, 224, 228, 228, 228, 228
	.byte  232, 232, 232, 232, 236, 236, 236, 236
	.byte  240, 240, 240, 240, 244, 244, 244, 244
	.byte  248, 248, 248, 248, 252, 252, 252, 252

	! 5 numbers for initial/final permutation

	.word   0x0f0f0f0f                ! offset 256
	.word	0x0000ffff                ! 260
	.word	0x33333333                ! 264
	.word	0x00ff00ff                ! 268
	.word	0x55555555                ! 272

	! Three more words follow the permutation masks.
	! NOTE(review): the word at 280 is presumably the loop counter
	! that the macro comments mention; confirm against the macros.

	.word	0                         ! 276
	.word	LOOPS                     ! 280
	.word	0x0000FC00                ! 284

	! Eight tables of 64 words each follow the label, which matches
	! the declared size of 2048 bytes.

	.global	DES_SPtrans
	.type	DES_SPtrans,#object
	.size	DES_SPtrans,2048
.align	64
DES_SPtrans:
.PIC.DES_SPtrans:
	!
	! Each nibble section below is 16 rows of 4 words, 256 bytes, so
	! nibble n starts at byte offset n*256 from the table label.  The
	! code above addresses offset 512 as sbox 3 and offset 768 as
	! sbox 4, so nibble n presumably corresponds to sbox n+1.

	! nibble 0
	.word	0x02080800, 0x00080000, 0x02000002, 0x02080802
	.word	0x02000000, 0x00080802, 0x00080002, 0x02000002
	.word	0x00080802, 0x02080800, 0x02080000, 0x00000802
	.word	0x02000802, 0x02000000, 0x00000000, 0x00080002
	.word	0x00080000, 0x00000002, 0x02000800, 0x00080800
	.word	0x02080802, 0x02080000, 0x00000802, 0x02000800
	.word	0x00000002, 0x00000800, 0x00080800, 0x02080002
	.word	0x00000800, 0x02000802, 0x02080002, 0x00000000
	.word	0x00000000, 0x02080802, 0x02000800, 0x00080002
	.word	0x02080800, 0x00080000, 0x00000802, 0x02000800
	.word	0x02080002, 0x00000800, 0x00080800, 0x02000002
	.word	0x00080802, 0x00000002, 0x02000002, 0x02080000
	.word	0x02080802, 0x00080800, 0x02080000, 0x02000802
	.word	0x02000000, 0x00000802, 0x00080002, 0x00000000
	.word	0x00080000, 0x02000000, 0x02000802, 0x02080800
	.word	0x00000002, 0x02080002, 0x00000800, 0x00080802
	! nibble 1
	.word	0x40108010, 0x00000000, 0x00108000, 0x40100000
	.word	0x40000010, 0x00008010, 0x40008000, 0x00108000
	.word	0x00008000, 0x40100010, 0x00000010, 0x40008000
	.word	0x00100010, 0x40108000, 0x40100000, 0x00000010
	.word	0x00100000, 0x40008010, 0x40100010, 0x00008000
	.word	0x00108010, 0x40000000, 0x00000000, 0x00100010
	.word	0x40008010, 0x00108010, 0x40108000, 0x40000010
	.word	0x40000000, 0x00100000, 0x00008010, 0x40108010
	.word	0x00100010, 0x40108000, 0x40008000, 0x00108010
	.word	0x40108010, 0x00100010, 0x40000010, 0x00000000
	.word	0x40000000, 0x00008010, 0x00100000, 0x40100010
	.word	0x00008000, 0x40000000, 0x00108010, 0x40008010
	.word	0x40108000, 0x00008000, 0x00000000, 0x40000010
	.word	0x00000010, 0x40108010, 0x00108000, 0x40100000
	.word	0x40100010, 0x00100000, 0x00008010, 0x40008000
	.word	0x40008010, 0x00000010, 0x40100000, 0x00108000
	! nibble 2
	.word	0x04000001, 0x04040100, 0x00000100, 0x04000101
	.word	0x00040001, 0x04000000, 0x04000101, 0x00040100
	.word	0x04000100, 0x00040000, 0x04040000, 0x00000001
	.word	0x04040101, 0x00000101, 0x00000001, 0x04040001
	.word	0x00000000, 0x00040001, 0x04040100, 0x00000100
	.word	0x00000101, 0x04040101, 0x00040000, 0x04000001
	.word	0x04040001, 0x04000100, 0x00040101, 0x04040000
	.word	0x00040100, 0x00000000, 0x04000000, 0x00040101
	.word	0x04040100, 0x00000100, 0x00000001, 0x00040000
	.word	0x00000101, 0x00040001, 0x04040000, 0x04000101
	.word	0x00000000, 0x04040100, 0x00040100, 0x04040001
	.word	0x00040001, 0x04000000, 0x04040101, 0x00000001
	.word	0x00040101, 0x04000001, 0x04000000, 0x04040101
	.word	0x00040000, 0x04000100, 0x04000101, 0x00040100
	.word	0x04000100, 0x00000000, 0x04040001, 0x00000101
	.word	0x04000001, 0x00040101, 0x00000100, 0x04040000
	! nibble 3
	.word	0x00401008, 0x10001000, 0x00000008, 0x10401008
	.word	0x00000000, 0x10400000, 0x10001008, 0x00400008
	.word	0x10401000, 0x10000008, 0x10000000, 0x00001008
	.word	0x10000008, 0x00401008, 0x00400000, 0x10000000
	.word	0x10400008, 0x00401000, 0x00001000, 0x00000008
	.word	0x00401000, 0x10001008, 0x10400000, 0x00001000
	.word	0x00001008, 0x00000000, 0x00400008, 0x10401000
	.word	0x10001000, 0x10400008, 0x10401008, 0x00400000
	.word	0x10400008, 0x00001008, 0x00400000, 0x10000008
	.word	0x00401000, 0x10001000, 0x00000008, 0x10400000
	.word	0x10001008, 0x00000000, 0x00001000, 0x00400008
	.word	0x00000000, 0x10400008, 0x10401000, 0x00001000
	.word	0x10000000, 0x10401008, 0x00401008, 0x00400000
	.word	0x10401008, 0x00000008, 0x10001000, 0x00401008
	.word	0x00400008, 0x00401000, 0x10400000, 0x10001008
	.word	0x00001008, 0x10000000, 0x10000008, 0x10401000
	! nibble 4
	.word	0x08000000, 0x00010000, 0x00000400, 0x08010420
	.word	0x08010020, 0x08000400, 0x00010420, 0x08010000
	.word	0x00010000, 0x00000020, 0x08000020, 0x00010400
	.word	0x08000420, 0x08010020, 0x08010400, 0x00000000
	.word	0x00010400, 0x08000000, 0x00010020, 0x00000420
	.word	0x08000400, 0x00010420, 0x00000000, 0x08000020
	.word	0x00000020, 0x08000420, 0x08010420, 0x00010020
	.word	0x08010000, 0x00000400, 0x00000420, 0x08010400
	.word	0x08010400, 0x08000420, 0x00010020, 0x08010000
	.word	0x00010000, 0x00000020, 0x08000020, 0x08000400
	.word	0x08000000, 0x00010400, 0x08010420, 0x00000000
	.word	0x00010420, 0x08000000, 0x00000400, 0x00010020
	.word	0x08000420, 0x00000400, 0x00000000, 0x08010420
	.word	0x08010020, 0x08010400, 0x00000420, 0x00010000
	.word	0x00010400, 0x08010020, 0x08000400, 0x00000420
	.word	0x00000020, 0x00010420, 0x08010000, 0x08000020
	! nibble 5
	.word	0x80000040, 0x00200040, 0x00000000, 0x80202000
	.word	0x00200040, 0x00002000, 0x80002040, 0x00200000
	.word	0x00002040, 0x80202040, 0x00202000, 0x80000000
	.word	0x80002000, 0x80000040, 0x80200000, 0x00202040
	.word	0x00200000, 0x80002040, 0x80200040, 0x00000000
	.word	0x00002000, 0x00000040, 0x80202000, 0x80200040
	.word	0x80202040, 0x80200000, 0x80000000, 0x00002040
	.word	0x00000040, 0x00202000, 0x00202040, 0x80002000
	.word	0x00002040, 0x80000000, 0x80002000, 0x00202040
	.word	0x80202000, 0x00200040, 0x00000000, 0x80002000
	.word	0x80000000, 0x00002000, 0x80200040, 0x00200000
	.word	0x00200040, 0x80202040, 0x00202000, 0x00000040
	.word	0x80202040, 0x00202000, 0x00200000, 0x80002040
	.word	0x80000040, 0x80200000, 0x00202040, 0x00000000
	.word	0x00002000, 0x80000040, 0x80002040, 0x80202000
	.word	0x80200000, 0x00002040, 0x00000040, 0x80200040
	! nibble 6
	.word	0x00004000, 0x00000200, 0x01000200, 0x01000004
	.word	0x01004204, 0x00004004, 0x00004200, 0x00000000
	.word	0x01000000, 0x01000204, 0x00000204, 0x01004000
	.word	0x00000004, 0x01004200, 0x01004000, 0x00000204
	.word	0x01000204, 0x00004000, 0x00004004, 0x01004204
	.word	0x00000000, 0x01000200, 0x01000004, 0x00004200
	.word	0x01004004, 0x00004204, 0x01004200, 0x00000004
	.word	0x00004204, 0x01004004, 0x00000200, 0x01000000
	.word	0x00004204, 0x01004000, 0x01004004, 0x00000204
	.word	0x00004000, 0x00000200, 0x01000000, 0x01004004
	.word	0x01000204, 0x00004204, 0x00004200, 0x00000000
	.word	0x00000200, 0x01000004, 0x00000004, 0x01000200
	.word	0x00000000, 0x01000204, 0x01000200, 0x00004200
	.word	0x00000204, 0x00004000, 0x01004204, 0x01000000
	.word	0x01004200, 0x00000004, 0x00004004, 0x01004204
	.word	0x01000004, 0x01004200, 0x01004000, 0x00004004
	! nibble 7
	.word	0x20800080, 0x20820000, 0x00020080, 0x00000000
	.word	0x20020000, 0x00800080, 0x20800000, 0x20820080
	.word	0x00000080, 0x20000000, 0x00820000, 0x00020080
	.word	0x00820080, 0x20020080, 0x20000080, 0x20800000
	.word	0x00020000, 0x00820080, 0x00800080, 0x20020000
	.word	0x20820080, 0x20000080, 0x00000000, 0x00820000
	.word	0x20000000, 0x00800000, 0x20020080, 0x20800080
	.word	0x00800000, 0x00020000, 0x20820000, 0x00000080
	.word	0x00800000, 0x00020000, 0x20000080, 0x20820080
	.word	0x00020080, 0x20000000, 0x00000000, 0x00820000
	.word	0x20800080, 0x20020080, 0x20020000, 0x00800080
	.word	0x20820000, 0x00000080, 0x00800080, 0x20020000
	.word	0x20820080, 0x00800000, 0x20800000, 0x20000080
	.word	0x00820000, 0x00020080, 0x20020080, 0x20800000
	.word	0x00000080, 0x20820000, 0x00820080, 0x00000000
	.word	0x20000000, 0x20800080, 0x00020000, 0x00820080
