#! /usr/bin/env perl
# Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. Rights for redistribution and usage in source and binary
# forms are granted according to the OpenSSL license.
# ====================================================================
#
# Version 1.1
#
# The major reason for undertaken effort was to mitigate the hazard of
# cache-timing attack. This is [currently and initially!] addressed in
# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
# 2. References to them are scheduled for L2 cache latency, meaning
# that the tables don't have to reside in L1 cache. Once again, this
# is an initial draft and one should expect more countermeasures to
# be implemented...
#
# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
# round.
#
# Even though performance was not the primary goal [on the contrary,
# extra shifts "induced" by compressed S-box and longer loop epilogue
# "induced" by scheduling for L2 have negative effect on performance],
# the code turned out to run in ~23 cycles per processed byte en-/
# decrypted with 128-bit key. This is pretty good result for code
# with mentioned qualities and UltraSPARC core. Compared to Sun C
# generated code my encrypt procedure runs just few percents faster,
# while decrypt one - whole 50% faster [yes, Sun C failed to generate
# optimal decrypt procedure]. Compared to GNU C generated code both
# procedures are more than 60% faster:-)

use strict;
use warnings;

# The last command-line argument names the file that receives the
# generated assembly.  When it is absent the module is written to the
# inherited STDOUT.  The open is checked so that an unwritable path
# aborts the build instead of silently discarding the output.
my $output = pop;
if (defined $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# Accumulated assembly text; printed once at the very end.
my $code = '';

# Symbolic stack-frame constants.  They are emitted verbatim and are
# expected to be resolved by the C preprocessor via "sparc_arch.h".
my $frame  = "STACK_FRAME";
my $bias   = "STACK_BIAS";
my $locals = 16;    # extra frame bytes; slot 0 holds the saved %i7

# Sixteen scratch registers, one per table look-up in a round.
my $acc0  = "%l0";
my $acc1  = "%o0";
my $acc2  = "%o1";
my $acc3  = "%o2";

my $acc4  = "%l1";
my $acc5  = "%o3";
my $acc6  = "%o4";
my $acc7  = "%o5";

my $acc8  = "%l2";
my $acc9  = "%o7";
my $acc10 = "%g1";
my $acc11 = "%g2";

my $acc12 = "%l3";
my $acc13 = "%g3";
my $acc14 = "%g4";
my $acc15 = "%g5";

# Second set of state words / pre-loaded round-key words.
my $t0 = "%l4";
my $t1 = "%l5";
my $t2 = "%l6";
my $t3 = "%l7";

# Cipher state, table pointer and key-schedule pointer as seen by the
# inner routines after their `save`.
my $s0     = "%i0";
my $s1     = "%i1";
my $s2     = "%i2";
my $s3     = "%i3";
my $tbl    = "%i4";
my $key    = "%i5";
my $rounds = "%i7";    # aliases with return address, which is off-loaded to stack

# _data_word(@words)
#
# Appends one ".long w,w" line to $code for every 32-bit word given,
# i.e. each word is stored twice, forming an 8-byte table entry.  The
# round code fetches entries with 64-bit `ldx` and applies `srlx` by
# 8/16/24 to them before XOR-ing, so the duplication is what lets a
# single 2KB table serve all four byte positions.
# Returns nothing.
sub _data_word
{
    for my $word (@_) {
        # An undefined element terminates the list.
        last unless defined $word;
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
    return;
}

# ---------------------------------------------------------------------
# Encryption: AES_Te (256 duplicated words followed by the 256-byte
# S-box at offset 2048), the inner routine _sparcv9_AES_encrypt and the
# public entry point AES_encrypt.
#
# _sparcv9_AES_encrypt takes the state in its %i0-%i3, the table address
# in %i4 and the key schedule in %i5; it reads the round count from
# [key+240], halves it (the loop body performs two rounds), and leaves
# the result in %i0-%i3.  %i7 is reused as the round counter, so the
# return address is parked on the stack and reloaded before `ret`.
#
# AES_encrypt takes in/out/key pointers in %o0/%o1/%o2.  If either the
# input or the output pointer is not 4-byte aligned it branches to
# .Lunaligned_enc, which assembles and stores the block byte by byte.
# The "call .+8 / add %o7,AES_Te-1b" pair computes the table address
# relative to the current PC.
# ---------------------------------------------------------------------
$code.=<<___;
#include "sparc_arch.h"

#ifdef  __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif
.section	".text",#alloc,#execinstr

.align	256
AES_Te:
___
_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
$code.=<<___;
	.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
.type	AES_Te,#object
.size	AES_Te,(.-AES_Te)

.align	64
.skip	16
_sparcv9_AES_encrypt:
	save	%sp,-$frame-$locals,%sp
	stx	%i7,[%sp+$bias+$frame+0]	! off-load return address
	ld	[$key+240],$rounds
	ld	[$key+0],$t0
	ld	[$key+4],$t1			!
	ld	[$key+8],$t2
	srl	$rounds,1,$rounds
	xor	$t0,$s0,$s0
	ld	[$key+12],$t3
	srl	$s0,21,$acc0
	xor	$t1,$s1,$s1
	ld	[$key+16],$t0
	srl	$s1,13,$acc1			!
	xor	$t2,$s2,$s2
	ld	[$key+20],$t1
	xor	$t3,$s3,$s3
	ld	[$key+24],$t2
	and	$acc0,2040,$acc0
	ld	[$key+28],$t3
	nop
.Lenc_loop:
	srl	$s2,5,$acc2			!
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$s3,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	srl	$s1,21,$acc4
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2		!
	srl	$s2,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$s3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4
	fmovs	%f0,%f0
	sll	$s0,3,$acc7			!
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$s2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	srl	$s3,13,$acc9
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7		!
	srl	$s0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$s1,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9
	fmovs	%f0,%f0
	srl	$s3,21,$acc12			!
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$s0,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	srl	$s1,5,$acc14
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12		!
	sll	$s2,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
	and	$acc15,2040,$acc15
	add	$key,32,$key
	ldx	[$tbl+$acc14],$acc14
	fmovs	%f0,%f0
	subcc	$rounds,1,$rounds		!
	ldx	[$tbl+$acc15],$acc15
	bz,a,pn	%icc,.Lenc_last
	add	$tbl,2048,$rounds

		srlx	$acc1,8,$acc1
		xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
	fmovs	%f0,%f0
		srlx	$acc2,16,$acc2		!
		xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
		srlx	$acc3,24,$acc3
		xor	$acc2,$t0,$t0
	ld	[$key+8],$s2
		srlx	$acc5,8,$acc5
		xor	$acc3,$t0,$t0
	ld	[$key+12],$s3			!
		srlx	$acc6,16,$acc6
		xor	$acc4,$t1,$t1
	fmovs	%f0,%f0
		srlx	$acc7,24,$acc7
		xor	$acc5,$t1,$t1
		srlx	$acc9,8,$acc9
		xor	$acc6,$t1,$t1
		srlx	$acc10,16,$acc10	!
		xor	$acc7,$t1,$t1
		srlx	$acc11,24,$acc11
		xor	$acc8,$t2,$t2
		srlx	$acc13,8,$acc13
		xor	$acc9,$t2,$t2
		srlx	$acc14,16,$acc14
		xor	$acc10,$t2,$t2
		srlx	$acc15,24,$acc15	!
		xor	$acc11,$t2,$t2
		xor	$acc12,$acc14,$acc14
		xor	$acc13,$t3,$t3
	srl	$t0,21,$acc0
		xor	$acc14,$t3,$t3
	srl	$t1,13,$acc1
		xor	$acc15,$t3,$t3

	and	$acc0,2040,$acc0		!
	srl	$t2,5,$acc2
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$t3,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	fmovs	%f0,%f0
	srl	$t1,21,$acc4			!
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2
	srl	$t2,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$t3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4		!
	sll	$t0,3,$acc7
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$t2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	fmovs	%f0,%f0
	srl	$t3,13,$acc9			!
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7
	srl	$t0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$t1,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9		!
	srl	$t3,21,$acc12
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$t0,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	fmovs	%f0,%f0
	srl	$t1,5,$acc14			!
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12
	sll	$t2,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
		srlx	$acc1,8,$acc1
	and	$acc15,2040,$acc15
	ldx	[$tbl+$acc14],$acc14		!

		srlx	$acc2,16,$acc2
		xor	$acc0,$s0,$s0
	ldx	[$tbl+$acc15],$acc15
		srlx	$acc3,24,$acc3
		xor	$acc1,$s0,$s0
	ld	[$key+16],$t0
	fmovs	%f0,%f0
		srlx	$acc5,8,$acc5		!
		xor	$acc2,$s0,$s0
	ld	[$key+20],$t1
		srlx	$acc6,16,$acc6
		xor	$acc3,$s0,$s0
	ld	[$key+24],$t2
		srlx	$acc7,24,$acc7
		xor	$acc4,$s1,$s1
	ld	[$key+28],$t3			!
		srlx	$acc9,8,$acc9
		xor	$acc5,$s1,$s1
	ldx	[$tbl+2048+0],%g0		! prefetch te4
		srlx	$acc10,16,$acc10
		xor	$acc6,$s1,$s1
	ldx	[$tbl+2048+32],%g0		! prefetch te4
		srlx	$acc11,24,$acc11
		xor	$acc7,$s1,$s1
	ldx	[$tbl+2048+64],%g0		! prefetch te4
		srlx	$acc13,8,$acc13
		xor	$acc8,$s2,$s2
	ldx	[$tbl+2048+96],%g0		! prefetch te4
		srlx	$acc14,16,$acc14	!
		xor	$acc9,$s2,$s2
	ldx	[$tbl+2048+128],%g0		! prefetch te4
		srlx	$acc15,24,$acc15
		xor	$acc10,$s2,$s2
	ldx	[$tbl+2048+160],%g0		! prefetch te4
	srl	$s0,21,$acc0
		xor	$acc11,$s2,$s2
	ldx	[$tbl+2048+192],%g0		! prefetch te4
		xor	$acc12,$acc14,$acc14
		xor	$acc13,$s3,$s3
	ldx	[$tbl+2048+224],%g0		! prefetch te4
	srl	$s1,13,$acc1			!
		xor	$acc14,$s3,$s3
		xor	$acc15,$s3,$s3
	ba	.Lenc_loop
	and	$acc0,2040,$acc0

.align	32
.Lenc_last:
		srlx	$acc1,8,$acc1		!
		xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
		srlx	$acc2,16,$acc2
		xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
		srlx	$acc3,24,$acc3
		xor	$acc2,$t0,$t0
	ld	[$key+8],$s2			!
		srlx	$acc5,8,$acc5
		xor	$acc3,$t0,$t0
	ld	[$key+12],$s3
		srlx	$acc6,16,$acc6
		xor	$acc4,$t1,$t1
		srlx	$acc7,24,$acc7
		xor	$acc5,$t1,$t1
		srlx	$acc9,8,$acc9		!
		xor	$acc6,$t1,$t1
		srlx	$acc10,16,$acc10
		xor	$acc7,$t1,$t1
		srlx	$acc11,24,$acc11
		xor	$acc8,$t2,$t2
		srlx	$acc13,8,$acc13
		xor	$acc9,$t2,$t2
		srlx	$acc14,16,$acc14	!
		xor	$acc10,$t2,$t2
		srlx	$acc15,24,$acc15
		xor	$acc11,$t2,$t2
		xor	$acc12,$acc14,$acc14
		xor	$acc13,$t3,$t3
	srl	$t0,24,$acc0
		xor	$acc14,$t3,$t3
	srl	$t1,16,$acc1			!
		xor	$acc15,$t3,$t3

	srl	$t2,8,$acc2
	and	$acc1,255,$acc1
	ldub	[$rounds+$acc0],$acc0
	srl	$t1,24,$acc4
	and	$acc2,255,$acc2
	ldub	[$rounds+$acc1],$acc1
	srl	$t2,16,$acc5			!
	and	$t3,255,$acc3
	ldub	[$rounds+$acc2],$acc2
	ldub	[$rounds+$acc3],$acc3
	srl	$t3,8,$acc6
	and	$acc5,255,$acc5
	ldub	[$rounds+$acc4],$acc4
	fmovs	%f0,%f0
	srl	$t2,24,$acc8			!
	and	$acc6,255,$acc6
	ldub	[$rounds+$acc5],$acc5
	srl	$t3,16,$acc9
	and	$t0,255,$acc7
	ldub	[$rounds+$acc6],$acc6
	ldub	[$rounds+$acc7],$acc7
	fmovs	%f0,%f0
	srl	$t0,8,$acc10			!
	and	$acc9,255,$acc9
	ldub	[$rounds+$acc8],$acc8
	srl	$t3,24,$acc12
	and	$acc10,255,$acc10
	ldub	[$rounds+$acc9],$acc9
	srl	$t0,16,$acc13
	and	$t1,255,$acc11
	ldub	[$rounds+$acc10],$acc10		!
	srl	$t1,8,$acc14
	and	$acc13,255,$acc13
	ldub	[$rounds+$acc11],$acc11
	ldub	[$rounds+$acc12],$acc12
	and	$acc14,255,$acc14
	ldub	[$rounds+$acc13],$acc13
	and	$t2,255,$acc15
	ldub	[$rounds+$acc14],$acc14		!

		sll	$acc0,24,$acc0
		xor	$acc3,$s0,$s0
	ldub	[$rounds+$acc15],$acc15
		sll	$acc1,16,$acc1
		xor	$acc0,$s0,$s0
	ldx	[%sp+$bias+$frame+0],%i7	! restore return address
	fmovs	%f0,%f0
		sll	$acc2,8,$acc2		!
		xor	$acc1,$s0,$s0
		sll	$acc4,24,$acc4
		xor	$acc2,$s0,$s0
		sll	$acc5,16,$acc5
		xor	$acc7,$s1,$s1
		sll	$acc6,8,$acc6
		xor	$acc4,$s1,$s1
		sll	$acc8,24,$acc8		!
		xor	$acc5,$s1,$s1
		sll	$acc9,16,$acc9
		xor	$acc11,$s2,$s2
		sll	$acc10,8,$acc10
		xor	$acc6,$s1,$s1
		sll	$acc12,24,$acc12
		xor	$acc8,$s2,$s2
		sll	$acc13,16,$acc13	!
		xor	$acc9,$s2,$s2
		sll	$acc14,8,$acc14
		xor	$acc10,$s2,$s2
		xor	$acc12,$acc14,$acc14
		xor	$acc13,$s3,$s3
		xor	$acc14,$s3,$s3
		xor	$acc15,$s3,$s3

	ret
	restore
.type	_sparcv9_AES_encrypt,#function
.size	_sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)

.align	32
.globl	AES_encrypt
AES_encrypt:
	or	%o0,%o1,%g1
	andcc	%g1,3,%g0
	bnz,pn	%xcc,.Lunaligned_enc
	save	%sp,-$frame,%sp

	ld	[%i0+0],%o0
	ld	[%i0+4],%o1
	ld	[%i0+8],%o2
	ld	[%i0+12],%o3

1:	call	.+8
	add	%o7,AES_Te-1b,%o4
	call	_sparcv9_AES_encrypt
	mov	%i2,%o5

	st	%o0,[%i1+0]
	st	%o1,[%i1+4]
	st	%o2,[%i1+8]
	st	%o3,[%i1+12]

	ret
	restore

.align	32
.Lunaligned_enc:
	ldub	[%i0+0],%l0
	ldub	[%i0+1],%l1
	ldub	[%i0+2],%l2

	sll	%l0,24,%l0
	ldub	[%i0+3],%l3
	sll	%l1,16,%l1
	ldub	[%i0+4],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+5],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+6],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o0
	ldub	[%i0+7],%l7

	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	ldub	[%i0+8],%l0
	or	%l7,%l6,%l6
	ldub	[%i0+9],%l1
	or	%l4,%l6,%o1
	ldub	[%i0+10],%l2

	sll	%l0,24,%l0
	ldub	[%i0+11],%l3
	sll	%l1,16,%l1
	ldub	[%i0+12],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+13],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+14],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o2
	ldub	[%i0+15],%l7

	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	or	%l7,%l6,%l6
	or	%l4,%l6,%o3

1:	call	.+8
	add	%o7,AES_Te-1b,%o4
	call	_sparcv9_AES_encrypt
	mov	%i2,%o5

	srl	%o0,24,%l0
	srl	%o0,16,%l1
	stb	%l0,[%i1+0]
	srl	%o0,8,%l2
	stb	%l1,[%i1+1]
	stb	%l2,[%i1+2]
	srl	%o1,24,%l4
	stb	%o0,[%i1+3]

	srl	%o1,16,%l5
	stb	%l4,[%i1+4]
	srl	%o1,8,%l6
	stb	%l5,[%i1+5]
	stb	%l6,[%i1+6]
	srl	%o2,24,%l0
	stb	%o1,[%i1+7]

	srl	%o2,16,%l1
	stb	%l0,[%i1+8]
	srl	%o2,8,%l2
	stb	%l1,[%i1+9]
	stb	%l2,[%i1+10]
	srl	%o3,24,%l4
	stb	%o2,[%i1+11]

	srl	%o3,16,%l5
	stb	%l4,[%i1+12]
	srl	%o3,8,%l6
	stb	%l5,[%i1+13]
	stb	%l6,[%i1+14]
	stb	%o3,[%i1+15]

	ret
	restore
.type	AES_encrypt,#function
.size	AES_encrypt,(.-AES_encrypt)

___

# ---------------------------------------------------------------------
# Decryption: AES_Td (256 duplicated words followed by the 256-byte
# inverse S-box at offset 2048), the inner routine _sparcv9_AES_decrypt
# and the public entry point AES_decrypt.  The structure mirrors the
# encryption half; only the tables and the choice of source word for
# each byte position differ.
# ---------------------------------------------------------------------
$code.=<<___;
.align	256
AES_Td:
___
_data_word(
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
$code.=<<___;
	.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.type	AES_Td,#object
.size	AES_Td,(.-AES_Td)

.align	64
.skip	16
_sparcv9_AES_decrypt:
	save	%sp,-$frame-$locals,%sp
	stx	%i7,[%sp+$bias+$frame+0]	! off-load return address
	ld	[$key+240],$rounds
	ld	[$key+0],$t0
	ld	[$key+4],$t1			!
	ld	[$key+8],$t2
	ld	[$key+12],$t3
	srl	$rounds,1,$rounds
	xor	$t0,$s0,$s0
	ld	[$key+16],$t0
	xor	$t1,$s1,$s1
	ld	[$key+20],$t1
	srl	$s0,21,$acc0			!
	xor	$t2,$s2,$s2
	ld	[$key+24],$t2
	xor	$t3,$s3,$s3
	and	$acc0,2040,$acc0
	ld	[$key+28],$t3
	srl	$s3,13,$acc1
	nop
.Ldec_loop:
	srl	$s2,5,$acc2			!
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$s1,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	srl	$s1,21,$acc4
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2		!
	srl	$s0,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$s3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4
	fmovs	%f0,%f0
	sll	$s2,3,$acc7			!
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$s2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	srl	$s1,13,$acc9
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7		!
	srl	$s0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$s3,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9
	fmovs	%f0,%f0
	srl	$s3,21,$acc12			!
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$s2,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	srl	$s1,5,$acc14
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12		!
	sll	$s0,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
	and	$acc15,2040,$acc15
	add	$key,32,$key
	ldx	[$tbl+$acc14],$acc14
	fmovs	%f0,%f0
	subcc	$rounds,1,$rounds		!
	ldx	[$tbl+$acc15],$acc15
	bz,a,pn	%icc,.Ldec_last
	add	$tbl,2048,$rounds

		srlx	$acc1,8,$acc1
		xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
	fmovs	%f0,%f0
		srlx	$acc2,16,$acc2		!
		xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
		srlx	$acc3,24,$acc3
		xor	$acc2,$t0,$t0
	ld	[$key+8],$s2
		srlx	$acc5,8,$acc5
		xor	$acc3,$t0,$t0
	ld	[$key+12],$s3			!
		srlx	$acc6,16,$acc6
		xor	$acc4,$t1,$t1
	fmovs	%f0,%f0
		srlx	$acc7,24,$acc7
		xor	$acc5,$t1,$t1
		srlx	$acc9,8,$acc9
		xor	$acc6,$t1,$t1
		srlx	$acc10,16,$acc10	!
		xor	$acc7,$t1,$t1
		srlx	$acc11,24,$acc11
		xor	$acc8,$t2,$t2
		srlx	$acc13,8,$acc13
		xor	$acc9,$t2,$t2
		srlx	$acc14,16,$acc14
		xor	$acc10,$t2,$t2
		srlx	$acc15,24,$acc15	!
		xor	$acc11,$t2,$t2
		xor	$acc12,$acc14,$acc14
		xor	$acc13,$t3,$t3
	srl	$t0,21,$acc0
		xor	$acc14,$t3,$t3
		xor	$acc15,$t3,$t3
	srl	$t3,13,$acc1

	and	$acc0,2040,$acc0		!
	srl	$t2,5,$acc2
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$t1,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	fmovs	%f0,%f0
	srl	$t1,21,$acc4			!
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2
	srl	$t0,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$t3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4		!
	sll	$t2,3,$acc7
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$t2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	fmovs	%f0,%f0
	srl	$t1,13,$acc9			!
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7
	srl	$t0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$t3,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9		!
	srl	$t3,21,$acc12
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$t2,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	fmovs	%f0,%f0
	srl	$t1,5,$acc14			!
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12
	sll	$t0,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
		srlx	$acc1,8,$acc1
	and	$acc15,2040,$acc15
	ldx	[$tbl+$acc14],$acc14		!

		srlx	$acc2,16,$acc2
		xor	$acc0,$s0,$s0
	ldx	[$tbl+$acc15],$acc15
		srlx	$acc3,24,$acc3
		xor	$acc1,$s0,$s0
	ld	[$key+16],$t0
	fmovs	%f0,%f0
		srlx	$acc5,8,$acc5		!
		xor	$acc2,$s0,$s0
	ld	[$key+20],$t1
		srlx	$acc6,16,$acc6
		xor	$acc3,$s0,$s0
	ld	[$key+24],$t2
		srlx	$acc7,24,$acc7
		xor	$acc4,$s1,$s1
	ld	[$key+28],$t3			!
		srlx	$acc9,8,$acc9
		xor	$acc5,$s1,$s1
	ldx	[$tbl+2048+0],%g0		! prefetch td4
		srlx	$acc10,16,$acc10
		xor	$acc6,$s1,$s1
	ldx	[$tbl+2048+32],%g0		! prefetch td4
		srlx	$acc11,24,$acc11
		xor	$acc7,$s1,$s1
	ldx	[$tbl+2048+64],%g0		! prefetch td4
		srlx	$acc13,8,$acc13
		xor	$acc8,$s2,$s2
	ldx	[$tbl+2048+96],%g0		! prefetch td4
		srlx	$acc14,16,$acc14	!
		xor	$acc9,$s2,$s2
	ldx	[$tbl+2048+128],%g0		! prefetch td4
		srlx	$acc15,24,$acc15
		xor	$acc10,$s2,$s2
	ldx	[$tbl+2048+160],%g0		! prefetch td4
	srl	$s0,21,$acc0
		xor	$acc11,$s2,$s2
	ldx	[$tbl+2048+192],%g0		! prefetch td4
		xor	$acc12,$acc14,$acc14
		xor	$acc13,$s3,$s3
	ldx	[$tbl+2048+224],%g0		! prefetch td4
	and	$acc0,2040,$acc0		!
		xor	$acc14,$s3,$s3
		xor	$acc15,$s3,$s3
	ba	.Ldec_loop
	srl	$s3,13,$acc1

.align	32
.Ldec_last:
		srlx	$acc1,8,$acc1		!
		xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
		srlx	$acc2,16,$acc2
		xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
		srlx	$acc3,24,$acc3
		xor	$acc2,$t0,$t0
	ld	[$key+8],$s2			!
		srlx	$acc5,8,$acc5
		xor	$acc3,$t0,$t0
	ld	[$key+12],$s3
		srlx	$acc6,16,$acc6
		xor	$acc4,$t1,$t1
		srlx	$acc7,24,$acc7
		xor	$acc5,$t1,$t1
		srlx	$acc9,8,$acc9		!
		xor	$acc6,$t1,$t1
		srlx	$acc10,16,$acc10
		xor	$acc7,$t1,$t1
		srlx	$acc11,24,$acc11
		xor	$acc8,$t2,$t2
		srlx	$acc13,8,$acc13
		xor	$acc9,$t2,$t2
		srlx	$acc14,16,$acc14	!
		xor	$acc10,$t2,$t2
		srlx	$acc15,24,$acc15
		xor	$acc11,$t2,$t2
		xor	$acc12,$acc14,$acc14
		xor	$acc13,$t3,$t3
	srl	$t0,24,$acc0
		xor	$acc14,$t3,$t3
		xor	$acc15,$t3,$t3		!
	srl	$t3,16,$acc1

	srl	$t2,8,$acc2
	and	$acc1,255,$acc1
	ldub	[$rounds+$acc0],$acc0
	srl	$t1,24,$acc4
	and	$acc2,255,$acc2
	ldub	[$rounds+$acc1],$acc1
	srl	$t0,16,$acc5			!
	and	$t1,255,$acc3
	ldub	[$rounds+$acc2],$acc2
	ldub	[$rounds+$acc3],$acc3
	srl	$t3,8,$acc6
	and	$acc5,255,$acc5
	ldub	[$rounds+$acc4],$acc4
	fmovs	%f0,%f0
	srl	$t2,24,$acc8			!
	and	$acc6,255,$acc6
	ldub	[$rounds+$acc5],$acc5
	srl	$t1,16,$acc9
	and	$t2,255,$acc7
	ldub	[$rounds+$acc6],$acc6
	ldub	[$rounds+$acc7],$acc7
	fmovs	%f0,%f0
	srl	$t0,8,$acc10			!
	and	$acc9,255,$acc9
	ldub	[$rounds+$acc8],$acc8
	srl	$t3,24,$acc12
	and	$acc10,255,$acc10
	ldub	[$rounds+$acc9],$acc9
	srl	$t2,16,$acc13
	and	$t3,255,$acc11
	ldub	[$rounds+$acc10],$acc10		!
	srl	$t1,8,$acc14
	and	$acc13,255,$acc13
	ldub	[$rounds+$acc11],$acc11
	ldub	[$rounds+$acc12],$acc12
	and	$acc14,255,$acc14
	ldub	[$rounds+$acc13],$acc13
	and	$t0,255,$acc15
	ldub	[$rounds+$acc14],$acc14		!

		sll	$acc0,24,$acc0
		xor	$acc3,$s0,$s0
	ldub	[$rounds+$acc15],$acc15
		sll	$acc1,16,$acc1
		xor	$acc0,$s0,$s0
	ldx	[%sp+$bias+$frame+0],%i7	! restore return address
	fmovs	%f0,%f0
		sll	$acc2,8,$acc2		!
		xor	$acc1,$s0,$s0
		sll	$acc4,24,$acc4
		xor	$acc2,$s0,$s0
		sll	$acc5,16,$acc5
		xor	$acc7,$s1,$s1
		sll	$acc6,8,$acc6
		xor	$acc4,$s1,$s1
		sll	$acc8,24,$acc8		!
		xor	$acc5,$s1,$s1
		sll	$acc9,16,$acc9
		xor	$acc11,$s2,$s2
		sll	$acc10,8,$acc10
		xor	$acc6,$s1,$s1
		sll	$acc12,24,$acc12
		xor	$acc8,$s2,$s2
		sll	$acc13,16,$acc13	!
		xor	$acc9,$s2,$s2
		sll	$acc14,8,$acc14
		xor	$acc10,$s2,$s2
		xor	$acc12,$acc14,$acc14
		xor	$acc13,$s3,$s3
		xor	$acc14,$s3,$s3
		xor	$acc15,$s3,$s3

	ret
	restore
.type	_sparcv9_AES_decrypt,#function
.size	_sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)

.align	32
.globl	AES_decrypt
AES_decrypt:
	or	%o0,%o1,%g1
	andcc	%g1,3,%g0
	bnz,pn	%xcc,.Lunaligned_dec
	save	%sp,-$frame,%sp

	ld	[%i0+0],%o0
	ld	[%i0+4],%o1
	ld	[%i0+8],%o2
	ld	[%i0+12],%o3

1:	call	.+8
	add	%o7,AES_Td-1b,%o4
	call	_sparcv9_AES_decrypt
	mov	%i2,%o5

	st	%o0,[%i1+0]
	st	%o1,[%i1+4]
	st	%o2,[%i1+8]
	st	%o3,[%i1+12]

	ret
	restore

.align	32
.Lunaligned_dec:
	ldub	[%i0+0],%l0
	ldub	[%i0+1],%l1
	ldub	[%i0+2],%l2

	sll	%l0,24,%l0
	ldub	[%i0+3],%l3
	sll	%l1,16,%l1
	ldub	[%i0+4],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+5],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+6],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o0
	ldub	[%i0+7],%l7

	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	ldub	[%i0+8],%l0
	or	%l7,%l6,%l6
	ldub	[%i0+9],%l1
	or	%l4,%l6,%o1
	ldub	[%i0+10],%l2

	sll	%l0,24,%l0
	ldub	[%i0+11],%l3
	sll	%l1,16,%l1
	ldub	[%i0+12],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+13],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+14],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o2
	ldub	[%i0+15],%l7

	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	or	%l7,%l6,%l6
	or	%l4,%l6,%o3

1:	call	.+8
	add	%o7,AES_Td-1b,%o4
	call	_sparcv9_AES_decrypt
	mov	%i2,%o5

	srl	%o0,24,%l0
	srl	%o0,16,%l1
	stb	%l0,[%i1+0]
	srl	%o0,8,%l2
	stb	%l1,[%i1+1]
	stb	%l2,[%i1+2]
	srl	%o1,24,%l4
	stb	%o0,[%i1+3]

	srl	%o1,16,%l5
	stb	%l4,[%i1+4]
	srl	%o1,8,%l6
	stb	%l5,[%i1+5]
	stb	%l6,[%i1+6]
	srl	%o2,24,%l0
	stb	%o1,[%i1+7]

	srl	%o2,16,%l1
	stb	%l0,[%i1+8]
	srl	%o2,8,%l2
	stb	%l1,[%i1+9]
	stb	%l2,[%i1+10]
	srl	%o3,24,%l4
	stb	%o2,[%i1+11]

	srl	%o3,16,%l5
	stb	%l4,[%i1+12]
	srl	%o3,8,%l6
	stb	%l5,[%i1+13]
	stb	%l6,[%i1+14]
	stb	%o3,[%i1+15]

	ret
	restore
.type	AES_decrypt,#function
.size	AES_decrypt,(.-AES_decrypt)
___

# fmovs instructions substituting for FP nops were originally added
# to meet specific instruction alignment requirements to maximize ILP.
# As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have
# undesired effect, so just omit them and sacrifice some portion of
# percent in performance...
$code =~ s/fmovs.*$//gm;

print $code;
close STDOUT or die "error closing STDOUT: $!";	# ensure flush