#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# Needs more work: key setup, page boundaries, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.

# February 2010
#
# Rescheduling instructions to favour Power6 pipeline gives 10%
# performance improvement on the platform in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As a result non-compact
# block subroutines are 25% slower than one would expect. Compact
# functions scale better, because they have pure computational part,
# which scales perfectly with clock frequency. To be specific
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
# ---------------------------------------------------------------------
# Script prologue.
#
# Usage: <this script> <flavour> [<output file>]
#
#   <flavour>  must contain "64" or "32"; it selects the pointer size
#              and the matching load/store mnemonics used in the
#              generated function prologues/epilogues.
#   <output>   optional; appended verbatim to the ppc-xlate.pl command
#              line that STDOUT is piped through.
# ---------------------------------------------------------------------

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Look for ppc-xlate.pl next to this script first, then in
# ../../perlasm/ relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed from here on is filtered through ppc-xlate.pl.
# NB: the check must use low-precedence "or" on the result of open().
# The previous form, `"...".shift || die ...`, applied `||` to the
# (always true) command string, so die could never fire and a failed
# open went unnoticed.
open(STDOUT,"| $^X $xlate $flavour ".shift(@ARGV)) or
	die "can't call $xlate: $!";

# Stack frame size used by the .AES_encrypt/.AES_decrypt prologues.
$FRAME=32*$SIZE_T;

# _data_word(LIST)
#
# Appends one ".long w,w" line to $code for every 32-bit value in LIST,
# i.e. each table word is emitted twice and occupies 8 bytes.  Returns
# nothing useful.  No prototype is declared: the sub takes arguments
# (the former empty "()" prototype only worked because all callers use
# the &-form, which bypasses prototypes).
sub _data_word
{   my $i;
    while(defined($i=shift)) {
	$code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i;
    }
}

# Register map: stack/TOC pointers and the three function arguments.
$sp="r1";
$toc="r2";
$inp="r3";
$out="r4";
$key="r5";

# Table base pointers ($Tbl0 reuses r3, i.e. $inp).
$Tbl0="r3";
$Tbl1="r6";
$Tbl2="r7";
$Tbl3="r2";

# Cipher state words.
$s0="r8";
$s1="r9";
$s2="r10";
$s3="r11";

# Round-key / temporary words.
$t0="r12";
$t1="r13";
$t2="r14";
$t3="r15";

# Sixteen scratch accumulators.
$acc00="r16";
$acc01="r17";
$acc02="r18";
$acc03="r19";

$acc04="r20";
$acc05="r21";
$acc06="r22";
$acc07="r23";

$acc08="r24";
$acc09="r25";
$acc10="r26";
$acc11="r27";

$acc12="r28";
$acc13="r29";
$acc14="r30";
$acc15="r31";

# stay away from TLS pointer
# 64-bit flavour: $t1 moves from r13 to r0.
# 32-bit flavour: $Tbl3 moves from r2 to the old $t0 (r12), and $t0
# moves to r0.  The die guards catch edits to the map above that would
# invalidate this remapping.
if ($SIZE_T==8)	{ die if ($t1 ne "r13"); $t1="r0"; }
else		{ die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
# The compact routines keep their 0x80.. / 0x1b.. masks in the
# otherwise unused $Tbl2/$Tbl3 registers.
$mask80=$Tbl2;
$mask1b=$Tbl3;

$code.=<<___;
.machine	"any"
.text

.align	7
LAES_Te:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-8`
	mtlr	r0
	blr
	.space	`32-24`
LAES_Td:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvvvvv "distance" between .
and 1st data entry 124 addi $Tbl0,$Tbl0,`128-8-32+2048+256` 125 mtlr r0 126 blr 127 .space `128-32-24` 128___ 129&_data_word( 130 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 131 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, 132 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, 133 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 134 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, 135 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, 136 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 137 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, 138 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, 139 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 140 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, 141 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, 142 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 143 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, 144 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, 145 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 146 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, 147 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, 148 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 149 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, 150 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 151 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 152 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 153 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, 154 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 155 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, 156 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 157 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 158 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 159 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, 160 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 161 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, 162 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 163 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 164 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 165 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, 
166 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 167 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, 168 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 169 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 170 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 171 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, 172 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 173 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, 174 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 175 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 176 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 177 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, 178 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 179 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, 180 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 181 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 182 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, 183 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, 184 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 185 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, 186 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 187 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 188 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, 189 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, 190 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 191 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, 192 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, 193 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); 194$code.=<<___; 195.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 196.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 197.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 198.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 199.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc 200.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 201.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a 202.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 203.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 204.byte 0x52, 0x3b, 
0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 205.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b 206.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf 207.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 208.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 209.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 210.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 211.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 212.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 213.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 214.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb 215.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c 216.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 217.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 218.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 219.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 220.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a 221.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e 222.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e 223.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 224.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf 225.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 226.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 227___ 228&_data_word( 229 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 230 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, 231 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, 232 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 233 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, 234 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, 235 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 236 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, 237 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, 238 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 239 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, 240 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, 241 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 242 0xab73d323, 0x724b02e2, 
0xe31f8f57, 0x6655ab2a, 243 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, 244 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 245 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, 246 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, 247 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 248 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, 249 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, 250 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 251 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, 252 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, 253 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 254 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, 255 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 256 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 257 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 258 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, 259 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 260 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, 261 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, 262 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 263 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, 264 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, 265 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 266 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, 267 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, 268 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 269 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, 270 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, 271 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 272 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, 273 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, 274 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 275 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, 276 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, 277 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 278 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, 279 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, 280 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 
281 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 282 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, 283 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 284 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, 285 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, 286 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 287 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 288 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, 289 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 290 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, 291 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, 292 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); 293$code.=<<___; 294.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 295.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb 296.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 297.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb 298.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d 299.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e 300.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 301.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 302.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 303.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 304.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda 305.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 306.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a 307.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 308.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 309.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b 310.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea 311.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 312.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 313.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e 314.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 315.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b 316.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 317.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 318.byte 
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 319.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f 320.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d 321.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef 322.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 323.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 324.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 325.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 326 327 328.globl .AES_encrypt 329.align 7 330.AES_encrypt: 331 mflr r0 332 $STU $sp,-$FRAME($sp) 333 334 $PUSH r0,`$FRAME-$SIZE_T*21`($sp) 335 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) 336 $PUSH r13,`$FRAME-$SIZE_T*19`($sp) 337 $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 338 $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 339 $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 340 $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 341 $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 342 $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 343 $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 344 $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 345 $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 346 $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 347 $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 348 $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 349 $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 350 $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 351 $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 352 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 353 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 354 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 355 356 lwz $s0,0($inp) 357 lwz $s1,4($inp) 358 lwz $s2,8($inp) 359 lwz $s3,12($inp) 360 bl LAES_Te 361 bl Lppc_AES_encrypt_compact 362 stw $s0,0($out) 363 stw $s1,4($out) 364 stw $s2,8($out) 365 stw $s3,12($out) 366 367 $POP r0,`$FRAME-$SIZE_T*21`($sp) 368 $POP $toc,`$FRAME-$SIZE_T*20`($sp) 369 $POP r13,`$FRAME-$SIZE_T*19`($sp) 370 $POP r14,`$FRAME-$SIZE_T*18`($sp) 371 $POP r15,`$FRAME-$SIZE_T*17`($sp) 372 $POP r16,`$FRAME-$SIZE_T*16`($sp) 373 $POP r17,`$FRAME-$SIZE_T*15`($sp) 374 $POP r18,`$FRAME-$SIZE_T*14`($sp) 375 $POP r19,`$FRAME-$SIZE_T*13`($sp) 376 $POP r20,`$FRAME-$SIZE_T*12`($sp) 377 $POP r21,`$FRAME-$SIZE_T*11`($sp) 378 $POP 
r22,`$FRAME-$SIZE_T*10`($sp) 379 $POP r23,`$FRAME-$SIZE_T*9`($sp) 380 $POP r24,`$FRAME-$SIZE_T*8`($sp) 381 $POP r25,`$FRAME-$SIZE_T*7`($sp) 382 $POP r26,`$FRAME-$SIZE_T*6`($sp) 383 $POP r27,`$FRAME-$SIZE_T*5`($sp) 384 $POP r28,`$FRAME-$SIZE_T*4`($sp) 385 $POP r29,`$FRAME-$SIZE_T*3`($sp) 386 $POP r30,`$FRAME-$SIZE_T*2`($sp) 387 $POP r31,`$FRAME-$SIZE_T*1`($sp) 388 mtlr r0 389 addi $sp,$sp,$FRAME 390 blr 391 392.align 5 393Lppc_AES_encrypt: 394 lwz $acc00,240($key) 395 lwz $t0,0($key) 396 lwz $t1,4($key) 397 lwz $t2,8($key) 398 lwz $t3,12($key) 399 addi $Tbl1,$Tbl0,3 400 addi $Tbl2,$Tbl0,2 401 addi $Tbl3,$Tbl0,1 402 addi $acc00,$acc00,-1 403 addi $key,$key,16 404 xor $s0,$s0,$t0 405 xor $s1,$s1,$t1 406 xor $s2,$s2,$t2 407 xor $s3,$s3,$t3 408 mtctr $acc00 409.align 4 410Lenc_loop: 411 rlwinm $acc00,$s0,`32-24+3`,21,28 412 rlwinm $acc01,$s1,`32-24+3`,21,28 413 rlwinm $acc02,$s2,`32-24+3`,21,28 414 rlwinm $acc03,$s3,`32-24+3`,21,28 415 lwz $t0,0($key) 416 lwz $t1,4($key) 417 rlwinm $acc04,$s1,`32-16+3`,21,28 418 rlwinm $acc05,$s2,`32-16+3`,21,28 419 lwz $t2,8($key) 420 lwz $t3,12($key) 421 rlwinm $acc06,$s3,`32-16+3`,21,28 422 rlwinm $acc07,$s0,`32-16+3`,21,28 423 lwzx $acc00,$Tbl0,$acc00 424 lwzx $acc01,$Tbl0,$acc01 425 rlwinm $acc08,$s2,`32-8+3`,21,28 426 rlwinm $acc09,$s3,`32-8+3`,21,28 427 lwzx $acc02,$Tbl0,$acc02 428 lwzx $acc03,$Tbl0,$acc03 429 rlwinm $acc10,$s0,`32-8+3`,21,28 430 rlwinm $acc11,$s1,`32-8+3`,21,28 431 lwzx $acc04,$Tbl1,$acc04 432 lwzx $acc05,$Tbl1,$acc05 433 rlwinm $acc12,$s3,`0+3`,21,28 434 rlwinm $acc13,$s0,`0+3`,21,28 435 lwzx $acc06,$Tbl1,$acc06 436 lwzx $acc07,$Tbl1,$acc07 437 rlwinm $acc14,$s1,`0+3`,21,28 438 rlwinm $acc15,$s2,`0+3`,21,28 439 lwzx $acc08,$Tbl2,$acc08 440 lwzx $acc09,$Tbl2,$acc09 441 xor $t0,$t0,$acc00 442 xor $t1,$t1,$acc01 443 lwzx $acc10,$Tbl2,$acc10 444 lwzx $acc11,$Tbl2,$acc11 445 xor $t2,$t2,$acc02 446 xor $t3,$t3,$acc03 447 lwzx $acc12,$Tbl3,$acc12 448 lwzx $acc13,$Tbl3,$acc13 449 xor $t0,$t0,$acc04 450 xor 
$t1,$t1,$acc05 451 lwzx $acc14,$Tbl3,$acc14 452 lwzx $acc15,$Tbl3,$acc15 453 xor $t2,$t2,$acc06 454 xor $t3,$t3,$acc07 455 xor $t0,$t0,$acc08 456 xor $t1,$t1,$acc09 457 xor $t2,$t2,$acc10 458 xor $t3,$t3,$acc11 459 xor $s0,$t0,$acc12 460 xor $s1,$t1,$acc13 461 xor $s2,$t2,$acc14 462 xor $s3,$t3,$acc15 463 addi $key,$key,16 464 bdnz- Lenc_loop 465 466 addi $Tbl2,$Tbl0,2048 467 nop 468 lwz $t0,0($key) 469 lwz $t1,4($key) 470 rlwinm $acc00,$s0,`32-24`,24,31 471 rlwinm $acc01,$s1,`32-24`,24,31 472 lwz $t2,8($key) 473 lwz $t3,12($key) 474 rlwinm $acc02,$s2,`32-24`,24,31 475 rlwinm $acc03,$s3,`32-24`,24,31 476 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4 477 lwz $acc09,`2048+32`($Tbl0) 478 rlwinm $acc04,$s1,`32-16`,24,31 479 rlwinm $acc05,$s2,`32-16`,24,31 480 lwz $acc10,`2048+64`($Tbl0) 481 lwz $acc11,`2048+96`($Tbl0) 482 rlwinm $acc06,$s3,`32-16`,24,31 483 rlwinm $acc07,$s0,`32-16`,24,31 484 lwz $acc12,`2048+128`($Tbl0) 485 lwz $acc13,`2048+160`($Tbl0) 486 rlwinm $acc08,$s2,`32-8`,24,31 487 rlwinm $acc09,$s3,`32-8`,24,31 488 lwz $acc14,`2048+192`($Tbl0) 489 lwz $acc15,`2048+224`($Tbl0) 490 rlwinm $acc10,$s0,`32-8`,24,31 491 rlwinm $acc11,$s1,`32-8`,24,31 492 lbzx $acc00,$Tbl2,$acc00 493 lbzx $acc01,$Tbl2,$acc01 494 rlwinm $acc12,$s3,`0`,24,31 495 rlwinm $acc13,$s0,`0`,24,31 496 lbzx $acc02,$Tbl2,$acc02 497 lbzx $acc03,$Tbl2,$acc03 498 rlwinm $acc14,$s1,`0`,24,31 499 rlwinm $acc15,$s2,`0`,24,31 500 lbzx $acc04,$Tbl2,$acc04 501 lbzx $acc05,$Tbl2,$acc05 502 rlwinm $s0,$acc00,24,0,7 503 rlwinm $s1,$acc01,24,0,7 504 lbzx $acc06,$Tbl2,$acc06 505 lbzx $acc07,$Tbl2,$acc07 506 rlwinm $s2,$acc02,24,0,7 507 rlwinm $s3,$acc03,24,0,7 508 lbzx $acc08,$Tbl2,$acc08 509 lbzx $acc09,$Tbl2,$acc09 510 rlwimi $s0,$acc04,16,8,15 511 rlwimi $s1,$acc05,16,8,15 512 lbzx $acc10,$Tbl2,$acc10 513 lbzx $acc11,$Tbl2,$acc11 514 rlwimi $s2,$acc06,16,8,15 515 rlwimi $s3,$acc07,16,8,15 516 lbzx $acc12,$Tbl2,$acc12 517 lbzx $acc13,$Tbl2,$acc13 518 rlwimi $s0,$acc08,8,16,23 519 rlwimi $s1,$acc09,8,16,23 520 
lbzx $acc14,$Tbl2,$acc14 521 lbzx $acc15,$Tbl2,$acc15 522 rlwimi $s2,$acc10,8,16,23 523 rlwimi $s3,$acc11,8,16,23 524 or $s0,$s0,$acc12 525 or $s1,$s1,$acc13 526 or $s2,$s2,$acc14 527 or $s3,$s3,$acc15 528 xor $s0,$s0,$t0 529 xor $s1,$s1,$t1 530 xor $s2,$s2,$t2 531 xor $s3,$s3,$t3 532 blr 533 534.align 4 535Lppc_AES_encrypt_compact: 536 lwz $acc00,240($key) 537 lwz $t0,0($key) 538 lwz $t1,4($key) 539 lwz $t2,8($key) 540 lwz $t3,12($key) 541 addi $Tbl1,$Tbl0,2048 542 lis $mask80,0x8080 543 lis $mask1b,0x1b1b 544 addi $key,$key,16 545 ori $mask80,$mask80,0x8080 546 ori $mask1b,$mask1b,0x1b1b 547 mtctr $acc00 548.align 4 549Lenc_compact_loop: 550 xor $s0,$s0,$t0 551 xor $s1,$s1,$t1 552 xor $s2,$s2,$t2 553 xor $s3,$s3,$t3 554 rlwinm $acc00,$s0,`32-24`,24,31 555 rlwinm $acc01,$s1,`32-24`,24,31 556 rlwinm $acc02,$s2,`32-24`,24,31 557 rlwinm $acc03,$s3,`32-24`,24,31 558 rlwinm $acc04,$s1,`32-16`,24,31 559 rlwinm $acc05,$s2,`32-16`,24,31 560 rlwinm $acc06,$s3,`32-16`,24,31 561 rlwinm $acc07,$s0,`32-16`,24,31 562 lbzx $acc00,$Tbl1,$acc00 563 lbzx $acc01,$Tbl1,$acc01 564 rlwinm $acc08,$s2,`32-8`,24,31 565 rlwinm $acc09,$s3,`32-8`,24,31 566 lbzx $acc02,$Tbl1,$acc02 567 lbzx $acc03,$Tbl1,$acc03 568 rlwinm $acc10,$s0,`32-8`,24,31 569 rlwinm $acc11,$s1,`32-8`,24,31 570 lbzx $acc04,$Tbl1,$acc04 571 lbzx $acc05,$Tbl1,$acc05 572 rlwinm $acc12,$s3,`0`,24,31 573 rlwinm $acc13,$s0,`0`,24,31 574 lbzx $acc06,$Tbl1,$acc06 575 lbzx $acc07,$Tbl1,$acc07 576 rlwinm $acc14,$s1,`0`,24,31 577 rlwinm $acc15,$s2,`0`,24,31 578 lbzx $acc08,$Tbl1,$acc08 579 lbzx $acc09,$Tbl1,$acc09 580 rlwinm $s0,$acc00,24,0,7 581 rlwinm $s1,$acc01,24,0,7 582 lbzx $acc10,$Tbl1,$acc10 583 lbzx $acc11,$Tbl1,$acc11 584 rlwinm $s2,$acc02,24,0,7 585 rlwinm $s3,$acc03,24,0,7 586 lbzx $acc12,$Tbl1,$acc12 587 lbzx $acc13,$Tbl1,$acc13 588 rlwimi $s0,$acc04,16,8,15 589 rlwimi $s1,$acc05,16,8,15 590 lbzx $acc14,$Tbl1,$acc14 591 lbzx $acc15,$Tbl1,$acc15 592 rlwimi $s2,$acc06,16,8,15 593 rlwimi $s3,$acc07,16,8,15 594 rlwimi 
$s0,$acc08,8,16,23 595 rlwimi $s1,$acc09,8,16,23 596 rlwimi $s2,$acc10,8,16,23 597 rlwimi $s3,$acc11,8,16,23 598 lwz $t0,0($key) 599 lwz $t1,4($key) 600 or $s0,$s0,$acc12 601 or $s1,$s1,$acc13 602 lwz $t2,8($key) 603 lwz $t3,12($key) 604 or $s2,$s2,$acc14 605 or $s3,$s3,$acc15 606 607 addi $key,$key,16 608 bdz Lenc_compact_done 609 610 and $acc00,$s0,$mask80 # r1=r0&0x80808080 611 and $acc01,$s1,$mask80 612 and $acc02,$s2,$mask80 613 and $acc03,$s3,$mask80 614 srwi $acc04,$acc00,7 # r1>>7 615 srwi $acc05,$acc01,7 616 srwi $acc06,$acc02,7 617 srwi $acc07,$acc03,7 618 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f 619 andc $acc09,$s1,$mask80 620 andc $acc10,$s2,$mask80 621 andc $acc11,$s3,$mask80 622 sub $acc00,$acc00,$acc04 # r1-(r1>>7) 623 sub $acc01,$acc01,$acc05 624 sub $acc02,$acc02,$acc06 625 sub $acc03,$acc03,$acc07 626 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 627 add $acc09,$acc09,$acc09 628 add $acc10,$acc10,$acc10 629 add $acc11,$acc11,$acc11 630 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 631 and $acc01,$acc01,$mask1b 632 and $acc02,$acc02,$mask1b 633 and $acc03,$acc03,$mask1b 634 xor $acc00,$acc00,$acc08 # r2 635 xor $acc01,$acc01,$acc09 636 xor $acc02,$acc02,$acc10 637 xor $acc03,$acc03,$acc11 638 639 rotlwi $acc12,$s0,16 # ROTATE(r0,16) 640 rotlwi $acc13,$s1,16 641 rotlwi $acc14,$s2,16 642 rotlwi $acc15,$s3,16 643 xor $s0,$s0,$acc00 # r0^r2 644 xor $s1,$s1,$acc01 645 xor $s2,$s2,$acc02 646 xor $s3,$s3,$acc03 647 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24) 648 rotrwi $s1,$s1,24 649 rotrwi $s2,$s2,24 650 rotrwi $s3,$s3,24 651 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2 652 xor $s1,$s1,$acc01 653 xor $s2,$s2,$acc02 654 xor $s3,$s3,$acc03 655 rotlwi $acc08,$acc12,8 # ROTATE(r0,24) 656 rotlwi $acc09,$acc13,8 657 rotlwi $acc10,$acc14,8 658 rotlwi $acc11,$acc15,8 659 xor $s0,$s0,$acc12 # 660 xor $s1,$s1,$acc13 661 xor $s2,$s2,$acc14 662 xor $s3,$s3,$acc15 663 xor $s0,$s0,$acc08 # 664 xor $s1,$s1,$acc09 665 xor $s2,$s2,$acc10 666 xor $s3,$s3,$acc11 667 668 b 
Lenc_compact_loop 669.align 4 670Lenc_compact_done: 671 xor $s0,$s0,$t0 672 xor $s1,$s1,$t1 673 xor $s2,$s2,$t2 674 xor $s3,$s3,$t3 675 blr 676 677.globl .AES_decrypt 678.align 7 679.AES_decrypt: 680 mflr r0 681 $STU $sp,-$FRAME($sp) 682 683 $PUSH r0,`$FRAME-$SIZE_T*21`($sp) 684 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) 685 $PUSH r13,`$FRAME-$SIZE_T*19`($sp) 686 $PUSH r14,`$FRAME-$SIZE_T*18`($sp) 687 $PUSH r15,`$FRAME-$SIZE_T*17`($sp) 688 $PUSH r16,`$FRAME-$SIZE_T*16`($sp) 689 $PUSH r17,`$FRAME-$SIZE_T*15`($sp) 690 $PUSH r18,`$FRAME-$SIZE_T*14`($sp) 691 $PUSH r19,`$FRAME-$SIZE_T*13`($sp) 692 $PUSH r20,`$FRAME-$SIZE_T*12`($sp) 693 $PUSH r21,`$FRAME-$SIZE_T*11`($sp) 694 $PUSH r22,`$FRAME-$SIZE_T*10`($sp) 695 $PUSH r23,`$FRAME-$SIZE_T*9`($sp) 696 $PUSH r24,`$FRAME-$SIZE_T*8`($sp) 697 $PUSH r25,`$FRAME-$SIZE_T*7`($sp) 698 $PUSH r26,`$FRAME-$SIZE_T*6`($sp) 699 $PUSH r27,`$FRAME-$SIZE_T*5`($sp) 700 $PUSH r28,`$FRAME-$SIZE_T*4`($sp) 701 $PUSH r29,`$FRAME-$SIZE_T*3`($sp) 702 $PUSH r30,`$FRAME-$SIZE_T*2`($sp) 703 $PUSH r31,`$FRAME-$SIZE_T*1`($sp) 704 705 lwz $s0,0($inp) 706 lwz $s1,4($inp) 707 lwz $s2,8($inp) 708 lwz $s3,12($inp) 709 bl LAES_Td 710 bl Lppc_AES_decrypt_compact 711 stw $s0,0($out) 712 stw $s1,4($out) 713 stw $s2,8($out) 714 stw $s3,12($out) 715 716 $POP r0,`$FRAME-$SIZE_T*21`($sp) 717 $POP $toc,`$FRAME-$SIZE_T*20`($sp) 718 $POP r13,`$FRAME-$SIZE_T*19`($sp) 719 $POP r14,`$FRAME-$SIZE_T*18`($sp) 720 $POP r15,`$FRAME-$SIZE_T*17`($sp) 721 $POP r16,`$FRAME-$SIZE_T*16`($sp) 722 $POP r17,`$FRAME-$SIZE_T*15`($sp) 723 $POP r18,`$FRAME-$SIZE_T*14`($sp) 724 $POP r19,`$FRAME-$SIZE_T*13`($sp) 725 $POP r20,`$FRAME-$SIZE_T*12`($sp) 726 $POP r21,`$FRAME-$SIZE_T*11`($sp) 727 $POP r22,`$FRAME-$SIZE_T*10`($sp) 728 $POP r23,`$FRAME-$SIZE_T*9`($sp) 729 $POP r24,`$FRAME-$SIZE_T*8`($sp) 730 $POP r25,`$FRAME-$SIZE_T*7`($sp) 731 $POP r26,`$FRAME-$SIZE_T*6`($sp) 732 $POP r27,`$FRAME-$SIZE_T*5`($sp) 733 $POP r28,`$FRAME-$SIZE_T*4`($sp) 734 $POP r29,`$FRAME-$SIZE_T*3`($sp) 735 $POP 
r30,`$FRAME-$SIZE_T*2`($sp) 736 $POP r31,`$FRAME-$SIZE_T*1`($sp) 737 mtlr r0 738 addi $sp,$sp,$FRAME 739 blr 740 741.align 5 742Lppc_AES_decrypt: 743 lwz $acc00,240($key) 744 lwz $t0,0($key) 745 lwz $t1,4($key) 746 lwz $t2,8($key) 747 lwz $t3,12($key) 748 addi $Tbl1,$Tbl0,3 749 addi $Tbl2,$Tbl0,2 750 addi $Tbl3,$Tbl0,1 751 addi $acc00,$acc00,-1 752 addi $key,$key,16 753 xor $s0,$s0,$t0 754 xor $s1,$s1,$t1 755 xor $s2,$s2,$t2 756 xor $s3,$s3,$t3 757 mtctr $acc00 758.align 4 759Ldec_loop: 760 rlwinm $acc00,$s0,`32-24+3`,21,28 761 rlwinm $acc01,$s1,`32-24+3`,21,28 762 rlwinm $acc02,$s2,`32-24+3`,21,28 763 rlwinm $acc03,$s3,`32-24+3`,21,28 764 lwz $t0,0($key) 765 lwz $t1,4($key) 766 rlwinm $acc04,$s3,`32-16+3`,21,28 767 rlwinm $acc05,$s0,`32-16+3`,21,28 768 lwz $t2,8($key) 769 lwz $t3,12($key) 770 rlwinm $acc06,$s1,`32-16+3`,21,28 771 rlwinm $acc07,$s2,`32-16+3`,21,28 772 lwzx $acc00,$Tbl0,$acc00 773 lwzx $acc01,$Tbl0,$acc01 774 rlwinm $acc08,$s2,`32-8+3`,21,28 775 rlwinm $acc09,$s3,`32-8+3`,21,28 776 lwzx $acc02,$Tbl0,$acc02 777 lwzx $acc03,$Tbl0,$acc03 778 rlwinm $acc10,$s0,`32-8+3`,21,28 779 rlwinm $acc11,$s1,`32-8+3`,21,28 780 lwzx $acc04,$Tbl1,$acc04 781 lwzx $acc05,$Tbl1,$acc05 782 rlwinm $acc12,$s1,`0+3`,21,28 783 rlwinm $acc13,$s2,`0+3`,21,28 784 lwzx $acc06,$Tbl1,$acc06 785 lwzx $acc07,$Tbl1,$acc07 786 rlwinm $acc14,$s3,`0+3`,21,28 787 rlwinm $acc15,$s0,`0+3`,21,28 788 lwzx $acc08,$Tbl2,$acc08 789 lwzx $acc09,$Tbl2,$acc09 790 xor $t0,$t0,$acc00 791 xor $t1,$t1,$acc01 792 lwzx $acc10,$Tbl2,$acc10 793 lwzx $acc11,$Tbl2,$acc11 794 xor $t2,$t2,$acc02 795 xor $t3,$t3,$acc03 796 lwzx $acc12,$Tbl3,$acc12 797 lwzx $acc13,$Tbl3,$acc13 798 xor $t0,$t0,$acc04 799 xor $t1,$t1,$acc05 800 lwzx $acc14,$Tbl3,$acc14 801 lwzx $acc15,$Tbl3,$acc15 802 xor $t2,$t2,$acc06 803 xor $t3,$t3,$acc07 804 xor $t0,$t0,$acc08 805 xor $t1,$t1,$acc09 806 xor $t2,$t2,$acc10 807 xor $t3,$t3,$acc11 808 xor $s0,$t0,$acc12 809 xor $s1,$t1,$acc13 810 xor $s2,$t2,$acc14 811 xor $s3,$t3,$acc15 812 
addi $key,$key,16 813 bdnz- Ldec_loop 814 815 addi $Tbl2,$Tbl0,2048 816 nop 817 lwz $t0,0($key) 818 lwz $t1,4($key) 819 rlwinm $acc00,$s0,`32-24`,24,31 820 rlwinm $acc01,$s1,`32-24`,24,31 821 lwz $t2,8($key) 822 lwz $t3,12($key) 823 rlwinm $acc02,$s2,`32-24`,24,31 824 rlwinm $acc03,$s3,`32-24`,24,31 825 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4 826 lwz $acc09,`2048+32`($Tbl0) 827 rlwinm $acc04,$s3,`32-16`,24,31 828 rlwinm $acc05,$s0,`32-16`,24,31 829 lwz $acc10,`2048+64`($Tbl0) 830 lwz $acc11,`2048+96`($Tbl0) 831 lbzx $acc00,$Tbl2,$acc00 832 lbzx $acc01,$Tbl2,$acc01 833 lwz $acc12,`2048+128`($Tbl0) 834 lwz $acc13,`2048+160`($Tbl0) 835 rlwinm $acc06,$s1,`32-16`,24,31 836 rlwinm $acc07,$s2,`32-16`,24,31 837 lwz $acc14,`2048+192`($Tbl0) 838 lwz $acc15,`2048+224`($Tbl0) 839 rlwinm $acc08,$s2,`32-8`,24,31 840 rlwinm $acc09,$s3,`32-8`,24,31 841 lbzx $acc02,$Tbl2,$acc02 842 lbzx $acc03,$Tbl2,$acc03 843 rlwinm $acc10,$s0,`32-8`,24,31 844 rlwinm $acc11,$s1,`32-8`,24,31 845 lbzx $acc04,$Tbl2,$acc04 846 lbzx $acc05,$Tbl2,$acc05 847 rlwinm $acc12,$s1,`0`,24,31 848 rlwinm $acc13,$s2,`0`,24,31 849 lbzx $acc06,$Tbl2,$acc06 850 lbzx $acc07,$Tbl2,$acc07 851 rlwinm $acc14,$s3,`0`,24,31 852 rlwinm $acc15,$s0,`0`,24,31 853 lbzx $acc08,$Tbl2,$acc08 854 lbzx $acc09,$Tbl2,$acc09 855 rlwinm $s0,$acc00,24,0,7 856 rlwinm $s1,$acc01,24,0,7 857 lbzx $acc10,$Tbl2,$acc10 858 lbzx $acc11,$Tbl2,$acc11 859 rlwinm $s2,$acc02,24,0,7 860 rlwinm $s3,$acc03,24,0,7 861 lbzx $acc12,$Tbl2,$acc12 862 lbzx $acc13,$Tbl2,$acc13 863 rlwimi $s0,$acc04,16,8,15 864 rlwimi $s1,$acc05,16,8,15 865 lbzx $acc14,$Tbl2,$acc14 866 lbzx $acc15,$Tbl2,$acc15 867 rlwimi $s2,$acc06,16,8,15 868 rlwimi $s3,$acc07,16,8,15 869 rlwimi $s0,$acc08,8,16,23 870 rlwimi $s1,$acc09,8,16,23 871 rlwimi $s2,$acc10,8,16,23 872 rlwimi $s3,$acc11,8,16,23 873 or $s0,$s0,$acc12 874 or $s1,$s1,$acc13 875 or $s2,$s2,$acc14 876 or $s3,$s3,$acc15 877 xor $s0,$s0,$t0 878 xor $s1,$s1,$t1 879 xor $s2,$s2,$t2 880 xor $s3,$s3,$t3 881 blr 882 883.align 4 
884Lppc_AES_decrypt_compact: 885 lwz $acc00,240($key) 886 lwz $t0,0($key) 887 lwz $t1,4($key) 888 lwz $t2,8($key) 889 lwz $t3,12($key) 890 addi $Tbl1,$Tbl0,2048 891 lis $mask80,0x8080 892 lis $mask1b,0x1b1b 893 addi $key,$key,16 894 ori $mask80,$mask80,0x8080 895 ori $mask1b,$mask1b,0x1b1b 896___ 897$code.=<<___ if ($SIZE_T==8); 898 insrdi $mask80,$mask80,32,0 899 insrdi $mask1b,$mask1b,32,0 900___ 901$code.=<<___; 902 mtctr $acc00 903.align 4 904Ldec_compact_loop: 905 xor $s0,$s0,$t0 906 xor $s1,$s1,$t1 907 xor $s2,$s2,$t2 908 xor $s3,$s3,$t3 909 rlwinm $acc00,$s0,`32-24`,24,31 910 rlwinm $acc01,$s1,`32-24`,24,31 911 rlwinm $acc02,$s2,`32-24`,24,31 912 rlwinm $acc03,$s3,`32-24`,24,31 913 rlwinm $acc04,$s3,`32-16`,24,31 914 rlwinm $acc05,$s0,`32-16`,24,31 915 rlwinm $acc06,$s1,`32-16`,24,31 916 rlwinm $acc07,$s2,`32-16`,24,31 917 lbzx $acc00,$Tbl1,$acc00 918 lbzx $acc01,$Tbl1,$acc01 919 rlwinm $acc08,$s2,`32-8`,24,31 920 rlwinm $acc09,$s3,`32-8`,24,31 921 lbzx $acc02,$Tbl1,$acc02 922 lbzx $acc03,$Tbl1,$acc03 923 rlwinm $acc10,$s0,`32-8`,24,31 924 rlwinm $acc11,$s1,`32-8`,24,31 925 lbzx $acc04,$Tbl1,$acc04 926 lbzx $acc05,$Tbl1,$acc05 927 rlwinm $acc12,$s1,`0`,24,31 928 rlwinm $acc13,$s2,`0`,24,31 929 lbzx $acc06,$Tbl1,$acc06 930 lbzx $acc07,$Tbl1,$acc07 931 rlwinm $acc14,$s3,`0`,24,31 932 rlwinm $acc15,$s0,`0`,24,31 933 lbzx $acc08,$Tbl1,$acc08 934 lbzx $acc09,$Tbl1,$acc09 935 rlwinm $s0,$acc00,24,0,7 936 rlwinm $s1,$acc01,24,0,7 937 lbzx $acc10,$Tbl1,$acc10 938 lbzx $acc11,$Tbl1,$acc11 939 rlwinm $s2,$acc02,24,0,7 940 rlwinm $s3,$acc03,24,0,7 941 lbzx $acc12,$Tbl1,$acc12 942 lbzx $acc13,$Tbl1,$acc13 943 rlwimi $s0,$acc04,16,8,15 944 rlwimi $s1,$acc05,16,8,15 945 lbzx $acc14,$Tbl1,$acc14 946 lbzx $acc15,$Tbl1,$acc15 947 rlwimi $s2,$acc06,16,8,15 948 rlwimi $s3,$acc07,16,8,15 949 rlwimi $s0,$acc08,8,16,23 950 rlwimi $s1,$acc09,8,16,23 951 rlwimi $s2,$acc10,8,16,23 952 rlwimi $s3,$acc11,8,16,23 953 lwz $t0,0($key) 954 lwz $t1,4($key) 955 or $s0,$s0,$acc12 956 or 
$s1,$s1,$acc13
	lwz	$t2,8($key)
	lwz	$t3,12($key)
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15

	addi	$key,$key,16
	bdz	Ldec_compact_done
___
# 64-bit flavour only: pack two 32-bit state words per register (s1 into
# the upper half of s0, s3 into the upper half of s2) so that every
# instruction below processes two words at once.  Three byte-wise doubling
# rounds produce r2, r4 and r8 exactly as the inline comments spell out
# (mask80/mask1b presumably hold 0x80808080/0x1b1b1b1b replicated across
# the register -- they are loaded outside this section).  The closing
# extrdi instructions copy the upper halves out into the odd-numbered acc
# registers that the flavour-independent code further down reads.
$code.=<<___ if ($SIZE_T==8);
	# vectorized permutation improves decrypt performance by 10%
	insrdi	$s0,$s1,32,0
	insrdi	$s2,$s3,32,0

	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc02,$s2,$mask80
	srdi	$acc04,$acc00,7		# r1>>7
	srdi	$acc06,$acc02,7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	andc	$acc10,$s2,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc02,$acc02,$acc06
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc10,$acc10,$acc10
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc02,$acc02,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc02,$acc02,$acc10

	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
	and	$acc06,$acc02,$mask80
	srdi	$acc08,$acc04,7		# r1>>7
	srdi	$acc10,$acc06,7
	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
	andc	$acc14,$acc02,$mask80
	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
	sub	$acc06,$acc06,$acc10
	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
	add	$acc14,$acc14,$acc14
	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc06,$acc06,$mask1b
	xor	$acc04,$acc04,$acc12	# r4
	xor	$acc06,$acc06,$acc14

	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
	and	$acc10,$acc06,$mask80
	srdi	$acc12,$acc08,7		# r1>>7
	srdi	$acc14,$acc10,7
	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
	sub	$acc10,$acc10,$acc14
	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
	andc	$acc14,$acc06,$mask80
	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
	add	$acc14,$acc14,$acc14
	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc10,$acc10,$mask1b
	xor	$acc08,$acc08,$acc12	# r8
	xor	$acc10,$acc10,$acc14

	xor	$acc00,$acc00,$s0	# r2^r0
	xor	$acc02,$acc02,$s2
	xor	$acc04,$acc04,$s0	# r4^r0
	xor	$acc06,$acc06,$s2

	extrdi	$acc01,$acc00,32,0
	extrdi	$acc03,$acc02,32,0
	extrdi	$acc05,$acc04,32,0
	extrdi	$acc07,$acc06,32,0
	extrdi	$acc09,$acc08,32,0
	extrdi	$acc11,$acc10,32,0
___
# 32-bit flavour: the same r2/r4/r8 computation, but with one state word
# per register, so every step is issued four times (s0..s3).
$code.=<<___ if ($SIZE_T==4);
	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc01,$s1,$mask80
	and	$acc02,$s2,$mask80
	and	$acc03,$s3,$mask80
	srwi	$acc04,$acc00,7		# r1>>7
	srwi	$acc05,$acc01,7
	srwi	$acc06,$acc02,7
	srwi	$acc07,$acc03,7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	andc	$acc09,$s1,$mask80
	andc	$acc10,$s2,$mask80
	andc	$acc11,$s3,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc01,$acc01,$acc05
	sub	$acc02,$acc02,$acc06
	sub	$acc03,$acc03,$acc07
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc09,$acc09,$acc09
	add	$acc10,$acc10,$acc10
	add	$acc11,$acc11,$acc11
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc01,$acc01,$mask1b
	and	$acc02,$acc02,$mask1b
	and	$acc03,$acc03,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc01,$acc01,$acc09
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11

	and	$acc04,$acc00,$mask80	# r1=r2&0x80808080
	and	$acc05,$acc01,$mask80
	and	$acc06,$acc02,$mask80
	and	$acc07,$acc03,$mask80
	srwi	$acc08,$acc04,7		# r1>>7
	srwi	$acc09,$acc05,7
	srwi	$acc10,$acc06,7
	srwi	$acc11,$acc07,7
	andc	$acc12,$acc00,$mask80	# r2&0x7f7f7f7f
	andc	$acc13,$acc01,$mask80
	andc	$acc14,$acc02,$mask80
	andc	$acc15,$acc03,$mask80
	sub	$acc04,$acc04,$acc08	# r1-(r1>>7)
	sub	$acc05,$acc05,$acc09
	sub	$acc06,$acc06,$acc10
	sub	$acc07,$acc07,$acc11
	add	$acc12,$acc12,$acc12	# (r2&0x7f7f7f7f)<<1
	add	$acc13,$acc13,$acc13
	add	$acc14,$acc14,$acc14
	add	$acc15,$acc15,$acc15
	and	$acc04,$acc04,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc05,$acc05,$mask1b
	and	$acc06,$acc06,$mask1b
	and	$acc07,$acc07,$mask1b
	xor	$acc04,$acc04,$acc12	# r4
	xor	$acc05,$acc05,$acc13
	xor	$acc06,$acc06,$acc14
	xor	$acc07,$acc07,$acc15

	and	$acc08,$acc04,$mask80	# r1=r4&0x80808080
	and	$acc09,$acc05,$mask80
	and	$acc10,$acc06,$mask80
	and	$acc11,$acc07,$mask80
	srwi	$acc12,$acc08,7		# r1>>7
	srwi	$acc13,$acc09,7
	srwi	$acc14,$acc10,7
	srwi	$acc15,$acc11,7
	sub	$acc08,$acc08,$acc12	# r1-(r1>>7)
	sub	$acc09,$acc09,$acc13
	sub	$acc10,$acc10,$acc14
	sub	$acc11,$acc11,$acc15
	andc	$acc12,$acc04,$mask80	# r4&0x7f7f7f7f
	andc	$acc13,$acc05,$mask80
	andc	$acc14,$acc06,$mask80
	andc	$acc15,$acc07,$mask80
	add	$acc12,$acc12,$acc12	# (r4&0x7f7f7f7f)<<1
	add	$acc13,$acc13,$acc13
	add	$acc14,$acc14,$acc14
	add	$acc15,$acc15,$acc15
	and	$acc08,$acc08,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc09,$acc09,$mask1b
	and	$acc10,$acc10,$mask1b
	and	$acc11,$acc11,$mask1b
	xor	$acc08,$acc08,$acc12	# r8
	xor	$acc09,$acc09,$acc13
	xor	$acc10,$acc10,$acc14
	xor	$acc11,$acc11,$acc15

	xor	$acc00,$acc00,$s0	# r2^r0
	xor	$acc01,$acc01,$s1
	xor	$acc02,$acc02,$s2
	xor	$acc03,$acc03,$s3
	xor	$acc04,$acc04,$s0	# r4^r0
	xor	$acc05,$acc05,$s1
	xor	$acc06,$acc06,$s2
	xor	$acc07,$acc07,$s3
___
# Flavour-independent tail of the loop body: fold r0, r2, r4 and r8 (with
# the rotations named in the inline comments) back into s0..s3, then branch
# to the top of the loop.  Ldec_compact_done, the target of the bdz above,
# xors t0..t3 into the state and returns.
$code.=<<___;
	rotrwi	$s0,$s0,8		# = ROTATE(r0,8)
	rotrwi	$s1,$s1,8
	rotrwi	$s2,$s2,8
	rotrwi	$s3,$s3,8
	xor	$s0,$s0,$acc00		# ^= r2^r0
	xor	$s1,$s1,$acc01
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	xor	$acc00,$acc00,$acc08
	xor	$acc01,$acc01,$acc09
	xor	$acc02,$acc02,$acc10
	xor	$acc03,$acc03,$acc11
	xor	$s0,$s0,$acc04		# ^= r4^r0
	xor	$s1,$s1,$acc05
	xor	$s2,$s2,$acc06
	xor	$s3,$s3,$acc07
	rotrwi	$acc00,$acc00,24
	rotrwi	$acc01,$acc01,24
	rotrwi	$acc02,$acc02,24
	rotrwi	$acc03,$acc03,24
	xor	$acc04,$acc04,$acc08
	xor	$acc05,$acc05,$acc09
	xor	$acc06,$acc06,$acc10
	xor	$acc07,$acc07,$acc11
	xor	$s0,$s0,$acc08		# ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
	xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11
	rotrwi	$acc04,$acc04,16
	rotrwi	$acc05,$acc05,16
	rotrwi	$acc06,$acc06,16
	rotrwi	$acc07,$acc07,16
	xor	$s0,$s0,$acc00		# ^= ROTATE(r8^r2^r0,24)
	xor	$s1,$s1,$acc01
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	rotrwi	$acc08,$acc08,8
	rotrwi	$acc09,$acc09,8
	rotrwi	$acc10,$acc10,8
	rotrwi	$acc11,$acc11,8
	xor	$s0,$s0,$acc04		# ^= ROTATE(r8^r4^r0,16)
	xor	$s1,$s1,$acc05
	xor	$s2,$s2,$acc06
	xor	$s3,$s3,$acc07
	xor	$s0,$s0,$acc08		# ^= ROTATE(r8,8)
	xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11

	b	Ldec_compact_loop
.align	4
Ldec_compact_done:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
.long	0
.asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
.align	7
___

# Replace every `...` span in the generated text with the value of the Perl
# expression it contains, then emit the result.  STDOUT was re-opened near
# the top of the file as a pipe into ppc-xlate.pl.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
# Closing a piped handle waits for the child and returns false if the final
# flush fails or the child exits non-zero.  Die on that so a translator
# failure cannot leave truncated output behind unnoticed.
close STDOUT or die "error closing STDOUT: $!";