#include "arm_arch.h"

.text

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

@ ------------------------------------------------------------------------
@ void sha1_block_data_order(u32 *state, const u8 *data, size_t num)
@
@ SHA-1 block transform for ARMv4+ (CRYPTOGAMS).
@ In:   r0 = state, pointer to 5-word A,B,C,D,E chaining value
@       r1 = data,  input blocks (unaligned allowed on __ARM_ARCH__>=7)
@       r2 = num,   number of 64-byte blocks to process
@ Out:  state[0..4] updated in place
@ ABI:  AAPCS; r4-r12,lr saved on entry, 80 words of stack used for X[]
@
@ Register roles inside the loop:
@       r3..r7  = A,B,C,D,E  (B,C,D,E kept pre-rotated right by 2,
@                 compensated by the ror#2/ror#27/ror#30 operands)
@       r8      = round constant K_xx_xx
@       r9..r12 = scratch (message word assembly / Xupdate)
@       r14     = frame pointer walking down the X[] scratch area;
@                 "teq r14,sp" detects the end of each 5-round strip
@ ------------------------------------------------------------------------
.align	2
sha1_block_data_order:
	stmdb	sp!,{r4-r12,lr}
	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
	ldmia	r0,{r3,r4,r5,r6,r7}
.Lloop:
	ldr	r8,.LK_00_19
	mov	r14,sp
	sub	sp,sp,#15*4
	mov	r5,r5,ror#30
	mov	r6,r6,ror#30
	mov	r7,r7,ror#30		@ [6]

@ Rounds 0..14: load big-endian message words, F_00_19 = (B&(C^D))^D.
.L_00_15:
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r4,r5			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r6,r8,r6,ror#2			@ E+=K_00_19
	eor	r10,r4,r5			@ F_xx_xx
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r3,r10,ror#2
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r3,r4			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r5,r8,r5,ror#2			@ E+=K_00_19
	eor	r10,r3,r4			@ F_xx_xx
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r7,r10,ror#2
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r7,r3			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r4,r8,r4,ror#2			@ E+=K_00_19
	eor	r10,r7,r3			@ F_xx_xx
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r6,r10,ror#2
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r6,r7			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r3,r8,r3,ror#2			@ E+=K_00_19
	eor	r10,r6,r7			@ F_xx_xx
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r5,r10,ror#2
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
	teq	r14,sp
	bne	.L_00_15		@ [((11+4)*5+2)*3]
	sub	sp,sp,#5*4

@ Round 15: last direct message-word round.
#if __ARM_ARCH__<7
	ldrb	r10,[r1,#2]
	ldrb	r9,[r1,#3]
	ldrb	r11,[r1,#1]
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	ldrb	r12,[r1],#4
	orr	r9,r9,r10,lsl#8
	eor	r10,r5,r6			@ F_xx_xx
	orr	r9,r9,r11,lsl#16
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	orr	r9,r9,r12,lsl#24
#else
	ldr	r9,[r1],#4			@ handles unaligned
	add	r7,r8,r7,ror#2			@ E+=K_00_19
	eor	r10,r5,r6			@ F_xx_xx
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
#ifdef __ARMEL__
	rev	r9,r9				@ byte swap
#endif
#endif
	and	r10,r4,r10,ror#2
	add	r7,r7,r9			@ E+=X[i]
	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
	str	r9,[r14,#-4]!
	add	r7,r7,r10			@ E+=F_00_19(B,C,D)

@ Rounds 16..19: Xupdate (W[i] = ROL1 of four earlier words), still F_00_19.
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
	add	r3,r3,r10			@ E+=F_00_19(B,C,D)

@ Rounds 20..39 and 60..79 share one loop body (F = B^C^D); the carry
@ flag, set up by cmn/cmp below, distinguishes the two passes.
	ldr	r8,.LK_20_39		@ [+15+16*4]
	sub	sp,sp,#20*4
	cmn	sp,#0			@ [+3], clear carry to denote 20_39
.L_20_39_or_60_79:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r4,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r3,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r7,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r6,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_20_39(B,C,D)
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	eor	r10,r5,r10,ror#2		@ F_xx_xx
						@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
	teq	r14,sp			@ preserve carry
	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes

@ Rounds 40..59: F_40_59 = (B&C)|(D&(B|C)), computed as (B&(C^D))+(C&D).
	ldr	r8,.LK_40_59
	sub	sp,sp,#20*4		@ [+2]
.L_40_59:
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r5,r6			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r4,r10,ror#2		@ F_xx_xx
	and	r11,r5,r6			@ F_xx_xx
	add	r7,r7,r9			@ E+=X[i]
	add	r7,r7,r10			@ E+=F_40_59(B,C,D)
	add	r7,r7,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r4,r5			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r3,r10,ror#2		@ F_xx_xx
	and	r11,r4,r5			@ F_xx_xx
	add	r6,r6,r9			@ E+=X[i]
	add	r6,r6,r10			@ E+=F_40_59(B,C,D)
	add	r6,r6,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r3,r4			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r7,r10,ror#2		@ F_xx_xx
	and	r11,r3,r4			@ F_xx_xx
	add	r5,r5,r9			@ E+=X[i]
	add	r5,r5,r10			@ E+=F_40_59(B,C,D)
	add	r5,r5,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r7,r3			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r6,r10,ror#2		@ F_xx_xx
	and	r11,r7,r3			@ F_xx_xx
	add	r4,r4,r9			@ E+=X[i]
	add	r4,r4,r10			@ E+=F_40_59(B,C,D)
	add	r4,r4,r11,ror#2
	ldr	r9,[r14,#15*4]
	ldr	r10,[r14,#13*4]
	ldr	r11,[r14,#7*4]
	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
	ldr	r12,[r14,#2*4]
	eor	r9,r9,r10
	eor	r11,r11,r12			@ 1 cycle stall
	eor	r10,r6,r7			@ F_xx_xx
	mov	r9,r9,ror#31
	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
	eor	r9,r9,r11,ror#31
	str	r9,[r14,#-4]!
	and	r10,r5,r10,ror#2		@ F_xx_xx
	and	r11,r6,r7			@ F_xx_xx
	add	r3,r3,r9			@ E+=X[i]
	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
	add	r3,r3,r11,ror#2
	teq	r14,sp
	bne	.L_40_59		@ [+((12+5)*5+2)*4]

@ Rounds 60..79: reuse the 20..39 loop with carry set and K_60_79.
	ldr	r8,.LK_60_79
	sub	sp,sp,#20*4
	cmp	sp,#0			@ set carry to denote 60_79
	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
.L_done:
	add	sp,sp,#80*4		@ "deallocate" stack frame
	ldmia	r0,{r8,r9,r10,r11,r12}
	add	r3,r8,r3
	add	r4,r9,r4
	add	r5,r10,r5,ror#2		@ undo the ror#30 pre-rotation of C,D,E
	add	r6,r11,r6,ror#2
	add	r7,r12,r7,ror#2
	stmia	r0,{r3,r4,r5,r6,r7}
	teq	r1,r2
	bne	.Lloop			@ [+18], total 1307

#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif

@ SHA-1 round constants (FIPS 180-4), placed in-range of pc-relative ldr.
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align	2