1/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */ 2 3#ifndef _UECC_ASM_ARM_MULT_SQUARE_H_ 4#define _UECC_ASM_ARM_MULT_SQUARE_H_ 5 6#define FAST_MULT_ASM_5 \ 7 "push {r3} \n\t" \ 8 "add r0, 12 \n\t" \ 9 "add r2, 12 \n\t" \ 10 "ldmia r1!, {r3,r4} \n\t" \ 11 "ldmia r2!, {r6,r7} \n\t" \ 12 \ 13 "umull r11, r12, r3, r6 \n\t" \ 14 "stmia r0!, {r11} \n\t" \ 15 \ 16 "mov r10, #0 \n\t" \ 17 "umull r11, r9, r3, r7 \n\t" \ 18 "adds r12, r12, r11 \n\t" \ 19 "adc r9, r9, #0 \n\t" \ 20 "umull r11, r14, r4, r6 \n\t" \ 21 "adds r12, r12, r11 \n\t" \ 22 "adcs r9, r9, r14 \n\t" \ 23 "adc r10, r10, #0 \n\t" \ 24 "stmia r0!, {r12} \n\t" \ 25 \ 26 "umull r12, r14, r4, r7 \n\t" \ 27 "adds r9, r9, r12 \n\t" \ 28 "adc r10, r10, r14 \n\t" \ 29 "stmia r0!, {r9, r10} \n\t" \ 30 \ 31 "sub r0, 28 \n\t" \ 32 "sub r2, 20 \n\t" \ 33 "ldmia r2!, {r6,r7,r8} \n\t" \ 34 "ldmia r1!, {r5} \n\t" \ 35 \ 36 "umull r11, r12, r3, r6 \n\t" \ 37 "stmia r0!, {r11} \n\t" \ 38 \ 39 "mov r10, #0 \n\t" \ 40 "umull r11, r9, r3, r7 \n\t" \ 41 "adds r12, r12, r11 \n\t" \ 42 "adc r9, r9, #0 \n\t" \ 43 "umull r11, r14, r4, r6 \n\t" \ 44 "adds r12, r12, r11 \n\t" \ 45 "adcs r9, r9, r14 \n\t" \ 46 "adc r10, r10, #0 \n\t" \ 47 "stmia r0!, {r12} \n\t" \ 48 \ 49 "mov r11, #0 \n\t" \ 50 "umull r12, r14, r3, r8 \n\t" \ 51 "adds r9, r9, r12 \n\t" \ 52 "adcs r10, r10, r14 \n\t" \ 53 "adc r11, r11, #0 \n\t" \ 54 "umull r12, r14, r4, r7 \n\t" \ 55 "adds r9, r9, r12 \n\t" \ 56 "adcs r10, r10, r14 \n\t" \ 57 "adc r11, r11, #0 \n\t" \ 58 "umull r12, r14, r5, r6 \n\t" \ 59 "adds r9, r9, r12 \n\t" \ 60 "adcs r10, r10, r14 \n\t" \ 61 "adc r11, r11, #0 \n\t" \ 62 "stmia r0!, {r9} \n\t" \ 63 \ 64 "ldmia r1!, {r3} \n\t" \ 65 "mov r12, #0 \n\t" \ 66 "umull r14, r9, r4, r8 \n\t" \ 67 "adds r10, r10, r14 \n\t" \ 68 "adcs r11, r11, r9 \n\t" \ 69 "adc r12, r12, #0 \n\t" \ 70 "umull r14, r9, r5, r7 \n\t" \ 71 "adds r10, r10, r14 \n\t" \ 72 "adcs r11, r11, r9 \n\t" \ 73 "adc r12, r12, #0 \n\t" \ 74 "umull r14, r9, 
r3, r6 \n\t" \ 75 "adds r10, r10, r14 \n\t" \ 76 "adcs r11, r11, r9 \n\t" \ 77 "adc r12, r12, #0 \n\t" \ 78 "ldr r14, [r0] \n\t" \ 79 "adds r10, r10, r14 \n\t" \ 80 "adcs r11, r11, #0 \n\t" \ 81 "adc r12, r12, #0 \n\t" \ 82 "stmia r0!, {r10} \n\t" \ 83 \ 84 "ldmia r1!, {r4} \n\t" \ 85 "mov r14, #0 \n\t" \ 86 "umull r9, r10, r5, r8 \n\t" \ 87 "adds r11, r11, r9 \n\t" \ 88 "adcs r12, r12, r10 \n\t" \ 89 "adc r14, r14, #0 \n\t" \ 90 "umull r9, r10, r3, r7 \n\t" \ 91 "adds r11, r11, r9 \n\t" \ 92 "adcs r12, r12, r10 \n\t" \ 93 "adc r14, r14, #0 \n\t" \ 94 "umull r9, r10, r4, r6 \n\t" \ 95 "adds r11, r11, r9 \n\t" \ 96 "adcs r12, r12, r10 \n\t" \ 97 "adc r14, r14, #0 \n\t" \ 98 "ldr r9, [r0] \n\t" \ 99 "adds r11, r11, r9 \n\t" \ 100 "adcs r12, r12, #0 \n\t" \ 101 "adc r14, r14, #0 \n\t" \ 102 "stmia r0!, {r11} \n\t" \ 103 \ 104 "ldmia r2!, {r6} \n\t" \ 105 "mov r9, #0 \n\t" \ 106 "umull r10, r11, r5, r6 \n\t" \ 107 "adds r12, r12, r10 \n\t" \ 108 "adcs r14, r14, r11 \n\t" \ 109 "adc r9, r9, #0 \n\t" \ 110 "umull r10, r11, r3, r8 \n\t" \ 111 "adds r12, r12, r10 \n\t" \ 112 "adcs r14, r14, r11 \n\t" \ 113 "adc r9, r9, #0 \n\t" \ 114 "umull r10, r11, r4, r7 \n\t" \ 115 "adds r12, r12, r10 \n\t" \ 116 "adcs r14, r14, r11 \n\t" \ 117 "adc r9, r9, #0 \n\t" \ 118 "ldr r10, [r0] \n\t" \ 119 "adds r12, r12, r10 \n\t" \ 120 "adcs r14, r14, #0 \n\t" \ 121 "adc r9, r9, #0 \n\t" \ 122 "stmia r0!, {r12} \n\t" \ 123 \ 124 "ldmia r2!, {r7} \n\t" \ 125 "mov r10, #0 \n\t" \ 126 "umull r11, r12, r5, r7 \n\t" \ 127 "adds r14, r14, r11 \n\t" \ 128 "adcs r9, r9, r12 \n\t" \ 129 "adc r10, r10, #0 \n\t" \ 130 "umull r11, r12, r3, r6 \n\t" \ 131 "adds r14, r14, r11 \n\t" \ 132 "adcs r9, r9, r12 \n\t" \ 133 "adc r10, r10, #0 \n\t" \ 134 "umull r11, r12, r4, r8 \n\t" \ 135 "adds r14, r14, r11 \n\t" \ 136 "adcs r9, r9, r12 \n\t" \ 137 "adc r10, r10, #0 \n\t" \ 138 "ldr r11, [r0] \n\t" \ 139 "adds r14, r14, r11 \n\t" \ 140 "adcs r9, r9, #0 \n\t" \ 141 "adc r10, r10, #0 \n\t" \ 142 "stmia r0!, {r14} 
\n\t" \ 143 \ 144 "mov r11, #0 \n\t" \ 145 "umull r12, r14, r3, r7 \n\t" \ 146 "adds r9, r9, r12 \n\t" \ 147 "adcs r10, r10, r14 \n\t" \ 148 "adc r11, r11, #0 \n\t" \ 149 "umull r12, r14, r4, r6 \n\t" \ 150 "adds r9, r9, r12 \n\t" \ 151 "adcs r10, r10, r14 \n\t" \ 152 "adc r11, r11, #0 \n\t" \ 153 "stmia r0!, {r9} \n\t" \ 154 \ 155 "umull r14, r9, r4, r7 \n\t" \ 156 "adds r10, r10, r14 \n\t" \ 157 "adc r11, r11, r9 \n\t" \ 158 "stmia r0!, {r10, r11} \n\t" \ 159 "pop {r3} \n\t" 160 161#define FAST_MULT_ASM_5_TO_6 \ 162 "cmp r3, #5 \n\t" \ 163 "beq 1f \n\t" \ 164 \ 165 /* r4 = left high, r5 = right high */ \ 166 "ldr r4, [r1] \n\t" \ 167 "ldr r5, [r2] \n\t" \ 168 \ 169 "sub r0, #20 \n\t" \ 170 "sub r1, #20 \n\t" \ 171 "sub r2, #20 \n\t" \ 172 \ 173 "ldr r6, [r0] \n\t" \ 174 "ldr r7, [r1], #4 \n\t" \ 175 "ldr r8, [r2], #4 \n\t" \ 176 "mov r14, #0 \n\t" \ 177 "umull r9, r10, r4, r8 \n\t" \ 178 "umull r11, r12, r5, r7 \n\t" \ 179 "adds r9, r9, r6 \n\t" \ 180 "adc r10, r10, #0 \n\t" \ 181 "adds r9, r9, r11 \n\t" \ 182 "adcs r10, r10, r12 \n\t" \ 183 "adc r14, r14, #0 \n\t" \ 184 "str r9, [r0], #4 \n\t" \ 185 \ 186 "ldr r6, [r0] \n\t" \ 187 "adds r10, r10, r6 \n\t" \ 188 "adcs r14, r14, #0 \n\t" \ 189 "ldr r7, [r1], #4 \n\t" \ 190 "ldr r8, [r2], #4 \n\t" \ 191 "mov r9, #0 \n\t" \ 192 "umull r11, r12, r4, r8 \n\t" \ 193 "adds r10, r10, r11 \n\t" \ 194 "adcs r14, r14, r12 \n\t" \ 195 "adc r9, r9, #0 \n\t" \ 196 "umull r11, r12, r5, r7 \n\t" \ 197 "adds r10, r10, r11 \n\t" \ 198 "adcs r14, r14, r12 \n\t" \ 199 "adc r9, r9, #0 \n\t" \ 200 "str r10, [r0], #4 \n\t" \ 201 \ 202 "ldr r6, [r0] \n\t" \ 203 "adds r14, r14, r6 \n\t" \ 204 "adcs r9, r9, #0 \n\t" \ 205 "ldr r7, [r1], #4 \n\t" \ 206 "ldr r8, [r2], #4 \n\t" \ 207 "mov r10, #0 \n\t" \ 208 "umull r11, r12, r4, r8 \n\t" \ 209 "adds r14, r14, r11 \n\t" \ 210 "adcs r9, r9, r12 \n\t" \ 211 "adc r10, r10, #0 \n\t" \ 212 "umull r11, r12, r5, r7 \n\t" \ 213 "adds r14, r14, r11 \n\t" \ 214 "adcs r9, r9, r12 \n\t" \ 215 "adc r10, 
r10, #0 \n\t" \ 216 "str r14, [r0], #4 \n\t" \ 217 \ 218 "ldr r6, [r0] \n\t" \ 219 "adds r9, r9, r6 \n\t" \ 220 "adcs r10, r10, #0 \n\t" \ 221 "ldr r7, [r1], #4 \n\t" \ 222 "ldr r8, [r2], #4 \n\t" \ 223 "mov r14, #0 \n\t" \ 224 "umull r11, r12, r4, r8 \n\t" \ 225 "adds r9, r9, r11 \n\t" \ 226 "adcs r10, r10, r12 \n\t" \ 227 "adc r14, r14, #0 \n\t" \ 228 "umull r11, r12, r5, r7 \n\t" \ 229 "adds r9, r9, r11 \n\t" \ 230 "adcs r10, r10, r12 \n\t" \ 231 "adc r14, r14, #0 \n\t" \ 232 "str r9, [r0], #4 \n\t" \ 233 \ 234 "ldr r6, [r0] \n\t" \ 235 "adds r10, r10, r6 \n\t" \ 236 "adcs r14, r14, #0 \n\t" \ 237 /* skip past already-loaded (r4, r5) */ \ 238 "ldr r7, [r1], #8 \n\t" \ 239 "ldr r8, [r2], #8 \n\t" \ 240 "mov r9, #0 \n\t" \ 241 "umull r11, r12, r4, r8 \n\t" \ 242 "adds r10, r10, r11 \n\t" \ 243 "adcs r14, r14, r12 \n\t" \ 244 "adc r9, r9, #0 \n\t" \ 245 "umull r11, r12, r5, r7 \n\t" \ 246 "adds r10, r10, r11 \n\t" \ 247 "adcs r14, r14, r12 \n\t" \ 248 "adc r9, r9, #0 \n\t" \ 249 "str r10, [r0], #4 \n\t" \ 250 \ 251 "umull r11, r12, r4, r5 \n\t" \ 252 "adds r11, r11, r14 \n\t" \ 253 "adc r12, r12, r9 \n\t" \ 254 "stmia r0!, {r11, r12} \n\t" 255 256#define FAST_MULT_ASM_6 \ 257 "push {r3} \n\t" \ 258 "add r0, 12 \n\t" \ 259 "add r2, 12 \n\t" \ 260 "ldmia r1!, {r3,r4,r5} \n\t" \ 261 "ldmia r2!, {r6,r7,r8} \n\t" \ 262 \ 263 "umull r11, r12, r3, r6 \n\t" \ 264 "stmia r0!, {r11} \n\t" \ 265 \ 266 "mov r10, #0 \n\t" \ 267 "umull r11, r9, r3, r7 \n\t" \ 268 "adds r12, r12, r11 \n\t" \ 269 "adc r9, r9, #0 \n\t" \ 270 "umull r11, r14, r4, r6 \n\t" \ 271 "adds r12, r12, r11 \n\t" \ 272 "adcs r9, r9, r14 \n\t" \ 273 "adc r10, r10, #0 \n\t" \ 274 "stmia r0!, {r12} \n\t" \ 275 \ 276 "mov r11, #0 \n\t" \ 277 "umull r12, r14, r3, r8 \n\t" \ 278 "adds r9, r9, r12 \n\t" \ 279 "adcs r10, r10, r14 \n\t" \ 280 "adc r11, r11, #0 \n\t" \ 281 "umull r12, r14, r4, r7 \n\t" \ 282 "adds r9, r9, r12 \n\t" \ 283 "adcs r10, r10, r14 \n\t" \ 284 "adc r11, r11, #0 \n\t" \ 285 "umull r12, r14, r5, 
r6 \n\t" \ 286 "adds r9, r9, r12 \n\t" \ 287 "adcs r10, r10, r14 \n\t" \ 288 "adc r11, r11, #0 \n\t" \ 289 "stmia r0!, {r9} \n\t" \ 290 \ 291 "mov r12, #0 \n\t" \ 292 "umull r14, r9, r4, r8 \n\t" \ 293 "adds r10, r10, r14 \n\t" \ 294 "adcs r11, r11, r9 \n\t" \ 295 "adc r12, r12, #0 \n\t" \ 296 "umull r14, r9, r5, r7 \n\t" \ 297 "adds r10, r10, r14 \n\t" \ 298 "adcs r11, r11, r9 \n\t" \ 299 "adc r12, r12, #0 \n\t" \ 300 "stmia r0!, {r10} \n\t" \ 301 \ 302 "umull r9, r10, r5, r8 \n\t" \ 303 "adds r11, r11, r9 \n\t" \ 304 "adc r12, r12, r10 \n\t" \ 305 "stmia r0!, {r11, r12} \n\t" \ 306 \ 307 "sub r0, 36 \n\t" \ 308 "sub r2, 24 \n\t" \ 309 "ldmia r2!, {r6,r7,r8} \n\t" \ 310 \ 311 "umull r11, r12, r3, r6 \n\t" \ 312 "stmia r0!, {r11} \n\t" \ 313 \ 314 "mov r10, #0 \n\t" \ 315 "umull r11, r9, r3, r7 \n\t" \ 316 "adds r12, r12, r11 \n\t" \ 317 "adc r9, r9, #0 \n\t" \ 318 "umull r11, r14, r4, r6 \n\t" \ 319 "adds r12, r12, r11 \n\t" \ 320 "adcs r9, r9, r14 \n\t" \ 321 "adc r10, r10, #0 \n\t" \ 322 "stmia r0!, {r12} \n\t" \ 323 \ 324 "mov r11, #0 \n\t" \ 325 "umull r12, r14, r3, r8 \n\t" \ 326 "adds r9, r9, r12 \n\t" \ 327 "adcs r10, r10, r14 \n\t" \ 328 "adc r11, r11, #0 \n\t" \ 329 "umull r12, r14, r4, r7 \n\t" \ 330 "adds r9, r9, r12 \n\t" \ 331 "adcs r10, r10, r14 \n\t" \ 332 "adc r11, r11, #0 \n\t" \ 333 "umull r12, r14, r5, r6 \n\t" \ 334 "adds r9, r9, r12 \n\t" \ 335 "adcs r10, r10, r14 \n\t" \ 336 "adc r11, r11, #0 \n\t" \ 337 "stmia r0!, {r9} \n\t" \ 338 \ 339 "ldmia r1!, {r3} \n\t" \ 340 "mov r12, #0 \n\t" \ 341 "umull r14, r9, r4, r8 \n\t" \ 342 "adds r10, r10, r14 \n\t" \ 343 "adcs r11, r11, r9 \n\t" \ 344 "adc r12, r12, #0 \n\t" \ 345 "umull r14, r9, r5, r7 \n\t" \ 346 "adds r10, r10, r14 \n\t" \ 347 "adcs r11, r11, r9 \n\t" \ 348 "adc r12, r12, #0 \n\t" \ 349 "umull r14, r9, r3, r6 \n\t" \ 350 "adds r10, r10, r14 \n\t" \ 351 "adcs r11, r11, r9 \n\t" \ 352 "adc r12, r12, #0 \n\t" \ 353 "ldr r14, [r0] \n\t" \ 354 "adds r10, r10, r14 \n\t" \ 355 "adcs r11, r11, 
#0 \n\t" \ 356 "adc r12, r12, #0 \n\t" \ 357 "stmia r0!, {r10} \n\t" \ 358 \ 359 "ldmia r1!, {r4} \n\t" \ 360 "mov r14, #0 \n\t" \ 361 "umull r9, r10, r5, r8 \n\t" \ 362 "adds r11, r11, r9 \n\t" \ 363 "adcs r12, r12, r10 \n\t" \ 364 "adc r14, r14, #0 \n\t" \ 365 "umull r9, r10, r3, r7 \n\t" \ 366 "adds r11, r11, r9 \n\t" \ 367 "adcs r12, r12, r10 \n\t" \ 368 "adc r14, r14, #0 \n\t" \ 369 "umull r9, r10, r4, r6 \n\t" \ 370 "adds r11, r11, r9 \n\t" \ 371 "adcs r12, r12, r10 \n\t" \ 372 "adc r14, r14, #0 \n\t" \ 373 "ldr r9, [r0] \n\t" \ 374 "adds r11, r11, r9 \n\t" \ 375 "adcs r12, r12, #0 \n\t" \ 376 "adc r14, r14, #0 \n\t" \ 377 "stmia r0!, {r11} \n\t" \ 378 \ 379 "ldmia r1!, {r5} \n\t" \ 380 "mov r9, #0 \n\t" \ 381 "umull r10, r11, r3, r8 \n\t" \ 382 "adds r12, r12, r10 \n\t" \ 383 "adcs r14, r14, r11 \n\t" \ 384 "adc r9, r9, #0 \n\t" \ 385 "umull r10, r11, r4, r7 \n\t" \ 386 "adds r12, r12, r10 \n\t" \ 387 "adcs r14, r14, r11 \n\t" \ 388 "adc r9, r9, #0 \n\t" \ 389 "umull r10, r11, r5, r6 \n\t" \ 390 "adds r12, r12, r10 \n\t" \ 391 "adcs r14, r14, r11 \n\t" \ 392 "adc r9, r9, #0 \n\t" \ 393 "ldr r10, [r0] \n\t" \ 394 "adds r12, r12, r10 \n\t" \ 395 "adcs r14, r14, #0 \n\t" \ 396 "adc r9, r9, #0 \n\t" \ 397 "stmia r0!, {r12} \n\t" \ 398 \ 399 "ldmia r2!, {r6} \n\t" \ 400 "mov r10, #0 \n\t" \ 401 "umull r11, r12, r3, r6 \n\t" \ 402 "adds r14, r14, r11 \n\t" \ 403 "adcs r9, r9, r12 \n\t" \ 404 "adc r10, r10, #0 \n\t" \ 405 "umull r11, r12, r4, r8 \n\t" \ 406 "adds r14, r14, r11 \n\t" \ 407 "adcs r9, r9, r12 \n\t" \ 408 "adc r10, r10, #0 \n\t" \ 409 "umull r11, r12, r5, r7 \n\t" \ 410 "adds r14, r14, r11 \n\t" \ 411 "adcs r9, r9, r12 \n\t" \ 412 "adc r10, r10, #0 \n\t" \ 413 "ldr r11, [r0] \n\t" \ 414 "adds r14, r14, r11 \n\t" \ 415 "adcs r9, r9, #0 \n\t" \ 416 "adc r10, r10, #0 \n\t" \ 417 "stmia r0!, {r14} \n\t" \ 418 \ 419 "ldmia r2!, {r7} \n\t" \ 420 "mov r11, #0 \n\t" \ 421 "umull r12, r14, r3, r7 \n\t" \ 422 "adds r9, r9, r12 \n\t" \ 423 "adcs r10, r10, r14 
\n\t" \ 424 "adc r11, r11, #0 \n\t" \ 425 "umull r12, r14, r4, r6 \n\t" \ 426 "adds r9, r9, r12 \n\t" \ 427 "adcs r10, r10, r14 \n\t" \ 428 "adc r11, r11, #0 \n\t" \ 429 "umull r12, r14, r5, r8 \n\t" \ 430 "adds r9, r9, r12 \n\t" \ 431 "adcs r10, r10, r14 \n\t" \ 432 "adc r11, r11, #0 \n\t" \ 433 "ldr r12, [r0] \n\t" \ 434 "adds r9, r9, r12 \n\t" \ 435 "adcs r10, r10, #0 \n\t" \ 436 "adc r11, r11, #0 \n\t" \ 437 "stmia r0!, {r9} \n\t" \ 438 \ 439 "ldmia r2!, {r8} \n\t" \ 440 "mov r12, #0 \n\t" \ 441 "umull r14, r9, r3, r8 \n\t" \ 442 "adds r10, r10, r14 \n\t" \ 443 "adcs r11, r11, r9 \n\t" \ 444 "adc r12, r12, #0 \n\t" \ 445 "umull r14, r9, r4, r7 \n\t" \ 446 "adds r10, r10, r14 \n\t" \ 447 "adcs r11, r11, r9 \n\t" \ 448 "adc r12, r12, #0 \n\t" \ 449 "umull r14, r9, r5, r6 \n\t" \ 450 "adds r10, r10, r14 \n\t" \ 451 "adcs r11, r11, r9 \n\t" \ 452 "adc r12, r12, #0 \n\t" \ 453 "ldr r14, [r0] \n\t" \ 454 "adds r10, r10, r14 \n\t" \ 455 "adcs r11, r11, #0 \n\t" \ 456 "adc r12, r12, #0 \n\t" \ 457 "stmia r0!, {r10} \n\t" \ 458 \ 459 "mov r14, #0 \n\t" \ 460 "umull r9, r10, r4, r8 \n\t" \ 461 "adds r11, r11, r9 \n\t" \ 462 "adcs r12, r12, r10 \n\t" \ 463 "adc r14, r14, #0 \n\t" \ 464 "umull r9, r10, r5, r7 \n\t" \ 465 "adds r11, r11, r9 \n\t" \ 466 "adcs r12, r12, r10 \n\t" \ 467 "adc r14, r14, #0 \n\t" \ 468 "stmia r0!, {r11} \n\t" \ 469 \ 470 "umull r10, r11, r5, r8 \n\t" \ 471 "adds r12, r12, r10 \n\t" \ 472 "adc r14, r14, r11 \n\t" \ 473 "stmia r0!, {r12, r14} \n\t" \ 474 "pop {r3} \n\t" 475 476#define FAST_MULT_ASM_6_TO_7 \ 477 "cmp r3, #6 \n\t" \ 478 "beq 1f \n\t" \ 479 \ 480 /* r4 = left high, r5 = right high */ \ 481 "ldr r4, [r1] \n\t" \ 482 "ldr r5, [r2] \n\t" \ 483 \ 484 "sub r0, #24 \n\t" \ 485 "sub r1, #24 \n\t" \ 486 "sub r2, #24 \n\t" \ 487 \ 488 "ldr r6, [r0] \n\t" \ 489 "ldr r7, [r1], #4 \n\t" \ 490 "ldr r8, [r2], #4 \n\t" \ 491 "mov r14, #0 \n\t" \ 492 "umull r9, r10, r4, r8 \n\t" \ 493 "umull r11, r12, r5, r7 \n\t" \ 494 "adds r9, r9, r6 \n\t" \ 495 
"adc r10, r10, #0 \n\t" \ 496 "adds r9, r9, r11 \n\t" \ 497 "adcs r10, r10, r12 \n\t" \ 498 "adc r14, r14, #0 \n\t" \ 499 "str r9, [r0], #4 \n\t" \ 500 \ 501 "ldr r6, [r0] \n\t" \ 502 "adds r10, r10, r6 \n\t" \ 503 "adcs r14, r14, #0 \n\t" \ 504 "ldr r7, [r1], #4 \n\t" \ 505 "ldr r8, [r2], #4 \n\t" \ 506 "mov r9, #0 \n\t" \ 507 "umull r11, r12, r4, r8 \n\t" \ 508 "adds r10, r10, r11 \n\t" \ 509 "adcs r14, r14, r12 \n\t" \ 510 "adc r9, r9, #0 \n\t" \ 511 "umull r11, r12, r5, r7 \n\t" \ 512 "adds r10, r10, r11 \n\t" \ 513 "adcs r14, r14, r12 \n\t" \ 514 "adc r9, r9, #0 \n\t" \ 515 "str r10, [r0], #4 \n\t" \ 516 \ 517 "ldr r6, [r0] \n\t" \ 518 "adds r14, r14, r6 \n\t" \ 519 "adcs r9, r9, #0 \n\t" \ 520 "ldr r7, [r1], #4 \n\t" \ 521 "ldr r8, [r2], #4 \n\t" \ 522 "mov r10, #0 \n\t" \ 523 "umull r11, r12, r4, r8 \n\t" \ 524 "adds r14, r14, r11 \n\t" \ 525 "adcs r9, r9, r12 \n\t" \ 526 "adc r10, r10, #0 \n\t" \ 527 "umull r11, r12, r5, r7 \n\t" \ 528 "adds r14, r14, r11 \n\t" \ 529 "adcs r9, r9, r12 \n\t" \ 530 "adc r10, r10, #0 \n\t" \ 531 "str r14, [r0], #4 \n\t" \ 532 \ 533 "ldr r6, [r0] \n\t" \ 534 "adds r9, r9, r6 \n\t" \ 535 "adcs r10, r10, #0 \n\t" \ 536 "ldr r7, [r1], #4 \n\t" \ 537 "ldr r8, [r2], #4 \n\t" \ 538 "mov r14, #0 \n\t" \ 539 "umull r11, r12, r4, r8 \n\t" \ 540 "adds r9, r9, r11 \n\t" \ 541 "adcs r10, r10, r12 \n\t" \ 542 "adc r14, r14, #0 \n\t" \ 543 "umull r11, r12, r5, r7 \n\t" \ 544 "adds r9, r9, r11 \n\t" \ 545 "adcs r10, r10, r12 \n\t" \ 546 "adc r14, r14, #0 \n\t" \ 547 "str r9, [r0], #4 \n\t" \ 548 \ 549 "ldr r6, [r0] \n\t" \ 550 "adds r10, r10, r6 \n\t" \ 551 "adcs r14, r14, #0 \n\t" \ 552 "ldr r7, [r1], #4 \n\t" \ 553 "ldr r8, [r2], #4 \n\t" \ 554 "mov r9, #0 \n\t" \ 555 "umull r11, r12, r4, r8 \n\t" \ 556 "adds r10, r10, r11 \n\t" \ 557 "adcs r14, r14, r12 \n\t" \ 558 "adc r9, r9, #0 \n\t" \ 559 "umull r11, r12, r5, r7 \n\t" \ 560 "adds r10, r10, r11 \n\t" \ 561 "adcs r14, r14, r12 \n\t" \ 562 "adc r9, r9, #0 \n\t" \ 563 "str r10, [r0], #4 
\n\t" \ 564 \ 565 "ldr r6, [r0] \n\t" \ 566 "adds r14, r14, r6 \n\t" \ 567 "adcs r9, r9, #0 \n\t" \ 568 /* skip past already-loaded (r4, r5) */ \ 569 "ldr r7, [r1], #8 \n\t" \ 570 "ldr r8, [r2], #8 \n\t" \ 571 "mov r10, #0 \n\t" \ 572 "umull r11, r12, r4, r8 \n\t" \ 573 "adds r14, r14, r11 \n\t" \ 574 "adcs r9, r9, r12 \n\t" \ 575 "adc r10, r10, #0 \n\t" \ 576 "umull r11, r12, r5, r7 \n\t" \ 577 "adds r14, r14, r11 \n\t" \ 578 "adcs r9, r9, r12 \n\t" \ 579 "adc r10, r10, #0 \n\t" \ 580 "str r14, [r0], #4 \n\t" \ 581 \ 582 "umull r11, r12, r4, r5 \n\t" \ 583 "adds r11, r11, r9 \n\t" \ 584 "adc r12, r12, r10 \n\t" \ 585 "stmia r0!, {r11, r12} \n\t" 586 587#define FAST_MULT_ASM_7 \ 588 "push {r3} \n\t" \ 589 "add r0, 24 \n\t" \ 590 "add r2, 24 \n\t" \ 591 "ldmia r1!, {r3} \n\t" \ 592 "ldmia r2!, {r6} \n\t" \ 593 \ 594 "umull r9, r10, r3, r6 \n\t" \ 595 "stmia r0!, {r9, r10} \n\t" \ 596 \ 597 "sub r0, 20 \n\t" \ 598 "sub r2, 16 \n\t" \ 599 "ldmia r2!, {r6, r7, r8} \n\t" \ 600 "ldmia r1!, {r4, r5} \n\t" \ 601 \ 602 "umull r9, r10, r3, r6 \n\t" \ 603 "stmia r0!, {r9} \n\t" \ 604 \ 605 "mov r14, #0 \n\t" \ 606 "umull r9, r12, r3, r7 \n\t" \ 607 "adds r10, r10, r9 \n\t" \ 608 "adc r12, r12, #0 \n\t" \ 609 "umull r9, r11, r4, r6 \n\t" \ 610 "adds r10, r10, r9 \n\t" \ 611 "adcs r12, r12, r11 \n\t" \ 612 "adc r14, r14, #0 \n\t" \ 613 "stmia r0!, {r10} \n\t" \ 614 \ 615 "mov r9, #0 \n\t" \ 616 "umull r10, r11, r3, r8 \n\t" \ 617 "adds r12, r12, r10 \n\t" \ 618 "adcs r14, r14, r11 \n\t" \ 619 "adc r9, r9, #0 \n\t" \ 620 "umull r10, r11, r4, r7 \n\t" \ 621 "adds r12, r12, r10 \n\t" \ 622 "adcs r14, r14, r11 \n\t" \ 623 "adc r9, r9, #0 \n\t" \ 624 "umull r10, r11, r5, r6 \n\t" \ 625 "adds r12, r12, r10 \n\t" \ 626 "adcs r14, r14, r11 \n\t" \ 627 "adc r9, r9, #0 \n\t" \ 628 "stmia r0!, {r12} \n\t" \ 629 \ 630 "ldmia r1!, {r3} \n\t" \ 631 "mov r10, #0 \n\t" \ 632 "umull r11, r12, r4, r8 \n\t" \ 633 "adds r14, r14, r11 \n\t" \ 634 "adcs r9, r9, r12 \n\t" \ 635 "adc r10, r10, #0 
\n\t" \ 636 "umull r11, r12, r5, r7 \n\t" \ 637 "adds r14, r14, r11 \n\t" \ 638 "adcs r9, r9, r12 \n\t" \ 639 "adc r10, r10, #0 \n\t" \ 640 "umull r11, r12, r3, r6 \n\t" \ 641 "adds r14, r14, r11 \n\t" \ 642 "adcs r9, r9, r12 \n\t" \ 643 "adc r10, r10, #0 \n\t" \ 644 "ldr r11, [r0] \n\t" \ 645 "adds r14, r14, r11 \n\t" \ 646 "adcs r9, r9, #0 \n\t" \ 647 "adc r10, r10, #0 \n\t" \ 648 "stmia r0!, {r14} \n\t" \ 649 \ 650 "ldmia r2!, {r6} \n\t" \ 651 "mov r11, #0 \n\t" \ 652 "umull r12, r14, r4, r6 \n\t" \ 653 "adds r9, r9, r12 \n\t" \ 654 "adcs r10, r10, r14 \n\t" \ 655 "adc r11, r11, #0 \n\t" \ 656 "umull r12, r14, r5, r8 \n\t" \ 657 "adds r9, r9, r12 \n\t" \ 658 "adcs r10, r10, r14 \n\t" \ 659 "adc r11, r11, #0 \n\t" \ 660 "umull r12, r14, r3, r7 \n\t" \ 661 "adds r9, r9, r12 \n\t" \ 662 "adcs r10, r10, r14 \n\t" \ 663 "adc r11, r11, #0 \n\t" \ 664 "ldr r12, [r0] \n\t" \ 665 "adds r9, r9, r12 \n\t" \ 666 "adcs r10, r10, #0 \n\t" \ 667 "adc r11, r11, #0 \n\t" \ 668 "stmia r0!, {r9} \n\t" \ 669 \ 670 "mov r12, #0 \n\t" \ 671 "umull r14, r9, r5, r6 \n\t" \ 672 "adds r10, r10, r14 \n\t" \ 673 "adcs r11, r11, r9 \n\t" \ 674 "adc r12, r12, #0 \n\t" \ 675 "umull r14, r9, r3, r8 \n\t" \ 676 "adds r10, r10, r14 \n\t" \ 677 "adcs r11, r11, r9 \n\t" \ 678 "adc r12, r12, #0 \n\t" \ 679 "stmia r0!, {r10} \n\t" \ 680 \ 681 "umull r9, r10, r3, r6 \n\t" \ 682 "adds r11, r11, r9 \n\t" \ 683 "adc r12, r12, r10 \n\t" \ 684 "stmia r0!, {r11, r12} \n\t" \ 685 \ 686 "sub r0, 44 \n\t" \ 687 "sub r1, 16 \n\t" \ 688 "sub r2, 28 \n\t" \ 689 "ldmia r1!, {r3,r4,r5} \n\t" \ 690 "ldmia r2!, {r6,r7,r8} \n\t" \ 691 \ 692 "umull r9, r10, r3, r6 \n\t" \ 693 "stmia r0!, {r9} \n\t" \ 694 \ 695 "mov r14, #0 \n\t" \ 696 "umull r9, r12, r3, r7 \n\t" \ 697 "adds r10, r10, r9 \n\t" \ 698 "adc r12, r12, #0 \n\t" \ 699 "umull r9, r11, r4, r6 \n\t" \ 700 "adds r10, r10, r9 \n\t" \ 701 "adcs r12, r12, r11 \n\t" \ 702 "adc r14, r14, #0 \n\t" \ 703 "stmia r0!, {r10} \n\t" \ 704 \ 705 "mov r9, #0 \n\t" \ 706 
"umull r10, r11, r3, r8 \n\t" \ 707 "adds r12, r12, r10 \n\t" \ 708 "adcs r14, r14, r11 \n\t" \ 709 "adc r9, r9, #0 \n\t" \ 710 "umull r10, r11, r4, r7 \n\t" \ 711 "adds r12, r12, r10 \n\t" \ 712 "adcs r14, r14, r11 \n\t" \ 713 "adc r9, r9, #0 \n\t" \ 714 "umull r10, r11, r5, r6 \n\t" \ 715 "adds r12, r12, r10 \n\t" \ 716 "adcs r14, r14, r11 \n\t" \ 717 "adc r9, r9, #0 \n\t" \ 718 "stmia r0!, {r12} \n\t" \ 719 \ 720 "ldmia r1!, {r3} \n\t" \ 721 "mov r10, #0 \n\t" \ 722 "umull r11, r12, r4, r8 \n\t" \ 723 "adds r14, r14, r11 \n\t" \ 724 "adcs r9, r9, r12 \n\t" \ 725 "adc r10, r10, #0 \n\t" \ 726 "umull r11, r12, r5, r7 \n\t" \ 727 "adds r14, r14, r11 \n\t" \ 728 "adcs r9, r9, r12 \n\t" \ 729 "adc r10, r10, #0 \n\t" \ 730 "umull r11, r12, r3, r6 \n\t" \ 731 "adds r14, r14, r11 \n\t" \ 732 "adcs r9, r9, r12 \n\t" \ 733 "adc r10, r10, #0 \n\t" \ 734 "ldr r11, [r0] \n\t" \ 735 "adds r14, r14, r11 \n\t" \ 736 "adcs r9, r9, #0 \n\t" \ 737 "adc r10, r10, #0 \n\t" \ 738 "stmia r0!, {r14} \n\t" \ 739 \ 740 "ldmia r1!, {r4} \n\t" \ 741 "mov r11, #0 \n\t" \ 742 "umull r12, r14, r5, r8 \n\t" \ 743 "adds r9, r9, r12 \n\t" \ 744 "adcs r10, r10, r14 \n\t" \ 745 "adc r11, r11, #0 \n\t" \ 746 "umull r12, r14, r3, r7 \n\t" \ 747 "adds r9, r9, r12 \n\t" \ 748 "adcs r10, r10, r14 \n\t" \ 749 "adc r11, r11, #0 \n\t" \ 750 "umull r12, r14, r4, r6 \n\t" \ 751 "adds r9, r9, r12 \n\t" \ 752 "adcs r10, r10, r14 \n\t" \ 753 "adc r11, r11, #0 \n\t" \ 754 "ldr r12, [r0] \n\t" \ 755 "adds r9, r9, r12 \n\t" \ 756 "adcs r10, r10, #0 \n\t" \ 757 "adc r11, r11, #0 \n\t" \ 758 "stmia r0!, {r9} \n\t" \ 759 \ 760 "ldmia r1!, {r5} \n\t" \ 761 "mov r12, #0 \n\t" \ 762 "umull r14, r9, r3, r8 \n\t" \ 763 "adds r10, r10, r14 \n\t" \ 764 "adcs r11, r11, r9 \n\t" \ 765 "adc r12, r12, #0 \n\t" \ 766 "umull r14, r9, r4, r7 \n\t" \ 767 "adds r10, r10, r14 \n\t" \ 768 "adcs r11, r11, r9 \n\t" \ 769 "adc r12, r12, #0 \n\t" \ 770 "umull r14, r9, r5, r6 \n\t" \ 771 "adds r10, r10, r14 \n\t" \ 772 "adcs r11, r11, r9 
\n\t" \ 773 "adc r12, r12, #0 \n\t" \ 774 "ldr r14, [r0] \n\t" \ 775 "adds r10, r10, r14 \n\t" \ 776 "adcs r11, r11, #0 \n\t" \ 777 "adc r12, r12, #0 \n\t" \ 778 "stmia r0!, {r10} \n\t" \ 779 \ 780 "ldmia r1!, {r3} \n\t" \ 781 "mov r14, #0 \n\t" \ 782 "umull r9, r10, r4, r8 \n\t" \ 783 "adds r11, r11, r9 \n\t" \ 784 "adcs r12, r12, r10 \n\t" \ 785 "adc r14, r14, #0 \n\t" \ 786 "umull r9, r10, r5, r7 \n\t" \ 787 "adds r11, r11, r9 \n\t" \ 788 "adcs r12, r12, r10 \n\t" \ 789 "adc r14, r14, #0 \n\t" \ 790 "umull r9, r10, r3, r6 \n\t" \ 791 "adds r11, r11, r9 \n\t" \ 792 "adcs r12, r12, r10 \n\t" \ 793 "adc r14, r14, #0 \n\t" \ 794 "ldr r9, [r0] \n\t" \ 795 "adds r11, r11, r9 \n\t" \ 796 "adcs r12, r12, #0 \n\t" \ 797 "adc r14, r14, #0 \n\t" \ 798 "stmia r0!, {r11} \n\t" \ 799 \ 800 "ldmia r2!, {r6} \n\t" \ 801 "mov r9, #0 \n\t" \ 802 "umull r10, r11, r4, r6 \n\t" \ 803 "adds r12, r12, r10 \n\t" \ 804 "adcs r14, r14, r11 \n\t" \ 805 "adc r9, r9, #0 \n\t" \ 806 "umull r10, r11, r5, r8 \n\t" \ 807 "adds r12, r12, r10 \n\t" \ 808 "adcs r14, r14, r11 \n\t" \ 809 "adc r9, r9, #0 \n\t" \ 810 "umull r10, r11, r3, r7 \n\t" \ 811 "adds r12, r12, r10 \n\t" \ 812 "adcs r14, r14, r11 \n\t" \ 813 "adc r9, r9, #0 \n\t" \ 814 "ldr r10, [r0] \n\t" \ 815 "adds r12, r12, r10 \n\t" \ 816 "adcs r14, r14, #0 \n\t" \ 817 "adc r9, r9, #0 \n\t" \ 818 "stmia r0!, {r12} \n\t" \ 819 \ 820 "ldmia r2!, {r7} \n\t" \ 821 "mov r10, #0 \n\t" \ 822 "umull r11, r12, r4, r7 \n\t" \ 823 "adds r14, r14, r11 \n\t" \ 824 "adcs r9, r9, r12 \n\t" \ 825 "adc r10, r10, #0 \n\t" \ 826 "umull r11, r12, r5, r6 \n\t" \ 827 "adds r14, r14, r11 \n\t" \ 828 "adcs r9, r9, r12 \n\t" \ 829 "adc r10, r10, #0 \n\t" \ 830 "umull r11, r12, r3, r8 \n\t" \ 831 "adds r14, r14, r11 \n\t" \ 832 "adcs r9, r9, r12 \n\t" \ 833 "adc r10, r10, #0 \n\t" \ 834 "ldr r11, [r0] \n\t" \ 835 "adds r14, r14, r11 \n\t" \ 836 "adcs r9, r9, #0 \n\t" \ 837 "adc r10, r10, #0 \n\t" \ 838 "stmia r0!, {r14} \n\t" \ 839 \ 840 "ldmia r2!, {r8} \n\t" \ 
841 "mov r11, #0 \n\t" \ 842 "umull r12, r14, r4, r8 \n\t" \ 843 "adds r9, r9, r12 \n\t" \ 844 "adcs r10, r10, r14 \n\t" \ 845 "adc r11, r11, #0 \n\t" \ 846 "umull r12, r14, r5, r7 \n\t" \ 847 "adds r9, r9, r12 \n\t" \ 848 "adcs r10, r10, r14 \n\t" \ 849 "adc r11, r11, #0 \n\t" \ 850 "umull r12, r14, r3, r6 \n\t" \ 851 "adds r9, r9, r12 \n\t" \ 852 "adcs r10, r10, r14 \n\t" \ 853 "adc r11, r11, #0 \n\t" \ 854 "ldr r12, [r0] \n\t" \ 855 "adds r9, r9, r12 \n\t" \ 856 "adcs r10, r10, #0 \n\t" \ 857 "adc r11, r11, #0 \n\t" \ 858 "stmia r0!, {r9} \n\t" \ 859 \ 860 "ldmia r2!, {r6} \n\t" \ 861 "mov r12, #0 \n\t" \ 862 "umull r14, r9, r4, r6 \n\t" \ 863 "adds r10, r10, r14 \n\t" \ 864 "adcs r11, r11, r9 \n\t" \ 865 "adc r12, r12, #0 \n\t" \ 866 "umull r14, r9, r5, r8 \n\t" \ 867 "adds r10, r10, r14 \n\t" \ 868 "adcs r11, r11, r9 \n\t" \ 869 "adc r12, r12, #0 \n\t" \ 870 "umull r14, r9, r3, r7 \n\t" \ 871 "adds r10, r10, r14 \n\t" \ 872 "adcs r11, r11, r9 \n\t" \ 873 "adc r12, r12, #0 \n\t" \ 874 "ldr r14, [r0] \n\t" \ 875 "adds r10, r10, r14 \n\t" \ 876 "adcs r11, r11, #0 \n\t" \ 877 "adc r12, r12, #0 \n\t" \ 878 "stmia r0!, {r10} \n\t" \ 879 \ 880 "mov r14, #0 \n\t" \ 881 "umull r9, r10, r5, r6 \n\t" \ 882 "adds r11, r11, r9 \n\t" \ 883 "adcs r12, r12, r10 \n\t" \ 884 "adc r14, r14, #0 \n\t" \ 885 "umull r9, r10, r3, r8 \n\t" \ 886 "adds r11, r11, r9 \n\t" \ 887 "adcs r12, r12, r10 \n\t" \ 888 "adc r14, r14, #0 \n\t" \ 889 "stmia r0!, {r11} \n\t" \ 890 \ 891 "umull r10, r11, r3, r6 \n\t" \ 892 "adds r12, r12, r10 \n\t" \ 893 "adc r14, r14, r11 \n\t" \ 894 "stmia r0!, {r12, r14} \n\t" \ 895 "pop {r3} \n\t" 896 897#define FAST_MULT_ASM_7_TO_8 \ 898 "cmp r3, #7 \n\t" \ 899 "beq 1f \n\t" \ 900 \ 901 /* r4 = left high, r5 = right high */ \ 902 "ldr r4, [r1] \n\t" \ 903 "ldr r5, [r2] \n\t" \ 904 \ 905 "sub r0, #28 \n\t" \ 906 "sub r1, #28 \n\t" \ 907 "sub r2, #28 \n\t" \ 908 \ 909 "ldr r6, [r0] \n\t" \ 910 "ldr r7, [r1], #4 \n\t" \ 911 "ldr r8, [r2], #4 \n\t" \ 912 "mov r14, 
#0 \n\t" \ 913 "umull r9, r10, r4, r8 \n\t" \ 914 "umull r11, r12, r5, r7 \n\t" \ 915 "adds r9, r9, r6 \n\t" \ 916 "adc r10, r10, #0 \n\t" \ 917 "adds r9, r9, r11 \n\t" \ 918 "adcs r10, r10, r12 \n\t" \ 919 "adc r14, r14, #0 \n\t" \ 920 "str r9, [r0], #4 \n\t" \ 921 \ 922 "ldr r6, [r0] \n\t" \ 923 "adds r10, r10, r6 \n\t" \ 924 "adcs r14, r14, #0 \n\t" \ 925 "ldr r7, [r1], #4 \n\t" \ 926 "ldr r8, [r2], #4 \n\t" \ 927 "mov r9, #0 \n\t" \ 928 "umull r11, r12, r4, r8 \n\t" \ 929 "adds r10, r10, r11 \n\t" \ 930 "adcs r14, r14, r12 \n\t" \ 931 "adc r9, r9, #0 \n\t" \ 932 "umull r11, r12, r5, r7 \n\t" \ 933 "adds r10, r10, r11 \n\t" \ 934 "adcs r14, r14, r12 \n\t" \ 935 "adc r9, r9, #0 \n\t" \ 936 "str r10, [r0], #4 \n\t" \ 937 \ 938 "ldr r6, [r0] \n\t" \ 939 "adds r14, r14, r6 \n\t" \ 940 "adcs r9, r9, #0 \n\t" \ 941 "ldr r7, [r1], #4 \n\t" \ 942 "ldr r8, [r2], #4 \n\t" \ 943 "mov r10, #0 \n\t" \ 944 "umull r11, r12, r4, r8 \n\t" \ 945 "adds r14, r14, r11 \n\t" \ 946 "adcs r9, r9, r12 \n\t" \ 947 "adc r10, r10, #0 \n\t" \ 948 "umull r11, r12, r5, r7 \n\t" \ 949 "adds r14, r14, r11 \n\t" \ 950 "adcs r9, r9, r12 \n\t" \ 951 "adc r10, r10, #0 \n\t" \ 952 "str r14, [r0], #4 \n\t" \ 953 \ 954 "ldr r6, [r0] \n\t" \ 955 "adds r9, r9, r6 \n\t" \ 956 "adcs r10, r10, #0 \n\t" \ 957 "ldr r7, [r1], #4 \n\t" \ 958 "ldr r8, [r2], #4 \n\t" \ 959 "mov r14, #0 \n\t" \ 960 "umull r11, r12, r4, r8 \n\t" \ 961 "adds r9, r9, r11 \n\t" \ 962 "adcs r10, r10, r12 \n\t" \ 963 "adc r14, r14, #0 \n\t" \ 964 "umull r11, r12, r5, r7 \n\t" \ 965 "adds r9, r9, r11 \n\t" \ 966 "adcs r10, r10, r12 \n\t" \ 967 "adc r14, r14, #0 \n\t" \ 968 "str r9, [r0], #4 \n\t" \ 969 \ 970 "ldr r6, [r0] \n\t" \ 971 "adds r10, r10, r6 \n\t" \ 972 "adcs r14, r14, #0 \n\t" \ 973 "ldr r7, [r1], #4 \n\t" \ 974 "ldr r8, [r2], #4 \n\t" \ 975 "mov r9, #0 \n\t" \ 976 "umull r11, r12, r4, r8 \n\t" \ 977 "adds r10, r10, r11 \n\t" \ 978 "adcs r14, r14, r12 \n\t" \ 979 "adc r9, r9, #0 \n\t" \ 980 "umull r11, r12, r5, r7 \n\t" \ 
981 "adds r10, r10, r11 \n\t" \ 982 "adcs r14, r14, r12 \n\t" \ 983 "adc r9, r9, #0 \n\t" \ 984 "str r10, [r0], #4 \n\t" \ 985 \ 986 "ldr r6, [r0] \n\t" \ 987 "adds r14, r14, r6 \n\t" \ 988 "adcs r9, r9, #0 \n\t" \ 989 "ldr r7, [r1], #4 \n\t" \ 990 "ldr r8, [r2], #4 \n\t" \ 991 "mov r10, #0 \n\t" \ 992 "umull r11, r12, r4, r8 \n\t" \ 993 "adds r14, r14, r11 \n\t" \ 994 "adcs r9, r9, r12 \n\t" \ 995 "adc r10, r10, #0 \n\t" \ 996 "umull r11, r12, r5, r7 \n\t" \ 997 "adds r14, r14, r11 \n\t" \ 998 "adcs r9, r9, r12 \n\t" \ 999 "adc r10, r10, #0 \n\t" \ 1000 "str r14, [r0], #4 \n\t" \ 1001 \ 1002 "ldr r6, [r0] \n\t" \ 1003 "adds r9, r9, r6 \n\t" \ 1004 "adcs r10, r10, #0 \n\t" \ 1005 /* skip past already-loaded (r4, r5) */ \ 1006 "ldr r7, [r1], #8 \n\t" \ 1007 "ldr r8, [r2], #8 \n\t" \ 1008 "mov r14, #0 \n\t" \ 1009 "umull r11, r12, r4, r8 \n\t" \ 1010 "adds r9, r9, r11 \n\t" \ 1011 "adcs r10, r10, r12 \n\t" \ 1012 "adc r14, r14, #0 \n\t" \ 1013 "umull r11, r12, r5, r7 \n\t" \ 1014 "adds r9, r9, r11 \n\t" \ 1015 "adcs r10, r10, r12 \n\t" \ 1016 "adc r14, r14, #0 \n\t" \ 1017 "str r9, [r0], #4 \n\t" \ 1018 \ 1019 "umull r11, r12, r4, r5 \n\t" \ 1020 "adds r11, r11, r10 \n\t" \ 1021 "adc r12, r12, r14 \n\t" \ 1022 "stmia r0!, {r11, r12} \n\t" 1023 1024#define FAST_MULT_ASM_8 \ 1025 "push {r3} \n\t" \ 1026 "add r0, 24 \n\t" \ 1027 "add r2, 24 \n\t" \ 1028 "ldmia r1!, {r3,r4} \n\t" \ 1029 "ldmia r2!, {r6,r7} \n\t" \ 1030 \ 1031 "umull r11, r12, r3, r6 \n\t" \ 1032 "stmia r0!, {r11} \n\t" \ 1033 \ 1034 "mov r10, #0 \n\t" \ 1035 "umull r11, r9, r3, r7 \n\t" \ 1036 "adds r12, r12, r11 \n\t" \ 1037 "adc r9, r9, #0 \n\t" \ 1038 "umull r11, r14, r4, r6 \n\t" \ 1039 "adds r12, r12, r11 \n\t" \ 1040 "adcs r9, r9, r14 \n\t" \ 1041 "adc r10, r10, #0 \n\t" \ 1042 "stmia r0!, {r12} \n\t" \ 1043 \ 1044 "umull r12, r14, r4, r7 \n\t" \ 1045 "adds r9, r9, r12 \n\t" \ 1046 "adc r10, r10, r14 \n\t" \ 1047 "stmia r0!, {r9, r10} \n\t" \ 1048 \ 1049 "sub r0, 28 \n\t" \ 1050 "sub r2, 20 \n\t" \ 
1051 "ldmia r2!, {r6,r7,r8} \n\t" \ 1052 "ldmia r1!, {r5} \n\t" \ 1053 \ 1054 "umull r11, r12, r3, r6 \n\t" \ 1055 "stmia r0!, {r11} \n\t" \ 1056 \ 1057 "mov r10, #0 \n\t" \ 1058 "umull r11, r9, r3, r7 \n\t" \ 1059 "adds r12, r12, r11 \n\t" \ 1060 "adc r9, r9, #0 \n\t" \ 1061 "umull r11, r14, r4, r6 \n\t" \ 1062 "adds r12, r12, r11 \n\t" \ 1063 "adcs r9, r9, r14 \n\t" \ 1064 "adc r10, r10, #0 \n\t" \ 1065 "stmia r0!, {r12} \n\t" \ 1066 \ 1067 "mov r11, #0 \n\t" \ 1068 "umull r12, r14, r3, r8 \n\t" \ 1069 "adds r9, r9, r12 \n\t" \ 1070 "adcs r10, r10, r14 \n\t" \ 1071 "adc r11, r11, #0 \n\t" \ 1072 "umull r12, r14, r4, r7 \n\t" \ 1073 "adds r9, r9, r12 \n\t" \ 1074 "adcs r10, r10, r14 \n\t" \ 1075 "adc r11, r11, #0 \n\t" \ 1076 "umull r12, r14, r5, r6 \n\t" \ 1077 "adds r9, r9, r12 \n\t" \ 1078 "adcs r10, r10, r14 \n\t" \ 1079 "adc r11, r11, #0 \n\t" \ 1080 "stmia r0!, {r9} \n\t" \ 1081 \ 1082 "ldmia r1!, {r3} \n\t" \ 1083 "mov r12, #0 \n\t" \ 1084 "umull r14, r9, r4, r8 \n\t" \ 1085 "adds r10, r10, r14 \n\t" \ 1086 "adcs r11, r11, r9 \n\t" \ 1087 "adc r12, r12, #0 \n\t" \ 1088 "umull r14, r9, r5, r7 \n\t" \ 1089 "adds r10, r10, r14 \n\t" \ 1090 "adcs r11, r11, r9 \n\t" \ 1091 "adc r12, r12, #0 \n\t" \ 1092 "umull r14, r9, r3, r6 \n\t" \ 1093 "adds r10, r10, r14 \n\t" \ 1094 "adcs r11, r11, r9 \n\t" \ 1095 "adc r12, r12, #0 \n\t" \ 1096 "ldr r14, [r0] \n\t" \ 1097 "adds r10, r10, r14 \n\t" \ 1098 "adcs r11, r11, #0 \n\t" \ 1099 "adc r12, r12, #0 \n\t" \ 1100 "stmia r0!, {r10} \n\t" \ 1101 \ 1102 "ldmia r1!, {r4} \n\t" \ 1103 "mov r14, #0 \n\t" \ 1104 "umull r9, r10, r5, r8 \n\t" \ 1105 "adds r11, r11, r9 \n\t" \ 1106 "adcs r12, r12, r10 \n\t" \ 1107 "adc r14, r14, #0 \n\t" \ 1108 "umull r9, r10, r3, r7 \n\t" \ 1109 "adds r11, r11, r9 \n\t" \ 1110 "adcs r12, r12, r10 \n\t" \ 1111 "adc r14, r14, #0 \n\t" \ 1112 "umull r9, r10, r4, r6 \n\t" \ 1113 "adds r11, r11, r9 \n\t" \ 1114 "adcs r12, r12, r10 \n\t" \ 1115 "adc r14, r14, #0 \n\t" \ 1116 "ldr r9, [r0] \n\t" \ 1117 
"adds r11, r11, r9 \n\t" \ 1118 "adcs r12, r12, #0 \n\t" \ 1119 "adc r14, r14, #0 \n\t" \ 1120 "stmia r0!, {r11} \n\t" \ 1121 \ 1122 "ldmia r2!, {r6} \n\t" \ 1123 "mov r9, #0 \n\t" \ 1124 "umull r10, r11, r5, r6 \n\t" \ 1125 "adds r12, r12, r10 \n\t" \ 1126 "adcs r14, r14, r11 \n\t" \ 1127 "adc r9, r9, #0 \n\t" \ 1128 "umull r10, r11, r3, r8 \n\t" \ 1129 "adds r12, r12, r10 \n\t" \ 1130 "adcs r14, r14, r11 \n\t" \ 1131 "adc r9, r9, #0 \n\t" \ 1132 "umull r10, r11, r4, r7 \n\t" \ 1133 "adds r12, r12, r10 \n\t" \ 1134 "adcs r14, r14, r11 \n\t" \ 1135 "adc r9, r9, #0 \n\t" \ 1136 "ldr r10, [r0] \n\t" \ 1137 "adds r12, r12, r10 \n\t" \ 1138 "adcs r14, r14, #0 \n\t" \ 1139 "adc r9, r9, #0 \n\t" \ 1140 "stmia r0!, {r12} \n\t" \ 1141 \ 1142 "ldmia r2!, {r7} \n\t" \ 1143 "mov r10, #0 \n\t" \ 1144 "umull r11, r12, r5, r7 \n\t" \ 1145 "adds r14, r14, r11 \n\t" \ 1146 "adcs r9, r9, r12 \n\t" \ 1147 "adc r10, r10, #0 \n\t" \ 1148 "umull r11, r12, r3, r6 \n\t" \ 1149 "adds r14, r14, r11 \n\t" \ 1150 "adcs r9, r9, r12 \n\t" \ 1151 "adc r10, r10, #0 \n\t" \ 1152 "umull r11, r12, r4, r8 \n\t" \ 1153 "adds r14, r14, r11 \n\t" \ 1154 "adcs r9, r9, r12 \n\t" \ 1155 "adc r10, r10, #0 \n\t" \ 1156 "ldr r11, [r0] \n\t" \ 1157 "adds r14, r14, r11 \n\t" \ 1158 "adcs r9, r9, #0 \n\t" \ 1159 "adc r10, r10, #0 \n\t" \ 1160 "stmia r0!, {r14} \n\t" \ 1161 \ 1162 "mov r11, #0 \n\t" \ 1163 "umull r12, r14, r3, r7 \n\t" \ 1164 "adds r9, r9, r12 \n\t" \ 1165 "adcs r10, r10, r14 \n\t" \ 1166 "adc r11, r11, #0 \n\t" \ 1167 "umull r12, r14, r4, r6 \n\t" \ 1168 "adds r9, r9, r12 \n\t" \ 1169 "adcs r10, r10, r14 \n\t" \ 1170 "adc r11, r11, #0 \n\t" \ 1171 "stmia r0!, {r9} \n\t" \ 1172 \ 1173 "umull r14, r9, r4, r7 \n\t" \ 1174 "adds r10, r10, r14 \n\t" \ 1175 "adc r11, r11, r9 \n\t" \ 1176 "stmia r0!, {r10, r11} \n\t" \ 1177 \ 1178 "sub r0, 52 \n\t" \ 1179 "sub r1, 20 \n\t" \ 1180 "sub r2, 32 \n\t" \ 1181 "ldmia r1!, {r3,r4,r5} \n\t" \ 1182 "ldmia r2!, {r6,r7,r8} \n\t" \ 1183 \ 1184 "umull r11, r12, 
r3, r6 \n\t" \ 1185 "stmia r0!, {r11} \n\t" \ 1186 \ 1187 "mov r10, #0 \n\t" \ 1188 "umull r11, r9, r3, r7 \n\t" \ 1189 "adds r12, r12, r11 \n\t" \ 1190 "adc r9, r9, #0 \n\t" \ 1191 "umull r11, r14, r4, r6 \n\t" \ 1192 "adds r12, r12, r11 \n\t" \ 1193 "adcs r9, r9, r14 \n\t" \ 1194 "adc r10, r10, #0 \n\t" \ 1195 "stmia r0!, {r12} \n\t" \ 1196 \ 1197 "mov r11, #0 \n\t" \ 1198 "umull r12, r14, r3, r8 \n\t" \ 1199 "adds r9, r9, r12 \n\t" \ 1200 "adcs r10, r10, r14 \n\t" \ 1201 "adc r11, r11, #0 \n\t" \ 1202 "umull r12, r14, r4, r7 \n\t" \ 1203 "adds r9, r9, r12 \n\t" \ 1204 "adcs r10, r10, r14 \n\t" \ 1205 "adc r11, r11, #0 \n\t" \ 1206 "umull r12, r14, r5, r6 \n\t" \ 1207 "adds r9, r9, r12 \n\t" \ 1208 "adcs r10, r10, r14 \n\t" \ 1209 "adc r11, r11, #0 \n\t" \ 1210 "stmia r0!, {r9} \n\t" \ 1211 \ 1212 "ldmia r1!, {r3} \n\t" \ 1213 "mov r12, #0 \n\t" \ 1214 "umull r14, r9, r4, r8 \n\t" \ 1215 "adds r10, r10, r14 \n\t" \ 1216 "adcs r11, r11, r9 \n\t" \ 1217 "adc r12, r12, #0 \n\t" \ 1218 "umull r14, r9, r5, r7 \n\t" \ 1219 "adds r10, r10, r14 \n\t" \ 1220 "adcs r11, r11, r9 \n\t" \ 1221 "adc r12, r12, #0 \n\t" \ 1222 "umull r14, r9, r3, r6 \n\t" \ 1223 "adds r10, r10, r14 \n\t" \ 1224 "adcs r11, r11, r9 \n\t" \ 1225 "adc r12, r12, #0 \n\t" \ 1226 "ldr r14, [r0] \n\t" \ 1227 "adds r10, r10, r14 \n\t" \ 1228 "adcs r11, r11, #0 \n\t" \ 1229 "adc r12, r12, #0 \n\t" \ 1230 "stmia r0!, {r10} \n\t" \ 1231 \ 1232 "ldmia r1!, {r4} \n\t" \ 1233 "mov r14, #0 \n\t" \ 1234 "umull r9, r10, r5, r8 \n\t" \ 1235 "adds r11, r11, r9 \n\t" \ 1236 "adcs r12, r12, r10 \n\t" \ 1237 "adc r14, r14, #0 \n\t" \ 1238 "umull r9, r10, r3, r7 \n\t" \ 1239 "adds r11, r11, r9 \n\t" \ 1240 "adcs r12, r12, r10 \n\t" \ 1241 "adc r14, r14, #0 \n\t" \ 1242 "umull r9, r10, r4, r6 \n\t" \ 1243 "adds r11, r11, r9 \n\t" \ 1244 "adcs r12, r12, r10 \n\t" \ 1245 "adc r14, r14, #0 \n\t" \ 1246 "ldr r9, [r0] \n\t" \ 1247 "adds r11, r11, r9 \n\t" \ 1248 "adcs r12, r12, #0 \n\t" \ 1249 "adc r14, r14, #0 \n\t" \ 1250 
"stmia r0!, {r11} \n\t" \ 1251 \ 1252 "ldmia r1!, {r5} \n\t" \ 1253 "mov r9, #0 \n\t" \ 1254 "umull r10, r11, r3, r8 \n\t" \ 1255 "adds r12, r12, r10 \n\t" \ 1256 "adcs r14, r14, r11 \n\t" \ 1257 "adc r9, r9, #0 \n\t" \ 1258 "umull r10, r11, r4, r7 \n\t" \ 1259 "adds r12, r12, r10 \n\t" \ 1260 "adcs r14, r14, r11 \n\t" \ 1261 "adc r9, r9, #0 \n\t" \ 1262 "umull r10, r11, r5, r6 \n\t" \ 1263 "adds r12, r12, r10 \n\t" \ 1264 "adcs r14, r14, r11 \n\t" \ 1265 "adc r9, r9, #0 \n\t" \ 1266 "ldr r10, [r0] \n\t" \ 1267 "adds r12, r12, r10 \n\t" \ 1268 "adcs r14, r14, #0 \n\t" \ 1269 "adc r9, r9, #0 \n\t" \ 1270 "stmia r0!, {r12} \n\t" \ 1271 \ 1272 "ldmia r1!, {r3} \n\t" \ 1273 "mov r10, #0 \n\t" \ 1274 "umull r11, r12, r4, r8 \n\t" \ 1275 "adds r14, r14, r11 \n\t" \ 1276 "adcs r9, r9, r12 \n\t" \ 1277 "adc r10, r10, #0 \n\t" \ 1278 "umull r11, r12, r5, r7 \n\t" \ 1279 "adds r14, r14, r11 \n\t" \ 1280 "adcs r9, r9, r12 \n\t" \ 1281 "adc r10, r10, #0 \n\t" \ 1282 "umull r11, r12, r3, r6 \n\t" \ 1283 "adds r14, r14, r11 \n\t" \ 1284 "adcs r9, r9, r12 \n\t" \ 1285 "adc r10, r10, #0 \n\t" \ 1286 "ldr r11, [r0] \n\t" \ 1287 "adds r14, r14, r11 \n\t" \ 1288 "adcs r9, r9, #0 \n\t" \ 1289 "adc r10, r10, #0 \n\t" \ 1290 "stmia r0!, {r14} \n\t" \ 1291 \ 1292 "ldmia r1!, {r4} \n\t" \ 1293 "mov r11, #0 \n\t" \ 1294 "umull r12, r14, r5, r8 \n\t" \ 1295 "adds r9, r9, r12 \n\t" \ 1296 "adcs r10, r10, r14 \n\t" \ 1297 "adc r11, r11, #0 \n\t" \ 1298 "umull r12, r14, r3, r7 \n\t" \ 1299 "adds r9, r9, r12 \n\t" \ 1300 "adcs r10, r10, r14 \n\t" \ 1301 "adc r11, r11, #0 \n\t" \ 1302 "umull r12, r14, r4, r6 \n\t" \ 1303 "adds r9, r9, r12 \n\t" \ 1304 "adcs r10, r10, r14 \n\t" \ 1305 "adc r11, r11, #0 \n\t" \ 1306 "ldr r12, [r0] \n\t" \ 1307 "adds r9, r9, r12 \n\t" \ 1308 "adcs r10, r10, #0 \n\t" \ 1309 "adc r11, r11, #0 \n\t" \ 1310 "stmia r0!, {r9} \n\t" \ 1311 \ 1312 "ldmia r2!, {r6} \n\t" \ 1313 "mov r12, #0 \n\t" \ 1314 "umull r14, r9, r5, r6 \n\t" \ 1315 "adds r10, r10, r14 \n\t" \ 1316 
"adcs r11, r11, r9 \n\t" \ 1317 "adc r12, r12, #0 \n\t" \ 1318 "umull r14, r9, r3, r8 \n\t" \ 1319 "adds r10, r10, r14 \n\t" \ 1320 "adcs r11, r11, r9 \n\t" \ 1321 "adc r12, r12, #0 \n\t" \ 1322 "umull r14, r9, r4, r7 \n\t" \ 1323 "adds r10, r10, r14 \n\t" \ 1324 "adcs r11, r11, r9 \n\t" \ 1325 "adc r12, r12, #0 \n\t" \ 1326 "ldr r14, [r0] \n\t" \ 1327 "adds r10, r10, r14 \n\t" \ 1328 "adcs r11, r11, #0 \n\t" \ 1329 "adc r12, r12, #0 \n\t" \ 1330 "stmia r0!, {r10} \n\t" \ 1331 \ 1332 "ldmia r2!, {r7} \n\t" \ 1333 "mov r14, #0 \n\t" \ 1334 "umull r9, r10, r5, r7 \n\t" \ 1335 "adds r11, r11, r9 \n\t" \ 1336 "adcs r12, r12, r10 \n\t" \ 1337 "adc r14, r14, #0 \n\t" \ 1338 "umull r9, r10, r3, r6 \n\t" \ 1339 "adds r11, r11, r9 \n\t" \ 1340 "adcs r12, r12, r10 \n\t" \ 1341 "adc r14, r14, #0 \n\t" \ 1342 "umull r9, r10, r4, r8 \n\t" \ 1343 "adds r11, r11, r9 \n\t" \ 1344 "adcs r12, r12, r10 \n\t" \ 1345 "adc r14, r14, #0 \n\t" \ 1346 "ldr r9, [r0] \n\t" \ 1347 "adds r11, r11, r9 \n\t" \ 1348 "adcs r12, r12, #0 \n\t" \ 1349 "adc r14, r14, #0 \n\t" \ 1350 "stmia r0!, {r11} \n\t" \ 1351 \ 1352 "ldmia r2!, {r8} \n\t" \ 1353 "mov r9, #0 \n\t" \ 1354 "umull r10, r11, r5, r8 \n\t" \ 1355 "adds r12, r12, r10 \n\t" \ 1356 "adcs r14, r14, r11 \n\t" \ 1357 "adc r9, r9, #0 \n\t" \ 1358 "umull r10, r11, r3, r7 \n\t" \ 1359 "adds r12, r12, r10 \n\t" \ 1360 "adcs r14, r14, r11 \n\t" \ 1361 "adc r9, r9, #0 \n\t" \ 1362 "umull r10, r11, r4, r6 \n\t" \ 1363 "adds r12, r12, r10 \n\t" \ 1364 "adcs r14, r14, r11 \n\t" \ 1365 "adc r9, r9, #0 \n\t" \ 1366 "ldr r10, [r0] \n\t" \ 1367 "adds r12, r12, r10 \n\t" \ 1368 "adcs r14, r14, #0 \n\t" \ 1369 "adc r9, r9, #0 \n\t" \ 1370 "stmia r0!, {r12} \n\t" \ 1371 \ 1372 "ldmia r2!, {r6} \n\t" \ 1373 "mov r10, #0 \n\t" \ 1374 "umull r11, r12, r5, r6 \n\t" \ 1375 "adds r14, r14, r11 \n\t" \ 1376 "adcs r9, r9, r12 \n\t" \ 1377 "adc r10, r10, #0 \n\t" \ 1378 "umull r11, r12, r3, r8 \n\t" \ 1379 "adds r14, r14, r11 \n\t" \ 1380 "adcs r9, r9, r12 \n\t" \ 
1381 "adc r10, r10, #0 \n\t" \ 1382 "umull r11, r12, r4, r7 \n\t" \ 1383 "adds r14, r14, r11 \n\t" \ 1384 "adcs r9, r9, r12 \n\t" \ 1385 "adc r10, r10, #0 \n\t" \ 1386 "ldr r11, [r0] \n\t" \ 1387 "adds r14, r14, r11 \n\t" \ 1388 "adcs r9, r9, #0 \n\t" \ 1389 "adc r10, r10, #0 \n\t" \ 1390 "stmia r0!, {r14} \n\t" \ 1391 \ 1392 "ldmia r2!, {r7} \n\t" \ 1393 "mov r11, #0 \n\t" \ 1394 "umull r12, r14, r5, r7 \n\t" \ 1395 "adds r9, r9, r12 \n\t" \ 1396 "adcs r10, r10, r14 \n\t" \ 1397 "adc r11, r11, #0 \n\t" \ 1398 "umull r12, r14, r3, r6 \n\t" \ 1399 "adds r9, r9, r12 \n\t" \ 1400 "adcs r10, r10, r14 \n\t" \ 1401 "adc r11, r11, #0 \n\t" \ 1402 "umull r12, r14, r4, r8 \n\t" \ 1403 "adds r9, r9, r12 \n\t" \ 1404 "adcs r10, r10, r14 \n\t" \ 1405 "adc r11, r11, #0 \n\t" \ 1406 "ldr r12, [r0] \n\t" \ 1407 "adds r9, r9, r12 \n\t" \ 1408 "adcs r10, r10, #0 \n\t" \ 1409 "adc r11, r11, #0 \n\t" \ 1410 "stmia r0!, {r9} \n\t" \ 1411 \ 1412 "mov r12, #0 \n\t" \ 1413 "umull r14, r9, r3, r7 \n\t" \ 1414 "adds r10, r10, r14 \n\t" \ 1415 "adcs r11, r11, r9 \n\t" \ 1416 "adc r12, r12, #0 \n\t" \ 1417 "umull r14, r9, r4, r6 \n\t" \ 1418 "adds r10, r10, r14 \n\t" \ 1419 "adcs r11, r11, r9 \n\t" \ 1420 "adc r12, r12, #0 \n\t" \ 1421 "stmia r0!, {r10} \n\t" \ 1422 \ 1423 "umull r9, r10, r4, r7 \n\t" \ 1424 "adds r11, r11, r9 \n\t" \ 1425 "adc r12, r12, r10 \n\t" \ 1426 "stmia r0!, {r11, r12} \n\t" \ 1427 "pop {r3} \n\t" 1428 /* FAST_SQUARE_ASM_5: inline-asm fragment that loads five 32-bit words a0..a4 from [r1] into r2..r6 and stores the ten words of their square through r0 (post-incremented), one product column at a time. r2 is pushed on entry; the closing pop restores it and leaves r1 just past the five input words. r1, r8-r12 and r14 are used as scratch. */ 1429#define FAST_SQUARE_ASM_5 \ 1430 "push {r2} \n\t" \ 1431 "ldmia r1!, {r2,r3,r4,r5,r6} \n\t" \ 1432 "push {r1} \n\t" \ 1433 /* Column 0: a0*a0; low word stored, high word carried in r12 */ \ 1434 "umull r11, r12, r2, r2 \n\t" \ 1435 "stmia r0!, {r11} \n\t" \ 1436 /* Column 1: a0*a1 added in twice; carry continues in r8:r9 */ \ 1437 "mov r9, #0 \n\t" \ 1438 "umull r10, r11, r2, r3 \n\t" \ 1439 "adds r12, r12, r10 \n\t" \ 1440 "adcs r8, r11, #0 \n\t" \ 1441 "adc r9, r9, #0 \n\t" \ 1442 "adds r12, r12, r10 \n\t" \ 1443 "adcs r8, r8, r11 \n\t" \ 1444 "adc r9, r9, #0 \n\t" \ 1445 "stmia r0!, {r12} \n\t" \ 1446 /* Column 2: 2*a0*a2 + a1*a1 */ \ 1447 "mov r10, #0 \n\t" \ 1448 "umull r11, r12, r2, r4 \n\t" \ 1449 "adds r11, r11, 
r11 \n\t" \ 1450 "adcs r12, r12, r12 \n\t" \ 1451 "adc r10, r10, #0 \n\t" \ 1452 "adds r8, r8, r11 \n\t" \ 1453 "adcs r9, r9, r12 \n\t" \ 1454 "adc r10, r10, #0 \n\t" \ 1455 "umull r11, r12, r3, r3 \n\t" \ 1456 "adds r8, r8, r11 \n\t" \ 1457 "adcs r9, r9, r12 \n\t" \ 1458 "adc r10, r10, #0 \n\t" \ 1459 "stmia r0!, {r8} \n\t" \ 1460 /* Column 3: 2*(a0*a3 + a1*a2), then the carry held in r9:r10 is added */ \ 1461 "mov r12, #0 \n\t" \ 1462 "umull r8, r11, r2, r5 \n\t" \ 1463 "umull r1, r14, r3, r4 \n\t" \ 1464 "adds r8, r8, r1 \n\t" \ 1465 "adcs r11, r11, r14 \n\t" \ 1466 "adc r12, r12, #0 \n\t" \ 1467 "adds r8, r8, r8 \n\t" \ 1468 "adcs r11, r11, r11 \n\t" \ 1469 "adc r12, r12, r12 \n\t" \ 1470 "adds r8, r8, r9 \n\t" \ 1471 "adcs r11, r11, r10 \n\t" \ 1472 "adc r12, r12, #0 \n\t" \ 1473 "stmia r0!, {r8} \n\t" \ 1474 /* Column 4: 2*(a0*a4 + a1*a3) + a2*a2, plus the carry held in r11:r12 */ \ 1475 "mov r10, #0 \n\t" \ 1476 "umull r8, r9, r2, r6 \n\t" \ 1477 "umull r1, r14, r3, r5 \n\t" \ 1478 "adds r8, r8, r1 \n\t" \ 1479 "adcs r9, r9, r14 \n\t" \ 1480 "adc r10, r10, #0 \n\t" \ 1481 "adds r8, r8, r8 \n\t" \ 1482 "adcs r9, r9, r9 \n\t" \ 1483 "adc r10, r10, r10 \n\t" \ 1484 "umull r1, r14, r4, r4 \n\t" \ 1485 "adds r8, r8, r1 \n\t" \ 1486 "adcs r9, r9, r14 \n\t" \ 1487 "adc r10, r10, #0 \n\t" \ 1488 "adds r8, r8, r11 \n\t" \ 1489 "adcs r9, r9, r12 \n\t" \ 1490 "adc r10, r10, #0 \n\t" \ 1491 "stmia r0!, {r8} \n\t" \ 1492 /* Column 5: 2*(a1*a4 + a2*a3), plus the carry held in r9:r10 */ \ 1493 "mov r12, #0 \n\t" \ 1494 "umull r8, r11, r3, r6 \n\t" \ 1495 "umull r1, r14, r4, r5 \n\t" \ 1496 "adds r8, r8, r1 \n\t" \ 1497 "adcs r11, r11, r14 \n\t" \ 1498 "adc r12, r12, #0 \n\t" \ 1499 "adds r8, r8, r8 \n\t" \ 1500 "adcs r11, r11, r11 \n\t" \ 1501 "adc r12, r12, r12 \n\t" \ 1502 "adds r8, r8, r9 \n\t" \ 1503 "adcs r11, r11, r10 \n\t" \ 1504 "adc r12, r12, #0 \n\t" \ 1505 "stmia r0!, {r8} \n\t" \ 1506 /* Column 6: 2*a2*a4 + a3*a3, accumulated on top of r11:r12 */ \ 1507 "mov r8, #0 \n\t" \ 1508 "umull r1, r10, r4, r6 \n\t" \ 1509 "adds r1, r1, r1 \n\t" \ 1510 "adcs r10, r10, r10 \n\t" \ 1511 "adc r8, r8, #0 \n\t" \ 1512 "adds r11, r11, r1 \n\t" \ 1513 "adcs r12, r12, r10 \n\t" \ 1514 "adc r8, r8, #0 \n\t" \ 1515 "umull r1, r10, r5, r5 \n\t" \ 1516 
"adds r11, r11, r1 \n\t" \ 1517 "adcs r12, r12, r10 \n\t" \ 1518 "adc r8, r8, #0 \n\t" \ 1519 "stmia r0!, {r11} \n\t" \ 1520 /* Column 7: 2*a3*a4, accumulated on top of r12:r8 */ \ 1521 "mov r11, #0 \n\t" \ 1522 "umull r1, r10, r5, r6 \n\t" \ 1523 "adds r1, r1, r1 \n\t" \ 1524 "adcs r10, r10, r10 \n\t" \ 1525 "adc r11, r11, #0 \n\t" \ 1526 "adds r12, r12, r1 \n\t" \ 1527 "adcs r8, r8, r10 \n\t" \ 1528 "adc r11, r11, #0 \n\t" \ 1529 "stmia r0!, {r12} \n\t" \ 1530 /* Columns 8 and 9: a4*a4 plus carry; the last two words are stored, then r1 and r2 are popped */ \ 1531 "umull r1, r10, r6, r6 \n\t" \ 1532 "adds r8, r8, r1 \n\t" \ 1533 "adcs r11, r11, r10 \n\t" \ 1534 "stmia r0!, {r8, r11} \n\t" \ 1535 "pop {r1, r2} \n\t" 1536 /* FAST_SQUARE_ASM_5_TO_6: extends a 5-word square already written behind r0 to a 6-word square. When r2 == 5 it branches forward to local label 1, which is not defined inside this macro. Otherwise it rewinds r0 and r1 by 20 bytes, loads six input words, adds twice the cross products a[i]*a[5] (i = 0..4) onto the five words at r0, adds a[5]*a[5] on top, and stores seven words. Uses r3-r11 and r14. NOTE(review): presumably r2 holds the word count and this runs directly after FAST_SQUARE_ASM_5 - confirm against the caller. */ 1537#define FAST_SQUARE_ASM_5_TO_6 \ 1538 "cmp r2, #5 \n\t" \ 1539 "beq 1f \n\t" \ 1540 /* Step r0 back five result words and r1 back five input words */ \ 1541 "sub r0, #20 \n\t" \ 1542 "sub r1, #20 \n\t" \ 1543 \ 1544 /* Do off-center multiplication: r3..r8 = (a0..a4) * a5, with a5 in r11 */ \ 1545 "ldmia r1!, {r6,r7,r8,r9,r10,r11} \n\t" \ 1546 "umull r3, r4, r6, r11 \n\t" \ 1547 "umull r6, r5, r7, r11 \n\t" \ 1548 "adds r4, r4, r6 \n\t" \ 1549 "umull r7, r6, r8, r11 \n\t" \ 1550 "adcs r5, r5, r7 \n\t" \ 1551 "umull r8, r7, r9, r11 \n\t" \ 1552 "adcs r6, r6, r8 \n\t" \ 1553 "umull r9, r8, r10, r11 \n\t" \ 1554 "adcs r7, r7, r9 \n\t" \ 1555 "adcs r8, r8, #0 \n\t" \ 1556 \ 1557 /* Multiply by 2; the bit shifted out of r8 lands in r9 */ \ 1558 "mov r9, #0 \n\t" \ 1559 "adds r3, r3, r3 \n\t" \ 1560 "adcs r4, r4, r4 \n\t" \ 1561 "adcs r5, r5, r5 \n\t" \ 1562 "adcs r6, r6, r6 \n\t" \ 1563 "adcs r7, r7, r7 \n\t" \ 1564 "adcs r8, r8, r8 \n\t" \ 1565 "adcs r9, r9, #0 \n\t" \ 1566 \ 1567 /* Add into previous: the five result words read back through r0 */ \ 1568 "ldr r14, [r0], #4 \n\t" \ 1569 "adds r3, r3, r14 \n\t" \ 1570 "ldr r14, [r0], #4 \n\t" \ 1571 "adcs r4, r4, r14 \n\t" \ 1572 "ldr r14, [r0], #4 \n\t" \ 1573 "adcs r5, r5, r14 \n\t" \ 1574 "ldr r14, [r0], #4 \n\t" \ 1575 "adcs r6, r6, r14 \n\t" \ 1576 "ldr r14, [r0], #4 \n\t" \ 1577 "adcs r7, r7, r14 \n\t" \ 1578 "adcs r8, r8, #0 \n\t" \ 1579 "adcs r9, r9, #0 \n\t" \ 1580 "sub r0, #20 \n\t" \ 1581 \ 1582 /* Perform center multiplication: r9:r8 += a5*a5, then store seven words */ \ 1583 "umlal r8, r9, r11, r11 \n\t" \ 1584 "stmia r0!, {r3,r4,r5,r6,r7,r8,r9} \n\t" 1585 
1586#define FAST_SQUARE_ASM_6 /* Loads six 32-bit words a0..a5 from [r1] into r2..r7 and stores the twelve words of their square through r0 (post-incremented), one product column at a time. r2 is pushed on entry; the closing pop restores it and leaves r1 just past the six input words. r1, r8-r12 and r14 are used as scratch. */ \ 1587 "push {r2} \n\t" \ 1588 "ldmia r1!, {r2,r3,r4,r5,r6,r7} \n\t" \ 1589 "push {r1} \n\t" \ 1590 /* Column 0: a0*a0; low word stored, high word carried in r12 */ \ 1591 "umull r11, r12, r2, r2 \n\t" \ 1592 "stmia r0!, {r11} \n\t" \ 1593 /* Column 1: a0*a1 added in twice; carry continues in r8:r9 */ \ 1594 "mov r9, #0 \n\t" \ 1595 "umull r10, r11, r2, r3 \n\t" \ 1596 "adds r12, r12, r10 \n\t" \ 1597 "adcs r8, r11, #0 \n\t" \ 1598 "adc r9, r9, #0 \n\t" \ 1599 "adds r12, r12, r10 \n\t" \ 1600 "adcs r8, r8, r11 \n\t" \ 1601 "adc r9, r9, #0 \n\t" \ 1602 "stmia r0!, {r12} \n\t" \ 1603 /* Column 2: 2*a0*a2 + a1*a1 */ \ 1604 "mov r10, #0 \n\t" \ 1605 "umull r11, r12, r2, r4 \n\t" \ 1606 "adds r11, r11, r11 \n\t" \ 1607 "adcs r12, r12, r12 \n\t" \ 1608 "adc r10, r10, #0 \n\t" \ 1609 "adds r8, r8, r11 \n\t" \ 1610 "adcs r9, r9, r12 \n\t" \ 1611 "adc r10, r10, #0 \n\t" \ 1612 "umull r11, r12, r3, r3 \n\t" \ 1613 "adds r8, r8, r11 \n\t" \ 1614 "adcs r9, r9, r12 \n\t" \ 1615 "adc r10, r10, #0 \n\t" \ 1616 "stmia r0!, {r8} \n\t" \ 1617 /* Column 3: 2*(a0*a3 + a1*a2), then the carry held in r9:r10 is added */ \ 1618 "mov r12, #0 \n\t" \ 1619 "umull r8, r11, r2, r5 \n\t" \ 1620 "umull r1, r14, r3, r4 \n\t" \ 1621 "adds r8, r8, r1 \n\t" \ 1622 "adcs r11, r11, r14 \n\t" \ 1623 "adc r12, r12, #0 \n\t" \ 1624 "adds r8, r8, r8 \n\t" \ 1625 "adcs r11, r11, r11 \n\t" \ 1626 "adc r12, r12, r12 \n\t" \ 1627 "adds r8, r8, r9 \n\t" \ 1628 "adcs r11, r11, r10 \n\t" \ 1629 "adc r12, r12, #0 \n\t" \ 1630 "stmia r0!, {r8} \n\t" \ 1631 /* Column 4: 2*(a0*a4 + a1*a3) + a2*a2, plus the carry held in r11:r12 */ \ 1632 "mov r10, #0 \n\t" \ 1633 "umull r8, r9, r2, r6 \n\t" \ 1634 "umull r1, r14, r3, r5 \n\t" \ 1635 "adds r8, r8, r1 \n\t" \ 1636 "adcs r9, r9, r14 \n\t" \ 1637 "adc r10, r10, #0 \n\t" \ 1638 "adds r8, r8, r8 \n\t" \ 1639 "adcs r9, r9, r9 \n\t" \ 1640 "adc r10, r10, r10 \n\t" \ 1641 "umull r1, r14, r4, r4 \n\t" \ 1642 "adds r8, r8, r1 \n\t" \ 1643 "adcs r9, r9, r14 \n\t" \ 1644 "adc r10, r10, #0 \n\t" \ 1645 "adds r8, r8, r11 \n\t" \ 1646 "adcs r9, r9, r12 \n\t" \ 1647 "adc r10, r10, #0 \n\t" \ 1648 "stmia r0!, {r8} \n\t" \ 1649 /* Column 5: 2*(a0*a5 + a1*a4 + a2*a3), plus the carry held in r9:r10 */ \ 1650 "mov r12, #0 \n\t" \ 1651 "umull r8, r11, r2, r7 \n\t" \ 1652 "umull r1, r14, r3, r6 \n\t" \ 1653 "adds r8, r8, r1 \n\t" \ 
1654 "adcs r11, r11, r14 \n\t" \ 1655 "adc r12, r12, #0 \n\t" \ 1656 "umull r1, r14, r4, r5 \n\t" \ 1657 "adds r8, r8, r1 \n\t" \ 1658 "adcs r11, r11, r14 \n\t" \ 1659 "adc r12, r12, #0 \n\t" \ 1660 "adds r8, r8, r8 \n\t" \ 1661 "adcs r11, r11, r11 \n\t" \ 1662 "adc r12, r12, r12 \n\t" \ 1663 "adds r8, r8, r9 \n\t" \ 1664 "adcs r11, r11, r10 \n\t" \ 1665 "adc r12, r12, #0 \n\t" \ 1666 "stmia r0!, {r8} \n\t" \ 1667 /* Column 6: 2*(a1*a5 + a2*a4) + a3*a3, plus the carry held in r11:r12 */ \ 1668 "mov r10, #0 \n\t" \ 1669 "umull r8, r9, r3, r7 \n\t" \ 1670 "umull r1, r14, r4, r6 \n\t" \ 1671 "adds r8, r8, r1 \n\t" \ 1672 "adcs r9, r9, r14 \n\t" \ 1673 "adc r10, r10, #0 \n\t" \ 1674 "adds r8, r8, r8 \n\t" \ 1675 "adcs r9, r9, r9 \n\t" \ 1676 "adc r10, r10, r10 \n\t" \ 1677 "umull r1, r14, r5, r5 \n\t" \ 1678 "adds r8, r8, r1 \n\t" \ 1679 "adcs r9, r9, r14 \n\t" \ 1680 "adc r10, r10, #0 \n\t" \ 1681 "adds r8, r8, r11 \n\t" \ 1682 "adcs r9, r9, r12 \n\t" \ 1683 "adc r10, r10, #0 \n\t" \ 1684 "stmia r0!, {r8} \n\t" \ 1685 /* Column 7: 2*(a2*a5 + a3*a4), plus the carry held in r9:r10 */ \ 1686 "mov r12, #0 \n\t" \ 1687 "umull r8, r11, r4, r7 \n\t" \ 1688 "umull r1, r14, r5, r6 \n\t" \ 1689 "adds r8, r8, r1 \n\t" \ 1690 "adcs r11, r11, r14 \n\t" \ 1691 "adc r12, r12, #0 \n\t" \ 1692 "adds r8, r8, r8 \n\t" \ 1693 "adcs r11, r11, r11 \n\t" \ 1694 "adc r12, r12, r12 \n\t" \ 1695 "adds r8, r8, r9 \n\t" \ 1696 "adcs r11, r11, r10 \n\t" \ 1697 "adc r12, r12, #0 \n\t" \ 1698 "stmia r0!, {r8} \n\t" \ 1699 /* Column 8: 2*a3*a5 + a4*a4, accumulated on top of r11:r12 */ \ 1700 "mov r8, #0 \n\t" \ 1701 "umull r1, r10, r5, r7 \n\t" \ 1702 "adds r1, r1, r1 \n\t" \ 1703 "adcs r10, r10, r10 \n\t" \ 1704 "adc r8, r8, #0 \n\t" \ 1705 "adds r11, r11, r1 \n\t" \ 1706 "adcs r12, r12, r10 \n\t" \ 1707 "adc r8, r8, #0 \n\t" \ 1708 "umull r1, r10, r6, r6 \n\t" \ 1709 "adds r11, r11, r1 \n\t" \ 1710 "adcs r12, r12, r10 \n\t" \ 1711 "adc r8, r8, #0 \n\t" \ 1712 "stmia r0!, {r11} \n\t" \ 1713 /* Column 9: 2*a4*a5, accumulated on top of r12:r8 */ \ 1714 "mov r11, #0 \n\t" \ 1715 "umull r1, r10, r6, r7 \n\t" \ 1716 "adds r1, r1, r1 \n\t" \ 1717 "adcs r10, r10, r10 \n\t" \ 1718 "adc r11, r11, #0 \n\t" \ 1719 "adds r12, r12, r1 \n\t" \ 1720 "adcs r8, r8, 
r10 \n\t" \ 1721 "adc r11, r11, #0 \n\t" \ 1722 "stmia r0!, {r12} \n\t" \ 1723 /* Columns 10 and 11: a5*a5 plus carry; the last two words are stored, then r1 and r2 are popped */ \ 1724 "umull r1, r10, r7, r7 \n\t" \ 1725 "adds r8, r8, r1 \n\t" \ 1726 "adcs r11, r11, r10 \n\t" \ 1727 "stmia r0!, {r8, r11} \n\t" \ 1728 "pop {r1, r2} \n\t" 1729 /* FAST_SQUARE_ASM_6_TO_7: extends a 6-word square already written behind r0 to a 7-word square. When r2 == 6 it branches forward to local label 1, which is not defined inside this macro. Otherwise it rewinds r0 and r1 by 24 bytes, loads seven input words, adds twice the cross products a[i]*a[6] (i = 0..5) onto the six words at r0, adds a[6]*a[6] on top, and stores eight words. Uses r3-r12 and r14. NOTE(review): presumably r2 holds the word count and this runs directly after FAST_SQUARE_ASM_6 - confirm against the caller. */ 1730#define FAST_SQUARE_ASM_6_TO_7 \ 1731 "cmp r2, #6 \n\t" \ 1732 "beq 1f \n\t" \ 1733 /* Step r0 back six result words and r1 back six input words */ \ 1734 "sub r0, #24 \n\t" \ 1735 "sub r1, #24 \n\t" \ 1736 \ 1737 /* Do off-center multiplication: r3..r9 = (a0..a5) * a6, with a6 in r12 */ \ 1738 "ldmia r1!, {r6,r7,r8,r9,r10,r11,r12} \n\t" \ 1739 "umull r3, r4, r6, r12 \n\t" \ 1740 "umull r6, r5, r7, r12 \n\t" \ 1741 "adds r4, r4, r6 \n\t" \ 1742 "umull r7, r6, r8, r12 \n\t" \ 1743 "adcs r5, r5, r7 \n\t" \ 1744 "umull r8, r7, r9, r12 \n\t" \ 1745 "adcs r6, r6, r8 \n\t" \ 1746 "umull r9, r8, r10, r12 \n\t" \ 1747 "adcs r7, r7, r9 \n\t" \ 1748 "umull r10, r9, r11, r12 \n\t" \ 1749 "adcs r8, r8, r10 \n\t" \ 1750 "adcs r9, r9, #0 \n\t" \ 1751 \ 1752 /* Multiply by 2; the bit shifted out of r9 lands in r10 */ \ 1753 "mov r10, #0 \n\t" \ 1754 "adds r3, r3, r3 \n\t" \ 1755 "adcs r4, r4, r4 \n\t" \ 1756 "adcs r5, r5, r5 \n\t" \ 1757 "adcs r6, r6, r6 \n\t" \ 1758 "adcs r7, r7, r7 \n\t" \ 1759 "adcs r8, r8, r8 \n\t" \ 1760 "adcs r9, r9, r9 \n\t" \ 1761 "adcs r10, r10, #0 \n\t" \ 1762 \ 1763 /* Add into previous: the six result words read back through r0 */ \ 1764 "ldr r14, [r0], #4 \n\t" \ 1765 "adds r3, r3, r14 \n\t" \ 1766 "ldr r14, [r0], #4 \n\t" \ 1767 "adcs r4, r4, r14 \n\t" \ 1768 "ldr r14, [r0], #4 \n\t" \ 1769 "adcs r5, r5, r14 \n\t" \ 1770 "ldr r14, [r0], #4 \n\t" \ 1771 "adcs r6, r6, r14 \n\t" \ 1772 "ldr r14, [r0], #4 \n\t" \ 1773 "adcs r7, r7, r14 \n\t" \ 1774 "ldr r14, [r0], #4 \n\t" \ 1775 "adcs r8, r8, r14 \n\t" \ 1776 "adcs r9, r9, #0 \n\t" \ 1777 "adcs r10, r10, #0 \n\t" \ 1778 "sub r0, #24 \n\t" \ 1779 \ 1780 /* Perform center multiplication: r10:r9 += a6*a6, then store eight words */ \ 1781 "umlal r9, r10, r12, r12 \n\t" \ 1782 "stmia r0!, {r3,r4,r5,r6,r7,r8,r9,r10} \n\t" 1783 /* FAST_SQUARE_ASM_7: loads seven 32-bit words a0..a6 from [r1] into r2..r8 and stores the fourteen words of their square through r0 (post-incremented). r8 is reused as scratch, so a0*a6 is parked in the result buffer first and a6 is reloaded into r2 later through r1, which is stepped back 4 bytes here to point at a6. r2 is pushed on entry; the closing pop restores it and leaves r1 just past the seven input words. r1, r8-r12 and r14 are used as scratch. */ 1784#define FAST_SQUARE_ASM_7 \ 1785 "push {r2} \n\t" \ 1786 "ldmia r1!, {r2, r3, r4, r5, r6, r7, r8} \n\t" \ 1787 "push {r1} \n\t" \ 1788 "sub r1, 4 \n\t" \ 1789 
/* Park a0*a6 in result words 6 and 7 while r8 still holds a6, then rewind r0 to result word 0 */ \ 1790 "add r0, 24 \n\t" \ 1791 "umull r9, r10, r2, r8 \n\t" \ 1792 "stmia r0!, {r9, r10} \n\t" \ 1793 "sub r0, 32 \n\t" \ 1794 /* Column 0: a0*a0; low word stored, high word carried in r12 */ \ 1795 "umull r11, r12, r2, r2 \n\t" \ 1796 "stmia r0!, {r11} \n\t" \ 1797 /* Column 1: a0*a1 added in twice; r8 becomes scratch from here on */ \ 1798 "mov r9, #0 \n\t" \ 1799 "umull r10, r11, r2, r3 \n\t" \ 1800 "adds r12, r12, r10 \n\t" \ 1801 "adcs r8, r11, #0 \n\t" \ 1802 "adc r9, r9, #0 \n\t" \ 1803 "adds r12, r12, r10 \n\t" \ 1804 "adcs r8, r8, r11 \n\t" \ 1805 "adc r9, r9, #0 \n\t" \ 1806 "stmia r0!, {r12} \n\t" \ 1807 /* Column 2: 2*a0*a2 + a1*a1 */ \ 1808 "mov r10, #0 \n\t" \ 1809 "umull r11, r12, r2, r4 \n\t" \ 1810 "adds r11, r11, r11 \n\t" \ 1811 "adcs r12, r12, r12 \n\t" \ 1812 "adc r10, r10, #0 \n\t" \ 1813 "adds r8, r8, r11 \n\t" \ 1814 "adcs r9, r9, r12 \n\t" \ 1815 "adc r10, r10, #0 \n\t" \ 1816 "umull r11, r12, r3, r3 \n\t" \ 1817 "adds r8, r8, r11 \n\t" \ 1818 "adcs r9, r9, r12 \n\t" \ 1819 "adc r10, r10, #0 \n\t" \ 1820 "stmia r0!, {r8} \n\t" \ 1821 /* Column 3: 2*(a0*a3 + a1*a2). umlal sets no flags, so the high word is copied to r14 beforehand and compared afterwards; if it decreased, the accumulate wrapped and the third word is incremented */ \ 1822 "mov r12, #0 \n\t" \ 1823 "umull r8, r11, r2, r5 \n\t" \ 1824 "mov r14, r11 \n\t" \ 1825 "umlal r8, r11, r3, r4 \n\t" \ 1826 "cmp r14, r11 \n\t" \ 1827 "it hi \n\t" \ 1828 "adchi r12, r12, #0 \n\t" \ 1829 "adds r8, r8, r8 \n\t" \ 1830 "adcs r11, r11, r11 \n\t" \ 1831 "adc r12, r12, r12 \n\t" \ 1832 "adds r8, r8, r9 \n\t" \ 1833 "adcs r11, r11, r10 \n\t" \ 1834 "adc r12, r12, #0 \n\t" \ 1835 "stmia r0!, {r8} \n\t" \ 1836 /* Column 4: 2*(a0*a4 + a1*a3) + a2*a2, plus the carry held in r11:r12 */ \ 1837 "mov r10, #0 \n\t" \ 1838 "umull r8, r9, r2, r6 \n\t" \ 1839 "mov r14, r9 \n\t" \ 1840 "umlal r8, r9, r3, r5 \n\t" \ 1841 "cmp r14, r9 \n\t" \ 1842 "it hi \n\t" \ 1843 "adchi r10, r10, #0 \n\t" \ 1844 "adds r8, r8, r8 \n\t" \ 1845 "adcs r9, r9, r9 \n\t" \ 1846 "adc r10, r10, r10 \n\t" \ 1847 "mov r14, r9 \n\t" \ 1848 "umlal r8, r9, r4, r4 \n\t" \ 1849 "cmp r14, r9 \n\t" \ 1850 "it hi \n\t" \ 1851 "adchi r10, r10, #0 \n\t" \ 1852 "adds r8, r8, r11 \n\t" \ 1853 "adcs r9, r9, r12 \n\t" \ 1854 "adc r10, r10, #0 \n\t" \ 1855 "stmia r0!, {r8} \n\t" \ 1856 /* Column 5: 2*(a0*a5 + a1*a4 + a2*a3), plus the carry held in r9:r10 */ \ 1857 "mov r12, #0 \n\t" \ 1858 "umull r8, r11, r2, r7 \n\t" \ 1859 "mov r14, r11 \n\t" \ 1860 
"umlal r8, r11, r3, r6 \n\t" \ 1861 "cmp r14, r11 \n\t" \ 1862 "it hi \n\t" \ 1863 "adchi r12, r12, #0 \n\t" \ 1864 "mov r14, r11 \n\t" \ 1865 "umlal r8, r11, r4, r5 \n\t" \ 1866 "cmp r14, r11 \n\t" \ 1867 "it hi \n\t" \ 1868 "adchi r12, r12, #0 \n\t" \ 1869 "adds r8, r8, r8 \n\t" \ 1870 "adcs r11, r11, r11 \n\t" \ 1871 "adc r12, r12, r12 \n\t" \ 1872 "adds r8, r8, r9 \n\t" \ 1873 "adcs r11, r11, r10 \n\t" \ 1874 "adc r12, r12, #0 \n\t" \ 1875 "stmia r0!, {r8} \n\t" \ 1876 /* Column 6: r2 is reloaded with a6 (a0 is no longer needed). 2*(a1*a5 + a2*a4 + parked word at [r0]) + a3*a3, plus the carry held in r11:r12 */ \ 1877 "ldmia r1!, {r2} \n\t" \ 1878 "mov r10, #0 \n\t" \ 1879 "umull r8, r9, r3, r7 \n\t" \ 1880 "mov r14, r9 \n\t" \ 1881 "umlal r8, r9, r4, r6 \n\t" \ 1882 "cmp r14, r9 \n\t" \ 1883 "it hi \n\t" \ 1884 "adchi r10, r10, #0 \n\t" \ 1885 "ldr r14, [r0] \n\t" \ 1886 "adds r8, r8, r14 \n\t" \ 1887 "adcs r9, r9, #0 \n\t" \ 1888 "adc r10, r10, #0 \n\t" \ 1889 "adds r8, r8, r8 \n\t" \ 1890 "adcs r9, r9, r9 \n\t" \ 1891 "adc r10, r10, r10 \n\t" \ 1892 "mov r14, r9 \n\t" \ 1893 "umlal r8, r9, r5, r5 \n\t" \ 1894 "cmp r14, r9 \n\t" \ 1895 "it hi \n\t" \ 1896 "adchi r10, r10, #0 \n\t" \ 1897 "adds r8, r8, r11 \n\t" \ 1898 "adcs r9, r9, r12 \n\t" \ 1899 "adc r10, r10, #0 \n\t" \ 1900 "stmia r0!, {r8} \n\t" \ 1901 /* Column 7: 2*(a1*a6 + a2*a5 + a3*a4 + parked word at [r0]), plus the carry held in r9:r10 */ \ 1902 "mov r12, #0 \n\t" \ 1903 "umull r8, r11, r3, r2 \n\t" \ 1904 "mov r14, r11 \n\t" \ 1905 "umlal r8, r11, r4, r7 \n\t" \ 1906 "cmp r14, r11 \n\t" \ 1907 "it hi \n\t" \ 1908 "adchi r12, r12, #0 \n\t" \ 1909 "mov r14, r11 \n\t" \ 1910 "umlal r8, r11, r5, r6 \n\t" \ 1911 "cmp r14, r11 \n\t" \ 1912 "it hi \n\t" \ 1913 "adchi r12, r12, #0 \n\t" \ 1914 "ldr r14, [r0] \n\t" \ 1915 "adds r8, r8, r14 \n\t" \ 1916 "adcs r11, r11, #0 \n\t" \ 1917 "adc r12, r12, #0 \n\t" \ 1918 "adds r8, r8, r8 \n\t" \ 1919 "adcs r11, r11, r11 \n\t" \ 1920 "adc r12, r12, r12 \n\t" \ 1921 "adds r8, r8, r9 \n\t" \ 1922 "adcs r11, r11, r10 \n\t" \ 1923 "adc r12, r12, #0 \n\t" \ 1924 "stmia r0!, {r8} \n\t" \ 1925 /* Column 8: 2*(a2*a6 + a3*a5) + a4*a4, plus the carry held in r11:r12 */ \ 1926 "mov r10, #0 \n\t" \ 1927 "umull r8, r9, r4, r2 \n\t" \ 1928 "mov r14, r9 \n\t" \ 1929 "umlal r8, r9, r5, 
r7 \n\t" \ 1930 "cmp r14, r9 \n\t" \ 1931 "it hi \n\t" \ 1932 "adchi r10, r10, #0 \n\t" \ 1933 "adds r8, r8, r8 \n\t" \ 1934 "adcs r9, r9, r9 \n\t" \ 1935 "adc r10, r10, r10 \n\t" \ 1936 "mov r14, r9 \n\t" \ 1937 "umlal r8, r9, r6, r6 \n\t" \ 1938 "cmp r14, r9 \n\t" \ 1939 "it hi \n\t" \ 1940 "adchi r10, r10, #0 \n\t" \ 1941 "adds r8, r8, r11 \n\t" \ 1942 "adcs r9, r9, r12 \n\t" \ 1943 "adc r10, r10, #0 \n\t" \ 1944 "stmia r0!, {r8} \n\t" \ 1945 /* Column 9: 2*(a3*a6 + a4*a5), plus the carry held in r9:r10 (a6 is in r2 here) */ \ 1946 "mov r12, #0 \n\t" \ 1947 "umull r8, r11, r5, r2 \n\t" \ 1948 "mov r14, r11 \n\t" \ 1949 "umlal r8, r11, r6, r7 \n\t" \ 1950 "cmp r14, r11 \n\t" \ 1951 "it hi \n\t" \ 1952 "adchi r12, r12, #0 \n\t" \ 1953 "adds r8, r8, r8 \n\t" \ 1954 "adcs r11, r11, r11 \n\t" \ 1955 "adc r12, r12, r12 \n\t" \ 1956 "adds r8, r8, r9 \n\t" \ 1957 "adcs r11, r11, r10 \n\t" \ 1958 "adc r12, r12, #0 \n\t" \ 1959 "stmia r0!, {r8} \n\t" \ 1960 /* Column 10: 2*a4*a6 + a5*a5, accumulated on top of r11:r12 */ \ 1961 "mov r8, #0 \n\t" \ 1962 "umull r1, r10, r6, r2 \n\t" \ 1963 "adds r1, r1, r1 \n\t" \ 1964 "adcs r10, r10, r10 \n\t" \ 1965 "adc r8, r8, #0 \n\t" \ 1966 "adds r11, r11, r1 \n\t" \ 1967 "adcs r12, r12, r10 \n\t" \ 1968 "adc r8, r8, #0 \n\t" \ 1969 "umull r1, r10, r7, r7 \n\t" \ 1970 "adds r11, r11, r1 \n\t" \ 1971 "adcs r12, r12, r10 \n\t" \ 1972 "adc r8, r8, #0 \n\t" \ 1973 "stmia r0!, {r11} \n\t" \ 1974 /* Column 11: 2*a5*a6, accumulated on top of r12:r8 */ \ 1975 "mov r11, #0 \n\t" \ 1976 "umull r1, r10, r7, r2 \n\t" \ 1977 "adds r1, r1, r1 \n\t" \ 1978 "adcs r10, r10, r10 \n\t" \ 1979 "adc r11, r11, #0 \n\t" \ 1980 "adds r12, r12, r1 \n\t" \ 1981 "adcs r8, r8, r10 \n\t" \ 1982 "adc r11, r11, #0 \n\t" \ 1983 "stmia r0!, {r12} \n\t" \ 1984 /* Columns 12 and 13: a6*a6 plus carry; the last two words are stored, then r1 and r2 are popped */ \ 1985 "umull r1, r10, r2, r2 \n\t" \ 1986 "adds r8, r8, r1 \n\t" \ 1987 "adcs r11, r11, r10 \n\t" \ 1988 "stmia r0!, {r8, r11} \n\t" \ 1989 "pop {r1, r2} \n\t" 1990 /* FAST_SQUARE_ASM_7_TO_8: extends a 7-word square already written behind r0 to an 8-word square. When r2 == 7 it branches forward to local label 1, which is not defined inside this macro. Otherwise it rewinds r0 and r1 by 28 bytes, loads eight input words, adds twice the cross products a[i]*a[7] (i = 0..6) onto the seven words at r0, adds a[7]*a[7] on top, and stores nine words. Uses r3-r12 and r14. NOTE(review): presumably r2 holds the word count and this runs directly after FAST_SQUARE_ASM_7 - confirm against the caller. */ 1991#define FAST_SQUARE_ASM_7_TO_8 \ 1992 "cmp r2, #7 \n\t" \ 1993 "beq 1f \n\t" \ 1994 /* Step r0 back seven result words and r1 back seven input words */ \ 1995 "sub r0, #28 \n\t" \ 1996 "sub r1, #28 \n\t" \ 1997 \ 1998 /* Do off-center multiplication: r3..r10 = (a0..a6) * a7, with a7 in r14 */ \ 1999 "ldmia r1!, {r6,r7,r8,r9,r10,r11,r12,r14} \n\t" \ 
2000 "umull r3, r4, r6, r14 \n\t" \ 2001 "umull r6, r5, r7, r14 \n\t" \ 2002 "adds r4, r4, r6 \n\t" \ 2003 "umull r7, r6, r8, r14 \n\t" \ 2004 "adcs r5, r5, r7 \n\t" \ 2005 "umull r8, r7, r9, r14 \n\t" \ 2006 "adcs r6, r6, r8 \n\t" \ 2007 "umull r9, r8, r10, r14 \n\t" \ 2008 "adcs r7, r7, r9 \n\t" \ 2009 "umull r10, r9, r11, r14 \n\t" \ 2010 "adcs r8, r8, r10 \n\t" \ 2011 "umull r11, r10, r12, r14 \n\t" \ 2012 "adcs r9, r9, r11 \n\t" \ 2013 "adcs r10, r10, #0 \n\t" \ 2014 \ 2015 /* Multiply by 2; the bit shifted out of r10 lands in r11 */ \ 2016 "mov r11, #0 \n\t" \ 2017 "adds r3, r3, r3 \n\t" \ 2018 "adcs r4, r4, r4 \n\t" \ 2019 "adcs r5, r5, r5 \n\t" \ 2020 "adcs r6, r6, r6 \n\t" \ 2021 "adcs r7, r7, r7 \n\t" \ 2022 "adcs r8, r8, r8 \n\t" \ 2023 "adcs r9, r9, r9 \n\t" \ 2024 "adcs r10, r10, r10 \n\t" \ 2025 "adcs r11, r11, #0 \n\t" \ 2026 \ 2027 /* Add into previous: the seven result words read back through r0 (r12 is the temporary because r14 holds a7) */ \ 2028 "ldr r12, [r0], #4 \n\t" \ 2029 "adds r3, r3, r12 \n\t" \ 2030 "ldr r12, [r0], #4 \n\t" \ 2031 "adcs r4, r4, r12 \n\t" \ 2032 "ldr r12, [r0], #4 \n\t" \ 2033 "adcs r5, r5, r12 \n\t" \ 2034 "ldr r12, [r0], #4 \n\t" \ 2035 "adcs r6, r6, r12 \n\t" \ 2036 "ldr r12, [r0], #4 \n\t" \ 2037 "adcs r7, r7, r12 \n\t" \ 2038 "ldr r12, [r0], #4 \n\t" \ 2039 "adcs r8, r8, r12 \n\t" \ 2040 "ldr r12, [r0], #4 \n\t" \ 2041 "adcs r9, r9, r12 \n\t" \ 2042 "adcs r10, r10, #0 \n\t" \ 2043 "adcs r11, r11, #0 \n\t" \ 2044 "sub r0, #28 \n\t" \ 2045 \ 2046 /* Perform center multiplication: r11:r10 += a7*a7, then store nine words */ \ 2047 "umlal r10, r11, r14, r14 \n\t" \ 2048 "stmia r0!, {r3,r4,r5,r6,r7,r8,r9,r10,r11} \n\t" 2049 /* FAST_SQUARE_ASM_8: loads eight 32-bit words a0..a7 from [r1] into r2..r9 and stores the sixteen words of their square through r0 (post-incremented). r8 and r9 are reused as scratch, so a0*a6, a0*a7 and a1*a7 are parked in the result buffer first, and a6 and a7 are reloaded later through r1, which is stepped back 8 bytes here to point at a6. r2 is pushed on entry; the closing pop restores it and leaves r1 just past the eight input words. r1, r8-r12 and r14 are used as scratch. */ 2050#define FAST_SQUARE_ASM_8 \ 2051 "push {r2} \n\t" \ 2052 "ldmia r1!, {r2,r3,r4,r5,r6,r7,r8,r9} \n\t" \ 2053 "push {r1} \n\t" \ 2054 "sub r1, 8 \n\t" \ 2055 /* Park a0*a6 + (a0*a7 << 32) + (a1*a7 << 64) in result words 6..9, then rewind r0 to result word 0 */ \ 2056 "add r0, 24 \n\t" \ 2057 "umull r10, r11, r2, r8 \n\t" \ 2058 "umull r12, r14, r2, r9 \n\t" \ 2059 "umull r8, r9, r3, r9 \n\t" \ 2060 "adds r11, r11, r12 \n\t" \ 2061 "adcs r12, r14, r8 \n\t" \ 2062 "adcs r14, r9, #0 \n\t" \ 2063 "stmia r0!, {r10, r11, r12, r14} \n\t" \ 2064 "sub r0, 40 \n\t" \ 2065 /* Column 0: a0*a0; low word stored, high word carried in r12 */ \ 2066 
"umull r11, r12, r2, r2 \n\t" \ 2067 "stmia r0!, {r11} \n\t" \ 2068 /* Column 1: a0*a1 added in twice; carry continues in r8:r9 */ \ 2069 "mov r9, #0 \n\t" \ 2070 "umull r10, r11, r2, r3 \n\t" \ 2071 "adds r12, r12, r10 \n\t" \ 2072 "adcs r8, r11, #0 \n\t" \ 2073 "adc r9, r9, #0 \n\t" \ 2074 "adds r12, r12, r10 \n\t" \ 2075 "adcs r8, r8, r11 \n\t" \ 2076 "adc r9, r9, #0 \n\t" \ 2077 "stmia r0!, {r12} \n\t" \ 2078 /* Column 2: 2*a0*a2 + a1*a1 */ \ 2079 "mov r10, #0 \n\t" \ 2080 "umull r11, r12, r2, r4 \n\t" \ 2081 "adds r11, r11, r11 \n\t" \ 2082 "adcs r12, r12, r12 \n\t" \ 2083 "adc r10, r10, #0 \n\t" \ 2084 "adds r8, r8, r11 \n\t" \ 2085 "adcs r9, r9, r12 \n\t" \ 2086 "adc r10, r10, #0 \n\t" \ 2087 "umull r11, r12, r3, r3 \n\t" \ 2088 "adds r8, r8, r11 \n\t" \ 2089 "adcs r9, r9, r12 \n\t" \ 2090 "adc r10, r10, #0 \n\t" \ 2091 "stmia r0!, {r8} \n\t" \ 2092 /* Column 3: 2*(a0*a3 + a1*a2). umlal sets no flags, so the high word is copied to r14 beforehand and compared afterwards; if it decreased, the accumulate wrapped and the third word is incremented */ \ 2093 "mov r12, #0 \n\t" \ 2094 "umull r8, r11, r2, r5 \n\t" \ 2095 "mov r14, r11 \n\t" \ 2096 "umlal r8, r11, r3, r4 \n\t" \ 2097 "cmp r14, r11 \n\t" \ 2098 "it hi \n\t" \ 2099 "adchi r12, r12, #0 \n\t" \ 2100 "adds r8, r8, r8 \n\t" \ 2101 "adcs r11, r11, r11 \n\t" \ 2102 "adc r12, r12, r12 \n\t" \ 2103 "adds r8, r8, r9 \n\t" \ 2104 "adcs r11, r11, r10 \n\t" \ 2105 "adc r12, r12, #0 \n\t" \ 2106 "stmia r0!, {r8} \n\t" \ 2107 /* Column 4: 2*(a0*a4 + a1*a3) + a2*a2, plus the carry held in r11:r12 */ \ 2108 "mov r10, #0 \n\t" \ 2109 "umull r8, r9, r2, r6 \n\t" \ 2110 "mov r14, r9 \n\t" \ 2111 "umlal r8, r9, r3, r5 \n\t" \ 2112 "cmp r14, r9 \n\t" \ 2113 "it hi \n\t" \ 2114 "adchi r10, r10, #0 \n\t" \ 2115 "adds r8, r8, r8 \n\t" \ 2116 "adcs r9, r9, r9 \n\t" \ 2117 "adc r10, r10, r10 \n\t" \ 2118 "mov r14, r9 \n\t" \ 2119 "umlal r8, r9, r4, r4 \n\t" \ 2120 "cmp r14, r9 \n\t" \ 2121 "it hi \n\t" \ 2122 "adchi r10, r10, #0 \n\t" \ 2123 "adds r8, r8, r11 \n\t" \ 2124 "adcs r9, r9, r12 \n\t" \ 2125 "adc r10, r10, #0 \n\t" \ 2126 "stmia r0!, {r8} \n\t" \ 2127 /* Column 5: 2*(a0*a5 + a1*a4 + a2*a3), plus the carry held in r9:r10 */ \ 2128 "mov r12, #0 \n\t" \ 2129 "umull r8, r11, r2, r7 \n\t" \ 2130 "mov r14, r11 \n\t" \ 2131 "umlal r8, r11, r3, r6 \n\t" \ 2132 "cmp r14, r11 \n\t" \ 2133 "it hi \n\t" \ 2134 "adchi r12, r12, #0 \n\t" \ 2135 "mov r14, r11 \n\t" 
\ 2136 "umlal r8, r11, r4, r5 \n\t" \ 2137 "cmp r14, r11 \n\t" \ 2138 "it hi \n\t" \ 2139 "adchi r12, r12, #0 \n\t" \ 2140 "adds r8, r8, r8 \n\t" \ 2141 "adcs r11, r11, r11 \n\t" \ 2142 "adc r12, r12, r12 \n\t" \ 2143 "adds r8, r8, r9 \n\t" \ 2144 "adcs r11, r11, r10 \n\t" \ 2145 "adc r12, r12, #0 \n\t" \ 2146 "stmia r0!, {r8} \n\t" \ 2147 /* Column 6: r2 is reloaded with a6 (a0 is no longer needed). 2*(a1*a5 + a2*a4 + parked word at [r0]) + a3*a3, plus the carry held in r11:r12 */ \ 2148 "ldmia r1!, {r2} \n\t" \ 2149 "mov r10, #0 \n\t" \ 2150 "umull r8, r9, r3, r7 \n\t" \ 2151 "mov r14, r9 \n\t" \ 2152 "umlal r8, r9, r4, r6 \n\t" \ 2153 "cmp r14, r9 \n\t" \ 2154 "it hi \n\t" \ 2155 "adchi r10, r10, #0 \n\t" \ 2156 "ldr r14, [r0] \n\t" \ 2157 "adds r8, r8, r14 \n\t" \ 2158 "adcs r9, r9, #0 \n\t" \ 2159 "adc r10, r10, #0 \n\t" \ 2160 "adds r8, r8, r8 \n\t" \ 2161 "adcs r9, r9, r9 \n\t" \ 2162 "adc r10, r10, r10 \n\t" \ 2163 "mov r14, r9 \n\t" \ 2164 "umlal r8, r9, r5, r5 \n\t" \ 2165 "cmp r14, r9 \n\t" \ 2166 "it hi \n\t" \ 2167 "adchi r10, r10, #0 \n\t" \ 2168 "adds r8, r8, r11 \n\t" \ 2169 "adcs r9, r9, r12 \n\t" \ 2170 "adc r10, r10, #0 \n\t" \ 2171 "stmia r0!, {r8} \n\t" \ 2172 /* Column 7: 2*(a1*a6 + a2*a5 + a3*a4 + parked word at [r0]), plus the carry held in r9:r10 */ \ 2173 "mov r12, #0 \n\t" \ 2174 "umull r8, r11, r3, r2 \n\t" \ 2175 "mov r14, r11 \n\t" \ 2176 "umlal r8, r11, r4, r7 \n\t" \ 2177 "cmp r14, r11 \n\t" \ 2178 "it hi \n\t" \ 2179 "adchi r12, r12, #0 \n\t" \ 2180 "mov r14, r11 \n\t" \ 2181 "umlal r8, r11, r5, r6 \n\t" \ 2182 "cmp r14, r11 \n\t" \ 2183 "it hi \n\t" \ 2184 "adchi r12, r12, #0 \n\t" \ 2185 "ldr r14, [r0] \n\t" \ 2186 "adds r8, r8, r14 \n\t" \ 2187 "adcs r11, r11, #0 \n\t" \ 2188 "adc r12, r12, #0 \n\t" \ 2189 "adds r8, r8, r8 \n\t" \ 2190 "adcs r11, r11, r11 \n\t" \ 2191 "adc r12, r12, r12 \n\t" \ 2192 "adds r8, r8, r9 \n\t" \ 2193 "adcs r11, r11, r10 \n\t" \ 2194 "adc r12, r12, #0 \n\t" \ 2195 "stmia r0!, {r8} \n\t" \ 2196 /* Column 8: r3 is reloaded with a7 (a1 is no longer needed). 2*(a2*a6 + a3*a5 + parked word at [r0]) + a4*a4, plus the carry held in r11:r12 */ \ 2197 "ldmia r1!, {r3} \n\t" \ 2198 "mov r10, #0 \n\t" \ 2199 "umull r8, r9, r4, r2 \n\t" \ 2200 "mov r14, r9 \n\t" \ 2201 "umlal r8, r9, r5, r7 \n\t" \ 2202 "cmp r14, r9 \n\t" \ 2203 "it hi \n\t" \ 2204 "adchi r10, r10, #0 \n\t" \ 2205 "ldr r14, 
[r0] \n\t" \ 2206 "adds r8, r8, r14 \n\t" \ 2207 "adcs r9, r9, #0 \n\t" \ 2208 "adc r10, r10, #0 \n\t" \ 2209 "adds r8, r8, r8 \n\t" \ 2210 "adcs r9, r9, r9 \n\t" \ 2211 "adc r10, r10, r10 \n\t" \ 2212 "mov r14, r9 \n\t" \ 2213 "umlal r8, r9, r6, r6 \n\t" \ 2214 "cmp r14, r9 \n\t" \ 2215 "it hi \n\t" \ 2216 "adchi r10, r10, #0 \n\t" \ 2217 "adds r8, r8, r11 \n\t" \ 2218 "adcs r9, r9, r12 \n\t" \ 2219 "adc r10, r10, #0 \n\t" \ 2220 "stmia r0!, {r8} \n\t" \ 2221 /* Column 9: 2*(a2*a7 + a3*a6 + a4*a5 + parked word at [r0]), plus the carry held in r9:r10 */ \ 2222 "mov r12, #0 \n\t" \ 2223 "umull r8, r11, r4, r3 \n\t" \ 2224 "mov r14, r11 \n\t" \ 2225 "umlal r8, r11, r5, r2 \n\t" \ 2226 "cmp r14, r11 \n\t" \ 2227 "it hi \n\t" \ 2228 "adchi r12, r12, #0 \n\t" \ 2229 "mov r14, r11 \n\t" \ 2230 "umlal r8, r11, r6, r7 \n\t" \ 2231 "cmp r14, r11 \n\t" \ 2232 "it hi \n\t" \ 2233 "adchi r12, r12, #0 \n\t" \ 2234 "ldr r14, [r0] \n\t" \ 2235 "adds r8, r8, r14 \n\t" \ 2236 "adcs r11, r11, #0 \n\t" \ 2237 "adc r12, r12, #0 \n\t" \ 2238 "adds r8, r8, r8 \n\t" \ 2239 "adcs r11, r11, r11 \n\t" \ 2240 "adc r12, r12, r12 \n\t" \ 2241 "adds r8, r8, r9 \n\t" \ 2242 "adcs r11, r11, r10 \n\t" \ 2243 "adc r12, r12, #0 \n\t" \ 2244 "stmia r0!, {r8} \n\t" \ 2245 /* Column 10: 2*(a3*a7 + a4*a6) + a5*a5, plus the carry held in r11:r12 */ \ 2246 "mov r10, #0 \n\t" \ 2247 "umull r8, r9, r5, r3 \n\t" \ 2248 "mov r14, r9 \n\t" \ 2249 "umlal r8, r9, r6, r2 \n\t" \ 2250 "cmp r14, r9 \n\t" \ 2251 "it hi \n\t" \ 2252 "adchi r10, r10, #0 \n\t" \ 2253 "adds r8, r8, r8 \n\t" \ 2254 "adcs r9, r9, r9 \n\t" \ 2255 "adc r10, r10, r10 \n\t" \ 2256 "mov r14, r9 \n\t" \ 2257 "umlal r8, r9, r7, r7 \n\t" \ 2258 "cmp r14, r9 \n\t" \ 2259 "it hi \n\t" \ 2260 "adchi r10, r10, #0 \n\t" \ 2261 "adds r8, r8, r11 \n\t" \ 2262 "adcs r9, r9, r12 \n\t" \ 2263 "adc r10, r10, #0 \n\t" \ 2264 "stmia r0!, {r8} \n\t" \ 2265 /* Column 11: 2*(a4*a7 + a5*a6), plus the carry held in r9:r10 */ \ 2266 "mov r12, #0 \n\t" \ 2267 "umull r8, r11, r6, r3 \n\t" \ 2268 "mov r14, r11 \n\t" \ 2269 "umlal r8, r11, r7, r2 \n\t" \ 2270 "cmp r14, r11 \n\t" \ 2271 "it hi \n\t" \ 2272 "adchi r12, r12, #0 \n\t" \ 2273 "adds r8, r8, r8 \n\t" \ 2274 "adcs r11, r11, r11 \n\t" \ 2275 
"adc r12, r12, r12 \n\t" \ 2276 "adds r8, r8, r9 \n\t" \ 2277 "adcs r11, r11, r10 \n\t" \ 2278 "adc r12, r12, #0 \n\t" \ 2279 "stmia r0!, {r8} \n\t" \ 2280 /* Column 12: 2*a5*a7 + a6*a6, accumulated on top of r11:r12 */ \ 2281 "mov r8, #0 \n\t" \ 2282 "umull r1, r10, r7, r3 \n\t" \ 2283 "adds r1, r1, r1 \n\t" \ 2284 "adcs r10, r10, r10 \n\t" \ 2285 "adc r8, r8, #0 \n\t" \ 2286 "adds r11, r11, r1 \n\t" \ 2287 "adcs r12, r12, r10 \n\t" \ 2288 "adc r8, r8, #0 \n\t" \ 2289 "umull r1, r10, r2, r2 \n\t" \ 2290 "adds r11, r11, r1 \n\t" \ 2291 "adcs r12, r12, r10 \n\t" \ 2292 "adc r8, r8, #0 \n\t" \ 2293 "stmia r0!, {r11} \n\t" \ 2294 /* Column 13: 2*a6*a7, accumulated on top of r12:r8 */ \ 2295 "mov r11, #0 \n\t" \ 2296 "umull r1, r10, r2, r3 \n\t" \ 2297 "adds r1, r1, r1 \n\t" \ 2298 "adcs r10, r10, r10 \n\t" \ 2299 "adc r11, r11, #0 \n\t" \ 2300 "adds r12, r12, r1 \n\t" \ 2301 "adcs r8, r8, r10 \n\t" \ 2302 "adc r11, r11, #0 \n\t" \ 2303 "stmia r0!, {r12} \n\t" \ 2304 /* Columns 14 and 15: a7*a7 plus carry; the last two words are stored, then r1 and r2 are popped */ \ 2305 "umull r1, r10, r3, r3 \n\t" \ 2306 "adds r8, r8, r1 \n\t" \ 2307 "adcs r11, r11, r10 \n\t" \ 2308 "stmia r0!, {r8, r11} \n\t" \ 2309 "pop {r1, r2} \n\t" 2310 2311#endif /* _UECC_ASM_ARM_MULT_SQUARE_H_ */ 2312