%def binop(preinstr="", result="r0", chkzero="0", instr=""):
    /*
     * Generic 32-bit binary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = r0 op r1".
     * This could be an ARM instruction or a function call.  (If the result
     * comes back in a register other than r0, you can override "result".)
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (r1).  Useful for integer division and modulus.  Note that we
     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
     * handles it correctly.
     *
     * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
     *      xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
     *      mul-float, div-float, rem-float
     */
    /* binop vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r4, rINST, lsr #8           @ r4<- AA
    mov     r3, r0, lsr #8              @ r3<- CC
    and     r2, r0, #255                @ r2<- BB
    GET_VREG r1, r3                     @ r1<- vCC
    GET_VREG r0, r2                     @ r0<- vBB
    .if $chkzero
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    .endif

    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    $preinstr                           @ optional op; may set condition codes
    $instr                              @ $result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG $result, r4                @ vAA<- $result
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 11-14 instructions */
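    /*
     * Example invocation (see the %def op_* entries below): add-int
     * instantiates this template as
     *     binop(instr="add r0, r0, r1")
     * and trapping division-style ops additionally pass chkzero="1", as
     * op_div_long below does with the wide variant of this template.
     */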
%def binop2addr(preinstr="", result="r0", chkzero="0", instr=""):
    /*
     * Generic 32-bit "/2addr" binary operation.  Provide an "instr" line
     * that specifies an instruction that performs "result = r0 op r1".
     * This could be an ARM instruction or a function call.  (If the result
     * comes back in a register other than r0, you can override "result".)
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vB (r1).  Useful for integer division and modulus.
     *
     * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
     *      rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
     *      shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
     *      sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
     */
    /* binop/2addr vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    GET_VREG r1, r3                     @ r1<- vB
    GET_VREG r0, r4                     @ r0<- vA
    .if $chkzero
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    .endif
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST

    $preinstr                           @ optional op; may set condition codes
    $instr                              @ $result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG $result, r4                @ vA<- $result
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */

%def binopLit16(result="r0", chkzero="0", instr=""):
    /*
     * Generic 32-bit "lit16" binary operation.  Provide an "instr" line
     * that specifies an instruction that performs "result = r0 op r1".
     * This could be an ARM instruction or a function call.  (If the result
     * comes back in a register other than r0, you can override "result".)
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * the CCCC literal (r1).  Useful for integer division and modulus.
     *
     * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
     *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
     */
    /* binop/lit16 vA, vB, #+CCCC */
    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
    mov     r2, rINST, lsr #12          @ r2<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    GET_VREG r0, r2                     @ r0<- vB
    .if $chkzero
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    .endif
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

    $instr                              @ $result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG $result, r4                @ vA<- $result
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */

%def binopLit8(extract="asr r1, r3, #8", result="r0", chkzero="0", instr=""):
    /*
     * Generic 32-bit "lit8" binary operation.  Provide an "instr" line
     * that specifies an instruction that performs "result = r0 op r1".
     * This could be an ARM instruction or a function call.  (If the result
     * comes back in a register other than r0, you can override "result".)
     *
     * You can override "extract" if the extraction of the literal value
     * from r3 to r1 is not the default "asr r1, r3, #8".  The extraction
     * can be omitted completely if the shift is embedded in "instr".
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * the CC literal (r1); "extract" must then set the condition flags
     * (e.g. with movs).  Useful for integer division and modulus.
     *
     * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
     *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
     *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
     */
    /* binop/lit8 vAA, vBB, #+CC */
    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
    mov     r4, rINST, lsr #8           @ r4<- AA
    and     r2, r3, #255                @ r2<- BB
    GET_VREG r0, r2                     @ r0<- vBB
    $extract                            @ optional; typically r1<- ssssssCC (sign extended)
    .if $chkzero
    @cmp    r1, #0                      @ not needed: a flag-setting $extract supplies Z
    beq     common_errDivideByZero
    .endif
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

    $instr                              @ $result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG $result, r4                @ vAA<- $result
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-12 instructions */
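    /*
     * Example: when the literal can be consumed directly as a shifted
     * operand, the extraction is folded into the ALU op itself;
     * add-int/lit8 below passes
     *     binopLit8(extract="", instr="add r0, r0, r3, asr #8")
     * so no separate "asr r1, r3, #8" is emitted.
     */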
%def binopWide(preinstr="", result0="r0", result1="r1", chkzero="0", instr=""):
    /*
     * Generic 64-bit binary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = r0-r1 op r2-r3".
     * This could be an ARM instruction or a function call.  (If the result
     * comes back in registers other than r0-r1, you can override "result0"
     * and "result1".)
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (r2-r3).  Useful for integer division and modulus.
     *
     * For: add-long, sub-long, div-long, rem-long, and-long, or-long,
     *      xor-long, add-double, sub-double, mul-double, div-double,
     *      rem-double
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     */
    /* binop vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     rINST, rINST, lsr #8        @ rINST<- AA
    and     r2, r0, #255                @ r2<- BB
    mov     r3, r0, lsr #8              @ r3<- CC
    VREG_INDEX_TO_ADDR r4, rINST        @ r4<- &fp[AA]
    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
    .if $chkzero
    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
    beq     common_errDivideByZero
    .endif
    CLEAR_SHADOW_PAIR rINST, lr, ip     @ Zero out the shadow regs
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    $preinstr                           @ optional op; may set condition codes
    $instr                              @ result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR $result0, $result1, r4   @ vAA/vAA+1<- $result0/$result1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 14-17 instructions */

%def binopWide2addr(preinstr="", result0="r0", result1="r1", chkzero="0", instr=""):
    /*
     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
     * that specifies an instruction that performs "result = r0-r1 op r2-r3".
     * This could be an ARM instruction or a function call.  (If the result
     * comes back in registers other than r0-r1, you can override "result0"
     * and "result1".)
     *
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vB (r2-r3).  Useful for integer division and modulus.
     *
     * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
     *      and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
     *      sub-double/2addr, mul-double/2addr, div-double/2addr,
     *      rem-double/2addr
     */
    /* binop/2addr vA, vB */
    mov     r1, rINST, lsr #12          @ r1<- B
    ubfx    rINST, rINST, #8, #4        @ rINST<- A
    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
    VREG_INDEX_TO_ADDR r4, rINST        @ r4<- &fp[A]
    GET_VREG_WIDE_BY_ADDR r2, r3, r1    @ r2/r3<- vB/vB+1
    GET_VREG_WIDE_BY_ADDR r0, r1, r4    @ r0/r1<- vA/vA+1
    .if $chkzero
    orrs    ip, r2, r3                  @ second arg (r2-r3) is zero?
    beq     common_errDivideByZero
    .endif
    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    $preinstr                           @ optional op; may set condition codes
    $instr                              @ result<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR $result0, $result1, r4   @ vA/vA+1<- $result0/$result1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 12-15 instructions */
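    /*
     * Example: add-long below splits the 64-bit add across preinstr/instr
     * so the carry propagates from the low word to the high word:
     *     binopWide(preinstr="adds r0, r0, r2", instr="adc r1, r1, r3")
     * ("adds" sets the carry flag; "adc" consumes it).
     */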
%def unop(preinstr="", instr=""):
    /*
     * Generic 32-bit unary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = op r0".
     * This could be an ARM instruction or a function call.
     *
     * For: neg-int, not-int, neg-float, int-to-float, float-to-int,
     *      int-to-byte, int-to-char, int-to-short
     */
    /* unop vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    GET_VREG r0, r3                     @ r0<- vB
    $preinstr                           @ optional op; may set condition codes
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    $instr                              @ r0<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r4                     @ vA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 8-9 instructions */

%def unopNarrower(preinstr="", instr=""):
    /*
     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
     * that specifies an instruction that performs "result = op r0/r1", where
     * "result" is a 32-bit quantity in r0.
     *
     * For: long-to-float
     *
     * (This would work for long-to-int, but that instruction is actually
     * an exact match for op_move.)
     */
    /* unop vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vB/vB+1
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    $preinstr                           @ optional op; may set condition codes
    $instr                              @ r0<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r4                     @ vA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 9-10 instructions */

%def unopWide(preinstr="", instr=""):
    /*
     * Generic 64-bit unary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = op r0/r1".
     * This could be an ARM instruction or a function call.
     *
     * For: neg-long, not-long, neg-double, long-to-double, double-to-long
     */
    /* unop vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    rINST, rINST, #8, #4        @ rINST<- A
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
    VREG_INDEX_TO_ADDR r4, rINST        @ r4<- &fp[A]
    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vB/vB+1
    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    $preinstr                           @ optional op; may set condition codes
    $instr                              @ r0/r1<- op, r2-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r4    @ vA/vA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-11 instructions */

%def unopWider(preinstr="", instr=""):
    /*
     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
     * that specifies an instruction that performs "result = op r0", where
     * "result" is a 64-bit quantity in r0/r1.
     *
     * For: int-to-long, int-to-double, float-to-long, float-to-double
     */
    /* unop vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    rINST, rINST, #8, #4        @ rINST<- A
    GET_VREG r0, r3                     @ r0<- vB
    VREG_INDEX_TO_ADDR r4, rINST        @ r4<- &fp[A]
    $preinstr                           @ optional op; may set condition codes
    CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    $instr                              @ r0<- op, r0-r3 changed
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r4    @ vA/vA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 9-10 instructions */

%def op_add_int():
% binop(instr="add r0, r0, r1")

%def op_add_int_2addr():
% binop2addr(instr="add r0, r0, r1")

%def op_add_int_lit16():
% binopLit16(instr="add r0, r0, r1")

%def op_add_int_lit8():
% binopLit8(extract="", instr="add r0, r0, r3, asr #8")

%def op_add_long():
% binopWide(preinstr="adds r0, r0, r2", instr="adc r1, r1, r3")

%def op_add_long_2addr():
% binopWide2addr(preinstr="adds r0, r0, r2", instr="adc r1, r1, r3")

%def op_and_int():
% binop(instr="and r0, r0, r1")

%def op_and_int_2addr():
% binop2addr(instr="and r0, r0, r1")

%def op_and_int_lit16():
% binopLit16(instr="and r0, r0, r1")

%def op_and_int_lit8():
% binopLit8(extract="", instr="and r0, r0, r3, asr #8")

%def op_and_long():
% binopWide(preinstr="and r0, r0, r2", instr="and r1, r1, r3")

%def op_and_long_2addr():
% binopWide2addr(preinstr="and r0, r0, r2", instr="and r1, r1, r3")

%def op_cmp_long():
    /*
     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
     * register based on the result of the comparison.
     */
    /* cmp-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r4, rINST, lsr #8           @ r4<- AA
    and     r2, r0, #255                @ r2<- BB
    mov     r3, r0, lsr #8              @ r3<- CC
    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
    cmp     r0, r2
    sbcs    ip, r1, r3                  @ Sets correct CCs for checking LT (but not EQ/NE)
    mov     r3, #-1
    it      ge
    movge   r3, #1
    it      eq
    cmpeq   r0, r2
    it      eq
    moveq   r3, #0
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    SET_VREG r3, r4                     @ vAA<- r3
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_div_int():
    /*
     * Specialized 32-bit binary operation.
     *
     * Performs "r0 = r0 div r1".  The selection between sdiv and the gcc
     * helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__
     * (defined for ARMv7 CPUs that have hardware division support).
     *
     * div-int
     */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r4, rINST, lsr #8           @ r4<- AA
    mov     r3, r0, lsr #8              @ r3<- CC
    and     r2, r0, #255                @ r2<- BB
    GET_VREG r1, r3                     @ r1<- vCC
    GET_VREG r0, r2                     @ r0<- vBB
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero

    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r0, r0, r1                  @ r0<- op
#else
    bl      __aeabi_idiv                @ r0<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r4                     @ vAA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 11-14 instructions */

%def op_div_int_2addr():
    /*
     * Specialized 32-bit binary operation.
     *
     * Performs "r0 = r0 div r1".  The selection between sdiv and the gcc
     * helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__
     * (defined for ARMv7 CPUs that have hardware division support).
     *
     * div-int/2addr
     */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    GET_VREG r1, r3                     @ r1<- vB
    GET_VREG r0, r4                     @ r0<- vA
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r0, r0, r1                  @ r0<- op
#else
    bl      __aeabi_idiv                @ r0<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r4                     @ vA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */
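    /*
     * Both division paths implement the Dalvik/Java semantics: the quotient
     * rounds toward zero (e.g. -7 div 2 = -3), and INT_MIN div -1 yields
     * INT_MIN rather than trapping, which is why no explicit overflow check
     * appears here (see the note in binop above).
     */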
%def op_div_int_lit16():
    /*
     * Specialized 32-bit binary operation.
     *
     * Performs "r0 = r0 div r1".  The selection between sdiv and the gcc
     * helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__
     * (defined for ARMv7 CPUs that have hardware division support).
     *
     * div-int/lit16
     */
    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
    mov     r2, rINST, lsr #12          @ r2<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    GET_VREG r0, r2                     @ r0<- vB
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r0, r0, r1                  @ r0<- op
#else
    bl      __aeabi_idiv                @ r0<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r4                     @ vA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */

%def op_div_int_lit8():
    /*
     * Specialized 32-bit binary operation.
     *
     * Performs "r0 = r0 div r1".  The selection between sdiv and the gcc
     * helper depends on the compile-time value of __ARM_ARCH_EXT_IDIV__
     * (defined for ARMv7 CPUs that have hardware division support).
     *
     * div-int/lit8
     */
    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
    mov     r4, rINST, lsr #8           @ r4<- AA
    and     r2, r3, #255                @ r2<- BB
    GET_VREG r0, r2                     @ r0<- vBB
    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended), sets flags
    @cmp    r1, #0                      @ redundant: movs above already set Z
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r0, r0, r1                  @ r0<- op
#else
    bl      __aeabi_idiv                @ r0<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r0, r4                     @ vAA<- r0
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-12 instructions */

%def op_div_long():
% binopWide(instr="bl __aeabi_ldivmod", chkzero="1")

%def op_div_long_2addr():
% binopWide2addr(instr="bl __aeabi_ldivmod", chkzero="1")

%def op_int_to_byte():
% unop(instr="sxtb r0, r0")

%def op_int_to_char():
% unop(instr="uxth r0, r0")

%def op_int_to_long():
% unopWider(instr="mov r1, r0, asr #31")

%def op_int_to_short():
% unop(instr="sxth r0, r0")

%def op_long_to_int():
/* we ignore the high word, making this equivalent to a 32-bit reg move */
% op_move()

/*
 * We use "mul r0, r1, r0" instead of "mul r0, r0, r1".  The latter was
 * illegal on older ARM revisions (Rd and Rm could not be the same).
 * Also, for T32 this operand order allows a 16-bit instruction (encoding
 * T1), while the other order would require a 32-bit instruction
 * (encoding T2).
 */

%def op_mul_int():
% binop(instr="mul r0, r1, r0")

%def op_mul_int_2addr():
% binop2addr(instr="mul r0, r1, r0")

%def op_mul_int_lit16():
% binopLit16(instr="mul r0, r1, r0")

%def op_mul_int_lit8():
% binopLit8(instr="mul r0, r1, r0")

%def op_mul_long():
    /*
     * Signed 64-bit integer multiply.
     *
     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
     *            WX
     *          x YZ
     *      --------
     *         ZW ZX
     *      YW YX
     *
     * The low word of the result holds ZX, the high word holds
     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
     * it doesn't fit in the low 64 bits.
     *
     * Unlike most ARM math operations, multiply instructions have
     * restrictions on using the same register more than once (Rd and Rm
     * cannot be the same).
     */
    /* mul-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    and     r2, r0, #255                @ r2<- BB
    mov     r3, r0, lsr #8              @ r3<- CC
    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
    GET_VREG_WIDE_BY_ADDR r0, r1, r2    @ r0/r1<- vBB/vBB+1
    GET_VREG_WIDE_BY_ADDR r2, r3, r3    @ r2/r3<- vCC/vCC+1
    mul     ip, r0, r3                  @ ip<- YxX
    umull   r0, lr, r2, r0              @ r0/lr<- ZxX (RdLo == Rn - this is OK)
    mla     r3, r1, r2, ip              @ r3<- ZxW + (YxX)
    mov     r4, rINST, lsr #8           @ r4<- AA
    add     r1, r3, lr                  @ r1<- (ZxW + YxX) + high(ZxX)
    CLEAR_SHADOW_PAIR r4, lr, ip        @ Zero out the shadow regs
    VREG_INDEX_TO_ADDR r4, r4           @ r4<- &fp[AA]
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r4    @ vAA/vAA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction
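    /*
     * Worked form of the partial products above: with vBB/vBB+1 = X/W and
     * vCC/vCC+1 = Z/Y, the 64-bit product is
     *     ZxX + ((ZxW + YxX) << 32)
     * umull produces all 64 bits of ZxX; the two cross terms only ever feed
     * the high word, so a plain mul plus an mla suffice for them.
     */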
%def op_mul_long_2addr():
    /*
     * Signed 64-bit integer multiply, "/2addr" version.
     *
     * See op_mul_long for an explanation.
     *
     * We get a little tight on registers, so to avoid looking up &fp[A]
     * again we stuff it into rINST.
     */
    /* mul-long/2addr vA, vB */
    mov     r1, rINST, lsr #12          @ r1<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
    VREG_INDEX_TO_ADDR rINST, r4        @ rINST<- &fp[A]
    GET_VREG_WIDE_BY_ADDR r2, r3, r1    @ r2/r3<- vB/vB+1
    GET_VREG_WIDE_BY_ADDR r0, r1, rINST @ r0/r1<- vA/vA+1
    mul     ip, r0, r3                  @ ip<- YxX
    umull   r0, lr, r2, r0              @ r0/lr<- ZxX (RdLo == Rn - this is OK)
    mla     r3, r1, r2, ip              @ r3<- ZxW + (YxX)
    mov     r4, rINST                   @ r4<- &fp[A] (save before rINST is reloaded)
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    add     r1, r3, lr                  @ r1<- (ZxW + YxX) + high(ZxX)
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r4    @ vA/vA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_neg_int():
% unop(instr="rsb r0, r0, #0")

%def op_neg_long():
% unopWide(preinstr="rsbs r0, r0, #0", instr="rsc r1, r1, #0")

%def op_not_int():
% unop(instr="mvn r0, r0")

%def op_not_long():
% unopWide(preinstr="mvn r0, r0", instr="mvn r1, r1")

%def op_or_int():
% binop(instr="orr r0, r0, r1")

%def op_or_int_2addr():
% binop2addr(instr="orr r0, r0, r1")

%def op_or_int_lit16():
% binopLit16(instr="orr r0, r0, r1")

%def op_or_int_lit8():
% binopLit8(extract="", instr="orr r0, r0, r3, asr #8")

%def op_or_long():
% binopWide(preinstr="orr r0, r0, r2", instr="orr r1, r1, r3")

%def op_or_long_2addr():
% binopWide2addr(preinstr="orr r0, r0, r2", instr="orr r1, r1, r3")

%def op_rem_int():
    /*
     * Specialized 32-bit binary operation.
     *
     * Performs "r1 = r0 rem r1".  The selection between the sdiv block and
     * the gcc helper depends on the compile-time value of
     * __ARM_ARCH_EXT_IDIV__ (defined for ARMv7 CPUs that have hardware
     * division support).
     *
     * NOTE: idivmod returns quotient in r0 and remainder in r1
     *
     * rem-int
     */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r4, rINST, lsr #8           @ r4<- AA
    mov     r3, r0, lsr #8              @ r3<- CC
    and     r2, r0, #255                @ r2<- BB
    GET_VREG r1, r3                     @ r1<- vCC
    GET_VREG r0, r2                     @ r0<- vBB
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero

    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r2, r0, r1
    mls     r1, r1, r2, r0              @ r1<- op, r0-r2 changed
#else
    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r1, r4                     @ vAA<- r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 11-14 instructions */
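    /*
     * The hardware path above derives the remainder from the quotient:
     *     sdiv    r2, r0, r1          @ r2<- r0 / r1
     *     mls     r1, r1, r2, r0      @ r1<- r0 - (r0 / r1) * r1
     * which lands the result in r1, matching __aeabi_idivmod's convention
     * of quotient in r0 and remainder in r1.
     */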
%def op_rem_int_2addr():
    /*
     * Specialized 32-bit binary operation.
     *
     * Performs "r1 = r0 rem r1".  The selection between the sdiv block and
     * the gcc helper depends on the compile-time value of
     * __ARM_ARCH_EXT_IDIV__ (defined for ARMv7 CPUs that have hardware
     * division support).
     *
     * NOTE: idivmod returns quotient in r0 and remainder in r1
     *
     * rem-int/2addr
     */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    GET_VREG r1, r3                     @ r1<- vB
    GET_VREG r0, r4                     @ r0<- vA
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r2, r0, r1
    mls     r1, r1, r2, r0              @ r1<- op
#else
    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r1, r4                     @ vA<- r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */

%def op_rem_int_lit16():
    /*
     * Specialized 32-bit binary operation.
     *
     * Performs "r1 = r0 rem r1".  The selection between the sdiv block and
     * the gcc helper depends on the compile-time value of
     * __ARM_ARCH_EXT_IDIV__ (defined for ARMv7 CPUs that have hardware
     * division support).
     *
     * NOTE: idivmod returns quotient in r0 and remainder in r1
     *
     * rem-int/lit16
     */
    FETCH_S r1, 1                       @ r1<- ssssCCCC (sign-extended)
    mov     r2, rINST, lsr #12          @ r2<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    GET_VREG r0, r2                     @ r0<- vB
    cmp     r1, #0                      @ is second operand zero?
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r2, r0, r1
    mls     r1, r1, r2, r0              @ r1<- op
#else
    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r1, r4                     @ vA<- r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-13 instructions */

%def op_rem_int_lit8():
    /*
     * Specialized 32-bit binary operation.
     *
     * Performs "r1 = r0 rem r1".  The selection between the sdiv block and
     * the gcc helper depends on the compile-time value of
     * __ARM_ARCH_EXT_IDIV__ (defined for ARMv7 CPUs that have hardware
     * division support).
     *
     * NOTE: idivmod returns quotient in r0 and remainder in r1
     *
     * rem-int/lit8
     */
    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
    mov     r4, rINST, lsr #8           @ r4<- AA
    and     r2, r3, #255                @ r2<- BB
    GET_VREG r0, r2                     @ r0<- vBB
    movs    r1, r3, asr #8              @ r1<- ssssssCC (sign extended), sets flags
    @cmp    r1, #0                      @ redundant: movs above already set Z
    beq     common_errDivideByZero
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST

#ifdef __ARM_ARCH_EXT_IDIV__
    sdiv    r2, r0, r1
    mls     r1, r1, r2, r0              @ r1<- op
#else
    bl      __aeabi_idivmod             @ r1<- op, r0-r3 changed
#endif
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG r1, r4                     @ vAA<- r1
    GOTO_OPCODE ip                      @ jump to next instruction
    /* 10-12 instructions */

%def op_rem_long():
/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
% binopWide(instr="bl __aeabi_ldivmod", result0="r2", result1="r3", chkzero="1")

%def op_rem_long_2addr():
/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
% binopWide2addr(instr="bl __aeabi_ldivmod", result0="r2", result1="r3", chkzero="1")

%def op_rsub_int():
/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
% binopLit16(instr="rsb r0, r0, r1")

%def op_rsub_int_lit8():
% binopLit8(extract="", instr="rsb r0, r0, r3, asr #8")

%def op_shl_int():
% binop(preinstr="and r1, r1, #31", instr="mov r0, r0, lsl r1")

%def op_shl_int_2addr():
% binop2addr(preinstr="and r1, r1, #31", instr="mov r0, r0, lsl r1")

%def op_shl_int_lit8():
% binopLit8(extract="ubfx r1, r3, #8, #5", instr="mov r0, r0, lsl r1")

%def op_shl_long():
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to use only the low
     * 6 bits of the shift distance.
     */
    /* shl-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r4, rINST, lsr #8           @ r4<- AA
    and     r3, r0, #255                @ r3<- BB
    mov     r0, r0, lsr #8              @ r0<- CC
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
    GET_VREG r2, r0                     @ r2<- vCC
    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
    CLEAR_SHADOW_PAIR r4, lr, ip        @ Zero out the shadow regs
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    VREG_INDEX_TO_ADDR r4, r4           @ r4<- &fp[AA]
    mov     r1, r1, asl r2              @ r1<- r1 << r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    it      pl
    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    mov     r0, r0, asl r2              @ r0<- r0 << r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r4    @ vAA/vAA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction
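    /*
     * Example: for a shift distance of 40, "subs ip, r2, #32" leaves
     * ip = 8 (non-negative), so the high word becomes r0 << 8 and the
     * low word's "r0 << 40" correctly produces zero (register-specified
     * shifts of 32 or more clear the result).  For distances below 32,
     * the orr instead stitches in the bits carried across the word
     * boundary.
     */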
%def op_shl_long_2addr():
    /*
     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
     * 32-bit shift distance.
     */
    /* shl-long/2addr vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    GET_VREG r2, r3                     @ r2<- vB
    CLEAR_SHADOW_PAIR r4, lr, ip        @ Zero out the shadow regs
    VREG_INDEX_TO_ADDR r4, r4           @ r4<- &fp[A]
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    GET_VREG_WIDE_BY_ADDR r0, r1, r4    @ r0/r1<- vA/vA+1
    mov     r1, r1, asl r2              @ r1<- r1 << r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    it      pl
    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
    mov     r0, r0, asl r2              @ r0<- r0 << r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r4    @ vA/vA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_shr_int():
% binop(preinstr="and r1, r1, #31", instr="mov r0, r0, asr r1")

%def op_shr_int_2addr():
% binop2addr(preinstr="and r1, r1, #31", instr="mov r0, r0, asr r1")

%def op_shr_int_lit8():
% binopLit8(extract="ubfx r1, r3, #8, #5", instr="mov r0, r0, asr r1")

%def op_shr_long():
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to use only the low
     * 6 bits of the shift distance.
     */
    /* shr-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r4, rINST, lsr #8           @ r4<- AA
    and     r3, r0, #255                @ r3<- BB
    mov     r0, r0, lsr #8              @ r0<- CC
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
    GET_VREG r2, r0                     @ r2<- vCC
    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
    CLEAR_SHADOW_PAIR r4, lr, ip        @ Zero out the shadow regs
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    VREG_INDEX_TO_ADDR r4, r4           @ r4<- &fp[AA]
    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, lsl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    it      pl
    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<- r1 >> (r2-32)
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    mov     r1, r1, asr r2              @ r1<- r1 >> r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r4    @ vAA/vAA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_shr_long_2addr():
    /*
     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
     * 32-bit shift distance.
     */
    /* shr-long/2addr vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    GET_VREG r2, r3                     @ r2<- vB
    CLEAR_SHADOW_PAIR r4, lr, ip        @ Zero out the shadow regs
    VREG_INDEX_TO_ADDR r4, r4           @ r4<- &fp[A]
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    GET_VREG_WIDE_BY_ADDR r0, r1, r4    @ r0/r1<- vA/vA+1
    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, lsl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    it      pl
    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<- r1 >> (r2-32)
    mov     r1, r1, asr r2              @ r1<- r1 >> r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r4    @ vA/vA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_sub_int():
% binop(instr="sub r0, r0, r1")

%def op_sub_int_2addr():
% binop2addr(instr="sub r0, r0, r1")

%def op_sub_long():
% binopWide(preinstr="subs r0, r0, r2", instr="sbc r1, r1, r3")

%def op_sub_long_2addr():
% binopWide2addr(preinstr="subs r0, r0, r2", instr="sbc r1, r1, r3")

%def op_ushr_int():
% binop(preinstr="and r1, r1, #31", instr="mov r0, r0, lsr r1")

%def op_ushr_int_2addr():
% binop2addr(preinstr="and r1, r1, #31", instr="mov r0, r0, lsr r1")

%def op_ushr_int_lit8():
% binopLit8(extract="ubfx r1, r3, #8, #5", instr="mov r0, r0, lsr r1")

%def op_ushr_long():
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to use only the low
     * 6 bits of the shift distance.
     */
    /* ushr-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r4, rINST, lsr #8           @ r4<- AA
    and     r3, r0, #255                @ r3<- BB
    mov     r0, r0, lsr #8              @ r0<- CC
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
    GET_VREG r2, r0                     @ r2<- vCC
    GET_VREG_WIDE_BY_ADDR r0, r1, r3    @ r0/r1<- vBB/vBB+1
    CLEAR_SHADOW_PAIR r4, lr, ip        @ Zero out the shadow regs
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    VREG_INDEX_TO_ADDR r4, r4           @ r4<- &fp[AA]
    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, lsl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    it      pl
    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<- r1 >>> (r2-32)
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r4    @ vAA/vAA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_ushr_long_2addr():
    /*
     * Long integer shift, 2addr version.  vA is 64-bit value/result, vB is
     * 32-bit shift distance.
     */
    /* ushr-long/2addr vA, vB */
    mov     r3, rINST, lsr #12          @ r3<- B
    ubfx    r4, rINST, #8, #4           @ r4<- A
    GET_VREG r2, r3                     @ r2<- vB
    CLEAR_SHADOW_PAIR r4, lr, ip        @ Zero out the shadow regs
    VREG_INDEX_TO_ADDR r4, r4           @ r4<- &fp[A]
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    GET_VREG_WIDE_BY_ADDR r0, r1, r4    @ r0/r1<- vA/vA+1
    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, lsl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
    it      pl
    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<- r1 >>> (r2-32)
    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    SET_VREG_WIDE_BY_ADDR r0, r1, r4    @ vA/vA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction

%def op_xor_int():
% binop(instr="eor r0, r0, r1")

%def op_xor_int_2addr():
% binop2addr(instr="eor r0, r0, r1")

%def op_xor_int_lit16():
% binopLit16(instr="eor r0, r0, r1")

%def op_xor_int_lit8():
% binopLit8(extract="", instr="eor r0, r0, r3, asr #8")

%def op_xor_long():
% binopWide(preinstr="eor r0, r0, r2", instr="eor r1, r1, r3")

%def op_xor_long_2addr():
% binopWide2addr(preinstr="eor r0, r0, r2", instr="eor r1, r1, r3")