1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /* This file contains codegen for the X86 ISA */
18
19 #include "codegen_x86.h"
20 #include "dex/quick/mir_to_lir-inl.h"
21 #include "dex/reg_storage_eq.h"
22 #include "mirror/art_method.h"
23 #include "mirror/array.h"
24 #include "x86_lir.h"
25
26 namespace art {
27
28 /*
29 * Compare two 64-bit values
30 * x = y return 0
31 * x < y return -1
32 * x > y return 1
33 */
34 void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
35 RegLocation rl_src2) {
36 if (cu_->target64) {
37 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
38 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
39 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
40 RegStorage temp_reg = AllocTemp();
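    // A note on the sequence below: it effectively computes (src1 > src2) - (src1 < src2),
    // which leaves 1, 0 or -1 in the low byte of the result, and then sign-extends that
    // byte into the full 64-bit result register.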
41 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
42 NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG); // result = (src1 > src2) ? 1 : 0
43 NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL); // temp = (src1 >= src2) ? 0 : 1
44 NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg());
45 NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
46
47 StoreValue(rl_dest, rl_result);
48 FreeTemp(temp_reg);
49 return;
50 }
51
52 FlushAllRegs();
53 LockCallTemps(); // Prepare for explicit register usage
54 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
55 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
56 LoadValueDirectWideFixed(rl_src1, r_tmp1);
57 LoadValueDirectWideFixed(rl_src2, r_tmp2);
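  // Sketch of the trick used below: r2 ends up holding -(src1 < src2) (0 or 0xFFFFFFFF) and
  // r0 ends up holding (src1 != src2) ? 1 : 0, so OR-ing them into r0 yields -1, 0 or 1.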
58 // Compute (r1:r0) = (r1:r0) - (r3:r2)
59 OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2
60 OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF
61 NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL); // r2 = (r1:r0) < (r3:r2) ? 1 : 0
62 NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg());
63 OpReg(kOpNeg, rs_r2); // r2 = -r2
64 OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = high | low - sets ZF
65 NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz); // r0 = (r1:r0) != (r3:r2) ? 1 : 0
66 NewLIR2(kX86Movzx8RR, r0, r0);
67 OpRegReg(kOpOr, rs_r0, rs_r2); // r0 = r0 | r2
68 RegLocation rl_result = LocCReturn();
69 StoreValue(rl_dest, rl_result);
70 }
71
72 X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
73 switch (cond) {
74 case kCondEq: return kX86CondEq;
75 case kCondNe: return kX86CondNe;
76 case kCondCs: return kX86CondC;
77 case kCondCc: return kX86CondNc;
78 case kCondUlt: return kX86CondC;
79 case kCondUge: return kX86CondNc;
80 case kCondMi: return kX86CondS;
81 case kCondPl: return kX86CondNs;
82 case kCondVs: return kX86CondO;
83 case kCondVc: return kX86CondNo;
84 case kCondHi: return kX86CondA;
85 case kCondLs: return kX86CondBe;
86 case kCondGe: return kX86CondGe;
87 case kCondLt: return kX86CondL;
88 case kCondGt: return kX86CondG;
89 case kCondLe: return kX86CondLe;
90 case kCondAl:
91 case kCondNv: LOG(FATAL) << "Should not reach here";
92 }
93 return kX86CondO;
94 }
95
96 LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
97 NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg());
98 X86ConditionCode cc = X86ConditionEncoding(cond);
99 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ ,
100 cc);
101 branch->target = target;
102 return branch;
103 }
104
105 LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
106 int check_value, LIR* target) {
107 if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
108 // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
109 NewLIR2(reg.Is64Bit() ? kX86Test64RR: kX86Test32RR, reg.GetReg(), reg.GetReg());
110 } else {
111 if (reg.Is64Bit()) {
112 NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value);
113 } else {
114 NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
115 }
116 }
117 X86ConditionCode cc = X86ConditionEncoding(cond);
118 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc);
119 branch->target = target;
120 return branch;
121 }
122
123 LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
124 // If src or dest is a pair, we'll be using low reg.
125 if (r_dest.IsPair()) {
126 r_dest = r_dest.GetLow();
127 }
128 if (r_src.IsPair()) {
129 r_src = r_src.GetLow();
130 }
131 if (r_dest.IsFloat() || r_src.IsFloat())
132 return OpFpRegCopy(r_dest, r_src);
133 LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR,
134 r_dest.GetReg(), r_src.GetReg());
135 if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
136 res->flags.is_nop = true;
137 }
138 return res;
139 }
140
141 void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
142 if (r_dest != r_src) {
143 LIR *res = OpRegCopyNoInsert(r_dest, r_src);
144 AppendLIR(res);
145 }
146 }
147
148 void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
149 if (r_dest != r_src) {
150 bool dest_fp = r_dest.IsFloat();
151 bool src_fp = r_src.IsFloat();
152 if (dest_fp) {
153 if (src_fp) {
154 OpRegCopy(r_dest, r_src);
155 } else {
156 // TODO: Prevent this from happening in the code. The result is often
157 // unused or could have been loaded more easily from memory.
158 if (!r_src.IsPair()) {
159 DCHECK(!r_dest.IsPair());
160 NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg());
161 } else {
162 NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg());
163 RegStorage r_tmp = AllocTempDouble();
164 NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg());
165 NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg());
166 FreeTemp(r_tmp);
167 }
168 }
169 } else {
170 if (src_fp) {
171 if (!r_dest.IsPair()) {
172 DCHECK(!r_src.IsPair());
173 NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg());
174 } else {
175 NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg());
176 RegStorage temp_reg = AllocTempDouble();
177 NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg());
178 NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32);
179 NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg());
180 }
181 } else {
182 DCHECK_EQ(r_dest.IsPair(), r_src.IsPair());
183 if (!r_src.IsPair()) {
184 // Just copy the register directly.
185 OpRegCopy(r_dest, r_src);
186 } else {
187 // Handle overlap
188 if (r_src.GetHighReg() == r_dest.GetLowReg() &&
189 r_src.GetLowReg() == r_dest.GetHighReg()) {
190 // Deal with cycles.
191 RegStorage temp_reg = AllocTemp();
192 OpRegCopy(temp_reg, r_dest.GetHigh());
193 OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
194 OpRegCopy(r_dest.GetLow(), temp_reg);
195 FreeTemp(temp_reg);
196 } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
197 OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
198 OpRegCopy(r_dest.GetLow(), r_src.GetLow());
199 } else {
200 OpRegCopy(r_dest.GetLow(), r_src.GetLow());
201 OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
202 }
203 }
204 }
205 }
206 }
207 }
208
209 void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
210 int32_t true_val, int32_t false_val, RegStorage rs_dest,
211 int dest_reg_class) {
212 DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair());
213 DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat());
214
215 // We really need this check for correctness; otherwise we would need to do more checks in
216 // the non-zero/one case.
217 if (true_val == false_val) {
218 LoadConstantNoClobber(rs_dest, true_val);
219 return;
220 }
221
222 const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op);
223
224 const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0);
225 if (zero_one_case && IsByteRegister(rs_dest)) {
226 if (!dest_intersect) {
227 LoadConstantNoClobber(rs_dest, 0);
228 }
229 OpRegReg(kOpCmp, left_op, right_op);
230 // Set the low byte of the result to 0 or 1 from the compare condition code.
231 NewLIR2(kX86Set8R, rs_dest.GetReg(),
232 X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code)));
233 if (dest_intersect) {
234 NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg());
235 }
236 } else {
237 // Be careful: rs_dest can be changed only after the cmp because it can be the same as one of
238 // the operands, and we cannot clear it with xor because that would dirty the condition flags.
239 RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false);
240 if (temp_reg.Valid()) {
241 if (false_val == 0 && dest_intersect) {
242 code = FlipComparisonOrder(code);
243 std::swap(true_val, false_val);
244 }
245 if (!dest_intersect) {
246 LoadConstantNoClobber(rs_dest, false_val);
247 }
248 LoadConstantNoClobber(temp_reg, true_val);
249 OpRegReg(kOpCmp, left_op, right_op);
250 if (dest_intersect) {
251 LoadConstantNoClobber(rs_dest, false_val);
252 DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
253 }
254 OpCondRegReg(kOpCmov, code, rs_dest, temp_reg);
255 FreeTemp(temp_reg);
256 } else {
257 // slow path
258 LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr);
259 LoadConstantNoClobber(rs_dest, false_val);
260 LIR* that_is_it = NewLIR1(kX86Jmp8, 0);
261 LIR* true_case = NewLIR0(kPseudoTargetLabel);
262 cmp_branch->target = true_case;
263 LoadConstantNoClobber(rs_dest, true_val);
264 LIR* end = NewLIR0(kPseudoTargetLabel);
265 that_is_it->target = end;
266 }
267 }
268 }
269
270 void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
271 RegLocation rl_result;
272 RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
273 RegLocation rl_dest = mir_graph_->GetDest(mir);
274 // Avoid using float regs here.
275 RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
276 RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
277 ConditionCode ccode = mir->meta.ccode;
278
279 // The kMirOpSelect has two variants, one for constants and one for moves.
280 const bool is_constant_case = (mir->ssa_rep->num_uses == 1);
281
282 if (is_constant_case) {
283 int true_val = mir->dalvikInsn.vB;
284 int false_val = mir->dalvikInsn.vC;
285
286 // Simplest (degenerate) case: both constants are the same.
287 if (true_val == false_val) {
288 rl_result = EvalLoc(rl_dest, result_reg_class, true);
289 LoadConstantNoClobber(rl_result.reg, true_val);
290 } else {
291 // TODO: use GenSelectConst32 and handle additional opcode patterns such as
292 // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal".
293 rl_src = LoadValue(rl_src, src_reg_class);
294 rl_result = EvalLoc(rl_dest, result_reg_class, true);
295 /*
296 * For ccode == kCondEq:
297 *
298 * 1) When the true case is zero and result_reg is not same as src_reg:
299 * xor result_reg, result_reg
300 * cmp $0, src_reg
301 * mov t1, $false_case
302 * cmovnz result_reg, t1
303 * 2) When the false case is zero and result_reg is not same as src_reg:
304 * xor result_reg, result_reg
305 * cmp $0, src_reg
306 * mov t1, $true_case
307 * cmovz result_reg, t1
308 * 3) All other cases (we do compare first to set eflags):
309 * cmp $0, src_reg
310 * mov result_reg, $false_case
311 * mov t1, $true_case
312 * cmovz result_reg, t1
313 */
314 // FIXME: depending on how you use registers you could get a false != mismatch when dealing
315 // with different views of the same underlying physical resource (i.e. solo32 vs. solo64).
316 const bool result_reg_same_as_src =
317 (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum());
318 const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
319 const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
320 const bool catch_all_case = !(true_zero_case || false_zero_case);
321
322 if (true_zero_case || false_zero_case) {
323 OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
324 }
325
326 if (true_zero_case || false_zero_case || catch_all_case) {
327 OpRegImm(kOpCmp, rl_src.reg, 0);
328 }
329
330 if (catch_all_case) {
331 OpRegImm(kOpMov, rl_result.reg, false_val);
332 }
333
334 if (true_zero_case || false_zero_case || catch_all_case) {
335 ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
336 int immediateForTemp = true_zero_case ? false_val : true_val;
337 RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class);
338 OpRegImm(kOpMov, temp1_reg, immediateForTemp);
339
340 OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);
341
342 FreeTemp(temp1_reg);
343 }
344 }
345 } else {
346 rl_src = LoadValue(rl_src, src_reg_class);
347 RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
348 RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
349 rl_true = LoadValue(rl_true, result_reg_class);
350 rl_false = LoadValue(rl_false, result_reg_class);
351 rl_result = EvalLoc(rl_dest, result_reg_class, true);
352
353 /*
354 * For ccode == kCondEq:
355 *
356 * 1) When true case is already in place:
357 * cmp $0, src_reg
358 * cmovnz result_reg, false_reg
359 * 2) When false case is already in place:
360 * cmp $0, src_reg
361 * cmovz result_reg, true_reg
362 * 3) When neither cases are in place:
363 * cmp $0, src_reg
364 * mov result_reg, false_reg
365 * cmovz result_reg, true_reg
366 */
367
368 // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
369 OpRegImm(kOpCmp, rl_src.reg, 0);
370
371 if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
372 OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
373 } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
374 OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
375 } else {
376 OpRegCopy(rl_result.reg, rl_false.reg);
377 OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
378 }
379 }
380
381 StoreValue(rl_dest, rl_result);
382 }
383
384 void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
385 LIR* taken = &block_label_list_[bb->taken];
386 RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
387 RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
388 ConditionCode ccode = mir->meta.ccode;
389
390 if (rl_src1.is_const) {
391 std::swap(rl_src1, rl_src2);
392 ccode = FlipComparisonOrder(ccode);
393 }
394 if (rl_src2.is_const) {
395 // Do special compare/branch against simple const operand
396 int64_t val = mir_graph_->ConstantValueWide(rl_src2);
397 GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
398 return;
399 }
400
401 if (cu_->target64) {
402 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
403 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
404
405 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
406 OpCondBranch(ccode, taken);
407 return;
408 }
409
410 FlushAllRegs();
411 LockCallTemps(); // Prepare for explicit register usage
412 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1);
413 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3);
414 LoadValueDirectWideFixed(rl_src1, r_tmp1);
415 LoadValueDirectWideFixed(rl_src2, r_tmp2);
416
417 // Swap operands and condition code to prevent use of zero flag.
418 if (ccode == kCondLe || ccode == kCondGt) {
419 // Compute (r3:r2) = (r3:r2) - (r1:r0)
420 OpRegReg(kOpSub, rs_r2, rs_r0); // r2 = r2 - r0
421 OpRegReg(kOpSbc, rs_r3, rs_r1); // r3 = r3 - r1 - CF
422 } else {
423 // Compute (r1:r0) = (r1:r0) - (r3:r2)
424 OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2
425 OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF
426 }
427 switch (ccode) {
428 case kCondEq:
429 case kCondNe:
430 OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = r0 | r1
431 break;
432 case kCondLe:
433 ccode = kCondGe;
434 break;
435 case kCondGt:
436 ccode = kCondLt;
437 break;
438 case kCondLt:
439 case kCondGe:
440 break;
441 default:
442 LOG(FATAL) << "Unexpected ccode: " << ccode;
443 }
444 OpCondBranch(ccode, taken);
445 }
446
447 void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
448 int64_t val, ConditionCode ccode) {
449 int32_t val_lo = Low32Bits(val);
450 int32_t val_hi = High32Bits(val);
451 LIR* taken = &block_label_list_[bb->taken];
452 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
453 bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
454
455 if (cu_->target64) {
456 if (is_equality_test && val == 0) {
457 // We can simplify the comparison against 0 for ==, !=.
458 NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg());
459 } else if (is_equality_test && val_hi == 0 && val_lo > 0) {
460 OpRegImm(kOpCmp, rl_src1.reg, val_lo);
461 } else {
462 RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
463 LoadConstantWide(tmp, val);
464 OpRegReg(kOpCmp, rl_src1.reg, tmp);
465 FreeTemp(tmp);
466 }
467 OpCondBranch(ccode, taken);
468 return;
469 }
470
471 if (is_equality_test && val != 0) {
472 rl_src1 = ForceTempWide(rl_src1);
473 }
474 RegStorage low_reg = rl_src1.reg.GetLow();
475 RegStorage high_reg = rl_src1.reg.GetHigh();
476
477 if (is_equality_test) {
478 // We can simplify the comparison against 0 for ==, !=.
479 if (val == 0) {
480 if (IsTemp(low_reg)) {
481 OpRegReg(kOpOr, low_reg, high_reg);
482 // We have now changed it; ignore the old values.
483 Clobber(rl_src1.reg);
484 } else {
485 RegStorage t_reg = AllocTemp();
486 OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
487 FreeTemp(t_reg);
488 }
489 OpCondBranch(ccode, taken);
490 return;
491 }
492
493 // Need to compute the actual value for ==, !=.
494 OpRegImm(kOpSub, low_reg, val_lo);
495 NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
496 OpRegReg(kOpOr, high_reg, low_reg);
497 Clobber(rl_src1.reg);
498 } else if (ccode == kCondLe || ccode == kCondGt) {
499 // Swap operands and condition code to prevent use of zero flag.
500 RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
501 LoadConstantWide(tmp, val);
502 OpRegReg(kOpSub, tmp.GetLow(), low_reg);
503 OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
504 ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
505 FreeTemp(tmp);
506 } else {
507 // We can use a compare for the low word to set CF.
508 OpRegImm(kOpCmp, low_reg, val_lo);
509 if (IsTemp(high_reg)) {
510 NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
511 // We have now changed it; ignore the old values.
512 Clobber(rl_src1.reg);
513 } else {
514 // mov temp_reg, high_reg; sbb temp_reg, high_constant
515 RegStorage t_reg = AllocTemp();
516 OpRegCopy(t_reg, high_reg);
517 NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
518 FreeTemp(t_reg);
519 }
520 }
521
522 OpCondBranch(ccode, taken);
523 }
524
525 void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) {
526 // It does not make sense to calculate magic and shift for zero divisor.
527 DCHECK_NE(divisor, 0);
528
529 /* According to H. S. Warren's Hacker's Delight Chapter 10 and
530 * T. Granlund, P. L. Montgomery's "Division by invariant integers using multiplication".
531 * The magic number M and shift S can be calculated in the following way:
532 * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
533 * where divisor(d) >=2.
534 * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
535 * where divisor(d) <= -2.
536 * Thus nc can be calculated like:
537 * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
538 * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
539 *
540 * So the shift p is the smallest p satisfying
541 * 2^p > nc * (d - 2^p % d), where d >= 2
542 * 2^p > nc * (d + 2^p % d), where d <= -2.
543 *
544 * The magic number M is calculated by
545 * M = (2^p + d - 2^p % d) / d, where d >= 2
546 * M = (2^p - d - 2^p % d) / d, where d <= -2.
547 *
548 * Notice that p is always bigger than or equal to 32/64, so we just return p - 32/p - 64 as
549 * the shift number S.
550 */
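  // (Illustrative check: for a 32-bit divisor of 7 this is expected to produce M = 0x92492493
  // and S = 2, the values tabulated in Hacker's Delight.)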
551
552 int64_t p = (is_long) ? 63 : 31;
553 const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U;
554
555 // Initialize the computations.
556 uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
557 uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 :
558 static_cast<uint32_t>(divisor) >> 31);
559 uint64_t abs_nc = tmp - 1 - tmp % abs_d;
560 uint64_t quotient1 = exp / abs_nc;
561 uint64_t remainder1 = exp % abs_nc;
562 uint64_t quotient2 = exp / abs_d;
563 uint64_t remainder2 = exp % abs_d;
564
565 /*
566 * To avoid handling both positive and negative divisor, Hacker's Delight
567 * introduces a method to handle these 2 cases together to avoid duplication.
568 */
569 uint64_t delta;
570 do {
571 p++;
572 quotient1 = 2 * quotient1;
573 remainder1 = 2 * remainder1;
574 if (remainder1 >= abs_nc) {
575 quotient1++;
576 remainder1 = remainder1 - abs_nc;
577 }
578 quotient2 = 2 * quotient2;
579 remainder2 = 2 * remainder2;
580 if (remainder2 >= abs_d) {
581 quotient2++;
582 remainder2 = remainder2 - abs_d;
583 }
584 delta = abs_d - remainder2;
585 } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
586
587 magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
588
589 if (!is_long) {
590 magic = static_cast<int>(magic);
591 }
592
593 shift = (is_long) ? p - 64 : p - 32;
594 }
595
596 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
597 LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
598 return rl_dest;
599 }
600
601 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
602 int imm, bool is_div) {
603 // Use a multiply (and fixup) to perform an int div/rem by a constant.
604 RegLocation rl_result;
605
606 if (imm == 1) {
607 rl_result = EvalLoc(rl_dest, kCoreReg, true);
608 if (is_div) {
609 // x / 1 == x.
610 LoadValueDirectFixed(rl_src, rl_result.reg);
611 } else {
612 // x % 1 == 0.
613 LoadConstantNoClobber(rl_result.reg, 0);
614 }
615 } else if (imm == -1) { // handle 0x80000000 / -1 special case.
616 rl_result = EvalLoc(rl_dest, kCoreReg, true);
617 if (is_div) {
618 LoadValueDirectFixed(rl_src, rl_result.reg);
619 OpRegImm(kOpCmp, rl_result.reg, 0x80000000);
620 LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
621
622 // for x != MIN_INT, x / -1 == -x.
623 NewLIR1(kX86Neg32R, rl_result.reg.GetReg());
624
625 // EAX already contains the right value (0x80000000),
626 minint_branch->target = NewLIR0(kPseudoTargetLabel);
627 } else {
628 // x % -1 == 0.
629 LoadConstantNoClobber(rl_result.reg, 0);
630 }
631 } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
632 // Division using shifting.
633 rl_src = LoadValue(rl_src, kCoreReg);
634 rl_result = EvalLoc(rl_dest, kCoreReg, true);
635 if (IsSameReg(rl_result.reg, rl_src.reg)) {
636 RegStorage rs_temp = AllocTypedTemp(false, kCoreReg);
637 rl_result.reg.SetReg(rs_temp.GetReg());
638 }
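    // Round-toward-zero division by +/-2^k, roughly: bias the dividend by (2^k - 1) via the LEA,
    // use the CMOV to keep the unbiased value when the dividend is non-negative, arithmetic-shift
    // right by k, and finally negate if the divisor was negative.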
639 NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1);
640 NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
641 OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
642 int shift_amount = LowestSetBit(imm);
643 OpRegImm(kOpAsr, rl_result.reg, shift_amount);
644 if (imm < 0) {
645 OpReg(kOpNeg, rl_result.reg);
646 }
647 } else {
648 CHECK(imm <= -2 || imm >= 2);
649
650 // Use H. S. Warren's Hacker's Delight Chapter 10 and
651 // T. Granlund, P. L. Montgomery's "Division by invariant integers using multiplication".
652 int64_t magic;
653 int shift;
654 CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */);
655
656 /*
657 * For imm >= 2,
658 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
659 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0.
660 * For imm <= -2,
661 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0
662 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
663 * We implement this algorithm in the following way:
664 * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX
665 * 2. if imm > 0 and magic < 0, add numerator to EDX
666 * if imm < 0 and magic > 0, sub numerator from EDX
667 * 3. if S !=0, SAR S bits for EDX
668 * 4. add 1 to EDX if EDX < 0
669 * 5. Thus, EDX is the quotient
670 */
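    // Illustrative instance of the steps above: for imm = 7 (magic M and shift S as in the
    // example near CalculateMagicAndShift, with magic < 0), the emitted code computes
    // EDX = High32(magic * n); EDX += n; EDX >>= 2; EDX += 1 if EDX < 0.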
671
672 FlushReg(rs_r0);
673 Clobber(rs_r0);
674 LockTemp(rs_r0);
675 FlushReg(rs_r2);
676 Clobber(rs_r2);
677 LockTemp(rs_r2);
678
679 // Assume that the result will be in EDX.
680 rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r2, INVALID_SREG, INVALID_SREG};
681
682 // Numerator into EAX.
683 RegStorage numerator_reg;
684 if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
685 // We will need the value later.
686 rl_src = LoadValue(rl_src, kCoreReg);
687 numerator_reg = rl_src.reg;
688 OpRegCopy(rs_r0, numerator_reg);
689 } else {
690 // Only need this once. Just put it into EAX.
691 LoadValueDirectFixed(rl_src, rs_r0);
692 }
693
694 // EDX = magic.
695 LoadConstantNoClobber(rs_r2, magic);
696
697 // EDX:EAX = magic & dividend.
698 NewLIR1(kX86Imul32DaR, rs_r2.GetReg());
699
700 if (imm > 0 && magic < 0) {
701 // Add numerator to EDX.
702 DCHECK(numerator_reg.Valid());
703 NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg());
704 } else if (imm < 0 && magic > 0) {
705 DCHECK(numerator_reg.Valid());
706 NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg());
707 }
708
709 // Do we need the shift?
710 if (shift != 0) {
711 // Shift EDX by 'shift' bits.
712 NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift);
713 }
714
715 // Add 1 to EDX if EDX < 0.
716
717 // Move EDX to EAX.
718 OpRegCopy(rs_r0, rs_r2);
719
720 // Move sign bit to bit 0, zeroing the rest.
721 NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31);
722
723 // EDX = EDX + EAX.
724 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg());
725
726 // Quotient is in EDX.
727 if (!is_div) {
728 // We need to compute the remainder.
729 // Remainder is numerator - (quotient * imm).
730 DCHECK(numerator_reg.Valid());
731 OpRegCopy(rs_r0, numerator_reg);
732
733 // EDX = quotient * imm.
734 OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);
735
736 // EAX -= EDX.
737 NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg());
738
739 // For this case, return the result in EAX.
740 rl_result.reg.SetReg(r0);
741 }
742 }
743
744 return rl_result;
745 }
746
747 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
748 bool is_div) {
749 LOG(FATAL) << "Unexpected use of GenDivRem for x86";
750 return rl_dest;
751 }
752
753 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
754 RegLocation rl_src2, bool is_div, bool check_zero) {
755 // We have to use fixed registers, so flush all the temps.
756 FlushAllRegs();
757 LockCallTemps(); // Prepare for explicit register usage.
758
759 // Load LHS into EAX.
760 LoadValueDirectFixed(rl_src1, rs_r0);
761
762 // Load RHS into EBX.
763 LoadValueDirectFixed(rl_src2, rs_r1);
764
765 // Copy LHS sign bit into EDX.
766 NewLIR0(kx86Cdq32Da);
767
768 if (check_zero) {
769 // Handle division by zero case.
770 GenDivZeroCheck(rs_r1);
771 }
772
773 // Have to catch 0x80000000/-1 case, or we will get an exception!
774 OpRegImm(kOpCmp, rs_r1, -1);
775 LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
776
777 // RHS is -1.
778 OpRegImm(kOpCmp, rs_r0, 0x80000000);
779 LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
780
781 // In 0x80000000/-1 case.
782 if (!is_div) {
783 // For DIV, EAX is already right. For REM, we need EDX to be 0.
784 LoadConstantNoClobber(rs_r2, 0);
785 }
786 LIR* done = NewLIR1(kX86Jmp8, 0);
787
788 // Expected case.
789 minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
790 minint_branch->target = minus_one_branch->target;
791 NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg());
792 done->target = NewLIR0(kPseudoTargetLabel);
793
794 // Result is in EAX for div and EDX for rem.
795 RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
796 if (!is_div) {
797 rl_result.reg.SetReg(r2);
798 }
799 return rl_result;
800 }
801
802 bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
803 DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
804
805 if (is_long && cu_->instruction_set == kX86) {
806 return false;
807 }
808
809 // Get the two arguments to the invoke and place them in GP registers.
810 RegLocation rl_src1 = info->args[0];
811 RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
812 rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
813 rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
814
815 RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
816 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
817
818 /*
819 * If the result register is the same as the second element, then we need to be careful.
820 * The reason is that the first copy will inadvertently clobber the second element with
821 * the first one, thus yielding the wrong result. So we swap the operands in that case.
822 */
823 if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
824 std::swap(rl_src1, rl_src2);
825 }
826
827 // Pick the first integer as min/max.
828 OpRegCopy(rl_result.reg, rl_src1.reg);
829
830 // If the integers are both in the same register, then there is nothing else to do
831 // because they are equal and we have already moved one into the result.
832 if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
833 // It is possible we didn't pick correctly so do the actual comparison now.
834 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
835
836 // Conditionally move the other integer into the destination register.
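    // For min we overwrite the copied value when src1 > src2 (src2 is the smaller one);
    // for max, when src1 < src2.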
837 ConditionCode condition_code = is_min ? kCondGt : kCondLt;
838 OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
839 }
840
841 if (is_long) {
842 StoreValueWide(rl_dest, rl_result);
843 } else {
844 StoreValue(rl_dest, rl_result);
845 }
846 return true;
847 }
848
849 bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
850 RegLocation rl_src_address = info->args[0]; // long address
851 RegLocation rl_address;
852 if (!cu_->target64) {
853 rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0]
854 rl_address = LoadValue(rl_src_address, kCoreReg);
855 } else {
856 rl_address = LoadValueWide(rl_src_address, kCoreReg);
857 }
858 RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
859 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
860 // Unaligned access is allowed on x86.
861 LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
862 if (size == k64) {
863 StoreValueWide(rl_dest, rl_result);
864 } else {
865 DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
866 StoreValue(rl_dest, rl_result);
867 }
868 return true;
869 }
870
871 bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
872 RegLocation rl_src_address = info->args[0]; // long address
873 RegLocation rl_address;
874 if (!cu_->target64) {
875 rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0]
876 rl_address = LoadValue(rl_src_address, kCoreReg);
877 } else {
878 rl_address = LoadValueWide(rl_src_address, kCoreReg);
879 }
880 RegLocation rl_src_value = info->args[2]; // [size] value
881 RegLocation rl_value;
882 if (size == k64) {
883 // Unaligned access is allowed on x86.
884 rl_value = LoadValueWide(rl_src_value, kCoreReg);
885 } else {
886 DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
887 // In 32-bit mode, only the EAX..EDX registers can be used with Mov8MR.
888 if (!cu_->target64 && size == kSignedByte) {
889 rl_src_value = UpdateLocTyped(rl_src_value, kCoreReg);
890 if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
891 RegStorage temp = AllocateByteRegister();
892 OpRegCopy(temp, rl_src_value.reg);
893 rl_value.reg = temp;
894 } else {
895 rl_value = LoadValue(rl_src_value, kCoreReg);
896 }
897 } else {
898 rl_value = LoadValue(rl_src_value, kCoreReg);
899 }
900 }
901 StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
902 return true;
903 }
904
905 void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
906 NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
907 }
908
909 void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) {
910 DCHECK_EQ(kX86, cu_->instruction_set);
911 NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
912 }
913
914 void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) {
915 DCHECK_EQ(kX86_64, cu_->instruction_set);
916 NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
917 }
918
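// Helper: true if rl currently names 'reg' and that register is live or rl is in its home location.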
919 static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) {
920 return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
921 }
922
923 bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
924 DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
925 // Unused - RegLocation rl_src_unsafe = info->args[0];
926 RegLocation rl_src_obj = info->args[1]; // Object - known non-null
927 RegLocation rl_src_offset = info->args[2]; // long low
928 if (!cu_->target64) {
929 rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3]
930 }
931 RegLocation rl_src_expected = info->args[4]; // int, long or Object
932 // If is_long, high half is in info->args[5]
933 RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object
934 // If is_long, high half is in info->args[7]
935
936 if (is_long && cu_->target64) {
937 // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
938 FlushReg(rs_r0q);
939 Clobber(rs_r0q);
940 LockTemp(rs_r0q);
941
942 RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
943 RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
944 RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
945 LoadValueDirectWide(rl_src_expected, rs_r0q);
946 NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
947 rl_new_value.reg.GetReg());
948
949 // After a store we need to insert barrier in case of potential load. Since the
950 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
951 GenMemBarrier(kAnyAny);
952
953 FreeTemp(rs_r0q);
954 } else if (is_long) {
955 // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
956 // TODO: CFI support.
957 FlushAllRegs();
958 LockCallTemps();
959 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
960 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
961 LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
962 LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
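    // LOCK CMPXCHG8B compares EDX:EAX against the 64-bit memory operand and, on a match, stores
    // ECX:EBX into it (ZF reports success); hence expected is loaded into EDX:EAX and the new
    // value into ECX:EBX above.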
963 // FIXME: needs 64-bit update.
964 const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
965 const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
966 DCHECK(!obj_in_si || !obj_in_di);
967 const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
968 const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
969 DCHECK(!off_in_si || !off_in_di);
970 // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
971 RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
972 RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
973 bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
974 bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
975 if (push_di) {
976 NewLIR1(kX86Push32R, rs_rDI.GetReg());
977 MarkTemp(rs_rDI);
978 LockTemp(rs_rDI);
979 }
980 if (push_si) {
981 NewLIR1(kX86Push32R, rs_rSI.GetReg());
982 MarkTemp(rs_rSI);
983 LockTemp(rs_rSI);
984 }
985 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
986 const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
987 if (!obj_in_si && !obj_in_di) {
988 LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
989 // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
990 DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
991 int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
992 AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
993 }
994 if (!off_in_si && !off_in_di) {
995 LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
996 // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
997 DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
998 int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
999 AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
1000 }
1001 NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);
1002
1003 // After a store we need to insert barrier to prevent reordering with either
1004 // earlier or later memory accesses. Since
1005 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
1006 // and it will be associated with the cmpxchg instruction, preventing both.
1007 GenMemBarrier(kAnyAny);
1008
1009 if (push_si) {
1010 FreeTemp(rs_rSI);
1011 UnmarkTemp(rs_rSI);
1012 NewLIR1(kX86Pop32R, rs_rSI.GetReg());
1013 }
1014 if (push_di) {
1015 FreeTemp(rs_rDI);
1016 UnmarkTemp(rs_rDI);
1017 NewLIR1(kX86Pop32R, rs_rDI.GetReg());
1018 }
1019 FreeCallTemps();
1020 } else {
1021 // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
1022 FlushReg(rs_r0);
1023 Clobber(rs_r0);
1024 LockTemp(rs_r0);
1025
1026 RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
1027 RegLocation rl_new_value = LoadValue(rl_src_new_value);
1028
1029 if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
1030 // Mark card for object assuming new value is stored.
1031 FreeTemp(rs_r0); // Temporarily release EAX for MarkGCCard().
1032 MarkGCCard(rl_new_value.reg, rl_object.reg);
1033 LockTemp(rs_r0);
1034 }
1035
1036 RegLocation rl_offset;
1037 if (cu_->target64) {
1038 rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
1039 } else {
1040 rl_offset = LoadValue(rl_src_offset, kCoreReg);
1041 }
1042 LoadValueDirect(rl_src_expected, rs_r0);
1043 NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
1044 rl_new_value.reg.GetReg());
1045
1046 // After a store we need to insert barrier to prevent reordering with either
1047 // earlier or later memory accesses. Since
1048 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated,
1049 // and it will be associated with the cmpxchg instruction, preventing both.
1050 GenMemBarrier(kAnyAny);
1051
1052 FreeTemp(rs_r0);
1053 }
1054
1055 // Convert ZF to boolean
1056 RegLocation rl_dest = InlineTarget(info); // boolean place for result
1057 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
1058 RegStorage result_reg = rl_result.reg;
1059
1060 // For 32-bit, SETcc only works with EAX..EDX.
1061 if (!IsByteRegister(result_reg)) {
1062 result_reg = AllocateByteRegister();
1063 }
1064 NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
1065 NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
1066 if (IsTemp(result_reg)) {
1067 FreeTemp(result_reg);
1068 }
1069 StoreValue(rl_dest, rl_result);
1070 return true;
1071 }
1072
1073 LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
1074 CHECK(base_of_code_ != nullptr);
1075
1076 // Address the start of the method
1077 RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
1078 if (rl_method.wide) {
1079 LoadValueDirectWideFixed(rl_method, reg);
1080 } else {
1081 LoadValueDirectFixed(rl_method, reg);
1082 }
1083 store_method_addr_used_ = true;
1084
1085 // Load the proper value from the literal area.
1086 // We don't know the proper offset for the value, so pick one that will force
1087 // a 4-byte offset. We will fix this up in the assembler later to have the right
1088 // value.
1089 ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
1090 LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
1091 0, 0, target);
1092 res->target = target;
1093 res->flags.fixup = kFixupLoad;
1094 store_method_addr_used_ = true;
1095 return res;
1096 }
1097
1098 LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
1099 LOG(FATAL) << "Unexpected use of OpVldm for x86";
1100 return NULL;
1101 }
1102
1103 LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
1104 LOG(FATAL) << "Unexpected use of OpVstm for x86";
1105 return NULL;
1106 }
1107
1108 void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
1109 RegLocation rl_result, int lit,
1110 int first_bit, int second_bit) {
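  // lit has exactly two bits set (first_bit and second_bit), so src * lit is computed below as
  // ((src << (second_bit - first_bit)) + src) << first_bit.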
1111 RegStorage t_reg = AllocTemp();
1112 OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
1113 OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
1114 FreeTemp(t_reg);
1115 if (first_bit != 0) {
1116 OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
1117 }
1118 }
1119
1120 void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) {
1121 if (cu_->target64) {
1122 DCHECK(reg.Is64Bit());
1123
1124 NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0);
1125 } else {
1126 DCHECK(reg.IsPair());
1127
1128 // We are not supposed to clobber the incoming storage, so allocate a temporary.
1129 RegStorage t_reg = AllocTemp();
1130 // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
1131 OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());
1132 // The temp is no longer needed so free it at this time.
1133 FreeTemp(t_reg);
1134 }
1135
1136 // In case of zero, throw ArithmeticException.
1137 GenDivZeroCheck(kCondEq);
1138 }
1139
1140 void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index,
1141 RegStorage array_base,
1142 int len_offset) {
1143 class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
1144 public:
1145 ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
1146 RegStorage index, RegStorage array_base, int32_t len_offset)
1147 : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
1148 index_(index), array_base_(array_base), len_offset_(len_offset) {
1149 }
1150
1151 void Compile() OVERRIDE {
1152 m2l_->ResetRegPool();
1153 m2l_->ResetDefTracking();
1154 GenerateTargetLabel(kPseudoThrowTarget);
1155
1156 RegStorage new_index = index_;
1157 // Move index out of kArg1, either directly to kArg0, or to kArg2.
1158 // TODO: clean up to check by register type rather than by register number.
1159 if (index_ == m2l_->TargetReg(kArg1, kNotWide)) {
1160 if (array_base_ == m2l_->TargetReg(kArg0, kRef)) {
1161 m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_);
1162 new_index = m2l_->TargetReg(kArg2, kNotWide);
1163 } else {
1164 m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_);
1165 new_index = m2l_->TargetReg(kArg0, kNotWide);
1166 }
1167 }
1168 // Load array length to kArg1.
1169 X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
1170 x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
1171 x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index,
1172 m2l_->TargetReg(kArg1, kNotWide), true);
1173 }
1174
1175 private:
1176 const RegStorage index_;
1177 const RegStorage array_base_;
1178 const int32_t len_offset_;
1179 };
1180
1181 OpRegMem(kOpCmp, index, array_base, len_offset);
1182 MarkPossibleNullPointerException(0);
1183 LIR* branch = OpCondBranch(kCondUge, nullptr);
1184 AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
1185 index, array_base, len_offset));
1186 }
1187
1188 void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
1189 RegStorage array_base,
1190 int32_t len_offset) {
1191 class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
1192 public:
1193 ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
1194 int32_t index, RegStorage array_base, int32_t len_offset)
1195 : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
1196 index_(index), array_base_(array_base), len_offset_(len_offset) {
1197 }
1198
1199 void Compile() OVERRIDE {
1200 m2l_->ResetRegPool();
1201 m2l_->ResetDefTracking();
1202 GenerateTargetLabel(kPseudoThrowTarget);
1203
1204 // Load array length to kArg1.
1205 X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_);
1206 x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
1207 x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_);
1208 x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide),
1209 m2l_->TargetReg(kArg1, kNotWide), true);
1210 }
1211
1212 private:
1213 const int32_t index_;
1214 const RegStorage array_base_;
1215 const int32_t len_offset_;
1216 };
1217
1218 NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index);
1219 MarkPossibleNullPointerException(0);
1220 LIR* branch = OpCondBranch(kCondLs, nullptr);
1221 AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
1222 index, array_base, len_offset));
1223 }
1224
1225 // Test suspend flag, return target of taken suspend branch
1226 LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
1227 if (cu_->target64) {
1228 OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0);
1229 } else {
1230 OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0);
1231 }
1232 return OpCondBranch((target == NULL) ? kCondNe : kCondEq, target);
1233 }
1234
1235 // Decrement register and branch on condition
1236 LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
1237 OpRegImm(kOpSub, reg, 1);
1238 return OpCondBranch(c_code, target);
1239 }
1240
1241 bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
1242 RegLocation rl_src, RegLocation rl_dest, int lit) {
1243 LOG(FATAL) << "Unexpected use of SmallLiteralDivRem in x86";
1244 return false;
1245 }
1246
1247 bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
1248 LOG(FATAL) << "Unexpected use of easyMultiply in x86";
1249 return false;
1250 }
1251
1252 LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
1253 LOG(FATAL) << "Unexpected use of OpIT in x86";
1254 return NULL;
1255 }
1256
1257 void X86Mir2Lir::OpEndIT(LIR* it) {
1258 LOG(FATAL) << "Unexpected use of OpEndIT in x86";
1259 }
1260
1261 void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
1262 switch (val) {
1263 case 0:
1264 NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1265 break;
1266 case 1:
1267 OpRegCopy(dest, src);
1268 break;
1269 default:
1270 OpRegRegImm(kOpMul, dest, src, val);
1271 break;
1272 }
1273 }
1274
1275 void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
1276 // All memory accesses below reference dalvik regs.
1277 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1278
1279 LIR *m;
1280 switch (val) {
1281 case 0:
1282 NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
1283 break;
1284 case 1:
1285 LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, kNotVolatile);
1286 break;
1287 default:
1288 m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(),
1289 rs_rX86_SP.GetReg(), displacement, val);
1290 AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
1291 break;
1292 }
1293 }
1294
1295 void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
1296 RegLocation rl_src2) {
1297 if (!cu_->target64) {
1298 // Some x86 32-bit ops fall back to the generic Mir2Lir implementation.
1299 switch (opcode) {
1300 case Instruction::NOT_LONG:
1301 case Instruction::DIV_LONG:
1302 case Instruction::DIV_LONG_2ADDR:
1303 case Instruction::REM_LONG:
1304 case Instruction::REM_LONG_2ADDR:
1305 Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
1306 return;
1307
1308 default:
1309 // Everything else we can handle.
1310 break;
1311 }
1312 }
1313
1314 switch (opcode) {
1315 case Instruction::NOT_LONG:
1316 GenNotLong(rl_dest, rl_src2);
1317 return;
1318
1319 case Instruction::ADD_LONG:
1320 case Instruction::ADD_LONG_2ADDR:
1321 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1322 return;
1323
1324 case Instruction::SUB_LONG:
1325 case Instruction::SUB_LONG_2ADDR:
1326 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
1327 return;
1328
1329 case Instruction::MUL_LONG:
1330 case Instruction::MUL_LONG_2ADDR:
1331 GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
1332 return;
1333
1334 case Instruction::DIV_LONG:
1335 case Instruction::DIV_LONG_2ADDR:
1336 GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
1337 return;
1338
1339 case Instruction::REM_LONG:
1340 case Instruction::REM_LONG_2ADDR:
1341 GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
1342 return;
1343
1344 case Instruction::AND_LONG_2ADDR:
1345 case Instruction::AND_LONG:
1346 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1347 return;
1348
1349 case Instruction::OR_LONG:
1350 case Instruction::OR_LONG_2ADDR:
1351 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1352 return;
1353
1354 case Instruction::XOR_LONG:
1355 case Instruction::XOR_LONG_2ADDR:
1356 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
1357 return;
1358
1359 case Instruction::NEG_LONG:
1360 GenNegLong(rl_dest, rl_src2);
1361 return;
1362
1363 default:
1364 LOG(FATAL) << "Invalid long arith op";
1365 return;
1366 }
1367 }
1368
1369 bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val) {
1370 // All memory accesses below reference dalvik regs.
1371 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1372
1373 if (val == 0) {
1374 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1375 if (cu_->target64) {
1376 OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
1377 } else {
1378 OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
1379 OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
1380 }
1381 StoreValueWide(rl_dest, rl_result);
1382 return true;
1383 } else if (val == 1) {
1384 StoreValueWide(rl_dest, rl_src1);
1385 return true;
1386 } else if (val == 2) {
1387 GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
1388 return true;
1389 } else if (IsPowerOfTwo(val)) {
1390 int shift_amount = LowestSetBit(val);
1391 if (!BadOverlap(rl_src1, rl_dest)) {
1392 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1393 RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1,
1394 shift_amount);
1395 StoreValueWide(rl_dest, rl_result);
1396 return true;
1397 }
1398 }
1399
1400 // Okay, on 32b just bite the bullet and do it, still better than the general case.
1401 if (!cu_->target64) {
1402 int32_t val_lo = Low32Bits(val);
1403 int32_t val_hi = High32Bits(val);
1404 FlushAllRegs();
1405 LockCallTemps(); // Prepare for explicit register usage.
1406 rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
1407 bool src1_in_reg = rl_src1.location == kLocPhysReg;
1408 int displacement = SRegOffset(rl_src1.s_reg_low);
1409
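    // 32-bit schoolbook multiply: the low 64 bits of (1H:1L) * (2H:2L) are
    // 1L*2L + ((1H*2L + 1L*2H) << 32); anything above bit 63 is dropped.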
1410 // ECX <- 1H * 2L
1411 // EAX <- 1L * 2H
1412 if (src1_in_reg) {
1413 GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
1414 GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
1415 } else {
1416 GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
1417 GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
1418 }
1419
1420 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L)
1421 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1422
1423 // EAX <- 2L
1424 LoadConstantNoClobber(rs_r0, val_lo);
1425
1426 // EDX:EAX <- 2L * 1L (double precision)
1427 if (src1_in_reg) {
1428 NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1429 } else {
1430 LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
1431 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1432 true /* is_load */, true /* is_64bit */);
1433 }
1434
1435 // EDX <- EDX + ECX (add high words)
1436 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1437
1438 // Result is EDX:EAX
1439 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1440 RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1441 StoreValueWide(rl_dest, rl_result);
1442 return true;
1443 }
1444 return false;
1445 }
1446
1447 void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
1448 RegLocation rl_src2) {
1449 if (rl_src1.is_const) {
1450 std::swap(rl_src1, rl_src2);
1451 }
1452
1453 if (rl_src2.is_const) {
1454 if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2))) {
1455 return;
1456 }
1457 }
1458
1459 // All memory accesses below reference dalvik regs.
1460 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1461
1462 if (cu_->target64) {
1463 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1464 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1465 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1466 if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
1467 rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1468 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
1469 } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() &&
1470 rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
1471 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg());
1472 } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() &&
1473 rl_result.reg.GetReg() != rl_src2.reg.GetReg()) {
1474 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
1475 } else {
1476 OpRegCopy(rl_result.reg, rl_src1.reg);
1477 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg());
1478 }
1479 StoreValueWide(rl_dest, rl_result);
1480 return;
1481 }
1482
1483 // Not multiplying by a constant. Do it the hard way
1484 // Check for V*V. We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
1485 bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
1486 mir_graph_->SRegToVReg(rl_src2.s_reg_low);
1487
1488 FlushAllRegs();
1489 LockCallTemps(); // Prepare for explicit register usage.
1490 rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
1491 rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
1492
1493 // At this point, the VRs are in their home locations.
1494 bool src1_in_reg = rl_src1.location == kLocPhysReg;
1495 bool src2_in_reg = rl_src2.location == kLocPhysReg;
1496
1497 // ECX <- 1H
1498 if (src1_in_reg) {
1499 NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg());
1500 } else {
1501 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32,
1502 kNotVolatile);
1503 }
1504
1505 if (is_square) {
1506 // Take advantage of the fact that the values are the same.
1507 // ECX <- ECX * 2L (1H * 2L)
1508 if (src2_in_reg) {
1509 NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1510 } else {
1511 int displacement = SRegOffset(rl_src2.s_reg_low);
1512 LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
1513 displacement + LOWORD_OFFSET);
1514 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1515 true /* is_load */, true /* is_64bit */);
1516 }
1517
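    // For a square both cross products are identical (1H * 1L), so rather than
    // doing a second multiply the code below simply doubles ECX.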
1518 // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
1519 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg());
1520 } else {
1521 // EAX <- 2H
1522 if (src2_in_reg) {
1523 NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg());
1524 } else {
1525 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32,
1526 kNotVolatile);
1527 }
1528
1529 // EAX <- EAX * 1L (2H * 1L)
1530 if (src1_in_reg) {
1531 NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg());
1532 } else {
1533 int displacement = SRegOffset(rl_src1.s_reg_low);
1534 LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP.GetReg(),
1535 displacement + LOWORD_OFFSET);
1536 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1537 true /* is_load */, true /* is_64bit */);
1538 }
1539
1540 // ECX <- ECX * 2L (1H * 2L)
1541 if (src2_in_reg) {
1542 NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg());
1543 } else {
1544 int displacement = SRegOffset(rl_src2.s_reg_low);
1545 LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(),
1546 displacement + LOWORD_OFFSET);
1547 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1548 true /* is_load */, true /* is_64bit */);
1549 }
1550
1551 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L)
1552 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg());
1553 }
1554
1555 // EAX <- 2L
1556 if (src2_in_reg) {
1557 NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg());
1558 } else {
1559 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32,
1560 kNotVolatile);
1561 }
1562
1563 // EDX:EAX <- 2L * 1L (double precision)
1564 if (src1_in_reg) {
1565 NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
1566 } else {
1567 int displacement = SRegOffset(rl_src1.s_reg_low);
1568 LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET);
1569 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
1570 true /* is_load */, true /* is_64bit */);
1571 }
1572
1573 // EDX <- EDX + ECX (add high words)
1574 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg());
1575
1576 // Result is EDX:EAX
1577 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
1578 RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG};
1579 StoreValueWide(rl_dest, rl_result);
1580 }
1581
1582 void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
1583 Instruction::Code op) {
1584 DCHECK_EQ(rl_dest.location, kLocPhysReg);
1585 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
1586 if (rl_src.location == kLocPhysReg) {
1587 // Both operands are in registers.
1588     // But we must ensure that rl_src is in a register pair.
1589 if (cu_->target64) {
1590 NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg());
1591 } else {
1592 rl_src = LoadValueWide(rl_src, kCoreReg);
1593 if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
1594 // The registers are the same, so we would clobber it before the use.
1595 RegStorage temp_reg = AllocTemp();
1596 OpRegCopy(temp_reg, rl_dest.reg);
1597 rl_src.reg.SetHighReg(temp_reg.GetReg());
1598 }
1599 NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());
1600
1601 x86op = GetOpcode(op, rl_dest, rl_src, true);
1602 NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
1603 FreeTemp(rl_src.reg); // ???
1604 }
1605 return;
1606 }
1607
1608 // RHS is in memory.
1609 DCHECK((rl_src.location == kLocDalvikFrame) ||
1610 (rl_src.location == kLocCompilerTemp));
1611 int r_base = rs_rX86_SP.GetReg();
1612 int displacement = SRegOffset(rl_src.s_reg_low);
1613
1614 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1615 LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
1616 r_base, displacement + LOWORD_OFFSET);
1617 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1618 true /* is_load */, true /* is64bit */);
1619 if (!cu_->target64) {
1620 x86op = GetOpcode(op, rl_dest, rl_src, true);
1621 lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
1622 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1623 true /* is_load */, true /* is64bit */);
1624 }
1625 }
1626
1627 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
1628 rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
1629 if (rl_dest.location == kLocPhysReg) {
1630 // Ensure we are in a register pair
1631 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1632
1633 rl_src = UpdateLocWideTyped(rl_src, kCoreReg);
1634 GenLongRegOrMemOp(rl_result, rl_src, op);
1635 StoreFinalValueWide(rl_dest, rl_result);
1636 return;
1637 }
1638
1639 // It wasn't in registers, so it better be in memory.
1640 DCHECK((rl_dest.location == kLocDalvikFrame) ||
1641 (rl_dest.location == kLocCompilerTemp));
1642 rl_src = LoadValueWide(rl_src, kCoreReg);
1643
1644 // Operate directly into memory.
1645 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
1646 int r_base = rs_rX86_SP.GetReg();
1647 int displacement = SRegOffset(rl_dest.s_reg_low);
1648
1649 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
1650 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
1651 cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
1652 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1653 true /* is_load */, true /* is64bit */);
1654 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
1655 false /* is_load */, true /* is64bit */);
1656 if (!cu_->target64) {
1657 x86op = GetOpcode(op, rl_dest, rl_src, true);
1658 lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
1659 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1660 true /* is_load */, true /* is64bit */);
1661 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
1662 false /* is_load */, true /* is64bit */);
1663 }
1664 FreeTemp(rl_src.reg);
1665 }
1666
1667 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
1668 RegLocation rl_src2, Instruction::Code op,
1669 bool is_commutative) {
1670 // Is this really a 2 operand operation?
1671 switch (op) {
1672 case Instruction::ADD_LONG_2ADDR:
1673 case Instruction::SUB_LONG_2ADDR:
1674 case Instruction::AND_LONG_2ADDR:
1675 case Instruction::OR_LONG_2ADDR:
1676 case Instruction::XOR_LONG_2ADDR:
1677 if (GenerateTwoOperandInstructions()) {
1678 GenLongArith(rl_dest, rl_src2, op);
1679 return;
1680 }
1681 break;
1682
1683 default:
1684 break;
1685 }
1686
1687 if (rl_dest.location == kLocPhysReg) {
1688 RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
1689
1690 // We are about to clobber the LHS, so it needs to be a temp.
1691 rl_result = ForceTempWide(rl_result);
1692
1693 // Perform the operation using the RHS.
1694 rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
1695 GenLongRegOrMemOp(rl_result, rl_src2, op);
1696
1697 // And now record that the result is in the temp.
1698 StoreFinalValueWide(rl_dest, rl_result);
1699 return;
1700 }
1701
1702 // It wasn't in registers, so it better be in memory.
1703 DCHECK((rl_dest.location == kLocDalvikFrame) ||
1704 (rl_dest.location == kLocCompilerTemp));
1705 rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
1706 rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
1707
1708 // Get one of the source operands into temporary register.
1709 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1710 if (cu_->target64) {
1711 if (IsTemp(rl_src1.reg)) {
1712 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1713 } else if (is_commutative) {
1714 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1715 // We need at least one of them to be a temporary.
1716 if (!IsTemp(rl_src2.reg)) {
1717 rl_src1 = ForceTempWide(rl_src1);
1718 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1719 } else {
1720 GenLongRegOrMemOp(rl_src2, rl_src1, op);
1721 StoreFinalValueWide(rl_dest, rl_src2);
1722 return;
1723 }
1724 } else {
1725 // Need LHS to be the temp.
1726 rl_src1 = ForceTempWide(rl_src1);
1727 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1728 }
1729 } else {
1730 if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
1731 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1732 } else if (is_commutative) {
1733 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
1734 // We need at least one of them to be a temporary.
1735 if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
1736 rl_src1 = ForceTempWide(rl_src1);
1737 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1738 } else {
1739 GenLongRegOrMemOp(rl_src2, rl_src1, op);
1740 StoreFinalValueWide(rl_dest, rl_src2);
1741 return;
1742 }
1743 } else {
1744 // Need LHS to be the temp.
1745 rl_src1 = ForceTempWide(rl_src1);
1746 GenLongRegOrMemOp(rl_src1, rl_src2, op);
1747 }
1748 }
1749
1750 StoreFinalValueWide(rl_dest, rl_src1);
1751 }
1752
1753 void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
1754 if (cu_->target64) {
1755 rl_src = LoadValueWide(rl_src, kCoreReg);
1756 RegLocation rl_result;
1757 rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1758 OpRegCopy(rl_result.reg, rl_src.reg);
1759 OpReg(kOpNot, rl_result.reg);
1760 StoreValueWide(rl_dest, rl_result);
1761 } else {
1762     LOG(FATAL) << "Unexpected use of GenNotLong()";
1763 }
1764 }
1765
1766 void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src,
1767 int64_t imm, bool is_div) {
1768 if (imm == 0) {
1769 GenDivZeroException();
1770 } else if (imm == 1) {
1771 if (is_div) {
1772 // x / 1 == x.
1773 StoreValueWide(rl_dest, rl_src);
1774 } else {
1775 // x % 1 == 0.
1776 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1777 LoadConstantWide(rl_result.reg, 0);
1778 StoreValueWide(rl_dest, rl_result);
1779 }
1780 } else if (imm == -1) { // handle 0x8000000000000000 / -1 special case.
1781 if (is_div) {
1782 rl_src = LoadValueWide(rl_src, kCoreReg);
1783 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1784 RegStorage rs_temp = AllocTempWide();
1785
1786 OpRegCopy(rl_result.reg, rl_src.reg);
1787 LoadConstantWide(rs_temp, 0x8000000000000000);
1788
1789 // If x == MIN_LONG, return MIN_LONG.
1790 OpRegReg(kOpCmp, rl_src.reg, rs_temp);
1791 LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
1792
1793 // For x != MIN_LONG, x / -1 == -x.
1794 OpReg(kOpNeg, rl_result.reg);
1795
1796 minint_branch->target = NewLIR0(kPseudoTargetLabel);
1797 FreeTemp(rs_temp);
1798 StoreValueWide(rl_dest, rl_result);
1799 } else {
1800 // x % -1 == 0.
1801 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1802 LoadConstantWide(rl_result.reg, 0);
1803 StoreValueWide(rl_dest, rl_result);
1804 }
1805 } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
1806 // Division using shifting.
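    // For illustration, with imm == 4 (shift_amount == 2): a negative dividend
    // is biased by abs(imm) - 1 == 3 before the arithmetic shift so the result
    // truncates toward zero, e.g. -7 -> (-7 + 3) >> 2 == -1, while 7 >> 2 == 1;
    // a final negation handles negative divisors.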
1807 rl_src = LoadValueWide(rl_src, kCoreReg);
1808 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
1809 if (IsSameReg(rl_result.reg, rl_src.reg)) {
1810 RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg);
1811 rl_result.reg.SetReg(rs_temp.GetReg());
1812 }
1813 LoadConstantWide(rl_result.reg, std::abs(imm) - 1);
1814 OpRegReg(kOpAdd, rl_result.reg, rl_src.reg);
1815 NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg());
1816 OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg);
1817 int shift_amount = LowestSetBit(imm);
1818 OpRegImm(kOpAsr, rl_result.reg, shift_amount);
1819 if (imm < 0) {
1820 OpReg(kOpNeg, rl_result.reg);
1821 }
1822 StoreValueWide(rl_dest, rl_result);
1823 } else {
1824 CHECK(imm <= -2 || imm >= 2);
1825
1826 FlushReg(rs_r0q);
1827 Clobber(rs_r0q);
1828 LockTemp(rs_r0q);
1829 FlushReg(rs_r2q);
1830 Clobber(rs_r2q);
1831 LockTemp(rs_r2q);
1832
1833 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r2q, INVALID_SREG, INVALID_SREG};
1834
1835     // Use H. S. Warren's Hacker's Delight, Chapter 10, and
1836     // T. Granlund and P. L. Montgomery's "Division by Invariant Integers using Multiplication".
1837 int64_t magic;
1838 int shift;
1839 CalculateMagicAndShift(imm, magic, shift, true /* is_long */);
1840
1841 /*
1842      * For imm >= 2,
1843      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
1844      *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
1845      * For imm <= -2,
1846      *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
1847      *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
1848      * We implement this algorithm in the following way:
1849      * 1. multiply the magic number M by the numerator n, keeping the high 64 bits of the result in RDX
1850      * 2. if imm > 0 and magic < 0, add the numerator to RDX;
1851      *    if imm < 0 and magic > 0, subtract the numerator from RDX
1852      * 3. if S != 0, arithmetically shift RDX right by S bits
1853      * 4. add 1 to RDX if RDX < 0
1854      * 5. RDX now holds the quotient
1855 */
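    // As a smaller worked example of the same scheme (the 32-bit analogue, imm = 5):
    // M = 0x66666667, S = 1. For n = 7 the high half of M*n is 2; 2 >> 1 = 1 and
    // the sign bit contributes 0, so 7/5 = 1. For n = -7 the high half is -3;
    // -3 >> 1 = -2 and adding the sign bit (1) gives -7/5 = -1.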
1856
1857 // Numerator into RAX.
1858 RegStorage numerator_reg;
1859 if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
1860 // We will need the value later.
1861 rl_src = LoadValueWide(rl_src, kCoreReg);
1862 numerator_reg = rl_src.reg;
1863 OpRegCopyWide(rs_r0q, numerator_reg);
1864 } else {
1865 // Only need this once. Just put it into RAX.
1866 LoadValueDirectWideFixed(rl_src, rs_r0q);
1867 }
1868
1869 // RDX = magic.
1870 LoadConstantWide(rs_r2q, magic);
1871
1872     // RDX:RAX = magic * dividend.
1873 NewLIR1(kX86Imul64DaR, rs_r2q.GetReg());
1874
1875 if (imm > 0 && magic < 0) {
1876 // Add numerator to RDX.
1877 DCHECK(numerator_reg.Valid());
1878 OpRegReg(kOpAdd, rs_r2q, numerator_reg);
1879 } else if (imm < 0 && magic > 0) {
1880 DCHECK(numerator_reg.Valid());
1881 OpRegReg(kOpSub, rs_r2q, numerator_reg);
1882 }
1883
1884 // Do we need the shift?
1885 if (shift != 0) {
1886 // Shift RDX by 'shift' bits.
1887 OpRegImm(kOpAsr, rs_r2q, shift);
1888 }
1889
1890 // Move RDX to RAX.
1891 OpRegCopyWide(rs_r0q, rs_r2q);
1892
1893 // Move sign bit to bit 0, zeroing the rest.
1894 OpRegImm(kOpLsr, rs_r2q, 63);
1895
1896 // RDX = RDX + RAX.
1897 OpRegReg(kOpAdd, rs_r2q, rs_r0q);
1898
1899 // Quotient is in RDX.
1900 if (!is_div) {
1901 // We need to compute the remainder.
1902       // Remainder is numerator - (quotient * imm).
1903 DCHECK(numerator_reg.Valid());
1904 OpRegCopyWide(rs_r0q, numerator_reg);
1905
1906 // Imul doesn't support 64-bit imms.
1907 if (imm > std::numeric_limits<int32_t>::max() ||
1908 imm < std::numeric_limits<int32_t>::min()) {
1909 RegStorage rs_temp = AllocTempWide();
1910 LoadConstantWide(rs_temp, imm);
1911
1912         // RDX = quotient * imm.
1913 NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg());
1914
1915 FreeTemp(rs_temp);
1916 } else {
1917         // RDX = quotient * imm.
1918 int short_imm = static_cast<int>(imm);
1919 NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm);
1920 }
1921
1922       // RAX -= RDX, leaving the remainder in RAX.
1923 OpRegReg(kOpSub, rs_r0q, rs_r2q);
1924
1925 // Store result.
1926 OpRegCopyWide(rl_result.reg, rs_r0q);
1927 } else {
1928 // Store result.
1929 OpRegCopyWide(rl_result.reg, rs_r2q);
1930 }
1931 StoreValueWide(rl_dest, rl_result);
1932 FreeTemp(rs_r0q);
1933 FreeTemp(rs_r2q);
1934 }
1935 }
1936
1937 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
1938 RegLocation rl_src2, bool is_div) {
1939 if (!cu_->target64) {
1940     LOG(FATAL) << "Unexpected use of GenDivRemLong()";
1941 return;
1942 }
1943
1944 if (rl_src2.is_const) {
1945 DCHECK(rl_src2.wide);
1946 int64_t imm = mir_graph_->ConstantValueWide(rl_src2);
1947 GenDivRemLongLit(rl_dest, rl_src1, imm, is_div);
1948 return;
1949 }
1950
1951 // We have to use fixed registers, so flush all the temps.
1952 FlushAllRegs();
1953 LockCallTemps(); // Prepare for explicit register usage.
1954
1955 // Load LHS into RAX.
1956 LoadValueDirectWideFixed(rl_src1, rs_r0q);
1957
1958 // Load RHS into RCX.
1959 LoadValueDirectWideFixed(rl_src2, rs_r1q);
1960
1961 // Copy LHS sign bit into RDX.
1962 NewLIR0(kx86Cqo64Da);
1963
1964 // Handle division by zero case.
1965 GenDivZeroCheckWide(rs_r1q);
1966
1967 // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
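  // (Hardware idiv raises #DE when the quotient does not fit: +2^63 is not
  // representable, while Java defines Long.MIN_VALUE / -1 == Long.MIN_VALUE
  // and Long.MIN_VALUE % -1 == 0.)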
1968 NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
1969 LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
1970
1971 // RHS is -1.
1972 LoadConstantWide(rs_r6q, 0x8000000000000000);
1973 NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
1974 LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
1975
1976 // In 0x8000000000000000/-1 case.
1977 if (!is_div) {
1978     // For DIV, RAX already holds the right answer. For REM, we need RDX to be 0.
1979 NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
1980 }
1981 LIR* done = NewLIR1(kX86Jmp8, 0);
1982
1983 // Expected case.
1984 minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
1985 minint_branch->target = minus_one_branch->target;
1986 NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
1987 done->target = NewLIR0(kPseudoTargetLabel);
1988
1989 // Result is in RAX for div and RDX for rem.
1990 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
1991 if (!is_div) {
1992 rl_result.reg.SetReg(r2q);
1993 }
1994
1995 StoreValueWide(rl_dest, rl_result);
1996 }
1997
1998 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
1999 rl_src = LoadValueWide(rl_src, kCoreReg);
2000 RegLocation rl_result;
2001 if (cu_->target64) {
2002 rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2003 OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
2004 } else {
2005 rl_result = ForceTempWide(rl_src);
2006 if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) &&
2007 ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) {
2008 // The registers are the same, so we would clobber it before the use.
2009 RegStorage temp_reg = AllocTemp();
2010 OpRegCopy(temp_reg, rl_result.reg);
2011 rl_result.reg.SetHighReg(temp_reg.GetReg());
2012 }
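    // Two-word negation: -(H:L) == (-(H + carry)):(-L), where the low NEG sets
    // the carry exactly when L != 0. For example, negating 0x00000001_00000000
    // gives L = 0 (no carry) and H = -(1 + 0) = 0xFFFFFFFF, i.e. -2^32.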
2013 OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow
2014 OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF
2015 OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh
2016 }
2017 StoreValueWide(rl_dest, rl_result);
2018 }
2019
2020 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
2021 DCHECK_EQ(kX86, cu_->instruction_set);
2022 X86OpCode opcode = kX86Bkpt;
2023 switch (op) {
2024 case kOpCmp: opcode = kX86Cmp32RT; break;
2025 case kOpMov: opcode = kX86Mov32RT; break;
2026 default:
2027 LOG(FATAL) << "Bad opcode: " << op;
2028 break;
2029 }
2030 NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2031 }
2032
2033 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
2034 DCHECK_EQ(kX86_64, cu_->instruction_set);
2035 X86OpCode opcode = kX86Bkpt;
2036 if (cu_->target64 && r_dest.Is64BitSolo()) {
2037 switch (op) {
2038 case kOpCmp: opcode = kX86Cmp64RT; break;
2039 case kOpMov: opcode = kX86Mov64RT; break;
2040 default:
2041 LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
2042 break;
2043 }
2044 } else {
2045 switch (op) {
2046 case kOpCmp: opcode = kX86Cmp32RT; break;
2047 case kOpMov: opcode = kX86Mov32RT; break;
2048 default:
2049 LOG(FATAL) << "Bad opcode: " << op;
2050 break;
2051 }
2052 }
2053 NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
2054 }
2055
2056 /*
2057 * Generate array load
2058 */
2059 void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
2060 RegLocation rl_index, RegLocation rl_dest, int scale) {
2061 RegisterClass reg_class = RegClassBySize(size);
2062 int len_offset = mirror::Array::LengthOffset().Int32Value();
2063 RegLocation rl_result;
2064 rl_array = LoadValue(rl_array, kRefReg);
2065
2066 int data_offset;
2067 if (size == k64 || size == kDouble) {
2068 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2069 } else {
2070 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2071 }
2072
2073 bool constant_index = rl_index.is_const;
2074 int32_t constant_index_value = 0;
2075 if (!constant_index) {
2076 rl_index = LoadValue(rl_index, kCoreReg);
2077 } else {
2078 constant_index_value = mir_graph_->ConstantValue(rl_index);
2079 // If index is constant, just fold it into the data offset
2080 data_offset += constant_index_value << scale;
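    // (e.g. an int[] access with scale 2 and constant index 5 just adds
    // 20 bytes to the data offset.)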
2081     // Treat as a non-array (no index register) access below.
2082 rl_index.reg = RegStorage::InvalidReg();
2083 }
2084
2085 /* null object? */
2086 GenNullCheck(rl_array.reg, opt_flags);
2087
2088 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2089 if (constant_index) {
2090 GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2091 } else {
2092 GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2093 }
2094 }
2095 rl_result = EvalLoc(rl_dest, reg_class, true);
2096 LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size);
2097 if ((size == k64) || (size == kDouble)) {
2098 StoreValueWide(rl_dest, rl_result);
2099 } else {
2100 StoreValue(rl_dest, rl_result);
2101 }
2102 }
2103
2104 /*
2105 * Generate array store
2106 *
2107 */
2108 void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
2109 RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
2110 RegisterClass reg_class = RegClassBySize(size);
2111 int len_offset = mirror::Array::LengthOffset().Int32Value();
2112 int data_offset;
2113
2114 if (size == k64 || size == kDouble) {
2115 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
2116 } else {
2117 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
2118 }
2119
2120 rl_array = LoadValue(rl_array, kRefReg);
2121 bool constant_index = rl_index.is_const;
2122 int32_t constant_index_value = 0;
2123 if (!constant_index) {
2124 rl_index = LoadValue(rl_index, kCoreReg);
2125 } else {
2126 // If index is constant, just fold it into the data offset
2127 constant_index_value = mir_graph_->ConstantValue(rl_index);
2128 data_offset += constant_index_value << scale;
2129     // Treat as a non-array (no index register) access below.
2130 rl_index.reg = RegStorage::InvalidReg();
2131 }
2132
2133 /* null object? */
2134 GenNullCheck(rl_array.reg, opt_flags);
2135
2136 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
2137 if (constant_index) {
2138 GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset);
2139 } else {
2140 GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset);
2141 }
2142 }
2143 if ((size == k64) || (size == kDouble)) {
2144 rl_src = LoadValueWide(rl_src, reg_class);
2145 } else {
2146 rl_src = LoadValue(rl_src, reg_class);
2147 }
2148 // If the src reg can't be byte accessed, move it to a temp first.
2149 if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
2150 RegStorage temp = AllocTemp();
2151 OpRegCopy(temp, rl_src.reg);
2152 StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
2153 } else {
2154 StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size);
2155 }
2156 if (card_mark) {
2157     // Free rl_index if it's a temp. Ensures there are 2 free regs for card mark.
2158 if (!constant_index) {
2159 FreeTemp(rl_index.reg);
2160 }
2161 MarkGCCard(rl_src.reg, rl_array.reg);
2162 }
2163 }
2164
2165 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2166 RegLocation rl_src, int shift_amount) {
2167 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2168 if (cu_->target64) {
2169 OpKind op = static_cast<OpKind>(0); /* Make gcc happy */
2170 switch (opcode) {
2171 case Instruction::SHL_LONG:
2172 case Instruction::SHL_LONG_2ADDR:
2173 op = kOpLsl;
2174 break;
2175 case Instruction::SHR_LONG:
2176 case Instruction::SHR_LONG_2ADDR:
2177 op = kOpAsr;
2178 break;
2179 case Instruction::USHR_LONG:
2180 case Instruction::USHR_LONG_2ADDR:
2181 op = kOpLsr;
2182 break;
2183 default:
2184 LOG(FATAL) << "Unexpected case";
2185 }
2186 OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
2187 } else {
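    // With the value held in a register pair, a shift by k decomposes as follows
    // (shown for SHL): k < 32: high = (high << k) | (low >> (32 - k)) via SHLD,
    // low <<= k; k == 32: high = low, low = 0; k > 32: high = low << (k - 32),
    // low = 0. SHR/USHR mirror this with SHRD and sign/zero fill of the high word.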
2188 switch (opcode) {
2189 case Instruction::SHL_LONG:
2190 case Instruction::SHL_LONG_2ADDR:
2191 DCHECK_NE(shift_amount, 1); // Prevent a double store from happening.
2192 if (shift_amount == 32) {
2193 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2194 LoadConstant(rl_result.reg.GetLow(), 0);
2195 } else if (shift_amount > 31) {
2196 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
2197 NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
2198 LoadConstant(rl_result.reg.GetLow(), 0);
2199 } else {
2200 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2201 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2202 NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(),
2203 shift_amount);
2204 NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
2205 }
2206 break;
2207 case Instruction::SHR_LONG:
2208 case Instruction::SHR_LONG_2ADDR:
2209 if (shift_amount == 32) {
2210 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2211 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2212 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2213 } else if (shift_amount > 31) {
2214 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2215 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2216 NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2217 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
2218 } else {
2219 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2220 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2221 NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2222 shift_amount);
2223 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
2224 }
2225 break;
2226 case Instruction::USHR_LONG:
2227 case Instruction::USHR_LONG_2ADDR:
2228 if (shift_amount == 32) {
2229 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2230 LoadConstant(rl_result.reg.GetHigh(), 0);
2231 } else if (shift_amount > 31) {
2232 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
2233 NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
2234 LoadConstant(rl_result.reg.GetHigh(), 0);
2235 } else {
2236 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow());
2237 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
2238 NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(),
2239 shift_amount);
2240 NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
2241 }
2242 break;
2243 default:
2244 LOG(FATAL) << "Unexpected case";
2245 }
2246 }
2247 return rl_result;
2248 }
2249
2250 void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
2251 RegLocation rl_src, RegLocation rl_shift) {
2252 // Per spec, we only care about low 6 bits of shift amount.
2253 int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
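  // (e.g. a constant shift of 65 is treated as a shift of 1.)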
2254 if (shift_amount == 0) {
2255 rl_src = LoadValueWide(rl_src, kCoreReg);
2256 StoreValueWide(rl_dest, rl_src);
2257 return;
2258 } else if (shift_amount == 1 &&
2259 (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
2260 // Need to handle this here to avoid calling StoreValueWide twice.
2261 GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
2262 return;
2263 }
2264 if (BadOverlap(rl_src, rl_dest)) {
2265 GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
2266 return;
2267 }
2268 rl_src = LoadValueWide(rl_src, kCoreReg);
2269 RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
2270 StoreValueWide(rl_dest, rl_result);
2271 }
2272
2273 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
2274 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
2275 bool isConstSuccess = false;
2276 switch (opcode) {
2277 case Instruction::ADD_LONG:
2278 case Instruction::AND_LONG:
2279 case Instruction::OR_LONG:
2280 case Instruction::XOR_LONG:
2281 if (rl_src2.is_const) {
2282 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2283 } else {
2284 DCHECK(rl_src1.is_const);
2285 isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2286 }
2287 break;
2288 case Instruction::SUB_LONG:
2289 case Instruction::SUB_LONG_2ADDR:
2290 if (rl_src2.is_const) {
2291 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2292 } else {
2293 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
2294 isConstSuccess = true;
2295 }
2296 break;
2297 case Instruction::ADD_LONG_2ADDR:
2298 case Instruction::OR_LONG_2ADDR:
2299 case Instruction::XOR_LONG_2ADDR:
2300 case Instruction::AND_LONG_2ADDR:
2301 if (rl_src2.is_const) {
2302 if (GenerateTwoOperandInstructions()) {
2303 isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode);
2304 } else {
2305 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
2306 }
2307 } else {
2308 DCHECK(rl_src1.is_const);
2309 isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
2310 }
2311 break;
2312 default:
2313 isConstSuccess = false;
2314 break;
2315 }
2316
2317 if (!isConstSuccess) {
2318 // Default - bail to non-const handler.
2319 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
2320 }
2321 }
2322
2323 bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
2324 switch (op) {
2325 case Instruction::AND_LONG_2ADDR:
2326 case Instruction::AND_LONG:
2327 return value == -1;
2328 case Instruction::OR_LONG:
2329 case Instruction::OR_LONG_2ADDR:
2330 case Instruction::XOR_LONG:
2331 case Instruction::XOR_LONG_2ADDR:
2332 return value == 0;
2333 default:
2334 return false;
2335 }
2336 }
2337
2338 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
2339 bool is_high_op) {
2340 bool rhs_in_mem = rhs.location != kLocPhysReg;
2341 bool dest_in_mem = dest.location != kLocPhysReg;
2342 bool is64Bit = cu_->target64;
2343 DCHECK(!rhs_in_mem || !dest_in_mem);
2344 switch (op) {
2345 case Instruction::ADD_LONG:
2346 case Instruction::ADD_LONG_2ADDR:
2347 if (dest_in_mem) {
2348 return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR;
2349 } else if (rhs_in_mem) {
2350 return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM;
2351 }
2352 return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR;
2353 case Instruction::SUB_LONG:
2354 case Instruction::SUB_LONG_2ADDR:
2355 if (dest_in_mem) {
2356 return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR;
2357 } else if (rhs_in_mem) {
2358 return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM;
2359 }
2360 return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR;
2361 case Instruction::AND_LONG_2ADDR:
2362 case Instruction::AND_LONG:
2363 if (dest_in_mem) {
2364 return is64Bit ? kX86And64MR : kX86And32MR;
2365 }
2366 if (is64Bit) {
2367 return rhs_in_mem ? kX86And64RM : kX86And64RR;
2368 }
2369 return rhs_in_mem ? kX86And32RM : kX86And32RR;
2370 case Instruction::OR_LONG:
2371 case Instruction::OR_LONG_2ADDR:
2372 if (dest_in_mem) {
2373 return is64Bit ? kX86Or64MR : kX86Or32MR;
2374 }
2375 if (is64Bit) {
2376 return rhs_in_mem ? kX86Or64RM : kX86Or64RR;
2377 }
2378 return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
2379 case Instruction::XOR_LONG:
2380 case Instruction::XOR_LONG_2ADDR:
2381 if (dest_in_mem) {
2382 return is64Bit ? kX86Xor64MR : kX86Xor32MR;
2383 }
2384 if (is64Bit) {
2385 return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR;
2386 }
2387 return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
2388 default:
2389 LOG(FATAL) << "Unexpected opcode: " << op;
2390 return kX86Add32RR;
2391 }
2392 }
2393
2394 X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
2395 int32_t value) {
2396 bool in_mem = loc.location != kLocPhysReg;
2397 bool is64Bit = cu_->target64;
2398 bool byte_imm = IS_SIMM8(value);
2399 DCHECK(in_mem || !loc.reg.IsFloat());
2400 switch (op) {
2401 case Instruction::ADD_LONG:
2402 case Instruction::ADD_LONG_2ADDR:
2403 if (byte_imm) {
2404 if (in_mem) {
2405 return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
2406 }
2407 return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
2408 }
2409 if (in_mem) {
2410 return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI;
2411 }
2412 return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI;
2413 case Instruction::SUB_LONG:
2414 case Instruction::SUB_LONG_2ADDR:
2415 if (byte_imm) {
2416 if (in_mem) {
2417 return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
2418 }
2419 return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
2420 }
2421 if (in_mem) {
2422 return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI;
2423 }
2424 return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI;
2425 case Instruction::AND_LONG_2ADDR:
2426 case Instruction::AND_LONG:
2427 if (byte_imm) {
2428 if (is64Bit) {
2429 return in_mem ? kX86And64MI8 : kX86And64RI8;
2430 }
2431 return in_mem ? kX86And32MI8 : kX86And32RI8;
2432 }
2433 if (is64Bit) {
2434 return in_mem ? kX86And64MI : kX86And64RI;
2435 }
2436 return in_mem ? kX86And32MI : kX86And32RI;
2437 case Instruction::OR_LONG:
2438 case Instruction::OR_LONG_2ADDR:
2439 if (byte_imm) {
2440 if (is64Bit) {
2441 return in_mem ? kX86Or64MI8 : kX86Or64RI8;
2442 }
2443 return in_mem ? kX86Or32MI8 : kX86Or32RI8;
2444 }
2445 if (is64Bit) {
2446 return in_mem ? kX86Or64MI : kX86Or64RI;
2447 }
2448 return in_mem ? kX86Or32MI : kX86Or32RI;
2449 case Instruction::XOR_LONG:
2450 case Instruction::XOR_LONG_2ADDR:
2451 if (byte_imm) {
2452 if (is64Bit) {
2453 return in_mem ? kX86Xor64MI8 : kX86Xor64RI8;
2454 }
2455 return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
2456 }
2457 if (is64Bit) {
2458 return in_mem ? kX86Xor64MI : kX86Xor64RI;
2459 }
2460 return in_mem ? kX86Xor32MI : kX86Xor32RI;
2461 default:
2462 LOG(FATAL) << "Unexpected opcode: " << op;
2463 return kX86Add32MI;
2464 }
2465 }
2466
2467 bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
2468 DCHECK(rl_src.is_const);
2469 int64_t val = mir_graph_->ConstantValueWide(rl_src);
2470
2471 if (cu_->target64) {
2472     // We can use an immediate only if the value fits in 32 bits.
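    // (x86-64 ALU immediates are sign-extended imm32 fields, so e.g. 0x7FFFFFFF
    // can be encoded directly while an operand of 0x80000000 == 2^31 cannot.)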
2473 if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2474 return false;
2475 }
2476
2477 rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2478
2479 if ((rl_dest.location == kLocDalvikFrame) ||
2480 (rl_dest.location == kLocCompilerTemp)) {
2481 int r_base = rs_rX86_SP.GetReg();
2482 int displacement = SRegOffset(rl_dest.s_reg_low);
2483
2484 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2485 X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2486 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
2487 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2488 true /* is_load */, true /* is64bit */);
2489 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2490 false /* is_load */, true /* is64bit */);
2491 return true;
2492 }
2493
2494 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2495 DCHECK_EQ(rl_result.location, kLocPhysReg);
2496 DCHECK(!rl_result.reg.IsFloat());
2497
2498 X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2499 NewLIR2(x86op, rl_result.reg.GetReg(), val);
2500
2501 StoreValueWide(rl_dest, rl_result);
2502 return true;
2503 }
2504
2505 int32_t val_lo = Low32Bits(val);
2506 int32_t val_hi = High32Bits(val);
2507 rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2508
2509 // Can we just do this into memory?
2510 if ((rl_dest.location == kLocDalvikFrame) ||
2511 (rl_dest.location == kLocCompilerTemp)) {
2512 int r_base = rs_rX86_SP.GetReg();
2513 int displacement = SRegOffset(rl_dest.s_reg_low);
2514
2515 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2516 if (!IsNoOp(op, val_lo)) {
2517 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2518 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
2519 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2520 true /* is_load */, true /* is64bit */);
2521 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
2522 false /* is_load */, true /* is64bit */);
2523 }
2524 if (!IsNoOp(op, val_hi)) {
2525 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2526 LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
2527 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2528 true /* is_load */, true /* is64bit */);
2529 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
2530 false /* is_load */, true /* is64bit */);
2531 }
2532 return true;
2533 }
2534
2535 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
2536 DCHECK_EQ(rl_result.location, kLocPhysReg);
2537 DCHECK(!rl_result.reg.IsFloat());
2538
2539 if (!IsNoOp(op, val_lo)) {
2540 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2541 NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2542 }
2543 if (!IsNoOp(op, val_hi)) {
2544 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
2545 NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
2546 }
2547 StoreValueWide(rl_dest, rl_result);
2548 return true;
2549 }
2550
2551 bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
2552 RegLocation rl_src2, Instruction::Code op) {
2553 DCHECK(rl_src2.is_const);
2554 int64_t val = mir_graph_->ConstantValueWide(rl_src2);
2555
2556 if (cu_->target64) {
2557     // We can use an immediate only if the value fits in 32 bits.
2558 if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) {
2559 return false;
2560 }
2561 if (rl_dest.location == kLocPhysReg &&
2562 rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) {
2563 X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
2564 OpRegCopy(rl_dest.reg, rl_src1.reg);
2565 NewLIR2(x86op, rl_dest.reg.GetReg(), val);
2566 StoreFinalValueWide(rl_dest, rl_dest);
2567 return true;
2568 }
2569
2570 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2571 // We need the values to be in a temporary
2572 RegLocation rl_result = ForceTempWide(rl_src1);
2573
2574 X86OpCode x86op = GetOpcode(op, rl_result, false, val);
2575 NewLIR2(x86op, rl_result.reg.GetReg(), val);
2576
2577 StoreFinalValueWide(rl_dest, rl_result);
2578 return true;
2579 }
2580
2581 int32_t val_lo = Low32Bits(val);
2582 int32_t val_hi = High32Bits(val);
2583 rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
2584 rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
2585
2586 // Can we do this directly into the destination registers?
2587 if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
2588 rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() &&
2589 rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) {
2590 if (!IsNoOp(op, val_lo)) {
2591 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
2592 NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo);
2593 }
2594 if (!IsNoOp(op, val_hi)) {
2595 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
2596 NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi);
2597 }
2598
2599 StoreFinalValueWide(rl_dest, rl_dest);
2600 return true;
2601 }
2602
2603 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
2604 DCHECK_EQ(rl_src1.location, kLocPhysReg);
2605
2606 // We need the values to be in a temporary
2607 RegLocation rl_result = ForceTempWide(rl_src1);
2608 if (!IsNoOp(op, val_lo)) {
2609 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
2610 NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
2611 }
2612 if (!IsNoOp(op, val_hi)) {
2613 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
2614 NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
2615 }
2616
2617 StoreFinalValueWide(rl_dest, rl_result);
2618 return true;
2619 }
2620
2621 // For final classes there are no sub-classes to check and so we can answer the instance-of
2622 // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
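// For example, "obj instanceof String" (a final class) reduces to loading obj's
// class pointer, comparing it against the resolved java.lang.String class, and
// materializing the flag with a SETcc.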
2623 void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
2624 RegLocation rl_dest, RegLocation rl_src) {
2625 RegLocation object = LoadValue(rl_src, kRefReg);
2626 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
2627 RegStorage result_reg = rl_result.reg;
2628
2629 // For 32-bit, SETcc only works with EAX..EDX.
2630 RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg;
2631 if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) {
2632 result_reg = AllocateByteRegister();
2633 }
2634
2635 // Assume that there is no match.
2636 LoadConstant(result_reg, 0);
2637 LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL);
2638
2639 // We will use this register to compare to memory below.
2640 // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode).
2641 // For this reason, force allocation of a 32 bit register to use, so that the
2642   // compare to memory will be done using a 32 bit comparison.
2643 // The LoadRefDisp(s) below will work normally, even in 64 bit mode.
2644 RegStorage check_class = AllocTemp();
2645
2646 // If Method* is already in a register, we can save a copy.
2647 RegLocation rl_method = mir_graph_->GetMethodLoc();
2648 int32_t offset_of_type = mirror::Array::DataOffset(
2649 sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
2650 (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
2651
2652 if (rl_method.location == kLocPhysReg) {
2653 if (use_declaring_class) {
2654 LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
2655 check_class, kNotVolatile);
2656 } else {
2657 LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
2658 check_class, kNotVolatile);
2659 LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
2660 }
2661 } else {
2662 LoadCurrMethodDirect(check_class);
2663 if (use_declaring_class) {
2664 LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
2665 check_class, kNotVolatile);
2666 } else {
2667 LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
2668 check_class, kNotVolatile);
2669 LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile);
2670 }
2671 }
2672
2673 // Compare the computed class to the class in the object.
2674 DCHECK_EQ(object.location, kLocPhysReg);
2675 OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value());
2676
2677 // Set the low byte of the result to 0 or 1 from the compare condition code.
2678 NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);
2679
2680 LIR* target = NewLIR0(kPseudoTargetLabel);
2681 null_branchover->target = target;
2682 FreeTemp(check_class);
2683 if (IsTemp(result_reg)) {
2684 OpRegCopy(rl_result.reg, result_reg);
2685 FreeTemp(result_reg);
2686 }
2687 StoreValue(rl_dest, rl_result);
2688 }
2689
2690 void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
2691 RegLocation rl_lhs, RegLocation rl_rhs) {
2692 OpKind op = kOpBkpt;
2693 bool is_div_rem = false;
2694 bool unary = false;
2695 bool shift_op = false;
2696 bool is_two_addr = false;
2697 RegLocation rl_result;
2698 switch (opcode) {
2699 case Instruction::NEG_INT:
2700 op = kOpNeg;
2701 unary = true;
2702 break;
2703 case Instruction::NOT_INT:
2704 op = kOpMvn;
2705 unary = true;
2706 break;
2707 case Instruction::ADD_INT_2ADDR:
2708 is_two_addr = true;
2709 // Fallthrough
2710 case Instruction::ADD_INT:
2711 op = kOpAdd;
2712 break;
2713 case Instruction::SUB_INT_2ADDR:
2714 is_two_addr = true;
2715 // Fallthrough
2716 case Instruction::SUB_INT:
2717 op = kOpSub;
2718 break;
2719 case Instruction::MUL_INT_2ADDR:
2720 is_two_addr = true;
2721 // Fallthrough
2722 case Instruction::MUL_INT:
2723 op = kOpMul;
2724 break;
2725 case Instruction::DIV_INT_2ADDR:
2726 is_two_addr = true;
2727 // Fallthrough
2728 case Instruction::DIV_INT:
2729 op = kOpDiv;
2730 is_div_rem = true;
2731 break;
2732 /* NOTE: returns in kArg1 */
2733 case Instruction::REM_INT_2ADDR:
2734 is_two_addr = true;
2735 // Fallthrough
2736 case Instruction::REM_INT:
2737 op = kOpRem;
2738 is_div_rem = true;
2739 break;
2740 case Instruction::AND_INT_2ADDR:
2741 is_two_addr = true;
2742 // Fallthrough
2743 case Instruction::AND_INT:
2744 op = kOpAnd;
2745 break;
2746 case Instruction::OR_INT_2ADDR:
2747 is_two_addr = true;
2748 // Fallthrough
2749 case Instruction::OR_INT:
2750 op = kOpOr;
2751 break;
2752 case Instruction::XOR_INT_2ADDR:
2753 is_two_addr = true;
2754 // Fallthrough
2755 case Instruction::XOR_INT:
2756 op = kOpXor;
2757 break;
2758 case Instruction::SHL_INT_2ADDR:
2759 is_two_addr = true;
2760 // Fallthrough
2761 case Instruction::SHL_INT:
2762 shift_op = true;
2763 op = kOpLsl;
2764 break;
2765 case Instruction::SHR_INT_2ADDR:
2766 is_two_addr = true;
2767 // Fallthrough
2768 case Instruction::SHR_INT:
2769 shift_op = true;
2770 op = kOpAsr;
2771 break;
2772 case Instruction::USHR_INT_2ADDR:
2773 is_two_addr = true;
2774 // Fallthrough
2775 case Instruction::USHR_INT:
2776 shift_op = true;
2777 op = kOpLsr;
2778 break;
2779 default:
2780 LOG(FATAL) << "Invalid word arith op: " << opcode;
2781 }
2782
2783 // Can we convert to a two address instruction?
2784 if (!is_two_addr &&
2785 (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
2786 mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
2787 is_two_addr = true;
2788 }
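  // (e.g. "add-int v0, v0, v1" can then be emitted as a single two-operand ADD
  // on v0's location.)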
2789
2790 if (!GenerateTwoOperandInstructions()) {
2791 is_two_addr = false;
2792 }
2793
2794 // Get the div/rem stuff out of the way.
2795 if (is_div_rem) {
2796 rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true);
2797 StoreValue(rl_dest, rl_result);
2798 return;
2799 }
2800
2801 // If we generate any memory access below, it will reference a dalvik reg.
2802 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2803
2804 if (unary) {
2805 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2806 rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2807 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2808 OpRegReg(op, rl_result.reg, rl_lhs.reg);
2809 } else {
2810 if (shift_op) {
2811 // X86 doesn't require masking and must use ECX.
2812 RegStorage t_reg = TargetReg(kCount, kNotWide); // rCX
2813 LoadValueDirectFixed(rl_rhs, t_reg);
2814 if (is_two_addr) {
2815 // Can we do this directly into memory?
2816 rl_rhs = LoadValue(rl_rhs, kCoreReg);
2817 rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2818 if (rl_result.location != kLocPhysReg) {
2819 // Okay, we can do this into memory
2820 OpMemReg(op, rl_result, t_reg.GetReg());
2821 FreeTemp(t_reg);
2822 return;
2823 } else if (!rl_result.reg.IsFloat()) {
2824 // Can do this directly into the result register
2825 OpRegReg(op, rl_result.reg, t_reg);
2826 FreeTemp(t_reg);
2827 StoreFinalValue(rl_dest, rl_result);
2828 return;
2829 }
2830 }
2831 // Three address form, or we can't do directly.
2832 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2833 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2834 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
2835 FreeTemp(t_reg);
2836 } else {
2837 // Multiply is 3 operand only (sort of).
2838 if (is_two_addr && op != kOpMul) {
2839 // Can we do this directly into memory?
2840 rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2841 if (rl_result.location == kLocPhysReg) {
2842 // Ensure res is in a core reg
2843 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2844 // Can we do this from memory directly?
2845 rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
2846 if (rl_rhs.location != kLocPhysReg) {
2847 OpRegMem(op, rl_result.reg, rl_rhs);
2848 StoreFinalValue(rl_dest, rl_result);
2849 return;
2850 } else if (!rl_rhs.reg.IsFloat()) {
2851 OpRegReg(op, rl_result.reg, rl_rhs.reg);
2852 StoreFinalValue(rl_dest, rl_result);
2853 return;
2854 }
2855 }
2856 rl_rhs = LoadValue(rl_rhs, kCoreReg);
2857         // rl_rhs and rl_dest might be the same VR; in that case rl_dest is in a
2858         // register after the LoadValue above, but rl_result has not been updated
2859         // yet, so refresh it here.
2860 rl_result = UpdateLocTyped(rl_dest, kCoreReg);
2861 if (rl_result.location != kLocPhysReg) {
2862 // Okay, we can do this into memory.
2863 OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
2864 return;
2865 } else if (!rl_result.reg.IsFloat()) {
2866 // Can do this directly into the result register.
2867 OpRegReg(op, rl_result.reg, rl_rhs.reg);
2868 StoreFinalValue(rl_dest, rl_result);
2869 return;
2870 } else {
2871 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2872 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2873 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2874 }
2875 } else {
2876 // Try to use reg/memory instructions.
2877 rl_lhs = UpdateLocTyped(rl_lhs, kCoreReg);
2878 rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg);
2879 // We can't optimize with FP registers.
2880 if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
2881 // Something is difficult, so fall back to the standard case.
2882 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2883 rl_rhs = LoadValue(rl_rhs, kCoreReg);
2884 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2885 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2886 } else {
2887 // We can optimize by moving to result and using memory operands.
2888 if (rl_rhs.location != kLocPhysReg) {
2889 // Force LHS into result.
2890           // We should be careful with the order here:
2891           // if rl_dest and rl_lhs point to the same VR, we should load first;
2892           // if they are different, we should find a register for dest first.
2893 if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
2894 mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
2895 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2896 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2897 // No-op if these are the same.
2898 OpRegCopy(rl_result.reg, rl_lhs.reg);
2899 } else {
2900 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2901 LoadValueDirect(rl_lhs, rl_result.reg);
2902 }
2903 OpRegMem(op, rl_result.reg, rl_rhs);
2904 } else if (rl_lhs.location != kLocPhysReg) {
2905 // RHS is in a register; LHS is in memory.
2906 if (op != kOpSub) {
2907 // Force RHS into result and operate on memory.
2908 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2909 OpRegCopy(rl_result.reg, rl_rhs.reg);
2910 OpRegMem(op, rl_result.reg, rl_lhs);
2911 } else {
2912 // Subtraction isn't commutative.
2913 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2914 rl_rhs = LoadValue(rl_rhs, kCoreReg);
2915 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2916 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2917 }
2918 } else {
2919 // Both are in registers.
2920 rl_lhs = LoadValue(rl_lhs, kCoreReg);
2921 rl_rhs = LoadValue(rl_rhs, kCoreReg);
2922 rl_result = EvalLoc(rl_dest, kCoreReg, true);
2923 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
2924 }
2925 }
2926 }
2927 }
2928 }
2929 StoreValue(rl_dest, rl_result);
2930 }
2931
2932 bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
2933 // If we have non-core registers, then we can't do good things.
2934 if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) {
2935 return false;
2936 }
2937 if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) {
2938 return false;
2939 }
2940
2941 // Everything will be fine :-).
2942 return true;
2943 }
2944
2945 void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
2946 if (!cu_->target64) {
2947 Mir2Lir::GenIntToLong(rl_dest, rl_src);
2948 return;
2949 }
2950 rl_src = UpdateLocTyped(rl_src, kCoreReg);
2951 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
2952 if (rl_src.location == kLocPhysReg) {
2953 NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
2954 } else {
2955 int displacement = SRegOffset(rl_src.s_reg_low);
2956 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
2957 LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(),
2958 displacement + LOWORD_OFFSET);
2959 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
2960 true /* is_load */, true /* is_64bit */);
2961 }
2962 StoreValueWide(rl_dest, rl_result);
2963 }
2964
2965 void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
2966 RegLocation rl_src1, RegLocation rl_shift) {
2967 if (!cu_->target64) {
2968 Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
2969 return;
2970 }
2971
2972 bool is_two_addr = false;
2973 OpKind op = kOpBkpt;
2974 RegLocation rl_result;
2975
2976 switch (opcode) {
2977 case Instruction::SHL_LONG_2ADDR:
2978 is_two_addr = true;
2979 // Fallthrough
2980 case Instruction::SHL_LONG:
2981 op = kOpLsl;
2982 break;
2983 case Instruction::SHR_LONG_2ADDR:
2984 is_two_addr = true;
2985 // Fallthrough
2986 case Instruction::SHR_LONG:
2987 op = kOpAsr;
2988 break;
2989 case Instruction::USHR_LONG_2ADDR:
2990 is_two_addr = true;
2991 // Fallthrough
2992 case Instruction::USHR_LONG:
2993 op = kOpLsr;
2994 break;
2995 default:
2996 op = kOpBkpt;
2997 }
2998
2999 // X86 doesn't require masking and must use ECX.
3000 RegStorage t_reg = TargetReg(kCount, kNotWide); // rCX
3001 LoadValueDirectFixed(rl_shift, t_reg);
3002 if (is_two_addr) {
3003 // Can we do this directly into memory?
3004 rl_result = UpdateLocWideTyped(rl_dest, kCoreReg);
3005 if (rl_result.location != kLocPhysReg) {
3006 // Okay, we can do this into memory
3007 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
3008 OpMemReg(op, rl_result, t_reg.GetReg());
3009 } else if (!rl_result.reg.IsFloat()) {
3010 // Can do this directly into the result register
3011 OpRegReg(op, rl_result.reg, t_reg);
3012 StoreFinalValueWide(rl_dest, rl_result);
3013 }
3014 } else {
3015 // Three address form, or we can't do directly.
3016 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
3017 rl_result = EvalLocWide(rl_dest, kCoreReg, true);
3018 OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg);
3019 StoreFinalValueWide(rl_dest, rl_result);
3020 }
3021
3022 FreeTemp(t_reg);
3023 }
3024
3025 } // namespace art
3026