1 /*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "Dalvik.h"
18 #include "libdex/OpCode.h"
19 #include "dexdump/OpCodeNames.h"
20
21 #include "../../CompilerInternals.h"
22 #include "ArmLIR.h"
23 #include <unistd.h> /* for cacheflush */
24
25 /*
26 * opcode: ArmOpCode enum
27 * skeleton: pre-designated bit-pattern for this opcode
28 * k0: key to applying ds/de
29 * ds: dest start bit position
30 * de: dest end bit position
31 * k1: key to applying s1s/s1e
32 * s1s: src1 start bit position
33 * s1e: src1 end bit position
34 * k2: key to applying s2s/s2e
35 * s2s: src2 start bit position
36 * s2e: src2 end bit position
37 * operands: number of operands (for sanity check purposes)
38 * name: mnemonic name
39 * fmt: for pretty-printing
 * size: instruction size, in 16-bit code units (1 for Thumb, 2 for Thumb2)
40 */
41 #define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
42 operands, name, fmt, size) \
43 {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}}, \
44 opcode, operands, name, fmt, size}
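/*
 * For illustration, the THUMB_ADD_RRI3 entry in the table below,
 *
 *     ENCODING_MAP(THUMB_ADD_RRI3, 0x1c00,
 *                  BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
 *                  IS_TERTIARY_OP | CLOBBER_DEST,
 *                  "add", "r!0d, r!1d, #!2d", 1),
 *
 * yields an initializer with skeleton 0x1c00 whose dest operand is inserted
 * into bits [2:0], src1 into bits [5:3], and the 3-bit immediate into bits
 * [8:6], matching the Thumb "add rd, rn, #imm3" layout. The trailing 1 is
 * the size argument: one 16-bit code unit.
 */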
45
46 /* Instruction dump string format keys: !pf, where "!" is the start
47 * of the key, "p" is which numeric operand to use and "f" is the
48 * print format.
49 *
50 * [p]ositions:
51 * 0 -> operands[0] (dest)
52 * 1 -> operands[1] (src1)
53 * 2 -> operands[2] (src2)
54 *
55 * [f]ormats:
56 * h -> 4-digit hex
57 * d -> decimal
58 * D -> decimal+8 (used to convert 3-bit regnum field to high reg)
59 * E -> decimal*4
60 * F -> decimal*2
61 * c -> branch condition (beq, bne, etc.)
62 * t -> pc-relative target
63 * u -> 1st half of bl[x] target
64 * v -> 2nd half of bl[x] target
65 * R -> register list
66 * s -> single precision floating point register
67 * S -> double precision floating point register
68 * m -> Thumb2 modified immediate
69 * M -> Thumb2 16-bit zero-extended immediate
70 *
71 * [!] escape. To insert "!", use "!!"
72 */
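/*
 * Worked example of the format keys above: with operands[] = {1, 2, 7},
 * the THUMB_ADD_RRI3 fmt string "r!0d, r!1d, #!2d" prints as
 * "r1, r2, #7" (next to the mnemonic "add"), while a "!2E" key would
 * print 28 (operands[2] * 4) and "!2F" would print 14 (operands[2] * 2).
 */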
73 /* NOTE: must be kept in sync with enum ArmOpcode from ArmLIR.h */
74 ArmEncodingMap EncodingMap[ARM_LAST] = {
75 ENCODING_MAP(ARM_16BIT_DATA, 0x0000,
76 BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1,
77 IS_UNARY_OP,
78 "data", "0x!0h(!0d)", 1),
79 ENCODING_MAP(THUMB_ADC, 0x4140,
80 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
81 IS_BINARY_OP | CLOBBER_DEST,
82 "adc", "r!0d, r!1d", 1),
83 ENCODING_MAP(THUMB_ADD_RRI3, 0x1c00,
84 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
85 IS_TERTIARY_OP | CLOBBER_DEST,
86 "add", "r!0d, r!1d, #!2d", 1),
87 ENCODING_MAP(THUMB_ADD_RI8, 0x3000,
88 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
89 IS_BINARY_OP | CLOBBER_DEST,
90 "add", "r!0d, r!0d, #!1d", 1),
91 ENCODING_MAP(THUMB_ADD_RRR, 0x1800,
92 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
93 IS_TERTIARY_OP | CLOBBER_DEST,
94 "add", "r!0d, r!1d, r!2d", 1),
95 ENCODING_MAP(THUMB_ADD_RR_LH, 0x4440,
96 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
97 IS_BINARY_OP | CLOBBER_DEST,
98 "add",
99 "r!0d, r!1d", 1),
100 ENCODING_MAP(THUMB_ADD_RR_HL, 0x4480,
101 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
102 IS_BINARY_OP | CLOBBER_DEST,
103 "add", "r!0d, r!1d", 1),
104 ENCODING_MAP(THUMB_ADD_RR_HH, 0x44c0,
105 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
106 IS_BINARY_OP | CLOBBER_DEST,
107 "add", "r!0d, r!1d", 1),
108 ENCODING_MAP(THUMB_ADD_PC_REL, 0xa000,
109 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
110 IS_TERTIARY_OP | CLOBBER_DEST,
111 "add", "r!0d, pc, #!1E", 1),
112 ENCODING_MAP(THUMB_ADD_SP_REL, 0xa800,
113 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
114 IS_BINARY_OP | CLOBBER_DEST,
115 "add", "r!0d, sp, #!1E", 1),
116 ENCODING_MAP(THUMB_ADD_SPI7, 0xb000,
117 BITBLT, 6, 0, UNUSED, -1, -1, UNUSED, -1, -1,
118 IS_UNARY_OP | CLOBBER_DEST,
119 "add", "sp, #!0d*4", 1),
120 ENCODING_MAP(THUMB_AND_RR, 0x4000,
121 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
122 IS_BINARY_OP | CLOBBER_DEST,
123 "and", "r!0d, r!1d", 1),
124 ENCODING_MAP(THUMB_ASR, 0x1000,
125 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
126 IS_TERTIARY_OP | CLOBBER_DEST,
127 "asr", "r!0d, r!1d, #!2d", 1),
128 ENCODING_MAP(THUMB_ASRV, 0x4100,
129 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
130 IS_BINARY_OP | CLOBBER_DEST,
131 "asr", "r!0d, r!1d", 1),
132 ENCODING_MAP(THUMB_B_COND, 0xd000,
133 BITBLT, 7, 0, BITBLT, 11, 8, UNUSED, -1, -1,
134 IS_BINARY_OP | IS_BRANCH,
135 "!1c", "!0t", 1),
136 ENCODING_MAP(THUMB_B_UNCOND, 0xe000,
137 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1,
138 NO_OPERAND | IS_BRANCH,
139 "b", "!0t", 1),
140 ENCODING_MAP(THUMB_BIC, 0x4380,
141 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
142 IS_BINARY_OP | CLOBBER_DEST,
143 "bic", "r!0d, r!1d", 1),
144 ENCODING_MAP(THUMB_BKPT, 0xbe00,
145 BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
146 IS_UNARY_OP | IS_BRANCH,
147 "bkpt", "!0d", 1),
148 ENCODING_MAP(THUMB_BLX_1, 0xf000,
149 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1,
150 IS_BINARY_OP | IS_BRANCH,
151 "blx_1", "!0u", 1),
152 ENCODING_MAP(THUMB_BLX_2, 0xe800,
153 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1,
154 IS_BINARY_OP | IS_BRANCH,
155 "blx_2", "!0v", 1),
156 ENCODING_MAP(THUMB_BL_1, 0xf000,
157 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1,
158 IS_UNARY_OP | IS_BRANCH,
159 "bl_1", "!0u", 1),
160 ENCODING_MAP(THUMB_BL_2, 0xf800,
161 BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1,
162 IS_UNARY_OP | IS_BRANCH,
163 "bl_2", "!0v", 1),
164 ENCODING_MAP(THUMB_BLX_R, 0x4780,
165 BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1,
166 IS_UNARY_OP | IS_BRANCH,
167 "blx", "r!0d", 1),
168 ENCODING_MAP(THUMB_BX, 0x4700,
169 BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1,
170 IS_UNARY_OP | IS_BRANCH,
171 "bx", "r!0d", 1),
172 ENCODING_MAP(THUMB_CMN, 0x42c0,
173 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
174 IS_BINARY_OP,
175 "cmn", "r!0d, r!1d", 1),
176 ENCODING_MAP(THUMB_CMP_RI8, 0x2800,
177 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
178 IS_BINARY_OP,
179 "cmp", "r!0d, #!1d", 1),
180 ENCODING_MAP(THUMB_CMP_RR, 0x4280,
181 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
182 IS_BINARY_OP,
183 "cmp", "r!0d, r!1d", 1),
184 ENCODING_MAP(THUMB_CMP_LH, 0x4540,
185 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
186 IS_BINARY_OP,
187 "cmp", "r!0d, r!1D", 1),
188 ENCODING_MAP(THUMB_CMP_HL, 0x4580,
189 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
190 IS_BINARY_OP,
191 "cmp", "r!0D, r!1d", 1),
192 ENCODING_MAP(THUMB_CMP_HH, 0x45c0,
193 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
194 IS_BINARY_OP,
195 "cmp", "r!0D, r!1D", 1),
196 ENCODING_MAP(THUMB_EOR, 0x4040,
197 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
198 IS_BINARY_OP | CLOBBER_DEST,
199 "eor", "r!0d, r!1d", 1),
200 ENCODING_MAP(THUMB_LDMIA, 0xc800,
201 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
202 IS_BINARY_OP | CLOBBER_DEST | CLOBBER_SRC1,
203 "ldmia", "r!0d!!, <!1R>", 1),
204 ENCODING_MAP(THUMB_LDR_RRI5, 0x6800,
205 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
206 IS_TERTIARY_OP | CLOBBER_DEST,
207 "ldr", "r!0d, [r!1d, #!2E]", 1),
208 ENCODING_MAP(THUMB_LDR_RRR, 0x5800,
209 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
210 IS_TERTIARY_OP | CLOBBER_DEST,
211 "ldr", "r!0d, [r!1d, r!2d]", 1),
212 ENCODING_MAP(THUMB_LDR_PC_REL, 0x4800,
213 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
214 IS_TERTIARY_OP | CLOBBER_DEST,
215 "ldr", "r!0d, [pc, #!1E]", 1),
216 ENCODING_MAP(THUMB_LDR_SP_REL, 0x9800,
217 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
218 IS_BINARY_OP | CLOBBER_DEST,
219 "ldr", "r!0d, [sp, #!1E]", 1),
220 ENCODING_MAP(THUMB_LDRB_RRI5, 0x7800,
221 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
222 IS_TERTIARY_OP | CLOBBER_DEST,
223 "ldrb", "r!0d, [r!1d, #2d]", 1),
224 ENCODING_MAP(THUMB_LDRB_RRR, 0x5c00,
225 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
226 IS_TERTIARY_OP | CLOBBER_DEST,
227 "ldrb", "r!0d, [r!1d, r!2d]", 1),
228 ENCODING_MAP(THUMB_LDRH_RRI5, 0x8800,
229 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
230 IS_TERTIARY_OP | CLOBBER_DEST,
231 "ldrh", "r!0d, [r!1d, #!2F]", 1),
232 ENCODING_MAP(THUMB_LDRH_RRR, 0x5a00,
233 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
234 IS_TERTIARY_OP | CLOBBER_DEST,
235 "ldrh", "r!0d, [r!1d, r!2d]", 1),
236 ENCODING_MAP(THUMB_LDRSB_RRR, 0x5600,
237 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
238 IS_TERTIARY_OP | CLOBBER_DEST,
239 "ldrsb", "r!0d, [r!1d, r!2d]", 1),
240 ENCODING_MAP(THUMB_LDRSH_RRR, 0x5e00,
241 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
242 IS_TERTIARY_OP | CLOBBER_DEST,
243 "ldrsh", "r!0d, [r!1d, r!2d]", 1),
244 ENCODING_MAP(THUMB_LSL, 0x0000,
245 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
246 IS_TERTIARY_OP | CLOBBER_DEST,
247 "lsl", "r!0d, r!1d, #!2d", 1),
248 ENCODING_MAP(THUMB_LSLV, 0x4080,
249 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
250 IS_BINARY_OP | CLOBBER_DEST,
251 "lsl", "r!0d, r!1d", 1),
252 ENCODING_MAP(THUMB_LSR, 0x0800,
253 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
254 IS_TERTIARY_OP | CLOBBER_DEST,
255 "lsr", "r!0d, r!1d, #!2d", 1),
256 ENCODING_MAP(THUMB_LSRV, 0x40c0,
257 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
258 IS_BINARY_OP | CLOBBER_DEST,
259 "lsr", "r!0d, r!1d", 1),
260 ENCODING_MAP(THUMB_MOV_IMM, 0x2000,
261 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
262 IS_BINARY_OP | CLOBBER_DEST,
263 "mov", "r!0d, #!1d", 1),
264 ENCODING_MAP(THUMB_MOV_RR, 0x1c00,
265 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
266 IS_BINARY_OP | CLOBBER_DEST,
267 "mov", "r!0d, r!1d", 1),
268 ENCODING_MAP(THUMB_MOV_RR_H2H, 0x46c0,
269 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
270 IS_BINARY_OP | CLOBBER_DEST,
271 "mov", "r!0D, r!1D", 1),
272 ENCODING_MAP(THUMB_MOV_RR_H2L, 0x4640,
273 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
274 IS_BINARY_OP | CLOBBER_DEST,
275 "mov", "r!0d, r!1D", 1),
276 ENCODING_MAP(THUMB_MOV_RR_L2H, 0x4680,
277 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
278 IS_BINARY_OP | CLOBBER_DEST,
279 "mov", "r!0D, r!1d", 1),
280 ENCODING_MAP(THUMB_MUL, 0x4340,
281 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
282 IS_BINARY_OP | CLOBBER_DEST,
283 "mul", "r!0d, r!1d", 1),
284 ENCODING_MAP(THUMB_MVN, 0x43c0,
285 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
286 IS_BINARY_OP | CLOBBER_DEST,
287 "mvn", "r!0d, r!1d", 1),
288 ENCODING_MAP(THUMB_NEG, 0x4240,
289 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
290 IS_BINARY_OP | CLOBBER_DEST,
291 "neg", "r!0d, r!1d", 1),
292 ENCODING_MAP(THUMB_ORR, 0x4300,
293 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
294 IS_BINARY_OP | CLOBBER_DEST,
295 "orr", "r!0d, r!1d", 1),
296 ENCODING_MAP(THUMB_POP, 0xbc00,
297 BITBLT, 8, 0, UNUSED, -1, -1, UNUSED, -1, -1,
298 IS_UNARY_OP,
299 "pop", "<!0R>", 1),
300 ENCODING_MAP(THUMB_PUSH, 0xb400,
301 BITBLT, 8, 0, UNUSED, -1, -1, UNUSED, -1, -1,
302 IS_UNARY_OP,
303 "push", "<!0R>", 1),
304 ENCODING_MAP(THUMB_ROR, 0x41c0,
305 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
306 IS_BINARY_OP | CLOBBER_DEST,
307 "ror", "r!0d, r!1d", 1),
308 ENCODING_MAP(THUMB_SBC, 0x4180,
309 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
310 IS_BINARY_OP | CLOBBER_DEST,
311 "sbc", "r!0d, r!1d", 1),
312 ENCODING_MAP(THUMB_STMIA, 0xc000,
313 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
314 IS_BINARY_OP | CLOBBER_SRC1,
315 "stmia", "r!0d!!, <!1R>", 1),
316 ENCODING_MAP(THUMB_STR_RRI5, 0x6000,
317 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
318 IS_TERTIARY_OP,
319 "str", "r!0d, [r!1d, #!2E]", 1),
320 ENCODING_MAP(THUMB_STR_RRR, 0x5000,
321 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
322 IS_TERTIARY_OP,
323 "str", "r!0d, [r!1d, r!2d]", 1),
324 ENCODING_MAP(THUMB_STR_SP_REL, 0x9000,
325 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
326 IS_BINARY_OP,
327 "str", "r!0d, [sp, #!1E]", 1),
328 ENCODING_MAP(THUMB_STRB_RRI5, 0x7000,
329 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
330 IS_TERTIARY_OP,
331 "strb", "r!0d, [r!1d, #!2d]", 1),
332 ENCODING_MAP(THUMB_STRB_RRR, 0x5400,
333 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
334 IS_TERTIARY_OP,
335 "strb", "r!0d, [r!1d, r!2d]", 1),
336 ENCODING_MAP(THUMB_STRH_RRI5, 0x8000,
337 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6,
338 IS_TERTIARY_OP,
339 "strh", "r!0d, [r!1d, #!2F]", 1),
340 ENCODING_MAP(THUMB_STRH_RRR, 0x5200,
341 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
342 IS_TERTIARY_OP,
343 "strh", "r!0d, [r!1d, r!2d]", 1),
344 ENCODING_MAP(THUMB_SUB_RRI3, 0x1e00,
345 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
346 IS_TERTIARY_OP | CLOBBER_DEST,
347 "sub", "r!0d, r!1d, #!2d]", 1),
348 ENCODING_MAP(THUMB_SUB_RI8, 0x3800,
349 BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1,
350 IS_BINARY_OP | CLOBBER_DEST,
351 "sub", "r!0d, #!1d", 1),
352 ENCODING_MAP(THUMB_SUB_RRR, 0x1a00,
353 BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6,
354 IS_TERTIARY_OP | CLOBBER_DEST,
355 "sub", "r!0d, r!1d, r!2d", 1),
356 ENCODING_MAP(THUMB_SUB_SPI7, 0xb080,
357 BITBLT, 6, 0, UNUSED, -1, -1, UNUSED, -1, -1,
358 IS_UNARY_OP | CLOBBER_DEST,
359 "sub", "sp, #!0d", 1),
360 ENCODING_MAP(THUMB_SWI, 0xdf00,
361 BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1,
362 IS_UNARY_OP | IS_BRANCH,
363 "swi", "!0d", 1),
364 ENCODING_MAP(THUMB_TST, 0x4200,
365 BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1,
366 IS_UNARY_OP,
367 "tst", "r!0d, r!1d", 1),
368 ENCODING_MAP(THUMB2_VLDRS, 0xed900a00,
369 SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
370 IS_TERTIARY_OP | CLOBBER_DEST,
371 "vldr", "!0s, [r!1d, #!2E]", 2),
372 ENCODING_MAP(THUMB2_VLDRD, 0xed900b00,
373 DFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
374 IS_TERTIARY_OP | CLOBBER_DEST,
375 "vldr", "!0S, [r!1d, #!2E]", 2),
376 ENCODING_MAP(THUMB2_VMULS, 0xee200a00,
377 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
378 IS_TERTIARY_OP | CLOBBER_DEST,
379 "vmuls", "!0s, !1s, !2s", 2),
380 ENCODING_MAP(THUMB2_VMULD, 0xee200b00,
381 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
382 IS_TERTIARY_OP | CLOBBER_DEST,
383 "vmuld", "!0S, !1S, !2S", 2),
384 ENCODING_MAP(THUMB2_VSTRS, 0xed800a00,
385 SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
386 IS_TERTIARY_OP,
387 "vstr", "!0s, [r!1d, #!2E]", 2),
388 ENCODING_MAP(THUMB2_VSTRD, 0xed800b00,
389 DFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0,
390 IS_TERTIARY_OP,
391 "vstr", "!0S, [r!1d, #!2E]", 2),
392 ENCODING_MAP(THUMB2_VSUBS, 0xee300a40,
393 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
394 IS_TERTIARY_OP | CLOBBER_DEST,
395 "vsub", "!0s, !1s, !2s", 2),
396 ENCODING_MAP(THUMB2_VSUBD, 0xee300b40,
397 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
398 IS_TERTIARY_OP | CLOBBER_DEST,
399 "vsub", "!0S, !1S, !2S", 2),
400 ENCODING_MAP(THUMB2_VADDS, 0xee300a00,
401 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
402 IS_TERTIARY_OP | CLOBBER_DEST,
403 "vadd", "!0s, !1s, !2s", 2),
404 ENCODING_MAP(THUMB2_VADDD, 0xee300b00,
405 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
406 IS_TERTIARY_OP | CLOBBER_DEST,
407 "vadd", "!0S, !1S, !2S", 2),
408 ENCODING_MAP(THUMB2_VDIVS, 0xee800a00,
409 SFP, 22, 12, SFP, 7, 16, SFP, 5, 0,
410 IS_TERTIARY_OP | CLOBBER_DEST,
411 "vdivs", "!0s, !1s, !2s", 2),
412 ENCODING_MAP(THUMB2_VDIVD, 0xee800b00,
413 DFP, 22, 12, DFP, 7, 16, DFP, 5, 0,
414 IS_TERTIARY_OP | CLOBBER_DEST,
415 "vdivs", "!0S, !1S, !2S", 2),
416 ENCODING_MAP(THUMB2_VCVTIF, 0xeeb80ac0,
417 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
418 IS_BINARY_OP | CLOBBER_DEST,
419 "vcvt.f32", "!0s, !1s", 2),
420 ENCODING_MAP(THUMB2_VCVTID, 0xeeb80bc0,
421 DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
422 IS_BINARY_OP | CLOBBER_DEST,
423 "vcvt.f64", "!0S, !1s", 2),
424 ENCODING_MAP(THUMB2_VCVTFI, 0xeebd0ac0,
425 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
426 IS_BINARY_OP | CLOBBER_DEST,
427 "vcvt.s32.f32 ", "!0s, !1s", 2),
428 ENCODING_MAP(THUMB2_VCVTDI, 0xeebd0bc0,
429 SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
430 IS_BINARY_OP | CLOBBER_DEST,
431 "vcvt.s32.f64 ", "!0s, !1S", 2),
432 ENCODING_MAP(THUMB2_VCVTFD, 0xeeb70ac0,
433 DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
434 IS_BINARY_OP | CLOBBER_DEST,
435 "vcvt.f64.f32 ", "!0S, !1s", 2),
436 ENCODING_MAP(THUMB2_VCVTDF, 0xeeb70bc0,
437 SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
438 IS_BINARY_OP | CLOBBER_DEST,
439 "vcvt.f32.f64 ", "!0s, !1S", 2),
440 ENCODING_MAP(THUMB2_VSQRTS, 0xeeb10ac0,
441 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
442 IS_BINARY_OP | CLOBBER_DEST,
443 "vsqrt.f32 ", "!0s, !1s", 2),
444 ENCODING_MAP(THUMB2_VSQRTD, 0xeeb10bc0,
445 DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
446 IS_BINARY_OP | CLOBBER_DEST,
447 "vsqrt.f64 ", "!0S, !1S", 2),
448 ENCODING_MAP(THUMB2_MOV_IMM_SHIFT, 0xf04f0000,
449 BITBLT, 11, 8, MODIMM, -1, -1, UNUSED, -1, -1,
450 IS_BINARY_OP | CLOBBER_DEST,
451 "mov", "r!0d, #!1m", 2),
452 ENCODING_MAP(THUMB2_MOV_IMM16, 0xf2400000,
453 BITBLT, 11, 8, IMM16, -1, -1, UNUSED, -1, -1,
454 IS_BINARY_OP | CLOBBER_DEST,
455 "mov", "r!0d, #!1M", 2),
456 ENCODING_MAP(THUMB2_STR_RRI12, 0xf8c00000,
457 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0,
458 IS_TERTIARY_OP,
459 "str", "r!0d,[r!1d, #!2d", 2),
460 ENCODING_MAP(THUMB2_LDR_RRI12, 0xf8d00000,
461 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 11, 0,
462 IS_TERTIARY_OP | CLOBBER_DEST,
463 "ldr", "r!0d,[r!1d, #!2d", 2),
464 ENCODING_MAP(THUMB2_STR_RRI8_PREDEC, 0xf8400c00,
465 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0,
466 IS_TERTIARY_OP,
467 "str", "r!0d,[r!1d, #-!2d]", 2),
468 ENCODING_MAP(THUMB2_LDR_RRI8_PREDEC, 0xf8500c00,
469 BITBLT, 15, 12, BITBLT, 19, 16, BITBLT, 8, 0,
470 IS_TERTIARY_OP | CLOBBER_DEST,
471 "ldr", "r!0d,[r!1d, #-!2d]", 2),
472 ENCODING_MAP(THUMB2_CBNZ, 0xb900,
473 BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1,
474 IS_BINARY_OP,
475 "cbnz", "r!0d,!1t", 1),
476 ENCODING_MAP(THUMB2_CBZ, 0xb100,
477 BITBLT, 2, 0, IMM6, -1, -1, UNUSED, -1, -1,
478 IS_BINARY_OP,
479 "cbz", "r!0d,!1t", 1),
480 ENCODING_MAP(THUMB2_ADD_RRI12, 0xf1000000,
481 BITBLT, 11, 8, BITBLT, 19, 16, IMM12, -1, -1,
482 IS_TERTIARY_OP | CLOBBER_DEST,
483 "add", "r!0d,r!1d,#!2d", 2),
484 ENCODING_MAP(THUMB2_MOV_RR, 0xea4f0000,
485 BITBLT, 11, 8, BITBLT, 3, 0, UNUSED, -1, -1,
486 IS_BINARY_OP | CLOBBER_DEST,
487 "mov", "r!0d, r!1d", 2),
488 ENCODING_MAP(THUMB2_VMOVS, 0xeeb00a40,
489 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1,
490 IS_BINARY_OP | CLOBBER_DEST,
491 "vmov.f32 ", "!0s, !1s", 2),
492 ENCODING_MAP(THUMB2_VMOVD, 0xeeb00b40,
493 DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1,
494 IS_BINARY_OP | CLOBBER_DEST,
495 "vmov.f64 ", "!0s, !1s", 2),
496 };
497
498 #define PADDING_MOV_R0_R0 0x1C00
499
500 /* Write the numbers in the literal pool to the codegen stream */
501 static void installDataContent(CompilationUnit *cUnit)
502 {
503 int *dataPtr = (int *) ((char *) cUnit->baseAddr + cUnit->dataOffset);
504 ArmLIR *dataLIR = (ArmLIR *) cUnit->wordList;
505 while (dataLIR) {
506 *dataPtr++ = dataLIR->operands[0];
507 dataLIR = NEXT_LIR(dataLIR);
508 }
509 }
510
511 /* Returns the size of a Jit trace description */
512 static int jitTraceDescriptionSize(const JitTraceDescription *desc)
513 {
514 int runCount;
515 for (runCount = 0; ; runCount++) {
516 if (desc->trace[runCount].frag.runEnd)
517 break;
518 }
519 return sizeof(JitCodeDesc) + ((runCount+1) * sizeof(JitTraceRun));
520 }
521
522 /* Returns true if an error occurs */
523 static bool assembleInstructions(CompilationUnit *cUnit, intptr_t startAddr)
524 {
525 short *bufferAddr = (short *) cUnit->codeBuffer;
526 ArmLIR *lir;
527
528 for (lir = (ArmLIR *) cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
529 if (lir->opCode < 0) {
530 if ((lir->opCode == ARM_PSEUDO_ALIGN4) &&
531 /* 1 means padding is needed */
532 (lir->operands[0] == 1)) {
533 *bufferAddr++ = PADDING_MOV_R0_R0;
534 }
535 continue;
536 }
537
538 if (lir->isNop) {
539 continue;
540 }
541
542 if (lir->opCode == THUMB_LDR_PC_REL ||
543 lir->opCode == THUMB_ADD_PC_REL) {
544 ArmLIR *lirTarget = (ArmLIR *) lir->generic.target;
545 intptr_t pc = (lir->generic.offset + 4) & ~3;
546 /*
547 * Allow an offset (stored in operands[2]) to be added to the
548 * PC-relative target. Useful to get to a fixed field inside a
549 * chaining cell.
550 */
551 intptr_t target = lirTarget->generic.offset + lir->operands[2];
552 int delta = target - pc;
553 if (delta & 0x3) {
554 LOGE("PC-rel distance is not multiples of 4: %d\n", delta);
555 dvmAbort();
556 }
557 if (delta > 1023) {
558 return true;
559 }
560 lir->operands[1] = delta >> 2;
561 } else if (lir->opCode == THUMB2_CBNZ || lir->opCode == THUMB2_CBZ) {
562 ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
563 intptr_t pc = lir->generic.offset + 4;
564 intptr_t target = targetLIR->generic.offset;
565 int delta = target - pc;
566 if (delta > 126 || delta < 0) {
567 return true;
568 }
569 lir->operands[1] = delta >> 1;
570 } else if (lir->opCode == THUMB_B_COND) {
571 ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
572 intptr_t pc = lir->generic.offset + 4;
573 intptr_t target = targetLIR->generic.offset;
574 int delta = target - pc;
575 if (delta > 254 || delta < -256) {
576 return true;
577 }
578 lir->operands[0] = delta >> 1;
579 } else if (lir->opCode == THUMB_B_UNCOND) {
580 ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
581 intptr_t pc = lir->generic.offset + 4;
582 intptr_t target = targetLIR->generic.offset;
583 int delta = target - pc;
584 if (delta > 2046 || delta < -2048) {
585 LOGE("Unconditional branch distance out of range: %d\n", delta);
586 dvmAbort();
587 }
588 lir->operands[0] = delta >> 1;
589 } else if (lir->opCode == THUMB_BLX_1) {
590 assert(NEXT_LIR(lir)->opCode == THUMB_BLX_2);
591 /* curPC is Thumb */
592 intptr_t curPC = (startAddr + lir->generic.offset + 4) & ~3;
593 intptr_t target = lir->operands[1];
594
595 /* Match bit[1] in target with base */
596 if (curPC & 0x2) {
597 target |= 0x2;
598 }
599 int delta = target - curPC;
600 assert((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
601
602 lir->operands[0] = (delta >> 12) & 0x7ff;
603 NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
604 }
605
606 ArmEncodingMap *encoder = &EncodingMap[lir->opCode];
607 u4 bits = encoder->skeleton;
608 int i;
609 for (i = 0; i < 3; i++) {
610 u4 value;
611 switch(encoder->fieldLoc[i].kind) {
612 case UNUSED:
613 break;
614 case IMM6:
615 value = ((lir->operands[i] & 0x20) >> 5) << 9;
616 value |= (lir->operands[i] & 0x1f) << 3;
617 bits |= value;
618 break;
619 case BITBLT:
620 value = (lir->operands[i] << encoder->fieldLoc[i].start) &
621 ((1 << (encoder->fieldLoc[i].end + 1)) - 1);
622 bits |= value;
623 break;
624 case DFP:
625 /* Snag the 1-bit slice and position it */
626 value = ((lir->operands[i] & 0x10) >> 4) <<
627 encoder->fieldLoc[i].end;
628 /* Extract and position the 4-bit slice */
629 value |= (lir->operands[i] & 0x0f) <<
630 encoder->fieldLoc[i].start;
631 bits |= value;
632 break;
633 case SFP:
634 /* Snag the 1-bit slice and position it */
635 value = (lir->operands[i] & 0x1) <<
636 encoder->fieldLoc[i].end;
637 /* Extract and position the 4-bit slice */
638 value |= ((lir->operands[i] & 0x1e) >> 1) <<
639 encoder->fieldLoc[i].start;
640 bits |= value;
641 break;
642 case IMM12:
643 case MODIMM:
644 value = ((lir->operands[i] & 0x800) >> 11) << 26;
645 value |= ((lir->operands[i] & 0x700) >> 8) << 12;
646 value |= lir->operands[i] & 0x0ff;
647 bits |= value;
648 break;
649 case IMM16:
650 value = ((lir->operands[i] & 0x0800) >> 11) << 26;
651 value |= ((lir->operands[i] & 0xf000) >> 12) << 16;
652 value |= ((lir->operands[i] & 0x0700) >> 8) << 12;
653 value |= lir->operands[i] & 0x0ff;
654 bits |= value;
655 break;
656 default:
657 assert(0);
658 }
659 }
660 if (encoder->size == 2) {
661 *bufferAddr++ = (bits >> 16) & 0xffff;
662 }
663 *bufferAddr++ = bits & 0xffff;
664 }
665 return false;
666 }
667
668 /*
669 * Translation layout in the code cache. Note that the codeAddress pointer
670 * in JitTable will point directly to the code body (field codeAddress). The
671 * chain cell offset is at codeAddress - 2, and (if present) the execution
672 * count is at codeAddress - 6.
673 *
674 * +----------------------------+
675 * | Execution count | -> [Optional] 4 bytes
676 * +----------------------------+
677 * +--| Offset to chain cell counts| -> 2 bytes
678 * | +----------------------------+
679 * | | Code body | -> Start address for translation
680 * | | | variable in 2-byte chunks
681 * | . . (JitTable's codeAddress points here)
682 * | . .
683 * | | |
684 * | +----------------------------+
685 * | | Chaining Cells | -> 8 bytes each, must be 4 byte aligned
686 * | . .
687 * | . .
688 * | | |
689 * | +----------------------------+
690 * +->| Chaining cell counts | -> 4 bytes, chain cell counts by type
691 * +----------------------------+
692 * | Trace description | -> variable sized
693 * . .
694 * | |
695 * +----------------------------+
696 * | Literal pool | -> 4-byte aligned, variable size
697 * . .
698 * . .
699 * | |
700 * +----------------------------+
701 *
702 * Go over each instruction in the list and calculate the offset from the top
703 * before sending them off to the assembler. If an out-of-range branch distance
704 * is seen, rearrange the instructions a bit to correct it.
705 */
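/*
 * For illustration of the layout above: codeAddress carries the Thumb bit,
 * so the 2-byte chain cell offset lives at (codeAddress & ~1) - 2 and the
 * optional execution count at (codeAddress & ~1) - 6; getTraceBase() and
 * dvmJitUnchain() below fold the Thumb bit in by subtracting 7 and 3,
 * respectively, from the raw codeAddress.
 */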
706 void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info)
707 {
708 LIR *lir;
709 ArmLIR *armLIR;
710 int offset = 0;
711 int i;
712 ChainCellCounts chainCellCounts;
713 int descSize = jitTraceDescriptionSize(cUnit->traceDesc);
714
715 info->codeAddress = NULL;
716 info->instructionSet = cUnit->instructionSet;
717
718 /* Beginning offset needs to allow space for chain cell offset */
719 for (armLIR = (ArmLIR *) cUnit->firstLIRInsn;
720 armLIR;
721 armLIR = NEXT_LIR(armLIR)) {
722 armLIR->generic.offset = offset;
723 if (armLIR->opCode >= 0 && !armLIR->isNop) {
724 armLIR->size = EncodingMap[armLIR->opCode].size * 2;
725 offset += armLIR->size;
726 } else if (armLIR->opCode == ARM_PSEUDO_ALIGN4) {
727 if (offset & 0x2) {
728 offset += 2;
729 armLIR->operands[0] = 1;
730 } else {
731 armLIR->operands[0] = 0;
732 }
733 }
734 /* Pseudo opcodes don't consume space */
735 }
736
737 /* Const values have to be word aligned */
738 offset = (offset + 3) & ~3;
739
740 /* Add space for chain cell counts & trace description */
741 u4 chainCellOffset = offset;
742 ArmLIR *chainCellOffsetLIR = (ArmLIR *) cUnit->chainCellOffsetLIR;
743 assert(chainCellOffsetLIR);
744 assert(chainCellOffset < 0x10000);
745 assert(chainCellOffsetLIR->opCode == ARM_16BIT_DATA &&
746 chainCellOffsetLIR->operands[0] == CHAIN_CELL_OFFSET_TAG);
747
748 /*
749 * Replace the CHAIN_CELL_OFFSET_TAG with the real value. If trace
750 * profiling is enabled, subtract 4 (occupied by the counter word) from
751 * the absolute offset as the value stored in chainCellOffsetLIR is the
752 * delta from &chainCellOffsetLIR to &ChainCellCounts.
753 */
754 chainCellOffsetLIR->operands[0] =
755 gDvmJit.profile ? (chainCellOffset - 4) : chainCellOffset;
756
757 offset += sizeof(chainCellCounts) + descSize;
758
759 assert((offset & 0x3) == 0); /* Should still be word aligned */
760
761 /* Set up offsets for literals */
762 cUnit->dataOffset = offset;
763
764 for (lir = cUnit->wordList; lir; lir = lir->next) {
765 lir->offset = offset;
766 offset += 4;
767 }
768
769 cUnit->totalSize = offset;
770
771 if (gDvmJit.codeCacheByteUsed + cUnit->totalSize > CODE_CACHE_SIZE) {
772 gDvmJit.codeCacheFull = true;
773 cUnit->baseAddr = NULL;
774 return;
775 }
776
777 /* Allocate enough space for the code block */
778 cUnit->codeBuffer = dvmCompilerNew(chainCellOffset, true);
779 if (cUnit->codeBuffer == NULL) {
780 LOGE("Code buffer allocation failure\n");
781 cUnit->baseAddr = NULL;
782 return;
783 }
784
785 bool assemblerFailure = assembleInstructions(
786 cUnit, (intptr_t) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed);
787
788 /*
789 * Currently the only thing that can cause the assembler to fail is an overly
790 * long trace; cut it in half and retry.
791 */
792 if (assemblerFailure) {
793 cUnit->halveInstCount = true;
794 return;
795 }
796
797
798 cUnit->baseAddr = (char *) gDvmJit.codeCache + gDvmJit.codeCacheByteUsed;
799 gDvmJit.codeCacheByteUsed += offset;
800
801 /* Install the code block */
802 memcpy((char*)cUnit->baseAddr, cUnit->codeBuffer, chainCellOffset);
803 gDvmJit.numCompilations++;
804
805 /* Install the chaining cell counts */
806 for (i=0; i< CHAINING_CELL_LAST; i++) {
807 chainCellCounts.u.count[i] = cUnit->numChainingCells[i];
808 }
809 memcpy((char*)cUnit->baseAddr + chainCellOffset, &chainCellCounts,
810 sizeof(chainCellCounts));
811
812 /* Install the trace description */
813 memcpy((char*)cUnit->baseAddr + chainCellOffset + sizeof(chainCellCounts),
814 cUnit->traceDesc, descSize);
815
816 /* Write the literals directly into the code cache */
817 installDataContent(cUnit);
818
819 /* Flush dcache and invalidate the icache to maintain coherence */
820 cacheflush((long)cUnit->baseAddr,
821 (long)((char *) cUnit->baseAddr + offset), 0);
822
823 /* Record code entry point and instruction set */
824 info->codeAddress = (char*)cUnit->baseAddr + cUnit->headerSize;
825 info->instructionSet = cUnit->instructionSet;
826 /* If applicable, set the low bit to denote Thumb */
827 if (info->instructionSet != DALVIK_JIT_ARM)
828 info->codeAddress = (char*)info->codeAddress + 1;
829 }
830
831 static u4 assembleBXPair(int branchOffset)
832 {
833 u4 thumb1, thumb2;
834
835 if ((branchOffset < -2048) || (branchOffset > 2046)) {
836 thumb1 = (0xf000 | ((branchOffset>>12) & 0x7ff));
837 thumb2 = (0xf800 | ((branchOffset>> 1) & 0x7ff));
838 } else {
839 thumb1 = (0xe000 | ((branchOffset>> 1) & 0x7ff));
840 thumb2 = 0x4300; /* nop -> orr r0, r0 */
841 }
842
843 return thumb2<<16 | thumb1;
844 }
845
846 /*
847 * Perform translation chain operation.
848 * For ARM, we'll use a pair of thumb instructions to generate
849 * an unconditional chaining branch of up to 4MB in distance.
850 * Use a BL, though we don't really need the link. The format is
851 * 111HHooooooooooo
852 * Where HH is 10 for the 1st inst, and 11 for the second and
853 * the "o" field is each instruction's 11-bit contribution to the
854 * 22-bit branch offset.
855 * If the target is nearby, use a single unconditional b (the second
 * halfword becomes a nop).
856 * If one or more threads is suspended, don't chain.
857 */
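/*
 * Worked example for the pair described above (illustrative numbers): a
 * branchOffset of 0x1234 is outside the +/-2KB short-branch range, so
 * assembleBXPair() produces
 *     thumb1 = 0xf000 | ((0x1234 >> 12) & 0x7ff) = 0xf001   (prefix)
 *     thumb2 = 0xf800 | ((0x1234 >>  1) & 0x7ff) = 0xf91a   (suffix)
 * and returns 0xf91af001; stored little-endian, the prefix halfword comes
 * first in the instruction stream.
 */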
858 void* dvmJitChain(void* tgtAddr, u4* branchAddr)
859 {
860 int baseAddr = (u4) branchAddr + 4;
861 int branchOffset = (int) tgtAddr - baseAddr;
862 u4 newInst;
863
864 if (gDvm.sumThreadSuspendCount == 0) {
865 assert((branchOffset >= -(1<<22)) && (branchOffset <= ((1<<22)-2)));
866
867 gDvmJit.translationChains++;
868
869 COMPILER_TRACE_CHAINING(
870 LOGD("Jit Runtime: chaining 0x%x to 0x%x\n",
871 (int) branchAddr, (int) tgtAddr & -2));
872
873 newInst = assembleBXPair(branchOffset);
874
875 *branchAddr = newInst;
876 cacheflush((long)branchAddr, (long)branchAddr + 4, 0);
877 }
878
879 return tgtAddr;
880 }
881
882 /*
883 * This method is called from the invoke templates for virtual and interface
884 * methods to speculatively set up a chain to the callee. The templates are
885 * written in assembly and have set up method, cell, and clazz at r0, r2, and
886 * r3 respectively, so there is an unused argument in the list. Upon return,
887 * one of the following three results may occur:
888 * 1) The chain is not set up because the callee is native. Reset the rechain
889 * count to a big number so that it will take a long time before the next
890 * rechain attempt happens.
891 * 2) The chain is not set up because the callee has not been compiled yet.
892 * Reset the rechain count to a small number and retry in the near future.
893 * 3) Ask all other threads to stop before patching this chaining cell.
894 * This is required because another thread may have passed the class check
895 * but not yet reached the chaining cell to follow the chain. If we
896 * patch the content before halting the other thread, there is a small
897 * window in which it may follow the new but incorrect chain and invoke
898 * the wrong method.
899 */
900 const Method *dvmJitToPatchPredictedChain(const Method *method,
901 void *unused,
902 PredictedChainingCell *cell,
903 const ClassObject *clazz)
904 {
905 /* Don't come back here for a long time if the method is native */
906 if (dvmIsNativeMethod(method)) {
907 cell->counter = PREDICTED_CHAIN_COUNTER_AVOID;
908 cacheflush((long) cell, (long) (cell+1), 0);
909 COMPILER_TRACE_CHAINING(
910 LOGD("Jit Runtime: predicted chain %p to native method %s ignored",
911 cell, method->name));
912 goto done;
913 }
914 int tgtAddr = (int) dvmJitGetCodeAddr(method->insns);
915
916 /*
917 * The callee has not been compiled yet. Reset the counter to a small
918 * value and come back to check soon.
919 */
920 if (tgtAddr == 0) {
921 /*
922 * Wait for a few invocations (currently set to be 16) before trying
923 * to setup the chain again.
924 */
925 cell->counter = PREDICTED_CHAIN_COUNTER_DELAY;
926 cacheflush((long) cell, (long) (cell+1), 0);
927 COMPILER_TRACE_CHAINING(
928 LOGD("Jit Runtime: predicted chain %p to method %s delayed",
929 cell, method->name));
930 goto done;
931 }
932
933 /* Stop the world */
934 dvmSuspendAllThreads(SUSPEND_FOR_JIT);
935
936 int baseAddr = (int) cell + 4; // PC is cur_addr + 4
937 int branchOffset = tgtAddr - baseAddr;
938
939 COMPILER_TRACE_CHAINING(
940 LOGD("Jit Runtime: predicted chain %p from %s to %s (%s) patched",
941 cell, cell->clazz ? cell->clazz->descriptor : "NULL",
942 clazz->descriptor,
943 method->name));
944
945 cell->branch = assembleBXPair(branchOffset);
946 cell->clazz = clazz;
947 cell->method = method;
948 cell->counter = PREDICTED_CHAIN_COUNTER_RECHAIN;
949
950 cacheflush((long) cell, (long) (cell+1), 0);
951
952 /* All done - resume all other threads */
953 dvmResumeAllThreads(SUSPEND_FOR_JIT);
954
955 done:
956 return method;
957 }
958
959 /*
960 * Unchain a trace given the starting address of the translation
961 * in the code cache. Refer to the diagram in dvmCompilerAssembleLIR.
962 * Returns the address following the last cell unchained. Note that
963 * the incoming codeAddr is a thumb code address, and therefore has
964 * the low bit set.
965 */
966 u4* dvmJitUnchain(void* codeAddr)
967 {
968 u2* pChainCellOffset = (u2*)((char*)codeAddr - 3);
969 u2 chainCellOffset = *pChainCellOffset;
970 ChainCellCounts *pChainCellCounts =
971 (ChainCellCounts*)((char*)codeAddr + chainCellOffset - 3);
972 int cellSize;
973 u4* pChainCells;
974 u4* pStart;
975 u4 thumb1;
976 u4 thumb2;
977 u4 newInst;
978 int i,j;
979 PredictedChainingCell *predChainCell;
980
981 /* Get total count of chain cells */
982 for (i = 0, cellSize = 0; i < CHAINING_CELL_LAST; i++) {
983 if (i != CHAINING_CELL_INVOKE_PREDICTED) {
984 cellSize += pChainCellCounts->u.count[i] * 2;
985 } else {
986 cellSize += pChainCellCounts->u.count[i] * 4;
987 }
988 }
989
990 /* Locate the beginning of the chain cell region */
991 pStart = pChainCells = ((u4 *) pChainCellCounts) - cellSize;
992
993 /* The cells are sorted in order - walk through them and reset */
994 for (i = 0; i < CHAINING_CELL_LAST; i++) {
995 int elemSize = 2; /* Most chaining cells have two words */
996 if (i == CHAINING_CELL_INVOKE_PREDICTED) {
997 elemSize = 4;
998 }
999
1000 for (j = 0; j < pChainCellCounts->u.count[i]; j++) {
1001 int targetOffset;
1002 switch(i) {
1003 case CHAINING_CELL_NORMAL:
1004 targetOffset = offsetof(InterpState,
1005 jitToInterpEntries.dvmJitToInterpNormal);
1006 break;
1007 case CHAINING_CELL_HOT:
1008 case CHAINING_CELL_INVOKE_SINGLETON:
1009 targetOffset = offsetof(InterpState,
1010 jitToInterpEntries.dvmJitToTraceSelect);
1011 break;
1012 case CHAINING_CELL_INVOKE_PREDICTED:
1013 targetOffset = 0;
1014 predChainCell = (PredictedChainingCell *) pChainCells;
1015 /* Reset the cell to the init state */
1016 predChainCell->branch = PREDICTED_CHAIN_BX_PAIR_INIT;
1017 predChainCell->clazz = PREDICTED_CHAIN_CLAZZ_INIT;
1018 predChainCell->method = PREDICTED_CHAIN_METHOD_INIT;
1019 predChainCell->counter = PREDICTED_CHAIN_COUNTER_INIT;
1020 break;
1021 default:
1022 dvmAbort();
1023 }
1024 COMPILER_TRACE_CHAINING(
1025 LOGD("Jit Runtime: unchaining 0x%x", (int)pChainCells));
1026 /*
1027 * Thumb code sequence for a chaining cell is:
1028 * ldr r0, [rGLUE, #<word offset>]
1029 * blx r0
1030 */
1031 if (i != CHAINING_CELL_INVOKE_PREDICTED) {
1032 targetOffset = targetOffset >> 2; /* convert to word offset */
1033 thumb1 = 0x6800 | (targetOffset << 6) |
1034 (rGLUE << 3) | (r0 << 0);
1035 thumb2 = 0x4780 | (r0 << 3);
1036 newInst = thumb2<<16 | thumb1;
1037 *pChainCells = newInst;
1038 }
1039 pChainCells += elemSize; /* Advance by a fixed number of words */
1040 }
1041 }
1042 return pChainCells;
1043 }
1044
1045 /* Unchain all translations in the cache. */
1046 void dvmJitUnchainAll()
1047 {
1048 u4* lowAddress = NULL;
1049 u4* highAddress = NULL;
1050 unsigned int i;
1051 if (gDvmJit.pJitEntryTable != NULL) {
1052 COMPILER_TRACE_CHAINING(LOGD("Jit Runtime: unchaining all"));
1053 dvmLockMutex(&gDvmJit.tableLock);
1054 for (i = 0; i < gDvmJit.jitTableSize; i++) {
1055 if (gDvmJit.pJitEntryTable[i].dPC &&
1056 gDvmJit.pJitEntryTable[i].codeAddress) {
1057 u4* lastAddress;
1058 lastAddress =
1059 dvmJitUnchain(gDvmJit.pJitEntryTable[i].codeAddress);
1060 if (lowAddress == NULL ||
1061 (u4*)gDvmJit.pJitEntryTable[i].codeAddress < lowAddress)
1062 lowAddress = lastAddress;
1063 if (lastAddress > highAddress)
1064 highAddress = lastAddress;
1065 }
1066 }
1067 cacheflush((long)lowAddress, (long)highAddress, 0);
1068 dvmUnlockMutex(&gDvmJit.tableLock);
1069 }
1070 }
1071
1072 typedef struct jitProfileAddrToLine {
1073 u4 lineNum;
1074 u4 bytecodeOffset;
1075 } jitProfileAddrToLine;
1076
1077
1078 /* Callback function to track the bytecode offset/line number relationship */
1079 static int addrToLineCb (void *cnxt, u4 bytecodeOffset, u4 lineNum)
1080 {
1081 jitProfileAddrToLine *addrToLine = (jitProfileAddrToLine *) cnxt;
1082
1083 /* Best match so far for this offset */
1084 if (addrToLine->bytecodeOffset >= bytecodeOffset) {
1085 addrToLine->lineNum = lineNum;
1086 }
1087 return 0;
1088 }
1089
1090 char *getTraceBase(const JitEntry *p)
1091 {
1092 return (char*)p->codeAddress -
1093 (6 + (p->u.info.instructionSet == DALVIK_JIT_ARM ? 0 : 1));
1094 }
1095
1096 /* Dumps profile info for a single trace */
1097 static int dumpTraceProfile(JitEntry *p)
1098 {
1099 ChainCellCounts* pCellCounts;
1100 char* traceBase;
1101 u4* pExecutionCount;
1102 u2* pCellOffset;
1103 JitTraceDescription *desc;
1104 const Method* method;
1105
1106 traceBase = getTraceBase(p);
1107
1108 if (p->codeAddress == NULL) {
1109 LOGD("TRACEPROFILE 0x%08x 0 NULL 0 0", (int)traceBase);
1110 return 0;
1111 }
1112
1113 pExecutionCount = (u4*) (traceBase);
1114 pCellOffset = (u2*) (traceBase + 4);
1115 pCellCounts = (ChainCellCounts*) ((char *)pCellOffset + *pCellOffset);
1116 desc = (JitTraceDescription*) ((char*)pCellCounts + sizeof(*pCellCounts));
1117 method = desc->method;
1118 char *methodDesc = dexProtoCopyMethodDescriptor(&method->prototype);
1119 jitProfileAddrToLine addrToLine = {0, desc->trace[0].frag.startOffset};
1120
1121 /*
1122 * We may end up decoding the debug information for the same method
1123 * multiple times, but the tradeoff is we don't need to allocate extra
1124 * space to store the addr/line mapping. Since this is a debugging feature
1125 * and is done infrequently, the slower but simpler mechanism should work
1126 * just fine.
1127 */
1128 dexDecodeDebugInfo(method->clazz->pDvmDex->pDexFile,
1129 dvmGetMethodCode(method),
1130 method->clazz->descriptor,
1131 method->prototype.protoIdx,
1132 method->accessFlags,
1133 addrToLineCb, NULL, &addrToLine);
1134
1135 LOGD("TRACEPROFILE 0x%08x % 10d [%#x(+%d), %d] %s%s;%s",
1136 (int)traceBase,
1137 *pExecutionCount,
1138 desc->trace[0].frag.startOffset,
1139 desc->trace[0].frag.numInsts,
1140 addrToLine.lineNum,
1141 method->clazz->descriptor, method->name, methodDesc);
1142 free(methodDesc);
1143
1144 return *pExecutionCount;
1145 }
1146
1147 /* Handy function to retrieve the profile count */
1148 static inline int getProfileCount(const JitEntry *entry)
1149 {
1150 if (entry->dPC == 0 || entry->codeAddress == 0)
1151 return 0;
1152 u4 *pExecutionCount = (u4 *) getTraceBase(entry);
1153
1154 return *pExecutionCount;
1155 }
1156
1157
1158 /* qsort callback function */
1159 static int sortTraceProfileCount(const void *entry1, const void *entry2)
1160 {
1161 const JitEntry *jitEntry1 = entry1;
1162 const JitEntry *jitEntry2 = entry2;
1163
1164 int count1 = getProfileCount(jitEntry1);
1165 int count2 = getProfileCount(jitEntry2);
1166 return (count1 == count2) ? 0 : ((count1 > count2) ? -1 : 1);
1167 }
1168
1169 /* Sort the trace profile counts and dump them */
1170 void dvmCompilerSortAndPrintTraceProfiles()
1171 {
1172 JitEntry *sortedEntries;
1173 int numTraces = 0;
1174 unsigned long counts = 0;
1175 unsigned int i;
1176
1177 /* Make sure that the table is not changing */
1178 dvmLockMutex(&gDvmJit.tableLock);
1179
1180 /* Sort the entries in descending order */
1181 sortedEntries = malloc(sizeof(JitEntry) * gDvmJit.jitTableSize);
1182 if (sortedEntries == NULL)
1183 goto done;
1184 memcpy(sortedEntries, gDvmJit.pJitEntryTable,
1185 sizeof(JitEntry) * gDvmJit.jitTableSize);
1186 qsort(sortedEntries, gDvmJit.jitTableSize, sizeof(JitEntry),
1187 sortTraceProfileCount);
1188
1189 /* Dump the sorted entries */
1190 for (i=0; i < gDvmJit.jitTableSize; i++) {
1191 if (sortedEntries[i].dPC != 0) {
1192 counts += dumpTraceProfile(&sortedEntries[i]);
1193 numTraces++;
1194 }
1195 }
1196 if (numTraces == 0)
1197 numTraces = 1;
1198 LOGD("JIT: Average execution count -> %d",(int)(counts / numTraces));
1199
1200 free(sortedEntries);
1201 done:
1202 dvmUnlockMutex(&gDvmJit.tableLock);
1203 return;
1204 }
1205