1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * Dalvik bytecode verifier.
19 */
20 #ifndef DALVIK_CODEVERIFY_H_
21 #define DALVIK_CODEVERIFY_H_
22
23 #include "analysis/VerifySubs.h"
24 #include "analysis/VfyBasicBlock.h"
25
26 /*
27 * Enumeration for register type values. The "hi" piece of a 64-bit value
28 * MUST immediately follow the "lo" piece in the enumeration, so we can check
29 * that hi==lo+1.
30 *
31 * Assignment of constants:
32 * [-MAXINT,-32768) : integer
33 * [-32768,-128) : short
34 * [-128,0) : byte
35 * 0 : zero
36 * 1 : one
37 * [2,128) : posbyte
38 * [128,32768) : posshort
39 * [32768,65536) : char
40 * [65536,MAXINT] : integer
41 *
42 * Allowed "implicit" widening conversions:
43 * zero -> boolean, posbyte, byte, posshort, short, char, integer, ref (null)
44 * one -> boolean, posbyte, byte, posshort, short, char, integer
45 * boolean -> posbyte, byte, posshort, short, char, integer
46 * posbyte -> posshort, short, integer, char
47 * byte -> short, integer
48 * posshort -> integer, char
49 * short -> integer
50 * char -> integer
51 *
52 * In addition, all of the above can convert to "float".
53 *
54 * We're more careful with integer values than the spec requires. The
55 * motivation is to restrict byte/char/short to the correct range of values.
56 * For example, if a method takes a byte argument, we don't want to allow
57 * the code to load the constant "1024" and pass it in.
58 */
59 enum {
60 kRegTypeUnknown = 0, /* initial state; use value=0 so calloc works */
61 kRegTypeUninit = 1, /* MUST be odd to distinguish from pointer */
62 kRegTypeConflict, /* merge clash makes this reg's type unknowable */
63
64 /*
65 * Category-1nr types. The order of these is chiseled into a couple
66 * of tables, so don't add, remove, or reorder if you can avoid it.
67 */
68 #define kRegType1nrSTART kRegTypeZero
69 kRegTypeZero, /* 32-bit 0, could be Boolean, Int, Float, or Ref */
70 kRegTypeOne, /* 32-bit 1, could be Boolean, Int, Float */
71 kRegTypeBoolean, /* must be 0 or 1 */
72 kRegTypeConstPosByte, /* const derived byte, known positive */
73 kRegTypeConstByte, /* const derived byte */
74 kRegTypeConstPosShort, /* const derived short, known positive */
75 kRegTypeConstShort, /* const derived short */
76 kRegTypeConstChar, /* const derived char */
77 kRegTypeConstInteger, /* const derived integer */
78 kRegTypePosByte, /* byte, known positive (can become char) */
79 kRegTypeByte,
80 kRegTypePosShort, /* short, known positive (can become char) */
81 kRegTypeShort,
82 kRegTypeChar,
83 kRegTypeInteger,
84 kRegTypeFloat,
85 #define kRegType1nrEND kRegTypeFloat
86
87 kRegTypeConstLo, /* const derived wide, lower half */
88 kRegTypeConstHi, /* const derived wide, upper half */
89 kRegTypeLongLo, /* lower-numbered register; endian-independent */
90 kRegTypeLongHi,
91 kRegTypeDoubleLo,
92 kRegTypeDoubleHi,
93
94 /*
95 * Enumeration max; this is used with "full" (32-bit) RegType values.
96 *
97 * Anything larger than this is a ClassObject or uninit ref. Mask off
98 * all but the low 8 bits; if you're left with kRegTypeUninit, pull
99 * the uninit index out of the high 24. Because kRegTypeUninit has an
100 * odd value, there is no risk of a particular ClassObject pointer bit
101 * pattern being confused for it (assuming our class object allocator
102 * uses word alignment).
103 */
104 kRegTypeMAX
105 };
106 #define kRegTypeUninitMask 0xff
107 #define kRegTypeUninitShift 8
108
109 /*
110 * RegType holds information about the type of data held in a register.
111 * For most types it's a simple enum. For reference types it holds a
112 * pointer to the ClassObject, and for uninitialized references it holds
113 * an index into the UninitInstanceMap.
114 */
115 typedef u4 RegType;
116
117 /*
118 * A bit vector indicating which entries in the monitor stack are
119 * associated with this register. The low bit corresponds to the stack's
120 * bottom-most entry.
121 */
122 typedef u4 MonitorEntries;
123 #define kMaxMonitorStackDepth (sizeof(MonitorEntries) * 8)
124
125 /*
126 * During verification, we associate one of these with every "interesting"
127 * instruction. We track the status of all registers, and (if the method
128 * has any monitor-enter instructions) maintain a stack of entered monitors
129 * (identified by code unit offset).
130 *
131 * If live-precise register maps are enabled, the "liveRegs" vector will
132 * be populated. Unlike the other lists of registers here, we do not
133 * track the liveness of the method result register (which is not visible
134 * to the GC).
135 */
136 struct RegisterLine {
137 RegType* regTypes;
138 MonitorEntries* monitorEntries;
139 u4* monitorStack;
140 unsigned int monitorStackTop;
141 BitVector* liveRegs;
142 };
143
144 /*
145 * Table that maps uninitialized instances to classes, based on the
146 * address of the new-instance instruction. One per method.
147 */
148 struct UninitInstanceMap {
149 int numEntries;
150 struct {
151 int addr; /* code offset, or -1 for method arg ("this") */
152 ClassObject* clazz; /* class created at this address */
153 } map[1];
154 };
155 #define kUninitThisArgAddr (-1)
156 #define kUninitThisArgSlot 0
157
158 /*
159 * Various bits of data used by the verifier and register map generator.
160 */
161 struct VerifierData {
162 /*
163 * The method we're working on.
164 */
165 const Method* method;
166
167 /*
168 * Number of code units of instructions in the method. A cache of the
169 * value calculated by dvmGetMethodInsnsSize().
170 */
171 u4 insnsSize;
172
173 /*
174 * Number of registers we track for each instruction. This is equal
175 * to the method's declared "registersSize". (Does not include the
176 * pending return value.)
177 */
178 u4 insnRegCount;
179
180 /*
181 * Instruction widths and flags, one entry per code unit.
182 */
183 InsnFlags* insnFlags;
184
185 /*
186 * Uninitialized instance map, used for tracking the movement of
187 * objects that have been allocated but not initialized.
188 */
189 UninitInstanceMap* uninitMap;
190
191 /*
192 * Array of RegisterLine structs, one entry per code unit. We only need
193 * entries for code units that hold the start of an "interesting"
194 * instruction. For register map generation, we're only interested
195 * in GC points.
196 */
197 RegisterLine* registerLines;
198
199 /*
200 * The number of occurrences of specific opcodes.
201 */
202 size_t newInstanceCount;
203 size_t monitorEnterCount;
204
205 /*
206 * Array of pointers to basic blocks, one entry per code unit. Used
207 * for liveness analysis.
208 */
209 VfyBasicBlock** basicBlocks;
210 };
211
212
213 /* table with static merge logic for primitive types */
214 extern const char gDvmMergeTab[kRegTypeMAX][kRegTypeMAX];
215
216
217 /*
218 * Returns "true" if the flags indicate that this address holds the start
219 * of an instruction.
220 */
dvmInsnIsOpcode(const InsnFlags * insnFlags,int addr)221 INLINE bool dvmInsnIsOpcode(const InsnFlags* insnFlags, int addr) {
222 return (insnFlags[addr] & kInsnFlagWidthMask) != 0;
223 }
224
225 /*
226 * Extract the unsigned 16-bit instruction width from "flags".
227 */
dvmInsnGetWidth(const InsnFlags * insnFlags,int addr)228 INLINE int dvmInsnGetWidth(const InsnFlags* insnFlags, int addr) {
229 return insnFlags[addr] & kInsnFlagWidthMask;
230 }
231
232 /*
233 * Changed?
234 */
dvmInsnIsChanged(const InsnFlags * insnFlags,int addr)235 INLINE bool dvmInsnIsChanged(const InsnFlags* insnFlags, int addr) {
236 return (insnFlags[addr] & kInsnFlagChanged) != 0;
237 }
dvmInsnSetChanged(InsnFlags * insnFlags,int addr,bool changed)238 INLINE void dvmInsnSetChanged(InsnFlags* insnFlags, int addr, bool changed)
239 {
240 if (changed)
241 insnFlags[addr] |= kInsnFlagChanged;
242 else
243 insnFlags[addr] &= ~kInsnFlagChanged;
244 }
245
246 /*
247 * Visited?
248 */
dvmInsnIsVisited(const InsnFlags * insnFlags,int addr)249 INLINE bool dvmInsnIsVisited(const InsnFlags* insnFlags, int addr) {
250 return (insnFlags[addr] & kInsnFlagVisited) != 0;
251 }
dvmInsnSetVisited(InsnFlags * insnFlags,int addr,bool changed)252 INLINE void dvmInsnSetVisited(InsnFlags* insnFlags, int addr, bool changed)
253 {
254 if (changed)
255 insnFlags[addr] |= kInsnFlagVisited;
256 else
257 insnFlags[addr] &= ~kInsnFlagVisited;
258 }
259
260 /*
261 * Visited or changed?
262 */
dvmInsnIsVisitedOrChanged(const InsnFlags * insnFlags,int addr)263 INLINE bool dvmInsnIsVisitedOrChanged(const InsnFlags* insnFlags, int addr) {
264 return (insnFlags[addr] & (kInsnFlagVisited|kInsnFlagChanged)) != 0;
265 }
266
267 /*
268 * In a "try" block?
269 */
dvmInsnIsInTry(const InsnFlags * insnFlags,int addr)270 INLINE bool dvmInsnIsInTry(const InsnFlags* insnFlags, int addr) {
271 return (insnFlags[addr] & kInsnFlagInTry) != 0;
272 }
dvmInsnSetInTry(InsnFlags * insnFlags,int addr,bool inTry)273 INLINE void dvmInsnSetInTry(InsnFlags* insnFlags, int addr, bool inTry)
274 {
275 assert(inTry);
276 //if (inTry)
277 insnFlags[addr] |= kInsnFlagInTry;
278 //else
279 // insnFlags[addr] &= ~kInsnFlagInTry;
280 }
281
282 /*
283 * Instruction is a branch target or exception handler?
284 */
dvmInsnIsBranchTarget(const InsnFlags * insnFlags,int addr)285 INLINE bool dvmInsnIsBranchTarget(const InsnFlags* insnFlags, int addr) {
286 return (insnFlags[addr] & kInsnFlagBranchTarget) != 0;
287 }
dvmInsnSetBranchTarget(InsnFlags * insnFlags,int addr,bool isBranch)288 INLINE void dvmInsnSetBranchTarget(InsnFlags* insnFlags, int addr,
289 bool isBranch)
290 {
291 assert(isBranch);
292 //if (isBranch)
293 insnFlags[addr] |= kInsnFlagBranchTarget;
294 //else
295 // insnFlags[addr] &= ~kInsnFlagBranchTarget;
296 }
297
298 /*
299 * Instruction is a GC point?
300 */
dvmInsnIsGcPoint(const InsnFlags * insnFlags,int addr)301 INLINE bool dvmInsnIsGcPoint(const InsnFlags* insnFlags, int addr) {
302 return (insnFlags[addr] & kInsnFlagGcPoint) != 0;
303 }
dvmInsnSetGcPoint(InsnFlags * insnFlags,int addr,bool isGcPoint)304 INLINE void dvmInsnSetGcPoint(InsnFlags* insnFlags, int addr,
305 bool isGcPoint)
306 {
307 assert(isGcPoint);
308 //if (isGcPoint)
309 insnFlags[addr] |= kInsnFlagGcPoint;
310 //else
311 // insnFlags[addr] &= ~kInsnFlagGcPoint;
312 }
313
314
315 /*
316 * Create a new UninitInstanceMap.
317 */
318 UninitInstanceMap* dvmCreateUninitInstanceMap(const Method* meth,
319 const InsnFlags* insnFlags, int newInstanceCount);
320
321 /*
322 * Release the storage associated with an UninitInstanceMap.
323 */
324 void dvmFreeUninitInstanceMap(UninitInstanceMap* uninitMap);
325
326 /*
327 * Verify bytecode in "meth". "insnFlags" should be populated with
328 * instruction widths and "in try" flags.
329 */
330 bool dvmVerifyCodeFlow(VerifierData* vdata);
331
332 #endif // DALVIK_CODEVERIFY_H_
333