// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/wasm/jump-table-assembler.h"

#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler-inl.h"

namespace v8 {
namespace internal {
namespace wasm {

// The implementation is compact enough to be kept inline here. If it grows
// much bigger, we might want to split it into one file per architecture.
#if V8_TARGET_ARCH_X64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Use a push, because mov to an extended register takes 6 bytes.
  pushq_imm32(func_index);            // 5 bytes
  EmitJumpSlot(lazy_compile_target);  // 5 bytes
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  intptr_t displacement = static_cast<intptr_t>(
      reinterpret_cast<byte*>(target) - pc_ - kNearJmpInstrSize);
  if (!is_int32(displacement)) return false;
  near_jmp(displacement, RelocInfo::NO_INFO);  // 5 bytes
  return true;
}

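// When {EmitJumpSlot} fails because the displacement exceeds the 32-bit range
// of a near jmp, callers fall back to a far jump slot, which keeps the full
// 64-bit target address inline so it can reach anywhere in the address space.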
void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  Label data;
  int start_offset = pc_offset();
  jmp(Operand(&data));  // 6 bytes
  Nop(2);               // 2 bytes
  // The data must be properly aligned, so it can be patched atomically (see
  // {PatchFarJumpSlot}).
  DCHECK_EQ(start_offset + kSystemPointerSize, pc_offset());
  USE(start_offset);
  bind(&data);
  dq(target);  // 8 bytes
}

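// Resulting far jump slot layout on x64 (derived from {EmitFarJumpSlot}):
//   +0: jmp [rip+2]  ; 6 bytes, jumps through the data word below
//   +6: 2-byte nop   ; pads the data word to pointer alignment
//   +8: <target>     ; 8-byte address, overwritten by {PatchFarJumpSlot}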
// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  // The slot needs to be pointer-size aligned so we can atomically update it.
  DCHECK(IsAligned(slot, kSystemPointerSize));
  // The target address is stored 8 bytes into the slot, see {EmitFarJumpSlot}.
  reinterpret_cast<std::atomic<Address>*>(slot + kSystemPointerSize)
      ->store(target, std::memory_order_relaxed);
  // The update is atomic because the address is properly aligned.
  // Because of cache coherence, the data update will eventually be seen by all
  // cores. It's ok if they temporarily jump to the old target.
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  Nop(bytes);
}

#elif V8_TARGET_ARCH_IA32
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  mov(kWasmCompileLazyFuncIndexRegister, func_index);  // 5 bytes
  jmp(lazy_compile_target, RelocInfo::NO_INFO);        // 5 bytes
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  jmp(target, RelocInfo::NO_INFO);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  jmp(target, RelocInfo::NO_INFO);
}

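// On ia32, a jmp with a 32-bit displacement reaches the whole address space,
// so {EmitJumpSlot} never fails and far jump slots never need to be patched;
// hence this is unreachable.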
// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  Nop(bytes);
}

#elif V8_TARGET_ARCH_ARM
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Load the function index into a register.
  // This generates [movw, movt] on ARMv7 and later, and [ldr, constant pool
  // marker, constant] on ARMv6.
  Move32BitImmediate(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // {EmitJumpSlot} emits either [b], [movw, movt, mov] (ARMv7+), or [ldr,
  // constant].
  // In total, this is at most 5 instructions on all ARM variants.
  // TODO(arm): Optimize this for code size; lazy compile is not performance
  // critical, as it's only executed once per function.
  EmitJumpSlot(lazy_compile_target);
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  // Note that {Move32BitImmediate} emits [ldr, constant] for the relocation
  // mode used below; we need this to allow concurrent patching of this slot.
  Move32BitImmediate(pc, Operand(target, RelocInfo::WASM_CALL));
  CheckConstPool(true, false);  // force emit of const pool
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  // Load the word following this instruction into pc. Note that the hardware
  // {pc} reads two instructions ahead of the one currently executing, hence
  // the -kInstrSize offset.
  ldr_pcrel(pc, -kInstrSize);  // 1 instruction
  dd(target);                  // 4 bytes (== 1 instruction)
  STATIC_ASSERT(kInstrSize == kInt32Size);
  STATIC_ASSERT(kFarJumpTableSlotSize == 2 * kInstrSize);
}

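// Far jump slots are only patched when {EmitJumpSlot} cannot reach a new
// target; on arm it always succeeds, so this is presumably never called.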
// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_ARM64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  // {CodeEntry} emits a BTI landing pad when control-flow integrity is
  // enabled, and nothing otherwise.
  CodeEntry();                                             // 0-1 instr
  Mov(kWasmCompileLazyFuncIndexRegister.W(), func_index);  // 1-2 instr
  Jump(lazy_compile_target, RelocInfo::NO_INFO);           // 1 instr
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK(nop_bytes == 0 || nop_bytes == kInstrSize);
  if (nop_bytes) nop();
}

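// A direct branch on arm64 only reaches targets within +-128 MB, hence the
// near-call check below; more distant targets go through a far jump slot.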
bool JumpTableAssembler::EmitJumpSlot(Address target) {
  if (!TurboAssembler::IsNearCallOffset(
          (reinterpret_cast<byte*>(target) - pc_) / kInstrSize)) {
    return false;
  }

  CodeEntry();

  Jump(target, RelocInfo::NO_INFO);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  // This code uses hard-coded registers and instructions (and avoids
  // {UseScratchRegisterScope} or {InstructionAccurateScope}) because this code
  // will only be called for the very specific runtime slot table, and we want
  // to have maximum control over the generated code.
  // Do not reuse this code without validating that the same assumptions hold.
  CodeEntry();  // 0-1 instructions
  constexpr Register kTmpReg = x16;
  DCHECK(TmpList()->IncludesAliasOf(kTmpReg));
  int kOffset = ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 3 : 2;
  // Load from [pc + kOffset * kInstrSize] to {kTmpReg}, then branch there.
  ldr_pcrel(kTmpReg, kOffset);  // 1 instruction
  br(kTmpReg);                  // 1 instruction
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  nop();  // To keep the target below aligned to kSystemPointerSize.
#endif
  dq(target);  // 8 bytes (== 2 instructions)
  STATIC_ASSERT(2 * kInstrSize == kSystemPointerSize);
  const int kSlotCount = ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 6 : 4;
  STATIC_ASSERT(kFarJumpTableSlotSize == kSlotCount * kInstrSize);
}

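// Resulting far jump slot layout on arm64 (derived from {EmitFarJumpSlot}):
//   without CFI: [ldr, br, <target: 2 words>]            (4 * kInstrSize)
//   with CFI:    [bti, ldr, br, nop, <target: 2 words>]  (6 * kInstrSize)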
// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  // See {EmitFarJumpSlot} for the offset of the target (16 bytes with
  // CFI enabled, 8 bytes otherwise).
  int kTargetOffset =
      ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 4 * kInstrSize : 2 * kInstrSize;
  // The slot needs to be pointer-size aligned so we can atomically update it.
  DCHECK(IsAligned(slot + kTargetOffset, kSystemPointerSize));
  reinterpret_cast<std::atomic<Address>*>(slot + kTargetOffset)
      ->store(target, std::memory_order_relaxed);
  // The data update is guaranteed to be atomic since it is properly aligned
  // and stores a single machine word. This update will eventually be observed
  // by any concurrent [ldr] on the same address because of data cache
  // coherence. It's ok if other cores temporarily jump to the old target.
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_S390X
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Load the function index into r7. 6 bytes
  lgfi(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // Jump to {lazy_compile_target}. 6 bytes or 12 bytes
  mov(r1, Operand(lazy_compile_target, RelocInfo::CODE_TARGET));
  b(r1);  // 2 bytes
}

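// {brcl} encodes its relative offset in halfwords (2-byte units), which is
// why the displacement below is halved before the 32-bit range check.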
bool JumpTableAssembler::EmitJumpSlot(Address target) {
  intptr_t relative_target = reinterpret_cast<byte*>(target) - pc_;

  if (!is_int32(relative_target / 2)) {
    return false;
  }

  brcl(al, Operand(relative_target / 2));
  nop(0);  // align the slot to 8 bytes
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  Label target_addr;
  lgrl(ip, &target_addr);  // 6 bytes
  b(ip);                   // 2 bytes

  CHECK_EQ(reinterpret_cast<Address>(pc_) & 0x7, 0);  // Alignment
  bind(&target_addr);
  dp(target);
}

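// The target address lives 8 bytes into the far jump slot (6-byte lgrl plus
// 2-byte branch, see {EmitFarJumpSlot}); the store below is atomic because
// the address is pointer-size aligned, as checked when emitting the slot.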
// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  Address target_addr = slot + 8;
  reinterpret_cast<std::atomic<Address>*>(target_addr)
      ->store(target, std::memory_order_relaxed);
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % 2);
  for (; bytes > 0; bytes -= 2) {
    nop(0);
  }
}

#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  li(kWasmCompileLazyFuncIndexRegister, func_index);  // max. 2 instr
  // Jump produces max. 4 instructions on 32-bit platforms
  // and max. 6 instructions on 64-bit platforms.
  Jump(lazy_compile_target, RelocInfo::NO_INFO);
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  PatchAndJump(target);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  JumpToOffHeapInstructionStream(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_LOONG64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  DCHECK(is_int32(func_index));
  int start = pc_offset();
  li(kWasmCompileLazyFuncIndexRegister,
     static_cast<int32_t>(func_index));  // max. 2 instr
  // Jump produces max. 4 instructions.
  Jump(lazy_compile_target, RelocInfo::NO_INFO);
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  PatchAndJump(target);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  JumpToOffHeapInstructionStream(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_PPC64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  // Load the function index into a register. max. 5 instrs
  mov(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // Jump to {lazy_compile_target}. max. 5 instrs
  mov(r0, Operand(lazy_compile_target));
  mtctr(r0);
  bctr();
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  intptr_t relative_target = reinterpret_cast<byte*>(target) - pc_;

  if (!is_int26(relative_target)) {
    return false;
  }

  b(relative_target, LeaveLK);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  byte* start = pc_;
  mov(ip, Operand(reinterpret_cast<Address>(start + kFarJumpTableSlotSize -
                                            8)));  // 5 instr
  LoadU64(ip, MemOperand(ip));
  mtctr(ip);
  bctr();
  byte* end = pc_;
  int used = end - start;
  CHECK(used < kFarJumpTableSlotSize - 8);
  NopBytes(kFarJumpTableSlotSize - 8 - used);
  CHECK_EQ(reinterpret_cast<Address>(pc_) & 0x7, 0);  // Alignment
  dp(target);
}

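// The target address occupies the last 8 bytes of the far jump slot (see
// {EmitFarJumpSlot}), which is why the patch below writes to
// {slot + kFarJumpTableSlotSize - 8}; the alignment checked when emitting
// the slot makes the store atomic.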
// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  Address target_addr = slot + kFarJumpTableSlotSize - 8;
  reinterpret_cast<std::atomic<Address>*>(target_addr)
      ->store(target, std::memory_order_relaxed);
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % 4);
  for (; bytes > 0; bytes -= 4) {
    nop(0);
  }
}

#elif V8_TARGET_ARCH_RISCV64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  li(kWasmCompileLazyFuncIndexRegister, func_index);  // max. 2 instr
  // Jump produces max. 8 instructions (including constant pool and j).
  Jump(lazy_compile_target, RelocInfo::NO_INFO);
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  PatchAndJump(target);
  return true;
}

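// Far jump slot layout on riscv64, assuming every instruction below occupies
// a full kInstrSize (no compressed encodings):
//   +0:  auipc rd, 0    ; rd = address of this slot
//   +4:  ld rd, 16(rd)  ; load the target stored at +16
//   +8:  jr rd
//   +12: nop            ; pad so the target matches the load offset
//   +16: <target>       ; 8-byte address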
void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  UseScratchRegisterScope temp(this);
  Register rd = temp.Acquire();
  auipc(rd, 0);                // rd = pc of this instruction
  ld(rd, rd, 4 * kInstrSize);  // load the target from {rd + 16}
  Jump(rd);
  nop();  // padding, so that {dq} below lands at the offset loaded above
  dq(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#else
#error Unknown architecture.
#endif

}  // namespace wasm
}  // namespace internal
}  // namespace v8