// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#if !V8_ENABLE_WEBASSEMBLY
#error This header should only be included if WebAssembly is enabled.
#endif  // !V8_ENABLE_WEBASSEMBLY

#ifndef V8_WASM_JUMP_TABLE_ASSEMBLER_H_
#define V8_WASM_JUMP_TABLE_ASSEMBLER_H_

#include "src/codegen/macro-assembler.h"

namespace v8 {
namespace internal {
namespace wasm {

// The jump table is the central dispatch point for all (direct and indirect)
// invocations in WebAssembly. It holds one slot per function in a module, with
// each slot containing a dispatch to the currently published {WasmCode} that
// corresponds to the function.
//
// In addition to this main jump table, there exist special jump tables for
// other purposes:
// - the far stub table contains one entry per wasm runtime stub (see
//   {WasmCode::RuntimeStubId}), which jumps to the corresponding embedded
//   builtin, plus (if the full address space cannot be reached via the jump
//   table) one entry per wasm function.
// - the lazy compile table contains one entry per wasm function which jumps to
//   the common {WasmCompileLazy} builtin and passes the function index that
//   was invoked.
//
// The main jump table is split into lines of fixed size, with lines laid out
// consecutively within the executable memory of the {NativeModule}. The slots
// in turn are consecutive within a line, but do not cross line boundaries.
//
//   +- L1 -------------------+  +- L2 -------------------+  +- L3 ...
//   | S1 | S2 | ... | Sn | x |  | S1 | S2 | ... | Sn | x |  | S1  ...
//   +------------------------+  +------------------------+  +---- ...
//
// The above illustrates jump table lines {Li} containing slots {Si} with each
// line containing {n} slots and some padding {x} for alignment purposes. The
// other jump tables are not split into lines; their slots are simply
// consecutive.
//
// The main jump table will be patched concurrently while other threads execute
// it. The code at the new target might also have been emitted concurrently, so
// we need to ensure that there is proper synchronization between code
// emission, jump table patching, and code execution.
// On Intel platforms, this all works out of the box because there is cache
// coherency between the i-cache and the d-cache.
// On ARM, it is safe because the i-cache flush after code emission executes an
// "ic ivau" (Instruction Cache line Invalidate by Virtual Address to Point of
// Unification), which broadcasts to all cores. A core which sees the jump
// table update thus also sees the new code. Since the other core does not
// explicitly execute an "isb" (Instruction Synchronization Barrier), it might
// still execute the old code afterwards, which is no problem, since that code
// remains available until it is garbage collected. Garbage collection itself
// is a synchronization barrier, though.
class V8_EXPORT_PRIVATE JumpTableAssembler : public MacroAssembler {
 public:
  // Translate an offset into the continuous jump table to a jump table index.
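  // For example, with the x64 constants defined below (64-byte lines and
  // 5-byte slots, i.e. 12 slots per line), the slot at offset 74 lies in
  // line 1 at line offset 10, and thus has index 1 * 12 + 10 / 5 == 14.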
  static uint32_t SlotOffsetToIndex(uint32_t slot_offset) {
    uint32_t line_index = slot_offset / kJumpTableLineSize;
    uint32_t line_offset = slot_offset % kJumpTableLineSize;
    DCHECK_EQ(0, line_offset % kJumpTableSlotSize);
    return line_index * kJumpTableSlotsPerLine +
           line_offset / kJumpTableSlotSize;
  }

  // Translate a jump table index to an offset into the continuous jump table.
  static uint32_t JumpSlotIndexToOffset(uint32_t slot_index) {
    uint32_t line_index = slot_index / kJumpTableSlotsPerLine;
    uint32_t line_offset =
        (slot_index % kJumpTableSlotsPerLine) * kJumpTableSlotSize;
    return line_index * kJumpTableLineSize + line_offset;
  }

  // Determine the size of a jump table containing the given number of slots.
  static constexpr uint32_t SizeForNumberOfSlots(uint32_t slot_count) {
    return ((slot_count + kJumpTableSlotsPerLine - 1) /
            kJumpTableSlotsPerLine) *
           kJumpTableLineSize;
  }

  // Translate a far jump table index to an offset into the table.
  static uint32_t FarJumpSlotIndexToOffset(uint32_t slot_index) {
    return slot_index * kFarJumpTableSlotSize;
  }

  // Translate a far jump table offset to the index into the table.
  static uint32_t FarJumpSlotOffsetToIndex(uint32_t offset) {
    DCHECK_EQ(0, offset % kFarJumpTableSlotSize);
    return offset / kFarJumpTableSlotSize;
  }

  // Determine the size of a far jump table containing the given number of
  // slots.
  static constexpr uint32_t SizeForNumberOfFarJumpSlots(
      int num_runtime_slots, int num_function_slots) {
    int num_entries = num_runtime_slots + num_function_slots;
    return num_entries * kFarJumpTableSlotSize;
  }

  // Translate a slot index to an offset into the lazy compile table.
  static uint32_t LazyCompileSlotIndexToOffset(uint32_t slot_index) {
    return slot_index * kLazyCompileTableSlotSize;
  }

  // Determine the size of a lazy compile table.
  static constexpr uint32_t SizeForNumberOfLazyFunctions(uint32_t slot_count) {
    return slot_count * kLazyCompileTableSlotSize;
  }

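  // Emit the lazy compile table at {base}: one slot per declared function,
  // each jumping to {wasm_compile_lazy_target}. The function index passed
  // along is the module-wide index, so {num_imported_functions} is added to
  // the table-relative slot index.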
  static void GenerateLazyCompileTable(Address base, uint32_t num_slots,
                                       uint32_t num_imported_functions,
                                       Address wasm_compile_lazy_target) {
    uint32_t lazy_compile_table_size = num_slots * kLazyCompileTableSlotSize;
    // Assume enough space, so the Assembler does not try to grow the buffer.
    JumpTableAssembler jtasm(base, lazy_compile_table_size + 256);
    for (uint32_t slot_index = 0; slot_index < num_slots; ++slot_index) {
      DCHECK_EQ(slot_index * kLazyCompileTableSlotSize, jtasm.pc_offset());
      jtasm.EmitLazyCompileJumpSlot(slot_index + num_imported_functions,
                                    wasm_compile_lazy_target);
    }
    DCHECK_EQ(lazy_compile_table_size, jtasm.pc_offset());
    FlushInstructionCache(base, lazy_compile_table_size);
  }

  static void GenerateFarJumpTable(Address base, Address* stub_targets,
                                   int num_runtime_slots,
                                   int num_function_slots) {
    uint32_t table_size =
        SizeForNumberOfFarJumpSlots(num_runtime_slots, num_function_slots);
    // Assume enough space, so the Assembler does not try to grow the buffer.
    JumpTableAssembler jtasm(base, table_size + 256);
    int offset = 0;
    for (int index = 0; index < num_runtime_slots + num_function_slots;
         ++index) {
      DCHECK_EQ(offset, FarJumpSlotIndexToOffset(index));
      // Function slots initially jump to themselves. They are patched before
      // being used.
      Address target =
          index < num_runtime_slots ? stub_targets[index] : base + offset;
      jtasm.EmitFarJumpSlot(target);
      offset += kFarJumpTableSlotSize;
      DCHECK_EQ(offset, jtasm.pc_offset());
    }
    FlushInstructionCache(base, table_size);
  }

  static void PatchJumpTableSlot(Address jump_table_slot,
                                 Address far_jump_table_slot, Address target) {
    // First, try to patch the jump table slot.
    JumpTableAssembler jtasm(jump_table_slot);
    if (!jtasm.EmitJumpSlot(target)) {
      // If that fails, we need to patch the far jump table slot, and then
      // update the jump table slot to jump to this far jump table slot.
      DCHECK_NE(kNullAddress, far_jump_table_slot);
      JumpTableAssembler::PatchFarJumpSlot(far_jump_table_slot, target);
      CHECK(jtasm.EmitJumpSlot(far_jump_table_slot));
    }
    jtasm.NopBytes(kJumpTableSlotSize - jtasm.pc_offset());
    FlushInstructionCache(jump_table_slot, kJumpTableSlotSize);
  }

 private:
  // Instantiate a {JumpTableAssembler} for patching.
  explicit JumpTableAssembler(Address slot_addr, int size = 256)
      : MacroAssembler(nullptr, JumpTableAssemblerOptions(),
                       CodeObjectRequired::kNo,
                       ExternalAssemblerBuffer(
                           reinterpret_cast<uint8_t*>(slot_addr), size)) {}

  // To allow concurrent patching of the jump table entries, we need to ensure
  // that the instruction containing the call target does not cross cache-line
  // boundaries. The jump table line size has been chosen to satisfy this.
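  // On x64, for instance, a line is 64 bytes (one cache line on common x64
  // CPUs) and holds 12 five-byte slots plus 4 bytes of padding.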
#if V8_TARGET_ARCH_X64
  static constexpr int kJumpTableLineSize = 64;
  static constexpr int kJumpTableSlotSize = 5;
  static constexpr int kFarJumpTableSlotSize = 16;
  static constexpr int kLazyCompileTableSlotSize = 10;
#elif V8_TARGET_ARCH_IA32
  static constexpr int kJumpTableLineSize = 64;
  static constexpr int kJumpTableSlotSize = 5;
  static constexpr int kFarJumpTableSlotSize = 5;
  static constexpr int kLazyCompileTableSlotSize = 10;
#elif V8_TARGET_ARCH_ARM
  static constexpr int kJumpTableLineSize = 3 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 3 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 2 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 5 * kInstrSize;
#elif V8_TARGET_ARCH_ARM64 && V8_ENABLE_CONTROL_FLOW_INTEGRITY
  static constexpr int kJumpTableLineSize = 2 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 2 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 6 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 4 * kInstrSize;
#elif V8_TARGET_ARCH_ARM64 && !V8_ENABLE_CONTROL_FLOW_INTEGRITY
  static constexpr int kJumpTableLineSize = 1 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 1 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 3 * kInstrSize;
#elif V8_TARGET_ARCH_S390X
  static constexpr int kJumpTableLineSize = 128;
  static constexpr int kJumpTableSlotSize = 8;
  static constexpr int kFarJumpTableSlotSize = 16;
  static constexpr int kLazyCompileTableSlotSize = 20;
#elif V8_TARGET_ARCH_PPC64
  static constexpr int kJumpTableLineSize = 64;
  static constexpr int kJumpTableSlotSize = 1 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 12 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 12 * kInstrSize;
#elif V8_TARGET_ARCH_MIPS
  static constexpr int kJumpTableLineSize = 8 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 8 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 6 * kInstrSize;
#elif V8_TARGET_ARCH_MIPS64
  static constexpr int kJumpTableLineSize = 8 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 8 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 6 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 8 * kInstrSize;
#elif V8_TARGET_ARCH_RISCV64
  static constexpr int kJumpTableLineSize = 6 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 6 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 6 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 10 * kInstrSize;
#elif V8_TARGET_ARCH_LOONG64
  static constexpr int kJumpTableLineSize = 8 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 8 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 8 * kInstrSize;
#else
#error Unknown architecture.
#endif

  static constexpr int kJumpTableSlotsPerLine =
      kJumpTableLineSize / kJumpTableSlotSize;
  STATIC_ASSERT(kJumpTableSlotsPerLine >= 1);
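  // On arm64 without control-flow integrity, for instance, a slot is a single
  // instruction and a line holds exactly one slot, so the assert holds with
  // equality.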

  // {JumpTableAssembler} is never used during snapshot generation, and its
  // code must be independent of the code range of any isolate anyway. Just
  // ensure that no relocation information is recorded; there is no buffer to
  // store it, since the assembler is instantiated in patching mode directly on
  // existing code.
  static AssemblerOptions JumpTableAssemblerOptions() {
    AssemblerOptions options;
    options.disable_reloc_info_for_patching = true;
    return options;
  }

  void EmitLazyCompileJumpSlot(uint32_t func_index,
                               Address lazy_compile_target);

  // Returns {true} if the jump fits in the jump table slot, {false} otherwise.
  bool EmitJumpSlot(Address target);

  // Initially emit a far jump slot.
  void EmitFarJumpSlot(Address target);

  // Patch an existing far jump slot, and make sure that this update eventually
  // becomes available to all execution units that might execute this code.
  static void PatchFarJumpSlot(Address slot, Address target);

  void NopBytes(int bytes);
};

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_JUMP_TABLE_ASSEMBLER_H_