/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/stringify.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/frame.h>
#include <asm/nops.h>

	.section .text..__x86.indirect_thunk

.macro RETPOLINE reg
	ANNOTATE_INTRA_FUNCTION_CALL
	call	.Ldo_rop_\@
.Lspec_trap_\@:
	UNWIND_HINT_EMPTY
	pause
	lfence
	jmp	.Lspec_trap_\@
.Ldo_rop_\@:
	mov	%\reg, (%_ASM_SP)
	UNWIND_HINT_FUNC
	RET
.endm

.macro THUNK reg

	.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_EMPTY

	ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
		      __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)

.endm

/*
 * Despite being an assembler file we can't just use .irp here
 * because __KSYM_DEPS__ only uses the C preprocessor and would
 * only see one instance of "__x86_indirect_thunk_\reg" rather
 * than one per register with the correct names. So we do it
 * the simple and nasty way...
 *
 * Worse, you can only have a single EXPORT_SYMBOL per line,
 * and CPP can't insert newlines, so we have to repeat everything
 * at least twice.
 */

#define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)

#define GEN(reg) THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)

#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

/*
 * This function name is magical and is used by -mfunction-return=thunk-extern
 * for the compiler to generate JMPs to it.
 */
#ifdef CONFIG_RETHUNK

/*
 * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
 * special addresses:
 *
 * - srso_alias_untrain_ret() is 2M aligned
 * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
 *   and 20 in its virtual address are set (while those bits in the
 *   srso_alias_untrain_ret() function are cleared).
 *
 * This guarantees that those two addresses will alias in the branch
 * target buffer of Zen3/4 generations, causing any potentially
 * poisoned entries at that BTB slot to be evicted.
 *
 * As a result, srso_alias_safe_ret() becomes a safe return.
 */
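/*
 * Illustrative example (hypothetical addresses, not the actual linked
 * layout): if srso_alias_untrain_ret() ends up at the 2M-aligned address
 * 0xffffffff82e00000, bits 2, 8, 14 and 20 are all clear there, and
 * srso_alias_safe_ret() then lands in the same 2M page at an address with
 * those bits set, e.g. 0xffffffff82e00000 + 0x104104
 * (0x100000 + 0x4000 + 0x100 + 0x4) = 0xffffffff82f04104.
 */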
#ifdef CONFIG_CPU_SRSO
	.section .text..__x86.rethunk_untrain

SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	UNWIND_HINT_FUNC
	ASM_NOP2
	lfence
	jmp srso_alias_return_thunk
SYM_FUNC_END(srso_alias_untrain_ret)
__EXPORT_THUNK(srso_alias_untrain_ret)

	.section .text..__x86.rethunk_safe
#else
/* dummy definition for alternatives */
SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_FUNC_END(srso_alias_untrain_ret)
#endif

SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
	lea 8(%_ASM_SP), %_ASM_SP
	UNWIND_HINT_FUNC
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_FUNC_END(srso_alias_safe_ret)

	.section .text..__x86.return_thunk

SYM_CODE_START(srso_alias_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	call srso_alias_safe_ret
	ud2
SYM_CODE_END(srso_alias_return_thunk)

/*
 * Some generic notes on the untraining sequences:
 *
 * They are interchangeable when it comes to flushing potentially wrong
 * RET predictions from the BTB.
 *
 * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
 * Retbleed sequence because the return sequence done there
 * (srso_safe_ret()) is longer and the return sequence must fully nest
 * (end before) the untraining sequence. Therefore, the untraining
 * sequence must fully overlap the return sequence.
 *
 * Regarding alignment - the instructions which need to be untrained
 * must all start at a cacheline boundary for Zen1/2 generations. That
 * is, both the instruction sequence starting at srso_safe_ret() and
 * the respective instruction sequence at retbleed_return_thunk()
 * must start at a cacheline boundary.
 */

/*
 * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
 * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
 *    alignment within the BTB.
 * 2) The instruction at retbleed_untrain_ret must contain, and not
 *    end with, the 0xc3 byte of the RET.
 * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
 *    from re-poisoning the BTB prediction.
 */
	.align 64
	.skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
SYM_FUNC_START_NOALIGN(retbleed_untrain_ret);

	/*
	 * As executed from retbleed_untrain_ret, this is:
	 *
	 *   TEST $0xcc, %bl
	 *   LFENCE
	 *   JMP retbleed_return_thunk
	 *
	 * Executing the TEST instruction has a side effect of evicting any BTB
	 * prediction (potentially attacker controlled) attached to the RET, as
	 * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
	 */
	.byte	0xf6

	/*
	 * As executed from retbleed_return_thunk, this is a plain RET.
	 *
	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
	 *
	 * We subsequently jump backwards and architecturally execute the RET.
	 * This creates a correct BTB prediction (type=ret), but in the
	 * meantime we suffer Straight Line Speculation (because the type was
	 * no branch) which is halted by the INT3.
	 *
	 * With SMT enabled and STIBP active, a sibling thread cannot poison
	 * RET's prediction to a type of its choice, but can evict the
	 * prediction due to competitive sharing. If the prediction is
	 * evicted, retbleed_return_thunk will suffer Straight Line Speculation
	 * which will be contained safely by the INT3.
	 */
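	/*
	 * Byte-level sketch of the overlap described above: the three bytes
	 * 0xf6 0xc3 0xcc decode as "test $0xcc, %bl" when entered at
	 * retbleed_untrain_ret, while entering one byte later at
	 * retbleed_return_thunk decodes the same memory as "ret; int3".
	 */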
SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
	ret
	int3
SYM_CODE_END(retbleed_return_thunk)

	/*
	 * Ensure the TEST decoding / BTB invalidation is complete.
	 */
	lfence

	/*
	 * Jump back and execute the RET in the middle of the TEST instruction.
	 * INT3 is for SLS protection.
	 */
	jmp retbleed_return_thunk
	int3
SYM_FUNC_END(retbleed_untrain_ret)
__EXPORT_THUNK(retbleed_untrain_ret)

/*
 * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
 * above. On kernel entry, srso_untrain_ret() is executed which is a
 *
 * movabs $0xccccc30824648d48,%rax
 *
 * and when the return thunk executes the inner label srso_safe_ret()
 * later, it is a stack manipulation and a RET which is mispredicted and
 * thus a "safe" one to use.
 */
	.align 64
	.skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
	.byte 0x48, 0xb8

/*
 * This forces the function return instruction to speculate into a trap
 * (UD2 in srso_return_thunk() below). This RET will then mispredict
 * and execution will continue at the return site read from the top of
 * the stack.
 */
SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
	lea 8(%_ASM_SP), %_ASM_SP
	ret
	int3
	int3
	/* end of movabs */
	lfence
	call srso_safe_ret
	ud2
SYM_CODE_END(srso_safe_ret)
SYM_FUNC_END(srso_untrain_ret)
__EXPORT_THUNK(srso_untrain_ret)

SYM_CODE_START(srso_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	call srso_safe_ret
	ud2
SYM_CODE_END(srso_return_thunk)

SYM_FUNC_START(entry_untrain_ret)
	ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
		      "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
		      "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)

SYM_CODE_START(__x86_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)

#endif /* CONFIG_RETHUNK */
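
/*
 * Illustrative sketch (comment only, not part of the thunk code): with
 * retpolines and -mfunction-return=thunk-extern in effect, compiler-emitted
 * call sites take the form
 *
 *	call	__x86_indirect_thunk_rax	# instead of: call *%rax
 *	...
 *	jmp	__x86_return_thunk		# instead of: ret
 *
 * and those sites are rewritten at boot according to the selected
 * mitigation, as described for the thunks above.
 */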