1#!/usr/bin/env perl 2# Copyright (c) 2018, Google Inc. 3# 4# Permission to use, copy, modify, and/or distribute this software for any 5# purpose with or without fee is hereby granted, provided that the above 6# copyright notice and this permission notice appear in all copies. 7# 8# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 11# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 13# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 14# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 16# This file defines helper functions for crypto/test/abi_test.h on x86_64. See 17# that header for details on how to use this. 18# 19# For convenience, this file is linked into libcrypto, where consuming builds 20# already support architecture-specific sources. The static linker should drop 21# this code in non-test binaries. This includes a shared library build of 22# libcrypto, provided --gc-sections (ELF), -dead_strip (Mac), or equivalent is 23# used. 
#
# References:
#
# SysV ABI: https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
# Win64 ABI: https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017

use strict;

# Command line: [flavour] output. If the first argument contains a '.', it is
# taken to be the output file and the flavour is left undefined.
my $flavour = shift;
my $output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 is selected either by an assembler flavour (nasm, masm, mingw64) or by
# an output file name ending in ".asm".
my $win64 = 0;
$win64 = 1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# $dir is the directory portion of this script's path, including the trailing
# separator. It is empty when $0 carries no directory component, rather than
# depending on whatever $1 happens to hold after a failed match.
my $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
my $xlate;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT is piped through x86_64-xlate.pl. Fail
# immediately if the child process cannot be started, instead of only noticing
# when the pipe is closed.
open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT = *OUT;

# @inp is the registers used for function inputs, in order.
my @inp = $win64 ? ("%rcx", "%rdx", "%r8", "%r9") :
                   ("%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9");

# @caller_state is the list of registers that the callee must preserve for the
# caller. This must match the definition of CallerState in abi_test.h.
my @caller_state = ("%rbx", "%rbp", "%r12", "%r13", "%r14", "%r15");
if ($win64) {
  @caller_state = ("%rbx", "%rbp", "%rdi", "%rsi", "%r12", "%r13", "%r14",
                   "%r15", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10",
                   "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15");
}

# $caller_state_size is the size of CallerState, in bytes. General-purpose
# registers take 8 bytes each and XMM registers take 16.
my $caller_state_size = 0;
foreach (@caller_state) {
  if (/^%r/) {
    $caller_state_size += 8;
  } elsif (/^%xmm/) {
    $caller_state_size += 16;
  } else {
    die "unknown register $_";
  }
}

# load_caller_state returns code which loads a CallerState structure at
# $off($reg) into the respective registers. No other registers are touched, but
# $reg may not be a register in CallerState. $cb is an optional callback to
# add extra lines after each movq or movdqa. $cb is passed the offset, relative
# to $reg, and name of each register.
# _caller_state_moves is the shared implementation of load_caller_state and
# store_caller_state. It returns one move per entry of @caller_state, between
# that register and consecutive slots starting at $off($reg): movq with an
# 8-byte slot for %r* registers and movdqa with a 16-byte slot for %xmm*
# registers. When $is_load is true the slot is the source operand; otherwise
# it is the destination. If $cb is defined, whatever it returns for
# (slot offset, register name) is appended after each move.
sub _caller_state_moves {
  my ($off, $reg, $cb, $is_load) = @_;
  my $asm = "";
  for my $saved (@caller_state) {
    my ($insn, $width);
    if ($saved =~ /^%xmm/) {
      ($insn, $width) = ("movdqa", 16);
    } elsif ($saved =~ /^%r/) {
      ($insn, $width) = ("movq", 8);
    } else {
      die "unknown register $saved";
    }
    my $slot = "$off($reg)";
    my ($src, $dst) = $is_load ? ($slot, $saved) : ($saved, $slot);
    $asm .= "\t$insn\t$src, $dst\n";
    $asm .= $cb->($off, $saved) if (defined($cb));
    $off += $width;
  }
  return $asm;
}

# load_caller_state returns code which fills the registers in @caller_state
# from the CallerState structure at $off($reg). $cb, if given, is called with
# each slot offset and register name and its result follows the move.
sub load_caller_state {
  my ($off, $reg, $cb) = @_;
  return _caller_state_moves($off, $reg, $cb, 1);
}

# store_caller_state behaves like load_caller_state, except that it writes the
# current values of the registers into $off($reg).
sub store_caller_state {
  my ($off, $reg, $cb) = @_;
  return _caller_state_moves($off, $reg, $cb, 0);
}

# $max_params is the maximum number of parameters abi_test_trampoline supports.
my $max_params = 10;

# Windows reserves stack space for the register-based parameters, while SysV
# only reserves space for the overflow ones.
my $stack_params_skip = 0;
my $num_stack_params = $max_params - scalar(@inp);
if ($win64) {
  $stack_params_skip = scalar(@inp);
  $num_stack_params = $max_params;
}

# Name the trampoline's own arguments after the registers they arrive in.
# The stack layout comment below is emitted before the alignment adjustment,
# so it reports the unadjusted parameter count.
my ($func, $state, $argv, $argc, $unwind) = @inp;
my $code = <<____;
.text

# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
# with |argv|, then saves the callee-saved registers into |state|. It returns
# the result of |func|. If |unwind| is non-zero, this function triggers unwind
# instrumentation.
# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
#                              const uint64_t *argv, size_t argc,
#                              int unwind);
.type	abi_test_trampoline, \@abi-omnipotent
.globl	abi_test_trampoline
.align	16
abi_test_trampoline:
.cfi_startproc
.seh_startproc
	_CET_ENDBR
	# Stack layout:
	#   8 bytes - align
	#   $caller_state_size bytes - saved caller registers
	#   8 bytes - scratch space
	#   8 bytes - saved copy of \$unwind (SysV-only)
	#   8 bytes - saved copy of \$state
	#   8 bytes - saved copy of \$func
	#   8 bytes - if needed for stack alignment
	#   8*$num_stack_params bytes - parameters for \$func
____

# One alignment slot, three bookkeeping slots (scratch, state, func), and the
# parameter area, plus the saved caller registers. SysV needs one more slot to
# hold its copy of unwind.
my $stack_alloc_size = 8 * (1 + 3 + $num_stack_params) + $caller_state_size;
$stack_alloc_size += 8 unless $win64;

# SysV and Windows both require the stack to be 16-byte-aligned. The call
# instruction offsets it by 8, so stack allocations must be 8 mod 16. Pad with
# one extra parameter slot when they are not.
unless ($stack_alloc_size % 16 == 8) {
  $num_stack_params += 1;
  $stack_alloc_size += 8;
}

my $stack_params_offset = $stack_params_skip * 8;
my $func_offset = $num_stack_params * 8;
my $state_offset = $func_offset + 8;

# On Win64, unwind is already passed in memory. On SysV, it is passed in as
# register and we must reserve stack space for it, which pushes the scratch
# slot up by one.
my $unwind_offset = $win64 ? $stack_alloc_size + 5*8 : $state_offset + 8;
my $scratch_offset = $win64 ? $state_offset + 8 : $unwind_offset + 8;
my $caller_state_offset = $scratch_offset + 8;

$code .= <<____;
	subq	\$$stack_alloc_size, %rsp
.cfi_adjust_cfa_offset	$stack_alloc_size
.seh_allocstack	$stack_alloc_size
____
unless ($win64) {
  $code .= <<____;
	movq	$unwind, $unwind_offset(%rsp)
____
}
# Store our caller's state.
# This is needed because we modify it ourselves, and
# also to isolate the test infrastructure from the function under test failing
# to save some register.
#
# The callback adds unwind annotations after each save: a .cfi_offset line and
# either .seh_savereg or, for XMM registers, .seh_savexmm128.
$code .= store_caller_state($caller_state_offset, "%rsp", sub {
  my ($off, $reg) = @_;
  # $reg arrives with its leading '%'. Strip it for the CFI directive; the SEH
  # line below puts the '%' back.
  $reg = substr($reg, 1);
  # SEH records offsets relative to %rsp (when there is no frame pointer), while
  # CFI records them relative to the CFA, the value of the parent's stack
  # pointer just before the call.
  my $cfi_off = $off - $stack_alloc_size - 8;
  my $seh_dir = ".seh_savereg";
  $seh_dir = ".seh_savexmm128" if ($reg =~ /^xmm/);
  return <<____;
.cfi_offset	$reg, $cfi_off
$seh_dir	\%$reg, $off
____
});

# Load the registers listed in @caller_state from the structure at 0($state).
$code .= load_caller_state(0, $state);
$code .= <<____;
	# Stash \$func and \$state, so they are available after the call returns.
	movq	$func, $func_offset(%rsp)
	movq	$state, $state_offset(%rsp)

	# Load parameters. Note this will clobber \$argv and \$argc, so we can
	# only use non-parameter volatile registers. There are three, and they
	# are the same between SysV and Win64: %rax, %r10, and %r11.
	movq	$argv, %r10
	movq	$argc, %r11
____
# Emit one guarded load per input register. Each decrements the remaining
# count in %r11 and jumps to .Largs_done once it goes negative; otherwise it
# loads the next value from (%r10) and advances %r10 by 8.
foreach (@inp) {
  $code .= <<____;
	dec	%r11
	js	.Largs_done
	movq	(%r10), $_
	addq	\$8, %r10
____
}
# Copy any remaining arguments to the stack starting at
# $stack_params_offset(%rsp), then call the function. When the saved unwind
# value is non-zero the call is bracketed by code that sets and then clears the
# trap flag; otherwise the function is called directly.
$code .= <<____;
	leaq	$stack_params_offset(%rsp), %rax
.Largs_loop:
	dec	%r11
	js	.Largs_done

	# This block should be:
	#    movq (%r10), %rtmp
	#    movq %rtmp, (%rax)
	# There are no spare registers available, so we spill into the scratch
	# space.
	movq	%r11, $scratch_offset(%rsp)
	movq	(%r10), %r11
	movq	%r11, (%rax)
	movq	$scratch_offset(%rsp), %r11

	addq	\$8, %r10
	addq	\$8, %rax
	jmp	.Largs_loop

.Largs_done:
	movq	$func_offset(%rsp), %rax
	movq	$unwind_offset(%rsp), %r10
	testq	%r10, %r10
	jz	.Lno_unwind

	# Set the trap flag.
	pushfq
	orq	\$0x100, 0(%rsp)
	popfq

	# Run an instruction to trigger a breakpoint immediately before the
	# call.
	nop
.globl	abi_test_unwind_start
abi_test_unwind_start:

	call	*%rax
.globl	abi_test_unwind_return
abi_test_unwind_return:

	# Clear the trap flag. Note this assumes the trap flag was clear on
	# entry. We do not support instrumenting an unwind-instrumented
	# |abi_test_trampoline|.
	pushfq
	andq	\$-0x101, 0(%rsp)	# -0x101 is ~0x100
	popfq
.globl	abi_test_unwind_stop
abi_test_unwind_stop:

	jmp	.Lcall_done

.Lno_unwind:
	call	*%rax

.Lcall_done:
	# Store what \$func did our state, so our caller can check.
	movq	$state_offset(%rsp), $state
____
# Write the current register values back to the structure at 0($state); the
# emitted code above has just reloaded $state from its stack slot.
$code .= store_caller_state(0, $state);

# Restore our caller's state. Each reload is followed by a .cfi_restore for
# that register.
$code .= load_caller_state($caller_state_offset, "%rsp", sub {
  my ($off, $reg) = @_;
  # Drop the leading '%' to get the bare register name.
  $reg = substr($reg, 1);
  return ".cfi_restore\t$reg\n";
});
# Release the stack frame and return.
$code .= <<____;
	addq	\$$stack_alloc_size, %rsp
.cfi_adjust_cfa_offset	-$stack_alloc_size

	# %rax already contains \$func's return value, unmodified.
	ret
.cfi_endproc
.seh_endproc
.size	abi_test_trampoline,.-abi_test_trampoline
____

# abi_test_clobber_* zeros the corresponding register. These are used to test
# the ABI-testing framework.
# Emit abi_test_clobber_r<name> for each listed general-purpose register. Each
# function xors the register with itself and returns.
foreach ("ax", "bx", "cx", "dx", "di", "si", "bp", 8..15) {
  $code .= <<____;
.type	abi_test_clobber_r$_, \@abi-omnipotent
.globl	abi_test_clobber_r$_
.align	16
abi_test_clobber_r$_:
	_CET_ENDBR
	xorq	%r$_, %r$_
	ret
.size	abi_test_clobber_r$_,.-abi_test_clobber_r$_
____
}

# Emit abi_test_clobber_xmm<N> for %xmm0 through %xmm15, using pxor to zero the
# register.
foreach (0..15) {
  $code .= <<____;
.type	abi_test_clobber_xmm$_, \@abi-omnipotent
.globl	abi_test_clobber_xmm$_
.align	16
abi_test_clobber_xmm$_:
	_CET_ENDBR
	pxor	%xmm$_, %xmm$_
	ret
.size	abi_test_clobber_xmm$_,.-abi_test_clobber_xmm$_
____
}

# Fixtures with deliberately broken unwind information, followed by the
# direction flag helpers. Each function's .type directive must name the symbol
# defined by the label that follows it.
$code .= <<____;
# abi_test_bad_unwind_wrong_register preserves the ABI, but annotates the wrong
# register in unwind metadata.
# void abi_test_bad_unwind_wrong_register(void);
.type	abi_test_bad_unwind_wrong_register, \@abi-omnipotent
.globl	abi_test_bad_unwind_wrong_register
.align	16
abi_test_bad_unwind_wrong_register:
.cfi_startproc
.seh_startproc
	_CET_ENDBR
	pushq	%r12
.cfi_push	%r13	# This should be %r12
.seh_pushreg	%r13	# This should be %r12
	# Windows evaluates epilogs directly in the unwinder, rather than using
	# unwind codes. Add a nop so there is one non-epilog point (immediately
	# before the nop) where the unwinder can observe the mistake.
	nop
	popq	%r12
.cfi_pop	%r12
	ret
.seh_endproc
.cfi_endproc
.size	abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register

# abi_test_bad_unwind_temporary preserves the ABI, but temporarily corrupts the
# storage space for a saved register, breaking unwind.
# void abi_test_bad_unwind_temporary(void);
.type	abi_test_bad_unwind_temporary, \@abi-omnipotent
.globl	abi_test_bad_unwind_temporary
.align	16
abi_test_bad_unwind_temporary:
.cfi_startproc
.seh_startproc
	_CET_ENDBR
	pushq	%r12
.cfi_push	%r12
.seh_pushreg	%r12

	movq	%r12, %rax
	inc	%rax
	movq	%rax, (%rsp)
	# Unwinding from here is incorrect. Although %r12 itself has not been
	# changed, the unwind codes say to look in (%rsp) instead.

	movq	%r12, (%rsp)
	# Unwinding is now fixed.

	popq	%r12
.cfi_pop	%r12
	ret
.cfi_endproc
.seh_endproc
.size	abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary

# abi_test_get_and_clear_direction_flag clears the direction flag. If the flag
# was previously set, it returns one. Otherwise, it returns zero.
# int abi_test_get_and_clear_direction_flag(void);
.type	abi_test_get_and_clear_direction_flag, \@abi-omnipotent
.globl	abi_test_get_and_clear_direction_flag
abi_test_get_and_clear_direction_flag:
	_CET_ENDBR
	pushfq
	popq	%rax
	andq	\$0x400, %rax
	shrq	\$10, %rax
	cld
	ret
.size	abi_test_get_and_clear_direction_flag,.-abi_test_get_and_clear_direction_flag

# abi_test_set_direction_flag sets the direction flag.
# void abi_test_set_direction_flag(void);
.type	abi_test_set_direction_flag, \@abi-omnipotent
.globl	abi_test_set_direction_flag
abi_test_set_direction_flag:
	_CET_ENDBR
	std
	ret
.size	abi_test_set_direction_flag,.-abi_test_set_direction_flag
____

# The epilog fixture is only emitted for Win64 targets.
if ($win64) {
  $code .= <<____;
# abi_test_bad_unwind_epilog preserves the ABI, and correctly annotates the
# prolog, but the epilog does not match Win64's rules, breaking unwind during
# the epilog.
# void abi_test_bad_unwind_epilog(void);
.type	abi_test_bad_unwind_epilog, \@abi-omnipotent
.globl	abi_test_bad_unwind_epilog
.align	16
abi_test_bad_unwind_epilog:
.seh_startproc
	pushq	%r12
.seh_pushreg	%r12

	nop

	# The epilog should begin here, but the nop makes it invalid.
	popq	%r12
	nop
	ret
.seh_endproc
.size	abi_test_bad_unwind_epilog,.-abi_test_bad_unwind_epilog
____
}

# Send the accumulated assembly to STDOUT and close it, reporting any error
# that surfaces at close.
print $code;
close STDOUT or die "error closing STDOUT: $!";