#! /usr/bin/env perl
# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# Ascetic x86_64 AT&T to MASM/NASM assembler translator by <appro>.
#
# Why AT&T to MASM and not vice versa? Several reasons. Because AT&T
# format is way easier to parse. Because it's simpler to "gear" from
# Unix ABI to Windows one [see cross-reference "card" at the end of
# file]. Because Linux targets were available first...
#
# In addition the script also "distills" code suitable for GNU
# assembler, so that it can be compiled with more rigid assemblers,
# such as Solaris /usr/ccs/bin/as.
#
# This translator is not designed to convert *arbitrary* assembler
# code from AT&T format to MASM one. It's designed to convert just
# enough to provide for dual-ABI OpenSSL modules development...
# There *are* limitations and you might have to modify your assembler
# code or this script to achieve the desired result...
#
# Currently recognized limitations:
#
# - can't use multiple ops per line;
#
# Dual-ABI styling rules.
#
# 1. Adhere to Unix register and stack layout [see cross-reference
#    ABI "card" at the end for explanation].
# 2. Forget about "red zone," stick to more traditional blended
#    stack frame allocation. If volatile storage is actually required
#    that is. If not, just leave the stack as is.
# 3. Functions tagged with ".type name,@function" get crafted with
#    unified Win64 prologue and epilogue automatically. If you want
#    to take care of ABI differences yourself, tag functions as
#    ".type name,@abi-omnipotent" instead.
# 4. To optimize the Win64 prologue you can specify number of input
#    arguments as ".type name,@function,N." Keep in mind that if N is
#    larger than 6, then you *have to* write "abi-omnipotent" code,
#    because >6 cases can't be addressed with unified prologue.
# 5. Name local labels as .L*, do *not* use dynamic labels such as 1:
#    (sorry about latter).
# 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is
#    required to identify the spots, where to inject Win64 epilogue!
#    But on the pros, it's then prefixed with rep automatically:-)
# 7. Stick to explicit ip-relative addressing. If you have to use
#    GOTPCREL addressing, stick to mov symbol@GOTPCREL(%rip),%r??.
#    Both are recognized and translated to proper Win64 addressing
#    modes.
#
# 8. In order to provide for structured exception handling unified
#    Win64 prologue copies %rsp value to %rax. For further details
#    see SEH paragraph at the end.
# 9. .init segment is allowed to contain calls to functions only.
# a. If function accepts more than 4 arguments *and* >4th argument
#    is declared as non 64-bit value, do clear its upper part.


use strict;

# Command line: [flavour] [output]. A first argument containing a dot is
# taken to be the output file name and the flavour is left undefined.
my $flavour = shift;
my $output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Low-precedence "or" is required here: with "||" the die was attached to
# the (always true) file name string and a failed open went unnoticed.
open STDOUT,">$output" or die "can't open $output: $!"
    if (defined($output));

# Output dialect switches; defaults describe ELF output for GNU as.
my $gas=1;      $gas=0 if ($output =~ /\.asm$/);
my $elf=1;      $elf=0 if (!$gas);
my $win64=0;
my $prefix="";                  # prepended to global symbol names
my $decor=".L";                 # replaces the ".L" local-label prefix

my $masmref=8 + 50727*2**-32;   # 8.00.50727 shipped with VS2005
my $masm=0;
my $PTR=" PTR";

my $nasmref=2.03;
my $nasm=0;

if    ($flavour eq "mingw64") { $gas=1; $elf=0; $win64=1;
                                # TODO(davidben): Before supporting the
                                # mingw64 perlasm flavour, do away with this
                                # environment variable check.
                                die "mingw64 not supported";
                                $prefix=`echo __USER_LABEL_PREFIX__ | $ENV{CC} -E -P -`;
                                $prefix =~ s|\R$||; # Better chomp
                              }
elsif ($flavour eq "macosx")  { $gas=1; $elf=0; $prefix="_"; $decor="L\$"; }
elsif ($flavour eq "masm")    { $gas=0; $elf=0; $masm=$masmref; $win64=1; $decor="\$L\$"; }
elsif ($flavour eq "nasm")    { $gas=0; $elf=0; $nasm=$nasmref; $win64=1; $decor="\$L\$"; $PTR=""; }
elsif (!$gas)                 { die "unknown flavour $flavour"; }

# Translator state shared by the parser packages below.
my $current_segment;            # name of the section being emitted
my $current_function;           # hash ref: name, abi, narg, scope
my %globals;                    # symbol => (prefixed) name for .globl/.extern

{ package opcode;       # pick up opcodes
    # Try to consume a mnemonic from the front of $$line. On success the
    # mnemonic is removed from $$line, split into {op} and an optional
    # operand-size suffix {sz}, and the new object is returned; otherwise
    # undef is returned and $$line is left untouched.
    sub re {
        my ($class, $line) = @_;
        my $self = {};
        my $ret;

        if ($$line =~ /^([a-z][a-z0-9]*)/i) {
            bless $self,$class;
            $self->{op} = $1;
            $ret = $self;
            $$line = substr($$line,$+[0]); $$line =~ s/^\s+//;

            # {sz} stays undefined when no suffix could be split off, so
            # that a register operand may supply it later through size().
            undef $self->{sz};
            if ($self->{op} =~ /^(movz)x?([bw]).*/) {   # movz is pain...
                $self->{op} = $1;
                $self->{sz} = $2;
            } elsif ($self->{op} =~ /call|jmp/) {
                $self->{sz} = "";
            } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op|insrw)/) { # SSEn
                $self->{sz} = "";
            } elsif ($self->{op} =~ /^[vk]/) {  # VEX or k* such as kmov
                $self->{sz} = "";
            } elsif ($self->{op} =~ /mov[dq]/ && $$line =~ /%xmm/) {
                $self->{sz} = "";
            } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) {
                $self->{op} = $1;
                $self->{sz} = $2;
            }
        }
        $ret;
    }
    # Get the size suffix; a defined argument sets it only if it is not
    # set already.
    sub size {
        my ($self, $sz) = @_;
        $self->{sz} = $sz if (defined($sz) && !defined($self->{sz}));
        $self->{sz};
    }
    # Render the mnemonic for the selected assembler. "ret" becomes a
    # hand-coded "rep ret", preceded by the Win64 epilogue that restores
    # %rdi/%rsi when the current function uses the "svr4" ABI.
    sub out {
        my $self = shift;
        if ($gas) {
            if ($self->{op} eq "movz") {        # movz is pain...
                sprintf "%s%s%s",$self->{op},$self->{sz},shift;
            } elsif ($self->{op} =~ /^set/) {
                "$self->{op}";
            } elsif ($self->{op} eq "ret") {
                my $epilogue = "";
                if ($win64 && $current_function->{abi} eq "svr4") {
                    $epilogue = "movq 8(%rsp),%rdi\n\t" .
                                "movq 16(%rsp),%rsi\n\t";
                }
                $epilogue . ".byte 0xf3,0xc3";
            } elsif ($self->{op} eq "call" && !$elf && $current_segment eq ".init") {
                ".p2align\t3\n\t.quad";
            } else {
                "$self->{op}$self->{sz}";
            }
        } else {
            $self->{op} =~ s/^movz/movzx/;
            if ($self->{op} eq "ret") {
                $self->{op} = "";
                if ($win64 && $current_function->{abi} eq "svr4") {
                    $self->{op} = "mov rdi,QWORD$PTR\[8+rsp\]\t;WIN64 epilogue\n\t".
                                  "mov rsi,QWORD$PTR\[16+rsp\]\n\t";
                }
                $self->{op} .= "DB\t0F3h,0C3h\t\t;repret";
            } elsif ($self->{op} =~ /^(pop|push)f/) {
                $self->{op} .= $self->{sz};
            } elsif ($self->{op} eq "call" && $current_segment eq ".CRT\$XCU") {
                $self->{op} = "\tDQ";
            }
            $self->{op};
        }
    }
    # Get, or with a defined argument set, the bare mnemonic.
    sub mnemonic {
        my ($self, $op) = @_;
        $self->{op}=$op if (defined($op));
        $self->{op};
    }
}
{ package const;        # pick up constants, which start with $
    # Consume "$value" (everything up to the next comma) from $$line.
    sub re {
        my ($class, $line) = @_;
        my $self = {};
        my $ret;

        if ($$line =~ /^\$([^,]+)/) {
            bless $self, $class;
            $self->{value} = $1;
            $ret = $self;
            $$line = substr($$line,$+[0]); $$line =~ s/^\s+//;
        }
        $ret;
    }
    # Render the constant: binary literals are always converted to
    # decimal; gas output additionally pre-computes * / % expressions,
    # masm output rewrites 0x.. literals as 0..h.
    sub out {
        my $self = shift;

        $self->{value} =~ s/\b(0b[0-1]+)/oct($1)/eig;
        if ($gas) {
            # Solaris /usr/ccs/bin/as can't handle multiplications
            # in $self->{value}
            my $value = $self->{value};
            no warnings;    # oct might complain about overflow, ignore here...
            $value =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi;
            if ($value =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg) {
                $self->{value} = $value;
            }
            sprintf "\$%s",$self->{value};
        } else {
            $self->{value} =~ s/0x([0-9a-f]+)/0$1h/ig if ($masm);
            sprintf "%s",$self->{value};
        }
    }
}
{ package ea;           # pick up effective addresses: expr(%reg,%reg,scale)

    # Size suffix => MASM/NASM operand size keyword (non-gas output only).
    my %szmap = (       b=>"BYTE$PTR",    w=>"WORD$PTR",
                        l=>"DWORD$PTR",   d=>"DWORD$PTR",
                        q=>"QWORD$PTR",   o=>"OWORD$PTR",
                        x=>"XMMWORD$PTR", y=>"YMMWORD$PTR",
                        z=>"ZMMWORD$PTR" ) if (!$gas);

    # Consume "[*]label(base,index,scale){opmask}" from $$line. For Win64
    # a "mov sym@GOTPCREL(...)" is turned into "lea" on the given $opcode.
    sub re {
        my ($class, $line, $opcode) = @_;
        my $self = {};
        my $ret;

        # optional * ----vvv--- appears in indirect jmp/call
        if ($$line =~ /^(\*?)([^\(,]*)\(([%\w,]+)\)((?:{[^}]+})*)/) {
            bless $self, $class;
            $self->{asterisk} = $1;
            $self->{label} = $2;
            ($self->{base},$self->{index},$self->{scale})=split(/,/,$3);
            $self->{scale} = 1 if (!defined($self->{scale}));
            $self->{opmask} = $4;
            $ret = $self;
            $$line = substr($$line,$+[0]); $$line =~ s/^\s+//;

            if ($win64 && $self->{label} =~ s/\@GOTPCREL//) {
                die if ($opcode->mnemonic() ne "mov");
                $opcode->mnemonic("lea");
            }
            $self->{base}  =~ s/^%//;
            $self->{index} =~ s/^%// if (defined($self->{index}));
            $self->{opcode} = $opcode;
        }
        $ret;
    }
    # Memory operands never determine the operand size.
    sub size {}
    # Render the effective address; $sz is the operand size suffix used
    # to pick the "xWORD PTR" keyword for MASM/NASM.
    sub out {
        my ($self, $sz) = @_;

        $self->{label} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei;
        $self->{label} =~ s/\.L/$decor/g;

        # Silently convert all EAs to 64-bit. This is required for
        # elder GNU assembler and results in more compact code,
        # *but* most importantly AES module depends on this feature!
        $self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r$1/;
        $self->{base}  =~ s/^[er](.?[0-9xpi])[d]?$/r$1/;

        # Solaris /usr/ccs/bin/as can't handle multiplications
        # in $self->{label}...
        use integer;
        $self->{label} =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi;
        $self->{label} =~ s/\b([0-9]+\s*[\*\/\%]\s*[0-9]+)\b/eval($1)/eg;

        # Some assemblers insist on signed presentation of 32-bit
        # offsets, but sign extension is a tricky business in perl...
        if ((1<<31)<<1) {
            $self->{label} =~ s/\b([0-9]+)\b/$1<<32>>32/eg;
        } else {
            $self->{label} =~ s/\b([0-9]+)\b/$1>>0/eg;
        }

        # if base register is %rbp or %r13, see if it's possible to
        # flip base and index registers [for better performance]
        if (!$self->{label} && $self->{index} && $self->{scale}==1 &&
            $self->{base} =~ /(rbp|r13)/) {
            $self->{base} = $self->{index}; $self->{index} = $1;
        }

        if ($gas) {
            $self->{label} =~ s/^___imp_/__imp__/ if ($flavour eq "mingw64");

            if (defined($self->{index})) {
                sprintf "%s%s(%s,%%%s,%d)%s",
                                $self->{asterisk},$self->{label},
                                $self->{base}?"%$self->{base}":"",
                                $self->{index},$self->{scale},
                                $self->{opmask};
            } else {
                sprintf "%s%s(%%%s)%s", $self->{asterisk},$self->{label},
                                $self->{base},$self->{opmask};
            }
        } else {
            $self->{label} =~ s/\./\$/g;
            $self->{label} =~ s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/ig;
            $self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/);

            # Some mnemonics imply the memory operand size themselves;
            # the first matching rule overrides the caller's $sz.
            my $mnemonic = $self->{opcode}->mnemonic();
            ($self->{asterisk})                         && ($sz="q") ||
            ($mnemonic =~ /^v?mov([qd])$/)              && ($sz=$1)  ||
            ($mnemonic =~ /^v?pinsr([qdwb])$/)          && ($sz=$1)  ||
            ($mnemonic =~ /^vpbroadcast([qdwb])$/)      && ($sz=$1)  ||
            ($mnemonic =~ /^v(?!perm)[a-z]+[fi]128$/)   && ($sz="x");

            $self->{opmask} =~ s/%(k[0-7])/$1/;

            if (defined($self->{index})) {
                sprintf "%s[%s%s*%d%s]%s",$szmap{$sz},
                                $self->{label}?"$self->{label}+":"",
                                $self->{index},$self->{scale},
                                $self->{base}?"+$self->{base}":"",
                                $self->{opmask};
            } elsif ($self->{base} eq "rip") {
                sprintf "%s[%s]",$szmap{$sz},$self->{label};
            } else {
                sprintf "%s[%s%s]%s",   $szmap{$sz},
                                $self->{label}?"$self->{label}+":"",
                                $self->{base},$self->{opmask};
            }
        }
    }
}
{ package register;     # pick up registers, which start with %.
    # Consume "[*]%reg{opmask}" from $$line and offer the register's
    # width to $opcode as its operand size.
    sub re {
        my ($class, $line, $opcode) = @_;
        my $self = {};
        my $ret;

        # optional * ----vvv--- appears in indirect jmp/call
        if ($$line =~ /^(\*?)%(\w+)((?:{[^}]+})*)/) {
            bless $self,$class;
            $self->{asterisk} = $1;
            $self->{value} = $2;
            $self->{opmask} = $3;
            $opcode->size($self->size());
            $ret = $self;
            $$line = substr($$line,$+[0]); $$line =~ s/^\s+//;
        }
        $ret;
    }
    # Map the register name to a size suffix (b, w, l or q); returns
    # undef for names that match none of the patterns.
    sub size {
        my $self = shift;
        my $ret;

        if    ($self->{value} =~ /^r[\d]+b$/i)   { $ret="b"; }
        elsif ($self->{value} =~ /^r[\d]+w$/i)   { $ret="w"; }
        elsif ($self->{value} =~ /^r[\d]+d$/i)   { $ret="l"; }
        elsif ($self->{value} =~ /^r[\w]+$/i)    { $ret="q"; }
        elsif ($self->{value} =~ /^[a-d][hl]$/i) { $ret="b"; }
        elsif ($self->{value} =~ /^[\w]{2}l$/i)  { $ret="b"; }
        elsif ($self->{value} =~ /^[\w]{2}$/i)   { $ret="w"; }
        elsif ($self->{value} =~ /^e[a-z]{2}$/i) { $ret="l"; }

        $ret;
    }
    # Render the register: AT&T syntax keeps "%", MASM/NASM drops it.
    sub out {
        my $self = shift;
        if ($gas)       { sprintf "%s%%%s%s",   $self->{asterisk},
                                                $self->{value},
                                                $self->{opmask}; }
        else            { $self->{opmask} =~ s/%(k[0-7])/$1/;
                          $self->{value}.$self->{opmask}; }
    }
}
{ package label;        # pick up labels, which end with :
    # Consume "name:" from $$line; a leading ".L" is replaced by $decor.
    sub re {
        my ($class, $line) = @_;
        my $self = {};
        my $ret;

        if ($$line =~ /(^[\.\w]+)\:/) {
            bless $self,$class;
            $self->{value} = $1;
            $ret = $self;
            $$line = substr($$line,$+[0]); $$line =~ s/^\s+//;

            $self->{value} =~ s/^\.L/$decor/;
        }
        $ret;
    }
    # Render the label. When it names the current "svr4" function on
    # Win64, the unified prologue (save %rdi/%rsi, copy %rsp to %rax,
    # shuffle up to six arguments into Unix registers) is appended.
    sub out {
        my $self = shift;

        if ($gas) {
            my $func = ($globals{$self->{value}} or $self->{value}) . ":";
            if ($win64 && $current_function->{name} eq $self->{value}
                       && $current_function->{abi} eq "svr4") {
                $func .= "\n";
                $func .= " movq %rdi,8(%rsp)\n";
                $func .= " movq %rsi,16(%rsp)\n";
                $func .= " movq %rsp,%rax\n";
                $func .= "${decor}SEH_begin_$current_function->{name}:\n";
                my $narg = $current_function->{narg};
                $narg=6 if (!defined($narg));
                $func .= " movq %rcx,%rdi\n" if ($narg>0);
                $func .= " movq %rdx,%rsi\n" if ($narg>1);
                $func .= " movq %r8,%rdx\n" if ($narg>2);
                $func .= " movq %r9,%rcx\n" if ($narg>3);
                $func .= " movq 40(%rsp),%r8\n" if ($narg>4);
                $func .= " movq 48(%rsp),%r9\n" if ($narg>5);
            }
            $func;
        } elsif ($self->{value} ne "$current_function->{name}") {
            # Make all labels in masm global.
            $self->{value} .= ":" if ($masm);
            $self->{value} . ":";
        } elsif ($win64 && $current_function->{abi} eq "svr4") {
            my $func =  "$current_function->{name}" .
                        ($nasm ? ":" : "\tPROC $current_function->{scope}") .
                        "\n";
            $func .= " mov QWORD$PTR\[8+rsp\],rdi\t;WIN64 prologue\n";
            $func .= " mov QWORD$PTR\[16+rsp\],rsi\n";
            $func .= " mov rax,rsp\n";
            $func .= "${decor}SEH_begin_$current_function->{name}:";
            $func .= ":" if ($masm);
            $func .= "\n";
            my $narg = $current_function->{narg};
            $narg=6 if (!defined($narg));
            $func .= " mov rdi,rcx\n" if ($narg>0);
            $func .= " mov rsi,rdx\n" if ($narg>1);
            $func .= " mov rdx,r8\n" if ($narg>2);
            $func .= " mov rcx,r9\n" if ($narg>3);
            $func .= " mov r8,QWORD$PTR\[40+rsp\]\n" if ($narg>4);
            $func .= " mov r9,QWORD$PTR\[48+rsp\]\n" if ($narg>5);
            $func .= "\n";
        } else {
            "$current_function->{name}".
                        ($nasm ? ":" : "\tPROC $current_function->{scope}");
        }
    }
}
{ package expr;         # pick up expressions
    # Consume everything up to the next comma as an expression operand;
    # global names are replaced by their entries in %globals and ".L" by
    # $decor. "@PLT" is dropped for non-ELF output.
    sub re {
        my ($class, $line, $opcode) = @_;
        my $self = {};
        my $ret;

        if ($$line =~ /(^[^,]+)/) {
            bless $self,$class;
            $self->{value} = $1;
            $ret = $self;
            $$line = substr($$line,$+[0]); $$line =~ s/^\s+//;

            $self->{value} =~ s/\@PLT// if (!$elf);
            $self->{value} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei;
            $self->{value} =~ s/\.L/$decor/g;
            $self->{opcode} = $opcode;
        }
        $ret;
    }
    # Render the expression; NASM jump targets (other than j[re]cxz)
    # get an explicit "NEAR".
    sub out {
        my $self = shift;
        if ($nasm && $self->{opcode}->mnemonic()=~m/^j(?![re]cxz)/) {
            "NEAR ".$self->{value};
        } else {
            $self->{value};
        }
    }
}
{ package cfi_directive;
    # CFI directives annotate instructions that are significant for
    # stack unwinding procedure compliant with DWARF specification,
    # see http://dwarfstd.org/. Besides naturally expected for this
    # script platform-specific filtering function, this module adds
    # three auxiliary synthetic directives not recognized by [GNU]
    # assembler:
    #
    # - .cfi_push to annotate push instructions in prologue, which
    #   translates to .cfi_adjust_cfa_offset (if needed) and
    #   .cfi_offset;
    # - .cfi_pop to annotate pop instructions in epilogue, which
    #   translates to .cfi_adjust_cfa_offset (if needed) and
    #   .cfi_restore;
    # - [and most notably] .cfi_cfa_expression which encodes
    #   DW_CFA_def_cfa_expression and passes it to .cfi_escape as
    #   byte vector;
    #
    # CFA expressions were introduced in DWARF specification version
    # 3 and describe how to deduce CFA, Canonical Frame Address. This
    # becomes handy if your stack frame is variable and you can't
    # spare register for [previous] frame pointer. Suggested directive
    # syntax is made-up mix of DWARF operator suffixes [subset of]
    # and references to registers with optional bias. Following example
    # describes offloaded *original* stack pointer at specific offset
    # from *current* stack pointer:
    #
    #   .cfi_cfa_expression     %rsp+40,deref,+8
    #
    # Final +8 has everything to do with the fact that CFA is defined
    # as reference to top of caller's stack, and on x86_64 call to
    # subroutine pushes 8-byte return address. In other words original
    # stack pointer upon entry to a subroutine is 8 bytes off from CFA.

    # Below constants are taken from "DWARF Expressions" section of the
    # DWARF specification, section is numbered 7.7 in versions 3 and 4.
    my %DW_OP_simple = (        # no-arg operators, mapped directly
        deref   => 0x06,        dup     => 0x12,
        drop    => 0x13,        over    => 0x14,
        pick    => 0x15,        swap    => 0x16,
        rot     => 0x17,        xderef  => 0x18,

        abs     => 0x19,        and     => 0x1a,
        div     => 0x1b,        minus   => 0x1c,
        mod     => 0x1d,        mul     => 0x1e,
        neg     => 0x1f,        not     => 0x20,
        or      => 0x21,        plus    => 0x22,
        shl     => 0x24,        shr     => 0x25,
        shra    => 0x26,        xor     => 0x27,
        );

    my %DW_OP_complex = (       # used in specific subroutines
        constu          => 0x10,        # uleb128
        consts          => 0x11,        # sleb128
        plus_uconst     => 0x23,        # uleb128
        lit0            => 0x30,        # add 0-31 to opcode
        reg0            => 0x50,        # add 0-31 to opcode
        breg0           => 0x70,        # add 0-31 to opcode, sleb128
        regx            => 0x90,        # uleb128
        fbreg           => 0x91,        # sleb128
        bregx           => 0x92,        # uleb128, sleb128
        piece           => 0x93,        # uleb128
        );

    # Following constants are defined in x86_64 ABI supplement, for
    # example available at https://www.uclibc.org/docs/psABI-x86_64.pdf,
    # see section 3.7 "Stack Unwind Algorithm".
    my %DW_reg_idx = (
        "%rax"=>0,  "%rdx"=>1,  "%rcx"=>2,  "%rbx"=>3,
        "%rsi"=>4,  "%rdi"=>5,  "%rbp"=>6,  "%rsp"=>7,
        "%r8" =>8,  "%r9" =>9,  "%r10"=>10, "%r11"=>11,
        "%r12"=>12, "%r13"=>13, "%r14"=>14, "%r15"=>15
        );

    my ($cfa_reg, $cfa_rsp);

    # [us]leb128 format is variable-length integer representation base
    # 2^7 [seven payload bits per byte], with most significant bit of
    # each byte being 0 denoting *last* most significant digit. See
    # "Variable Length Data" in the DWARF specification, numbered 7.6
    # at least in versions 3 and 4.

    # Encode a signed integer; returns the list of byte values.
    sub sleb128 {
        use integer;    # get right shift extend sign

        my $val = shift;
        my $sign = ($val < 0) ? -1 : 0;
        my @ret = ();

        while(1) {
            push @ret, $val&0x7f;

            # see if remaining bits are same and equal to most
            # significant bit of the current digit, if so, it's
            # last digit...
            last if (($val>>6) == $sign);

            $ret[-1] |= 0x80;
            $val >>= 7;
        }

        return @ret;
    }
    # Encode an unsigned integer; returns the list of byte values.
    sub uleb128 {
        my $val = shift;
        my @ret = ();

        while(1) {
            push @ret, $val&0x7f;

            # see if it's last significant digit...
            last if (($val >>= 7) == 0);

            $ret[-1] |= 0x80;
        }

        return @ret;
    }
    # Encode a constant: DW_OP_lit0..31 for 0..31, otherwise
    # DW_OP_consts followed by the sleb128 value.
    sub const {
        my $val = shift;

        if ($val >= 0 && $val < 32) {
            return ($DW_OP_complex{lit0}+$val);
        }
        return ($DW_OP_complex{consts}, sleb128($val));
    }
    # Encode "%reg[+-offset]" as DW_OP_bregN followed by the sleb128
    # offset; returns nothing if the argument is not in that form.
    sub reg {
        my $val = shift;

        return if ($val !~ m/^(%r\w+)(?:([\+\-])((?:0x)?[0-9a-f]+))?/);

        my $reg = $DW_reg_idx{$1};
        my $off = eval ("0 $2 $3");

        return (($DW_OP_complex{breg0} + $reg), sleb128($off));
        # Yes, we use DW_OP_bregX+0 to push register value and not
        # DW_OP_regX, because latter would require even DW_OP_piece,
        # which would be a waste under the circumstances. If you have
        # to use DW_OP_reg, use "regx:N"...
    }
    # Translate the comma-separated operands of .cfi_cfa_expression into
    # the byte list for .cfi_escape.
    sub cfa_expression {
        my $line = shift;
        my @ret;

        foreach my $token (split(/,\s*/,$line)) {
            if ($token =~ /^%r/) {
                push @ret,reg($token);
            } elsif ($token =~ /((?:0x)?[0-9a-f]+)\((%r\w+)\)/) {
                push @ret,reg("$2+$1");
            } elsif ($token =~ /(\w+):(\-?(?:0x)?[0-9a-f]+)(U?)/i) {
                my $i = 1*eval($2);
                push @ret,$DW_OP_complex{$1}, ($3 ? uleb128($i) : sleb128($i));
            } elsif (my $i = 1*eval($token) or $token eq "0") {
                if ($token =~ /^\+/) {
                    push @ret,$DW_OP_complex{plus_uconst},uleb128($i);
                } else {
                    push @ret,const($i);
                }
            } else {
                push @ret,$DW_OP_simple{$token};
            }
        }

        # Finally we return DW_CFA_def_cfa_expression, 15, followed by
        # length of the expression and of course the expression itself.
        return (15,scalar(@ret),@ret);
    }
    # Consume a ".cfi_*" directive from $$line, track the CFA register
    # and %rsp offset, and expand the synthetic directives described
    # above into ones the assembler recognizes.
    sub re {
        my ($class, $line) = @_;
        my $self = {};
        my $ret;

        if ($$line =~ s/^\s*\.cfi_(\w+)\s*//) {
            bless $self,$class;
            $ret = $self;
            undef $self->{value};
            my $dir = $1;

            SWITCH: for ($dir) {
            # What is $cfa_rsp? Effectively it's difference between %rsp
            # value and current CFA, Canonical Frame Address, which is
            # why it starts with -8. Recall that CFA is top of caller's
            # stack...
            /startproc/ && do { ($cfa_reg, $cfa_rsp) = ("%rsp", -8); last; };
            /endproc/   && do { ($cfa_reg, $cfa_rsp) = ("%rsp",  0); last; };
            /def_cfa_register/
                        && do { $cfa_reg = $$line; last; };
            /def_cfa_offset/
                        && do { $cfa_rsp = -1*eval($$line) if ($cfa_reg eq "%rsp");
                                last;
                              };
            /adjust_cfa_offset/
                        && do { $cfa_rsp -= 1*eval($$line) if ($cfa_reg eq "%rsp");
                                last;
                              };
            /def_cfa/   && do { if ($$line =~ /(%r\w+)\s*,\s*(.+)/) {
                                    $cfa_reg = $1;
                                    $cfa_rsp = -1*eval($2) if ($cfa_reg eq "%rsp");
                                }
                                last;
                              };
            /push/      && do { $dir = undef;
                                $cfa_rsp -= 8;
                                if ($cfa_reg eq "%rsp") {
                                    $self->{value} = ".cfi_adjust_cfa_offset\t8\n";
                                }
                                $self->{value} .= ".cfi_offset\t$$line,$cfa_rsp";
                                last;
                              };
            /pop/       && do { $dir = undef;
                                $cfa_rsp += 8;
                                if ($cfa_reg eq "%rsp") {
                                    $self->{value} = ".cfi_adjust_cfa_offset\t-8\n";
                                }
                                $self->{value} .= ".cfi_restore\t$$line";
                                last;
                              };
            /cfa_expression/
                        && do { $dir = undef;
                                $self->{value} = ".cfi_escape\t" .
                                        join(",", map(sprintf("0x%02x", $_),
                                                      cfa_expression($$line)));
                                last;
                              };
            }

            # Directives that were not expanded above pass through as is.
            $self->{value} = ".cfi_$dir\t$$line" if ($dir);

            $$line = "";
        }

        return $ret;
    }
    # CFI directives are emitted for ELF targets only.
    sub out {
        my $self = shift;
        return ($elf ? $self->{value} : undef);
    }
}
{ package directive;    # pick up directives, which start with .
    # Consume an assembler directive from $$line and pre-render it into
    # {value} for the selected assembler. ".cfi_*" directives are
    # delegated to cfi_directive. Also maintains %globals,
    # $current_function and $current_segment.
    sub re {
        my ($class, $line) = @_;
        my $self = {};
        my $ret;
        my $dir;

        # chain-call to cfi_directive
        $ret = cfi_directive->re($line) and return $ret;

        if ($$line =~ /^\s*(\.\w+)/) {
            bless $self,$class;
            $dir = $1;
            $ret = $self;
            undef $self->{value};
            $$line = substr($$line,$+[0]); $$line =~ s/^\s+//;

            # Bookkeeping that is common to all output dialects.
            SWITCH: for ($dir) {
                /\.global|\.globl|\.extern/
                            && do { $globals{$$line} = $prefix . $$line;
                                    $$line = $globals{$$line} if ($prefix);
                                    last;
                                  };
                /\.type/    && do { my ($sym,$type,$narg) = split(',',$$line);
                                    if ($type eq "\@function") {
                                        undef $current_function;
                                        $current_function->{name} = $sym;
                                        $current_function->{abi}  = "svr4";
                                        $current_function->{narg} = $narg;
                                        $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE";
                                    } elsif ($type eq "\@abi-omnipotent") {
                                        undef $current_function;
                                        $current_function->{name} = $sym;
                                        $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE";
                                    }
                                    $$line =~ s/\@abi\-omnipotent/\@function/;
                                    $$line =~ s/\@function.*/\@function/;
                                    last;
                                  };
                /\.asciz/   && do { if ($$line =~ /^"(.*)"$/) {
                                        $dir  = ".byte";
                                        $$line = join(",",unpack("C*",$1),0);
                                    }
                                    last;
                                  };
                /\.rva|\.long|\.quad/
                            && do { $$line =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei;
                                    $$line =~ s/\.L/$decor/g;
                                    last;
                                  };
            }

            if ($gas) {
                $self->{value} = $dir . "\t" . $$line;

                if ($dir =~ /\.extern/) {
                    if ($flavour eq "elf") {
                        $self->{value} .= "\n.hidden $$line";
                    } else {
                        $self->{value} = "";
                    }
                } elsif (!$elf && $dir =~ /\.type/) {
                    $self->{value} = "";
                    $self->{value} = ".def\t" . ($globals{$1} or $1) . ";\t" .
                                (defined($globals{$1})?".scl 2;":".scl 3;") .
                                "\t.type 32;\t.endef"
                                if ($win64 && $$line =~ /([^,]+),\@function/);
                } elsif (!$elf && $dir =~ /\.size/) {
                    $self->{value} = "";
                    if (defined($current_function)) {
                        $self->{value} .= "${decor}SEH_end_$current_function->{name}:"
                                if ($win64 && $current_function->{abi} eq "svr4");
                        undef $current_function;
                    }
                } elsif (!$elf && $dir =~ /\.align/) {
                    $self->{value} = ".p2align\t" . (log($$line)/log(2));
                } elsif ($dir eq ".section") {
                    $current_segment=$$line;
                    if (!$elf && $current_segment eq ".init") {
                        if    ($flavour eq "macosx")    { $self->{value} = ".mod_init_func"; }
                        elsif ($flavour eq "mingw64")   { $self->{value} = ".section\t.ctors"; }
                    }
                } elsif ($dir =~ /\.(text|data)/) {
                    $current_segment=".$1";
                } elsif ($dir =~ /\.global|\.globl|\.extern/) {
                    if ($flavour eq "macosx") {
                        $self->{value} .= "\n.private_extern $$line";
                    } else {
                        $self->{value} .= "\n.hidden $$line";
                    }
                } elsif ($dir =~ /\.hidden/) {
                    if    ($flavour eq "macosx")  { $self->{value} = ".private_extern\t$prefix$$line"; }
                    elsif ($flavour eq "mingw64") { $self->{value} = ""; }
                } elsif ($dir =~ /\.comm/) {
                    $self->{value} = "$dir\t$prefix$$line";
                    $self->{value} =~ s|,([0-9]+),([0-9]+)$|",$1,".log($2)/log(2)|e if ($flavour eq "macosx");
                }
                $$line = "";
                return $self;
            }

            # non-gas case or nasm/masm
            SWITCH: for ($dir) {
                /\.text/    && do { my $v=undef;
                                    if ($nasm) {
                                        $v="section .text code align=64\n";
                                    } else {
                                        $v="$current_segment\tENDS\n" if ($current_segment);
                                        $current_segment = ".text\$";
                                        $v.="$current_segment\tSEGMENT ";
                                        $v.=$masm>=$masmref ? "ALIGN(256)" : "PAGE";
                                        $v.=" 'CODE'";
                                    }
                                    $self->{value} = $v;
                                    last;
                                  };
                /\.data/    && do { my $v=undef;
                                    if ($nasm) {
                                        $v="section .data data align=8\n";
                                    } else {
                                        $v="$current_segment\tENDS\n" if ($current_segment);
                                        $current_segment = "_DATA";
                                        $v.="$current_segment\tSEGMENT";
                                    }
                                    $self->{value} = $v;
                                    last;
                                  };
                /\.section/ && do { my $v=undef;
                                    $$line =~ s/([^,]*).*/$1/;
                                    $$line = ".CRT\$XCU" if ($$line eq ".init");
                                    if ($nasm) {
                                        $v="section $$line";
                                        if ($$line=~/\.([px])data/) {
                                            $v.=" rdata align=";
                                            $v.=$1 eq "p"? 4 : 8;
                                        } elsif ($$line=~/\.CRT\$/i) {
                                            $v.=" rdata align=8";
                                        }
                                    } else {
                                        $v="$current_segment\tENDS\n" if ($current_segment);
                                        $v.="$$line\tSEGMENT";
                                        if ($$line=~/\.([px])data/) {
                                            $v.=" READONLY";
                                            $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref);
                                        } elsif ($$line=~/\.CRT\$/i) {
                                            $v.=" READONLY ";
                                            $v.=$masm>=$masmref ? "ALIGN(8)" : "DWORD";
                                        }
                                    }
                                    $current_segment = $$line;
                                    $self->{value} = $v;
                                    last;
                                  };
                /\.extern/  && do { $self->{value}  = "EXTERN\t".$$line;
                                    $self->{value} .= ":NEAR" if ($masm);
                                    last;
                                  };
                /\.globl|\.global/
                            && do { $self->{value}  = $masm?"PUBLIC":"global";
                                    $self->{value} .= "\t".$$line;
                                    last;
                                  };
                /\.size/    && do { if (defined($current_function)) {
                                        undef $self->{value};
                                        if ($current_function->{abi} eq "svr4") {
                                            $self->{value}="${decor}SEH_end_$current_function->{name}:";
                                            $self->{value}.=":\n" if($masm);
                                        }
                                        $self->{value}.="$current_function->{name}\tENDP" if($masm && $current_function->{name});
                                        undef $current_function;
                                    }
                                    last;
                                  };
                /\.align/   && do { my $max = ($masm && $masm>=$masmref) ? 256 : 4096;
                                    $self->{value} = "ALIGN\t".($$line>$max?$max:$$line);
                                    last;
                                  };
                /\.(value|long|rva|quad)/
                            && do { my $sz  = substr($1,0,1);
                                    my @arr = split(/,\s*/,$$line);
                                    my $last = pop(@arr);
                                    # Converts one data item; symbols in
                                    # .pdata/.xdata or .rva become
                                    # image-relative references.
                                    my $conv = sub {    my $var=shift;
                                                $var=~s/^(0b[0-1]+)/oct($1)/eig;
                                                $var=~s/^0x([0-9a-f]+)/0$1h/ig if ($masm);
                                                if ($sz eq "D" && ($current_segment=~/\.[px]data/ || $dir eq ".rva"))
                                                { $var=~s/([_a-z\$\@][_a-z0-9\$\@]*)/$nasm?"$1 wrt ..imagebase":"imagerel $1"/egi; }
                                                $var;
                                    };

                                    $sz =~ tr/bvlrq/BWDDQ/;
                                    $self->{value} = "\tD$sz\t";
                                    for (@arr) { $self->{value} .= $conv->($_).","; }
                                    $self->{value} .= $conv->($last);
                                    last;
                                  };
                /\.byte/    && do { my @str=split(/,\s*/,$$line);
                                    s/(0b[0-1]+)/oct($1)/eig for (@str);
                                    if ($masm) {
                                        s/0x([0-9a-f]+)/0$1h/ig for (@str);
                                    }
                                    # emit at most 16 bytes per DB line
                                    while ($#str>15) {
                                        $self->{value}.="DB\t"
                                                .join(",",@str[0..15])."\n";
                                        foreach (0..15) { shift @str; }
                                    }
                                    $self->{value}.="DB\t"
                                                .join(",",@str) if (@str);
                                    last;
                                  };
                /\.comm/    && do { my @str=split(/,\s*/,$$line);
                                    my $v=undef;
                                    if ($nasm) {
                                        $v.="common $prefix$str[0] $str[1]";
                                    } else {
                                        $v="$current_segment\tENDS\n" if ($current_segment);
                                        $current_segment = "_DATA";
                                        $v.="$current_segment\tSEGMENT\n";
                                        $v.="COMM $str[0]:DWORD:".$str[1]/4;
                                    }
                                    $self->{value} = $v;
                                    last;
                                  };
            }
            $$line = "";
        }

        $ret;
    }
    # Return the text prepared by re().
    sub out {
        my $self = shift;
        $self->{value};
    }
}

# Upon initial x86_64 introduction SSE>2 extensions were not introduced
# yet. In order not to be bothered by tracing exact assembler versions,
# but at the same time to provide a bare security minimum of AES-NI, we
# hard-code some instructions. Extensions past AES-NI on the other hand
# are traced by examining assembler version in individual perlasm
# modules...
# Register name => 3-bit register number used in ModR/M encodings below.
my %regrm = (
    "%eax" => 0, "%ecx" => 1, "%edx" => 2, "%ebx" => 3,
    "%esp" => 4, "%ebp" => 5, "%esi" => 6, "%edi" => 7,
);

# Append a REX prefix to @$bytes when one is needed: bit 0x04 is set for
# $reg >= 8, bit 0x01 for $rm >= 8, on top of any bits passed in $bits.
# Nothing is appended if no bit ends up set.
sub rex {
    my ($bytes, $reg, $rm, $bits) = @_;

    $bits |= 0x04 if ($reg >= 8);
    $bits |= 0x01 if ($rm >= 8);
    push @$bytes, ($bits | 0x40) if ($bits);
}

# Each encoder below receives the operand string of one instruction and
# returns its machine code as a list of byte values, or an empty list
# when the operands are not in the form it knows how to encode.

my $movq = sub {        # elderly gas can't handle inter-register movq
    my ($operands) = @_;
    my @bytes = (0x66);
    my ($xmm, $gpr, $op2);

    # Both directions share the encoding except for the second opcode byte.
    if ($operands =~ /%xmm([0-9]+),\s*%r(\w+)/) {
        ($xmm, $gpr, $op2) = ($1, $2, 0x7e);
    } elsif ($operands =~ /%r(\w+),\s*%xmm([0-9]+)/) {
        ($xmm, $gpr, $op2) = ($2, $1, 0x6e);
    } else {
        return ();
    }

    # Named registers (rax, rbx, ...) are looked up through %regrm.
    $gpr = $regrm{"%e$gpr"} if ($gpr !~ /[0-9]+/);
    rex(\@bytes, $xmm, $gpr, 0x8);
    push @bytes, 0x0f, $op2;
    push @bytes, 0xc0 | (($xmm & 7) << 3) | ($gpr & 7);    # ModR/M
    return @bytes;
};

my $pextrd = sub {
    my ($operands) = @_;
    my ($imm, $xmm, $gpr) =
        $operands =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*(%\w+)/
        or return ();

    if    ($gpr =~ /%r([0-9]+)d/) { $gpr = $1; }
    elsif ($gpr =~ /%e/)          { $gpr = $regrm{$gpr}; }

    my @bytes = (0x66);
    rex(\@bytes, $xmm, $gpr);
    push @bytes, 0x0f, 0x3a, 0x16;
    push @bytes, 0xc0 | (($xmm & 7) << 3) | ($gpr & 7);    # ModR/M
    push @bytes, $imm;
    return @bytes;
};

my $pinsrd = sub {
    my ($operands) = @_;
    my ($imm, $gpr, $xmm) =
        $operands =~ /\$([0-9]+),\s*(%\w+),\s*%xmm([0-9]+)/
        or return ();

    if    ($gpr =~ /%r([0-9]+)/) { $gpr = $1; }
    elsif ($gpr =~ /%e/)         { $gpr = $regrm{$gpr}; }

    my @bytes = (0x66);
    rex(\@bytes, $xmm, $gpr);
    push @bytes, 0x0f, 0x3a, 0x22;
    push @bytes, 0xc0 | (($xmm & 7) << 3) | ($gpr & 7);    # ModR/M
    push @bytes, $imm;
    return @bytes;
};

my $pshufb = sub {
    my ($operands) = @_;
    my ($from, $to) =
        $operands =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/
        or return ();

    my @bytes = (0x66);
    rex(\@bytes, $to, $from);
    push @bytes, 0x0f, 0x38, 0x00;
    push @bytes, 0xc0 | ($from & 7) | (($to & 7) << 3);    # ModR/M
    return @bytes;
};

my $palignr = sub {
    my ($operands) = @_;
    my ($imm, $from, $to) =
        $operands =~ /\$([0-9]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/
        or return ();

    my @bytes = (0x66);
    rex(\@bytes, $to, $from);
    push @bytes, 0x0f, 0x3a, 0x0f;
    push @bytes, 0xc0 | ($from & 7) | (($to & 7) << 3);    # ModR/M
    push @bytes, $imm;
    return @bytes;
};

my $pclmulqdq = sub {
    my ($operands) = @_;
    my ($imm, $from, $to) =
        $operands =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/
        or return ();

    my @bytes = (0x66);
    rex(\@bytes, $to, $from);
    push @bytes, 0x0f, 0x3a, 0x44;
    push @bytes, 0xc0 | ($from & 7) | (($to & 7) << 3);    # ModR/M
    # an immediate with a leading 0 (hex or octal) is converted by oct()
    if ($imm =~ /^0/) {
        push @bytes, oct($imm);
    } else {
        push @bytes, $imm;
    }
    return @bytes;
};

my $rdrand = sub {
    my ($operands) = @_;
    my ($reg) = $operands =~ /%[er](\w+)/
        or return ();

    $reg = $regrm{"%e$reg"} if ($reg !~ /[0-9]+/);

    my @bytes = ();
    rex(\@bytes, 0, $reg, 8);
    push @bytes, 0x0f, 0xc7, 0xf0 | ($reg & 7);
    return @bytes;
};

my $rdseed = sub {
    my ($operands) = @_;
    my ($reg) = $operands =~ /%[er](\w+)/
        or return ();

    $reg = $regrm{"%e$reg"} if ($reg !~ /[0-9]+/);

    my @bytes = ();
    rex(\@bytes, 0, $reg, 8);
    push @bytes, 0x0f, 0xc7, 0xf8 | ($reg & 7);
    return @bytes;
};

# Not all AVX-capable assemblers recognize AMD XOP extension. Since we
# are using only two instructions hand-code them in order to be excused
# from chasing assembler versions...

# rxb(\@opcode, $dst, $src1, $src2, $rxb)
#
# Appends one prefix byte to @opcode. The three top bits start out set
# and are cleared for each of $dst (0x04<<5), $src1 (0x01<<5) and $src2
# (0x02<<5) that is 8 or above; the value passed in $rxb supplies the
# remaining low bits.
sub rxb {
    my $opcode=shift;
    my ($dst,$src1,$src2,$rxb)=@_;

    $rxb|=0x7<<5;
    $rxb&=~(0x04<<5) if($dst>=8);
    $rxb&=~(0x01<<5) if($src1>=8);
    $rxb&=~(0x02<<5) if($src2>=8);
    push @$opcode,$rxb;
}

# Hand-coded "vprotd $imm,%xmmN,%xmmM". Returns the byte list, or an
# empty list when the operands are not immediate,xmm,xmm.
my $vprotd = sub {
    if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
        my @opcode=(0x8f);
        rxb(\@opcode,$3,$2,-1,0x08);
        push @opcode,0x78,0xc2;
        push @opcode,0xc0|($2&7)|(($3&7)<<3);	# ModR/M
        my $c=$1;
        # Immediates with a leading 0 (0x.., 0..) are converted by oct().
        push @opcode,$c=~/^0/?oct($c):$c;
        @opcode;
    } else {
        ();
    }
};

# Same as $vprotd, except for the opcode byte (0xc3 instead of 0xc2).
my $vprotq = sub {
    if (shift =~ /\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) {
        my @opcode=(0x8f);
        rxb(\@opcode,$3,$2,-1,0x08);
        push @opcode,0x78,0xc3;
        push @opcode,0xc0|($2&7)|(($3&7)<<3);	# ModR/M
        my $c=$1;
        push @opcode,$c=~/^0/?oct($c):$c;
        @opcode;
    } else {
        ();
    }
};

# Intel Control-flow Enforcement Technology extension. All functions and
# indirect branch targets will have to start with this instruction...

my $endbranch = sub {
    (0xf3,0x0f,0x1e,0xfa);
};

########################################################################

# Assembler-specific file header.
if ($nasm) {
    print <<___;
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
___
} elsif ($masm) {
    print <<___;
OPTION DOTNAME
___
}
print STDOUT "#if defined(__x86_64__)\n" if ($gas);

# Main translation loop. Each input line is stripped of comments and
# surrounding white space, then a label, a directive or an instruction
# with its arguments is peeled off the front and printed in the target
# syntax. Whatever is left in $line afterwards is printed verbatim.
while(defined(my $line=<>)) {

    $line =~ s|\R$||;           # Better chomp

    $line =~ s|[#!].*$||;       # get rid of asm-style comments...
    $line =~ s|/\*.*\*/||;      # ... and C-style comments...
    $line =~ s|^\s+||;          # ... and skip white spaces in beginning
    $line =~ s|\s+$||;          # ... and at the end

    if (my $label=label->re(\$line)) { print $label->out(); }

    if (my $directive=directive->re(\$line)) {
        printf "%s",$directive->out();
    } elsif (my $opcode=opcode->re(\$line)) {
        # The hand-coded encoders are file-scope lexicals named after
        # their mnemonic ($movq, $pshufb, ...), hence the string eval to
        # find one by name.
        my $asm = eval("\$".$opcode->mnemonic());

        # An encoder returning an empty list means "not my operand
        # form"; in that case fall through to the regular translation.
        if ((ref($asm) eq 'CODE') && scalar(my @bytes=$asm->($line))) {
            print $gas?".byte\t":"DB\t",join(',',@bytes),"\n";
            next;
        }

        my @args;
        ARGUMENT: while (1) {
            my $arg;

            ($arg=register->re(\$line, $opcode))||
            ($arg=const->re(\$line))            ||
            ($arg=ea->re(\$line, $opcode))      ||
            ($arg=expr->re(\$line, $opcode))    ||
            last ARGUMENT;

            push @args,$arg;

            last ARGUMENT if ($line !~ /^,/);

            $line =~ s/^,\s*//;
        } # ARGUMENT:

        if ($#args>=0) {
            my $insn;
            my $sz=$opcode->size();

            if ($gas) {
                $insn = $opcode->out($#args>=1?$args[$#args]->size():$sz);
                @args = map($_->out($sz),@args);
                printf "\t%s\t%s",$insn,join(",",@args);
            } else {
                $insn = $opcode->out();
                foreach (@args) {
                    my $arg = $_->out();
                    # $insn.=$sz compensates for movq, pinsrw, ...
                    if ($arg =~ /^xmm[0-9]+$/) { $insn.=$sz; $sz="x" if(!$sz); last; }
                    if ($arg =~ /^ymm[0-9]+$/) { $insn.=$sz; $sz="y" if(!$sz); last; }
                    if ($arg =~ /^zmm[0-9]+$/) { $insn.=$sz; $sz="z" if(!$sz); last; }
                    if ($arg =~ /^mm[0-9]+$/)  { $insn.=$sz; $sz="q" if(!$sz); last; }
                }
                # Arguments were collected in AT&T order; print them
                # reversed.
                @args = reverse(@args);
                undef $sz if ($nasm && $opcode->mnemonic() eq "lea");

                if ($insn eq "movq" && $#args == 1 && $args[0]->out($sz) eq "xmm0" && $args[1]->out($sz) eq "rax") {
                    # I have no clue why MASM can't parse this instruction.
                    printf "DB 66h, 48h, 0fh, 6eh, 0c0h";
                } else {
                    printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args));
                }
            }
        } else {
            printf "\t%s",$opcode->out();
        }
    }

    print $line,"\n";
}

# Assembler-specific file footer.
print "\n$current_segment\tENDS\n"	if ($current_segment && $masm);
print "END\n"				if ($masm);
print "#endif\n"			if ($gas);


# Buffered write errors (e.g. disk full) only surface when the handle is
# closed; fail loudly rather than leave a truncated output file behind.
close STDOUT or die "error closing STDOUT: $!";

#################################################
# Cross-reference x86_64 ABI "card"
#
#		Unix		Win64
# %rax		*		*
# %rbx		-		-
# %rcx		#4		#1
# %rdx		#3		#2
# %rsi		#2		-
# %rdi		#1		-
# %rbp		-		-
# %rsp		-		-
# %r8		#5		#3
# %r9		#6		#4
# %r10		*		*
# %r11		*		*
# %r12		-		-
# %r13		-		-
# %r14		-		-
# %r15		-		-
#
# (*)	volatile register
# (-)	preserved by callee
# (#)	Nth argument, volatile
#
# In Unix terms top of stack is argument transfer area for arguments
# which could not be accommodated in registers. Or in other words 7th
# [integer] argument resides at 8(%rsp) upon function entry point.
# 128 bytes above %rsp constitute a "red zone" which is not touched
# by signal handlers and can be used as temporary storage without
# allocating a frame.
#
# In Win64 terms N*8 bytes on top of stack is argument transfer area,
# which belongs to/can be overwritten by callee. N is the number of
# arguments passed to callee, *but* not less than 4! This means that
# upon function entry point 5th argument resides at 40(%rsp), as well
# as that 32 bytes from 8(%rsp) can always be used as temporary
# storage [without allocating a frame]. One can actually argue that
# one can assume a "red zone" above stack pointer under Win64 as well.
# Point is that at apparently no occasion Windows kernel would alter
# the area above user stack pointer in true asynchronous manner...
1261# 1262# All the above means that if assembler programmer adheres to Unix 1263# register and stack layout, but disregards the "red zone" existence, 1264# it's possible to use following prologue and epilogue to "gear" from 1265# Unix to Win64 ABI in leaf functions with not more than 6 arguments. 1266# 1267# omnipotent_function: 1268# ifdef WIN64 1269# movq %rdi,8(%rsp) 1270# movq %rsi,16(%rsp) 1271# movq %rcx,%rdi ; if 1st argument is actually present 1272# movq %rdx,%rsi ; if 2nd argument is actually ... 1273# movq %r8,%rdx ; if 3rd argument is ... 1274# movq %r9,%rcx ; if 4th argument ... 1275# movq 40(%rsp),%r8 ; if 5th ... 1276# movq 48(%rsp),%r9 ; if 6th ... 1277# endif 1278# ... 1279# ifdef WIN64 1280# movq 8(%rsp),%rdi 1281# movq 16(%rsp),%rsi 1282# endif 1283# ret 1284# 1285################################################# 1286# Win64 SEH, Structured Exception Handling. 1287# 1288# Unlike on Unix systems(*) lack of Win64 stack unwinding information 1289# has undesired side-effect at run-time: if an exception is raised in 1290# assembler subroutine such as those in question (basically we're 1291# referring to segmentation violations caused by malformed input 1292# parameters), the application is briskly terminated without invoking 1293# any exception handlers, most notably without generating memory dump 1294# or any user notification whatsoever. This poses a problem. It's 1295# possible to address it by registering custom language-specific 1296# handler that would restore processor context to the state at 1297# subroutine entry point and return "exception is not handled, keep 1298# unwinding" code. Writing such handler can be a challenge... But it's 1299# doable, though requires certain coding convention. 
Consider following 1300# snippet: 1301# 1302# .type function,@function 1303# function: 1304# movq %rsp,%rax # copy rsp to volatile register 1305# pushq %r15 # save non-volatile registers 1306# pushq %rbx 1307# pushq %rbp 1308# movq %rsp,%r11 1309# subq %rdi,%r11 # prepare [variable] stack frame 1310# andq $-64,%r11 1311# movq %rax,0(%r11) # check for exceptions 1312# movq %r11,%rsp # allocate [variable] stack frame 1313# movq %rax,0(%rsp) # save original rsp value 1314# magic_point: 1315# ... 1316# movq 0(%rsp),%rcx # pull original rsp value 1317# movq -24(%rcx),%rbp # restore non-volatile registers 1318# movq -16(%rcx),%rbx 1319# movq -8(%rcx),%r15 1320# movq %rcx,%rsp # restore original rsp 1321# magic_epilogue: 1322# ret 1323# .size function,.-function 1324# 1325# The key is that up to magic_point copy of original rsp value remains 1326# in chosen volatile register and no non-volatile register, except for 1327# rsp, is modified. While past magic_point rsp remains constant till 1328# the very end of the function. 
In this case custom language-specific 1329# exception handler would look like this: 1330# 1331# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, 1332# CONTEXT *context,DISPATCHER_CONTEXT *disp) 1333# { ULONG64 *rsp = (ULONG64 *)context->Rax; 1334# ULONG64 rip = context->Rip; 1335# 1336# if (rip >= magic_point) 1337# { rsp = (ULONG64 *)context->Rsp; 1338# if (rip < magic_epilogue) 1339# { rsp = (ULONG64 *)rsp[0]; 1340# context->Rbp = rsp[-3]; 1341# context->Rbx = rsp[-2]; 1342# context->R15 = rsp[-1]; 1343# } 1344# } 1345# context->Rsp = (ULONG64)rsp; 1346# context->Rdi = rsp[1]; 1347# context->Rsi = rsp[2]; 1348# 1349# memcpy (disp->ContextRecord,context,sizeof(CONTEXT)); 1350# RtlVirtualUnwind(UNW_FLAG_NHANDLER,disp->ImageBase, 1351# disp->ControlPc,disp->FunctionEntry,disp->ContextRecord, 1352# &disp->HandlerData,&disp->EstablisherFrame,NULL); 1353# return ExceptionContinueSearch; 1354# } 1355# 1356# It's appropriate to implement this handler in assembler, directly in 1357# function's module. In order to do that one has to know members' 1358# offsets in CONTEXT and DISPATCHER_CONTEXT structures and some constant 1359# values. 
Here they are: 1360# 1361# CONTEXT.Rax 120 1362# CONTEXT.Rcx 128 1363# CONTEXT.Rdx 136 1364# CONTEXT.Rbx 144 1365# CONTEXT.Rsp 152 1366# CONTEXT.Rbp 160 1367# CONTEXT.Rsi 168 1368# CONTEXT.Rdi 176 1369# CONTEXT.R8 184 1370# CONTEXT.R9 192 1371# CONTEXT.R10 200 1372# CONTEXT.R11 208 1373# CONTEXT.R12 216 1374# CONTEXT.R13 224 1375# CONTEXT.R14 232 1376# CONTEXT.R15 240 1377# CONTEXT.Rip 248 1378# CONTEXT.Xmm6 512 1379# sizeof(CONTEXT) 1232 1380# DISPATCHER_CONTEXT.ControlPc 0 1381# DISPATCHER_CONTEXT.ImageBase 8 1382# DISPATCHER_CONTEXT.FunctionEntry 16 1383# DISPATCHER_CONTEXT.EstablisherFrame 24 1384# DISPATCHER_CONTEXT.TargetIp 32 1385# DISPATCHER_CONTEXT.ContextRecord 40 1386# DISPATCHER_CONTEXT.LanguageHandler 48 1387# DISPATCHER_CONTEXT.HandlerData 56 1388# UNW_FLAG_NHANDLER 0 1389# ExceptionContinueSearch 1 1390# 1391# In order to tie the handler to the function one has to compose 1392# couple of structures: one for .xdata segment and one for .pdata. 1393# 1394# UNWIND_INFO structure for .xdata segment would be 1395# 1396# function_unwind_info: 1397# .byte 9,0,0,0 1398# .rva handler 1399# 1400# This structure designates exception handler for a function with 1401# zero-length prologue, no stack frame or frame register. 1402# 1403# To facilitate composing of .pdata structures, auto-generated "gear" 1404# prologue copies rsp value to rax and denotes next instruction with 1405# .LSEH_begin_{function_name} label. This essentially defines the SEH 1406# styling rule mentioned in the beginning. Position of this label is 1407# chosen in such manner that possible exceptions raised in the "gear" 1408# prologue would be accounted to caller and unwound from latter's frame. 1409# End of function is marked with respective .LSEH_end_{function_name} 1410# label. 
To summarize, .pdata segment would contain 1411# 1412# .rva .LSEH_begin_function 1413# .rva .LSEH_end_function 1414# .rva function_unwind_info 1415# 1416# Reference to function_unwind_info from .xdata segment is the anchor. 1417# In case you wonder why references are 32-bit .rvas and not 64-bit 1418# .quads. References put into these two segments are required to be 1419# *relative* to the base address of the current binary module, a.k.a. 1420# image base. No Win64 module, be it .exe or .dll, can be larger than 1421# 2GB and thus such relative references can be and are accommodated in 1422# 32 bits. 1423# 1424# Having reviewed the example function code, one can argue that "movq 1425# %rsp,%rax" above is redundant. It is not! Keep in mind that on Unix 1426# rax would contain an undefined value. If this "offends" you, use 1427# another register and refrain from modifying rax till magic_point is 1428# reached, i.e. as if it was a non-volatile register. If more registers 1429# are required prior [variable] frame setup is completed, note that 1430# nobody says that you can have only one "magic point." You can 1431# "liberate" non-volatile registers by denoting last stack off-load 1432# instruction and reflecting it in finer grade unwind logic in handler. 1433# After all, isn't it why it's called *language-specific* handler... 1434# 1435# SE handlers are also involved in unwinding stack when executable is 1436# profiled or debugged. Profiling implies additional limitations that 1437# are too subtle to discuss here. For now it's sufficient to say that 1438# in order to simplify handlers one should either a) offload original 1439# %rsp to stack (like discussed above); or b) if you have a register to 1440# spare for frame pointer, choose volatile one. 1441# 1442# (*) Note that we're talking about run-time, not debug-time. Lack of 1443# unwind information makes debugging hard on both Windows and 1444# Unix. 
"Unlike" refers to the fact that on Unix signal handler 1445# will always be invoked, core dumped and appropriate exit code 1446# returned to parent (for user notification). 1447