#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
#
# Generates a list of Control-Flow Integrity (CFI) jump table symbols
# for kallsyms.
#
# Copyright (C) 2021 Google LLC

use strict;
use warnings;

## parameters
my $ismodule = 0;
my $file;

foreach (@ARGV) {
    if ($_ eq '--module') {
        $ismodule = 1;
    } elsif (!defined($file)) {
        $file = $_;
    } else {
        die "$0: usage $0 [--module] binary";
    }
}

# Without a binary every tool invocation below would interpolate undef and
# fail with a misleading "symbol missing" error, so reject it up front.
die "$0: usage $0 [--module] binary" if !defined($file);

## environment
my $readelf = $ENV{'READELF'} || die "$0: ERROR: READELF not set?";
my $objdump = $ENV{'OBJDUMP'} || die "$0: ERROR: OBJDUMP not set?";
my $nm      = $ENV{'NM'}      || die "$0: ERROR: NM not set?";

## jump table addresses
## Holds the "start"/"end" addresses found by find_cfi_jt() and, keyed by
## address, the name of every jump table symbol seen by
## print_missing_symbols().
my $cfi_jt = {};
## text symbols, keyed by "<section>+<hex offset without leading zeros>"
my $text_symbols = {};

## parser state
use constant {
    UNKNOWN => 0,
    SYMBOL  => 1,
    HINT    => 2,
    BRANCH  => 3,
    RELOC   => 4
};

## trims leading zeros from a string
## Returns 0 when the string consisted only of zeros.
sub trim_zeros {
    my ($n) = @_;
    $n =~ s/^0+//;
    $n = 0 if ($n eq '');
    return $n;
}

## finds __cfi_jt_* symbols from the binary to locate the start and end of the
## jump table
## Stores each match in $cfi_jt under the part of the name that follows
## "__cfi_jt_", and dies if either "start" or "end" was not found.
sub find_cfi_jt {
    open(my $fh, "\"$readelf\" --symbols \"$file\" 2>/dev/null | grep __cfi_jt_ |")
        or die "$0: ERROR: failed to execute \"$readelf\": $!";

    while (my $line = <$fh>) {
        chomp($line);

        my ($addr, $name) = $line =~ /\:.*([a-f0-9]{16}).*\s__cfi_jt_(.*)/;
        if (defined($addr) && defined($name)) {
            $cfi_jt->{$name} = $addr;
        }
    }

    close($fh);

    die "$0: ERROR: __cfi_jt_start symbol missing" if !exists($cfi_jt->{"start"});
    die "$0: ERROR: __cfi_jt_end symbol missing"   if !exists($cfi_jt->{"end"});
}

## state shared by the is_* line matchers while walking the disassembly
my $last = UNKNOWN;        # kind of the most recently matched line
my $last_symbol;           # name from the most recent "<symbol>:" label
my $last_hint_addr;        # address of the most recent hint instruction
my $last_branch_addr;      # address recorded for the most recent branch
my $last_branch_target;    # resolved target of the most recent branch
my $last_reloc_target;     # resolved target of the most recent relocation

## matches a "<16 hex digits> <symbol>:" label line
## On a match, remembers the symbol name and returns 1; otherwise returns 0.
sub is_symbol {
    my ($line) = @_;
    my ($addr, $symbol) = $line =~ /^([a-f0-9]{16})\s<([^>]+)>\:/;

    if (defined($addr) && defined($symbol)) {
        $last        = SYMBOL;
        $last_symbol = $symbol;
        return 1;
    }

    return 0;
}

## matches a disassembly line containing a "hint #..." instruction
## On a match, remembers the instruction address and returns 1; otherwise
## returns 0.
sub is_hint {
    my ($line) = @_;
    my ($hint) = $line =~ /^\s*([a-f0-9]+)\:.*\s+hint\s+#/;

    if (defined($hint)) {
        $last           = HINT;
        $last_hint_addr = $hint;
        return 1;
    }

    return 0;
}

## resolves a "<section>+0x<offset>" target to the symbol defined there
## Targets that are not of the form "<name>(+|-)0x<hex>" are returned
## unchanged.  For .text, .init.text and .exit.text with a '+' offset the
## symbol recorded in $text_symbols is returned, and the script dies if none
## is known.  Any other "<name>(+|-)0x<hex>" target yields just <name>.
sub find_text_symbol {
    my ($target) = @_;

    my ($symbol, $expr, $offset) = $target =~ /^(\S*)([-\+])0x([a-f0-9]+)?$/;

    # The offset group is optional in the pattern, so it can be undefined
    # even when the rest matched.
    if (!defined($symbol) || !defined($expr) || !defined($offset)) {
        return $target;
    }

    if ($symbol =~ /^\.((init|exit)\.)?text$/ && $expr eq '+') {
        # read_symbols() stores keys without leading zeros
        $offset = trim_zeros($offset);
        my $actual = $text_symbols->{"$symbol+$offset"};

        if (!defined($actual)) {
            die "$0: unknown symbol at $symbol+0x$offset";
        }

        $symbol = $actual;
    }

    return $symbol;
}

## matches a "b"/"jmp"/"jmpq" instruction with a "<target>" annotation
## On a match, records the branch address and resolved target and returns 1;
## otherwise returns 0.
sub is_branch {
    my ($line) = @_;
    my ($addr, $instr, $branch_target) = $line =~
        /^\s*([a-f0-9]+)\:.*(b|jmpq?)\s+0x[a-f0-9]+\s+<([^>]+)>/;

    if (defined($addr) && defined($instr) && defined($branch_target)) {
        # When the previously matched line was a hint instruction, the
        # hint's address is recorded for this branch instead of its own.
        if ($last == HINT) {
            $last_branch_addr = $last_hint_addr;
        } else {
            $last_branch_addr = $addr;
        }

        $last               = BRANCH;
        $last_branch_target = find_text_symbol($branch_target);
        return 1;
    }

    return 0;
}

## matches a relocation line ("<16 hex digits>: R_<type> <target>")
## Only considered when the previously matched line was a branch.  On a
## match, records the resolved relocation target and returns 1; otherwise
## returns 0.
sub is_branch_reloc {
    my ($line) = @_;

    if ($last != BRANCH) {
        return 0;
    }

    my ($addr, $type, $reloc_target) =
        $line =~ /\s*([a-f0-9]{16})\:\s+R_(\S+)\s+(\S+)$/;

    if (defined($addr) && defined($type) && defined($reloc_target)) {
        $last              = RELOC;
        $last_reloc_target = find_text_symbol($reloc_target);
        return 1;
    }

    return 0;
}

## walks through the jump table looking for branches and prints out a jump
## table symbol for each branch if one is missing
## For modules the output is a linker script fragment; otherwise it is one
## "<address> t <symbol>" line per symbol.  Nothing is printed when no symbol
## is missing.
sub print_missing_symbols {
    my @symbols;

    open(my $fh, "\"$objdump\" -d -r " .
        "--start-address=0x" . $cfi_jt->{"start"} .
        " --stop-address=0x" . $cfi_jt->{"end"} .
        " \"$file\" 2>/dev/null |")
        or die "$0: ERROR: failed to execute \"$objdump\": $!";

    while (my $line = <$fh>) {
        chomp($line);

        # labels and hints only update the parser state
        if (is_symbol($line) || is_hint($line)) {
            next;
        }

        my $cfi_jt_symbol;

        if (is_branch($line)) {
            if ($ismodule) {
                next; # wait for the relocation
            }

            $cfi_jt_symbol = $last_branch_target;
        } elsif (is_branch_reloc($line)) {
            $cfi_jt_symbol = $last_reloc_target;
        } else {
            next;
        }

        # ignore functions with a canonical jump table
        if ($cfi_jt_symbol =~ /\.cfi$/) {
            next;
        }

        $cfi_jt_symbol .= ".cfi_jt";
        # remembered so print_kallsyms() can look addresses up in $cfi_jt
        $cfi_jt->{$last_branch_addr} = $cfi_jt_symbol;

        if (defined($last_symbol) && $last_symbol eq $cfi_jt_symbol) {
            next; # already exists
        }

        # print out the symbol
        if ($ismodule) {
            push(@symbols, "\t\t$cfi_jt_symbol = . + 0x$last_branch_addr;");
        } else {
            push(@symbols, "$last_branch_addr t $cfi_jt_symbol");
        }
    }

    close($fh);

    if (!scalar(@symbols)) {
        return;
    }

    if ($ismodule) {
        print "SECTIONS {\n";
        # With -fpatchable-function-entry, LLD isn't happy without this
        print "\t__patchable_function_entries : { *(__patchable_function_entries) }\n";
        print "\t.text : {\n";
    }

    foreach my $entry (@symbols) {
        print "$entry\n";
    }

    if ($ismodule) {
        print "\t}\n}\n";
    }
}

## reads defined text symbols from the file
## Fills $text_symbols with the symbols objdump reports in .text, .init.text
## and .exit.text, skipping arm mapping symbols and entries flagged "d".
sub read_symbols {
    open(my $fh, "\"$objdump\" --syms \"$file\" 2>/dev/null |")
        or die "$0: ERROR: failed to execute \"$objdump\": $!";

    while (my $line = <$fh>) {
        chomp($line);

        # llvm/tools/llvm-objdump/objdump.cpp:objdump::printSymbol
        # The fourth capture (an optional " .<name>" field) is not needed.
        my ($addr, $debug, $section, undef, $symbol) = $line =~
            /^([a-f0-9]{16})\s.{5}(.).{2}(\S+)\s[a-f0-9]{16}(\s\.\S+)?\s(.*)$/;

        if (defined($addr) && defined($section) && defined($symbol)) {
            if (!($section =~ /^\.((init|exit)\.)?text$/)) {
                next;
            }
            # skip arm mapping symbols
            if ($symbol =~ /^\$[xd]\.\d+$/) {
                next;
            }
            if (defined($debug) && $debug eq "d") {
                next;
            }

            # find_text_symbol() looks offsets up without leading zeros
            $addr = trim_zeros($addr);
            $text_symbols->{"$section+$addr"} = $symbol;
        }
    }

    close($fh);
}

## prints out the remaining symbols from nm -n, filtering out the unnecessary
## __typeid__ symbols aliasing the jump table symbols we added
## Lines that do not look like "<16 hex digits> <type> <symbol>" are passed
## through unchanged.
sub print_kallsyms {
    open(my $fh, "\"$nm\" -n \"$file\" 2>/dev/null |")
        or die "$0: ERROR: failed to execute \"$nm\": $!";

    while (my $line = <$fh>) {
        chomp($line);

        my ($addr, $symbol) = $line =~ /^([a-f0-9]{16})\s.\s(.*)$/;

        if (defined($addr) && defined($symbol)) {
            # drop duplicate __typeid__ symbols
            if ($symbol =~ /^__typeid__.*_global_addr$/ &&
                exists($cfi_jt->{$addr})) {
                next;
            }
        }

        print "$line\n";
    }

    close($fh);
}

## main
find_cfi_jt();

if ($ismodule) {
    # module branch targets are resolved through relocations against text
    # sections, so the text symbols must be known first
    read_symbols();
    print_missing_symbols();
} else {
    print_missing_symbols();
    print_kallsyms();
}