1 // Copyright (C) 2016 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <header_abi_util.h>
16 #include <ir_representation.h>
17
18 #include <llvm/Support/CommandLine.h>
19 #include <llvm/Support/raw_ostream.h>
20
21 #include <memory>
22 #include <mutex>
23 #include <fstream>
24 #include <iostream>
25 #include <string>
26 #include <thread>
27 #include <vector>
28
29 #include <stdlib.h>
30
31 static constexpr std::size_t kSourcesPerBatchThread = 7;
32
33 static llvm::cl::OptionCategory header_linker_category(
34 "header-abi-linker options");
35
36 static llvm::cl::list<std::string> dump_files(
37 llvm::cl::Positional, llvm::cl::desc("<dump-files>"), llvm::cl::Required,
38 llvm::cl::cat(header_linker_category), llvm::cl::OneOrMore);
39
40 static llvm::cl::opt<std::string> linked_dump(
41 "o", llvm::cl::desc("<linked dump>"), llvm::cl::Required,
42 llvm::cl::cat(header_linker_category));
43
44 static llvm::cl::list<std::string> exported_header_dirs(
45 "I", llvm::cl::desc("<export_include_dirs>"), llvm::cl::Prefix,
46 llvm::cl::ZeroOrMore, llvm::cl::cat(header_linker_category));
47
48 static llvm::cl::opt<std::string> version_script(
49 "v", llvm::cl::desc("<version_script>"), llvm::cl::Optional,
50 llvm::cl::cat(header_linker_category));
51
52 static llvm::cl::opt<std::string> api(
53 "api", llvm::cl::desc("<api>"), llvm::cl::Optional,
54 llvm::cl::cat(header_linker_category));
55
56 static llvm::cl::opt<std::string> arch(
57 "arch", llvm::cl::desc("<arch>"), llvm::cl::Optional,
58 llvm::cl::cat(header_linker_category));
59
60 static llvm::cl::opt<bool> no_filter(
61 "no-filter", llvm::cl::desc("Do not filter any abi"), llvm::cl::Optional,
62 llvm::cl::cat(header_linker_category));
63
64 static llvm::cl::opt<std::string> so_file(
65 "so", llvm::cl::desc("<path to so file>"), llvm::cl::Optional,
66 llvm::cl::cat(header_linker_category));
67
68 static llvm::cl::opt<abi_util::TextFormatIR> text_format(
69 "text-format", llvm::cl::desc("Specify text format of abi dumps"),
70 llvm::cl::values(clEnumValN(abi_util::TextFormatIR::ProtobufTextFormat,
71 "ProtobufTextFormat", "ProtobufTextFormat"),
72 clEnumValEnd),
73 llvm::cl::init(abi_util::TextFormatIR::ProtobufTextFormat),
74 llvm::cl::cat(header_linker_category));
75
76 class HeaderAbiLinker {
77 public:
HeaderAbiLinker(const std::vector<std::string> & dump_files,const std::vector<std::string> & exported_header_dirs,const std::string & version_script,const std::string & so_file,const std::string & linked_dump,const std::string & arch,const std::string & api)78 HeaderAbiLinker(
79 const std::vector<std::string> &dump_files,
80 const std::vector<std::string> &exported_header_dirs,
81 const std::string &version_script,
82 const std::string &so_file,
83 const std::string &linked_dump,
84 const std::string &arch,
85 const std::string &api)
86 : dump_files_(dump_files), exported_header_dirs_(exported_header_dirs),
87 version_script_(version_script), so_file_(so_file),
88 out_dump_name_(linked_dump), arch_(arch), api_(api) {};
89
90 bool LinkAndDump();
91
92 private:
93 template <typename T>
94 bool LinkDecl(abi_util::IRDumper *dst,
95 std::set<std::string> *link_set,
96 std::set<std::string> *regex_matched_link_set,
97 const std::regex *vs_regex,
98 const abi_util::AbiElementMap<T> &src,
99 bool use_version_script);
100
101 bool ParseVersionScriptFiles();
102
103 bool ParseSoFile();
104
105 bool LinkTypes(const abi_util::TextFormatToIRReader *ir_reader,
106 abi_util::IRDumper *ir_dumper);
107
108 bool LinkFunctions(const abi_util::TextFormatToIRReader *ir_reader,
109 abi_util::IRDumper *ir_dumper);
110
111 bool LinkGlobalVars(const abi_util::TextFormatToIRReader *ir_reader,
112 abi_util::IRDumper *ir_dumper);
113
114 bool AddElfSymbols(abi_util::IRDumper *ir_dumper);
115
116
117 private:
118 const std::vector<std::string> &dump_files_;
119 const std::vector<std::string> &exported_header_dirs_;
120 const std::string &version_script_;
121 const std::string &so_file_;
122 const std::string &out_dump_name_;
123 const std::string &arch_;
124 const std::string &api_;
125 // TODO: Add to a map of std::sets instead.
126 std::set<std::string> exported_headers_;
127 std::set<std::string> types_set_;
128 std::set<std::string> function_decl_set_;
129 std::set<std::string> globvar_decl_set_;
130 // Version Script Regex Matching.
131 std::set<std::string> functions_regex_matched_set;
132 std::regex functions_vs_regex_;
133 // Version Script Regex Matching.
134 std::set<std::string> globvars_regex_matched_set;
135 std::regex globvars_vs_regex_;
136 };
137
138 template <typename T, typename Iterable>
AddElfSymbols(abi_util::IRDumper * dst,const Iterable & symbols)139 static bool AddElfSymbols(abi_util::IRDumper *dst, const Iterable &symbols) {
140 for (auto &&symbol : symbols) {
141 T elf_symbol(symbol);
142 if (!dst->AddElfSymbolMessageIR(&elf_symbol)) {
143 return false;
144 }
145 }
146 return true;
147 }
148
149 // To be called right after parsing the .so file / version script.
AddElfSymbols(abi_util::IRDumper * ir_dumper)150 bool HeaderAbiLinker::AddElfSymbols(abi_util::IRDumper *ir_dumper) {
151 return ::AddElfSymbols<abi_util::ElfFunctionIR>(ir_dumper,
152 function_decl_set_) &&
153 ::AddElfSymbols<abi_util::ElfObjectIR>(ir_dumper,
154 globvar_decl_set_);
155 }
156
DeDuplicateAbiElementsThread(const std::vector<std::string> & dump_files,const std::set<std::string> * exported_headers,abi_util::TextFormatToIRReader * greader,std::mutex * greader_lock,std::atomic<std::size_t> * cnt)157 static void DeDuplicateAbiElementsThread(
158 const std::vector<std::string> &dump_files,
159 const std::set<std::string> *exported_headers,
160 abi_util::TextFormatToIRReader *greader, std::mutex *greader_lock,
161 std::atomic<std::size_t> *cnt) {
162 std::unique_ptr<abi_util::TextFormatToIRReader> local_reader =
163 abi_util::TextFormatToIRReader::CreateTextFormatToIRReader(
164 text_format, exported_headers);
165 auto begin_it = dump_files.begin();
166 std::size_t num_sources = dump_files.size();
167 while (1) {
168 std::size_t i = cnt->fetch_add(kSourcesPerBatchThread);
169 if (i >= num_sources) {
170 break;
171 }
172 std::size_t end = std::min(i + kSourcesPerBatchThread, num_sources);
173 for (auto it = begin_it; it != begin_it + end; it++) {
174 std::unique_ptr<abi_util::TextFormatToIRReader> reader =
175 abi_util::TextFormatToIRReader::CreateTextFormatToIRReader(
176 text_format, exported_headers);
177 assert(reader != nullptr);
178 if (!reader->ReadDump(*it)) {
179 llvm::errs() << "ReadDump failed\n";
180 ::exit(1);
181 }
182 // This merge is needed since the iterators might not be contigous.
183 local_reader->MergeGraphs(*reader);
184 }
185 }
186 std::lock_guard<std::mutex> lock(*greader_lock);
187 greader->MergeGraphs(*local_reader);
188 }
189
LinkAndDump()190 bool HeaderAbiLinker::LinkAndDump() {
191 // If the user specifies that a version script should be used, use that.
192 if (!so_file_.empty()) {
193 exported_headers_ =
194 abi_util::CollectAllExportedHeaders(exported_header_dirs_);
195 if (!ParseSoFile()) {
196 llvm::errs() << "Couldn't parse so file\n";
197 return false;
198 }
199 } else if (!ParseVersionScriptFiles()) {
200 llvm::errs() << "Failed to parse stub files for exported symbols\n";
201 return false;
202 }
203 std::unique_ptr<abi_util::IRDumper> ir_dumper =
204 abi_util::IRDumper::CreateIRDumper(text_format, out_dump_name_);
205 assert(ir_dumper != nullptr);
206 AddElfSymbols(ir_dumper.get());
207 // Create a reader, on which we never actually call ReadDump(), since multiple
208 // dump files are associated with it.
209 std::unique_ptr<abi_util::TextFormatToIRReader> greader =
210 abi_util::TextFormatToIRReader::CreateTextFormatToIRReader(
211 text_format, &exported_headers_);
212 std::size_t max_threads = std::thread::hardware_concurrency();
213 std::size_t num_threads = kSourcesPerBatchThread < dump_files_.size() ?
214 std::min(dump_files_.size() / kSourcesPerBatchThread,
215 max_threads) : 0;
216 std::vector<std::thread> threads;
217 std::atomic<std::size_t> cnt(0);
218 std::mutex greader_lock;
219 for (std::size_t i = 1; i < num_threads; i++) {
220 threads.emplace_back(DeDuplicateAbiElementsThread, dump_files_,
221 &exported_headers_, greader.get(), &greader_lock,
222 &cnt);
223 }
224 DeDuplicateAbiElementsThread(dump_files_, &exported_headers_, greader.get(),
225 &greader_lock, &cnt);
226 for (auto &thread : threads) {
227 thread.join();
228 }
229
230 if (!LinkTypes(greader.get(), ir_dumper.get()) ||
231 !LinkFunctions(greader.get(), ir_dumper.get()) ||
232 !LinkGlobalVars(greader.get(), ir_dumper.get())) {
233 llvm::errs() << "Failed to link elements\n";
234 return false;
235 }
236 if (!ir_dumper->Dump()) {
237 llvm::errs() << "Serialization to ostream failed\n";
238 return false;
239 }
240 return true;
241 }
242
// Reports whether |symbol| is matched by |vs_regex| for the first time.
// A first-time match is recorded in |regex_matched_link_set|; a symbol that
// was already recorded, or that does not match, yields false.
static bool QueryRegexMatches(std::set<std::string> *regex_matched_link_set,
                              const std::regex *vs_regex,
                              const std::string &symbol) {
  assert(regex_matched_link_set != nullptr);
  assert(vs_regex != nullptr);
  const bool already_recorded = regex_matched_link_set->count(symbol) != 0;
  // The regex is only consulted for symbols not recorded yet.
  if (already_recorded || !std::regex_search(symbol, *vs_regex)) {
    return false;
  }
  regex_matched_link_set->insert(symbol);
  return true;
}
257
CreateRegexMatchExprFromSet(const std::set<std::string> & link_set)258 static std::regex CreateRegexMatchExprFromSet(
259 const std::set<std::string> &link_set) {
260 std::string all_regex_match_str = "";
261 std::set<std::string>::iterator it = link_set.begin();
262 while (it != link_set.end()) {
263 std::string regex_match_str_find_glob =
264 abi_util::FindAndReplace(*it, "\\*", ".*");
265 all_regex_match_str += "(\\b" + regex_match_str_find_glob + "\\b)";
266 if (++it != link_set.end()) {
267 all_regex_match_str += "|";
268 }
269 }
270 if (all_regex_match_str == "") {
271 return std::regex();
272 }
273 return std::regex(all_regex_match_str);
274 }
275
276 template <typename T>
LinkDecl(abi_util::IRDumper * dst,std::set<std::string> * link_set,std::set<std::string> * regex_matched_link_set,const std::regex * vs_regex,const abi_util::AbiElementMap<T> & src,bool use_version_script_or_so)277 bool HeaderAbiLinker::LinkDecl(
278 abi_util::IRDumper *dst, std::set<std::string> *link_set,
279 std::set<std::string> *regex_matched_link_set, const std::regex *vs_regex,
280 const abi_util::AbiElementMap<T> &src, bool use_version_script_or_so) {
281 assert(dst != nullptr);
282 assert(link_set != nullptr);
283 for (auto &&element : src) {
284 // If we are not using a version script and exported headers are available,
285 // filter out unexported abi.
286 std::string source_file = element.second.GetSourceFile();
287 // Builtin types will not have source file information.
288 if (!exported_headers_.empty() && !source_file.empty() &&
289 exported_headers_.find(source_file) ==
290 exported_headers_.end()) {
291 continue;
292 }
293 const std::string &element_str = element.first;
294 // Check for the existence of the element in linked dump / symbol file.
295 if (use_version_script_or_so) {
296 std::set<std::string>::iterator it =
297 link_set->find(element_str);
298 if (it == link_set->end()) {
299 if (!QueryRegexMatches(regex_matched_link_set, vs_regex, element_str)) {
300 continue;
301 }
302 } else {
303 // We get a pre-filled link name set while using version script.
304 link_set->erase(*it); // Avoid multiple instances of the same symbol.
305 }
306 }
307 if (!dst->AddLinkableMessageIR(&(element.second))) {
308 llvm::errs() << "Failed to add element to linked dump\n";
309 return false;
310 }
311 }
312 return true;
313 }
314
LinkTypes(const abi_util::TextFormatToIRReader * reader,abi_util::IRDumper * ir_dumper)315 bool HeaderAbiLinker::LinkTypes(const abi_util::TextFormatToIRReader *reader,
316 abi_util::IRDumper *ir_dumper) {
317 assert(reader != nullptr);
318 assert(ir_dumper != nullptr);
319 // Even if version scripts are available we take in types, since the symbols
320 // in the version script might reference a type exposed by the library.
321 return LinkDecl(ir_dumper, &types_set_, nullptr,
322 nullptr, reader->GetRecordTypes(), false) &&
323 LinkDecl(ir_dumper, &types_set_, nullptr,
324 nullptr, reader->GetEnumTypes(), false) &&
325 LinkDecl(ir_dumper, &types_set_, nullptr, nullptr,
326 reader->GetFunctionTypes(), false) &&
327 LinkDecl(ir_dumper, &types_set_, nullptr,
328 nullptr, reader->GetBuiltinTypes(), false) &&
329 LinkDecl(ir_dumper, &types_set_, nullptr,
330 nullptr, reader->GetPointerTypes(), false) &&
331 LinkDecl(ir_dumper, &types_set_, nullptr,
332 nullptr, reader->GetRvalueReferenceTypes(), false) &&
333 LinkDecl(ir_dumper, &types_set_, nullptr,
334 nullptr, reader->GetLvalueReferenceTypes(), false) &&
335 LinkDecl(ir_dumper, &types_set_, nullptr,
336 nullptr, reader->GetArrayTypes(), false) &&
337 LinkDecl(ir_dumper, &types_set_, nullptr,
338 nullptr, reader->GetQualifiedTypes(), false);
339 }
340
LinkFunctions(const abi_util::TextFormatToIRReader * reader,abi_util::IRDumper * ir_dumper)341 bool HeaderAbiLinker::LinkFunctions(
342 const abi_util::TextFormatToIRReader *reader,
343 abi_util::IRDumper *ir_dumper) {
344
345 assert(reader != nullptr);
346 return LinkDecl(ir_dumper, &function_decl_set_,
347 &functions_regex_matched_set, &functions_vs_regex_,
348 reader->GetFunctions(),
349 (!version_script_.empty() || !so_file_.empty()));
350 }
351
LinkGlobalVars(const abi_util::TextFormatToIRReader * reader,abi_util::IRDumper * ir_dumper)352 bool HeaderAbiLinker::LinkGlobalVars(
353 const abi_util::TextFormatToIRReader *reader,
354 abi_util::IRDumper *ir_dumper) {
355
356 assert(reader != nullptr);
357 return LinkDecl(ir_dumper, &globvar_decl_set_,
358 &globvars_regex_matched_set, &globvars_vs_regex_,
359 reader->GetGlobalVariables(),
360 (!version_script.empty() || !so_file_.empty()));
361 }
362
ParseVersionScriptFiles()363 bool HeaderAbiLinker::ParseVersionScriptFiles() {
364 abi_util::VersionScriptParser version_script_parser(version_script_, arch_,
365 api_);
366 if (!version_script_parser.Parse()) {
367 llvm::errs() << "Failed to parse version script\n";
368 return false;
369 }
370 function_decl_set_ = version_script_parser.GetFunctions();
371 globvar_decl_set_ = version_script_parser.GetGlobVars();
372 std::set<std::string> function_regexs =
373 version_script_parser.GetFunctionRegexs();
374 std::set<std::string> globvar_regexs =
375 version_script_parser.GetGlobVarRegexs();
376 functions_vs_regex_ = CreateRegexMatchExprFromSet(function_regexs);
377 globvars_vs_regex_ = CreateRegexMatchExprFromSet(globvar_regexs);
378 return true;
379 }
380
ParseSoFile()381 bool HeaderAbiLinker::ParseSoFile() {
382 auto Binary = llvm::object::createBinary(so_file_);
383
384 if (!Binary) {
385 llvm::errs() << "Couldn't really create object File \n";
386 return false;
387 }
388 llvm::object::ObjectFile *objfile =
389 llvm::dyn_cast<llvm::object::ObjectFile>(&(*Binary.get().getBinary()));
390 if (!objfile) {
391 llvm::errs() << "Not an object file\n";
392 return false;
393 }
394
395 std::unique_ptr<abi_util::SoFileParser> so_parser =
396 abi_util::SoFileParser::Create(objfile);
397 if (so_parser == nullptr) {
398 llvm::errs() << "Couldn't create soFile Parser\n";
399 return false;
400 }
401 so_parser->GetSymbols();
402 function_decl_set_ = so_parser->GetFunctions();
403 globvar_decl_set_ = so_parser->GetGlobVars();
404 return true;
405 }
406
main(int argc,const char ** argv)407 int main(int argc, const char **argv) {
408 llvm::cl::ParseCommandLineOptions(argc, argv, "header-linker");
409 if (so_file.empty() && version_script.empty()) {
410 llvm::errs() << "One of -so or -v needs to be specified\n";
411 return -1;
412 }
413 if (no_filter) {
414 static_cast<std::vector<std::string> &>(exported_header_dirs).clear();
415 }
416 HeaderAbiLinker Linker(dump_files, exported_header_dirs, version_script,
417 so_file, linked_dump, arch, api);
418
419 if (!Linker.LinkAndDump()) {
420 llvm::errs() << "Failed to link and dump elements\n";
421 return -1;
422 }
423 return 0;
424 }
425