1 // Copyright (C) 2021 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>
20
21 #ifdef __ANDROID__
22 #include <benchmark.pb.h>
23 #else
24 #include "schema/benchmark.pb.h"
25 #endif
26
27 #include <google/protobuf/io/zero_copy_stream_impl.h>
28 #include <google/protobuf/text_format.h>
29
// One syscall invocation parsed from a line of strace output.
struct Syscall {
  // Text that precedes the opening parenthesis of the call.
  std::string name{};
  // Textual arguments of the call, in the order they were written.
  std::vector<std::string> arguments{};
  // Text that follows "= " after the argument list.
  std::string return_value{};
};
35
// Loads the strace output stored at `file_path` and returns it line by line, in file order and
// without the line terminators. The returned list is empty when nothing can be read from the file.
std::vector<std::string> ReadLines(const std::string& file_path) {
  std::ifstream strace_file(file_path);

  std::vector<std::string> result;
  for (std::string current; std::getline(strace_file, current);) {
    result.push_back(current);
  }

  // The stream is closed by its destructor when the function returns.
  return result;
}
49
50 // Processes the given line into syscall name, arguments and return value
ProcessLine(const std::string & line)51 Syscall ProcessLine(const std::string& line) {
52 Syscall syscall;
53
54 syscall.name = line.substr(0, line.find('('));
55 std::string raw_arguments = line.substr(line.find('(') + 1, line.find(')') - line.find('(') - 1);
56 syscall.return_value = line.substr(line.find(')'));
57 syscall.return_value = syscall.return_value.substr(syscall.return_value.find("= ") + 2);
58
59 size_t next = 0;
60 size_t last = 0;
61 while ((next = raw_arguments.find(", ", last)) != std::string::npos) {
62 std::string part = raw_arguments.substr(last, next - last);
63 last = next + 2;
64 if (part.size() != 0) syscall.arguments.push_back(part);
65 }
66 std::string part = raw_arguments.substr(last);
67 if (part.size() != 0) syscall.arguments.push_back(part);
68
69 return syscall;
70 }
71
// Groups the strace lines by the pid written at the start of each line. The pid is the decimal
// number parsed from the text preceding the first space (0 when that text is not a number). Lines
// of the same pid keep their relative order.
std::map<int, std::vector<std::string>> SplitByPid(const std::vector<std::string>& lines) {
  std::map<int, std::vector<std::string>> grouped;

  for (std::size_t index = 0; index < lines.size(); ++index) {
    const std::string& entry = lines[index];
    const std::string pid_text = entry.substr(0, entry.find(' '));
    const int pid = strtoll(pid_text.c_str(), nullptr, 10);
    grouped[pid].push_back(entry);
  }

  return grouped;
}
83
84 // Goes through all the lines for each pid, merges lines with unfinished and resumed tags, then
85 // calls ProcessLine on each of those merged lines. Returns a map where pid maps to a list of
86 // processed lines/syscalls
ProcessLines(const std::map<int,std::vector<std::string>> & lines_by_pid)87 std::map<int, std::vector<Syscall>> ProcessLines(
88 const std::map<int, std::vector<std::string>>& lines_by_pid) {
89 std::map<int, std::vector<Syscall>> processed_syscalls_by_pid;
90
91 for (const auto& [pid, lines] : lines_by_pid) {
92 for (std::size_t i = 0; i < lines.size(); ++i) {
93 auto line = lines[i];
94
95 // If only the resumed part of the syscall was found, ignore it
96 if (line.find("resumed>") != std::string::npos) continue;
97
98 // If the syscall is detached, ignore it
99 if (line.find("<detached ...>") != std::string::npos) continue;
100
101 // If the line contains "unfinished", concatenate it with the next line, which should contain
102 // "resumed"
103 if (line.find("<unfinished ...>") != std::string::npos) {
104 // Remove the "unfinished" tag
105 line = line.substr(0, line.find("<unfinished ...>"));
106
107 // If the next line does not exist, ignore the syscall altogether
108 if (i + 1 >= lines.size()) continue;
109
110 auto second_line = lines[++i];
111
112 // Remove the "resumed" tag
113 second_line = second_line.substr(second_line.find("resumed>") + std::strlen("resumed>"));
114 // Concatenate both lines
115 line += second_line;
116 }
117
118 // Remove the pid
119 line = line.substr(line.find(" ") + 2);
120
121 // If the line starts with "---" or "+++", ignore it
122 if (line.length() >= 3 && (line.substr(0, 3) == "---" || line.substr(0, 3) == "+++"))
123 continue;
124
125 auto processed_syscall = ProcessLine(line);
126 processed_syscalls_by_pid[pid].push_back(processed_syscall);
127 }
128 }
129
130 return processed_syscalls_by_pid;
131 }
132
main(int argc,char ** argv)133 int main(int argc, char** argv) {
134 if (argc != 3) {
135 std::cerr << "Invalid number of arguments.\n";
136 exit(EXIT_FAILURE);
137 }
138
139 auto raw_lines = ReadLines(argv[1]);
140 auto raw_lines_by_pid = SplitByPid(raw_lines);
141 auto processed_syscalls_by_pid = ProcessLines(raw_lines_by_pid);
142
143 std::string absolute_path = argv[2];
144
145 // Initialize .ditto file
146 auto benchmark = std::make_unique<dittosuiteproto::Benchmark>();
147 auto main_instruction_set = benchmark->mutable_main()->mutable_instruction_set();
148 benchmark->mutable_global()->set_absolute_path(absolute_path);
149
150 // Iterate over each pid and its processed lines. Start creating instructions after first openat()
151 // syscall, whose file name includes the provided absolute path, is found
152 for (const auto& [pid, syscalls] : processed_syscalls_by_pid) {
153 std::map<int, std::unique_ptr<dittosuiteproto::InstructionSet>> instruction_set_by_fd;
154 for (const auto& syscall : syscalls) {
155 if (syscall.name == "openat" &&
156 syscall.arguments[1].find(absolute_path) != std::string::npos) {
157 // Remove absolute_path
158 std::string path_name = syscall.arguments[1].substr(absolute_path.size() + 2);
159 // Remove quotes at the end
160 path_name.pop_back();
161
162 // If the return value is -1, ignore it
163 if (syscall.return_value.find("-1") != std::string::npos) continue;
164
165 int fd = strtoll(syscall.return_value.c_str(), nullptr, 10);
166
167 // Create .ditto instruction set for this fd with open file instruction
168 instruction_set_by_fd[fd] = std::make_unique<dittosuiteproto::InstructionSet>();
169 auto instruction = instruction_set_by_fd[fd]->add_instructions()->mutable_open_file();
170 instruction->set_path_name(path_name);
171 instruction->set_output_fd("fd");
172 } else if (syscall.name == "pread64") {
173 int fd = strtoll(syscall.arguments[0].c_str(), nullptr, 10);
174
175 if (syscall.arguments.size() != 4) continue;
176 if (instruction_set_by_fd.find(fd) == instruction_set_by_fd.end()) continue;
177
178 int64_t size = strtoll(syscall.arguments[2].c_str(), nullptr, 10);
179 int64_t offset = strtoll(syscall.arguments[3].c_str(), nullptr, 10);
180
181 // Create .ditto read file instruction
182 auto instruction = instruction_set_by_fd[fd]->add_instructions()->mutable_read_file();
183 instruction->set_input_fd("fd");
184 instruction->set_size(size);
185 instruction->set_block_size(size);
186 instruction->set_starting_offset(offset);
187 } else if (syscall.name == "pwrite64") {
188 int fd = strtoll(syscall.arguments[0].c_str(), nullptr, 10);
189
190 if (syscall.arguments.size() != 4) continue;
191 if (instruction_set_by_fd.find(fd) == instruction_set_by_fd.end()) continue;
192
193 int64_t size = strtoll(syscall.arguments[2].c_str(), nullptr, 10);
194 int64_t offset = strtoll(syscall.arguments[3].c_str(), nullptr, 10);
195
196 // Create .ditto write file instruction
197 auto instruction = instruction_set_by_fd[fd]->add_instructions()->mutable_write_file();
198 instruction->set_input_fd("fd");
199 instruction->set_size(size);
200 instruction->set_block_size(size);
201 instruction->set_starting_offset(offset);
202 } else if (syscall.name == "close") {
203 int fd = strtoll(syscall.arguments[0].c_str(), nullptr, 10);
204
205 if (instruction_set_by_fd.find(fd) == instruction_set_by_fd.end()) continue;
206
207 // Create .ditto close file instruction
208 auto instruction = instruction_set_by_fd[fd]->add_instructions()->mutable_close_file();
209 instruction->set_input_fd("fd");
210
211 // Add the instruction set for this fd to the main instruction set
212 main_instruction_set->add_instructions()->set_allocated_instruction_set(
213 instruction_set_by_fd[fd].release());
214 instruction_set_by_fd.erase(instruction_set_by_fd.find(fd));
215 }
216 }
217 }
218
219 auto output = std::make_unique<google::protobuf::io::OstreamOutputStream>(&std::cout);
220 google::protobuf::TextFormat::Print(*benchmark, output.get());
221 return 0;
222 }
223