1 /**
2 * @file profile_spec.cpp
3 * Contains a PP profile specification
4 *
5 * @remark Copyright 2003 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Philippe Elie
9 */
10
11 #include <algorithm>
12 #include <set>
13 #include <sstream>
14 #include <iterator>
15 #include <iostream>
16 #include <dirent.h>
17
18 #include "file_manip.h"
19 #include "op_config.h"
20 #include "profile_spec.h"
21 #include "string_manip.h"
22 #include "glob_filter.h"
23 #include "locate_images.h"
24 #include "op_exception.h"
25 #include "op_header.h"
26 #include "op_fileio.h"
27
28 using namespace std;
29
30 namespace {
31
32 // PP:3.7, full path, or relative path. If we can't find it,
33 // we should maintain the original to maintain the wordexp etc.
fixup_image_spec(string const & str,extra_images const & extra)34 string const fixup_image_spec(string const & str, extra_images const & extra)
35 {
36 // On error find_image_path() return str, so if an occur we will
37 // use the provided image_name not the fixed one.
38 image_error error;
39 return extra.find_image_path(str, error, true);
40 }
41
fixup_image_spec(vector<string> & images,extra_images const & extra)42 void fixup_image_spec(vector<string> & images, extra_images const & extra)
43 {
44 vector<string>::iterator it = images.begin();
45 vector<string>::iterator const end = images.end();
46
47 for (; it != end; ++it)
48 *it = fixup_image_spec(*it, extra);
49 }
50
51 } // anon namespace
52
53
profile_spec()54 profile_spec::profile_spec()
55 :
56 extra_found_images()
57 {
58 parse_table["archive"] = &profile_spec::parse_archive_path;
59 parse_table["session"] = &profile_spec::parse_session;
60 parse_table["session-exclude"] =
61 &profile_spec::parse_session_exclude;
62 parse_table["image"] = &profile_spec::parse_image;
63 parse_table["image-exclude"] = &profile_spec::parse_image_exclude;
64 parse_table["lib-image"] = &profile_spec::parse_lib_image;
65 parse_table["event"] = &profile_spec::parse_event;
66 parse_table["count"] = &profile_spec::parse_count;
67 parse_table["unit-mask"] = &profile_spec::parse_unitmask;
68 parse_table["tid"] = &profile_spec::parse_tid;
69 parse_table["tgid"] = &profile_spec::parse_tgid;
70 parse_table["cpu"] = &profile_spec::parse_cpu;
71 }
72
73
parse(string const & tag_value)74 void profile_spec::parse(string const & tag_value)
75 {
76 string value;
77 action_t action = get_handler(tag_value, value);
78 if (!action) {
79 throw invalid_argument("profile_spec::parse(): not "
80 "a valid tag \"" + tag_value + "\"");
81 }
82
83 (this->*action)(value);
84 }
85
86
is_valid_tag(string const & tag_value)87 bool profile_spec::is_valid_tag(string const & tag_value)
88 {
89 string value;
90 return get_handler(tag_value, value);
91 }
92
93
set_image_or_lib_name(string const & str)94 void profile_spec::set_image_or_lib_name(string const & str)
95 {
96 /* FIXME: what does spec say about this being allowed to be
97 * a comma list or not ? */
98 image_or_lib_image.push_back(fixup_image_spec(str, extra_found_images));
99 }
100
101
parse_archive_path(string const & str)102 void profile_spec::parse_archive_path(string const & str)
103 {
104 archive_path = op_realpath(str);
105 }
106
107
get_archive_path() const108 string profile_spec::get_archive_path() const
109 {
110 return archive_path;
111 }
112
113
parse_session(string const & str)114 void profile_spec::parse_session(string const & str)
115 {
116 session = separate_token(str, ',');
117 }
118
119
parse_session_exclude(string const & str)120 void profile_spec::parse_session_exclude(string const & str)
121 {
122 session_exclude = separate_token(str, ',');
123 }
124
125
parse_image(string const & str)126 void profile_spec::parse_image(string const & str)
127 {
128 image = separate_token(str, ',');
129 fixup_image_spec(image, extra_found_images);
130 }
131
132
parse_image_exclude(string const & str)133 void profile_spec::parse_image_exclude(string const & str)
134 {
135 image_exclude = separate_token(str, ',');
136 fixup_image_spec(image_exclude, extra_found_images);
137 }
138
139
parse_lib_image(string const & str)140 void profile_spec::parse_lib_image(string const & str)
141 {
142 lib_image = separate_token(str, ',');
143 fixup_image_spec(lib_image, extra_found_images);
144 }
145
146
parse_event(string const & str)147 void profile_spec::parse_event(string const & str)
148 {
149 event.set(str);
150 }
151
152
parse_count(string const & str)153 void profile_spec::parse_count(string const & str)
154 {
155 count.set(str);
156 }
157
158
parse_unitmask(string const & str)159 void profile_spec::parse_unitmask(string const & str)
160 {
161 unitmask.set(str);
162 }
163
164
parse_tid(string const & str)165 void profile_spec::parse_tid(string const & str)
166 {
167 tid.set(str);
168 }
169
170
parse_tgid(string const & str)171 void profile_spec::parse_tgid(string const & str)
172 {
173 tgid.set(str);
174 }
175
176
parse_cpu(string const & str)177 void profile_spec::parse_cpu(string const & str)
178 {
179 cpu.set(str);
180 }
181
182
183 profile_spec::action_t
get_handler(string const & tag_value,string & value)184 profile_spec::get_handler(string const & tag_value, string & value)
185 {
186 string::size_type pos = tag_value.find_first_of(':');
187 if (pos == string::npos)
188 return 0;
189
190 string tag(tag_value.substr(0, pos));
191 value = tag_value.substr(pos + 1);
192
193 parse_table_t::const_iterator it = parse_table.find(tag);
194 if (it == parse_table.end())
195 return 0;
196
197 return it->second;
198 }
199
200
201 namespace {
202
203 /// return true if the value from the profile spec may match the comma
204 /// list
205 template<typename T>
comma_match(comma_list<T> const & cl,generic_spec<T> const & value)206 bool comma_match(comma_list<T> const & cl, generic_spec<T> const & value)
207 {
208 // if the profile spec is "all" we match the sample file
209 if (!cl.is_set())
210 return true;
211
212 // an "all" sample file should never match specified profile
213 // spec values
214 if (!value.is_set())
215 return false;
216
217 // now match each profile spec value against the sample file
218 return cl.match(value.value());
219 }
220
221 }
222
223
match(filename_spec const & spec) const224 bool profile_spec::match(filename_spec const & spec) const
225 {
226 bool matched_by_image_or_lib_image = false;
227
228 // We need the true image name not the one based on the sample
229 // filename for the benefit of module which have /oprofile in their
230 // sample filename. This allow to specify profile spec based on the
231 // real name of the image, e.g. 'binary:*oprofile.ko'
232 string simage = fixup_image_spec(spec.image, extra_found_images);
233 string slib_image = fixup_image_spec(spec.lib_image,
234 extra_found_images);
235
236 // PP:3.19
237 if (!image_or_lib_image.empty()) {
238 glob_filter filter(image_or_lib_image, image_exclude);
239 if (filter.match(simage) || filter.match(slib_image))
240 matched_by_image_or_lib_image = true;
241 }
242
243 if (!matched_by_image_or_lib_image) {
244 // PP:3.7 3.8
245 if (!image.empty()) {
246 glob_filter filter(image, image_exclude);
247 if (!filter.match(simage))
248 return false;
249 } else if (!image_or_lib_image.empty()) {
250 // image.empty() means match all except if user
251 // specified image_or_lib_image
252 return false;
253 }
254
255 // PP:3.9 3.10
256 if (!lib_image.empty()) {
257 glob_filter filter(lib_image, image_exclude);
258 if (!filter.match(slib_image))
259 return false;
260 } else if (image.empty() && !image_or_lib_image.empty()) {
261 // lib_image empty means match all except if user
262 // specified image_or_lib_image *or* we already
263 // matched this spec through image
264 return false;
265 }
266 }
267
268 if (!matched_by_image_or_lib_image) {
269 // if we don't match by image_or_lib_image we must try to
270 // exclude from spec, exclusion from image_or_lib_image has
271 // been handled above
272 vector<string> empty;
273 glob_filter filter(empty, image_exclude);
274 if (!filter.match(simage))
275 return false;
276 if (!spec.lib_image.empty() && !filter.match(slib_image))
277 return false;
278 }
279
280 if (!event.match(spec.event))
281 return false;
282
283 if (!count.match(spec.count))
284 return false;
285
286 if (!unitmask.match(spec.unitmask))
287 return false;
288
289 if (!comma_match(cpu, spec.cpu))
290 return false;
291
292 if (!comma_match(tid, spec.tid))
293 return false;
294
295 if (!comma_match(tgid, spec.tgid))
296 return false;
297
298 return true;
299 }
300
301
create(list<string> const & args,vector<string> const & image_path,string const & root_path)302 profile_spec profile_spec::create(list<string> const & args,
303 vector<string> const & image_path,
304 string const & root_path)
305 {
306 profile_spec spec;
307 set<string> tag_seen;
308 vector<string> temp_image_or_lib;
309
310 list<string>::const_iterator it = args.begin();
311 list<string>::const_iterator end = args.end();
312
313 for (; it != end; ++it) {
314 if (spec.is_valid_tag(*it)) {
315 if (tag_seen.find(*it) != tag_seen.end()) {
316 throw op_runtime_error("tag specified "
317 "more than once: " + *it);
318 }
319 tag_seen.insert(*it);
320 spec.parse(*it);
321 } else {
322 string const file = op_realpath(*it);
323 temp_image_or_lib.push_back(file);
324 }
325 }
326
327 // PP:3.5 no session given means use the current session.
328 if (spec.session.empty())
329 spec.session.push_back("current");
330
331 bool ok = true;
332 vector<string>::const_iterator ip_it = image_path.begin();
333 for ( ; ip_it != image_path.end(); ++ip_it) {
334 if (!is_directory(spec.get_archive_path() + "/" + *ip_it)) {
335 cerr << spec.get_archive_path() + "/" + *ip_it << " isn't a valid directory\n";
336 ok = false;
337 }
338 }
339 if (!ok)
340 throw op_runtime_error("invalid --image-path= options");
341
342 spec.extra_found_images.populate(image_path, spec.get_archive_path(),
343 root_path);
344 vector<string>::const_iterator im = temp_image_or_lib.begin();
345 vector<string>::const_iterator last = temp_image_or_lib.end();
346 for (; im != last; ++im)
347 spec.set_image_or_lib_name(*im);
348
349 return spec;
350 }
351
352 namespace {
353
/**
 * Compute the list of sessions to process.
 *
 * @param session          requested sessions; an empty list stands for
 *                         the single session "current"
 * @param session_exclude  sessions to drop from the result
 * @return @p session (or {"current"}) with every entry equal to one of
 *         @p session_exclude removed; relative order is preserved
 */
vector<string> filter_session(vector<string> const & session,
			      vector<string> const & session_exclude)
{
	vector<string> result(session);

	if (result.empty())
		result.push_back("current");

	for (size_t i = 0 ; i < session_exclude.size() ; ++i) {
		// FIXME: would we use fnmatch on each item, are we allowed
		// to --session=current* ?
		// Remove all occurrences, not just the first, so a session
		// listed twice cannot survive its own exclusion.
		result.erase(std::remove(result.begin(), result.end(),
					 session_exclude[i]),
			     result.end());
	}

	return result;
}
374
375 static bool invalid_sample_file;
valid_candidate(string const & base_dir,string const & filename,profile_spec const & spec,bool exclude_dependent,bool exclude_cg)376 bool valid_candidate(string const & base_dir, string const & filename,
377 profile_spec const & spec, bool exclude_dependent,
378 bool exclude_cg)
379 {
380 if (exclude_cg && filename.find("{cg}") != string::npos)
381 return false;
382
383 // strip out non sample files
384 string const & sub = filename.substr(base_dir.size(), string::npos);
385 if (!is_prefix(sub, "/{root}/") && !is_prefix(sub, "/{kern}/"))
386 return false;
387
388 /* When overflows occur in the oprofile kernel driver's sample
389 * buffers (caused by too high of a sampling rate), it's possible
390 * for samples to be mis-attributed. A common scenario is that,
391 * while profiling process 'abc' running binary 'xzy', the task
392 * switch for 'abc' gets dropped somehow. Then, samples are taken
393 * for the 'xyz' binary. In the attempt to attribute the samples to
394 * the associated binary, the oprofile kernel code examines the
395 * the memory mappings for the last process for which it recorded
396 * a task switch. When profiling at a very high rate, the oprofile
397 * daemon is often the process that is mistakenly examined. Then the
398 * sample from binary 'xyz' is matched to some file that's open in
399 * oprofiled's memory space. Because oprofiled has many sample files
400 * open at any given time, there's a good chance the sample's VMA is
401 * contained within one of those sample files. So, once finding this
402 * bogus match, the oprofile kernel records a cookie switch for the
403 * sample file. This scenario is made even more likely if a high
404 * sampling rate (e.g., profiling on several events) is paired with
405 * callgraph data collection.
406 *
407 * When the daemon processes this sample data from the kernel, it
408 * creates a sample file for the sample file, resulting in something
409 * of the form:
410 * <session-dir>/[blah]<session-dir>/[blah]
411 *
412 * When the sample data is post-processed, the sample file is parsed to
413 * try to determine the name of the binary, but it gets horribly confused.
414 * At best, the post-processing tool will spit out some warning messages,
415 * such as:
416 * warning:
417 * /lib64/libdl-2.9.so/CYCLES.10000.0.all.all.all/{dep}/{root}/var/lib/oprofile/samples/current/{root}/lib64/libdl-2.9.so/{dep}/{root}/lib64/libdl-2.9.so/PM_RUN_CYC_GRP12.10000.0.all.all.all
418 * could not be found.
419 *
420 * At worst, the parsing may result in an "invalid argument" runtime error
421 * because of the inability to parse a sample file whose name contains that
422 * of another sample file. This typically seems to happen when callgraph
423 * data is being collected.
424 *
425 * The next several lines of code checks if the passed filename
426 * contains <session-dir>/samples; if so, we discard it as an
427 * invalid sample file.
428 */
429
430 unsigned int j = base_dir.rfind('/');
431 string session_samples_dir = base_dir.substr(0, j);
432 if (sub.find(session_samples_dir) != string::npos) {
433 invalid_sample_file = true;
434 return false;
435 }
436
437 // strip out generated JIT object files for samples of anonymous regions
438 if (is_jit_sample(sub))
439 return false;
440
441 filename_spec file_spec(filename, spec.extra_found_images);
442 if (spec.match(file_spec)) {
443 if (exclude_dependent && file_spec.is_dependent())
444 return false;
445 return true;
446 }
447
448 return false;
449 }
450
451
452 /**
453 * Print a warning message if we detect any sample buffer overflows
454 * occurred in the kernel driver.
455 */
warn_if_kern_buffs_overflow(string const & session_samples_dir)456 void warn_if_kern_buffs_overflow(string const & session_samples_dir)
457 {
458 DIR * dir;
459 struct dirent * dirent;
460 string stats_path;
461 int ret = 0;
462
463 stats_path = session_samples_dir + "stats/";
464 ret = op_read_int_from_file((stats_path + "event_lost_overflow").
465 c_str(), 0);
466
467 if (!(dir = opendir(stats_path.c_str()))) {
468 ret = -1;
469 goto done;
470 }
471
472 while ((dirent = readdir(dir)) && !ret) {
473 int cpu_nr;
474 string path;
475 if (sscanf(dirent->d_name, "cpu%d", &cpu_nr) != 1)
476 continue;
477 path = stats_path + dirent->d_name + "/";
478 ret = op_read_int_from_file((path + "sample_lost_overflow").
479 c_str(), 0);
480 }
481 closedir(dir);
482
483 done:
484 if (ret > 0) {
485 cerr << "WARNING! The OProfile kernel driver reports sample "
486 << "buffer overflows." << endl;
487 cerr << "Such overflows can result in incorrect sample attribution"
488 << ", invalid sample" << endl
489 << "files and other symptoms. "
490 << "See the oprofiled.log for details." << endl;
491 cerr << "You should adjust your sampling frequency to eliminate"
492 << " (or at least minimize)" << endl
493 << "these overflows." << endl;
494 }
495 }
496
497
498 } // anonymous namespace
499
500
generate_file_list(bool exclude_dependent,bool exclude_cg) const501 list<string> profile_spec::generate_file_list(bool exclude_dependent,
502 bool exclude_cg) const
503 {
504 // FIXME: isn't remove_duplicates faster than doing this, then copy() ?
505 set<string> unique_files;
506
507 vector<string> sessions = filter_session(session, session_exclude);
508
509 if (sessions.empty()) {
510 ostringstream os;
511 os << "No session given\n"
512 << "included session was:\n";
513 copy(session.begin(), session.end(),
514 ostream_iterator<string>(os, "\n"));
515 os << "excluded session was:\n";
516 copy(session_exclude.begin(), session_exclude.end(),
517 ostream_iterator<string>(os, "\n"));
518 throw invalid_argument(os.str());
519 }
520
521 bool found_file = false;
522
523 vector<string>::const_iterator cit = sessions.begin();
524 vector<string>::const_iterator end = sessions.end();
525
526 for (; cit != end; ++cit) {
527 if (cit->empty())
528 continue;
529
530 string base_dir;
531 invalid_sample_file = false;
532 if ((*cit)[0] != '.' && (*cit)[0] != '/')
533 base_dir = archive_path + op_samples_dir;
534 base_dir += *cit;
535
536 base_dir = op_realpath(base_dir);
537
538 list<string> files;
539 create_file_list(files, base_dir, "*", true);
540
541 if (!files.empty()) {
542 found_file = true;
543 warn_if_kern_buffs_overflow(base_dir + "/");
544 }
545
546 list<string>::const_iterator it = files.begin();
547 list<string>::const_iterator fend = files.end();
548 for (; it != fend; ++it) {
549 if (valid_candidate(base_dir, *it, *this,
550 exclude_dependent, exclude_cg)) {
551 unique_files.insert(*it);
552 }
553 }
554 if (invalid_sample_file) {
555 cerr << "Warning: Invalid sample files found in "
556 << base_dir << endl;
557 cerr << "This problem can be caused by too high of a sampling rate."
558 << endl;
559 }
560 }
561
562 if (!found_file) {
563 ostringstream os;
564 os << "No sample file found: try running opcontrol --dump\n"
565 << "or specify a session containing sample files\n";
566 throw op_fatal_error(os.str());
567 }
568
569 list<string> result;
570 copy(unique_files.begin(), unique_files.end(), back_inserter(result));
571
572 return result;
573 }
574