• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019-2021 Collabora, Ltd.
3  * Author: Antonio Caggiano <antonio.caggiano@collabora.com>
4  * Author: Rohan Garg <rohan.garg@collabora.com>
5  * Author: Robert Beckett <bob.beckett@collabora.com>
6  * Author: Corentin Noël <corentin.noel@collabora.com>
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 
11 #include "pps_datasource.h"
12 #include "pps_driver.h"
13 
14 #include <condition_variable>
15 #include <thread>
16 #include <variant>
17 
// Minimum supported sampling period in nanoseconds. GpuDataSource::OnSetup()
// raises it further when the driver reports a larger minimum.
#define MIN_SAMPLING_PERIOD_NS 50000

// Interval, in boot-time nanoseconds (1 second), that must elapse before
// GpuDataSource::trace() emits another GPU/CPU clock correlation snapshot.
#define CORRELATION_TIMESTAMP_PERIOD (1000000000ull)
22 
23 namespace pps
24 {
/// Name of the driver chosen through GpuDataSource::register_data_source().
/// GpuDataSource::OnSetup() skips every DRM device whose name differs from it.
static std::string driver_name;

/// Synchronize access to started_cv and started
static std::mutex started_m;
/// Notified by GpuDataSource::OnStart() after `started` has been set
static std::condition_variable started_cv;
/// Set to true by OnStart() and back to false by OnStop(), under started_m
static bool started = false;
31 
/// Convert a nanosecond duration to fractional milliseconds.
/// @param t duration expressed in nanoseconds
/// @return the same duration in milliseconds, as a single-precision float
float ms(const std::chrono::nanoseconds &t)
{
   // A float-based duration converts implicitly from an integer one; the
   // conversion divides the tick count (as float) by 1'000'000.
   const std::chrono::duration<float, std::milli> as_millis(t);
   return as_millis.count();
}
36 
OnSetup(const SetupArgs & args)37 void GpuDataSource::OnSetup(const SetupArgs &args)
38 {
39    // Create drivers for all supported devices
40    auto drm_devices = DrmDevice::create_all();
41    for (auto &drm_device : drm_devices) {
42       if (drm_device.name != driver_name)
43          continue;
44 
45       if (auto driver = Driver::get_driver(std::move(drm_device))) {
46          if (!driver->init_perfcnt()) {
47             // Skip failing driver
48             PPS_LOG_ERROR("Failed to initialize %s driver", driver->drm_device.name.c_str());
49             continue;
50          }
51 
52          this->driver = driver;
53       }
54    }
55    if (driver == nullptr) {
56       PPS_LOG_FATAL("No DRM devices supported");
57    }
58 
59    // Parse perfetto config
60    const std::string &config_raw = args.config->gpu_counter_config_raw();
61    perfetto::protos::pbzero::GpuCounterConfig::Decoder config(config_raw);
62 
63    if (config.has_counter_ids()) {
64       // Get enabled counters
65       PPS_LOG_IMPORTANT("Selecting counters");
66       for (auto it = config.counter_ids(); it; ++it) {
67          uint32_t counter_id = it->as_uint32();
68          driver->enable_counter(counter_id);
69       }
70    } else {
71       // Enable all counters
72       driver->enable_all_counters();
73    }
74 
75    // Get sampling period
76    auto min_sampling_period = std::chrono::nanoseconds(MIN_SAMPLING_PERIOD_NS);
77 
78    auto dev_supported = std::chrono::nanoseconds(driver->get_min_sampling_period_ns());
79    if (dev_supported > min_sampling_period) {
80       min_sampling_period = dev_supported;
81    }
82 
83    time_to_sleep = std::max(time_to_sleep, min_sampling_period);
84 
85    if (config.has_counter_period_ns()) {
86       auto requested_sampling_period = std::chrono::nanoseconds(config.counter_period_ns());
87       if (requested_sampling_period < min_sampling_period) {
88          PPS_LOG_ERROR("Sampling period should be greater than %" PRIu64 " ns (%.2f ms)",
89             uint64_t(min_sampling_period.count()),
90             ms(min_sampling_period));
91       } else {
92          time_to_sleep = requested_sampling_period;
93       }
94    }
95    PPS_LOG("Sampling period set to %" PRIu64 " ns", uint64_t(time_to_sleep.count()));
96 }
97 
OnStart(const StartArgs & args)98 void GpuDataSource::OnStart(const StartArgs &args)
99 {
100    driver->enable_perfcnt(time_to_sleep.count());
101 
102    state = State::Start;
103 
104    {
105       std::lock_guard<std::mutex> lock(started_m);
106       started = true;
107    }
108    started_cv.notify_all();
109 }
110 
close_callback(GpuDataSource::TraceContext ctx)111 void close_callback(GpuDataSource::TraceContext ctx)
112 {
113    auto packet = ctx.NewTracePacket();
114    packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
115    packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
116    packet->Finalize();
117    ctx.Flush();
118    PPS_LOG("Context flushed");
119 }
120 
OnStop(const StopArgs & args)121 void GpuDataSource::OnStop(const StopArgs &args)
122 {
123    state = State::Stop;
124    auto stop_closure = args.HandleStopAsynchronously();
125    Trace(close_callback);
126    stop_closure();
127 
128    driver->disable_perfcnt();
129    driver = nullptr;
130 
131    std::lock_guard<std::mutex> lock(started_m);
132    started = false;
133 }
134 
wait_started()135 void GpuDataSource::wait_started()
136 {
137    std::unique_lock<std::mutex> lock(started_m);
138    if (!started) {
139       PPS_LOG("Waiting for start");
140       started_cv.wait(lock, [] { return started; });
141    }
142 }
143 
register_data_source(const std::string & _driver_name)144 void GpuDataSource::register_data_source(const std::string &_driver_name)
145 {
146    driver_name = _driver_name;
147    static perfetto::DataSourceDescriptor dsd;
148    dsd.set_name("gpu.counters." + driver_name);
149    Register(dsd);
150 }
151 
add_group(perfetto::protos::pbzero::GpuCounterDescriptor * desc,const CounterGroup & group,const std::string & prefix,int32_t gpu_num)152 void add_group(perfetto::protos::pbzero::GpuCounterDescriptor *desc,
153    const CounterGroup &group,
154    const std::string &prefix,
155    int32_t gpu_num)
156 {
157    if (!group.counters.empty()) {
158       // Define a block for each group containing counters
159       auto block_desc = desc->add_blocks();
160       block_desc->set_name(prefix + "." + group.name);
161       block_desc->set_block_id(group.id);
162 
163       // Associate counters to blocks
164       for (auto id : group.counters) {
165          block_desc->add_counter_ids(id);
166       }
167    }
168 
169    for (auto const &sub : group.subgroups) {
170       // Perfetto doesnt currently support nested groups.
171       // Flatten group hierarchy, using dot separator
172       add_group(desc, sub, prefix + "." + group.name, gpu_num);
173    }
174 }
175 
add_descriptors(perfetto::protos::pbzero::GpuCounterEvent * event,std::vector<CounterGroup> const & groups,std::vector<Counter> const & counters,Driver & driver)176 void add_descriptors(perfetto::protos::pbzero::GpuCounterEvent *event,
177    std::vector<CounterGroup> const &groups,
178    std::vector<Counter> const &counters,
179    Driver &driver)
180 {
181    // Start a counter descriptor
182    auto desc = event->set_counter_descriptor();
183 
184    // Add the groups
185    for (auto const &group : groups) {
186       add_group(desc, group, driver.drm_device.name, driver.drm_device.gpu_num);
187    }
188 
189    // Add the counters
190    for (auto const &counter : counters) {
191       auto spec = desc->add_specs();
192       spec->set_counter_id(counter.id);
193       spec->set_name(counter.name);
194 
195       auto units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE;
196       switch (counter.units) {
197       case Counter::Units::Percent:
198          units = perfetto::protos::pbzero::GpuCounterDescriptor::PERCENT;
199          break;
200       case Counter::Units::Byte:
201          units = perfetto::protos::pbzero::GpuCounterDescriptor::BYTE;
202          break;
203       case Counter::Units::Hertz:
204          units = perfetto::protos::pbzero::GpuCounterDescriptor::HERTZ;
205          break;
206       case Counter::Units::None:
207          units = perfetto::protos::pbzero::GpuCounterDescriptor::NONE;
208          break;
209       default:
210          assert(false && "Missing counter units type!");
211          break;
212       }
213       spec->add_numerator_units(units);
214    }
215 }
216 
add_samples(perfetto::protos::pbzero::GpuCounterEvent & event,const Driver & driver)217 void add_samples(perfetto::protos::pbzero::GpuCounterEvent &event, const Driver &driver)
218 {
219    if (driver.enabled_counters.size() == 0) {
220       PPS_LOG_FATAL("There are no counters enabled");
221    }
222 
223    for (const auto &counter : driver.enabled_counters) {
224       auto counter_event = event.add_counters();
225 
226       counter_event->set_counter_id(counter.id);
227 
228       auto value = counter.get_value(driver);
229       if (auto d_value = std::get_if<double>(&value)) {
230          counter_event->set_double_value(*d_value);
231       } else if (auto i_value = std::get_if<int64_t>(&value)) {
232          counter_event->set_int_value(*i_value);
233       } else {
234          PPS_LOG_ERROR("Failed to get value for counter %s", counter.name.c_str());
235       }
236    }
237 }
238 
add_timestamp(perfetto::protos::pbzero::ClockSnapshot * event,const Driver * driver)239 void add_timestamp(perfetto::protos::pbzero::ClockSnapshot *event, const Driver *driver)
240 {
241    uint32_t gpu_clock_id = driver->gpu_clock_id();
242    if (perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME == gpu_clock_id)
243       return;
244 
245    // Send a correlation event between GPU & CPU timestamps
246    uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
247    uint64_t gpu_ts = driver->gpu_timestamp();
248 
249    {
250       auto clock = event->add_clocks();
251 
252       clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
253       clock->set_timestamp(cpu_ts);
254    }
255 
256    {
257       auto clock = event->add_clocks();
258 
259       clock->set_clock_id(gpu_clock_id);
260       clock->set_timestamp(gpu_ts);
261    }
262 }
263 
trace(TraceContext & ctx)264 void GpuDataSource::trace(TraceContext &ctx)
265 {
266    using namespace perfetto::protos::pbzero;
267 
268    if (auto state = ctx.GetIncrementalState(); state->was_cleared) {
269       descriptor_timestamp = perfetto::base::GetBootTimeNs().count();
270 
271       {
272          // Mark any incremental state before this point invalid
273          auto packet = ctx.NewTracePacket();
274          packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
275          packet->set_timestamp(descriptor_timestamp);
276          packet->set_sequence_flags(TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
277       }
278 
279       descriptor_timestamp = perfetto::base::GetBootTimeNs().count();
280       {
281          // Counter descriptions
282          auto packet = ctx.NewTracePacket();
283          packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
284          packet->set_timestamp(descriptor_timestamp);
285          auto event = packet->set_gpu_counter_event();
286          event->set_gpu_id(driver->drm_device.gpu_num);
287 
288          auto &groups = driver->groups;
289          auto &counters = driver->enabled_counters;
290          add_descriptors(event, groups, counters, *driver);
291       }
292 
293       {
294          // Initial timestamp correlation event
295          auto packet = ctx.NewTracePacket();
296          packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
297          packet->set_timestamp(descriptor_timestamp);
298          last_correlation_timestamp = perfetto::base::GetBootTimeNs().count();
299          auto event = packet->set_clock_snapshot();
300          add_timestamp(event, driver);
301       }
302 
303       // Capture GPU timestamp of the first packet. Anything prior to this can
304       // be discarded.
305       descriptor_gpu_timestamp = driver->gpu_timestamp();
306       state->was_cleared = false;
307    }
308 
309    // Save current scheduler for restoring later
310    int prev_sched_policy = sched_getscheduler(0);
311    sched_param prev_priority_param;
312    sched_getparam(0, &prev_priority_param);
313 
314    // Use FIFO policy to avoid preemption while collecting counters
315    int sched_policy = SCHED_FIFO;
316    // Do not use max priority to avoid starving migration and watchdog threads
317    int priority_value = sched_get_priority_max(sched_policy) - 1;
318    sched_param priority_param { priority_value };
319    sched_setscheduler(0, sched_policy, &priority_param);
320 
321    if (driver->dump_perfcnt()) {
322       while (auto gpu_timestamp = driver->next()) {
323          if (gpu_timestamp <= descriptor_gpu_timestamp) {
324             // Do not send counter values before counter descriptors
325             PPS_LOG_ERROR("Skipping counter values coming before descriptors");
326             continue;
327          }
328 
329          auto packet = ctx.NewTracePacket();
330          packet->set_timestamp_clock_id(driver->gpu_clock_id());
331          packet->set_timestamp(gpu_timestamp);
332 
333          auto event = packet->set_gpu_counter_event();
334          event->set_gpu_id(driver->drm_device.gpu_num);
335 
336          add_samples(*event, *driver);
337       }
338    }
339 
340    uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
341    if ((cpu_ts - last_correlation_timestamp) > CORRELATION_TIMESTAMP_PERIOD) {
342       auto packet = ctx.NewTracePacket();
343       packet->set_timestamp_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
344       packet->set_timestamp(cpu_ts);
345       auto event = packet->set_clock_snapshot();
346       add_timestamp(event, driver);
347       last_correlation_timestamp = cpu_ts;
348    }
349 
350    // Reset normal scheduler
351    sched_setscheduler(0, prev_sched_policy, &prev_priority_param);
352 }
353 
trace_callback(TraceContext ctx)354 void GpuDataSource::trace_callback(TraceContext ctx)
355 {
356    using namespace std::chrono;
357 
358    nanoseconds sleep_time = nanoseconds(0);
359 
360    if (auto data_source = ctx.GetDataSourceLocked()) {
361       if (data_source->time_to_sleep > data_source->time_to_trace) {
362          sleep_time = data_source->time_to_sleep - data_source->time_to_trace;
363       }
364    }
365 
366    // Wait sampling period before tracing
367    std::this_thread::sleep_for(sleep_time);
368 
369    auto time_zero = perfetto::base::GetBootTimeNs();
370    if (auto data_source = ctx.GetDataSourceLocked()) {
371       // Check data source is still running
372       if (data_source->state == pps::State::Start) {
373          data_source->trace(ctx);
374          data_source->time_to_trace = perfetto::base::GetBootTimeNs() - time_zero;
375       }
376    } else {
377       PPS_LOG("Tracing finished");
378    }
379 }
380 
381 } // namespace pps
382