• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2024 Arm Limited and/or its affiliates.
2  *
3  * This source code is licensed under the BSD-style license found in the
4  * LICENSE file in the root directory of this source tree.
5  */
6 
7 #include <cinttypes>
8 #include <vector>
9 
10 #include "arm_perf_monitor.h"
11 
12 #ifdef ETHOSU
13 #include <ethosu_driver.h>
14 #include <executorch/runtime/platform/log.h>
15 #include <pmu_ethosu.h>
16 
17 static uint32_t ethosu_inference_count = 0;
18 static uint64_t ethosu_ArmBackendExecuteCycleCountStart = 0;
19 static uint64_t ethosu_ArmBackendExecuteCycleCount = 0;
20 static uint64_t ethosu_ArmWhenNPURunCycleCountStart = 0;
21 static uint64_t ethosu_ArmWhenNPURunCycleCount = 0;
22 static uint64_t ethosu_pmuCycleCount = 0;
23 static std::vector<uint64_t> ethosu_pmuEventCounts(
24     ETHOSU_PMU_Get_NumEventCounters(),
25     0);
26 
27 #if defined(ETHOSU55) || defined(ETHOSU65)
28 static const uint32_t ethosu_pmuCountersUsed = 4;
29 #elif defined(ETHOSU85)
30 static const uint32_t ethosu_pmuCountersUsed = 5;
31 #else
32 #error No NPU target defined
33 #endif
34 
35 // ethosu_pmuCountersUsed should match numbers of counters setup in
36 // ethosu_inference_begin() and not be more then the HW supports
37 static_assert(ETHOSU_PMU_NCOUNTERS >= ethosu_pmuCountersUsed);
38 
39 extern "C" {
40 
41 // Callback invoked at start of NPU execution
ethosu_inference_begin(struct ethosu_driver * drv,void *)42 void ethosu_inference_begin(struct ethosu_driver* drv, void*) {
43   // Enable PMU
44   ETHOSU_PMU_Enable(drv);
45   ETHOSU_PMU_PMCCNTR_CFG_Set_Stop_Event(drv, ETHOSU_PMU_NPU_IDLE);
46   ETHOSU_PMU_PMCCNTR_CFG_Set_Start_Event(drv, ETHOSU_PMU_NPU_ACTIVE);
47 
48   // Setup 4 counters
49 #if defined(ETHOSU55) || defined(ETHOSU65)
50   ETHOSU_PMU_Set_EVTYPER(drv, 0, ETHOSU_PMU_AXI0_RD_DATA_BEAT_RECEIVED);
51   ETHOSU_PMU_Set_EVTYPER(drv, 1, ETHOSU_PMU_AXI1_RD_DATA_BEAT_RECEIVED);
52   ETHOSU_PMU_Set_EVTYPER(drv, 2, ETHOSU_PMU_AXI0_WR_DATA_BEAT_WRITTEN);
53   ETHOSU_PMU_Set_EVTYPER(drv, 3, ETHOSU_PMU_NPU_IDLE);
54   // Enable the 4 counters
55   ETHOSU_PMU_CNTR_Enable(
56       drv,
57       ETHOSU_PMU_CNT1_Msk | ETHOSU_PMU_CNT2_Msk | ETHOSU_PMU_CNT3_Msk |
58           ETHOSU_PMU_CNT4_Msk);
59 #elif defined(ETHOSU85)
60   ETHOSU_PMU_Set_EVTYPER(drv, 0, ETHOSU_PMU_SRAM_RD_DATA_BEAT_RECEIVED);
61   ETHOSU_PMU_Set_EVTYPER(drv, 1, ETHOSU_PMU_SRAM_WR_DATA_BEAT_WRITTEN);
62   ETHOSU_PMU_Set_EVTYPER(drv, 2, ETHOSU_PMU_EXT_RD_DATA_BEAT_RECEIVED);
63   ETHOSU_PMU_Set_EVTYPER(drv, 3, ETHOSU_PMU_EXT_WR_DATA_BEAT_WRITTEN);
64   ETHOSU_PMU_Set_EVTYPER(drv, 4, ETHOSU_PMU_NPU_IDLE);
65   // Enable the 5 counters
66   ETHOSU_PMU_CNTR_Enable(
67       drv,
68       ETHOSU_PMU_CNT1_Msk | ETHOSU_PMU_CNT2_Msk | ETHOSU_PMU_CNT3_Msk |
69           ETHOSU_PMU_CNT4_Msk | ETHOSU_PMU_CNT5_Msk);
70 #else
71 #error No NPU target defined
72 #endif
73 
74   ETHOSU_PMU_CNTR_Enable(drv, ETHOSU_PMU_CCNT_Msk);
75   ETHOSU_PMU_CYCCNT_Reset(drv);
76 
77   // Reset all counters
78   ETHOSU_PMU_EVCNTR_ALL_Reset(drv);
79 
80   // Save Cortex-M cycle clock to calculate total CPU cycles used in
81   // ethosu_inference_end()
82   ethosu_ArmWhenNPURunCycleCountStart = ARM_PMU_Get_CCNTR();
83 }
84 
85 // Callback invoked at end of NPU execution
ethosu_inference_end(struct ethosu_driver * drv,void *)86 void ethosu_inference_end(struct ethosu_driver* drv, void*) {
87   ethosu_inference_count++;
88   ethosu_pmuCycleCount += ETHOSU_PMU_Get_CCNTR(drv);
89 
90   for (size_t i = 0; i < ethosu_pmuCountersUsed; i++) {
91     ethosu_pmuEventCounts[i] += ETHOSU_PMU_Get_EVCNTR(drv, i);
92   }
93   ETHOSU_PMU_Disable(drv);
94   // Add Cortex-M cycle clock used during this NPU execution
95   ethosu_ArmWhenNPURunCycleCount +=
96       (ARM_PMU_Get_CCNTR() - ethosu_ArmWhenNPURunCycleCountStart);
97 }
98 
99 // Callback invoked at start of ArmBackend::execute()
ArmBackend_execute_begin()100 void ArmBackend_execute_begin() {
101   // Save Cortex-M cycle clock to calculate total CPU cycles used in
102   // ArmBackend_execute_end()
103   ethosu_ArmBackendExecuteCycleCountStart = ARM_PMU_Get_CCNTR();
104 }
105 
106 // Callback invoked at end of ArmBackend::execute()
ArmBackend_execute_end()107 void ArmBackend_execute_end() {
108   // Add Cortex-M cycle clock used during this ArmBackend::execute()
109   ethosu_ArmBackendExecuteCycleCount +=
110       (ARM_PMU_Get_CCNTR() - ethosu_ArmBackendExecuteCycleCountStart);
111 }
112 }
113 
StartMeasurements()114 void StartMeasurements() {
115   ethosu_ArmBackendExecuteCycleCount = 0;
116   ethosu_ArmWhenNPURunCycleCount = 0;
117   ethosu_pmuCycleCount = 0;
118 
119   for (size_t i = 0; i < ethosu_pmuCountersUsed; i++) {
120     ethosu_pmuEventCounts[i] = 0;
121   }
122   ARM_PMU_Enable();
123   DCB->DEMCR |= DCB_DEMCR_TRCENA_Msk; // Trace enable
124   ARM_PMU_CYCCNT_Reset();
125   ARM_PMU_CNTR_Enable(PMU_CNTENSET_CCNTR_ENABLE_Msk);
126 }
127 
StopMeasurements()128 void StopMeasurements() {
129   ARM_PMU_CNTR_Disable(
130       PMU_CNTENCLR_CCNTR_ENABLE_Msk | PMU_CNTENCLR_CNT0_ENABLE_Msk |
131       PMU_CNTENCLR_CNT1_ENABLE_Msk);
132   uint32_t cycle_count = ARM_PMU_Get_CCNTR();
133 
134   // Number of comand streams handled by the NPU
135   ET_LOG(Info, "NPU Inferences : %d", ethosu_inference_count);
136   ET_LOG(Info, "Profiler report, CPU cycles per operator:");
137   // This is number of CPU cycles for the ethos-u operator from start to finish
138   // in the framework If there is more then one commandstream the time is added
139   // together
140   ET_LOG(
141       Info,
142       "ethos-u : cycle_cnt : %d cycles",
143       ethosu_ArmBackendExecuteCycleCount);
144   // We could print a list of the cycles used by the other delegates here in the
145   // future but now we only print ethos-u: this means that "Operator(s) total:
146   // ..." will be the same number as ethos-u : cycle_cnt and not the sum of all
147   ET_LOG(
148       Info,
149       "Operator(s) total: %d CPU cycles",
150       ethosu_ArmBackendExecuteCycleCount);
151   // Total CPU cycles used in the executorch method->execute()
152   // Other delegates and no delegates are counted in this
153   ET_LOG(Info, "Inference runtime: %d CPU cycles total", cycle_count);
154 
155   ET_LOG(
156       Info,
157       "NOTE: CPU cycle values and ratio calculations require FPGA and identical CPU/NPU frequency");
158 
159   // Avoid division with zero if ARM_PMU_Get_CCNTR() is not enabled properly.
160   if (cycle_count == 0) {
161     ET_LOG(Info, "Inference CPU ratio: ?.?? %%");
162     ET_LOG(Info, "Inference NPU ratio: ?.?? %%");
163   } else {
164     ET_LOG(
165         Info,
166         "Inference CPU ratio: %.2f %%",
167         100.0 * (cycle_count - ethosu_ArmWhenNPURunCycleCount) / cycle_count);
168     ET_LOG(
169         Info,
170         "Inference NPU ratio: %.2f %%",
171         100.0 * ethosu_ArmWhenNPURunCycleCount / cycle_count);
172   }
173 
174   // CPU cycles used by NPU, e.g. number of CPU cycles used between
175   // ethosu_inference_begin() and ethosu_inference_end()
176   // If there is more then one commandstream the time is added together
177   ET_LOG(
178       Info,
179       "cpu_wait_for_npu_cntr : %" PRIu64 " CPU cycles",
180       ethosu_ArmWhenNPURunCycleCount);
181 
182   ET_LOG(Info, "Ethos-U PMU report:");
183   ET_LOG(Info, "ethosu_pmu_cycle_cntr : %" PRIu64, ethosu_pmuCycleCount);
184 
185   for (size_t i = 0; i < ethosu_pmuCountersUsed; i++) {
186     ET_LOG(Info, "ethosu_pmu_cntr%zd : %" PRIu64, i, ethosu_pmuEventCounts[i]);
187   }
188 #if defined(ETHOSU55) || defined(ETHOSU65)
189   ET_LOG(
190       Info,
191       "Ethos-U PMU Events:[ETHOSU_PMU_AXI0_RD_DATA_BEAT_RECEIVED, ETHOSU_PMU_AXI1_RD_DATA_BEAT_RECEIVED, ETHOSU_PMU_AXI0_WR_DATA_BEAT_WRITTEN, ETHOSU_PMU_NPU_IDLE]");
192 #elif defined(ETHOSU85)
193   ET_LOG(
194       Info,
195       "Ethos-U PMU Events:[ETHOSU_PMU_SRAM_RD_DATA_BEAT_RECEIVED, ETHOSU_PMU_SRAM_WR_DATA_BEAT_WRITTEN, ETHOSU_PMU_EXT_RD_DATA_BEAT_RECEIVED, ETHOSU_PMU_EXT_WR_DATA_BEAT_WRITTEN, ETHOSU_PMU_NPU_IDLE]");
196 #else
197 #error No NPU target defined
198 #endif
199 }
200 
201 #else
// Stub used when ETHOSU is not defined: there is nothing to measure, so
// starting a measurement does nothing.
void StartMeasurements() {
  // Intentionally empty.
}
203 
// Stub used when ETHOSU is not defined: no counters were started, so
// stopping a measurement does nothing and logs nothing.
void StopMeasurements() {
  // Intentionally empty.
}
205 
206 #endif
207