1 #include "Python.h"
2 #include "pycore_ceval.h"         // _PyPerf_Callbacks
3 #include "pycore_frame.h"
4 #include "pycore_interp.h"
5 
6 
7 #ifdef PY_HAVE_PERF_TRAMPOLINE
8 
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>               // memcpy(), strlen()
#include <sys/mman.h>             // mmap()
#include <sys/syscall.h>
#include <sys/time.h>             // gettimeofday()
#include <sys/types.h>
#include <unistd.h>               // sysconf()
17 
18 // ----------------------------------
19 //         Perf jitdump API
20 // ----------------------------------
21 
22 typedef struct {
23     FILE* perf_map;
24     PyThread_type_lock map_lock;
25     void* mapped_buffer;
26     size_t mapped_size;
27     int code_id;
28 } PerfMapJitState;
29 
30 static PerfMapJitState perf_jit_map_state;
31 
32 /*
Usually the binary and libraries are mapped in separate regions like below:
34 
35   address ->
36    --+---------------------+--//--+---------------------+--
37      | .text | .data | ... |      | .text | .data | ... |
38    --+---------------------+--//--+---------------------+--
39          myprog                      libc.so
40 
41 So it'd be easy and straight-forward to find a mapped binary or library from an
42 address.
43 
But for JIT code, the code arena only cares about the code section. But the
resulting DSOs (which are generated by perf inject -j) contain ELF headers and
unwind info too. Then it'd generate the following address space with synthesized
MMAP events. Let's say it has a sample between address B and C.
48 
49                                                sample
50                                                  |
51   address ->                         A       B   v   C
52   ---------------------------------------------------------------------------------------------------
53   /tmp/jitted-PID-0.so   | (headers) | .text | unwind info |
54   /tmp/jitted-PID-1.so           | (headers) | .text | unwind info |
55   /tmp/jitted-PID-2.so                   | (headers) | .text | unwind info |
56     ...
57   ---------------------------------------------------------------------------------------------------
58 
59 If it only maps the .text section, it'd find the jitted-PID-1.so but cannot see
60 the unwind info. If it maps both .text section and unwind sections, the sample
61 could be mapped to either jitted-PID-0.so or jitted-PID-1.so and it's confusing
62 which one is right. So to make perf happy we have non-overlapping ranges for each
63 DSO:
64 
65   address ->
66   -------------------------------------------------------------------------------------------------------
67   /tmp/jitted-PID-0.so   | (headers) | .text | unwind info |
68   /tmp/jitted-PID-1.so                         | (headers) | .text | unwind info |
69   /tmp/jitted-PID-2.so                                               | (headers) | .text | unwind info |
70     ...
71   -------------------------------------------------------------------------------------------------------
72 
As the trampolines are constant, we add a constant padding, but in general the padding needs to be the
size of the unwind info rounded up to 16 bytes. For our trampolines this is typically 0x50.
75  */
76 
/* Constant padding stored into trampoline_api.code_padding by
 * perf_map_jit_init(); the unwind data of one entry must fit inside it
 * (asserted in perf_map_jit_write_entry()). */
#define PERF_JIT_CODE_PADDING 0x100

/* Shorthand for the trampoline API slot kept in the runtime state. */
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api

/* Unsigned 64-bit word used for addresses and sizes in the dump records. */
typedef uint64_t uword;
typedef const char *CodeComments;

#define Pd "d"
#define MB (1024 * 1024)

/* ELF machine identifiers returned by GetElfMachineArchitecture(). */
#define EM_386        3
#define EM_ARM       40
#define EM_X86_64    62
#define EM_AARCH64  183
#define EM_RISCV    243
91 
/*
 * Target architecture selectors tested by GetElfMachineArchitecture().
 *
 * Each macro is always defined and expands to 0 or 1.  They are derived from
 * the compiler's predefined macros instead of being hard-coded to 0, so that
 * the machine type written into the jitdump header matches the architecture
 * this file is actually compiled for.  On an unrecognised target they are all
 * 0, which keeps the previous "return 0" behaviour.
 */
#if defined(__i386__)
#  define TARGET_ARCH_IA32 1
#else
#  define TARGET_ARCH_IA32 0
#endif

#if defined(__x86_64__) || defined(__amd64__)
#  define TARGET_ARCH_X64 1
#else
#  define TARGET_ARCH_X64 0
#endif

#if defined(__arm__)
#  define TARGET_ARCH_ARM 1
#else
#  define TARGET_ARCH_ARM 0
#endif

#if defined(__aarch64__)
#  define TARGET_ARCH_ARM64 1
#else
#  define TARGET_ARCH_ARM64 0
#endif

#if defined(__riscv) && (__riscv_xlen == 32)
#  define TARGET_ARCH_RISCV32 1
#else
#  define TARGET_ARCH_RISCV32 0
#endif

#if defined(__riscv) && (__riscv_xlen == 64)
#  define TARGET_ARCH_RISCV64 1
#else
#  define TARGET_ARCH_RISCV64 0
#endif
98 
/* Feature switches, all disabled.  None of them is referenced by the code
 * visible in this file. */
#define FLAG_generate_perf_jitdump    0
#define FLAG_write_protect_code       0
#define FLAG_write_protect_vm_isolate 0
#define FLAG_code_comments            0

/* Marks paths that should not be reached; expands to nothing. */
#define UNREACHABLE()
105 
GetElfMachineArchitecture(void)106 static uword GetElfMachineArchitecture(void) {
107 #if TARGET_ARCH_IA32
108     return EM_386;
109 #elif TARGET_ARCH_X64
110     return EM_X86_64;
111 #elif TARGET_ARCH_ARM
112     return EM_ARM;
113 #elif TARGET_ARCH_ARM64
114     return EM_AARCH64;
115 #elif TARGET_ARCH_RISCV32 || TARGET_ARCH_RISCV64
116     return EM_RISCV;
117 #else
118     UNREACHABLE();
119     return 0;
120 #endif
121 }
122 
/* File header written once at the start of the dump file by
 * perf_map_jit_write_header().  The layout has no implicit padding. */
typedef struct {
    uint32_t magic;            /* file signature */
    uint32_t version;          /* format version */
    uint32_t size;             /* size of this header in bytes */
    uint32_t elf_mach_target;  /* ELF machine identifier (EM_*) */
    uint32_t reserved;
    uint32_t process_id;       /* pid of the process producing the dump */
    uint64_t time_stamp;       /* creation time of the dump */
    uint64_t flags;
} Header;
133 
/* Record type identifiers stored in BaseEvent.event.  Only PerfLoad and
 * PerfUnwindingInfo are emitted by the code in this file. */
enum PerfEvent {
    PerfLoad          = 0,
    PerfMove          = 1,
    PerfDebugInfo     = 2,
    PerfClose         = 3,
    PerfUnwindingInfo = 4
};
141 
/* Prefix shared by every record written after the file header. */
struct BaseEvent {
    uint32_t event;       /* one of enum PerfEvent */
    uint32_t size;        /* total size of the record in bytes */
    uint64_t time_stamp;  /* filled from get_current_monotonic_ticks() */
};
147 
148 typedef struct {
149     struct BaseEvent base;
150     uint32_t process_id;
151     uint32_t thread_id;
152     uint64_t vma;
153     uint64_t code_address;
154     uint64_t code_size;
155     uint64_t code_id;
156 } CodeLoadEvent;
157 
158 typedef struct {
159     struct BaseEvent base;
160     uint64_t unwind_data_size;
161     uint64_t eh_frame_hdr_size;
162     uint64_t mapped_size;
163 } CodeUnwindingInfoEvent;
164 
/* Scale factor used to turn a struct timespec into nanoseconds. */
static const intptr_t nanoseconds_per_second = 1000000000;

/* DWARF pointer-encoding bytes used when filling in the EhFrameHeader. */
static const uint8_t DwarfUData4  = 0x03;  /* value format: unsigned 4 bytes */
static const uint8_t DwarfSData4  = 0x0b;  /* value format: signed 4 bytes */
static const uint8_t DwarfPcRel   = 0x10;  /* application: pc-relative */
static const uint8_t DwarfDataRel = 0x30;  /* application: data-relative */
173 // static uint8_t DwarfOmit = 0xff;
/* Header with a one-entry lookup table that perf_map_jit_write_entry()
 * writes after the .eh_frame bytes.  The layout has no implicit padding. */
typedef struct {
    unsigned char version;
    unsigned char eh_frame_ptr_enc;  /* encoding of eh_frame_ptr */
    unsigned char fde_count_enc;     /* encoding of eh_fde_count */
    unsigned char table_enc;         /* encoding of the from/to pair */
    int32_t eh_frame_ptr;
    int32_t eh_fde_count;
    int32_t from;
    int32_t to;
} EhFrameHeader;
184 
get_current_monotonic_ticks(void)185 static int64_t get_current_monotonic_ticks(void) {
186     struct timespec ts;
187     if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
188         UNREACHABLE();
189         return 0;
190     }
191     // Convert to nanoseconds.
192     int64_t result = ts.tv_sec;
193     result *= nanoseconds_per_second;
194     result += ts.tv_nsec;
195     return result;
196 }
197 
/*
 * Return the gettimeofday() time as a microsecond count.
 * Returns 0 if gettimeofday() reports an error.
 */
static int64_t get_current_time_microseconds(void) {
    struct timeval now;
    if (gettimeofday(&now, NULL) >= 0) {
        const int64_t whole_seconds_us = (int64_t)(now.tv_sec) * 1000000;
        return whole_seconds_us + now.tv_usec;
    }
    UNREACHABLE();
    return 0;
}
207 
208 
/*
 * Round `value` up to the nearest multiple of `multiple`.
 *
 * - A `multiple` of 0 returns `value` unchanged (avoids division by zero).
 * - A `value` that is already a multiple is returned unchanged.
 * - `multiple` is expected to be positive.
 *
 * The result is converted to size_t, so a negative result wraps around.
 */
static size_t round_up(int64_t value, int64_t multiple) {
    if (multiple == 0) {
        return (size_t)value;
    }

    const int64_t remainder = value % multiple;
    if (remainder == 0) {
        return (size_t)value;
    }

    if (remainder < 0) {
        /* C's % truncates toward zero, so for a negative value the remainder
         * is negative and the next multiple *above* value is reached by
         * removing it.  Adding (multiple - remainder) here would overshoot by
         * one whole multiple. */
        return (size_t)(value - remainder);
    }

    /* Step forward by the distance to the next multiple. */
    return (size_t)(value + (multiple - remainder));
}
229 
230 
perf_map_jit_write_fully(const void * buffer,size_t size)231 static void perf_map_jit_write_fully(const void* buffer, size_t size) {
232     FILE* out_file = perf_jit_map_state.perf_map;
233     const char* ptr = (const char*)(buffer);
234     while (size > 0) {
235         const size_t written = fwrite(ptr, 1, size, out_file);
236         if (written == 0) {
237             UNREACHABLE();
238             break;
239         }
240         size -= written;
241         ptr += written;
242     }
243 }
244 
perf_map_jit_write_header(int pid,FILE * out_file)245 static void perf_map_jit_write_header(int pid, FILE* out_file) {
246     Header header;
247     header.magic = 0x4A695444;
248     header.version = 1;
249     header.size = sizeof(Header);
250     header.elf_mach_target = GetElfMachineArchitecture();
251     header.process_id = pid;
252     header.time_stamp = get_current_time_microseconds();
253     header.flags = 0;
254     perf_map_jit_write_fully(&header, sizeof(header));
255 }
256 
perf_map_jit_init(void)257 static void* perf_map_jit_init(void) {
258     char filename[100];
259     int pid = getpid();
260     snprintf(filename, sizeof(filename) - 1, "/tmp/jit-%d.dump", pid);
261     const int fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666);
262     if (fd == -1) {
263         return NULL;
264     }
265 
266     const long page_size = sysconf(_SC_PAGESIZE);  // NOLINT(runtime/int)
267     if (page_size == -1) {
268         close(fd);
269         return NULL;
270     }
271 
272     // The perf jit interface forces us to map the first page of the file
273     // to signal that we are using the interface.
274     perf_jit_map_state.mapped_buffer = mmap(NULL, page_size, PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0);
275     if (perf_jit_map_state.mapped_buffer == NULL) {
276         close(fd);
277         return NULL;
278     }
279     perf_jit_map_state.mapped_size = page_size;
280     perf_jit_map_state.perf_map = fdopen(fd, "w+");
281     if (perf_jit_map_state.perf_map == NULL) {
282         close(fd);
283         return NULL;
284     }
285     setvbuf(perf_jit_map_state.perf_map, NULL, _IOFBF, 2 * MB);
286     perf_map_jit_write_header(pid, perf_jit_map_state.perf_map);
287 
288     perf_jit_map_state.map_lock = PyThread_allocate_lock();
289     if (perf_jit_map_state.map_lock == NULL) {
290         fclose(perf_jit_map_state.perf_map);
291         return NULL;
292     }
293     perf_jit_map_state.code_id = 0;
294 
295     trampoline_api.code_padding = PERF_JIT_CODE_PADDING;
296     return &perf_jit_map_state;
297 }
298 
/* DWARF definitions. */

/* Version byte written into the CIE by elf_init_ehframe(). */
#define DWRF_CIE_VERSION 1

/* Call frame instruction opcodes. */
enum {
    DWRF_CFA_nop                = 0x0,
    DWRF_CFA_offset_extended    = 0x5,
    DWRF_CFA_def_cfa            = 0xc,
    DWRF_CFA_def_cfa_offset     = 0xe,
    DWRF_CFA_offset_extended_sf = 0x11,
    DWRF_CFA_advance_loc        = 0x40,
    DWRF_CFA_offset             = 0x80
};

/* Exception-header pointer encodings. */
enum {
    DWRF_EH_PE_absptr   = 0x00,
    DWRF_EH_PE_omit     = 0xff,

    /* FDE data encoding. */
    DWRF_EH_PE_uleb128  = 0x01,
    DWRF_EH_PE_udata2   = 0x02,
    DWRF_EH_PE_udata4   = 0x03,
    DWRF_EH_PE_udata8   = 0x04,
    DWRF_EH_PE_sleb128  = 0x09,
    DWRF_EH_PE_sdata2   = 0x0a,
    DWRF_EH_PE_sdata4   = 0x0b,
    DWRF_EH_PE_sdata8   = 0x0c,
    DWRF_EH_PE_signed   = 0x08,

    /* FDE flags. */
    DWRF_EH_PE_pcrel    = 0x10,
    DWRF_EH_PE_textrel  = 0x20,
    DWRF_EH_PE_datarel  = 0x30,
    DWRF_EH_PE_funcrel  = 0x40,
    DWRF_EH_PE_aligned  = 0x50,

    DWRF_EH_PE_indirect = 0x80
};

/* Debug-info tags. */
enum {
    DWRF_TAG_compile_unit = 0x11
};

enum {
    DWRF_children_no  = 0,
    DWRF_children_yes = 1
};

/* Attribute names. */
enum {
    DWRF_AT_name      = 0x03,
    DWRF_AT_stmt_list = 0x10,
    DWRF_AT_low_pc    = 0x11,
    DWRF_AT_high_pc   = 0x12
};

/* Attribute forms. */
enum {
    DWRF_FORM_addr   = 0x01,
    DWRF_FORM_data4  = 0x06,
    DWRF_FORM_string = 0x08
};

/* Line-number program standard opcodes. */
enum {
    DWRF_LNS_extended_op  = 0,
    DWRF_LNS_copy         = 1,
    DWRF_LNS_advance_pc   = 2,
    DWRF_LNS_advance_line = 3
};

/* Line-number program extended opcodes. */
enum {
    DWRF_LNE_end_sequence = 1,
    DWRF_LNE_set_address  = 2
};
350 
/* DWARF register numbers for the supported targets.  Compilation fails on
 * any other architecture. */
enum {
#ifdef __x86_64__
    /* Yes, the order is strange, but correct: DX and CX come before BX. */
    DWRF_REG_AX = 0,
    DWRF_REG_DX = 1,
    DWRF_REG_CX = 2,
    DWRF_REG_BX = 3,
    DWRF_REG_SI = 4,
    DWRF_REG_DI = 5,
    DWRF_REG_BP = 6,
    DWRF_REG_SP = 7,
    DWRF_REG_8  = 8,
    DWRF_REG_9  = 9,
    DWRF_REG_10 = 10,
    DWRF_REG_11 = 11,
    DWRF_REG_12 = 12,
    DWRF_REG_13 = 13,
    DWRF_REG_14 = 14,
    DWRF_REG_15 = 15,
    DWRF_REG_RA = 16,
#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
    DWRF_REG_SP = 31,
    DWRF_REG_RA = 30,
#else
#    error "Unsupported target architecture"
#endif
};
378 
/* Cursor over the scratch buffer in which the unwind data is assembled. */
typedef struct ELFObjectContext
{
    uint8_t *p;          /* Next byte to be written. */
    uint8_t *startp;     /* First byte of the buffer. */
    uint8_t *eh_frame_p; /* Set by elf_init_ehframe() to the position right
                            after the CIE, i.e. where the FDE starts. */
    uint32_t code_size;  /* Size of machine code. */
} ELFObjectContext;
386 
387 /* Append a null-terminated string. */
388 static uint32_t
elfctx_append_string(ELFObjectContext * ctx,const char * str)389 elfctx_append_string(ELFObjectContext* ctx, const char* str)
390 {
391     uint8_t* p = ctx->p;
392     uint32_t ofs = (uint32_t)(p - ctx->startp);
393     do {
394         *p++ = (uint8_t)*str;
395     } while (*str++);
396     ctx->p = p;
397     return ofs;
398 }
399 
400 /* Append a SLEB128 value. */
401 static void
elfctx_append_sleb128(ELFObjectContext * ctx,int32_t v)402 elfctx_append_sleb128(ELFObjectContext* ctx, int32_t v)
403 {
404     uint8_t* p = ctx->p;
405     for (; (uint32_t)(v + 0x40) >= 0x80; v >>= 7) {
406         *p++ = (uint8_t)((v & 0x7f) | 0x80);
407     }
408     *p++ = (uint8_t)(v & 0x7f);
409     ctx->p = p;
410 }
411 
412 /* Append a ULEB128 to buffer. */
413 static void
elfctx_append_uleb128(ELFObjectContext * ctx,uint32_t v)414 elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v)
415 {
416     uint8_t* p = ctx->p;
417     for (; v >= 0x80; v >>= 7) {
418         *p++ = (char)((v & 0x7f) | 0x80);
419     }
420     *p++ = (char)v;
421     ctx->p = p;
422 }
423 
/*
 * Shortcuts to generate DWARF structures.
 *
 * Every macro operates on a local `uint8_t *p` write cursor that must be in
 * scope where it is expanded; DWRF_UV, DWRF_SV and DWRF_STR additionally need
 * a local `ELFObjectContext *ctx`.
 *
 * Multi-byte values are stored with memcpy() rather than through a casted
 * pointer: the cursor walks a byte buffer, so it carries no alignment
 * guarantee for uint16_t/uint32_t/uintptr_t stores.
 */
#define DWRF_U8(x) (*p++ = (x))
#define DWRF_I8(x) (*(int8_t*)p = (x), p++)
#define DWRF_U16(x)                                                                                     \
    (memcpy(p, &(uint16_t){(uint16_t)(x)}, sizeof(uint16_t)), p += sizeof(uint16_t))
#define DWRF_U32(x)                                                                                     \
    (memcpy(p, &(uint32_t){(uint32_t)(x)}, sizeof(uint32_t)), p += sizeof(uint32_t))
#define DWRF_ADDR(x)                                                                                    \
    (memcpy(p, &(uintptr_t){(uintptr_t)(x)}, sizeof(uintptr_t)), p += sizeof(uintptr_t))
#define DWRF_UV(x) (ctx->p = p, elfctx_append_uleb128(ctx, (x)), p = ctx->p)
#define DWRF_SV(x) (ctx->p = p, elfctx_append_sleb128(ctx, (x)), p = ctx->p)
#define DWRF_STR(str) (ctx->p = p, elfctx_append_string(ctx, (str)), p = ctx->p)
/* Emit DWRF_CFA_nop bytes until the address in `p` is a multiple of `s`
 * (`s` must be a power of two).  Must be followed by a semicolon. */
#define DWRF_ALIGNNOP(s)                                                                                \
    do {                                                                                                \
        while ((uintptr_t)p & ((s)-1)) {                                                                \
            *p++ = DWRF_CFA_nop;                                                                        \
        }                                                                                               \
    } while (0)
/* Reserve a 4-byte length field, run `stmt`, then store the number of bytes
 * that `stmt` emitted into that field.  Expands to a complete block, so no
 * trailing semicolon is needed. */
#define DWRF_SECTION(name, stmt)                                                                        \
    {                                                                                                   \
        uint8_t* szp_##name = p;                                                                        \
        p += 4;                                                                                         \
        stmt;                                                                                           \
        const uint32_t len_##name = (uint32_t)((p - szp_##name) - 4);                                   \
        memcpy(szp_##name, &len_##name, sizeof(len_##name));                                            \
    }
444 
445 /* Initialize .eh_frame section. */
446 static void
elf_init_ehframe(ELFObjectContext * ctx)447 elf_init_ehframe(ELFObjectContext* ctx)
448 {
449     uint8_t* p = ctx->p;
450     uint8_t* framep = p;
451 
452     /* Emit DWARF EH CIE. */
453     DWRF_SECTION(CIE, DWRF_U32(0); /* Offset to CIE itself. */
454                  DWRF_U8(DWRF_CIE_VERSION);
455                  DWRF_STR("zR"); /* Augmentation. */
456                  DWRF_UV(1); /* Code alignment factor. */
457                  DWRF_SV(-(int64_t)sizeof(uintptr_t)); /* Data alignment factor. */
458                  DWRF_U8(DWRF_REG_RA); /* Return address register. */
459                  DWRF_UV(1);
460                  DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); /* Augmentation data. */
461                  DWRF_U8(DWRF_CFA_def_cfa); DWRF_UV(DWRF_REG_SP); DWRF_UV(sizeof(uintptr_t));
462                  DWRF_U8(DWRF_CFA_offset|DWRF_REG_RA); DWRF_UV(1);
463                  DWRF_ALIGNNOP(sizeof(uintptr_t));
464     )
465 
466     ctx->eh_frame_p = p;
467 
468     /* Emit DWARF EH FDE. */
469     DWRF_SECTION(FDE, DWRF_U32((uint32_t)(p - framep)); /* Offset to CIE. */
470                  DWRF_U32(-0x30); /* Machine code offset relative to .text. */
471                  DWRF_U32(ctx->code_size); /* Machine code length. */
472                  DWRF_U8(0); /* Augmentation data. */
473     /* Registers saved in CFRAME. */
474 #ifdef __x86_64__
475                  DWRF_U8(DWRF_CFA_advance_loc | 4);
476                  DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(16);
477                  DWRF_U8(DWRF_CFA_advance_loc | 6);
478                  DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(8);
479     /* Extra registers saved for JIT-compiled code. */
480 #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
481                  DWRF_U8(DWRF_CFA_advance_loc | 1);
482                  DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(16);
483                  DWRF_U8(DWRF_CFA_offset | 29); DWRF_UV(2);
484                  DWRF_U8(DWRF_CFA_offset | 30); DWRF_UV(1);
485                  DWRF_U8(DWRF_CFA_advance_loc | 3);
486                  DWRF_U8(DWRF_CFA_offset | -(64 - 29));
487                  DWRF_U8(DWRF_CFA_offset | -(64 - 30));
488                  DWRF_U8(DWRF_CFA_def_cfa_offset);
489                  DWRF_UV(0);
490 #else
491 #    error "Unsupported target architecture"
492 #endif
493                  DWRF_ALIGNNOP(sizeof(uintptr_t));)
494 
495     ctx->p = p;
496 }
497 
/*
 * Append the records describing one piece of generated code to the dump
 * file: first a PerfUnwindingInfo record (.eh_frame bytes, EhFrameHeader and
 * padding), then a PerfLoad record (symbol name and a copy of the code).
 *
 *   state      unused; the writer always works on perf_jit_map_state
 *   code_addr  start address of the code
 *   code_size  size of the code in bytes
 *   co         code object supplying the qualified name and file name that
 *              make up the symbol "py::<qualname>:<filename>"
 *
 * The dump file is initialised on first use.  If initialisation, formatting
 * or the name allocation fails the entry is silently skipped.
 */
static void perf_map_jit_write_entry(void *state, const void *code_addr,
                         unsigned int code_size, PyCodeObject *co)
{
    (void)state;

    if (perf_jit_map_state.perf_map == NULL) {
        if (perf_map_jit_init() == NULL) {
            return;
        }
    }

    // PyUnicode_AsUTF8() returns NULL on failure; never hand NULL to "%s".
    // NOTE(review): a failure also leaves an exception set, which this
    // function does not clear -- confirm whether the caller expects that.
    const char *entry = "";
    if (co->co_qualname != NULL) {
        entry = PyUnicode_AsUTF8(co->co_qualname);
        if (entry == NULL) {
            entry = "";
        }
    }
    const char *filename = "";
    if (co->co_filename != NULL) {
        filename = PyUnicode_AsUTF8(co->co_filename);
        if (filename == NULL) {
            filename = "";
        }
    }

    // Build the symbol name in a heap buffer of exactly the right size.
    const int formatted_length = snprintf(NULL, 0, "py::%s:%s", entry, filename);
    if (formatted_length < 0) {
        return;
    }
    const size_t perf_map_entry_size = (size_t)formatted_length + 1;
    char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size);
    if (perf_map_entry == NULL) {
        return;
    }
    snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename);

    const size_t name_length = strlen(perf_map_entry);
    uword base = (uword)code_addr;
    uword size = code_size;

    // Write the code unwinding info event.

    // Create unwinding information (eh frame)
    ELFObjectContext ctx;
    char buffer[1024];
    ctx.code_size = code_size;
    ctx.startp = ctx.p = (uint8_t*)buffer;
    elf_init_ehframe(&ctx);
    int eh_frame_size = (int)(ctx.p - ctx.startp);

    // Populate the unwind info event for perf
    CodeUnwindingInfoEvent ev2;
    ev2.base.event = PerfUnwindingInfo;
    ev2.base.time_stamp = get_current_monotonic_ticks();
    ev2.unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size;
    // Ensure we have enough space between DSOs when perf maps them
    assert(ev2.unwind_data_size <= PERF_JIT_CODE_PADDING);
    ev2.eh_frame_hdr_size = sizeof(EhFrameHeader);
    ev2.mapped_size = round_up(ev2.unwind_data_size, 16);
    // The record is padded so that its total size is a multiple of 8.
    int content_size = sizeof(ev2) + sizeof(EhFrameHeader) + eh_frame_size;
    int padding_size = round_up(content_size, 8) - content_size;
    ev2.base.size = content_size + padding_size;
    perf_map_jit_write_fully(&ev2, sizeof(ev2));


    // Populate the eh Frame header
    EhFrameHeader f;
    f.version = 1;
    f.eh_frame_ptr_enc = DwarfSData4 | DwarfPcRel;
    f.fde_count_enc = DwarfUData4;
    f.table_enc = DwarfSData4 | DwarfDataRel;
    f.eh_frame_ptr = -(eh_frame_size + 4 * sizeof(unsigned char));
    f.eh_fde_count = 1;
    f.from = -(round_up(code_size, 8) + eh_frame_size);
    int cie_size = (int)(ctx.eh_frame_p - ctx.startp);
    f.to = -(eh_frame_size - cie_size);

    perf_map_jit_write_fully(ctx.startp, eh_frame_size);
    perf_map_jit_write_fully(&f, sizeof(f));

    // padding_size is always smaller than 8.
    static const char padding_bytes[8] = {0};
    perf_map_jit_write_fully(padding_bytes, padding_size);

    // Write the code load event.
    CodeLoadEvent ev;
    ev.base.event = PerfLoad;
    ev.base.size = sizeof(ev) + (name_length+1) + size;
    ev.base.time_stamp = get_current_monotonic_ticks();
    ev.process_id = getpid();
    ev.thread_id = syscall(SYS_gettid);
    ev.vma = base;
    ev.code_address = base;
    ev.code_size = size;
    perf_jit_map_state.code_id += 1;
    ev.code_id = perf_jit_map_state.code_id;

    perf_map_jit_write_fully(&ev, sizeof(ev));
    perf_map_jit_write_fully(perf_map_entry, name_length+1);
    perf_map_jit_write_fully((void*)(base), size);

    // The name has been copied into the stream; release the temporary buffer.
    PyMem_RawFree(perf_map_entry);
}
591 
perf_map_jit_fini(void * state)592 static int perf_map_jit_fini(void* state) {
593     if (perf_jit_map_state.perf_map != NULL) {
594         // close the file
595         PyThread_acquire_lock(perf_jit_map_state.map_lock, 1);
596         fclose(perf_jit_map_state.perf_map);
597         PyThread_release_lock(perf_jit_map_state.map_lock);
598 
599         // clean up the lock and state
600         PyThread_free_lock(perf_jit_map_state.map_lock);
601         perf_jit_map_state.perf_map = NULL;
602     }
603     if (perf_jit_map_state.mapped_buffer != NULL) {
604         munmap(perf_jit_map_state.mapped_buffer, perf_jit_map_state.mapped_size);
605     }
606     trampoline_api.state = NULL;
607     return 0;
608 }
609 
610 _PyPerf_Callbacks _Py_perfmap_jit_callbacks = {
611     &perf_map_jit_init,
612     &perf_map_jit_write_entry,
613     &perf_map_jit_fini,
614 };
615 
616 #endif
617