• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2017 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by a BSD-style license that can be
4  * found in the LICENSE file.
5  */
6 
7 #ifndef __CROSVM_H__
8 #define __CROSVM_H__
9 
10 #include <assert.h>
11 #include <stdint.h>
12 #include <stdbool.h>
13 
14 #include <linux/kvm.h>
15 
16 #ifdef  __cplusplus
17 extern "C" {
18 #endif
19 
20 /*
21  * This module is used to implement a plugin for crosvm.
22  *
23  * A plugin for crosvm interfaces with the virtual machine using the `struct
24  * crosvm` object and its child objects. A typical plugin is expected to call
25  * `crosvm_connect`, perform some amount of setup with the functions defined
26  * here, get a handle to every vcpu using `struct crosvm_vcpu` and then call
27  * `crosvm_start`. Each vcpu will then be waited on with `crosvm_vcpu_wait`,
28  * each event will be responded to by the plugin, and then the vcpu is resumed
29  * with `crosvm_vcpu_resume`. The vcpu state can only be examined and modified
30  * between the `crosvm_vcpu_wait` and `crosvm_vcpu_resume` calls. The crosvm
31  * connection can be used to modify global virtual machine state at any time,
32  * with some structural restrictions after `crosvm_start` is called.
33  *
34  * In general, functions that return an `int` return 0 on success or a non-
35  * negative file descriptor if one is expected. A negative return value is an
36  * errno and indicates error. Functions that take a pointer-to-pointer to an
37  * opaque structure either return a structure or delete and nullify that
38  * structure pointer.
39  */
40 
41 /*
42  * We use Semantic Versioning (http://semver.org/) here, which means that as
43  * long as MAJOR is 0, breaking changes can occur, but once MAJOR is non-zero, a
44  * breaking change requires a MAJOR version bump. The MINOR number increases as
45  * backward compatible functionality is added. The PATCH number increases as
46  * bug fixes are made. The version numbers indicated here are for the plugin
47  * API and do not indicate anything about what version of crosvm is running.
48  */
49 #define CROSVM_API_MAJOR 0
50 #define CROSVM_API_MINOR 17
51 #define CROSVM_API_PATCH 0
52 
53 enum crosvm_address_space { /* Address spaces in which a vcpu access can occur. */
54   /* I/O port address space. */
55   CROSVM_ADDRESS_SPACE_IOPORT = 0,
56   /* Physical memory (MMIO) address space. */
57   CROSVM_ADDRESS_SPACE_MMIO,
58 };
59 
60 /* Handle to the parent crosvm process. */
61 struct crosvm;
62 
63 /* Handle to a registered ioeventfd. */
64 struct crosvm_io;
65 
66 /* Handle to a registered range of shared memory. */
67 struct crosvm_memory;
68 
69 /* Handle to a registered irqfd. */
70 struct crosvm_irq;
71 
72 /* Handle to one of the VM's VCPUs. */
73 struct crosvm_vcpu;
74 
75 /*
76  * Connects to the parent crosvm process and returns a new `struct crosvm`
77  * interface object.
78  *
79  * This is the entry point for interfacing with crosvm as a plugin. This should
80  * be called before any other function. The returned object is not thread-safe.
81  */
82 int crosvm_connect(struct crosvm**);
83 
84 /*
85  * Creates another connection for interfacing with crosvm concurrently.
86  *
87  * The new connection behaves exactly like the original `struct crosvm` but can
88  * be used concurrently on a different thread than the original. Actual
89  * execution order of the requests to crosvm is unspecified but every request is
90  * completed when the `crosvm_*` call returns.
91  *
92  * It is invalid to call this after `crosvm_start` is called on any `struct
93  * crosvm`.
94  */
95 int crosvm_new_connection(struct crosvm*, struct crosvm**);
96 
97 /*
98  * Destroys this connection and tells the parent crosvm process to stop
99  * listening for messages from it.
100  */
101 int crosvm_destroy_connection(struct crosvm**);
102 
103 /*
104  * Gets an eventfd that is triggered when this plugin should exit.
105  *
106  * The returned eventfd is owned by the caller but the underlying event is
107  * shared and will therefore only trigger once.
108  */
109 int crosvm_get_shutdown_eventfd(struct crosvm*);
110 
111 /*
112  * Gets a bool indicating if a KVM_CAP_* enum is supported on this VM
113  */
114 int crosvm_check_extension(struct crosvm*, uint32_t __extension,
115                            bool *has_extension);
116 
117 /*
118  * Queries x86 cpuid features which are supported by the hardware and
119  * kvm.
120  */
121 int crosvm_get_supported_cpuid(struct crosvm*, uint32_t __entry_count,
122                                struct kvm_cpuid_entry2 *__cpuid_entries,
123                                uint32_t *__out_count);
124 
125 /*
126  * Queries x86 cpuid features which are emulated by kvm.
127  */
128 int crosvm_get_emulated_cpuid(struct crosvm*, uint32_t __entry_count,
129                               struct kvm_cpuid_entry2 *__cpuid_entries,
130                               uint32_t *__out_count);
131 
132 /*
133  * Queries kvm for list of supported MSRs.
134  */
135 int crosvm_get_msr_index_list(struct crosvm*, uint32_t __entry_count,
136                               uint32_t *__msr_indices,
137                               uint32_t *__out_count);
138 
139 /*
140  * The network configuration for a crosvm instance.
141  */
142 struct crosvm_net_config { /* Output structure of crosvm_net_get_config. */
143   /*
144    * The tap device fd. This fd is owned by the caller, and should be closed
145    * by the caller when it is no longer in use.
146    */
147   int tap_fd;
148   /* The IPv4 address of the tap interface, in network (big-endian) format. */
149   uint32_t host_ip;
150   /* The netmask of the tap interface subnet, in network (big-endian) format. */
151   uint32_t netmask;
152   /* The MAC address (6 bytes) of the host side of the tap interface. */
153   uint8_t host_mac_address[6];
154   uint8_t _padding[2]; /* Explicit padding; the struct is asserted to be 20 bytes. */
155 };
156 
157 #ifdef static_assert
158 static_assert(sizeof(struct crosvm_net_config) == 20,
159               "extra padding in struct crosvm_net_config");
160 #endif
161 
162 /*
163  * Gets the network configuration.
164  */
165 int crosvm_net_get_config(struct crosvm*, struct crosvm_net_config*);
166 
167 /*
168  * Registers a range in the given address space that, when accessed, will block
169  * and wait for a crosvm_vcpu_resume call.
170  *
171  * To unreserve a range previously reserved by this function, pass the |__space|
172  * and |__start| of the old reservation with a 0 |__length|.
173  */
174 int crosvm_reserve_range(struct crosvm*, uint32_t __space, uint64_t __start,
175                          uint64_t __length);
176 
177 /*
178  * Sets the state of the given irq pin.
179  */
180 int crosvm_set_irq(struct crosvm*, uint32_t __irq_id, bool __active);
181 
182 enum crosvm_irq_route_kind { /* Values for the `kind` field of `struct crosvm_irq_route`. */
183   /* IRQ pin to GSI route; described by the `irqchip` union member. */
184   CROSVM_IRQ_ROUTE_IRQCHIP = 0,
185   /* MSI address and data to GSI route; described by the `msi` union member. */
186   CROSVM_IRQ_ROUTE_MSI,
187 };
188 
189 /* One entry in the irq routing table passed to crosvm_set_irq_routing. */
190 struct crosvm_irq_route {
191   /* The IRQ number to trigger. */
192   uint32_t irq_id;
193   /* A `crosvm_irq_route_kind` indicating which union member to use. */
194   uint32_t kind;
195   union { /* Anonymous union; `kind` selects the valid member. */
196     struct {
197       /*
198        * One of KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, or
199        * KVM_IRQCHIP_IOAPIC indicating which irqchip the indicated pin is on.
200        */
201       uint32_t irqchip;
202       /* The pin on the irqchip used to trigger the IRQ. */
203       uint32_t pin;
204     } irqchip; /* Valid when `kind` is CROSVM_IRQ_ROUTE_IRQCHIP. */
205 
206     struct {
207       /* Address that triggers the irq. */
208       uint64_t address;
209       /* Data written to `address` that triggers the irq. */
210       uint32_t data;
211 
212       uint8_t _reserved[4]; /* Explicit padding after `data`. */
213     } msi; /* Valid when `kind` is CROSVM_IRQ_ROUTE_MSI. */
214 
215     uint8_t _reserved[16]; /* Fixes the union size at 16 bytes. */
216   };
217 };
218 
219 #ifdef static_assert
220 static_assert(sizeof(struct crosvm_irq_route) == 24,
221               "extra padding in struct crosvm_irq_route");
222 #endif
223 
224 /*
225  * Sets all the gsi routing entries to those indicated by `routes`.
226  *
227  * To remove all routing entries, pass NULL for `routes` and 0 to route_count.
228  */
229 int crosvm_set_irq_routing(struct crosvm*, uint32_t __route_count,
230                            const struct crosvm_irq_route* __routes);
231 
232 /* Gets the state of interrupt controller in a VM. */
233 int crosvm_get_pic_state(struct crosvm *, bool __primary,
234                          struct kvm_pic_state *__pic_state);
235 
236 /* Sets the state of interrupt controller in a VM. */
237 int crosvm_set_pic_state(struct crosvm *, bool __primary,
238                          const struct kvm_pic_state *__pic_state);
239 
240 /* Gets the state of IOAPIC in a VM. */
241 int crosvm_get_ioapic_state(struct crosvm *,
242                             struct kvm_ioapic_state *__ioapic_state);
243 
244 /* Sets the state of IOAPIC in a VM. */
245 int crosvm_set_ioapic_state(struct crosvm *,
246                             const struct kvm_ioapic_state *__ioapic_state);
247 
248 /* Gets the state of the programmable interval timer (PIT) in a VM. */
249 int crosvm_get_pit_state(struct crosvm *, struct kvm_pit_state2 *__pit_state);
250 
251 /* Sets the state of the programmable interval timer (PIT) in a VM. */
252 int crosvm_set_pit_state(struct crosvm *,
253                          const struct kvm_pit_state2 *__pit_state);
254 
255 /* Gets the current timestamp of kvmclock as seen by the VM. */
256 int crosvm_get_clock(struct crosvm *, struct kvm_clock_data *__clock_data);
257 
258 /* Sets the current timestamp of kvmclock for the VM. */
259 int crosvm_set_clock(struct crosvm *,
260                      const struct kvm_clock_data *__clock_data);
261 
262 /* Sets the identity map address as in the KVM_SET_IDENTITY_MAP_ADDR ioctl. */
263 int crosvm_set_identity_map_addr(struct crosvm*, uint32_t __addr);
264 
265 /*
266  * Triggers a CROSVM_VCPU_EVENT_KIND_PAUSED event on each vcpu identified by
267  * |__cpu_mask|.
268  *
269  * The |__user| pointer will be given as the `user` pointer in the `struct
270  * crosvm_vcpu_event` returned by crosvm_vcpu_wait.
271  */
272 int crosvm_pause_vcpus(struct crosvm*, uint64_t __cpu_mask, void* __user);
273 
274 /*
275  * Call once initialization is done. This indicates that crosvm should proceed
276  * with running the VM.
277  *
278  * After this call, this function is no longer valid to call.
279  */
280 int crosvm_start(struct crosvm*);
281 
282 /*
283  * Allocates an eventfd that is triggered asynchronously on write in |__space|
284  * at the given |__addr|.
285  *
286  * If |__datamatch| is non-NULL, it must contain |__len| bytes that will be
287  * compared to the bytes being written by the vcpu which will only trigger the
288  * eventfd if equal. If |__datamatch| is NULL all writes to the address will
289  * trigger the eventfd.
290  *
291  * On successful allocation, returns a crosvm_io.  Obtain the actual fd
292  * by passing this result to crosvm_io_event_fd().
293  */
294 int crosvm_create_io_event(struct crosvm*, uint32_t __space, uint64_t __addr,
295                            uint32_t __len, const uint8_t* __datamatch,
296                            struct crosvm_io**);
297 
298 /*
299  * Destroys the given io event and unregisters it from the VM.
300  */
301 int crosvm_destroy_io_event(struct crosvm*, struct crosvm_io**);
302 
303 /*
304  * Gets the eventfd triggered by the given io event.
305  *
306  * The returned fd is owned by the given `struct crosvm_io` and has a lifetime
307  * equal to that handle.
308  */
309 int crosvm_io_event_fd(struct crosvm_io*);
310 
311 /*
312  * Creates a shared memory segment backed by a memfd.
313  *
314  * Inserts non-overlapping memory pages in the guest physical address range
315  * specified by |__start| address and |__length| bytes. The memory pages are
316  * backed by the memfd |__fd| and are taken starting at |__offset| bytes from
317  * the beginning of the memfd.
318  *
319  * The `memfd_create` syscall must be used to create |__fd| and a shrink seal
320  * must have been added to |__fd|. The memfd must be at least
321  * `__length+__offset` bytes long.
322  *
323  * If |__read_only| is true, attempts by the guest to write to this memory
324  * region will trigger an IO access exit.
325  *
326  * To use the `crosvm_memory_get_dirty_log` method with the returned object,
327  * |__dirty_log| must be true.
328  */
329 int crosvm_create_memory(struct crosvm*, int __fd, uint64_t __offset,
330                          uint64_t __length, uint64_t __start,
331                          bool __read_only, bool __dirty_log,
332                          struct crosvm_memory**);
333 
334 /*
335  * Destroys the given shared memory and unregisters it from guest physical
336  * address space.
337  */
338 int crosvm_destroy_memory(struct crosvm*, struct crosvm_memory**);
339 
340 /*
341  * For a given memory region returns a bitmap containing any pages
342  * dirtied since the last call to this function.
343  *
344  * The |__log| array must have as many bits as the memory segment has pages.
345  */
346 int crosvm_memory_get_dirty_log(struct crosvm*, struct crosvm_memory*,
347                                 uint8_t* __log);
348 
349 /*
350  * Creates an irq eventfd that can be used to trigger an irq asynchronously.
351  *
352  * The irq that will be triggered is identified as pin |__irq_id|.
353  */
354 int crosvm_create_irq_event(struct crosvm*, uint32_t __irq_id,
355                             struct crosvm_irq**);
356 
357 /*
358  * Unregisters and destroys an irq eventfd.
359  */
360 int crosvm_destroy_irq_event(struct crosvm*, struct crosvm_irq**);
361 
362 /*
363  * Gets the eventfd used to trigger the irq
364  *
365  * The returned fd is owned by the given `struct crosvm_irq` and has a lifetime
366  * equal to that handle.
367  */
368 int crosvm_irq_event_get_fd(const struct crosvm_irq*);
369 
370 /*
371  * Gets the resample eventfd associated with the crosvm_irq object.
372  */
373 int crosvm_irq_event_get_resample_fd(const struct crosvm_irq*);
374 
375 enum crosvm_vcpu_event_kind { /* Values for the `kind` field of `struct crosvm_vcpu_event`. */
376   /*
377    * The first event returned by crosvm_vcpu_wait, indicating the VCPU has been
378    * created but not yet started for the first time.
379    */
380   CROSVM_VCPU_EVENT_KIND_INIT = 0,
381 
382   /*
383    * Access to an address in a space previously reserved by
384    * crosvm_reserve_range. Details are in the `io_access` union member.
385    */
386   CROSVM_VCPU_EVENT_KIND_IO_ACCESS,
387 
388   /*
389    * A pause on this vcpu (and possibly others) was requested by this plugin in
390    * a `crosvm_pause_vcpus` call. The `user` union member carries its pointer.
391    */
392   CROSVM_VCPU_EVENT_KIND_PAUSED,
393 };
394 
395 struct crosvm_vcpu_event { /* Event description filled in by crosvm_vcpu_wait. */
396   /* Indicates the kind of event and which union member is valid. */
397   uint32_t kind;
398 
399   uint8_t _padding[4]; /* Explicit padding between `kind` and the union. */
400 
401   union {
402     /* Valid when `kind` is CROSVM_VCPU_EVENT_KIND_IO_ACCESS. */
403     struct {
404       /*
405        * One of `enum crosvm_address_space` indicating which address space the
406        * access occurred in.
407        */
408       uint32_t address_space;
409 
410       uint8_t _padding[4]; /* Explicit padding before the 64-bit `address`. */
411 
412       /* The address that the access occurred at. */
413       uint64_t address;
414 
415       /*
416        * For a write (`is_write` true), the first `length` bytes are the data
417        * written by the vcpu; for a read, `data` supplies the read's result.
418        */
419       uint8_t *data;
420 
421       /*
422        * Number of bytes in the access. In the case that the access is larger
423        * than 8 bytes, such as by AVX-512 instructions, multiple vcpu access
424        * events are generated serially to cover each 8 byte fragment of the
425        * access.
426        *
427        * Larger I/O accesses are possible.  "rep in" can generate I/Os larger
428        * than 8 bytes, though such accesses can also be split into multiple
429        * events.  Currently kvm doesn't seem to batch "rep out" I/Os.
430        */
431       uint32_t length;
432 
433       /*
434        * True if the vcpu was attempting to write, false in case of an attempt
435        * to read.
436        */
437       uint8_t is_write;
438 
439       uint8_t _reserved[3]; /* Explicit padding after `is_write`. */
440     } io_access;
441 
442     /* Valid for CROSVM_VCPU_EVENT_KIND_PAUSED: pointer given to crosvm_pause_vcpus. */
443     void *user;
444 
445     uint8_t _reserved[64]; /* Fixes the union size at 64 bytes. */
446   };
447 };
448 
449 #ifdef static_assert
450 static_assert(sizeof(struct crosvm_vcpu_event) == 72,
451               "extra padding in struct crosvm_vcpu_event");
452 #endif
453 
454 /*
455  * Gets the vcpu object for the given |__cpu_id|.
456  *
457  *
458  * The `struct crosvm_vcpu` is owned by `struct crosvm`. Each call with the same
459  * `crosvm` and |__cpu_id| will yield the same pointer. The `crosvm_vcpu` does
460  * not need to be destroyed or created explicitly.
461  *
462  * The range of valid |__cpu_id|s is 0 to the number of vcpus - 1. To get every
463  * `crosvm_vcpu`, simply call this function iteratively with increasing
464  * |__cpu_id| until `-ENOENT` is returned.
465  *
466  */
467 int crosvm_get_vcpu(struct crosvm*, uint32_t __cpu_id, struct crosvm_vcpu**);
468 
469 /*
470  * Blocks until a vcpu event happens that requires a response.
471  *
472  * When crosvm_vcpu_wait returns successfully, the event structure is filled
473  * with the description of the event that occurred. The vcpu will suspend
474  * execution until a matching call to `crosvm_vcpu_resume` is made. Until such a
475  * call is made, the vcpu's run structure can be read and written using any
476  * `crosvm_vcpu_get` or `crosvm_vcpu_set` function.
477  */
478 int crosvm_vcpu_wait(struct crosvm_vcpu*, struct crosvm_vcpu_event*);
479 
480 /*
481  * Resumes execution of a vcpu after a call to `crosvm_vcpu_wait` returns.
482  *
483  * In the case that the event was a read operation, `data` indicates what the
484  * result of that read operation should be. If the read operation was larger
485  * than 8 bytes, such as by AVX-512 instructions, this will not actually resume
486  * the vcpu, but instead generate another vcpu access event of the next fragment
487  * of the read, which can be handled by the next `crosvm_vcpu_wait` call.
488  *
489  * Once the vcpu event has been responded to sufficiently enough to resume
490  * execution, `crosvm_vcpu_resume` should be called. After `crosvm_vcpu_resume`
491  * is called, none of the vcpu state operations are valid until the next time
492  * `crosvm_vcpu_wait` returns.
493  */
494 int crosvm_vcpu_resume(struct crosvm_vcpu*);
495 
496 /* Gets the state of the vcpu's registers. */
497 int crosvm_vcpu_get_regs(struct crosvm_vcpu*, struct kvm_regs*);
498 /* Sets the state of the vcpu's registers. */
499 int crosvm_vcpu_set_regs(struct crosvm_vcpu*, const struct kvm_regs*);
500 
501 /* Gets the state of the vcpu's special registers. */
502 int crosvm_vcpu_get_sregs(struct crosvm_vcpu*, struct kvm_sregs*);
503 /* Sets the state of the vcpu's special registers. */
504 int crosvm_vcpu_set_sregs(struct crosvm_vcpu*, const struct kvm_sregs*);
505 
506 /* Gets the state of the vcpu's floating point unit. */
507 int crosvm_vcpu_get_fpu(struct crosvm_vcpu*, struct kvm_fpu*);
508 /* Sets the state of the vcpu's floating point unit. */
509 int crosvm_vcpu_set_fpu(struct crosvm_vcpu*, const struct kvm_fpu*);
510 
511 /* Gets the state of the vcpu's debug registers. */
512 int crosvm_vcpu_get_debugregs(struct crosvm_vcpu*, struct kvm_debugregs*);
513 /* Sets the state of the vcpu's debug registers. */
514 int crosvm_vcpu_set_debugregs(struct crosvm_vcpu*, const struct kvm_debugregs*);
515 
516 /* Gets the state of the vcpu's xcr registers. */
517 int crosvm_vcpu_get_xcrs(struct crosvm_vcpu*, struct kvm_xcrs*);
518 /* Sets the state of the vcpu's xcr registers. */
519 int crosvm_vcpu_set_xcrs(struct crosvm_vcpu*, const struct kvm_xcrs*);
520 
521 /* Gets the MSRs of the vcpu indicated by the index field of each entry. */
522 int crosvm_vcpu_get_msrs(struct crosvm_vcpu*, uint32_t __msr_count,
523                          struct kvm_msr_entry *__msr_entries,
524                          uint32_t *__out_count);
525 /* Sets the MSRs of the vcpu indicated by the index field of each entry. */
526 int crosvm_vcpu_set_msrs(struct crosvm_vcpu*, uint32_t __msr_count,
527                          const struct kvm_msr_entry *__msr_entries);
528 
529 /* Sets the responses to the cpuid instructions executed on this vcpu. */
530 int crosvm_vcpu_set_cpuid(struct crosvm_vcpu*, uint32_t __cpuid_count,
531                           const struct kvm_cpuid_entry2 *__cpuid_entries);
532 
533 /* Gets state of LAPIC of the VCPU. */
534 int crosvm_vcpu_get_lapic_state(struct crosvm_vcpu *,
535                                 struct kvm_lapic_state *__lapic_state);
536 /* Sets state of LAPIC of the VCPU. */
537 int crosvm_vcpu_set_lapic_state(struct crosvm_vcpu *,
538                                 const struct kvm_lapic_state *__lapic_state);
539 
540 /* Gets the "multiprocessor state" of given VCPU. */
541 int crosvm_vcpu_get_mp_state(struct crosvm_vcpu *,
542                              struct kvm_mp_state *__mp_state);
543 /* Sets the "multiprocessor state" of given VCPU. */
544 int crosvm_vcpu_set_mp_state(struct crosvm_vcpu *,
545                              const struct kvm_mp_state *__mp_state);
546 
547 /* Gets currently pending exceptions, interrupts, NMIs, etc for VCPU. */
548 int crosvm_vcpu_get_vcpu_events(struct crosvm_vcpu *,
549                                 struct kvm_vcpu_events *);
550 
551 /* Sets currently pending exceptions, interrupts, NMIs, etc for VCPU. */
552 int crosvm_vcpu_set_vcpu_events(struct crosvm_vcpu *,
553                                 const struct kvm_vcpu_events *);
554 
555 #ifdef  __cplusplus
556 }
557 #endif
558 
559 #endif
560