/*
 * Copyright 2017 The Chromium OS Authors. All rights reserved.
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef __CROSVM_H__
#define __CROSVM_H__

#include <assert.h>
#include <stdint.h>
#include <stdbool.h>

#include <linux/kvm.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * This module is used to implement a plugin for crosvm.
 *
 * A plugin for crosvm interfaces with the virtual machine using the `struct
 * crosvm` object and its child objects. A typical plugin is expected to call
 * `crosvm_connect`, perform some amount of setup with the functions defined
 * here, get a handle to every vcpu using `struct crosvm_vcpu` and then call
 * `crosvm_start`. Each vcpu will then be waited on with `crosvm_vcpu_wait`,
 * each event will be responded to by the plugin, and then the vcpu is resumed
 * with `crosvm_vcpu_resume`. The vcpu state can only be examined and modified
 * between the `crosvm_vcpu_wait` and `crosvm_vcpu_resume` calls. The crosvm
 * connection can be used to modify global virtual machine state at any time,
 * with some structural restrictions after `crosvm_start` is called.
 *
 * In general, functions that return an `int` return 0 on success or a non-
 * negative file descriptor if one is expected. A negative return value is an
 * errno and indicates error. Functions that take a pointer-to-pointer to an
 * opaque structure either return a structure or delete and nullify that
 * structure pointer.
 */

/*
 * We use Semantic Versioning (http://semver.org/) here, which means that as
 * long as MAJOR is 0, breaking changes can occur, but once MAJOR is non-zero, a
 * breaking change requires a MAJOR version bump. The MINOR number increases as
 * backward compatible functionality is added. The PATCH number increases as
 * bug fixes are done. The version numbers indicated here are for the plugin
 * API and do not indicate anything about what version of crosvm is running.
 */
#define CROSVM_API_MAJOR 0
#define CROSVM_API_MINOR 17
#define CROSVM_API_PATCH 0

enum crosvm_address_space {
  /* I/O port */
  CROSVM_ADDRESS_SPACE_IOPORT = 0,
  /* physical memory space */
  CROSVM_ADDRESS_SPACE_MMIO,
};

/* Handle to the parent crosvm process. */
struct crosvm;

/* Handle to a registered ioeventfd. */
struct crosvm_io;

/* Handle to a registered range of shared memory. */
struct crosvm_memory;

/* Handle to a registered irqfd. */
struct crosvm_irq;

/* Handle to one of the VM's VCPUs. */
struct crosvm_vcpu;

/*
 * Connects to the parent crosvm process and returns a new `struct crosvm`
 * interface object.
 *
 * This is the entry point for interfacing with crosvm as a plugin. This should
 * be called before any other function. The returned object is not thread-safe.
 */
int crosvm_connect(struct crosvm**);

/*
 * Creates another connection for interfacing with crosvm concurrently.
 *
 * The new connection behaves exactly like the original `struct crosvm` but can
 * be used concurrently on a different thread than the original. Actual
 * execution order of the requests to crosvm is unspecified but every request is
 * completed when the `crosvm_*` call returns.
 *
 * It is invalid to call this after `crosvm_start` is called on any `struct
 * crosvm`.
 */
int crosvm_new_connection(struct crosvm*, struct crosvm**);

/*
 * Destroys this connection and tells the parent crosvm process to stop
 * listening for messages from it.
 */
int crosvm_destroy_connection(struct crosvm**);

/*
 * Gets an eventfd that is triggered when this plugin should exit.
 *
 * The returned eventfd is owned by the caller but the underlying event is
 * shared and will therefore only trigger once.
 */
int crosvm_get_shutdown_eventfd(struct crosvm*);

/*
 * Gets a bool indicating if a KVM_CAP_* enum is supported on this VM.
 */
int crosvm_check_extension(struct crosvm*, uint32_t __extension,
                           bool *has_extension);

/*
 * Queries x86 cpuid features which are supported by the hardware and
 * kvm.
 */
int crosvm_get_supported_cpuid(struct crosvm*, uint32_t __entry_count,
                               struct kvm_cpuid_entry2 *__cpuid_entries,
                               uint32_t *__out_count);

/*
 * Queries x86 cpuid features which are emulated by kvm.
 */
int crosvm_get_emulated_cpuid(struct crosvm*, uint32_t __entry_count,
                              struct kvm_cpuid_entry2 *__cpuid_entries,
                              uint32_t *__out_count);

/*
 * Queries kvm for list of supported MSRs.
 */
int crosvm_get_msr_index_list(struct crosvm*, uint32_t __entry_count,
                              uint32_t *__msr_indices,
                              uint32_t *__out_count);

/*
 * The network configuration for a crosvm instance.
 */
struct crosvm_net_config {
  /*
   * The tap device fd. This fd is owned by the caller, and should be closed
   * by the caller when it is no longer in use.
   */
  int tap_fd;
  /* The IPv4 address of the tap interface, in network (big-endian) format. */
  uint32_t host_ip;
  /* The netmask of the tap interface subnet, in network (big-endian) format. */
  uint32_t netmask;
  /* The mac address of the host side of the tap interface. */
  uint8_t host_mac_address[6];
  uint8_t _padding[2];
};

/*
 * Layout check. In C, `static_assert` is a macro from <assert.h> (C11); in
 * C++11 and later it is a keyword and therefore invisible to `#ifdef`, so the
 * language version is tested explicitly to keep the check active there too.
 */
#if defined(static_assert) || \
    (defined(__cplusplus) && __cplusplus >= 201103L)
static_assert(sizeof(struct crosvm_net_config) == 20,
              "extra padding in struct crosvm_net_config");
#endif

/*
 * Gets the network configuration.
 */
int crosvm_net_get_config(struct crosvm*, struct crosvm_net_config*);

/*
 * Registers a range in the given address space that, when accessed, will block
 * and wait for a crosvm_vcpu_resume call.
 *
 * To unreserve a range previously reserved by this function, pass the |__space|
 * and |__start| of the old reservation with a 0 |__length|.
 */
int crosvm_reserve_range(struct crosvm*, uint32_t __space, uint64_t __start,
                         uint64_t __length);

/*
 * Sets the state of the given irq pin.
 */
int crosvm_set_irq(struct crosvm*, uint32_t __irq_id, bool __active);

enum crosvm_irq_route_kind {
  /* IRQ pin to GSI route */
  CROSVM_IRQ_ROUTE_IRQCHIP = 0,
  /* MSI address and data to GSI route */
  CROSVM_IRQ_ROUTE_MSI,
};

/* One entry in the irq routing table array. */
struct crosvm_irq_route {
  /* The IRQ number to trigger. */
  uint32_t irq_id;
  /* A `crosvm_irq_route_kind` indicating which union member to use. */
  uint32_t kind;
  union {
    struct {
      /*
       * One of KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, or
       * KVM_IRQCHIP_IOAPIC indicating which irqchip the indicated pin is on.
       */
      uint32_t irqchip;
      /* The pin on the irqchip used to trigger the IRQ. */
      uint32_t pin;
    } irqchip;

    struct {
      /* Address that triggers the irq. */
      uint64_t address;
      /* Data written to `address` that triggers the irq. */
      uint32_t data;

      uint8_t _reserved[4];
    } msi;

    /* Fixes the size of the union at 16 bytes regardless of the member used. */
    uint8_t _reserved[16];
  };
};

/* Layout check; see the note on crosvm_net_config's check about C++. */
#if defined(static_assert) || \
    (defined(__cplusplus) && __cplusplus >= 201103L)
static_assert(sizeof(struct crosvm_irq_route) == 24,
              "extra padding in struct crosvm_irq_route");
#endif

/*
 * Sets all the gsi routing entries to those indicated by |__routes|.
 *
 * To remove all routing entries, pass NULL for |__routes| and 0 for
 * |__route_count|.
 */
int crosvm_set_irq_routing(struct crosvm*, uint32_t __route_count,
                           const struct crosvm_irq_route* __routes);

/*
 * Gets the state of one of the VM's interrupt controllers (PIC).
 *
 * NOTE(review): |__primary| presumably selects the primary rather than the
 * secondary PIC; confirm against the implementation.
 */
int crosvm_get_pic_state(struct crosvm *, bool __primary,
                         struct kvm_pic_state *__pic_state);

/*
 * Sets the state of one of the VM's interrupt controllers (PIC).
 *
 * NOTE(review): |__primary| presumably selects the primary rather than the
 * secondary PIC; confirm against the implementation.
 */
int crosvm_set_pic_state(struct crosvm *, bool __primary,
                         const struct kvm_pic_state *__pic_state);

/* Gets the state of IOAPIC in a VM. */
int crosvm_get_ioapic_state(struct crosvm *,
                            struct kvm_ioapic_state *__ioapic_state);

/* Sets the state of IOAPIC in a VM. */
int crosvm_set_ioapic_state(struct crosvm *,
                            const struct kvm_ioapic_state *__ioapic_state);

/* Gets the state of the programmable interval timer (PIT) in a VM. */
int crosvm_get_pit_state(struct crosvm *, struct kvm_pit_state2 *__pit_state);

/* Sets the state of the programmable interval timer (PIT) in a VM. */
int crosvm_set_pit_state(struct crosvm *,
                         const struct kvm_pit_state2 *__pit_state);

/* Gets the current timestamp of kvmclock as seen by the VM. */
int crosvm_get_clock(struct crosvm *, struct kvm_clock_data *__clock_data);

/* Sets the current timestamp of kvmclock for the VM. */
int crosvm_set_clock(struct crosvm *,
                     const struct kvm_clock_data *__clock_data);

/* Sets the identity map address as in the KVM_SET_IDENTITY_MAP_ADDR ioctl. */
int crosvm_set_identity_map_addr(struct crosvm*, uint32_t __addr);

/*
 * Triggers a CROSVM_VCPU_EVENT_KIND_PAUSED event on each vcpu identified by
 * |__cpu_mask|.
 *
 * The |__user| pointer will be given as the `user` pointer in the `struct
 * crosvm_vcpu_event` returned by crosvm_vcpu_wait.
 */
int crosvm_pause_vcpus(struct crosvm*, uint64_t __cpu_mask, void* __user);

/*
 * Call once initialization is done. This indicates that crosvm should proceed
 * with running the VM.
 *
 * After this call, this function is no longer valid to call.
 */
int crosvm_start(struct crosvm*);

/*
 * Allocates an eventfd that is triggered asynchronously on write in |__space|
 * at the given |__addr|.
 *
 * If |__datamatch| is non-NULL, it must contain |__len| bytes that will be
 * compared to the bytes being written by the vcpu, which will only trigger
 * the eventfd if equal. If |__datamatch| is NULL all writes to the address
 * will trigger the eventfd.
 *
 * On successful allocation, returns a crosvm_io. Obtain the actual fd
 * by passing this result to crosvm_io_event_fd().
 */
int crosvm_create_io_event(struct crosvm*, uint32_t __space, uint64_t __addr,
                           uint32_t __len, const uint8_t* __datamatch,
                           struct crosvm_io**);

/*
 * Destroys the given io event and unregisters it from the VM.
 */
int crosvm_destroy_io_event(struct crosvm*, struct crosvm_io**);

/*
 * Gets the eventfd triggered by the given io event.
 *
 * The returned fd is owned by the given `struct crosvm_io` and has a lifetime
 * equal to that handle.
 */
int crosvm_io_event_fd(struct crosvm_io*);

/*
 * Creates a shared memory segment backed by a memfd.
 *
 * Inserts non-overlapping memory pages in the guest physical address range
 * specified by |__start| address and |__length| bytes. The memory pages are
 * backed by the memfd |__fd| and are taken starting at |__offset| bytes from
 * the beginning of the memfd.
 *
 * The `memfd_create` syscall must be used to create |__fd| and a shrink
 * seal must have been added to |__fd|. The memfd must be at least
 * `__length+__offset` bytes long.
 *
 * If |__read_only| is true, attempts by the guest to write to this memory
 * region will trigger an IO access exit.
 *
 * To use the `crosvm_memory_get_dirty_log` method with the returned object,
 * |__dirty_log| must be true.
 */
int crosvm_create_memory(struct crosvm*, int __fd, uint64_t __offset,
                         uint64_t __length, uint64_t __start,
                         bool __read_only, bool __dirty_log,
                         struct crosvm_memory**);

/*
 * Destroys the given shared memory and unregisters it from guest physical
 * address space.
 */
int crosvm_destroy_memory(struct crosvm*, struct crosvm_memory**);

/*
 * For a given memory region returns a bitmap containing any pages
 * dirtied since the last call to this function.
 *
 * The |__log| array must have as many bits as the memory segment has pages.
 */
int crosvm_memory_get_dirty_log(struct crosvm*, struct crosvm_memory*,
                                uint8_t* __log);

/*
 * Creates an irq eventfd that can be used to trigger an irq asynchronously.
 *
 * The irq that will be triggered is identified as pin |__irq_id|.
 */
int crosvm_create_irq_event(struct crosvm*, uint32_t __irq_id,
                            struct crosvm_irq**);

/*
 * Unregisters and destroys an irq eventfd.
 */
int crosvm_destroy_irq_event(struct crosvm*, struct crosvm_irq**);

/*
 * Gets the eventfd used to trigger the irq.
 *
 * The returned fd is owned by the given `struct crosvm_irq` and has a lifetime
 * equal to that handle.
 */
int crosvm_irq_event_get_fd(const struct crosvm_irq*);

/*
 * Gets the resample eventfd associated with the crosvm_irq object.
 */
int crosvm_irq_event_get_resample_fd(const struct crosvm_irq*);

enum crosvm_vcpu_event_kind {
  /*
   * The first event returned by crosvm_vcpu_wait, indicating the VCPU has been
   * created but not yet started for the first time.
   */
  CROSVM_VCPU_EVENT_KIND_INIT = 0,

  /*
   * Access to an address in a space previously reserved by
   * crosvm_reserve_range.
   */
  CROSVM_VCPU_EVENT_KIND_IO_ACCESS,

  /*
   * A pause on this vcpu (and possibly others) was requested by this plugin in
   * a `crosvm_pause_vcpus` call.
   */
  CROSVM_VCPU_EVENT_KIND_PAUSED,
};

struct crosvm_vcpu_event {
  /*
   * One of `enum crosvm_vcpu_event_kind`; indicates the kind of event and
   * which union member is valid.
   */
  uint32_t kind;

  uint8_t _padding[4];

  union {
    /* CROSVM_VCPU_EVENT_KIND_IO_ACCESS */
    struct {
      /*
       * One of `enum crosvm_address_space` indicating which address space the
       * access occurred in.
       */
      uint32_t address_space;

      uint8_t _padding[4];

      /* The address that the access occurred at. */
      uint64_t address;

      /*
       * In the case that `is_write` is true, the first `length` bytes are the
       * data being written by the vcpu.
       */
      uint8_t *data;

      /*
       * Number of bytes in the access. In the case that the access is larger
       * than 8 bytes, such as by AVX-512 instructions, multiple vcpu access
       * events are generated serially to cover each 8 byte fragment of the
       * access.
       *
       * Larger I/O accesses are possible. "rep in" can generate I/Os larger
       * than 8 bytes, though such accesses can also be split into multiple
       * events. Currently kvm doesn't seem to batch "rep out" I/Os.
       */
      uint32_t length;

      /*
       * True if the vcpu was attempting to write, false in case of an attempt
       * to read.
       */
      uint8_t is_write;

      uint8_t _reserved[3];
    } io_access;

    /* CROSVM_VCPU_EVENT_KIND_PAUSED */
    void *user;

    /* Fixes the size of the union at 64 bytes regardless of the member used. */
    uint8_t _reserved[64];
  };
};

/* Layout check; see the note on crosvm_net_config's check about C++. */
#if defined(static_assert) || \
    (defined(__cplusplus) && __cplusplus >= 201103L)
static_assert(sizeof(struct crosvm_vcpu_event) == 72,
              "extra padding in struct crosvm_vcpu_event");
#endif

/*
 * Gets the vcpu object for the given |__cpu_id|.
 *
 * The `struct crosvm_vcpu` is owned by `struct crosvm`. Each call with the same
 * `crosvm` and |__cpu_id| will yield the same pointer. The `crosvm_vcpu` does
 * not need to be destroyed or created explicitly.
 *
 * The range of valid |__cpu_id|s is 0 to the number of vcpus - 1. To get every
 * `crosvm_vcpu`, simply call this function iteratively with increasing
 * |__cpu_id| until `-ENOENT` is returned.
 */
int crosvm_get_vcpu(struct crosvm*, uint32_t __cpu_id, struct crosvm_vcpu**);

/*
 * Blocks until a vcpu event happens that requires a response.
 *
 * When crosvm_vcpu_wait returns successfully, the event structure is filled
 * with the description of the event that occurred. The vcpu will suspend
 * execution until a matching call to `crosvm_vcpu_resume` is made. Until such a
 * call is made, the vcpu's run structure can be read and written using any
 * `crosvm_vcpu_get_*` or `crosvm_vcpu_set_*` function.
 */
int crosvm_vcpu_wait(struct crosvm_vcpu*, struct crosvm_vcpu_event*);

/*
 * Resumes execution of a vcpu after a call to `crosvm_vcpu_wait` returns.
 *
 * In the case that the event was a read operation, `data` indicates what the
 * result of that read operation should be. If the read operation was larger
 * than 8 bytes, such as by AVX-512 instructions, this will not actually resume
 * the vcpu, but instead generate another vcpu access event of the next fragment
 * of the read, which can be handled by the next `crosvm_vcpu_wait` call.
 *
 * Once the vcpu event has been responded to sufficiently to resume
 * execution, `crosvm_vcpu_resume` should be called. After `crosvm_vcpu_resume`
 * is called, none of the vcpu state operations are valid until the next time
 * `crosvm_vcpu_wait` returns.
 */
int crosvm_vcpu_resume(struct crosvm_vcpu*);

/* Gets the state of the vcpu's registers. */
int crosvm_vcpu_get_regs(struct crosvm_vcpu*, struct kvm_regs*);
/* Sets the state of the vcpu's registers. */
int crosvm_vcpu_set_regs(struct crosvm_vcpu*, const struct kvm_regs*);

/* Gets the state of the vcpu's special registers. */
int crosvm_vcpu_get_sregs(struct crosvm_vcpu*, struct kvm_sregs*);
/* Sets the state of the vcpu's special registers. */
int crosvm_vcpu_set_sregs(struct crosvm_vcpu*, const struct kvm_sregs*);

/* Gets the state of the vcpu's floating point unit. */
int crosvm_vcpu_get_fpu(struct crosvm_vcpu*, struct kvm_fpu*);
/* Sets the state of the vcpu's floating point unit. */
int crosvm_vcpu_set_fpu(struct crosvm_vcpu*, const struct kvm_fpu*);

/* Gets the state of the vcpu's debug registers. */
int crosvm_vcpu_get_debugregs(struct crosvm_vcpu*, struct kvm_debugregs*);
/* Sets the state of the vcpu's debug registers. */
int crosvm_vcpu_set_debugregs(struct crosvm_vcpu*, const struct kvm_debugregs*);

/* Gets the state of the vcpu's xcr registers. */
int crosvm_vcpu_get_xcrs(struct crosvm_vcpu*, struct kvm_xcrs*);
/* Sets the state of the vcpu's xcr registers. */
int crosvm_vcpu_set_xcrs(struct crosvm_vcpu*, const struct kvm_xcrs*);

/* Gets the MSRs of the vcpu indicated by the index field of each entry. */
int crosvm_vcpu_get_msrs(struct crosvm_vcpu*, uint32_t __msr_count,
                         struct kvm_msr_entry *__msr_entries,
                         uint32_t *__out_count);
/* Sets the MSRs of the vcpu indicated by the index field of each entry. */
int crosvm_vcpu_set_msrs(struct crosvm_vcpu*, uint32_t __msr_count,
                         const struct kvm_msr_entry *__msr_entries);

/* Sets the responses to the cpuid instructions executed on this vcpu. */
int crosvm_vcpu_set_cpuid(struct crosvm_vcpu*, uint32_t __cpuid_count,
                          const struct kvm_cpuid_entry2 *__cpuid_entries);

/* Gets state of LAPIC of the VCPU. */
int crosvm_vcpu_get_lapic_state(struct crosvm_vcpu *,
                                struct kvm_lapic_state *__lapic_state);
/* Sets state of LAPIC of the VCPU. */
int crosvm_vcpu_set_lapic_state(struct crosvm_vcpu *,
                                const struct kvm_lapic_state *__lapic_state);

/* Gets the "multiprocessor state" of given VCPU. */
int crosvm_vcpu_get_mp_state(struct crosvm_vcpu *,
                             struct kvm_mp_state *__mp_state);
/* Sets the "multiprocessor state" of given VCPU. */
int crosvm_vcpu_set_mp_state(struct crosvm_vcpu *,
                             const struct kvm_mp_state *__mp_state);

/* Gets currently pending exceptions, interrupts, NMIs, etc for VCPU. */
int crosvm_vcpu_get_vcpu_events(struct crosvm_vcpu *,
                                struct kvm_vcpu_events *);

/* Sets currently pending exceptions, interrupts, NMIs, etc for VCPU. */
int crosvm_vcpu_set_vcpu_events(struct crosvm_vcpu *,
                                const struct kvm_vcpu_events *);

#ifdef __cplusplus
}
#endif

#endif