/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 */
#ifndef _UAPI_IOMMUFD_H
#define _UAPI_IOMMUFD_H

#include <linux/ioctl.h>
#include <linux/types.h>

#define IOMMUFD_TYPE (';')

/**
 * DOC: General ioctl format
 *
 * The ioctl interface follows a general format to allow for extensibility. Each
 * ioctl is passed in a structure pointer as the argument providing the size of
 * the structure in the first u32. The kernel checks that any structure space
 * beyond what it understands is 0. This allows userspace to use the backward
 * compatible portion while consistently using the newer, larger, structures.
 *
 * ioctls use a standard meaning for common errnos:
 *
 *  - ENOTTY: The IOCTL number itself is not supported at all
 *  - E2BIG: The IOCTL number is supported, but the provided structure has
 *    non-zero in a part the kernel does not understand.
 *  - EOPNOTSUPP: The IOCTL number is supported, and the structure is
 *    understood, however a known field has a value the kernel does not
 *    understand or support.
 *  - EINVAL: Everything about the IOCTL was understood, but a field is not
 *    correct.
 *  - ENOENT: An ID or IOVA provided does not exist.
 *  - ENOMEM: Out of memory.
 *  - EOVERFLOW: Mathematics overflowed.
 *
 * As well as additional errnos, within specific ioctls.
 */
enum {
	IOMMUFD_CMD_BASE = 0x80,
	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
	IOMMUFD_CMD_IOAS_ALLOC = 0x81,
	IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
	IOMMUFD_CMD_IOAS_COPY = 0x83,
	IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
	IOMMUFD_CMD_IOAS_MAP = 0x85,
	IOMMUFD_CMD_IOAS_UNMAP = 0x86,
	IOMMUFD_CMD_OPTION = 0x87,
	IOMMUFD_CMD_VFIO_IOAS = 0x88,
	IOMMUFD_CMD_HWPT_ALLOC = 0x89,
	IOMMUFD_CMD_GET_HW_INFO = 0x8a,
	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
};

/**
 * struct iommu_destroy - ioctl(IOMMU_DESTROY)
 * @size: sizeof(struct iommu_destroy)
 * @id: iommufd object ID to destroy. Can be any destroyable object type.
 *
 * Destroy any object held within iommufd.
 */
struct iommu_destroy {
	__u32 size;
	__u32 id;
};
#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)

/**
 * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
 * @size: sizeof(struct iommu_ioas_alloc)
 * @flags: Must be 0
 * @out_ioas_id: Output IOAS ID for the allocated object
 *
 * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
 * to memory mapping.
 */
struct iommu_ioas_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_ioas_id;
};
#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)

/**
 * struct iommu_iova_range - Inclusive IOVA interval
 * @start: First IOVA
 * @last: Inclusive last IOVA
 *
 * An interval in IOVA space. This is the element type of the arrays passed to
 * IOMMU_IOAS_IOVA_RANGES and IOMMU_IOAS_ALLOW_IOVAS; it is not an ioctl
 * argument by itself.
 */
struct iommu_iova_range {
	__aligned_u64 start;
	__aligned_u64 last;
};

/**
 * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
 * @size: sizeof(struct iommu_ioas_iova_ranges)
 * @ioas_id: IOAS ID to read ranges from
 * @num_iovas: Input/Output total number of ranges in the IOAS
 * @__reserved: Must be 0
 * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
 * @out_iova_alignment: Minimum alignment required for mapping IOVA
 *
 * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
 * is not allowed. num_iovas will be set to the total number of iovas and
 * the allowed_iovas[] will be filled in as space permits.
 *
 * The allowed ranges are dependent on the HW path the DMA operation takes, and
 * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
 * full range, and each attached device will narrow the ranges based on that
 * device's HW restrictions. Detaching a device can widen the ranges. Userspace
 * should query ranges after every attach/detach to know what IOVAs are valid
 * for mapping.
 *
 * On input num_iovas is the length of the allowed_iovas array. On output it is
 * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
 * num_iovas to the required value if num_iovas is too small. In this case the
 * caller should allocate a larger output array and re-issue the ioctl.
 *
 * out_iova_alignment returns the minimum IOVA alignment that can be given
 * to IOMMU_IOAS_MAP/COPY. IOVAs must satisfy::
 *
 *   starting_iova % out_iova_alignment == 0
 *   (starting_iova + length) % out_iova_alignment == 0
 *
 * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
 * be higher than the system PAGE_SIZE.
 */
struct iommu_ioas_iova_ranges {
	__u32 size;
	__u32 ioas_id;
	__u32 num_iovas;
	__u32 __reserved;
	__aligned_u64 allowed_iovas;
	__aligned_u64 out_iova_alignment;
};
#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)

/**
 * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
 * @size: sizeof(struct iommu_ioas_allow_iovas)
 * @ioas_id: IOAS ID to allow IOVAs from
 * @num_iovas: Input/Output total number of ranges in the IOAS
 * @__reserved: Must be 0
 * @allowed_iovas: Pointer to array of struct iommu_iova_range
 *
 * Ensure a range of IOVAs are always available for allocation. If this call
 * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
 * that are narrower than the ranges provided here. This call will fail if
 * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
 *
 * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
 * devices are attached the IOVA will narrow based on the device restrictions.
 * When an allowed range is specified any narrowing will be refused, ie device
 * attachment can fail if the device requires limiting within the allowed range.
 *
 * Automatic IOVA allocation is also impacted by this call. MAP will only
 * allocate within the allowed IOVAs if they are present.
 *
 * This call replaces the entire allowed list with the given list.
 */
struct iommu_ioas_allow_iovas {
	__u32 size;
	__u32 ioas_id;
	__u32 num_iovas;
	__u32 __reserved;
	__aligned_u64 allowed_iovas;
};
#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)

/**
 * enum iommufd_ioas_map_flags - Flags for map and copy
 * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
 *                             IOVA to place the mapping at
 * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
 * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
 */
enum iommufd_ioas_map_flags {
	IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
	IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
	IOMMU_IOAS_MAP_READABLE = 1 << 2,
};

/**
 * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
 * @size: sizeof(struct iommu_ioas_map)
 * @flags: Combination of enum iommufd_ioas_map_flags
 * @ioas_id: IOAS ID to change the mapping of
 * @__reserved: Must be 0
 * @user_va: Userspace pointer to start mapping from
 * @length: Number of bytes to map
 * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
 *        then this must be provided as input.
 *
 * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
 * mapping will be established at iova, otherwise a suitable location based on
 * the reserved and allowed lists will be automatically selected and returned in
 * iova.
 *
 * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
 * be unused, existing IOVA cannot be replaced.
 */
struct iommu_ioas_map {
	__u32 size;
	__u32 flags;
	__u32 ioas_id;
	__u32 __reserved;
	__aligned_u64 user_va;
	__aligned_u64 length;
	__aligned_u64 iova;
};
#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)

/**
 * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
 * @size: sizeof(struct iommu_ioas_copy)
 * @flags: Combination of enum iommufd_ioas_map_flags
 * @dst_ioas_id: IOAS ID to change the mapping of
 * @src_ioas_id: IOAS ID to copy from
 * @length: Number of bytes to copy and map
 * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
 *            set then this must be provided as input.
 * @src_iova: IOVA to start the copy
 *
 * Copy an already existing mapping from src_ioas_id and establish it in
 * dst_ioas_id. The src iova/length must exactly match a range used with
 * IOMMU_IOAS_MAP.
 *
 * This may be used to efficiently clone a subset of an IOAS to another, or as a
 * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
 * establishing equivalent new mappings, as internal resources are shared, and
 * the kernel will pin the user memory only once.
 */
struct iommu_ioas_copy {
	__u32 size;
	__u32 flags;
	__u32 dst_ioas_id;
	__u32 src_ioas_id;
	__aligned_u64 length;
	__aligned_u64 dst_iova;
	__aligned_u64 src_iova;
};
#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)

/**
 * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
 * @size: sizeof(struct iommu_ioas_unmap)
 * @ioas_id: IOAS ID to change the mapping of
 * @iova: IOVA to start the unmapping at
 * @length: Number of bytes to unmap, and return back the bytes unmapped
 *
 * Unmap an IOVA range. The iova/length must be a superset of a previously
 * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
 * truncating ranges is not allowed. The values 0 to U64_MAX will unmap
 * everything.
 */
struct iommu_ioas_unmap {
	__u32 size;
	__u32 ioas_id;
	__aligned_u64 iova;
	__aligned_u64 length;
};
#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)

/**
 * enum iommufd_option - Option IDs for ioctl(IOMMU_OPTION), passed in
 *                       struct iommu_option::option_id
 * @IOMMU_OPTION_RLIMIT_MODE:
 *    Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
 *    to invoke this. Value 0 (default) is user based accounting, 1 uses process
 *    based accounting. Global option, object_id must be 0
 * @IOMMU_OPTION_HUGE_PAGES:
 *    Value 1 (default) allows contiguous pages to be combined when generating
 *    iommu mappings. Value 0 disables combining, everything is mapped to
 *    PAGE_SIZE. This can be useful for benchmarking.  This is a per-IOAS
 *    option, the object_id must be the IOAS ID.
 */
enum iommufd_option {
	IOMMU_OPTION_RLIMIT_MODE = 0,
	IOMMU_OPTION_HUGE_PAGES = 1,
};

/**
 * enum iommufd_option_ops - Operations for ioctl(IOMMU_OPTION), passed in
 *                           struct iommu_option::op
 * @IOMMU_OPTION_OP_SET: Set the option's value
 * @IOMMU_OPTION_OP_GET: Get the option's value
 */
enum iommufd_option_ops {
	IOMMU_OPTION_OP_SET = 0,
	IOMMU_OPTION_OP_GET = 1,
};

/**
 * struct iommu_option - iommu option multiplexer
 * @size: sizeof(struct iommu_option)
 * @option_id: One of enum iommufd_option
 * @op: One of enum iommufd_option_ops
 * @__reserved: Must be 0
 * @object_id: ID of the object if required
 * @val64: Option value to set or value returned on get
 *
 * Change a simple option value. This multiplexer allows controlling options
 * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
 * will return the current value.
 */
struct iommu_option {
	__u32 size;
	__u32 option_id;
	__u16 op;
	__u16 __reserved;
	__u32 object_id;
	__aligned_u64 val64;
};
#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)

/**
 * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls
 * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
 */
enum iommufd_vfio_ioas_op {
	IOMMU_VFIO_IOAS_GET = 0,
	IOMMU_VFIO_IOAS_SET = 1,
	IOMMU_VFIO_IOAS_CLEAR = 2,
};

/**
 * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
 * @size: sizeof(struct iommu_vfio_ioas)
 * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
 *           For IOMMU_VFIO_IOAS_GET will output the IOAS ID
 * @op: One of enum iommufd_vfio_ioas_op
 * @__reserved: Must be 0
 *
 * The VFIO compatibility support uses a single ioas because VFIO APIs do not
 * support the ID field. Set or Get the IOAS that VFIO compatibility will use.
 * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
 * compatibility ioas, either by taking what is already set, or auto creating
 * one. From then on VFIO will continue to use that ioas and is not affected by
 * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
 */
struct iommu_vfio_ioas {
	__u32 size;
	__u32 ioas_id;
	__u16 op;
	__u16 __reserved;
};
#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)

/**
 * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation
 * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
 *                                the parent HWPT in a nesting configuration.
 * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
 *                                   enforced on device attachment
 * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
 *                             valid.
 */
enum iommufd_hwpt_alloc_flags {
	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
};

/**
 * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table
 *                                entry attributes
 * @IOMMU_VTD_S1_SRE: Supervisor request
 * @IOMMU_VTD_S1_EAFE: Extended access enable
 * @IOMMU_VTD_S1_WPE: Write protect enable
 */
enum iommu_hwpt_vtd_s1_flags {
	IOMMU_VTD_S1_SRE = 1 << 0,
	IOMMU_VTD_S1_EAFE = 1 << 1,
	IOMMU_VTD_S1_WPE = 1 << 2,
};

/**
 * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table
 *                            info (IOMMU_HWPT_DATA_VTD_S1)
 * @flags: Combination of enum iommu_hwpt_vtd_s1_flags
 * @pgtbl_addr: The base address of the stage-1 page table.
 * @addr_width: The address width of the stage-1 page table
 * @__reserved: Must be 0
 */
struct iommu_hwpt_vtd_s1 {
	__aligned_u64 flags;
	__aligned_u64 pgtbl_addr;
	__u32 addr_width;
	__u32 __reserved;
};

/**
 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
 * @IOMMU_HWPT_DATA_NONE: no data
 * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
 */
enum iommu_hwpt_data_type {
	IOMMU_HWPT_DATA_NONE = 0,
	IOMMU_HWPT_DATA_VTD_S1 = 1,
};

/**
 * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
 * @size: sizeof(struct iommu_hwpt_alloc)
 * @flags: Combination of enum iommufd_hwpt_alloc_flags
 * @dev_id: The device to allocate this HWPT for
 * @pt_id: The IOAS or HWPT to connect this HWPT to
 * @out_hwpt_id: The ID of the new HWPT
 * @__reserved: Must be 0
 * @data_type: One of enum iommu_hwpt_data_type
 * @data_len: Length of the type specific data
 * @data_uptr: User pointer to the type specific data
 * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of
 *            IOMMU_HWPT_FAULT_ID_VALID is set.
 * @__reserved2: Padding to 64-bit alignment. Must be 0.
 *
 * Explicitly allocate a hardware page table object. This is the same object
 * type that is returned by iommufd_device_attach() and represents the
 * underlying iommu driver's iommu_domain kernel object.
 *
 * A kernel-managed HWPT will be created with the mappings from the given
 * IOAS via the @pt_id. The @data_type for this allocation must be set to
 * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
 * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
 *
 * A user-managed nested HWPT will be created from a given parent HWPT via
 * @pt_id, in which the parent HWPT must be allocated previously via the
 * same ioctl from a given IOAS (@pt_id). In this case, the @data_type
 * must be set to a pre-defined type corresponding to an I/O page table
 * type supported by the underlying IOMMU hardware.
 *
 * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
 * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
 * must be given.
 */
struct iommu_hwpt_alloc {
	__u32 size;
	__u32 flags;
	__u32 dev_id;
	__u32 pt_id;
	__u32 out_hwpt_id;
	__u32 __reserved;
	__u32 data_type;
	__u32 data_len;
	__aligned_u64 data_uptr;
	__u32 fault_id;
	__u32 __reserved2;
};
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)

/**
 * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info
 * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings
 *                                         on a nested_parent domain.
 *                                         https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
 */
enum iommu_hw_info_vtd_flags {
	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
};

/**
 * struct iommu_hw_info_vtd - Intel VT-d hardware information
 *
 * @flags: Combination of enum iommu_hw_info_vtd_flags
 * @__reserved: Must be 0
 *
 * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
 *           section 11.4.2 Capability Register.
 * @ecap_reg: Value of Intel VT-d extended capability register defined in VT-d
 *            spec section 11.4.3 Extended Capability Register.
 *
 * User needs to understand the Intel VT-d specification to decode the
 * register value.
 */
struct iommu_hw_info_vtd {
	__u32 flags;
	__u32 __reserved;
	__aligned_u64 cap_reg;
	__aligned_u64 ecap_reg;
};

/**
 * enum iommu_hw_info_type - IOMMU Hardware Info Types
 * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
 *                           info
 * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
 */
enum iommu_hw_info_type {
	IOMMU_HW_INFO_TYPE_NONE = 0,
	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
};

/**
 * enum iommufd_hw_capabilities - Generic IOMMU hardware capability flags
 *                                reported in iommu_hw_info::out_capabilities
 * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking
 *                               If available, it means the following APIs
 *                               are supported:
 *
 *                                   IOMMU_HWPT_GET_DIRTY_BITMAP
 *                                   IOMMU_HWPT_SET_DIRTY_TRACKING
 *
 * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it
 *                               when the struct
 *                               iommu_hw_info::out_max_pasid_log2 is zero.
 * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it
 *                               when the struct
 *                               iommu_hw_info::out_max_pasid_log2 is zero.
 */
enum iommufd_hw_capabilities {
	IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
	IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1,
	IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2,
};

/**
 * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
 * @size: sizeof(struct iommu_hw_info)
 * @flags: Must be 0
 * @dev_id: The device bound to the iommufd
 * @data_len: Input the length of a user buffer in bytes. Output the length of
 *            data that kernel supports
 * @data_uptr: User pointer to a user-space buffer used by the kernel to fill
 *             the iommu type specific hardware information data
 * @out_data_type: Output the iommu hardware info type as defined in the enum
 *                 iommu_hw_info_type.
 * @out_capabilities: Output the generic iommu capability info type as defined
 *                    in the enum iommufd_hw_capabilities.
 * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support.
 *                      PCI devices turn to out_capabilities to check if the
 *                      specific capabilities are supported or not.
 * @__reserved: Must be 0
 *
 * Query an iommu type specific hardware information data from an iommu behind
 * a given device that has been bound to iommufd. This hardware info data will
 * be used to sync capabilities between the virtual iommu and the physical
 * iommu, e.g. a nested translation setup needs to check the hardware info, so
 * a guest stage-1 page table can be compatible with the physical iommu.
 *
 * To capture an iommu type specific hardware information data, @data_uptr and
 * its length @data_len must be provided. Trailing bytes will be zeroed if the
 * user buffer is larger than the data that kernel has. Otherwise, kernel only
 * fills the buffer using the given length in @data_len. If the ioctl succeeds,
 * @data_len will be updated to the length that kernel actually supports,
 * @out_data_type will be filled to decode the data filled in the buffer
 * pointed by @data_uptr. Input @data_len == zero is allowed.
 */
struct iommu_hw_info {
	__u32 size;
	__u32 flags;
	__u32 dev_id;
	__u32 data_len;
	__aligned_u64 data_uptr;
	__u32 out_data_type;
	__u8 out_max_pasid_log2;
	__u8 __reserved[3];
	__aligned_u64 out_capabilities;
};
#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)

/**
 * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty
 *                                              tracking
 * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking
 */
enum iommufd_hwpt_set_dirty_tracking_flags {
	IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1,
};

/**
 * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
 * @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
 * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
 * @__reserved: Must be 0
 *
 * Toggle dirty tracking on an HW pagetable.
 */
struct iommu_hwpt_set_dirty_tracking {
	__u32 size;
	__u32 flags;
	__u32 hwpt_id;
	__u32 __reserved;
};
#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
					  IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)

/**
 * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
 * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
 *                                        any dirty bits metadata. This flag
 *                                        can be passed in the expectation
 *                                        where the next operation is an unmap
 *                                        of the same IOVA range.
 */
enum iommufd_hwpt_get_dirty_bitmap_flags {
	IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1,
};

/**
 * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
 * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
 * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
 * @__reserved: Must be 0
 * @iova: base IOVA of the bitmap first bit
 * @length: IOVA range size
 * @page_size: page size granularity of each bit in the bitmap
 * @data: bitmap where to set the dirty bits. The bitmap bits each
 *        represent a page_size which you deviate from an arbitrary iova.
 *
 * Checking a given IOVA is dirty:
 *
 *  data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
 *
 * Walk the IOMMU pagetables for a given IOVA range to return a bitmap
 * with the dirty IOVAs. In doing so it will also by default clear any
 * dirty bit metadata set in the IOPTE.
 */
struct iommu_hwpt_get_dirty_bitmap {
	__u32 size;
	__u32 hwpt_id;
	__u32 flags;
	__u32 __reserved;
	__aligned_u64 iova;
	__aligned_u64 length;
	__aligned_u64 page_size;
	__aligned_u64 data;
};
#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
					IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)

/**
 * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
 *                                        Data Type
 * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
 */
enum iommu_hwpt_invalidate_data_type {
	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
};

/**
 * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
 *                                           stage-1 cache invalidation
 * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
 *                            to all-levels page structure cache or just
 *                            the leaf PTE cache.
 */
enum iommu_hwpt_vtd_s1_invalidate_flags {
	IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
};

/**
 * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
 *                                       (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
 * @addr: The start address of the range to be invalidated. It needs to
 *        be 4KB aligned.
 * @npages: Number of contiguous 4K pages to be invalidated.
 * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
 * @__reserved: Must be 0
 *
 * The Intel VT-d specific invalidation data for user-managed stage-1 cache
 * invalidation in nested translation. Userspace uses this structure to
 * tell the impacted cache scope after modifying the stage-1 page table.
 *
 * Invalidating all the caches related to the page table by setting @addr
 * to be 0 and @npages to be U64_MAX.
 *
 * The device TLB will be invalidated automatically if ATS is enabled.
 */
struct iommu_hwpt_vtd_s1_invalidate {
	__aligned_u64 addr;
	__aligned_u64 npages;
	__u32 flags;
	__u32 __reserved;
};

/**
 * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
 * @size: sizeof(struct iommu_hwpt_invalidate)
 * @hwpt_id: ID of a nested HWPT for cache invalidation
 * @data_uptr: User pointer to an array of driver-specific cache invalidation
 *             data.
 * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
 *             type of all the entries in the invalidation request array. It
 *             should be a type supported by the hwpt pointed by @hwpt_id.
 * @entry_len: Length (in bytes) of a request entry in the request array
 * @entry_num: Input the number of cache invalidation requests in the array.
 *             Output the number of requests successfully handled by kernel.
 * @__reserved: Must be 0.
 *
 * Invalidate the iommu cache for user-managed page table. Modifications on a
 * user-managed page table should be followed by this operation to sync cache.
 * Each ioctl can support one or more cache invalidation requests in the array
 * that has a total size of @entry_len * @entry_num.
 *
 * An empty invalidation request array by setting @entry_num==0 is allowed, and
 * @entry_len and @data_uptr would be ignored in this case. This can be used to
 * check if the given @data_type is supported or not by kernel.
 */
struct iommu_hwpt_invalidate {
	__u32 size;
	__u32 hwpt_id;
	__aligned_u64 data_uptr;
	__u32 data_type;
	__u32 entry_len;
	__u32 entry_num;
	__u32 __reserved;
};
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)

/**
 * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
 *                                   valid.
 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
 */
enum iommu_hwpt_pgfault_flags {
	IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0),
	IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1),
};

/**
 * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_PERM_READ: request for read permission
 * @IOMMU_PGFAULT_PERM_WRITE: request for write permission
 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
 *                           Execute Requested bit set in PASID TLP Prefix.
 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
 *                           Privileged Mode Requested bit set in PASID TLP
 *                           Prefix.
 */
enum iommu_hwpt_pgfault_perm {
	IOMMU_PGFAULT_PERM_READ = (1 << 0),
	IOMMU_PGFAULT_PERM_WRITE = (1 << 1),
	IOMMU_PGFAULT_PERM_EXEC = (1 << 2),
	IOMMU_PGFAULT_PERM_PRIV = (1 << 3),
};

/**
 * struct iommu_hwpt_pgfault - iommu page fault data
 * @flags: Combination of enum iommu_hwpt_pgfault_flags
 * @dev_id: id of the originated device
 * @pasid: Process Address Space ID
 * @grpid: Page Request Group Index
 * @perm: Combination of enum iommu_hwpt_pgfault_perm
 * @__reserved: Must be 0.
 * @addr: Fault address
 * @length: a hint of how much data the requestor is expecting to fetch. For
 *          example, if the PRI initiator knows it is going to do a 10MB
 *          transfer, it could fill in 10MB and the OS could pre-fault in
 *          10MB of IOVA. It defaults to 0 if there's no such hint.
 * @cookie: kernel-managed cookie identifying a group of fault messages. The
 *          cookie number encoded in the last page fault of the group should
 *          be echoed back in the response message.
 */
struct iommu_hwpt_pgfault {
	__u32 flags;
	__u32 dev_id;
	__u32 pasid;
	__u32 grpid;
	__u32 perm;
	__u32 __reserved;
	__aligned_u64 addr;
	__u32 length;
	__u32 cookie;
};

/**
 * enum iommufd_page_response_code - Return status of fault handlers
 * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
 *                             populated, retry the access. This is the
 *                             "Success" defined in PCI 10.4.2.1.
 * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
 *                             access. This is the "Invalid Request" in PCI
 *                             10.4.2.1.
 */
enum iommufd_page_response_code {
	IOMMUFD_PAGE_RESP_SUCCESS = 0,
	IOMMUFD_PAGE_RESP_INVALID = 1,
};

/**
 * struct iommu_hwpt_page_response - IOMMU page fault response
 * @cookie: The kernel-managed cookie reported in the fault message.
 * @code: One of response code in enum iommufd_page_response_code.
 */
struct iommu_hwpt_page_response {
	__u32 cookie;
	__u32 code;
};

/**
 * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
 * @size: sizeof(struct iommu_fault_alloc)
 * @flags: Must be 0
 * @out_fault_id: The ID of the new FAULT
 * @out_fault_fd: The fd of the new FAULT
 *
 * Explicitly allocate a fault handling object.
 */
struct iommu_fault_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_fault_id;
	__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
#endif