1 /// Values supported by [`Mmap::advise`][crate::Mmap::advise] and [`MmapMut::advise`][crate::MmapMut::advise] functions. 2 /// 3 /// See [madvise()](https://man7.org/linux/man-pages/man2/madvise.2.html) map page. 4 #[repr(i32)] 5 #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] 6 pub enum Advice { 7 /// **MADV_NORMAL** 8 /// 9 /// No special treatment. This is the default. 10 Normal = libc::MADV_NORMAL, 11 12 /// **MADV_RANDOM** 13 /// 14 /// Expect page references in random order. (Hence, read 15 /// ahead may be less useful than normally.) 16 Random = libc::MADV_RANDOM, 17 18 /// **MADV_SEQUENTIAL** 19 /// 20 /// Expect page references in sequential order. (Hence, pages 21 /// in the given range can be aggressively read ahead, and may 22 /// be freed soon after they are accessed.) 23 Sequential = libc::MADV_SEQUENTIAL, 24 25 /// **MADV_WILLNEED** 26 /// 27 /// Expect access in the near future. (Hence, it might be a 28 /// good idea to read some pages ahead.) 29 WillNeed = libc::MADV_WILLNEED, 30 31 /// **MADV_DONTFORK** - Linux only (since Linux 2.6.16) 32 /// 33 /// Do not make the pages in this range available to the child 34 /// after a fork(2). This is useful to prevent copy-on-write 35 /// semantics from changing the physical location of a page if 36 /// the parent writes to it after a fork(2). (Such page 37 /// relocations cause problems for hardware that DMAs into the 38 /// page.) 39 #[cfg(target_os = "linux")] 40 DontFork = libc::MADV_DONTFORK, 41 42 /// **MADV_DOFORK** - Linux only (since Linux 2.6.16) 43 /// 44 /// Undo the effect of MADV_DONTFORK, restoring the default 45 /// behavior, whereby a mapping is inherited across fork(2). 46 #[cfg(target_os = "linux")] 47 DoFork = libc::MADV_DOFORK, 48 49 /// **MADV_MERGEABLE** - Linux only (since Linux 2.6.32) 50 /// 51 /// Enable Kernel Samepage Merging (KSM) for the pages in the 52 /// range specified by addr and length. The kernel regularly 53 /// scans those areas of user memory that have been marked as 54 /// mergeable, looking for pages with identical content. 55 /// These are replaced by a single write-protected page (which 56 /// is automatically copied if a process later wants to update 57 /// the content of the page). KSM merges only private 58 /// anonymous pages (see mmap(2)). 59 /// 60 /// The KSM feature is intended for applications that generate 61 /// many instances of the same data (e.g., virtualization 62 /// systems such as KVM). It can consume a lot of processing 63 /// power; use with care. See the Linux kernel source file 64 /// Documentation/admin-guide/mm/ksm.rst for more details. 65 /// 66 /// The MADV_MERGEABLE and MADV_UNMERGEABLE operations are 67 /// available only if the kernel was configured with 68 /// CONFIG_KSM. 69 #[cfg(target_os = "linux")] 70 Mergeable = libc::MADV_MERGEABLE, 71 72 /// **MADV_UNMERGEABLE** - Linux only (since Linux 2.6.32) 73 /// 74 /// Undo the effect of an earlier MADV_MERGEABLE operation on 75 /// the specified address range; KSM unmerges whatever pages 76 /// it had merged in the address range specified by addr and 77 /// length. 78 #[cfg(target_os = "linux")] 79 Unmergeable = libc::MADV_UNMERGEABLE, 80 81 /// **MADV_HUGEPAGE** - Linux only (since Linux 2.6.38) 82 /// 83 /// Enable Transparent Huge Pages (THP) for pages in the range 84 /// specified by addr and length. Currently, Transparent Huge 85 /// Pages work only with private anonymous pages (see 86 /// mmap(2)). The kernel will regularly scan the areas marked 87 /// as huge page candidates to replace them with huge pages. 88 /// The kernel will also allocate huge pages directly when the 89 /// region is naturally aligned to the huge page size (see 90 /// posix_memalign(2)). 91 /// 92 /// This feature is primarily aimed at applications that use 93 /// large mappings of data and access large regions of that 94 /// memory at a time (e.g., virtualization systems such as 95 /// QEMU). It can very easily waste memory (e.g., a 2 MB 96 /// mapping that only ever accesses 1 byte will result in 2 MB 97 /// of wired memory instead of one 4 KB page). See the Linux 98 /// kernel source file 99 /// Documentation/admin-guide/mm/transhuge.rst for more 100 /// details. 101 /// 102 /// Most common kernels configurations provide MADV_HUGEPAGE- 103 /// style behavior by default, and thus MADV_HUGEPAGE is 104 /// normally not necessary. It is mostly intended for 105 /// embedded systems, where MADV_HUGEPAGE-style behavior may 106 /// not be enabled by default in the kernel. On such systems, 107 /// this flag can be used in order to selectively enable THP. 108 /// Whenever MADV_HUGEPAGE is used, it should always be in 109 /// regions of memory with an access pattern that the 110 /// developer knows in advance won't risk to increase the 111 /// memory footprint of the application when transparent 112 /// hugepages are enabled. 113 /// 114 /// The MADV_HUGEPAGE and MADV_NOHUGEPAGE operations are 115 /// available only if the kernel was configured with 116 /// CONFIG_TRANSPARENT_HUGEPAGE. 117 #[cfg(target_os = "linux")] 118 HugePage = libc::MADV_HUGEPAGE, 119 120 /// **MADV_NOHUGEPAGE** - Linux only (since Linux 2.6.38) 121 /// 122 /// Ensures that memory in the address range specified by addr 123 /// and length will not be backed by transparent hugepages. 124 #[cfg(target_os = "linux")] 125 NoHugePage = libc::MADV_NOHUGEPAGE, 126 127 /// **MADV_DONTDUMP** - Linux only (since Linux 3.4) 128 /// 129 /// Exclude from a core dump those pages in the range 130 /// specified by addr and length. This is useful in 131 /// applications that have large areas of memory that are 132 /// known not to be useful in a core dump. The effect of 133 /// **MADV_DONTDUMP** takes precedence over the bit mask that is 134 /// set via the `/proc/[pid]/coredump_filter` file (see 135 /// core(5)). 136 #[cfg(target_os = "linux")] 137 DontDump = libc::MADV_DONTDUMP, 138 139 /// **MADV_DODUMP** - Linux only (since Linux 3.4) 140 /// 141 /// Undo the effect of an earlier MADV_DONTDUMP. 142 #[cfg(target_os = "linux")] 143 DoDump = libc::MADV_DODUMP, 144 145 /// **MADV_HWPOISON** - Linux only (since Linux 2.6.32) 146 /// 147 /// Poison the pages in the range specified by addr and length 148 /// and handle subsequent references to those pages like a 149 /// hardware memory corruption. This operation is available 150 /// only for privileged (CAP_SYS_ADMIN) processes. This 151 /// operation may result in the calling process receiving a 152 /// SIGBUS and the page being unmapped. 153 /// 154 /// This feature is intended for testing of memory error- 155 /// handling code; it is available only if the kernel was 156 /// configured with CONFIG_MEMORY_FAILURE. 157 #[cfg(target_os = "linux")] 158 HwPoison = libc::MADV_HWPOISON, 159 160 /// **MADV_POPULATE_READ** - Linux only (since Linux 5.14) 161 /// 162 /// Populate (prefault) page tables readable, faulting in all 163 /// pages in the range just as if manually reading from each 164 /// page; however, avoid the actual memory access that would have 165 /// been performed after handling the fault. 166 /// 167 /// In contrast to MAP_POPULATE, MADV_POPULATE_READ does not hide 168 /// errors, can be applied to (parts of) existing mappings and 169 /// will always populate (prefault) page tables readable. One 170 /// example use case is prefaulting a file mapping, reading all 171 /// file content from disk; however, pages won't be dirtied and 172 /// consequently won't have to be written back to disk when 173 /// evicting the pages from memory. 174 /// 175 /// Depending on the underlying mapping, map the shared zeropage, 176 /// preallocate memory or read the underlying file; files with 177 /// holes might or might not preallocate blocks. If populating 178 /// fails, a SIGBUS signal is not generated; instead, an error is 179 /// returned. 180 /// 181 /// If MADV_POPULATE_READ succeeds, all page tables have been 182 /// populated (prefaulted) readable once. If MADV_POPULATE_READ 183 /// fails, some page tables might have been populated. 184 /// 185 /// MADV_POPULATE_READ cannot be applied to mappings without read 186 /// permissions and special mappings, for example, mappings 187 /// marked with kernel-internal flags such as VM_PFNMAP or VM_IO, 188 /// or secret memory regions created using memfd_secret(2). 189 /// 190 /// Note that with MADV_POPULATE_READ, the process can be killed 191 /// at any moment when the system runs out of memory. 192 #[cfg(target_os = "linux")] 193 PopulateRead = libc::MADV_POPULATE_READ, 194 195 /// **MADV_POPULATE_WRITE** - Linux only (since Linux 5.14) 196 /// 197 /// Populate (prefault) page tables writable, faulting in all 198 /// pages in the range just as if manually writing to each each 199 /// page; however, avoid the actual memory access that would have 200 /// been performed after handling the fault. 201 /// 202 /// In contrast to MAP_POPULATE, MADV_POPULATE_WRITE does not 203 /// hide errors, can be applied to (parts of) existing mappings 204 /// and will always populate (prefault) page tables writable. 205 /// One example use case is preallocating memory, breaking any 206 /// CoW (Copy on Write). 207 /// 208 /// Depending on the underlying mapping, preallocate memory or 209 /// read the underlying file; files with holes will preallocate 210 /// blocks. If populating fails, a SIGBUS signal is not gener‐ 211 /// ated; instead, an error is returned. 212 /// 213 /// If MADV_POPULATE_WRITE succeeds, all page tables have been 214 /// populated (prefaulted) writable once. If MADV_POPULATE_WRITE 215 /// fails, some page tables might have been populated. 216 /// 217 /// MADV_POPULATE_WRITE cannot be applied to mappings without 218 /// write permissions and special mappings, for example, mappings 219 /// marked with kernel-internal flags such as VM_PFNMAP or VM_IO, 220 /// or secret memory regions created using memfd_secret(2). 221 /// 222 /// Note that with MADV_POPULATE_WRITE, the process can be killed 223 /// at any moment when the system runs out of memory. 224 #[cfg(target_os = "linux")] 225 PopulateWrite = libc::MADV_POPULATE_WRITE, 226 227 /// **MADV_ZERO_WIRED_PAGES** - Darwin only 228 /// 229 /// Indicates that the application would like the wired pages in this address range to be 230 /// zeroed out if the address range is deallocated without first unwiring the pages (i.e. 231 /// a munmap(2) without a preceding munlock(2) or the application quits). This is used 232 /// with madvise() system call. 233 #[cfg(any(target_os = "macos", target_os = "ios"))] 234 ZeroWiredPages = libc::MADV_ZERO_WIRED_PAGES, 235 } 236 237 /// Values supported by [`Mmap::unsafe_advise`][crate::Mmap::unsafe_advise] and [`MmapMut::unsafe_advise`][crate::MmapMut::unsafe_advise] functions. 238 /// 239 /// These flags can be passed to the [madvise (2)][man_page] system call 240 /// and effects on the mapped pages which are conceptually writes, 241 /// i.e. the change the observable contents of these pages which 242 /// implies undefined behaviour if the mapping is still borrowed. 243 /// 244 /// Hence, these potentially unsafe flags must be used with the unsafe 245 /// methods and the programmer has to justify that the code 246 /// does not keep any borrows of the mapping active while the mapped pages 247 /// are updated by the kernel's memory management subsystem. 248 /// 249 /// [man_page]: https://man7.org/linux/man-pages/man2/madvise.2.html 250 #[repr(i32)] 251 #[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] 252 pub enum UncheckedAdvice { 253 /// **MADV_DONTNEED** 254 /// 255 /// Do not expect access in the near future. (For the time 256 /// being, the application is finished with the given range, 257 /// so the kernel can free resources associated with it.) 258 /// 259 /// After a successful MADV_DONTNEED operation, the semantics 260 /// of memory access in the specified region are changed: 261 /// subsequent accesses of pages in the range will succeed, 262 /// but will result in either repopulating the memory contents 263 /// from the up-to-date contents of the underlying mapped file 264 /// (for shared file mappings, shared anonymous mappings, and 265 /// shmem-based techniques such as System V shared memory 266 /// segments) or zero-fill-on-demand pages for anonymous 267 /// private mappings. 268 /// 269 /// Note that, when applied to shared mappings, MADV_DONTNEED 270 /// might not lead to immediate freeing of the pages in the 271 /// range. The kernel is free to delay freeing the pages 272 /// until an appropriate moment. The resident set size (RSS) 273 /// of the calling process will be immediately reduced 274 /// however. 275 /// 276 /// **MADV_DONTNEED** cannot be applied to locked pages, Huge TLB 277 /// pages, or VM_PFNMAP pages. (Pages marked with the kernel- 278 /// internal VM_PFNMAP flag are special memory areas that are 279 /// not managed by the virtual memory subsystem. Such pages 280 /// are typically created by device drivers that map the pages 281 /// into user space.) 282 /// 283 /// # Safety 284 /// 285 /// Using the returned value with conceptually write to the 286 /// mapped pages, i.e. borrowing the mapping when the pages 287 /// are freed results in undefined behaviour. 288 DontNeed = libc::MADV_DONTNEED, 289 290 // 291 // The rest are Linux-specific 292 // 293 /// **MADV_FREE** - Linux (since Linux 4.5) and Darwin 294 /// 295 /// The application no longer requires the pages in the range 296 /// specified by addr and len. The kernel can thus free these 297 /// pages, but the freeing could be delayed until memory 298 /// pressure occurs. For each of the pages that has been 299 /// marked to be freed but has not yet been freed, the free 300 /// operation will be canceled if the caller writes into the 301 /// page. After a successful MADV_FREE operation, any stale 302 /// data (i.e., dirty, unwritten pages) will be lost when the 303 /// kernel frees the pages. However, subsequent writes to 304 /// pages in the range will succeed and then kernel cannot 305 /// free those dirtied pages, so that the caller can always 306 /// see just written data. If there is no subsequent write, 307 /// the kernel can free the pages at any time. Once pages in 308 /// the range have been freed, the caller will see zero-fill- 309 /// on-demand pages upon subsequent page references. 310 /// 311 /// The MADV_FREE operation can be applied only to private 312 /// anonymous pages (see mmap(2)). In Linux before version 313 /// 4.12, when freeing pages on a swapless system, the pages 314 /// in the given range are freed instantly, regardless of 315 /// memory pressure. 316 /// 317 /// # Safety 318 /// 319 /// Using the returned value with conceptually write to the 320 /// mapped pages, i.e. borrowing the mapping while the pages 321 /// are still being freed results in undefined behaviour. 322 #[cfg(any(target_os = "linux", target_os = "macos", target_os = "ios"))] 323 Free = libc::MADV_FREE, 324 325 /// **MADV_REMOVE** - Linux only (since Linux 2.6.16) 326 /// 327 /// Free up a given range of pages and its associated backing 328 /// store. This is equivalent to punching a hole in the 329 /// corresponding byte range of the backing store (see 330 /// fallocate(2)). Subsequent accesses in the specified 331 /// address range will see bytes containing zero. 332 /// 333 /// The specified address range must be mapped shared and 334 /// writable. This flag cannot be applied to locked pages, 335 /// Huge TLB pages, or VM_PFNMAP pages. 336 /// 337 /// In the initial implementation, only tmpfs(5) was supported 338 /// **MADV_REMOVE**; but since Linux 3.5, any filesystem which 339 /// supports the fallocate(2) FALLOC_FL_PUNCH_HOLE mode also 340 /// supports MADV_REMOVE. Hugetlbfs fails with the error 341 /// EINVAL and other filesystems fail with the error 342 /// EOPNOTSUPP. 343 /// 344 /// # Safety 345 /// 346 /// Using the returned value with conceptually write to the 347 /// mapped pages, i.e. borrowing the mapping when the pages 348 /// are freed results in undefined behaviour. 349 #[cfg(target_os = "linux")] 350 Remove = libc::MADV_REMOVE, 351 352 /// **MADV_FREE_REUSABLE** - Darwin only 353 /// 354 /// Behaves like **MADV_FREE**, but the freed pages are accounted for in the RSS of the process. 355 /// 356 /// # Safety 357 /// 358 /// Using the returned value with conceptually write to the 359 /// mapped pages, i.e. borrowing the mapping while the pages 360 /// are still being freed results in undefined behaviour. 361 #[cfg(any(target_os = "macos", target_os = "ios"))] 362 FreeReusable = libc::MADV_FREE_REUSABLE, 363 364 /// **MADV_FREE_REUSE** - Darwin only 365 /// 366 /// Marks a memory region previously freed by **MADV_FREE_REUSABLE** as non-reusable, accounts 367 /// for the pages in the RSS of the process. Pages that have been freed will be replaced by 368 /// zero-filled pages on demand, other pages will be left as is. 369 /// 370 /// # Safety 371 /// 372 /// Using the returned value with conceptually write to the 373 /// mapped pages, i.e. borrowing the mapping while the pages 374 /// are still being freed results in undefined behaviour. 375 #[cfg(any(target_os = "macos", target_os = "ios"))] 376 FreeReuse = libc::MADV_FREE_REUSE, 377 } 378 379 // Future expansion: 380 // MADV_SOFT_OFFLINE (since Linux 2.6.33) 381 // MADV_WIPEONFORK (since Linux 4.14) 382 // MADV_KEEPONFORK (since Linux 4.14) 383 // MADV_COLD (since Linux 5.4) 384 // MADV_PAGEOUT (since Linux 5.4) 385 386 #[cfg(target_os = "linux")] 387 impl Advice { 388 /// Performs a runtime check if this advice is supported by the kernel. 389 /// Only supported on Linux. See the [`madvise(2)`] man page. 390 /// 391 /// [`madvise(2)`]: https://man7.org/linux/man-pages/man2/madvise.2.html#VERSIONS is_supported(self) -> bool392 pub fn is_supported(self) -> bool { 393 (unsafe { libc::madvise(std::ptr::null_mut(), 0, self as libc::c_int) }) == 0 394 } 395 } 396 397 #[cfg(target_os = "linux")] 398 impl UncheckedAdvice { 399 /// Performs a runtime check if this advice is supported by the kernel. 400 /// Only supported on Linux. See the [`madvise(2)`] man page. 401 /// 402 /// [`madvise(2)`]: https://man7.org/linux/man-pages/man2/madvise.2.html#VERSIONS is_supported(self) -> bool403 pub fn is_supported(self) -> bool { 404 (unsafe { libc::madvise(std::ptr::null_mut(), 0, self as libc::c_int) }) == 0 405 } 406 } 407 408 #[cfg(test)] 409 mod tests { 410 #[cfg(target_os = "linux")] 411 #[test] test_is_supported()412 fn test_is_supported() { 413 use super::*; 414 415 assert!(Advice::Normal.is_supported()); 416 assert!(Advice::Random.is_supported()); 417 assert!(Advice::Sequential.is_supported()); 418 assert!(Advice::WillNeed.is_supported()); 419 420 assert!(UncheckedAdvice::DontNeed.is_supported()); 421 } 422 } 423