// Copyright (c) 2016 The vulkano developers
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.

//! Device memory allocation and memory pools.
//!
//! By default, memory allocation is automatically handled by the vulkano library when you create
//! a buffer or an image. But if you want more control, you have the possibility to customise the
//! memory allocation strategy.
//!
//! # Memory types and heaps
//!
//! A physical device is composed of one or more **memory heaps**. A memory heap is a pool of
//! memory that can be allocated.
//!
//! ```
//! // Enumerating memory heaps.
//! # let physical_device: vulkano::device::physical::PhysicalDevice = return;
//! for (index, heap) in physical_device.memory_properties().memory_heaps.iter().enumerate() {
//!     println!("Heap #{:?} has a capacity of {:?} bytes", index, heap.size);
//! }
//! ```
//!
//! However you can't allocate directly from a memory heap. A memory heap is shared amongst one or
//! multiple **memory types**, which you can allocate memory from. Each memory type has different
//! characteristics.
//!
//! A memory type may or may not be visible to the host. In other words, it may or may not be
//! directly writable by the CPU. A memory type may or may not be device-local. A device-local
//! memory type has a much quicker access time from the GPU than a non-device-local type. Note
//! that non-device-local memory types are still accessible by the device, they are just slower.
//!
//! ```
//! // Enumerating memory types.
//! # let physical_device: vulkano::device::physical::PhysicalDevice = return;
//! for ty in physical_device.memory_properties().memory_types.iter() {
//!     println!("Memory type belongs to heap #{:?}", ty.heap_index);
//!     println!("Property flags: {:?}", ty.property_flags);
//! }
//! ```
//!
//! Memory types are order from "best" to "worse". In other words, the implementation prefers that
//! you use the memory types that are earlier in the list. This means that selecting a memory type
//! should always be done by enumerating them and taking the first one that matches our criteria.
//!
//! ## In practice
//!
//! In practice, desktop machines usually have two memory heaps: one that represents the RAM of
//! the CPU, and one that represents the RAM of the GPU. The CPU's RAM is host-accessible but not
//! device-local, while the GPU's RAM is not host-accessible but is device-local.
//!
//! Mobile machines usually have a single memory heap that is "equally local" to both the CPU and
//! the GPU. It is both host-accessible and device-local.
//!
//! # Allocating memory and memory pools
//!
//! Allocating memory can be done by calling `DeviceMemory::allocate()`.
//!
//! Here is an example:
//!
//! ```
//! use vulkano::memory::{DeviceMemory, MemoryAllocateInfo};
//!
//! # let device: std::sync::Arc<vulkano::device::Device> = return;
//! // Taking the first memory type for the sake of this example.
//! let memory_type_index = 0;
//!
//! let memory = DeviceMemory::allocate(
//!     device.clone(),
//!     MemoryAllocateInfo {
//!         allocation_size: 1024,
//!         memory_type_index,
//!         ..Default::default()
//!     },
//! ).expect("Failed to allocate memory");
//!
//! // The memory is automatically freed when `memory` is destroyed.
//! ```
//!
//! However allocating and freeing memory is very slow (up to several hundred milliseconds
//! sometimes). Instead you are strongly encouraged to use a memory pool. A memory pool is not
//! a Vulkan concept but a vulkano concept.
//!
//! A memory pool is any object that implements the `MemoryPool` trait. You can implement that
//! trait on your own structure and then use it when you create buffers and images so that they
//! get memory from that pool. By default if you don't specify any pool when creating a buffer or
//! an image, an instance of `StandardMemoryPool` that is shared by the `Device` object is used.

pub use self::alignment::DeviceAlignment;
use self::allocator::DeviceLayout;
pub use self::device_memory::{
    DeviceMemory, DeviceMemoryError, ExternalMemoryHandleType, ExternalMemoryHandleTypes,
    MappedDeviceMemory, MemoryAllocateFlags, MemoryAllocateInfo, MemoryImportInfo, MemoryMapError,
};
use crate::{
    buffer::{sys::RawBuffer, Subbuffer},
    image::{sys::RawImage, ImageAccess, ImageAspects},
    macros::vulkan_bitflags,
    sync::semaphore::Semaphore,
    DeviceSize,
};
use std::{
    num::NonZeroU64,
    ops::{Bound, Range, RangeBounds, RangeTo},
    sync::Arc,
};

mod alignment;
pub mod allocator;
mod device_memory;

/// Properties of the memory in a physical device.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct MemoryProperties {
    /// The available memory types.
    pub memory_types: Vec<MemoryType>,

    /// The available memory heaps.
    pub memory_heaps: Vec<MemoryHeap>,
}

impl From<ash::vk::PhysicalDeviceMemoryProperties> for MemoryProperties {
    #[inline]
    fn from(val: ash::vk::PhysicalDeviceMemoryProperties) -> Self {
        Self {
            memory_types: val.memory_types[0..val.memory_type_count as usize]
                .iter()
                .map(|vk_memory_type| MemoryType {
                    property_flags: vk_memory_type.property_flags.into(),
                    heap_index: vk_memory_type.heap_index,
                })
                .collect(),
            memory_heaps: val.memory_heaps[0..val.memory_heap_count as usize]
                .iter()
                .map(|vk_memory_heap| MemoryHeap {
                    size: vk_memory_heap.size,
                    flags: vk_memory_heap.flags.into(),
                })
                .collect(),
        }
    }
}

/// A memory type in a physical device.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct MemoryType {
    /// The properties of this memory type.
    pub property_flags: MemoryPropertyFlags,

    /// The index of the memory heap that this memory type corresponds to.
    pub heap_index: u32,
}

vulkan_bitflags! {
    #[non_exhaustive]

    /// Properties of a memory type.
    MemoryPropertyFlags = MemoryPropertyFlags(u32);

    /// The memory is located on the device, and is allocated from a heap that also has the
    /// [`DEVICE_LOCAL`] flag set.
    ///
    /// For some devices, particularly integrated GPUs, the device shares memory with the host and
    /// all memory may be device-local, so the distinction is moot. However, if the device has
    /// non-device-local memory, it is usually faster for the device to access device-local memory.
    /// Therefore, device-local memory is preferred for data that will only be accessed by
    /// the device.
    ///
    /// If the device and host do not share memory, data transfer between host and device may
    /// involve sending the data over the data bus that connects the two. Accesses are faster if
    /// they do not have to cross this barrier: device-local memory is fast for the device to
    /// access, but slower to access by the host. However, there are devices that share memory with
    /// the host, yet have distinct device-local and non-device local memory types. In that case,
    /// the speed difference may not be large.
    ///
    /// For data transfer between host and device, it is most efficient if the memory is located
    /// at the destination of the transfer. Thus, if [`HOST_VISIBLE`] versions of both are
    /// available, device-local memory is preferred for host-to-device data transfer, while
    /// non-device-local memory is preferred for device-to-host data transfer. This is because data
    /// is usually written only once but potentially read several times, and because reads can take
    /// advantage of caching while writes cannot.
    ///
    /// Devices may have memory types that are neither `DEVICE_LOCAL` nor [`HOST_VISIBLE`]. This
    /// is regular host memory that is made available to the device exclusively. Although it will be
    /// slower to access from the device than `DEVICE_LOCAL` memory, it can be faster than
    /// [`HOST_VISIBLE`] memory. It can be used as overflow space if the device is out of memory.
    ///
    /// [`DEVICE_LOCAL`]: MemoryHeapFlags::DEVICE_LOCAL
    /// [`HOST_VISIBLE`]: MemoryPropertyFlags::HOST_VISIBLE
    DEVICE_LOCAL = DEVICE_LOCAL,

    /// The memory can be mapped into the memory space of the host and accessed as regular RAM.
    ///
    /// Memory of this type is required to transfer data between the host and the device. If
    /// the memory is going to be accessed by the device more than a few times, it is recommended
    /// to copy the data to non-`HOST_VISIBLE` memory first if it is available.
    ///
    /// `HOST_VISIBLE` memory is always at least either [`HOST_COHERENT`] or [`HOST_CACHED`],
    /// but it can be both.
    ///
    /// [`HOST_COHERENT`]: MemoryPropertyFlags::HOST_COHERENT
    /// [`HOST_CACHED`]: MemoryPropertyFlags::HOST_CACHED
    HOST_VISIBLE = HOST_VISIBLE,

    /// Host access to the memory does not require calling [`invalidate_range`] to make device
    /// writes visible to the host, nor [`flush_range`] to flush host writes back to the device.
    ///
    /// [`invalidate_range`]: MappedDeviceMemory::invalidate_range
    /// [`flush_range`]: MappedDeviceMemory::flush_range
    HOST_COHERENT = HOST_COHERENT,

    /// The memory is cached by the host.
    ///
    /// `HOST_CACHED` memory is fast for reads and random access from the host, so it is preferred
    /// for device-to-host data transfer. Memory that is [`HOST_VISIBLE`] but not `HOST_CACHED` is
    /// often slow for all accesses other than sequential writing, so it is more suited for
    /// host-to-device transfer, and it is often beneficial to write the data in sequence.
    ///
    /// [`HOST_VISIBLE`]: MemoryPropertyFlags::HOST_VISIBLE
    HOST_CACHED = HOST_CACHED,

    /// Allocations made from the memory are lazy.
    ///
    /// This means that no actual allocation is performed. Instead memory is automatically
    /// allocated by the Vulkan implementation based on need. You can call
    /// [`DeviceMemory::commitment`] to query how much memory is currently committed to an
    /// allocation.
    ///
    /// Memory of this type can only be used on images created with a certain flag, and is never
    /// [`HOST_VISIBLE`].
    ///
    /// [`HOST_VISIBLE`]: MemoryPropertyFlags::HOST_VISIBLE
    LAZILY_ALLOCATED = LAZILY_ALLOCATED,

    /// The memory can only be accessed by the device, and allows protected queue access.
    ///
    /// Memory of this type is never [`HOST_VISIBLE`], [`HOST_COHERENT`] or [`HOST_CACHED`].
    ///
    /// [`HOST_VISIBLE`]: MemoryPropertyFlags::HOST_VISIBLE
    /// [`HOST_COHERENT`]: MemoryPropertyFlags::HOST_COHERENT
    /// [`HOST_CACHED`]: MemoryPropertyFlags::HOST_CACHED
    PROTECTED = PROTECTED {
        api_version: V1_1,
    },

    /// Device accesses to the memory are automatically made available and visible to other device
    /// accesses.
    ///
    /// Memory of this type is slower to access by the device, so it is best avoided for general
    /// purpose use. Because of its coherence properties, however, it may be useful for debugging.
    DEVICE_COHERENT = DEVICE_COHERENT_AMD {
        device_extensions: [amd_device_coherent_memory],
    },

    /// The memory is not cached on the device.
    ///
    /// `DEVICE_UNCACHED` memory is always also [`DEVICE_COHERENT`].
    ///
    /// [`DEVICE_COHERENT`]: MemoryPropertyFlags::DEVICE_COHERENT
    DEVICE_UNCACHED = DEVICE_UNCACHED_AMD {
        device_extensions: [amd_device_coherent_memory],
    },

    /// Other devices can access the memory via remote direct memory access (RDMA).
    RDMA_CAPABLE = RDMA_CAPABLE_NV {
        device_extensions: [nv_external_memory_rdma],
    },
}

/// A memory heap in a physical device.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct MemoryHeap {
    /// The size of the heap in bytes.
    pub size: DeviceSize,

    /// Attributes of the heap.
    pub flags: MemoryHeapFlags,
}

vulkan_bitflags! {
    #[non_exhaustive]

    /// Attributes of a memory heap.
    MemoryHeapFlags = MemoryHeapFlags(u32);

    /// The heap corresponds to device-local memory.
    DEVICE_LOCAL = DEVICE_LOCAL,

    /// If used on a logical device that represents more than one physical device, allocations are
    /// replicated across each physical device's instance of this heap.
    MULTI_INSTANCE = MULTI_INSTANCE {
        api_version: V1_1,
        instance_extensions: [khr_device_group_creation],
    },
}

/// Represents requirements expressed by the Vulkan implementation when it comes to binding memory
/// to a resource.
#[derive(Clone, Copy, Debug)]
pub struct MemoryRequirements {
    /// Memory layout required for the resource.
    pub layout: DeviceLayout,

    /// Indicates which memory types can be used. Each bit that is set to 1 means that the memory
    /// type whose index is the same as the position of the bit can be used.
    pub memory_type_bits: u32,

    /// Whether implementation prefers to use dedicated allocations (in other words, allocate
    /// a whole block of memory dedicated to this resource alone).
    /// This will be `false` if the device API version is less than 1.1 and the
    /// [`khr_get_memory_requirements2`](crate::device::DeviceExtensions::khr_get_memory_requirements2)
    /// extension is not enabled on the device.
    pub prefers_dedicated_allocation: bool,

    /// Whether implementation requires the use of a dedicated allocation (in other words, allocate
    /// a whole block of memory dedicated to this resource alone).
    /// This will be `false` if the device API version is less than 1.1 and the
    /// [`khr_get_memory_requirements2`](crate::device::DeviceExtensions::khr_get_memory_requirements2)
    /// extension is not enabled on the device.
    pub requires_dedicated_allocation: bool,
}

/// Indicates a specific resource to allocate memory for.
///
/// Using dedicated allocations can yield better performance, but requires the
/// [`khr_dedicated_allocation`](crate::device::DeviceExtensions::khr_dedicated_allocation)
/// extension to be enabled on the device.
///
/// If a dedicated allocation is performed, it must not be bound to any resource other than the
/// one that was passed with the enumeration.
#[derive(Clone, Copy, Debug)]
pub enum DedicatedAllocation<'a> {
    /// Allocation dedicated to a buffer.
    Buffer(&'a RawBuffer),
    /// Allocation dedicated to an image.
    Image(&'a RawImage),
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum DedicatedTo {
    Buffer(NonZeroU64),
    Image(NonZeroU64),
}

impl From<DedicatedAllocation<'_>> for DedicatedTo {
    fn from(dedicated_allocation: DedicatedAllocation<'_>) -> Self {
        match dedicated_allocation {
            DedicatedAllocation::Buffer(buffer) => Self::Buffer(buffer.id()),
            DedicatedAllocation::Image(image) => Self::Image(image.id()),
        }
    }
}

/// The properties for exporting or importing external memory, when a buffer or image is created
/// with a specific configuration.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct ExternalMemoryProperties {
    /// Whether a dedicated memory allocation is required for the queried external handle type.
    pub dedicated_only: bool,

    /// Whether memory can be exported to an external source with the queried
    /// external handle type.
    pub exportable: bool,

    /// Whether memory can be imported from an external source with the queried
    /// external handle type.
    pub importable: bool,

    /// Which external handle types can be re-exported after the queried external handle type has
    /// been imported.
    pub export_from_imported_handle_types: ExternalMemoryHandleTypes,

    /// Which external handle types can be enabled along with the queried external handle type
    /// when creating the buffer or image.
    pub compatible_handle_types: ExternalMemoryHandleTypes,
}

impl From<ash::vk::ExternalMemoryProperties> for ExternalMemoryProperties {
    #[inline]
    fn from(val: ash::vk::ExternalMemoryProperties) -> Self {
        Self {
            dedicated_only: val
                .external_memory_features
                .intersects(ash::vk::ExternalMemoryFeatureFlags::DEDICATED_ONLY),
            exportable: val
                .external_memory_features
                .intersects(ash::vk::ExternalMemoryFeatureFlags::EXPORTABLE),
            importable: val
                .external_memory_features
                .intersects(ash::vk::ExternalMemoryFeatureFlags::IMPORTABLE),
            export_from_imported_handle_types: val.export_from_imported_handle_types.into(),
            compatible_handle_types: val.compatible_handle_types.into(),
        }
    }
}

/// Parameters to execute sparse bind operations on a queue.
#[derive(Clone, Debug)]
pub struct BindSparseInfo {
    /// The semaphores to wait for before beginning the execution of this batch of
    /// sparse bind operations.
    ///
    /// The default value is empty.
    pub wait_semaphores: Vec<Arc<Semaphore>>,

    /// The bind operations to perform for buffers.
    ///
    /// The default value is empty.
    pub buffer_binds: Vec<(Subbuffer<[u8]>, Vec<SparseBufferMemoryBind>)>,

    /// The bind operations to perform for images with an opaque memory layout.
    ///
    /// This should be used for mip tail regions, the metadata aspect, and for the normal regions
    /// of images that do not have the `sparse_residency` flag set.
    ///
    /// The default value is empty.
    pub image_opaque_binds: Vec<(Arc<dyn ImageAccess>, Vec<SparseImageOpaqueMemoryBind>)>,

    /// The bind operations to perform for images with a known memory layout.
    ///
    /// This type of sparse bind can only be used for images that have the `sparse_residency`
    /// flag set.
    /// Only the normal texel regions can be bound this way, not the mip tail regions or metadata
    /// aspect.
    ///
    /// The default value is empty.
    pub image_binds: Vec<(Arc<dyn ImageAccess>, Vec<SparseImageMemoryBind>)>,

    /// The semaphores to signal after the execution of this batch of sparse bind operations
    /// has completed.
    ///
    /// The default value is empty.
    pub signal_semaphores: Vec<Arc<Semaphore>>,

    pub _ne: crate::NonExhaustive,
}

impl Default for BindSparseInfo {
    #[inline]
    fn default() -> Self {
        Self {
            wait_semaphores: Vec::new(),
            buffer_binds: Vec::new(),
            image_opaque_binds: Vec::new(),
            image_binds: Vec::new(),
            signal_semaphores: Vec::new(),
            _ne: crate::NonExhaustive(()),
        }
    }
}

/// Parameters for a single sparse bind operation on a buffer.
#[derive(Clone, Debug, Default)]
pub struct SparseBufferMemoryBind {
    /// The offset in bytes from the start of the buffer's memory, where memory is to be (un)bound.
    ///
    /// The default value is `0`.
    pub offset: DeviceSize,

    /// The size in bytes of the memory to be (un)bound.
    ///
    /// The default value is `0`, which must be overridden.
    pub size: DeviceSize,

    /// If `Some`, specifies the memory and an offset into that memory that is to be bound.
    /// The provided memory must match the buffer's memory requirements.
    ///
    /// If `None`, specifies that existing memory at the specified location is to be unbound.
    ///
    /// The default value is `None`.
    pub memory: Option<(Arc<DeviceMemory>, DeviceSize)>,
}

/// Parameters for a single sparse bind operation on parts of an image with an opaque memory layout.
///
/// This type of sparse bind should be used for mip tail regions, the metadata aspect, and for the
/// normal regions of images that do not have the `sparse_residency` flag set.
#[derive(Clone, Debug, Default)]
pub struct SparseImageOpaqueMemoryBind {
    /// The offset in bytes from the start of the image's memory, where memory is to be (un)bound.
    ///
    /// The default value is `0`.
    pub offset: DeviceSize,

    /// The size in bytes of the memory to be (un)bound.
    ///
    /// The default value is `0`, which must be overridden.
    pub size: DeviceSize,

    /// If `Some`, specifies the memory and an offset into that memory that is to be bound.
    /// The provided memory must match the image's memory requirements.
    ///
    /// If `None`, specifies that existing memory at the specified location is to be unbound.
    ///
    /// The default value is `None`.
    pub memory: Option<(Arc<DeviceMemory>, DeviceSize)>,

    /// Sets whether the binding should apply to the metadata aspect of the image, or to the
    /// normal texel data.
    ///
    /// The default value is `false`.
    pub metadata: bool,
}

/// Parameters for a single sparse bind operation on parts of an image with a known memory layout.
///
/// This type of sparse bind can only be used for images that have the `sparse_residency` flag set.
/// Only the normal texel regions can be bound this way, not the mip tail regions or metadata
/// aspect.
#[derive(Clone, Debug, Default)]
pub struct SparseImageMemoryBind {
    /// The aspects of the image where memory is to be (un)bound.
    ///
    /// The default value is `ImageAspects::empty()`, which must be overridden.
    pub aspects: ImageAspects,

    /// The mip level of the image where memory is to be (un)bound.
    ///
    /// The default value is `0`.
    pub mip_level: u32,

    /// The array layer of the image where memory is to be (un)bound.
    ///
    /// The default value is `0`.
    pub array_layer: u32,

    /// The offset in texels (or for compressed images, texel blocks) from the origin of the image,
    /// where memory is to be (un)bound.
    ///
    /// This must be a multiple of the
    /// [`SparseImageFormatProperties::image_granularity`](crate::image::SparseImageFormatProperties::image_granularity)
    /// value of the image.
    ///
    /// The default value is `[0; 3]`.
    pub offset: [u32; 3],

    /// The extent in texels (or for compressed images, texel blocks) of the image where
    /// memory is to be (un)bound.
    ///
    /// This must be a multiple of the
    /// [`SparseImageFormatProperties::image_granularity`](crate::image::SparseImageFormatProperties::image_granularity)
    /// value of the image, or `offset + extent` for that dimension must equal the image's total
    /// extent.
    ///
    /// The default value is `[0; 3]`, which must be overridden.
    pub extent: [u32; 3],

    /// If `Some`, specifies the memory and an offset into that memory that is to be bound.
    /// The provided memory must match the image's memory requirements.
    ///
    /// If `None`, specifies that existing memory at the specified location is to be unbound.
    ///
    /// The default value is `None`.
    pub memory: Option<(Arc<DeviceMemory>, DeviceSize)>,
}

#[inline(always)]
pub(crate) fn is_aligned(offset: DeviceSize, alignment: DeviceAlignment) -> bool {
    offset & (alignment.as_devicesize() - 1) == 0
}

/// Performs bounds-checking of a Vulkan memory range. Analog of `std::slice::range`.
pub(crate) fn range(
    range: impl RangeBounds<DeviceSize>,
    bounds: RangeTo<DeviceSize>,
) -> Option<Range<DeviceSize>> {
    let len = bounds.end;

    let start = match range.start_bound() {
        Bound::Included(&start) => start,
        Bound::Excluded(start) => start.checked_add(1)?,
        Bound::Unbounded => 0,
    };

    let end = match range.end_bound() {
        Bound::Included(end) => end.checked_add(1)?,
        Bound::Excluded(&end) => end,
        Bound::Unbounded => len,
    };

    (start <= end && end <= len).then_some(Range { start, end })
}