// Copyright (c) 2016 The vulkano developers
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or https://www.apache.org/licenses/LICENSE-2.0>
// or the MIT
// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.

//! Buffer whose content is accessible to the CPU.
//!
//! The `CpuAccessibleBuffer` is a basic general-purpose buffer. It can be used in any situation
//! but may not perform as well as other buffer types.
//!
//! Each access from the CPU or from the GPU locks the whole buffer for either reading or writing.
//! You can read the buffer multiple times simultaneously. Trying to read and write simultaneously,
//! or to write and write simultaneously, will block.

use crate::buffer::sys::BufferCreationError;
use crate::buffer::sys::UnsafeBuffer;
use crate::buffer::traits::BufferAccess;
use crate::buffer::traits::BufferInner;
use crate::buffer::traits::TypedBufferAccess;
use crate::buffer::BufferUsage;
use crate::device::physical::QueueFamily;
use crate::device::Device;
use crate::device::DeviceOwned;
use crate::device::Queue;
use crate::memory::pool::AllocFromRequirementsFilter;
use crate::memory::pool::AllocLayout;
use crate::memory::pool::MappingRequirement;
use crate::memory::pool::MemoryPool;
use crate::memory::pool::MemoryPoolAlloc;
use crate::memory::pool::PotentialDedicatedAllocation;
use crate::memory::pool::StdMemoryPoolAlloc;
use crate::memory::Content;
use crate::memory::CpuAccess as MemCpuAccess;
use crate::memory::DedicatedAlloc;
use crate::memory::DeviceMemoryAllocError;
use crate::sync::AccessError;
use crate::sync::Sharing;
use crate::DeviceSize;
use parking_lot::RwLock;
use parking_lot::RwLockReadGuard;
use parking_lot::RwLockWriteGuard;
use smallvec::SmallVec;
use std::error;
use std::fmt;
use std::hash::Hash;
use std::hash::Hasher;
use std::iter;
use std::marker::PhantomData;
use std::mem;
use std::ops::Deref;
use std::ops::DerefMut;
use std::ptr;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Arc;

/// Buffer whose content is accessible by the CPU.
///
/// Setting the `host_cached` parameter on the various constructors to `true` will make the
/// `CpuAccessibleBuffer` prefer to allocate from host-cached memory. Host-cached memory caches
/// GPU data on the CPU side. This can be more performant in cases where the CPU needs to read
/// data coming off the GPU.
#[derive(Debug)]
pub struct CpuAccessibleBuffer<T: ?Sized, A = PotentialDedicatedAllocation<StdMemoryPoolAlloc>> {
    // Inner content.
    inner: UnsafeBuffer,

    // The memory held by the buffer.
    memory: A,

    // Access pattern of the buffer.
    // Every time the user tries to read or write the buffer from the CPU, this `RwLock` is kept
    // locked and its content is checked to verify that we are allowed access. Every time the user
    // tries to submit this buffer for the GPU, this `RwLock` is briefly locked and modified.
    access: RwLock<CurrentGpuAccess>,

    // Queue families allowed to access this buffer.
    queue_families: SmallVec<[u32; 4]>,

    // Necessary to make it compile.
    marker: PhantomData<Box<T>>,
}

#[derive(Debug)]
enum CurrentGpuAccess {
    NonExclusive {
        // Number of non-exclusive GPU accesses. Can be 0.
        num: AtomicUsize,
    },
    Exclusive {
        // Number of exclusive locks. Cannot be 0. If 0 is reached, we must jump to `NonExclusive`.
        num: usize,
    },
}

impl<T> CpuAccessibleBuffer<T> {
    /// Builds a new buffer with some data in it. Only allowed for sized data.
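    ///
    /// # Example
    ///
    /// A minimal sketch of typical usage; it assumes a `device` (an `Arc<Device>`) has already
    /// been created elsewhere, which is why the example is not compiled:
    ///
    /// ```ignore
    /// use vulkano::buffer::{BufferUsage, CpuAccessibleBuffer};
    ///
    /// let data: u32 = 12;
    /// let buffer = CpuAccessibleBuffer::from_data(
    ///     device.clone(),
    ///     BufferUsage::all(),
    ///     false, // host_cached: prefer non-host-cached memory
    ///     data,
    /// )
    /// .expect("failed to allocate buffer");
    /// ```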
    pub fn from_data(
        device: Arc<Device>,
        usage: BufferUsage,
        host_cached: bool,
        data: T,
    ) -> Result<Arc<CpuAccessibleBuffer<T>>, DeviceMemoryAllocError>
    where
        T: Content + Copy + 'static,
    {
        unsafe {
            let uninitialized = CpuAccessibleBuffer::raw(
                device,
                mem::size_of::<T>() as DeviceSize,
                usage,
                host_cached,
                iter::empty(),
            )?;

            // Note that we are in panic-unsafety land here. However a panic should never ever
            // happen here, so in theory we are safe.
            // TODO: check whether that's true ^

            {
                let mut mapping = uninitialized.write().unwrap();
                ptr::write::<T>(&mut *mapping, data)
            }

            Ok(uninitialized)
        }
    }

    /// Builds a new uninitialized buffer. Only allowed for sized data.
    #[inline]
    pub unsafe fn uninitialized(
        device: Arc<Device>,
        usage: BufferUsage,
        host_cached: bool,
    ) -> Result<Arc<CpuAccessibleBuffer<T>>, DeviceMemoryAllocError> {
        CpuAccessibleBuffer::raw(
            device,
            mem::size_of::<T>() as DeviceSize,
            usage,
            host_cached,
            iter::empty(),
        )
    }
}

impl<T> CpuAccessibleBuffer<[T]> {
    /// Builds a new buffer that contains an array of `T`. The initial data comes from an iterator
    /// that produces that list of `T`s.
    pub fn from_iter<I>(
        device: Arc<Device>,
        usage: BufferUsage,
        host_cached: bool,
        data: I,
    ) -> Result<Arc<CpuAccessibleBuffer<[T]>>, DeviceMemoryAllocError>
    where
        I: ExactSizeIterator<Item = T>,
        T: Content + 'static,
    {
        unsafe {
            let uninitialized = CpuAccessibleBuffer::uninitialized_array(
                device,
                data.len() as DeviceSize,
                usage,
                host_cached,
            )?;

            // Note that we are in panic-unsafety land here. However a panic should never ever
            // happen here, so in theory we are safe.
            // TODO: check whether that's true ^

            {
                let mut mapping = uninitialized.write().unwrap();

                for (i, o) in data.zip(mapping.iter_mut()) {
                    ptr::write(o, i);
                }
            }

            Ok(uninitialized)
        }
    }

    /// Builds a new uninitialized buffer. Can be used for arrays.
    #[inline]
    pub unsafe fn uninitialized_array(
        device: Arc<Device>,
        len: DeviceSize,
        usage: BufferUsage,
        host_cached: bool,
    ) -> Result<Arc<CpuAccessibleBuffer<[T]>>, DeviceMemoryAllocError> {
        CpuAccessibleBuffer::raw(
            device,
            len * mem::size_of::<T>() as DeviceSize,
            usage,
            host_cached,
            iter::empty(),
        )
    }
}

impl<T: ?Sized> CpuAccessibleBuffer<T> {
    /// Builds a new buffer without checking the size.
    ///
    /// # Safety
    ///
    /// You must ensure that the size that you pass is correct for `T`.
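    ///
    /// # Example
    ///
    /// A sketch showing how the sized constructors above use this function; `device` is assumed
    /// to be an existing `Arc<Device>`:
    ///
    /// ```ignore
    /// use std::{iter, mem};
    /// use vulkano::DeviceSize;
    ///
    /// let buffer = unsafe {
    ///     CpuAccessibleBuffer::<u32>::raw(
    ///         device.clone(),
    ///         mem::size_of::<u32>() as DeviceSize, // size must be correct for `T`
    ///         BufferUsage::all(),
    ///         false,         // host_cached
    ///         iter::empty(), // no explicit queue families: exclusive sharing
    ///     )
    /// }
    /// .expect("failed to allocate buffer");
    /// ```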
    pub unsafe fn raw<'a, I>(
        device: Arc<Device>,
        size: DeviceSize,
        usage: BufferUsage,
        host_cached: bool,
        queue_families: I,
    ) -> Result<Arc<CpuAccessibleBuffer<T>>, DeviceMemoryAllocError>
    where
        I: IntoIterator<Item = QueueFamily<'a>>,
    {
        let queue_families = queue_families
            .into_iter()
            .map(|f| f.id())
            .collect::<SmallVec<[u32; 4]>>();

        let (buffer, mem_reqs) = {
            let sharing = if queue_families.len() >= 2 {
                Sharing::Concurrent(queue_families.iter().cloned())
            } else {
                Sharing::Exclusive
            };

            match UnsafeBuffer::new(device.clone(), size, usage, sharing, None) {
                Ok(b) => b,
                Err(BufferCreationError::AllocError(err)) => return Err(err),
                Err(_) => unreachable!(), // We don't use sparse binding, therefore the other
                                          // errors can't happen
            }
        };

        let mem = MemoryPool::alloc_from_requirements(
            &Device::standard_pool(&device),
            &mem_reqs,
            AllocLayout::Linear,
            MappingRequirement::Map,
            DedicatedAlloc::Buffer(&buffer),
            |m| {
                if m.is_host_cached() {
                    if host_cached {
                        AllocFromRequirementsFilter::Preferred
                    } else {
                        AllocFromRequirementsFilter::Allowed
                    }
                } else {
                    if host_cached {
                        AllocFromRequirementsFilter::Allowed
                    } else {
                        AllocFromRequirementsFilter::Preferred
                    }
                }
            },
        )?;
        debug_assert!((mem.offset() % mem_reqs.alignment) == 0);
        debug_assert!(mem.mapped_memory().is_some());
        buffer.bind_memory(mem.memory(), mem.offset())?;

        Ok(Arc::new(CpuAccessibleBuffer {
            inner: buffer,
            memory: mem,
            access: RwLock::new(CurrentGpuAccess::NonExclusive {
                num: AtomicUsize::new(0),
            }),
            queue_families,
            marker: PhantomData,
        }))
    }
}

impl<T: ?Sized, A> CpuAccessibleBuffer<T, A> {
    /// Returns the queue families this buffer can be used on.
    // TODO: use a custom iterator
    #[inline]
    pub fn queue_families(&self) -> Vec<QueueFamily> {
        self.queue_families
            .iter()
            .map(|&num| {
                self.device()
                    .physical_device()
                    .queue_family_by_id(num)
                    .unwrap()
            })
            .collect()
    }
}

impl<T: ?Sized, A> CpuAccessibleBuffer<T, A>
where
    T: Content + 'static,
    A: MemoryPoolAlloc,
{
    /// Locks the buffer in order to read its content from the CPU.
    ///
    /// If the buffer is currently used in exclusive mode by the GPU, this function will return
    /// an error. Similarly if you called `write()` on the buffer and haven't dropped the lock,
    /// this function will return an error as well.
    ///
    /// After this function successfully locks the buffer, any attempt to submit a command buffer
    /// that uses it in exclusive mode will fail. You can still submit this buffer for
    /// non-exclusive accesses (i.e. reads).
    #[inline]
    pub fn read(&self) -> Result<ReadLock<T>, ReadLockError> {
        let lock = match self.access.try_read() {
            Some(l) => l,
            // TODO: if a user simultaneously calls .write(), and write() is currently finding out
            //       that the buffer is in fact GPU locked, then we will return a CpuWriteLocked
            //       error instead of a GpuWriteLocked ; is this a problem? how do we fix this?
            None => return Err(ReadLockError::CpuWriteLocked),
        };

        if let CurrentGpuAccess::Exclusive { .. } = *lock {
            return Err(ReadLockError::GpuWriteLocked);
        }

        let offset = self.memory.offset();
        let range = offset..offset + self.inner.size();

        Ok(ReadLock {
            inner: unsafe { self.memory.mapped_memory().unwrap().read_write(range) },
            lock,
        })
    }

    /// Locks the buffer in order to write its content from the CPU.
    ///
    /// If the buffer is currently in use by the GPU, this function will return an error.
    /// Similarly if you called `read()` on the buffer and haven't dropped the lock, this function
    /// will return an error as well.
    ///
    /// After this function successfully locks the buffer, any attempt to submit a command buffer
    /// that uses it and any attempt to call `read()` will return an error.
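    ///
    /// # Example
    ///
    /// A sketch of a CPU-side update; it assumes a `buffer` of type
    /// `Arc<CpuAccessibleBuffer<u32>>` that the GPU is not currently using:
    ///
    /// ```ignore
    /// {
    ///     let mut content = buffer.write().expect("buffer is locked");
    ///     *content = 42;
    /// } // the CPU write lock is released when `content` goes out of scope
    ///
    /// assert_eq!(*buffer.read().expect("buffer is locked"), 42);
    /// ```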
    #[inline]
    pub fn write(&self) -> Result<WriteLock<T>, WriteLockError> {
        let lock = match self.access.try_write() {
            Some(l) => l,
            // TODO: if a user simultaneously calls .read() or .write(), and the function is
            //       currently finding out that the buffer is in fact GPU locked, then we will
            //       return a CpuLocked error instead of a GpuLocked ; is this a problem?
            //       how do we fix this?
            None => return Err(WriteLockError::CpuLocked),
        };

        match *lock {
            CurrentGpuAccess::NonExclusive { ref num } if num.load(Ordering::SeqCst) == 0 => (),
            _ => return Err(WriteLockError::GpuLocked),
        }

        let offset = self.memory.offset();
        let range = offset..offset + self.inner.size();

        Ok(WriteLock {
            inner: unsafe { self.memory.mapped_memory().unwrap().read_write(range) },
            lock,
        })
    }
}

unsafe impl<T: ?Sized, A> BufferAccess for CpuAccessibleBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn inner(&self) -> BufferInner {
        BufferInner {
            buffer: &self.inner,
            offset: 0,
        }
    }

    #[inline]
    fn size(&self) -> DeviceSize {
        self.inner.size()
    }

    #[inline]
    fn conflict_key(&self) -> (u64, u64) {
        (self.inner.key(), 0)
    }

    #[inline]
    fn try_gpu_lock(&self, exclusive_access: bool, _: &Queue) -> Result<(), AccessError> {
        if exclusive_access {
            let mut lock = match self.access.try_write() {
                Some(lock) => lock,
                None => return Err(AccessError::AlreadyInUse),
            };

            match *lock {
                CurrentGpuAccess::NonExclusive { ref num } if num.load(Ordering::SeqCst) == 0 => {
                    ()
                }
                _ => return Err(AccessError::AlreadyInUse),
            };

            *lock = CurrentGpuAccess::Exclusive { num: 1 };
            Ok(())
        } else {
            let lock = match self.access.try_read() {
                Some(lock) => lock,
                None => return Err(AccessError::AlreadyInUse),
            };

            match *lock {
                CurrentGpuAccess::Exclusive { .. } => return Err(AccessError::AlreadyInUse),
                CurrentGpuAccess::NonExclusive { ref num } => num.fetch_add(1, Ordering::SeqCst),
            };

            Ok(())
        }
    }

    #[inline]
    unsafe fn increase_gpu_lock(&self) {
        // First, handle if we have a non-exclusive access.
        {
            // Since the buffer is in use by the GPU, it is invalid to hold a write-lock to
            // the buffer. The buffer can still be briefly in a write-locked state for the
            // duration of the check though.
            let read_lock = self.access.read();
            if let CurrentGpuAccess::NonExclusive { ref num } = *read_lock {
                let prev = num.fetch_add(1, Ordering::SeqCst);
                debug_assert!(prev >= 1);
                return;
            }
        }

        // If we reach here, this means that `access` contains `CurrentGpuAccess::Exclusive`.
        {
            // Same remark as above, but for writing.
            let mut write_lock = self.access.write();
            if let CurrentGpuAccess::Exclusive { ref mut num } = *write_lock {
                *num += 1;
            } else {
                unreachable!()
            }
        }
    }

    #[inline]
    unsafe fn unlock(&self) {
        // First, handle if we had a non-exclusive access.
        {
            // Since the buffer is in use by the GPU, it is invalid to hold a write-lock to
            // the buffer. The buffer can still be briefly in a write-locked state for the
            // duration of the check though.
            let read_lock = self.access.read();
            if let CurrentGpuAccess::NonExclusive { ref num } = *read_lock {
                let prev = num.fetch_sub(1, Ordering::SeqCst);
                debug_assert!(prev >= 1);
                return;
            }
        }

        // If we reach here, this means that `access` contains `CurrentGpuAccess::Exclusive`.
        {
            // Same remark as above, but for writing.
            let mut write_lock = self.access.write();
            if let CurrentGpuAccess::Exclusive { ref mut num } = *write_lock {
                if *num != 1 {
                    *num -= 1;
                    return;
                }
            } else {
                // Can happen if we lock in exclusive mode N times, and unlock N+1 times, with
                // the last two unlocks happening simultaneously.
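                // Panicking is the only sound option left: the access bookkeeping no longer
                // matches reality, so returning normally could let the CPU and the GPU access
                // the buffer concurrently.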
                panic!()
            }

            *write_lock = CurrentGpuAccess::NonExclusive {
                num: AtomicUsize::new(0),
            };
        }
    }
}

unsafe impl<T: ?Sized, A> TypedBufferAccess for CpuAccessibleBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    type Content = T;
}

unsafe impl<T: ?Sized, A> DeviceOwned for CpuAccessibleBuffer<T, A> {
    #[inline]
    fn device(&self) -> &Arc<Device> {
        self.inner.device()
    }
}

impl<T: ?Sized, A> PartialEq for CpuAccessibleBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.inner() == other.inner() && self.size() == other.size()
    }
}

impl<T: ?Sized, A> Eq for CpuAccessibleBuffer<T, A> where T: 'static + Send + Sync {}

impl<T: ?Sized, A> Hash for CpuAccessibleBuffer<T, A>
where
    T: 'static + Send + Sync,
{
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.inner().hash(state);
        self.size().hash(state);
    }
}

/// Object that can be used to read the content of a `CpuAccessibleBuffer`.
///
/// Note that this object holds a rwlock read guard on the chunk. If another thread tries to
/// access this buffer's content or tries to submit a GPU command that uses this buffer, it will
/// block.
pub struct ReadLock<'a, T: ?Sized + 'a> {
    inner: MemCpuAccess<'a, T>,
    lock: RwLockReadGuard<'a, CurrentGpuAccess>,
}

impl<'a, T: ?Sized + 'a> ReadLock<'a, T> {
    /// Makes a new `ReadLock` to access a sub-part of the current `ReadLock`.
    #[inline]
    pub fn map<U: ?Sized + 'a, F>(self, f: F) -> ReadLock<'a, U>
    where
        F: FnOnce(&mut T) -> &mut U,
    {
        ReadLock {
            inner: self.inner.map(|ptr| unsafe { f(&mut *ptr) as *mut _ }),
            lock: self.lock,
        }
    }
}

impl<'a, T: ?Sized + 'a> Deref for ReadLock<'a, T> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &T {
        self.inner.deref()
    }
}

/// Error when attempting to CPU-read a buffer.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ReadLockError {
    /// The buffer is already locked for write mode by the CPU.
    CpuWriteLocked,
    /// The buffer is already locked for write mode by the GPU.
    GpuWriteLocked,
}

impl error::Error for ReadLockError {}

impl fmt::Display for ReadLockError {
    #[inline]
    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(
            fmt,
            "{}",
            match *self {
                ReadLockError::CpuWriteLocked => {
                    "the buffer is already locked for write mode by the CPU"
                }
                ReadLockError::GpuWriteLocked => {
                    "the buffer is already locked for write mode by the GPU"
                }
            }
        )
    }
}

/// Object that can be used to read or write the content of a `CpuAccessibleBuffer`.
///
/// Note that this object holds a rwlock write guard on the chunk. If another thread tries to
/// access this buffer's content or tries to submit a GPU command that uses this buffer, it will
/// block.
pub struct WriteLock<'a, T: ?Sized + 'a> {
    inner: MemCpuAccess<'a, T>,
    lock: RwLockWriteGuard<'a, CurrentGpuAccess>,
}

impl<'a, T: ?Sized + 'a> WriteLock<'a, T> {
    /// Makes a new `WriteLock` to access a sub-part of the current `WriteLock`.
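    ///
    /// # Example
    ///
    /// A sketch, assuming `buffer` is an `Arc<CpuAccessibleBuffer<[u32]>>` with at least one
    /// element:
    ///
    /// ```ignore
    /// let lock = buffer.write().unwrap();
    /// // Narrow the guard down to just the first element of the slice.
    /// let mut first = lock.map(|slice| &mut slice[0]);
    /// *first = 7;
    /// ```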
    #[inline]
    pub fn map<U: ?Sized + 'a, F>(self, f: F) -> WriteLock<'a, U>
    where
        F: FnOnce(&mut T) -> &mut U,
    {
        WriteLock {
            inner: self.inner.map(|ptr| unsafe { f(&mut *ptr) as *mut _ }),
            lock: self.lock,
        }
    }
}

impl<'a, T: ?Sized + 'a> Deref for WriteLock<'a, T> {
    type Target = T;

    #[inline]
    fn deref(&self) -> &T {
        self.inner.deref()
    }
}

impl<'a, T: ?Sized + 'a> DerefMut for WriteLock<'a, T> {
    #[inline]
    fn deref_mut(&mut self) -> &mut T {
        self.inner.deref_mut()
    }
}

/// Error when attempting to CPU-write a buffer.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum WriteLockError {
    /// The buffer is already locked by the CPU.
    CpuLocked,
    /// The buffer is already locked by the GPU.
    GpuLocked,
}

impl error::Error for WriteLockError {}

impl fmt::Display for WriteLockError {
    #[inline]
    fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(
            fmt,
            "{}",
            match *self {
                WriteLockError::CpuLocked => "the buffer is already locked by the CPU",
                WriteLockError::GpuLocked => "the buffer is already locked by the GPU",
            }
        )
    }
}

#[cfg(test)]
mod tests {
    use crate::buffer::{BufferUsage, CpuAccessibleBuffer};

    #[test]
    fn create_empty_buffer() {
        let (device, queue) = gfx_dev_and_queue!();

        const EMPTY: [i32; 0] = [];

        let _ = CpuAccessibleBuffer::from_data(device.clone(), BufferUsage::all(), false, EMPTY);
        let _ = CpuAccessibleBuffer::from_iter(device, BufferUsage::all(), false, EMPTY.iter());
    }
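
    // An extra sanity check in the same vein as the test above (it assumes the same
    // `gfx_dev_and_queue!` harness): data written through a `WriteLock` must be observable
    // through a subsequent `ReadLock`.
    #[test]
    fn write_then_read() {
        let (device, _queue) = gfx_dev_and_queue!();

        let buffer =
            CpuAccessibleBuffer::from_data(device, BufferUsage::all(), false, 0u32).unwrap();

        {
            let mut content = buffer.write().unwrap();
            *content = 42;
        } // drop the write lock before reading

        assert_eq!(*buffer.read().unwrap(), 42);
    }
}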