android-14.0.0_r21/s

#![allow(non_camel_case_types)]
use core::ops::{Add, AddAssign, BitAnd, BitOr, BitXor, BitXorAssign, Not};

/// Binary "and-not" operation on two values of the same type.
///
/// NOTE(review): which operand is complemented before the AND is decided by the
/// implementations, which are not visible in this file — confirm before relying on it.
pub trait AndNot {
    /// Result type of the operation.
    type Output;
    /// Combines `self` and `rhs` with the and-not operation, consuming both.
    fn andnot(self, rhs: Self) -> Self::Output;
}
/// Byte-swap operation returning a value of the same type.
///
/// NOTE(review): presumably reverses the byte order within each word (hence the
/// dependency on word size noted on `ArithOps`) — confirm against the implementations.
pub trait BSwap {
    /// Consumes `self` and returns the byte-swapped value.
    fn bswap(self) -> Self;
}
/// Ops that depend on word size
///
/// Marker trait with no methods of its own: it bundles addition (`Add`, `AddAssign`),
/// byte swapping (`BSwap`) and the `Sized + Copy + Clone` value semantics.
pub trait ArithOps: Add<Output = Self> + AddAssign + Sized + Copy + Clone + BSwap {}
/// Ops that are independent of word size and endian
///
/// Marker trait with no methods of its own: it bundles the bitwise operators
/// (`&`, `|`, `^`, `^=`, `!`), the `AndNot` operation and `Sized + Copy + Clone`.
/// Every binary operator is required to return `Self`.
pub trait BitOps0:
    BitAnd<Output = Self>
    + BitOr<Output = Self>
    + BitXor<Output = Self>
    + BitXorAssign
    + Not<Output = Self>
    + AndNot<Output = Self>
    + Sized
    + Copy
    + Clone
{
}

/// Bitwise ops plus the per-word rotations declared by `RotateEachWord32`.
pub trait BitOps32: BitOps0 + RotateEachWord32 {}
/// Everything in `BitOps32` plus the rotation declared by `RotateEachWord64`.
pub trait BitOps64: BitOps32 + RotateEachWord64 {}
/// Everything in `BitOps64` plus `RotateEachWord128` (which currently declares no methods).
pub trait BitOps128: BitOps64 + RotateEachWord128 {}

/// Fixed-amount rotations applied to every word of a vector.
///
/// Each method consumes `self` and returns a value of the same type. The numeric suffix
/// names the rotation amount.
///
/// NOTE(review): judging by the names, each method rotates every word right by that many
/// bits; the implementations are not visible here — confirm.
pub trait RotateEachWord32 {
    fn rotate_each_word_right7(self) -> Self;
    fn rotate_each_word_right8(self) -> Self;
    fn rotate_each_word_right11(self) -> Self;
    fn rotate_each_word_right12(self) -> Self;
    fn rotate_each_word_right16(self) -> Self;
    fn rotate_each_word_right20(self) -> Self;
    fn rotate_each_word_right24(self) -> Self;
    fn rotate_each_word_right25(self) -> Self;
}

/// Additional per-word rotation required by `BitOps64`.
///
/// NOTE(review): judging by the name, rotates every word right by 32 bits — confirm
/// against the implementations.
pub trait RotateEachWord64 {
    fn rotate_each_word_right32(self) -> Self;
}

/// Marker required by `BitOps128`; it currently declares no methods.
pub trait RotateEachWord128 {}

// Vector type naming scheme:
// uN[xP]xL
// Unsigned; N-bit words * P words per lane * L lanes
//
// A lane is always 128-bits, chosen because common SIMD architectures treat 128-bit units of
// wide vectors specially (supporting e.g. intra-lane shuffles), and tend to have limited and
// slow inter-lane operations.

use crate::arch::{vec128_storage, vec256_storage, vec512_storage};

/// Unsafe counterpart of a `From`-style conversion from `T` into `Self`.
///
/// NOTE(review): the safety contract is not documented in this file (the clippy lint for
/// that is silenced below); each implementation presumably defines its own preconditions.
#[allow(clippy::missing_safety_doc)]
pub trait UnsafeFrom<T> {
    /// Converts `t` into `Self`; the caller must uphold the implementation's requirements.
    unsafe fn unsafe_from(t: T) -> Self;
}

/// A vector composed of two elements, which may be words or themselves vectors.
///
/// NOTE(review): the valid range of `i` (presumably `0..2`) and the behavior for an
/// out-of-range index are left to the implementations — confirm.
pub trait Vec2<W> {
    /// Consumes the vector and returns the element selected by `i`.
    fn extract(self, i: u32) -> W;
    /// Consumes the vector and returns a copy in which the element selected by `i` is `w`.
    fn insert(self, w: W, i: u32) -> Self;
}

/// A vector composed of four elements, which may be words or themselves vectors.
///
/// NOTE(review): the valid range of `i` (presumably `0..4`) and the behavior for an
/// out-of-range index are left to the implementations — confirm.
pub trait Vec4<W> {
    /// Consumes the vector and returns the element selected by `i`.
    fn extract(self, i: u32) -> W;
    /// Consumes the vector and returns a copy in which the element selected by `i` is `w`.
    fn insert(self, w: W, i: u32) -> Self;
}
/// Vec4 functions which may not be implemented yet for all Vec4 types.
/// NOTE: functions in this trait may be moved to Vec4 in any patch release. To avoid breakage,
/// import Vec4Ext only together with Vec4, and don't qualify its methods.
pub trait Vec4Ext<W> {
    /// Takes four vectors and returns four vectors of the same type.
    ///
    /// NOTE(review): by its name this treats `a`..`d` as the rows of a 4x4 matrix of `W`
    /// elements and returns its transpose; the implementation is not visible here — confirm.
    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
    where
        Self: Sized;
}
/// Conversion of a vector into a plain scalar representation `T`.
///
/// In this file `T` is an array of words (`u32x4x4` requires `Vector<[u32; 16]>`).
pub trait Vector<T> {
    /// Consumes the vector and returns its contents as `T`.
    fn to_scalars(self) -> T;
}

// TODO: multiples of 4 should inherit this
/// A vector composed of four words; depending on their size, operations may cross lanes.
///
/// Each method consumes `self` and returns a value of the same type with its four words
/// permuted. The digit string in the method name identifies the permutation.
///
/// NOTE(review): whether the digits list source or destination positions is not visible
/// in this file — confirm against an implementation before relying on the direction.
pub trait Words4 {
    fn shuffle1230(self) -> Self;
    fn shuffle2301(self) -> Self;
    fn shuffle3012(self) -> Self;
}

/// A vector composed of one or more lanes, each composed of four words.
///
/// Each method consumes `self` and returns a value of the same type. The digit string in
/// the method name identifies the permutation of words.
///
/// NOTE(review): presumably the same permutation is applied independently within every
/// lane, and the direction of the digit encoding matches `Words4` — confirm.
pub trait LaneWords4 {
    fn shuffle_lane_words1230(self) -> Self;
    fn shuffle_lane_words2301(self) -> Self;
    fn shuffle_lane_words3012(self) -> Self;
}

// TODO: make this a part of BitOps
/// Exchange neighboring ranges of bits of the specified size
///
/// Each method consumes `self` and returns a value of the same type; the numeric suffix
/// is the size of the ranges being exchanged.
///
/// NOTE(review): the suffix is presumably a bit count (1, 2, 4, ... 64) — confirm.
pub trait Swap64 {
    fn swap1(self) -> Self;
    fn swap2(self) -> Self;
    fn swap4(self) -> Self;
    fn swap8(self) -> Self;
    fn swap16(self) -> Self;
    fn swap32(self) -> Self;
    fn swap64(self) -> Self;
}

/// Capability bundle for a `Machine`'s `u32x4` vector type.
///
/// Declares no methods of its own. An implementation must provide the 32-bit bit
/// operations, arithmetic, conversion to and from `vec128_storage`, element access to four
/// `u32`s, word shuffles (`Words4`, `LaneWords4`), byte (de)serialization and lane
/// conversion via `[u32; 4]`.
pub trait u32x4<M: Machine>:
    BitOps32
    + Store<vec128_storage>
    + ArithOps
    + Vec4<u32>
    + Words4
    + LaneWords4
    + StoreBytes
    + MultiLane<[u32; 4]>
    + Into<vec128_storage>
{
}
/// Capability bundle for a `Machine`'s `u64x2` vector type.
///
/// Declares no methods of its own. An implementation must provide the 64-bit bit
/// operations, arithmetic, conversion to and from `vec128_storage`, element access to two
/// `u64`s and lane conversion via `[u64; 2]`.
pub trait u64x2<M: Machine>:
    BitOps64 + Store<vec128_storage> + ArithOps + Vec2<u64> + MultiLane<[u64; 2]> + Into<vec128_storage>
{
}
/// Capability bundle for a `Machine`'s `u128x1` vector type.
///
/// Declares no methods of its own. An implementation must provide the 128-bit bit
/// operations, conversion to and from `vec128_storage`, the `Swap64` bit-range swaps and
/// lane conversion via `[u128; 1]`. Unlike the `u32`/`u64` vectors, `ArithOps` is not
/// required.
pub trait u128x1<M: Machine>:
    BitOps128 + Store<vec128_storage> + Swap64 + MultiLane<[u128; 1]> + Into<vec128_storage>
{
}

/// Capability bundle for a `Machine`'s `u32x4x2` vector type (two `M::u32x4` lanes).
///
/// Declares no methods of its own. An implementation must provide the 32-bit bit
/// operations, arithmetic, conversion to and from `vec256_storage`, access to its two
/// `M::u32x4` elements, lane conversion via `[M::u32x4; 2]` and byte (de)serialization.
pub trait u32x4x2<M: Machine>:
    BitOps32
    + Store<vec256_storage>
    + Vec2<M::u32x4>
    + MultiLane<[M::u32x4; 2]>
    + ArithOps
    + Into<vec256_storage>
    + StoreBytes
{
}
/// Capability bundle for a `Machine`'s `u64x2x2` vector type (two `M::u64x2` lanes).
///
/// Declares no methods of its own. An implementation must provide the 64-bit bit
/// operations, arithmetic, conversion to and from `vec256_storage`, access to its two
/// `M::u64x2` elements, lane conversion via `[M::u64x2; 2]` and byte (de)serialization.
pub trait u64x2x2<M: Machine>:
    BitOps64
    + Store<vec256_storage>
    + Vec2<M::u64x2>
    + MultiLane<[M::u64x2; 2]>
    + ArithOps
    + StoreBytes
    + Into<vec256_storage>
{
}
/// Capability bundle for a `Machine`'s `u64x4` vector type (four `u64` words).
///
/// Declares no methods of its own. An implementation must provide the 64-bit bit
/// operations, arithmetic, conversion to and from `vec256_storage`, element access to four
/// `u64`s, the `Words4` shuffles, conversion via `[u64; 4]` and byte (de)serialization.
pub trait u64x4<M: Machine>:
    BitOps64
    + Store<vec256_storage>
    + Vec4<u64>
    + MultiLane<[u64; 4]>
    + ArithOps
    + Words4
    + StoreBytes
    + Into<vec256_storage>
{
}
/// Capability bundle for a `Machine`'s `u128x2` vector type (two `M::u128x1` lanes).
///
/// Declares no methods of its own. An implementation must provide the 128-bit bit
/// operations, conversion to and from `vec256_storage`, access to its two `M::u128x1`
/// elements, lane conversion via `[M::u128x1; 2]` and the `Swap64` bit-range swaps.
pub trait u128x2<M: Machine>:
    BitOps128
    + Store<vec256_storage>
    + Vec2<M::u128x1>
    + MultiLane<[M::u128x1; 2]>
    + Swap64
    + Into<vec256_storage>
{
}

/// Capability bundle for a `Machine`'s `u32x4x4` vector type (four `M::u32x4` lanes).
///
/// Declares no methods of its own. An implementation must provide the 32-bit bit
/// operations, arithmetic, conversion to and from `vec512_storage`, access to its four
/// `M::u32x4` elements (including `Vec4Ext::transpose4`), conversion to `[u32; 16]`
/// scalars, lane conversion via `[M::u32x4; 4]`, per-lane word shuffles and byte
/// (de)serialization.
pub trait u32x4x4<M: Machine>:
    BitOps32
    + Store<vec512_storage>
    + Vec4<M::u32x4>
    + Vec4Ext<M::u32x4>
    + Vector<[u32; 16]>
    + MultiLane<[M::u32x4; 4]>
    + ArithOps
    + LaneWords4
    + Into<vec512_storage>
    + StoreBytes
{
}
/// Capability bundle for a `Machine`'s `u64x2x4` vector type (four `M::u64x2` lanes).
///
/// Declares no methods of its own. An implementation must provide the 64-bit bit
/// operations, arithmetic, conversion to and from `vec512_storage`, access to its four
/// `M::u64x2` elements and lane conversion via `[M::u64x2; 4]`. `StoreBytes` is not
/// required here.
pub trait u64x2x4<M: Machine>:
    BitOps64
    + Store<vec512_storage>
    + Vec4<M::u64x2>
    + MultiLane<[M::u64x2; 4]>
    + ArithOps
    + Into<vec512_storage>
{
}
// TODO: Words4
/// Capability bundle for a `Machine`'s `u128x4` vector type (four `M::u128x1` lanes).
///
/// Declares no methods of its own. An implementation must provide the 128-bit bit
/// operations, conversion to and from `vec512_storage`, access to its four `M::u128x1`
/// elements, lane conversion via `[M::u128x1; 4]` and the `Swap64` bit-range swaps.
pub trait u128x4<M: Machine>:
    BitOps128
    + Store<vec512_storage>
    + Vec4<M::u128x1>
    + MultiLane<[M::u128x1; 4]>
    + Swap64
    + Into<vec512_storage>
{
}

/// A vector composed of multiple 128-bit lanes.
///
/// `Lanes` is the decomposed form of the vector; in this file it is always a fixed-size
/// array (for example `[M::u32x4; 2]` or `[u32; 4]`).
pub trait MultiLane<Lanes> {
    /// Split a multi-lane vector into single-lane vectors.
    fn to_lanes(self) -> Lanes;
    /// Build a multi-lane vector from individual lanes.
    fn from_lanes(lanes: Lanes) -> Self;
}

/// Combine single vectors into a multi-lane vector.
pub trait VZip<V> {
    fn vzip(self) -> V;
}

impl<V, T> VZip<V> for T
where
    V: MultiLane<T>,
{
    #[inline(always)]
    fn vzip(self) -> V {
        V::from_lanes(self)
    }
}

/// Handle for one concrete vector backend.
///
/// The associated types name the backend's implementation of each same-named vector trait.
/// The provided methods are thin, always-inlined wrappers that forward to `Store`,
/// `MultiLane` and `StoreBytes`, so that callers holding a machine value can construct
/// vectors without writing `unsafe` themselves.
pub trait Machine: Sized + Copy {
    // Vectors backed by `vec128_storage`.
    type u32x4: u32x4<Self>;
    type u64x2: u64x2<Self>;
    type u128x1: u128x1<Self>;

    // Vectors backed by `vec256_storage`.
    type u32x4x2: u32x4x2<Self>;
    type u64x2x2: u64x2x2<Self>;
    type u64x4: u64x4<Self>;
    type u128x2: u128x2<Self>;

    // Vectors backed by `vec512_storage`.
    type u32x4x4: u32x4x4<Self>;
    type u64x2x4: u64x2x4<Self>;
    type u128x4: u128x4<Self>;

    /// Rebuilds a vector `V` from its storage representation `s` via `Store::unpack`.
    #[inline(always)]
    fn unpack<S, V>(self, s: S) -> V
    where
        V: Store<S>,
    {
        // SAFETY: NOTE(review) — `Store::unpack` requires `V` to suit the executing hardware.
        // Presumably holding `self` (only obtainable through the unsafe `instance()`) is the
        // evidence for that; note `V` is not constrained to this machine's own types.
        unsafe { <V as Store<S>>::unpack(s) }
    }

    /// Assembles a vector `V` from its lanes `a` via `MultiLane::from_lanes`.
    #[inline(always)]
    fn vec<V: MultiLane<A>, A>(self, a: A) -> V {
        <V as MultiLane<A>>::from_lanes(a)
    }

    /// Reads a vector `V` from `input` via `StoreBytes::unsafe_read_le`.
    #[inline(always)]
    fn read_le<V: StoreBytes>(self, input: &[u8]) -> V {
        // SAFETY: NOTE(review) — same reasoning as in `unpack`: presumably justified by
        // `self` having been created through the unsafe `instance()`. Any length requirement
        // on `input` is enforced (or not) by the implementation — confirm.
        unsafe { <V as StoreBytes>::unsafe_read_le(input) }
    }

    /// Reads a vector `V` from `input` via `StoreBytes::unsafe_read_be`.
    #[inline(always)]
    fn read_be<V: StoreBytes>(self, input: &[u8]) -> V {
        // SAFETY: NOTE(review) — same reasoning as in `unpack`: presumably justified by
        // `self` having been created through the unsafe `instance()`. Any length requirement
        // on `input` is enforced (or not) by the implementation — confirm.
        unsafe { <V as StoreBytes>::unsafe_read_be(input) }
    }

    /// Creates a value of this machine type.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn instance() -> Self;
}

/// Construction of a vector from a storage representation `S`.
///
/// In this file `S` is one of `vec128_storage`, `vec256_storage` or `vec512_storage`.
pub trait Store<S> {
    /// Builds `Self` from the storage value `p`.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unpack(p: S) -> Self;
}

/// Conversion between a vector and a byte slice, in little- or big-endian order.
///
/// NOTE(review): the required length of `input`/`out` and the behavior when a slice has the
/// wrong length are defined by the implementations, not visible in this file — confirm.
pub trait StoreBytes {
    /// Reads `Self` from `input`, interpreting the bytes as little-endian.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_le(input: &[u8]) -> Self;
    /// Reads `Self` from `input`, interpreting the bytes as big-endian.
    ///
    /// # Safety
    /// Caller must ensure the type of Self is appropriate for the hardware of the execution
    /// environment.
    unsafe fn unsafe_read_be(input: &[u8]) -> Self;
    /// Writes `self` into `out` in little-endian byte order.
    fn write_le(self, out: &mut [u8]);
    /// Writes `self` into `out` in big-endian byte order.
    fn write_be(self, out: &mut [u8]);
}