/third_party/ffmpeg/libavfilter/cuda/ |
D | vector_helpers.cuh | 43 template<typename T, typename V> inline __device__ V to_floatN(const T &a) { return (V)a; } in to_floatN() 44 template<typename T, typename V> inline __device__ T from_floatN(const V &a) { return (T)a; } in from_floatN() 47 …template<typename V> inline __device__ T operator+(const T &a, const V &b) { return make_ ## T (a.… 48 …template<typename V> inline __device__ T operator-(const T &a, const V &b) { return make_ ## T (a.… 49 …template<typename V> inline __device__ T operator*(const T &a, V b) { return make_ ## T (a.x * b, … 50 …template<typename V> inline __device__ T operator/(const T &a, V b) { return make_ ## T (a.x / b, … 51 …template<typename V> inline __device__ T operator>>(const T &a, V b) { return make_ ## T (a.x >> b… 52 …template<typename V> inline __device__ T operator<<(const T &a, V b) { return make_ ## T (a.x << b… 53 …template<typename V> inline __device__ T &operator+=(T &a, const V &b) { a.x += b.x; a.y += b.y; r… 54 … template<typename V> inline __device__ void vec_set(T &a, const V &b) { a.x = b.x; a.y = b.y; } \ [all …]
|
/third_party/ffmpeg/compat/cuda/ |
D | cuda_runtime.h | 28 #define __device__ __attribute__((device)) macro 94 #define GET(name, reg) static inline __device__ uint3 name() {\ 123 #define TEX2D(type, ret) static inline __device__ void conv(type* out, unsigned a, unsigned b, unsi… 137 inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) 149 inline __device__ float4 tex2D<float4>(cudaTextureObject_t texObject, float x, float y) 159 inline __device__ float tex2D<float>(cudaTextureObject_t texObject, float x, float y) 165 inline __device__ float2 tex2D<float2>(cudaTextureObject_t texObject, float x, float y) 172 static inline __device__ float floorf(float a) { return __builtin_floorf(a); } in floorf() 173 static inline __device__ float floor(float a) { return __builtin_floorf(a); } in floor() 174 static inline __device__ double floor(double a) { return __builtin_floor(a); } in floor() [all …]
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/ |
D | util.cuh | 31 __device__ static inline double MsAtomicAdd(double *address, const double val) { in MsAtomicAdd() 42 __device__ static inline float MsAtomicAdd(float *address, const float val) { return atomicAdd(addr… in MsAtomicAdd() 44 __device__ static inline int MsAtomicAdd(int *address, int val) { return atomicAdd(address, val); } in MsAtomicAdd() 46 __device__ static inline unsigned int MsAtomicAdd(unsigned int *address, unsigned int val) { in MsAtomicAdd() 50 __device__ static inline int8_t MsAtomicAdd(int8_t *address, int8_t val) { in MsAtomicAdd() 69 __device__ static inline int64_t MsAtomicAdd(int64_t *address, int64_t val) { in MsAtomicAdd() 83 __device__ static inline bool MsAtomicAdd(bool *address, bool val) { in MsAtomicAdd() 88 __device__ static inline unsigned char MsAtomicAdd(short *address, short val) { // NOLINT in MsAtomicAdd() 114 __device__ static inline half MsAtomicAdd(half *address, half val) { in MsAtomicAdd() 134 __device__ static inline unsigned char MsAtomicAdd(unsigned char *address, unsigned char val) { in MsAtomicAdd() [all …]
|
D | sparse_apply_proximal_adagrad_impl.cu | 20 __device__ __forceinline__ bool CompareFunc(T x, T y) { in CompareFunc() 25 __device__ __forceinline__ bool CompareFunc(half x, half y) { in CompareFunc() 30 __device__ __forceinline__ T RsqrtFunc(T x) { in RsqrtFunc() 35 __device__ __forceinline__ half RsqrtFunc(half x) { in RsqrtFunc() 40 __device__ __forceinline__ T AbsFunc(T x) { in AbsFunc() 45 __device__ __forceinline__ half AbsFunc(half x) { in AbsFunc() 50 __device__ __forceinline__ T Sgn(T x) { in Sgn() 55 __device__ __forceinline__ half Sgn(half x) { in Sgn()
|
D | broadcast_impl.cu | 26 …__device__ __host__ __forceinline__ bool operator()(const T &lhs, const T &rhs) { return lhs > rhs… in operator ()() 31 …__device__ __host__ __forceinline__ bool operator()(const T &lhs, const T &rhs) { return lhs < rhs… in operator ()() 36 …__device__ __host__ __forceinline__ bool operator()(const T &lhs, const T &rhs) { return lhs == rh… in operator ()() 41 __device__ __host__ __forceinline__ bool operator()(const half &lhs, const half &rhs) { in operator ()() 48 __device__ __host__ __forceinline__ bool operator()(const float &lhs, const float &rhs) { in operator ()() 55 …__device__ __host__ __forceinline__ bool operator()(const T &lhs, const T &rhs) { return lhs >= rh… in operator ()() 60 __device__ __host__ __forceinline__ bool operator()(const half &lhs, const half &rhs) { in operator ()() 69 __device__ __host__ __forceinline__ bool operator()(const float &lhs, const float &rhs) { in operator ()() 76 …__device__ __host__ __forceinline__ bool operator()(const T &lhs, const T &rhs) { return lhs <= rh… in operator ()() 81 __device__ __host__ __forceinline__ bool operator()(const half &lhs, const half &rhs) { in operator ()() [all …]
|
D | topk_lib.cuh | 20 constexpr __host__ __device__ int Log2(int n, int p = 0) { return (n <= 1) ? p : Log2(n / 2, p + 1)… in Log2() 21 constexpr __host__ __device__ bool IsPow2(int v) { return (v && !(v & (v - 1))); } in IsPow2() 22 constexpr __host__ __device__ int NextPow2(int v) { return (IsPow2(v) ? 2 * v : (1 << static_cast<i… in NextPow2() 24 __device__ __forceinline__ int GetLaneId() { in GetLaneId() 32 …__device__ static inline bool gt(T k1, S v1, T k2, S v2) { return k1 > k2 || (k1 == k2 && v1 < v2)… in gt() 33 …__device__ static inline bool lt(T k1, S v1, T k2, S v2) { return k1 < k2 || (k1 == k2 && v1 > v2)… in lt() 38 __device__ static inline bool lt(T a, T b) { return a < b; } in lt() 39 __device__ static inline bool gt(T a, T b) { return a > b; } in gt() 43 inline __device__ T shfl_xor(const T val, int laneMask, int width = kWarpSize) { in shfl_xor() 48 inline __device__ void L2CompareAndSwap(T *a, S *b, int i_1, int i_2) { in L2CompareAndSwap() [all …]
|
D | ftrl_impl.cu | 20 __device__ __forceinline__ T PowFunc(T x, T y) { in PowFunc() 25 __device__ __forceinline__ half PowFunc(half x, half y) { in PowFunc() 30 __device__ __forceinline__ bool CompareFunc(T x, T y) { in CompareFunc() 35 __device__ __forceinline__ bool CompareFunc(half x, half y) { in CompareFunc() 40 __device__ __forceinline__ T Sgn(T x) { in Sgn() 45 __device__ __forceinline__ half Sgn(half x) { in Sgn()
|
D | sparse_ftrl_impl.cu | 22 __device__ __forceinline__ T PowFunc(T x, T y) { in PowFunc() 27 __device__ __forceinline__ half PowFunc(half x, half y) { in PowFunc() 32 __device__ __forceinline__ bool CompareFunc(T x, T y) { in CompareFunc() 37 __device__ __forceinline__ bool CompareFunc(half x, half y) { in CompareFunc() 42 __device__ __forceinline__ T Sgn(T x) { in Sgn() 47 __device__ __forceinline__ half Sgn(half x) { in Sgn()
|
D | layer_norm_grad_impl.cu | 27 inline __device__ T my_pow(T a, double b) { in my_pow() 32 inline __device__ half my_pow(half a, double b) { in my_pow() 37 inline __device__ void GammaAndBetaThreadReduce(const int &col, const int &row_dim, const int &col_… in GammaAndBetaThreadReduce() 57 inline __device__ void GammaAndBetaWarpReduce(T *dg, T *db) { in GammaAndBetaWarpReduce() 65 inline __device__ void GammaAndBetaBlockReduce(const int &col, const int &row_dim, T *dg, T *db, T … in GammaAndBetaBlockReduce() 110 inline __device__ void InputThreadReduce(const int &row, const int &col_dim, const int &param_dim, … in InputThreadReduce() 134 inline __device__ void InputThreadReduce(const int &row, const int &col_dim, const int &param_dim, … in InputThreadReduce() 158 inline __device__ void InputWarpReduce(T *sum1, T *sum2, T *sum3) { in InputWarpReduce() 167 inline __device__ void InputBlockReduce(const int &col_dim, T *sum1, T *sum2, T *sum3, T *share_mem… in InputBlockReduce() 190 inline __device__ void InputProp(const int &row, const int &col_dim, const int &param_dim, const T … in InputProp() [all …]
|
D | layer_norm_impl.cu | 26 inline __device__ void MeanAndVarAccumulation(T *mean, T *var, T *num, const T &val) { in MeanAndVarAccumulation() 37 inline __device__ void MeanAndVarMerge(T *m1, T *v1, T *n1, const T &m2, const T &v2, const T &n2) { in MeanAndVarMerge() 50 inline __device__ void ThreadReduce(const int &col_dim, const T *block_addr, T *mean, T *var, T *nu… in ThreadReduce() 64 inline __device__ void WarpReduce(T *mean, T *var, T *num) { in WarpReduce() 74 inline __device__ void BlockReduce(const int &col_dim, T *mean, T *var, T *num, T *mean_addr, T *va… in BlockReduce() 103 inline __device__ void LayerNorm(const int &row, const int &col_dim, const int &param_dim, const T … in LayerNorm() 113 inline __device__ void LayerNorm(const int &row, const int &col_dim, const int &param_dim, const ha… in LayerNorm()
|
D | layer_norm_grad_grad_impl.cu | 30 inline __device__ T my_pow(T a, double b) { in my_pow() 36 inline __device__ half my_pow(half a, double b) { in my_pow() 42 inline __device__ void GammaAndBetaThreadReduce(const int &col, const int &row_dim, const int &col_… in GammaAndBetaThreadReduce() 69 inline __device__ void GammaAndBetaWarpReduce(T *part1, T *part2, T *part3) { in GammaAndBetaWarpReduce() 79 inline __device__ void GammaAndBetaBlockReduce(const int &col, const int &row_dim, T *part1, T *par… in GammaAndBetaBlockReduce() 125 inline __device__ void InputThreadReduceInnerMean(const int &row, const int &col_dim, const int &pa… in InputThreadReduceInnerMean() 154 inline __device__ void InputWarpReduceInnerMean(T *sum1, T *sum2, T *sum3, T *sum4) { in InputWarpReduceInnerMean() 165 inline __device__ void InputBlockReduceInnerMean(const int &col_dim, T *sum1, T *sum2, T *sum3, T *… in InputBlockReduceInnerMean() 192 inline __device__ void InputThreadReduceOuterMean(const int &row, const int &col_dim, const int &pa… in InputThreadReduceOuterMean() 230 inline __device__ void InputThreadReduceOuterMean(const int &row, const int &col_dim, const int &pa… in InputThreadReduceOuterMean() [all …]
|
D | cast_impl.cu | 25 __device__ __forceinline__ void CastBase(const S *input_addr, T *output_addr) { in CastBase() 30 __device__ __forceinline__ void CastBase(const half *input_addr, uint64_t *output_addr) { in CastBase() 34 __device__ __forceinline__ void CastBase(const half *input_addr, int64_t *output_addr) { in CastBase() 38 __device__ __forceinline__ void CastBase(const half *input_addr, uint32_t *output_addr) { in CastBase() 42 __device__ __forceinline__ void CastBase(const half *input_addr, int32_t *output_addr) { in CastBase() 46 __device__ __forceinline__ void CastBase(const half *input_addr, uint16_t *output_addr) { in CastBase() 50 __device__ __forceinline__ void CastBase(const half *input_addr, int16_t *output_addr) { in CastBase() 54 __device__ __forceinline__ void CastBase(const half *input_addr, uint8_t *output_addr) { in CastBase() 58 __device__ __forceinline__ void CastBase(const half *input_addr, int8_t *output_addr) { in CastBase() 63 __device__ __forceinline__ void CastBase(const uint64_t *input_addr, half *output_addr) { in CastBase() [all …]
|
D | layer_norm_impl.cuh | 26 __device__ float *addr() { in addr() 33 __device__ half *addr() { in addr()
|
D | broadcast_grad_impl.cu | 23 …__device__ __forceinline__ void operator()(const T &x1, const T &x2, const bool &grad_x1, const bo… in operator ()() 35 …__device__ __forceinline__ void operator()(const T &x1, const T &x2, const bool &grad_x1, const bo… in operator ()() 45 __device__ __forceinline__ int Index(const int &index, const int &dim) { return dim == 1 ? 0 : inde… in Index() 48 __device__ __forceinline__ void BroadcastGradOperator(const int &l0, const int &l1, const int &l2, … in BroadcastGradOperator() 91 __device__ __forceinline__ void NoBroadcastOperator(const int &nums, const bool &grad_x1, const boo… in NoBroadcastOperator()
|
D | roi_align_impl.cu | 21 inline __device__ int roi_cast_int(float x) { return __float2int_rd(x); } in roi_cast_int() 22 inline __device__ int roi_cast_int(half x) { return __half2int_rd(x); } in roi_cast_int() 23 inline __device__ int roi_round_int(float x) { return __float2int_rn(x + 0.00007); } in roi_round_int() 24 inline __device__ int roi_round_int(half x) { return __half2int_rn(x + static_cast<half>(0.00007));… in roi_round_int() 27 __device__ void bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *… in bilinear_interpolate() 71 __device__ void bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale, co… in bin_box()
|
D | loss_with_reduction_impl.cu | 22 inline __device__ float logT(float x) { return logf(x); } in logT() 23 inline __device__ half logT(half x) { return hlog(x); } in logT() 24 inline __device__ float castT(float ref, int x) { return __int2float_rd(x); } in castT() 25 inline __device__ half castT(half ref, int x) { return __int2half_rd(x); } in castT() 26 inline __device__ float maxT(float a, float b) { return fmaxf(a, b); } in maxT() 27 inline __device__ half maxT(half a, half b) { return a > b ? a : b; } in maxT() 57 __device__ void MultiplyDevice(const S a, const T b, T *out) { in MultiplyDevice() 62 __device__ void MultiplyDevice(const half a, const float b, float *out) { in MultiplyDevice() 69 __device__ void MultiplyDevice(const float a, const half b, half *out) { in MultiplyDevice()
|
D | iou_impl.cu | 19 __device__ float CoordinateMax(const float a, const float b) { in CoordinateMax() 23 __device__ float CoordinateMin(const float a, const float b) { in CoordinateMin()
|
D | unsorted_segment_min.cu | 21 __device__ __forceinline__ void max_val_init(T *init_val) { in max_val_init() 26 __device__ __forceinline__ void max_val_init(half *init_val) { in max_val_init()
|
D | sync_batch_norm_grad_impl.cu | 25 __inline__ __device__ float HalfFloatInputConvert(const half val) { return __half2float(val); } in HalfFloatInputConvert() 26 __inline__ __device__ float HalfFloatInputConvert(const float val) { return val; } in HalfFloatInputConvert() 27 __inline__ __device__ void HalfFloatOutputAssign(const float val, float *arr, int idx) { arr[idx] =… in HalfFloatOutputAssign() 28 __inline__ __device__ void HalfFloatOutputAssign(const float val, half *arr, int idx) { arr[idx] = … in HalfFloatOutputAssign()
|
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/ |
D | common_sponge.cuh | 125 __device__ __host__ static inline VECTOR operator-(const VECTOR &veca, const VECTOR &vecb) { in operator -() 133 __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UNSIGNED_INT_VECTOR uvec_a, in Get_Periodic_Displacement() 143 __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UINT_VECTOR_LJ_TYPE uvec_a, in Get_Periodic_Displacement() 153 __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const VECTOR vec_a, const VECTOR… in Get_Periodic_Displacement() 163 __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const VECTOR vec_a, const VECTOR… in Get_Periodic_Displacement() 174 __device__ __host__ static inline VECTOR operator+(const VECTOR &veca, const VECTOR &vecb) { in operator +() 182 __device__ __host__ static inline float operator*(const VECTOR &veca, const VECTOR &vecb) { in operator *() 185 __device__ __host__ static inline VECTOR operator*(const float &a, const VECTOR &vecb) { in operator *() 193 __device__ __host__ static inline VECTOR operator-(const VECTOR &vecb) { in operator -() 201 __device__ __host__ static inline VECTOR operator^(const VECTOR &veca, const VECTOR &vecb) { in operator ^() [all …]
|
/third_party/boost/boost/numeric/odeint/external/thrust/ |
D | thrust_operations.hpp | 42 __host__ __device__ 59 __host__ __device__ 79 __host__ __device__ 101 __host__ __device__ 128 __host__ __device__ 157 __host__ __device__ 188 __host__ __device__ 214 __host__ __device__
|
/third_party/ffmpeg/libavfilter/ |
D | vf_scale_cuda_bicubic.cu | 28 __device__ inline float4 lanczos_coeffs(float x, float param) in lanczos_coeffs() 50 __device__ inline float4 bicubic_coeffs(float x, float param) in bicubic_coeffs() 63 __device__ inline void derived_fast_coeffs(float4 coeffs, float x, float *h0, float *h1, float *s) in derived_fast_coeffs() 74 __device__ inline V apply_coeffs(float4 coeffs, V c0, V c1, V c2, V c3) in apply_coeffs() 85 __device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function, in Subsample_Bicubic() 128 __device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function, in Subsample_FastBicubic()
|
D | vf_yadif_cuda.cu | 22 __inline__ __device__ T spatial_predictor(T a, T b, T c, T d, T e, T f, T g, in spatial_predictor() 51 __inline__ __device__ int max3(int a, int b, int c) in max3() 57 __inline__ __device__ int min3(int a, int b, int c) in min3() 64 __inline__ __device__ T temporal_predictor(T A, T B, T C, T D, T E, T F, in temporal_predictor() 97 __inline__ __device__ void yadif_single(T *dst, in yadif_single() 167 __inline__ __device__ void yadif_double(T *dst, in yadif_double()
|
D | vf_scale_cuda.cu | 26 __device__ inline void Subsample_Nearest(cudaTextureObject_t tex, in Subsample_Nearest() 47 __device__ inline void Subsample_Bilinear(cudaTextureObject_t tex, in Subsample_Bilinear()
|
/third_party/boost/libs/numeric/odeint/examples/thrust/ |
D | phase_oscillator_ensemble.cu | 60 __host__ __device__ 69 __host__ __device__ 114 __host__ __device__
|