__device__ (reference) in projects: third_party

Project(s)

Full Search
Definition
Symbol
File Path
History
Type

Searched refs:__device__ (Results 1 – 25 of 56) sorted by relevance

12 3

/third_party/ffmpeg/libavfilter/cuda/
D	vector_helpers.cuh	43 template<typename T, typename V> inline __device__ V to_floatN(const T &a) { return (V)a; } in to_floatN() 44 template<typename T, typename V> inline __device__ T from_floatN(const V &a) { return (T)a; } in from_floatN() 47 …template<typename V> inline __device__ T operator+(const T &a, const V &b) { return make_ ## T (a.… 48 …template<typename V> inline __device__ T operator-(const T &a, const V &b) { return make_ ## T (a.… 49 …template<typename V> inline __device__ T operator(const T &a, V b) { return make_ ## T (a.x b, … 50 …template<typename V> inline __device__ T operator/(const T &a, V b) { return make_ ## T (a.x / b, … 51 …template<typename V> inline __device__ T operator>>(const T &a, V b) { return make_ ## T (a.x >> b… 52 …template<typename V> inline __device__ T operator<<(const T &a, V b) { return make_ ## T (a.x << b… 53 …template<typename V> inline __device__ T &operator+=(T &a, const V &b) { a.x += b.x; a.y += b.y; r… 54 … template<typename V> inline __device__ void vec_set(T &a, const V &b) { a.x = b.x; a.y = b.y; } \ [all …]
/third_party/ffmpeg/compat/cuda/
D	cuda_runtime.h	28 #define __device__ __attribute__((device)) macro 94 #define GET(name, reg) static inline __device__ uint3 name() {\ 123 #define TEX2D(type, ret) static inline __device__ void conv(type* out, unsigned a, unsigned b, unsi… 137 inline __device__ T tex2D(cudaTextureObject_t texObject, float x, float y) 149 inline __device__ float4 tex2D<float4>(cudaTextureObject_t texObject, float x, float y) 159 inline __device__ float tex2D<float>(cudaTextureObject_t texObject, float x, float y) 165 inline __device__ float2 tex2D<float2>(cudaTextureObject_t texObject, float x, float y) 172 static inline __device__ float floorf(float a) { return __builtin_floorf(a); } in floorf() 173 static inline __device__ float floor(float a) { return __builtin_floorf(a); } in floor() 174 static inline __device__ double floor(double a) { return __builtin_floor(a); } in floor() [all …]
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/
D	util.cuh	31 __device__ static inline double MsAtomicAdd(double address, const double val) { in MsAtomicAdd() 42 __device__ static inline float MsAtomicAdd(float address, const float val) { return atomicAdd(addr… in MsAtomicAdd() 44 __device__ static inline int MsAtomicAdd(int address, int val) { return atomicAdd(address, val); } in MsAtomicAdd() 46 __device__ static inline unsigned int MsAtomicAdd(unsigned int address, unsigned int val) { in MsAtomicAdd() 50 __device__ static inline int8_t MsAtomicAdd(int8_t address, int8_t val) { in MsAtomicAdd() 69 __device__ static inline int64_t MsAtomicAdd(int64_t address, int64_t val) { in MsAtomicAdd() 83 __device__ static inline bool MsAtomicAdd(bool address, bool val) { in MsAtomicAdd() 88 __device__ static inline unsigned char MsAtomicAdd(short address, short val) { // NOLINT in MsAtomicAdd() 114 __device__ static inline half MsAtomicAdd(half address, half val) { in MsAtomicAdd() 134 __device__ static inline unsigned char MsAtomicAdd(unsigned char address, unsigned char val) { in MsAtomicAdd() [all …]
D	sparse_apply_proximal_adagrad_impl.cu	20 __device__ __forceinline__ bool CompareFunc(T x, T y) { in CompareFunc() 25 __device__ __forceinline__ bool CompareFunc(half x, half y) { in CompareFunc() 30 __device__ __forceinline__ T RsqrtFunc(T x) { in RsqrtFunc() 35 __device__ __forceinline__ half RsqrtFunc(half x) { in RsqrtFunc() 40 __device__ __forceinline__ T AbsFunc(T x) { in AbsFunc() 45 __device__ __forceinline__ half AbsFunc(half x) { in AbsFunc() 50 __device__ __forceinline__ T Sgn(T x) { in Sgn() 55 __device__ __forceinline__ half Sgn(half x) { in Sgn()
D	broadcast_impl.cu	26 …__device__ __host__ __forceinline__ bool operator()(const T &lhs, const T &rhs) { return lhs > rhs… in operator ()() 31 …__device__ __host__ __forceinline__ bool operator()(const T &lhs, const T &rhs) { return lhs < rhs… in operator ()() 36 …__device__ __host__ __forceinline__ bool operator()(const T &lhs, const T &rhs) { return lhs == rh… in operator ()() 41 __device__ __host__ __forceinline__ bool operator()(const half &lhs, const half &rhs) { in operator ()() 48 __device__ __host__ __forceinline__ bool operator()(const float &lhs, const float &rhs) { in operator ()() 55 …__device__ __host__ __forceinline__ bool operator()(const T &lhs, const T &rhs) { return lhs >= rh… in operator ()() 60 __device__ __host__ __forceinline__ bool operator()(const half &lhs, const half &rhs) { in operator ()() 69 __device__ __host__ __forceinline__ bool operator()(const float &lhs, const float &rhs) { in operator ()() 76 …__device__ __host__ __forceinline__ bool operator()(const T &lhs, const T &rhs) { return lhs <= rh… in operator ()() 81 __device__ __host__ __forceinline__ bool operator()(const half &lhs, const half &rhs) { in operator ()() [all …]
D	topk_lib.cuh	20 constexpr __host__ __device__ int Log2(int n, int p = 0) { return (n <= 1) ? p : Log2(n / 2, p + 1)… in Log2() 21 constexpr __host__ __device__ bool IsPow2(int v) { return (v && !(v & (v - 1))); } in IsPow2() 22 constexpr __host__ __device__ int NextPow2(int v) { return (IsPow2(v) ? 2 * v : (1 << static_cast<i… in NextPow2() 24 __device__ __forceinline__ int GetLaneId() { in GetLaneId() 32 …__device__ static inline bool gt(T k1, S v1, T k2, S v2) { return k1 > k2 \|\| (k1 == k2 && v1 < v2)… in gt() 33 …__device__ static inline bool lt(T k1, S v1, T k2, S v2) { return k1 < k2 \|\| (k1 == k2 && v1 > v2)… in lt() 38 __device__ static inline bool lt(T a, T b) { return a < b; } in lt() 39 __device__ static inline bool gt(T a, T b) { return a > b; } in gt() 43 inline __device__ T shfl_xor(const T val, int laneMask, int width = kWarpSize) { in shfl_xor() 48 inline __device__ void L2CompareAndSwap(T a, S b, int i_1, int i_2) { in L2CompareAndSwap() [all …]
D	ftrl_impl.cu	`20 __device__ __forceinline__ T PowFunc(T x, T y) { in PowFunc() 25 __device__ __forceinline__ half PowFunc(half x, half y) { in PowFunc() 30 __device__ __forceinline__ bool CompareFunc(T x, T y) { in CompareFunc() 35 __device__ __forceinline__ bool CompareFunc(half x, half y) { in CompareFunc() 40 __device__ __forceinline__ T Sgn(T x) { in Sgn() 45 __device__ __forceinline__ half Sgn(half x) { in Sgn()`
D	sparse_ftrl_impl.cu	`22 __device__ __forceinline__ T PowFunc(T x, T y) { in PowFunc() 27 __device__ __forceinline__ half PowFunc(half x, half y) { in PowFunc() 32 __device__ __forceinline__ bool CompareFunc(T x, T y) { in CompareFunc() 37 __device__ __forceinline__ bool CompareFunc(half x, half y) { in CompareFunc() 42 __device__ __forceinline__ T Sgn(T x) { in Sgn() 47 __device__ __forceinline__ half Sgn(half x) { in Sgn()`
D	layer_norm_grad_impl.cu	27 inline __device__ T my_pow(T a, double b) { in my_pow() 32 inline __device__ half my_pow(half a, double b) { in my_pow() 37 inline __device__ void GammaAndBetaThreadReduce(const int &col, const int &row_dim, const int &col_… in GammaAndBetaThreadReduce() 57 inline __device__ void GammaAndBetaWarpReduce(T dg, T db) { in GammaAndBetaWarpReduce() 65 inline __device__ void GammaAndBetaBlockReduce(const int &col, const int &row_dim, T dg, T db, T … in GammaAndBetaBlockReduce() 110 inline __device__ void InputThreadReduce(const int &row, const int &col_dim, const int &param_dim, … in InputThreadReduce() 134 inline __device__ void InputThreadReduce(const int &row, const int &col_dim, const int &param_dim, … in InputThreadReduce() 158 inline __device__ void InputWarpReduce(T sum1, T sum2, T sum3) { in InputWarpReduce() 167 inline __device__ void InputBlockReduce(const int &col_dim, T sum1, T sum2, T sum3, T *share_mem… in InputBlockReduce() 190 inline __device__ void InputProp(const int &row, const int &col_dim, const int &param_dim, const T … in InputProp() [all …]
D	layer_norm_impl.cu	26 inline __device__ void MeanAndVarAccumulation(T mean, T var, T num, const T &val) { in MeanAndVarAccumulation() 37 inline __device__ void MeanAndVarMerge(T m1, T v1, T n1, const T &m2, const T &v2, const T &n2) { in MeanAndVarMerge() 50 inline __device__ void ThreadReduce(const int &col_dim, const T block_addr, T mean, T var, T nu… in ThreadReduce() 64 inline __device__ void WarpReduce(T mean, T var, T num) { in WarpReduce() 74 inline __device__ void BlockReduce(const int &col_dim, T mean, T var, T num, T mean_addr, T va… in BlockReduce() 103 inline __device__ void LayerNorm(const int &row, const int &col_dim, const int &param_dim, const T … in LayerNorm() 113 inline __device__ void LayerNorm(const int &row, const int &col_dim, const int &param_dim, const ha… in LayerNorm()
D	layer_norm_grad_grad_impl.cu	30 inline __device__ T my_pow(T a, double b) { in my_pow() 36 inline __device__ half my_pow(half a, double b) { in my_pow() 42 inline __device__ void GammaAndBetaThreadReduce(const int &col, const int &row_dim, const int &col_… in GammaAndBetaThreadReduce() 69 inline __device__ void GammaAndBetaWarpReduce(T part1, T part2, T part3) { in GammaAndBetaWarpReduce() 79 inline __device__ void GammaAndBetaBlockReduce(const int &col, const int &row_dim, T part1, T par… in GammaAndBetaBlockReduce() 125 inline __device__ void InputThreadReduceInnerMean(const int &row, const int &col_dim, const int &pa… in InputThreadReduceInnerMean() 154 inline __device__ void InputWarpReduceInnerMean(T sum1, T sum2, T sum3, T sum4) { in InputWarpReduceInnerMean() 165 inline __device__ void InputBlockReduceInnerMean(const int &col_dim, T sum1, T sum2, T sum3, T *… in InputBlockReduceInnerMean() 192 inline __device__ void InputThreadReduceOuterMean(const int &row, const int &col_dim, const int &pa… in InputThreadReduceOuterMean() 230 inline __device__ void InputThreadReduceOuterMean(const int &row, const int &col_dim, const int &pa… in InputThreadReduceOuterMean() [all …]
D	cast_impl.cu	25 __device__ __forceinline__ void CastBase(const S input_addr, T output_addr) { in CastBase() 30 __device__ __forceinline__ void CastBase(const half input_addr, uint64_t output_addr) { in CastBase() 34 __device__ __forceinline__ void CastBase(const half input_addr, int64_t output_addr) { in CastBase() 38 __device__ __forceinline__ void CastBase(const half input_addr, uint32_t output_addr) { in CastBase() 42 __device__ __forceinline__ void CastBase(const half input_addr, int32_t output_addr) { in CastBase() 46 __device__ __forceinline__ void CastBase(const half input_addr, uint16_t output_addr) { in CastBase() 50 __device__ __forceinline__ void CastBase(const half input_addr, int16_t output_addr) { in CastBase() 54 __device__ __forceinline__ void CastBase(const half input_addr, uint8_t output_addr) { in CastBase() 58 __device__ __forceinline__ void CastBase(const half input_addr, int8_t output_addr) { in CastBase() 63 __device__ __forceinline__ void CastBase(const uint64_t input_addr, half output_addr) { in CastBase() [all …]
D	layer_norm_impl.cuh	`26 __device__ float addr() { in addr() 33 __device__ half addr() { in addr()`
D	broadcast_grad_impl.cu	23 …__device__ __forceinline__ void operator()(const T &x1, const T &x2, const bool &grad_x1, const bo… in operator ()() 35 …__device__ __forceinline__ void operator()(const T &x1, const T &x2, const bool &grad_x1, const bo… in operator ()() 45 __device__ __forceinline__ int Index(const int &index, const int &dim) { return dim == 1 ? 0 : inde… in Index() 48 __device__ __forceinline__ void BroadcastGradOperator(const int &l0, const int &l1, const int &l2, … in BroadcastGradOperator() 91 __device__ __forceinline__ void NoBroadcastOperator(const int &nums, const bool &grad_x1, const boo… in NoBroadcastOperator()
D	roi_align_impl.cu	21 inline __device__ int roi_cast_int(float x) { return __float2int_rd(x); } in roi_cast_int() 22 inline __device__ int roi_cast_int(half x) { return __half2int_rd(x); } in roi_cast_int() 23 inline __device__ int roi_round_int(float x) { return __float2int_rn(x + 0.00007); } in roi_round_int() 24 inline __device__ int roi_round_int(half x) { return __half2int_rn(x + static_cast<half>(0.00007));… in roi_round_int() 27 __device__ void bilinear_interpolate(const int height, const int width, T y, T x, int x_low, int … in bilinear_interpolate() 71 __device__ void bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale, co… in bin_box()
D	loss_with_reduction_impl.cu	22 inline __device__ float logT(float x) { return logf(x); } in logT() 23 inline __device__ half logT(half x) { return hlog(x); } in logT() 24 inline __device__ float castT(float ref, int x) { return __int2float_rd(x); } in castT() 25 inline __device__ half castT(half ref, int x) { return __int2half_rd(x); } in castT() 26 inline __device__ float maxT(float a, float b) { return fmaxf(a, b); } in maxT() 27 inline __device__ half maxT(half a, half b) { return a > b ? a : b; } in maxT() 57 __device__ void MultiplyDevice(const S a, const T b, T out) { in MultiplyDevice() 62 __device__ void MultiplyDevice(const half a, const float b, float out) { in MultiplyDevice() 69 __device__ void MultiplyDevice(const float a, const half b, half *out) { in MultiplyDevice()
D	iou_impl.cu	`19 __device__ float CoordinateMax(const float a, const float b) { in CoordinateMax() 23 __device__ float CoordinateMin(const float a, const float b) { in CoordinateMin()`
D	unsorted_segment_min.cu	`21 __device__ __forceinline__ void max_val_init(T init_val) { in max_val_init() 26 __device__ __forceinline__ void max_val_init(half init_val) { in max_val_init()`
D	sync_batch_norm_grad_impl.cu	25 __inline__ __device__ float HalfFloatInputConvert(const half val) { return __half2float(val); } in HalfFloatInputConvert() 26 __inline__ __device__ float HalfFloatInputConvert(const float val) { return val; } in HalfFloatInputConvert() 27 __inline__ __device__ void HalfFloatOutputAssign(const float val, float arr, int idx) { arr[idx] =… in HalfFloatOutputAssign() 28 __inline__ __device__ void HalfFloatOutputAssign(const float val, half arr, int idx) { arr[idx] = … in HalfFloatOutputAssign()
/third_party/mindspore/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/sponge/
D	common_sponge.cuh	125 __device__ __host__ static inline VECTOR operator-(const VECTOR &veca, const VECTOR &vecb) { in operator -() 133 __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UNSIGNED_INT_VECTOR uvec_a, in Get_Periodic_Displacement() 143 __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const UINT_VECTOR_LJ_TYPE uvec_a, in Get_Periodic_Displacement() 153 __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const VECTOR vec_a, const VECTOR… in Get_Periodic_Displacement() 163 __device__ __host__ static inline VECTOR Get_Periodic_Displacement(const VECTOR vec_a, const VECTOR… in Get_Periodic_Displacement() 174 __device__ __host__ static inline VECTOR operator+(const VECTOR &veca, const VECTOR &vecb) { in operator +() 182 __device__ __host__ static inline float operator(const VECTOR &veca, const VECTOR &vecb) { in operator () 185 __device__ __host__ static inline VECTOR operator(const float &a, const VECTOR &vecb) { in operator () 193 __device__ __host__ static inline VECTOR operator-(const VECTOR &vecb) { in operator -() 201 __device__ __host__ static inline VECTOR operator^(const VECTOR &veca, const VECTOR &vecb) { in operator ^() [all …]
/third_party/boost/boost/numeric/odeint/external/thrust/
D	thrust_operations.hpp	`42 __host__ __device__ 59 __host__ __device__ 79 __host__ __device__ 101 __host__ __device__ 128 __host__ __device__ 157 __host__ __device__ 188 __host__ __device__ 214 __host__ __device__`
/third_party/ffmpeg/libavfilter/
D	vf_scale_cuda_bicubic.cu	28 __device__ inline float4 lanczos_coeffs(float x, float param) in lanczos_coeffs() 50 __device__ inline float4 bicubic_coeffs(float x, float param) in bicubic_coeffs() 63 __device__ inline void derived_fast_coeffs(float4 coeffs, float x, float h0, float h1, float *s) in derived_fast_coeffs() 74 __device__ inline V apply_coeffs(float4 coeffs, V c0, V c1, V c2, V c3) in apply_coeffs() 85 __device__ inline void Subsample_Bicubic(coeffs_function_t coeffs_function, in Subsample_Bicubic() 128 __device__ inline void Subsample_FastBicubic(coeffs_function_t coeffs_function, in Subsample_FastBicubic()
D	vf_yadif_cuda.cu	`22 __inline__ __device__ T spatial_predictor(T a, T b, T c, T d, T e, T f, T g, in spatial_predictor() 51 __inline__ __device__ int max3(int a, int b, int c) in max3() 57 __inline__ __device__ int min3(int a, int b, int c) in min3() 64 __inline__ __device__ T temporal_predictor(T A, T B, T C, T D, T E, T F, in temporal_predictor() 97 __inline__ __device__ void yadif_single(T dst, in yadif_single() 167 __inline__ __device__ void yadif_double(T dst, in yadif_double()`
D	vf_scale_cuda.cu	`26 __device__ inline void Subsample_Nearest(cudaTextureObject_t tex, in Subsample_Nearest() 47 __device__ inline void Subsample_Bilinear(cudaTextureObject_t tex, in Subsample_Bilinear()`
/third_party/boost/libs/numeric/odeint/examples/thrust/
D	phase_oscillator_ensemble.cu	`60 __host__ __device__ 69 __host__ __device__ 114 __host__ __device__`

12 3