Searched refs:ldg (Results 1 – 25 of 50) sorted by relevance


/external/tensorflow/tensorflow/core/kernels/
compare_and_bitpack_op_gpu.cu.cc:44 const T thresh = ldg(threshold); in CompareAndBitpackKernel()
48 ((((ldg(block) > thresh) << 7)) | (((ldg(block + 1) > thresh) << 6)) | in CompareAndBitpackKernel()
49 (((ldg(block + 2) > thresh) << 5)) | in CompareAndBitpackKernel()
50 (((ldg(block + 3) > thresh) << 4)) | in CompareAndBitpackKernel()
51 (((ldg(block + 4) > thresh) << 3)) | in CompareAndBitpackKernel()
52 (((ldg(block + 5) > thresh) << 2)) | in CompareAndBitpackKernel()
53 (((ldg(block + 6) > thresh) << 1)) | (((ldg(block + 7) > thresh)))); in CompareAndBitpackKernel()
66 const int64 block = ldg(reinterpret_cast<const int64*>(input + 8 * i)); in CompareAndBitpackKernel()
84 const float thresh = ldg(threshold); in CompareAndBitpackKernel()
86 const float4 block0 = ldg(reinterpret_cast<const float4*>(input + 8 * i)); in CompareAndBitpackKernel()
[all …]
population_count_op_gpu.cu.cc:39 CUDA_1D_KERNEL_LOOP(i, size) { output[i] = __popc(ldg(input + i)); } in PopulationCountKernel()
47 output[i] = __popc(ldg(reinterpret_cast<const uint8*>(input + i))); in PopulationCountKernel()
56 output[i] = __popc(ldg(reinterpret_cast<const uint16*>(input + i))); in PopulationCountKernel()
63 CUDA_1D_KERNEL_LOOP(i, size) { output[i] = __popcll(ldg(input + i)); } in PopulationCountKernel()
bias_op_gpu.cu.cc:60 output[index] = ldg(input + index) + ldg(bias + bias_offset); in BiasNHWCKernel()
70 output[index] = ldg(input + index) + ldg(bias + bias_offset); in BiasNCHWKernel()
106 CudaAtomicAdd(bias_backprop + bias_offset, ldg(output_backprop + index)); in BiasGradNHWC_Naive()
118 CudaAtomicAdd(bias_backprop + bias_offset, ldg(output_backprop + index)); in BiasGradNCHW_Naive()
138 CudaAtomicAdd(s_data + bias_offset, AccT(ldg(output_backprop + index))); in BiasGradNHWC_SharedAtomics()
171 T val = ldg(output_backprop + in BiasGradNCHW_SharedAtomics()
sparse_tensor_dense_matmul_op_gpu.cu.cc:42 const int i = ldg(a_indices + 2 * a_ix + ((ADJ_A) ? 1 : 0)); in SparseTensorDenseMatMulKernel()
43 const int k = ldg(a_indices + 2 * a_ix + ((ADJ_A) ? 0 : 1)); in SparseTensorDenseMatMulKernel()
55 const T a_value = ldg(a_values + a_ix); in SparseTensorDenseMatMulKernel()
58 const T b_value = ldg(b + ((ADJ_B) ? j * b_cols + k : k * b_cols + j)); in SparseTensorDenseMatMulKernel()
depthwise_conv_op_gpu.h:131 sum += ldg(input + input_offset) * ldg(filter + filter_offset); in DepthwiseConv2dGPUKernelNHWC()
153 sum += ldg(input + input_offset) * ldg(filter + filter_offset); in DepthwiseConv2dGPUKernelNHWC()
264 tile_ptr[0] = static_cast<S>(ldg(in_ptr)); in DepthwiseConv2dGPUKernelNHWCSmall()
266 tile_ptr[tile_offset] = static_cast<S>(ldg(tensor_offset + in_ptr)); in DepthwiseConv2dGPUKernelNHWCSmall()
271 static_cast<S>(ldg(filter_offset + filter)); in DepthwiseConv2dGPUKernelNHWCSmall()
409 sum += ldg(input + input_offset) * ldg(filter + filter_offset); in DepthwiseConv2dGPUKernelNCHW()
436 sum += ldg(input + input_offset) * ldg(filter + filter_offset); in DepthwiseConv2dGPUKernelNCHW()
548 tile_ptr[0] = static_cast<S>(ldg(in_ptr)); in DepthwiseConv2dGPUKernelNCHWSmall()
550 tile_ptr[tile_offset] = static_cast<S>(ldg(block_pixels + in_ptr)); in DepthwiseConv2dGPUKernelNCHWSmall()
558 static_cast<S>(ldg(filter_offset + filter)); in DepthwiseConv2dGPUKernelNCHWSmall()
[all …]
softmax_op_gpu.cu.cc:83 U max_val = strict_cast<U>(ldg(max_logits + row)); in GenerateNormalizedProb()
88 result = input - max_val - log(ldg(sum_probs + row)); in GenerateNormalizedProb()
90 result = exp(input - max_val) / ldg(sum_probs + row); in GenerateNormalizedProb()
106 strict_cast<U>(logits_[gid] - ldg(max_logits_ + gid / num_cols_)); in operator ()()
inplace_ops_functor_gpu.cu.cc:38 *p = ldg(q); in DoParallelConcatOpKernel()
93 *p = ldg(q); in DoInplaceOpKernel()
96 *p += ldg(q); in DoInplaceOpKernel()
99 *p -= ldg(q); in DoInplaceOpKernel()
resize_nearest_neighbor_op_gpu.cu.cc:62 top_data[index] = ldg(bottom_data_n + idx); in ResizeNearestNeighborNHWC()
91 top_data[index] = ldg(bottom_data_n + idx); in LegacyResizeNearestNeighborNHWC()
122 CudaAtomicAdd(bottom_diff_n + idx, ldg(top_diff + index)); in ResizeNearestNeighborBackwardNHWC()
151 CudaAtomicAdd(bottom_diff_n + idx, ldg(top_diff + index)); in LegacyResizeNearestNeighborBackwardNHWC()
gather_functor_gpu.cu.h:54 Index gather_i = ldg(indices + indices_i); in GatherOpKernel()
67 out[i] = ldg(params + params_i); in GatherOpKernel()
depthtospace_op_gpu.cu.cc:58 *(output_ptr + out_idx) = ldg(input_ptr + inp_idx); in D2S_NHWC()
97 *(output_ptr + output_idx) = ldg(input_ptr + input_idx); in D2S_NCHW()
135 output_ptr[bY * output_width + bX] = ldg( in D2S_NCHW_LOOP()
spacetodepth_op_gpu.cu.cc:57 *(output_ptr + out_idx) = ldg(input_ptr + inp_idx); in S2D_NHWC()
95 *(output_ptr + output_idx) = ldg(input_ptr + input_idx); in S2D_NCHW()
135 ldg(input_ptr + bY * input_width + bX); in S2D_NCHW_LOOP()
gather_nd_op_gpu.cu.cc:44 const Index index_j = ldg(indices_i + j); in GatherSliceOpKernel()
61 out[i] = (out_of_bounds) ? T(0) : ldg(params + offset + loc_offset); in GatherSliceOpKernel()
multinomial_op_gpu.cu.cc:47 if (ldg(maxima + maxima_idx) == ldg(scores + index)) { in MultinomialKernel()
spacetobatch_functor_gpu.cu.cc:89 ldg(space_tensor_ptr + space_tensor_idx); in S2B()
92 ldg(batch_tensor_ptr + batch_tensor_idx); in S2B()
scatter_nd_op_gpu.cu.cc:95 const Index ix_d = internal::SubtleMustCopy(ldg(indices + offset)); in ScatterNdOpKernel()
102 update(out + i + si, ldg(updates + (index * slice_size + si))); in ScatterNdOpKernel()
segment_reduction_ops_gpu.cu.cc:91 sum += ldg(input + (input_outer_dim_index_base + j) * inner_dim_size + in SortedSegmentSumCustomKernel()
124 KernelReductionFunctor()(output + output_index, ldg(input + input_index)); in UnsortedSegmentCustomKernel()
transpose_functor_gpu.cu.cc:49 dst[o_idx] = Eigen::numext::conj(ldg(src + i_idx)); in TransposeKernel()
51 dst[o_idx] = ldg(src + i_idx); in TransposeKernel()
dynamic_stitch_op_gpu.cu.cc:44 output[output_index] = ldg(data_ptrs[input_index] + slice_offset); in DynamicStitchKernel()
dynamic_partition_op_gpu.cu.cc:68 int32 N = min(ldg(size), out_size); in MoveValuesKernel()
70 int32 key = ldg(keys + i); in MoveValuesKernel()
71 int32 value = ldg(values + i); in MoveValuesKernel()
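
The hits above all follow the same pattern: every read of tensor memory inside a kernel goes through ldg() rather than a plain dereference. As a rough illustration only (a hedged, standalone sketch, not TensorFlow's actual helper; the name BiasAddSketch and the float-only specialization are assumptions), the wrapper and a kernel in the style of BiasNHWCKernel could look like this:

    // Sketch of an ldg()-style read-only load helper. __ldg() loads through
    // the read-only data cache (PTX ld.global.nc) and requires sm_35 or newer;
    // older architectures fall back to an ordinary global load.
    __device__ __forceinline__ float ldg(const float* ptr) {
    #if __CUDA_ARCH__ >= 350
      return __ldg(ptr);
    #else
      return *ptr;
    #endif
    }

    // Elementwise bias add in the spirit of BiasNHWCKernel above: both the
    // input element and the broadcast bias value are fetched through ldg().
    __global__ void BiasAddSketch(const float* input, const float* bias,
                                  float* output, int size, int bias_size) {
      int index = blockIdx.x * blockDim.x + threadIdx.x;
      if (index < size) {
        output[index] = ldg(input + index) + ldg(bias + index % bias_size);
      }
    }
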
/external/tensorflow/tensorflow/contrib/seq2seq/kernels/
beam_search_ops_gpu.cu.cc:39 Eigen::numext::mini(max_time, ldg(max_sequence_lengths + batch)); in GatherTreeOpKernel()
47 beams[initial_beam_ix] = ldg(step_ids + initial_beam_ix); in GatherTreeOpKernel()
48 int32 parent = ldg(parent_ids + initial_beam_ix); in GatherTreeOpKernel()
58 beams[level_beam_ix] = ldg(step_ids + level_parent_ix); in GatherTreeOpKernel()
59 parent = ldg(parent_ids + level_parent_ix); in GatherTreeOpKernel()
/external/swiftshader/third_party/llvm-7.0/llvm/test/CodeGen/NVPTX/
ldu-ldg.ll:6 declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
7 declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
27 %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
34 %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
bug26185-2.ll:3 ; Verify that we correctly emit code for extending ldg/ldu. We do not expose
4 ; extending variants in the backend, but the ldg/ldu selection code may pick
read-global-variable-constant.ll:4 ; ld.global.nc (aka ldg).
/external/llvm/test/CodeGen/NVPTX/
ldu-ldg.ll:6 declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
7 declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
27 %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
34 %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
bug26185-2.ll:3 ; Verify that we correctly emit code for extending ldg/ldu. We do not expose
4 ; extending variants in the backend, but the ldg/ldu selection code may pick
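
The NVPTX tests above exercise the llvm.nvvm.ldg intrinsics and the ld.global.nc selection logic. At the CUDA source level there are two common routes to that instruction; the kernel below is a hedged sketch (the name ScaleSketch is an assumption), contrasting an explicit __ldg() call with a const __restrict__ pointer that merely permits the compiler to pick ld.global.nc on its own:

    // Two routes to ld.global.nc, mirroring what the tests above verify.
    // __ldg() always requests the read-only cache (sm_35+); a pointer marked
    // const __restrict__ only allows the compiler to choose it when it can
    // prove the data is not written through any alias.
    __global__ void ScaleSketch(const float* __restrict__ in, float* out,
                                float k, int n) {
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n) {
        float a = __ldg(in + i);  // explicit read-only cached load
        out[i] = a * k;
      }
    }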
