Searched refs:lane_id (Results 1 – 8 of 8) sorted by relevance
/external/llvm-project/llvm/test/CodeGen/Hexagon/ |
D | jt-in-text.ll | 15 define void @test2(i32 %lane_id, i32 %rx_pwr_st) #0 { 17 %lane_id.addr = alloca i32, align 4 19 store i32 %lane_id, i32* %lane_id.addr, align 4 21 %0 = load i32, i32* %lane_id.addr, align 4
|
/external/tensorflow/tensorflow/core/util/ |
D | gpu_device_functions.h | 200 unsigned int lane_id; in GpuLaneId() local 205 asm("mov.u32 %0, %%laneid;" : "=r"(lane_id)); in GpuLaneId() 208 lane_id = __lane_id(); in GpuLaneId() 210 return lane_id; in GpuLaneId() 246 int lane_id = GpuLaneId(); in GpuShuffleGetSrcLane() local 247 int lane_base = lane_id & ~width + 1; in GpuShuffleGetSrcLane() 255 unsigned lane_id = GpuLaneId(); in GpuShuffleUpGetSrcLane() local 256 if ((lane_id & width - 1) < delta) { in GpuShuffleUpGetSrcLane() 257 return lane_id; in GpuShuffleUpGetSrcLane() 259 return lane_id - delta; in GpuShuffleUpGetSrcLane() [all …]
|
D | gpu_kernel_helper_test.cu.cc | 100 unsigned lane_id = GpuLaneId(); in GpuShuffleGetSrcLaneTest() local 106 op_name, param, width, lane_id, actual, expected); in GpuShuffleGetSrcLaneTest() 117 GpuShuffleSync(kCudaWarpAll, lane_id, src_lane, width); in GpuShuffleGetSrcLaneTest() 124 GpuShuffleUpSync(kCudaWarpAll, lane_id, delta, width); in GpuShuffleGetSrcLaneTest() 131 GpuShuffleDownSync(kCudaWarpAll, lane_id, delta, width); in GpuShuffleGetSrcLaneTest() 138 GpuShuffleXorSync(kCudaWarpAll, lane_id, lane_lane, width); in GpuShuffleGetSrcLaneTest()
|
/external/llvm-project/openmp/libomptarget/deviceRTLs/nvptx/docs/ |
D | ReductionDesign.txt | 63 b. its lane_id 64 c. the offset of the lane_id which hosts a remote ReduceData 107 ShuffleReduceFn is used here with lane_id set to 0 because it is not used 108 therefore we save instructions by not retrieving lane_id from the corresponding 134 int lane_id) { 140 ShuffleReduceFn(reduce_data, lane_id, offset, 1); 153 if (lane_id < offset) { 213 from physical lane_id as defined by nvidia. 215 lane_id (instead of the physical one defined by nvidia) that would make 223 offsets, instead of absolute lane_id. Therefore the subtraction is performed [all …]
|
/external/llvm-project/openmp/libomptarget/deviceRTLs/ |
D | interface.h | 197 typedef void (*kmp_ShuffleReductFctPtr)(void *rhsData, int16_t lane_id,
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | ir_emitter_unnested.h | 120 llvm::Value* lane_id; member
|
D | ir_emitter_unnested.cc | 4485 ksl.If("intra_warp_reduce_write", is_zero(thread_id_info.lane_id), [&] { in EmitEpilogueForReduction() 4496 {b_.getInt32(0), constant(j), thread_id_info.lane_id})); in EmitEpilogueForReduction() 4555 b_.CreateAnd(has_output, is_zero(thread_id_info.lane_id)), [&] { in EmitEpilogueForReduction()
|
/external/mesa3d/src/amd/compiler/ |
D | aco_instruction_selection.cpp | 7169 Temp lane_id = emit_mbcnt(ctx, bld.tmp(v1)); in emit_boolean_reduce() local 7170 …fset = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(~uint32_t(cluster_size - 1)), lane_id); in emit_boolean_reduce()
|