Home
last modified time | relevance | path

Searched refs:shfl (Results 1 – 25 of 34) sorted by relevance

12

/external/llvm/test/CodeGen/NVPTX/
Dshfl.ll3 declare i32 @llvm.nvvm.shfl.down.i32(i32, i32, i32)
4 declare float @llvm.nvvm.shfl.down.f32(float, i32, i32)
5 declare i32 @llvm.nvvm.shfl.up.i32(i32, i32, i32)
6 declare float @llvm.nvvm.shfl.up.f32(float, i32, i32)
7 declare i32 @llvm.nvvm.shfl.bfly.i32(i32, i32, i32)
8 declare float @llvm.nvvm.shfl.bfly.f32(float, i32, i32)
9 declare i32 @llvm.nvvm.shfl.idx.i32(i32, i32, i32)
10 declare float @llvm.nvvm.shfl.idx.f32(float, i32, i32)
13 ; shfl.down.
15 ; CHECK-LABEL: .func{{.*}}shfl.down1
[all …]
/external/llvm-project/llvm/test/CodeGen/NVPTX/
Dshfl.ll3 declare i32 @llvm.nvvm.shfl.down.i32(i32, i32, i32)
4 declare float @llvm.nvvm.shfl.down.f32(float, i32, i32)
5 declare i32 @llvm.nvvm.shfl.up.i32(i32, i32, i32)
6 declare float @llvm.nvvm.shfl.up.f32(float, i32, i32)
7 declare i32 @llvm.nvvm.shfl.bfly.i32(i32, i32, i32)
8 declare float @llvm.nvvm.shfl.bfly.f32(float, i32, i32)
9 declare i32 @llvm.nvvm.shfl.idx.i32(i32, i32, i32)
10 declare float @llvm.nvvm.shfl.idx.f32(float, i32, i32)
13 ; shfl.down.
15 ; CHECK-LABEL: .func{{.*}}shfl.down1
[all …]
Dshfl-p.ll3 declare {i32, i1} @llvm.nvvm.shfl.down.i32p(i32, i32, i32)
4 declare {float, i1} @llvm.nvvm.shfl.down.f32p(float, i32, i32)
5 declare {i32, i1} @llvm.nvvm.shfl.up.i32p(i32, i32, i32)
6 declare {float, i1} @llvm.nvvm.shfl.up.f32p(float, i32, i32)
7 declare {i32, i1} @llvm.nvvm.shfl.bfly.i32p(i32, i32, i32)
8 declare {float, i1} @llvm.nvvm.shfl.bfly.f32p(float, i32, i32)
9 declare {i32, i1} @llvm.nvvm.shfl.idx.i32p(i32, i32, i32)
10 declare {float, i1} @llvm.nvvm.shfl.idx.f32p(float, i32, i32)
12 ; CHECK-LABEL: .func{{.*}}shfl.i32.rrr
13 define {i32, i1} @shfl.i32.rrr(i32 %a, i32 %b, i32 %c) {
[all …]
Dshfl-sync-p.ll3 declare {i32, i1} @llvm.nvvm.shfl.sync.down.i32p(i32, i32, i32, i32)
4 declare {float, i1} @llvm.nvvm.shfl.sync.down.f32p(i32, float, i32, i32)
5 declare {i32, i1} @llvm.nvvm.shfl.sync.up.i32p(i32, i32, i32, i32)
6 declare {float, i1} @llvm.nvvm.shfl.sync.up.f32p(i32, float, i32, i32)
7 declare {i32, i1} @llvm.nvvm.shfl.sync.bfly.i32p(i32, i32, i32, i32)
8 declare {float, i1} @llvm.nvvm.shfl.sync.bfly.f32p(i32, float, i32, i32)
9 declare {i32, i1} @llvm.nvvm.shfl.sync.idx.i32p(i32, i32, i32, i32)
10 declare {float, i1} @llvm.nvvm.shfl.sync.idx.f32p(i32, float, i32, i32)
12 ; CHECK-LABEL: .func{{.*}}shfl.sync.i32.rrr
13 define {i32, i1} @shfl.sync.i32.rrr(i32 %mask, i32 %a, i32 %b, i32 %c) {
[all …]
Dshfl-sync.ll3 declare i32 @llvm.nvvm.shfl.sync.down.i32(i32, i32, i32, i32)
4 declare float @llvm.nvvm.shfl.sync.down.f32(float, i32, i32, i32)
5 declare i32 @llvm.nvvm.shfl.sync.up.i32(i32, i32, i32, i32)
6 declare float @llvm.nvvm.shfl.sync.up.f32(float, i32, i32, i32)
7 declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32)
8 declare float @llvm.nvvm.shfl.sync.bfly.f32(float, i32, i32, i32)
9 declare i32 @llvm.nvvm.shfl.sync.idx.i32(i32, i32, i32, i32)
10 declare float @llvm.nvvm.shfl.sync.idx.f32(float, i32, i32, i32)
12 ; CHECK-LABEL: .func{{.*}}shfl.sync.rrr
13 define i32 @shfl.sync.rrr(i32 %mask, i32 %a, i32 %b, i32 %c) {
[all …]
/external/llvm-project/mlir/test/Dialect/LLVMIR/
Dnvvm.mlir40 // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm.i32
41 %0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 : !llvm.i32
42 // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm.float
43 %1 = nvvm.shfl.sync.bfly %arg0, %arg4, %arg1, %arg2 : !llvm.float
50 // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm.struct<(i32, i1)>
51 …%0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(i3…
52 // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm.struct<(float, i1)>
53 …%1 = nvvm.shfl.sync.bfly %arg0, %arg4, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(fl…
Dinvalid.mlir350 %0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.i32
357 …%0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(i3…
364 …%0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(i3…
/external/llvm-project/mlir/test/Target/
Dnvvmir.mlir44 …// CHECK: call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}…
45 %6 = nvvm.shfl.sync.bfly %0, %3, %1, %2 : !llvm.i32
46 …// CHECK: call float @llvm.nvvm.shfl.sync.bfly.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{…
47 %7 = nvvm.shfl.sync.bfly %0, %4, %1, %2 : !llvm.float
54 …// CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i…
55 %6 = nvvm.shfl.sync.bfly %0, %3, %1, %2 {return_value_and_is_valid} : !llvm.struct<(i32, i1)>
56 …// CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}…
57 %7 = nvvm.shfl.sync.bfly %0, %4, %1, %2 {return_value_and_is_valid} : !llvm.struct<(float, i1)>
/external/llvm-project/mlir/test/mlir-cuda-runner/
Dshuffle.mlir18 %shfl, %valid = gpu.shuffle %val, %offset, %width xor : f32
19 cond_br %valid, ^bb1(%shfl : f32), ^bb0
/external/llvm-project/mlir/lib/Conversion/GPUToNVVM/
DLowerGpuOpsToNVVMOps.cpp79 Value shfl = rewriter.create<NVVM::ShflBflyOp>( in matchAndRewrite() local
83 loc, valueTy, shfl, rewriter.getIndexArrayAttr(0)); in matchAndRewrite()
85 loc, predTy, shfl, rewriter.getIndexArrayAttr(1)); in matchAndRewrite()
/external/llvm-project/llvm/test/MC/RISCV/
Drv32zbp-valid.s27 # CHECK-ASM-AND-OBJ: shfl t0, t1, t2
29 shfl t0, t1, t2 label
Drv32zbp-invalid.s18 shfl t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction label
/external/mesa3d/src/gallium/drivers/nouveau/codegen/
Dnv50_ir_lowering_gm107.cpp207 Instruction *shfl; in handleDFDX() local
224 shfl = bld.mkOp3(OP_SHFL, TYPE_F32, bld.getScratch(), insn->getSrc(0), in handleDFDX()
226 shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY; in handleDFDX()
231 insn->setSrc(0, shfl->getDef(0)); in handleDFDX()
/external/tensorflow/tensorflow/compiler/xla/service/gpu/tests/
Dreduce_unnested.hlo186 // CHECK: %[[VAL_108:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1…
190 // CHECK: %[[VAL_110:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1…
194 // CHECK: %[[VAL_112:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1…
198 // CHECK: %[[VAL_114:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1…
202 // CHECK: %[[VAL_116:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1…
220 // CHECK: %[[VAL_132:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1…
224 // CHECK: %[[VAL_134:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1…
228 // CHECK: %[[VAL_136:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1…
232 // CHECK: %[[VAL_138:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1…
236 // CHECK: %[[VAL_140:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1…
[all …]
/external/llvm-project/mlir/test/Conversion/GPUToNVVM/
Dgpu-to-nvvm.mlir81 // CHECK: nvvm.shfl.sync.bfly
97 // CHECK: nvvm.shfl.sync.bfly
123 …// CHECK: %[[#SHFL:]] = nvvm.shfl.sync.bfly %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#CLAMP]] : !…
126 … %shfl, %pred = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = "xor" } : (f32, i32, i32) -> (f32, i1)
128 std.return %shfl : f32
/external/llvm-project/mlir/include/mlir/Dialect/LLVMIR/
DNVVMOps.td94 NVVM_Op<"shfl.sync.bfly">,
/external/llvm-project/mlir/test/Dialect/GPU/
Dinvalid.mlir320 %shfl, %pred = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = "xor" } : (f32, i32, i32) -> (i32, i1)
327 %shfl, %pred = gpu.shuffle %arg0, %arg1, %arg2 xor : index
Dops.mlir57 %shfl, %pred = gpu.shuffle %arg0, %offset, %width xor : f32
/external/llvm/include/llvm/IR/
DIntrinsicsNVVM.td3695 // shfl.down.b32 dest, val, offset, mask_and_clamp
3698 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.down.i32">,
3702 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.down.f32">,
3705 // shfl.up.b32 dest, val, offset, mask_and_clamp
3708 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.up.i32">,
3712 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.up.f32">,
3715 // shfl.bfly.b32 dest, val, offset, mask_and_clamp
3718 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.bfly.i32">,
3722 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.bfly.f32">,
3725 // shfl.idx.b32 dest, val, lane, mask_and_clamp
[all …]
/external/swiftshader/third_party/llvm-10.0/configs/common/include/llvm/IR/
DIntrinsicImpl.inc4678 "llvm.nvvm.shfl.bfly.f32",
4679 "llvm.nvvm.shfl.bfly.f32p",
4680 "llvm.nvvm.shfl.bfly.i32",
4681 "llvm.nvvm.shfl.bfly.i32p",
4682 "llvm.nvvm.shfl.down.f32",
4683 "llvm.nvvm.shfl.down.f32p",
4684 "llvm.nvvm.shfl.down.i32",
4685 "llvm.nvvm.shfl.down.i32p",
4686 "llvm.nvvm.shfl.idx.f32",
4687 "llvm.nvvm.shfl.idx.f32p",
[all …]
/external/llvm-project/mlir/docs/Dialects/
DVector.md84 For `GPU`, the `NVVM` dialect adds operations such as `mma.sync`, `shfl` and
/external/llvm/lib/Target/NVPTX/
DNVPTXIntrinsics.td68 // shfl.{up,down,bfly,idx}.b32
70 // The last two parameters to shfl can be regs or imms. ptxas is smart
77 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
83 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
89 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
95 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
/external/llvm-project/llvm/lib/Target/RISCV/
DRISCVInstrInfoB.td322 def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, Sched<[]>;
/external/llvm-project/llvm/include/llvm/IR/
DIntrinsicsNVVM.td309 string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix);
4000 // Generate intrinsics for all variants of shfl instruction.
/external/swiftshader/third_party/llvm-10.0/llvm/include/llvm/IR/
DIntrinsicsNVVM.td316 string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix);
4007 // Generate intrinsics for all variants of shfl instruction.

12