/external/llvm/test/CodeGen/NVPTX/ |
D | shfl.ll | 3 declare i32 @llvm.nvvm.shfl.down.i32(i32, i32, i32) 4 declare float @llvm.nvvm.shfl.down.f32(float, i32, i32) 5 declare i32 @llvm.nvvm.shfl.up.i32(i32, i32, i32) 6 declare float @llvm.nvvm.shfl.up.f32(float, i32, i32) 7 declare i32 @llvm.nvvm.shfl.bfly.i32(i32, i32, i32) 8 declare float @llvm.nvvm.shfl.bfly.f32(float, i32, i32) 9 declare i32 @llvm.nvvm.shfl.idx.i32(i32, i32, i32) 10 declare float @llvm.nvvm.shfl.idx.f32(float, i32, i32) 13 ; shfl.down. 15 ; CHECK-LABEL: .func{{.*}}shfl.down1 [all …]
|
/external/llvm-project/llvm/test/CodeGen/NVPTX/ |
D | shfl.ll | 3 declare i32 @llvm.nvvm.shfl.down.i32(i32, i32, i32) 4 declare float @llvm.nvvm.shfl.down.f32(float, i32, i32) 5 declare i32 @llvm.nvvm.shfl.up.i32(i32, i32, i32) 6 declare float @llvm.nvvm.shfl.up.f32(float, i32, i32) 7 declare i32 @llvm.nvvm.shfl.bfly.i32(i32, i32, i32) 8 declare float @llvm.nvvm.shfl.bfly.f32(float, i32, i32) 9 declare i32 @llvm.nvvm.shfl.idx.i32(i32, i32, i32) 10 declare float @llvm.nvvm.shfl.idx.f32(float, i32, i32) 13 ; shfl.down. 15 ; CHECK-LABEL: .func{{.*}}shfl.down1 [all …]
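The declarations in shfl.ll above fix the operand order of the non-sync intrinsics as (val, offset, mask_and_clamp). A minimal, illustrative sketch of a call from IR (the function name and the clamp value 31 are assumptions, not taken from the test):

  declare i32 @llvm.nvvm.shfl.down.i32(i32, i32, i32)

  ; Each lane reads the value held by the lane `offset` positions higher;
  ; 31 (0x1f) is the conventional mask_and_clamp for a full-warp shfl.down.
  define i32 @shfl_down_example(i32 %val, i32 %offset) {
    %r = call i32 @llvm.nvvm.shfl.down.i32(i32 %val, i32 %offset, i32 31)
    ret i32 %r
  }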
|
D | shfl-p.ll | 3 declare {i32, i1} @llvm.nvvm.shfl.down.i32p(i32, i32, i32) 4 declare {float, i1} @llvm.nvvm.shfl.down.f32p(float, i32, i32) 5 declare {i32, i1} @llvm.nvvm.shfl.up.i32p(i32, i32, i32) 6 declare {float, i1} @llvm.nvvm.shfl.up.f32p(float, i32, i32) 7 declare {i32, i1} @llvm.nvvm.shfl.bfly.i32p(i32, i32, i32) 8 declare {float, i1} @llvm.nvvm.shfl.bfly.f32p(float, i32, i32) 9 declare {i32, i1} @llvm.nvvm.shfl.idx.i32p(i32, i32, i32) 10 declare {float, i1} @llvm.nvvm.shfl.idx.f32p(float, i32, i32) 12 ; CHECK-LABEL: .func{{.*}}shfl.i32.rrr 13 define {i32, i1} @shfl.i32.rrr(i32 %a, i32 %b, i32 %c) { [all …]
|
D | shfl-sync-p.ll | 3 declare {i32, i1} @llvm.nvvm.shfl.sync.down.i32p(i32, i32, i32, i32) 4 declare {float, i1} @llvm.nvvm.shfl.sync.down.f32p(i32, float, i32, i32) 5 declare {i32, i1} @llvm.nvvm.shfl.sync.up.i32p(i32, i32, i32, i32) 6 declare {float, i1} @llvm.nvvm.shfl.sync.up.f32p(i32, float, i32, i32) 7 declare {i32, i1} @llvm.nvvm.shfl.sync.bfly.i32p(i32, i32, i32, i32) 8 declare {float, i1} @llvm.nvvm.shfl.sync.bfly.f32p(i32, float, i32, i32) 9 declare {i32, i1} @llvm.nvvm.shfl.sync.idx.i32p(i32, i32, i32, i32) 10 declare {float, i1} @llvm.nvvm.shfl.sync.idx.f32p(i32, float, i32, i32) 12 ; CHECK-LABEL: .func{{.*}}shfl.sync.i32.rrr 13 define {i32, i1} @shfl.sync.i32.rrr(i32 %mask, i32 %a, i32 %b, i32 %c) { [all …]
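The *p variants declared in shfl-sync-p.ll return the shuffled value together with an i1 that reports whether the source lane was in range. A hedged sketch of consuming that pair (the function name, the -1 membermask, and the clamp 31 are illustrative choices, not from the test):

  declare {i32, i1} @llvm.nvvm.shfl.sync.down.i32p(i32, i32, i32, i32)

  ; Operand order is (membermask, val, offset, mask_and_clamp); this sketch
  ; keeps the lane's own value when the shuffled-from lane is out of range.
  define i32 @shfl_sync_down_p_example(i32 %val, i32 %offset) {
    %res = call {i32, i1} @llvm.nvvm.shfl.sync.down.i32p(i32 -1, i32 %val, i32 %offset, i32 31)
    %v   = extractvalue {i32, i1} %res, 0
    %ok  = extractvalue {i32, i1} %res, 1
    %sel = select i1 %ok, i32 %v, i32 %val
    ret i32 %sel
  }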
|
D | shfl-sync.ll | 3 declare i32 @llvm.nvvm.shfl.sync.down.i32(i32, i32, i32, i32) 4 declare float @llvm.nvvm.shfl.sync.down.f32(float, i32, i32, i32) 5 declare i32 @llvm.nvvm.shfl.sync.up.i32(i32, i32, i32, i32) 6 declare float @llvm.nvvm.shfl.sync.up.f32(float, i32, i32, i32) 7 declare i32 @llvm.nvvm.shfl.sync.bfly.i32(i32, i32, i32, i32) 8 declare float @llvm.nvvm.shfl.sync.bfly.f32(float, i32, i32, i32) 9 declare i32 @llvm.nvvm.shfl.sync.idx.i32(i32, i32, i32, i32) 10 declare float @llvm.nvvm.shfl.sync.idx.f32(float, i32, i32, i32) 12 ; CHECK-LABEL: .func{{.*}}shfl.sync.rrr 13 define i32 @shfl.sync.rrr(i32 %mask, i32 %a, i32 %b, i32 %c) { [all …]
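Compared with the older intrinsics in shfl.ll, the sync forms prepend an i32 membermask, as the i32 declarations and the shfl.sync.rrr signature above show. A short illustrative call (name and constants are assumed, not from the test):

  declare i32 @llvm.nvvm.shfl.sync.down.i32(i32, i32, i32, i32)

  ; (membermask, val, offset, mask_and_clamp); -1 means every lane participates.
  define i32 @shfl_sync_down_example(i32 %val, i32 %offset) {
    %r = call i32 @llvm.nvvm.shfl.sync.down.i32(i32 -1, i32 %val, i32 %offset, i32 31)
    ret i32 %r
  }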
|
/external/llvm-project/mlir/test/Dialect/LLVMIR/ |
D | nvvm.mlir | 40 // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm.i32 41 %0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 : !llvm.i32 42 // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm.float 43 %1 = nvvm.shfl.sync.bfly %arg0, %arg4, %arg1, %arg2 : !llvm.float 50 // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm.struct<(i32, i1)> 51 …%0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(i3… 52 // CHECK: nvvm.shfl.sync.bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : !llvm.struct<(float, i1)> 53 …%1 = nvvm.shfl.sync.bfly %arg0, %arg4, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(fl…
|
D | invalid.mlir | 350 %0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.i32 357 …%0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(i3… 364 …%0 = nvvm.shfl.sync.bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : !llvm.struct<(i3…
|
/external/llvm-project/mlir/test/Target/ |
D | nvvmir.mlir | 44 …// CHECK: call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}… 45 %6 = nvvm.shfl.sync.bfly %0, %3, %1, %2 : !llvm.i32 46 …// CHECK: call float @llvm.nvvm.shfl.sync.bfly.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{… 47 %7 = nvvm.shfl.sync.bfly %0, %4, %1, %2 : !llvm.float 54 …// CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i… 55 %6 = nvvm.shfl.sync.bfly %0, %3, %1, %2 {return_value_and_is_valid} : !llvm.struct<(i32, i1)> 56 …// CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}… 57 %7 = nvvm.shfl.sync.bfly %0, %4, %1, %2 {return_value_and_is_valid} : !llvm.struct<(float, i1)>
|
/external/llvm-project/mlir/test/mlir-cuda-runner/ |
D | shuffle.mlir | 18 %shfl, %valid = gpu.shuffle %val, %offset, %width xor : f32 19 cond_br %valid, ^bb1(%shfl : f32), ^bb0
|
/external/llvm-project/mlir/lib/Conversion/GPUToNVVM/ |
D | LowerGpuOpsToNVVMOps.cpp | 79 Value shfl = rewriter.create<NVVM::ShflBflyOp>( in matchAndRewrite() local 83 loc, valueTy, shfl, rewriter.getIndexArrayAttr(0)); in matchAndRewrite() 85 loc, predTy, shfl, rewriter.getIndexArrayAttr(1)); in matchAndRewrite()
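The C++ above builds an NVVM::ShflBflyOp and then extracts the shuffled value at struct position 0 and the validity predicate at position 1. Once translated to LLVM IR (compare the { float, i1 } call in nvvmir.mlir below), that corresponds roughly to the following assumed sketch, with constants chosen for a full 32-lane width and invented names:

  declare {float, i1} @llvm.nvvm.shfl.sync.bfly.f32p(i32, float, i32, i32)

  define float @gpu_shuffle_xor_sketch(float %val, i32 %offset, float %fallback) {
    %res  = call {float, i1} @llvm.nvvm.shfl.sync.bfly.f32p(i32 -1, float %val, i32 %offset, i32 31)
    %shfl = extractvalue {float, i1} %res, 0   ; value, index 0 as in the C++ above
    %pred = extractvalue {float, i1} %res, 1   ; validity bit, index 1
    %out  = select i1 %pred, float %shfl, float %fallback
    ret float %out
  }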
|
/external/llvm-project/llvm/test/MC/RISCV/ |
D | rv32zbp-valid.s | 27 # CHECK-ASM-AND-OBJ: shfl t0, t1, t2 29 shfl t0, t1, t2 label
|
D | rv32zbp-invalid.s | 18 shfl t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction label
|
/external/mesa3d/src/gallium/drivers/nouveau/codegen/ |
D | nv50_ir_lowering_gm107.cpp | 207 Instruction *shfl; in handleDFDX() local 224 shfl = bld.mkOp3(OP_SHFL, TYPE_F32, bld.getScratch(), insn->getSrc(0), in handleDFDX() 226 shfl->subOp = NV50_IR_SUBOP_SHFL_BFLY; in handleDFDX() 231 insn->setSrc(0, shfl->getDef(0)); in handleDFDX()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/tests/ |
D | reduce_unnested.hlo | 186 // CHECK: %[[VAL_108:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1… 190 // CHECK: %[[VAL_110:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1… 194 // CHECK: %[[VAL_112:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1… 198 // CHECK: %[[VAL_114:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1… 202 // CHECK: %[[VAL_116:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1… 220 // CHECK: %[[VAL_132:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1… 224 // CHECK: %[[VAL_134:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1… 228 // CHECK: %[[VAL_136:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1… 232 // CHECK: %[[VAL_138:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1… 236 // CHECK: %[[VAL_140:.*]] = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %[[VAL_1… [all …]
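The repeated shfl.sync.down.f32 calls matched above are the usual warp-level tree reduction: each step fetches a partial result from a higher lane and combines it, typically with the offset halving 16, 8, 4, 2, 1. A standalone sketch of that shape, assuming a plain float-add combiner and illustrative names (the HLO's actual combiner is whatever its reduce computation defines):

  declare float @llvm.nvvm.shfl.sync.down.f32(i32, float, i32, i32)

  define float @warp_reduce_sum(float %x) {
    %s16 = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %x, i32 16, i32 31)
    %a16 = fadd float %x, %s16
    %s8  = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %a16, i32 8, i32 31)
    %a8  = fadd float %a16, %s8
    %s4  = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %a8, i32 4, i32 31)
    %a4  = fadd float %a8, %s4
    %s2  = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %a4, i32 2, i32 31)
    %a2  = fadd float %a4, %s2
    %s1  = call float @llvm.nvvm.shfl.sync.down.f32(i32 -1, float %a2, i32 1, i32 31)
    %a1  = fadd float %a2, %s1
    ; after the last step, lane 0 holds the combined result for the warp
    ret float %a1
  }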
|
/external/llvm-project/mlir/test/Conversion/GPUToNVVM/ |
D | gpu-to-nvvm.mlir | 81 // CHECK: nvvm.shfl.sync.bfly 97 // CHECK: nvvm.shfl.sync.bfly 123 …// CHECK: %[[#SHFL:]] = nvvm.shfl.sync.bfly %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#CLAMP]] : !… 126 … %shfl, %pred = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = "xor" } : (f32, i32, i32) -> (f32, i1) 128 std.return %shfl : f32
|
/external/llvm-project/mlir/include/mlir/Dialect/LLVMIR/ |
D | NVVMOps.td | 94 NVVM_Op<"shfl.sync.bfly">,
|
/external/llvm-project/mlir/test/Dialect/GPU/ |
D | invalid.mlir | 320 %shfl, %pred = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = "xor" } : (f32, i32, i32) -> (i32, i1) 327 %shfl, %pred = gpu.shuffle %arg0, %arg1, %arg2 xor : index
|
D | ops.mlir | 57 %shfl, %pred = gpu.shuffle %arg0, %offset, %width xor : f32
|
/external/llvm/include/llvm/IR/ |
D | IntrinsicsNVVM.td | 3695 // shfl.down.b32 dest, val, offset, mask_and_clamp 3698 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.down.i32">, 3702 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.down.f32">, 3705 // shfl.up.b32 dest, val, offset, mask_and_clamp 3708 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.up.i32">, 3712 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.up.f32">, 3715 // shfl.bfly.b32 dest, val, offset, mask_and_clamp 3718 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.bfly.i32">, 3722 [IntrNoMem, IntrConvergent], "llvm.nvvm.shfl.bfly.f32">, 3725 // shfl.idx.b32 dest, val, lane, mask_and_clamp [all …]
|
/external/swiftshader/third_party/llvm-10.0/configs/common/include/llvm/IR/ |
D | IntrinsicImpl.inc | 4678 "llvm.nvvm.shfl.bfly.f32", 4679 "llvm.nvvm.shfl.bfly.f32p", 4680 "llvm.nvvm.shfl.bfly.i32", 4681 "llvm.nvvm.shfl.bfly.i32p", 4682 "llvm.nvvm.shfl.down.f32", 4683 "llvm.nvvm.shfl.down.f32p", 4684 "llvm.nvvm.shfl.down.i32", 4685 "llvm.nvvm.shfl.down.i32p", 4686 "llvm.nvvm.shfl.idx.f32", 4687 "llvm.nvvm.shfl.idx.f32p", [all …]
|
/external/llvm-project/mlir/docs/Dialects/ |
D | Vector.md | 84 For `GPU`, the `NVVM` dialect adds operations such as `mma.sync`, `shfl` and
|
/external/llvm/lib/Target/NVPTX/ |
D | NVPTXIntrinsics.td | 68 // shfl.{up,down,bfly,idx}.b32 70 // The last two parameters to shfl can be regs or imms. ptxas is smart 77 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"), 83 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"), 89 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"), 95 !strconcat("shfl.", mode, ".b32 $dst, $src, $offset, $mask;"),
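Since the snippet above notes that the last two shfl operands may be registers or immediates, IR with constant operands exercises the immediate-selecting patterns. An assumed example (name and constants are illustrative) using the butterfly mode declared in shfl.ll:

  declare i32 @llvm.nvvm.shfl.bfly.i32(i32, i32, i32)

  ; With constant offset and clamp, the pattern above emits something like
  ; "shfl.bfly.b32 $dst, $src, 16, 31;", exchanging values between lanes
  ; whose IDs differ in bit 4.
  define i32 @shfl_bfly_imm_example(i32 %val) {
    %r = call i32 @llvm.nvvm.shfl.bfly.i32(i32 %val, i32 16, i32 31)
    ret i32 %r
  }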
|
/external/llvm-project/llvm/lib/Target/RISCV/ |
D | RISCVInstrInfoB.td | 322 def SHFL : ALU_rr<0b0000100, 0b001, "shfl">, Sched<[]>;
|
/external/llvm-project/llvm/include/llvm/IR/ |
D | IntrinsicsNVVM.td | 309 string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix); 4000 // Generate intrinsics for all variants of shfl instruction.
|
/external/swiftshader/third_party/llvm-10.0/llvm/include/llvm/IR/ |
D | IntrinsicsNVVM.td | 316 string IntrName = "llvm.nvvm.shfl." # !subst("_",".", Suffix); 4007 // Generate intrinsics for all variants of shfl instruction.
|