// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='gpu.module(gpu.func(test-gpu-memory-promotion))' -split-input-file %s | FileCheck %s

gpu.module @foo {

  // Verify that the attribution was indeed introduced.
  // CHECK-LABEL: @memref3d
  // CHECK-SAME: (%[[arg:.*]]: memref<5x4xf32>
  // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<5x4xf32, 3>)
  gpu.func @memref3d(%arg0: memref<5x4xf32> {gpu.test_promote_workgroup}) kernel {
    // Verify that loop bounds are emitted; the order does not matter.
    // CHECK-DAG: %[[c1:.*]] = constant 1
    // CHECK-DAG: %[[c4:.*]] = constant 4
    // CHECK-DAG: %[[c5:.*]] = constant 5
    // CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"}
    // CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"}
    // CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"}
    // CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"}
    // CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"}
    // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"}

    // Verify that loops for the copy are emitted. We only check the number of
    // loops here since their bounds are produced by mapLoopToProcessorIds,
    // tested separately.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK: scf.for %[[i1:.*]] =
    // CHECK: scf.for %[[i2:.*]] =

    // Verify that the copy is emitted and uses only the last two loops.
    // CHECK: %[[v:.*]] = load %[[arg]][%[[i1]], %[[i2]]]
    // CHECK: store %[[v]], %[[promoted]][%[[i1]], %[[i2]]]

    // Verify that the use has been rewritten.
    // CHECK: "use"(%[[promoted]]) : (memref<5x4xf32, 3>)
    "use"(%arg0) : (memref<5x4xf32>) -> ()

    // Verify that loops for the copy back are emitted. We only check the number
    // of loops here since their bounds are produced by mapLoopToProcessorIds,
    // tested separately.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK: scf.for %[[i1:.*]] =
    // CHECK: scf.for %[[i2:.*]] =

    // Verify that the copy is emitted and uses only the last two loops.
    // CHECK: %[[v:.*]] = load %[[promoted]][%[[i1]], %[[i2]]]
    // CHECK: store %[[v]], %[[arg]][%[[i1]], %[[i2]]]
    gpu.return
  }
}

// -----

gpu.module @foo {

  // Verify that the attribution was indeed introduced.
  // CHECK-LABEL: @memref5d
  // CHECK-SAME: (%[[arg:.*]]: memref<8x7x6x5x4xf32>
  // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<8x7x6x5x4xf32, 3>)
  gpu.func @memref5d(%arg0: memref<8x7x6x5x4xf32> {gpu.test_promote_workgroup}) kernel {
    // Verify that loop bounds are emitted; the order does not matter.
    // CHECK-DAG: %[[c0:.*]] = constant 0
    // CHECK-DAG: %[[c1:.*]] = constant 1
    // CHECK-DAG: %[[c4:.*]] = constant 4
    // CHECK-DAG: %[[c5:.*]] = constant 5
    // CHECK-DAG: %[[c6:.*]] = constant 6
    // CHECK-DAG: %[[c7:.*]] = constant 7
    // CHECK-DAG: %[[c8:.*]] = constant 8
    // CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"}
    // CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"}
    // CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"}
    // CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"}
    // CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"}
    // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"}

    // Verify that loops for the copy are emitted.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK: scf.for %[[i1:.*]] =
    // CHECK: scf.for %[[i2:.*]] =
    // CHECK: scf.for %[[i3:.*]] =
    // CHECK: scf.for %[[i4:.*]] =

    // Verify that the copy is emitted.
    // CHECK: %[[v:.*]] = load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    // CHECK: store %[[v]], %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]

    // Verify that the use has been rewritten.
    // CHECK: "use"(%[[promoted]]) : (memref<8x7x6x5x4xf32, 3>)
    "use"(%arg0) : (memref<8x7x6x5x4xf32>) -> ()

    // Verify that loops for the copy back are emitted.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK: scf.for %[[i1:.*]] =
    // CHECK: scf.for %[[i2:.*]] =
    // CHECK: scf.for %[[i3:.*]] =
    // CHECK: scf.for %[[i4:.*]] =

    // Verify that the copy is emitted.
    // CHECK: %[[v:.*]] = load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    // CHECK: store %[[v]], %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    gpu.return
  }
}

// -----

gpu.module @foo {

  // Check that attribution insertion works fine.
  // CHECK-LABEL: @insert
  // CHECK-SAME: (%{{.*}}: memref<4xf32>
  // CHECK-SAME: workgroup(%{{.*}}: memref<1x1xf64, 3>
  // CHECK-SAME: %[[wg2:.*]] : memref<4xf32, 3>)
  // CHECK-SAME: private(%{{.*}}: memref<1x1xi64, 5>)
  gpu.func @insert(%arg0: memref<4xf32> {gpu.test_promote_workgroup})
      workgroup(%arg1: memref<1x1xf64, 3>)
      private(%arg2: memref<1x1xi64, 5>)
      kernel {
    // CHECK: "use"(%[[wg2]])
    "use"(%arg0) : (memref<4xf32>) -> ()
    gpu.return
  }
}