// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='gpu.module(gpu.func(test-gpu-memory-promotion))' -split-input-file %s | FileCheck %s
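// The test-gpu-memory-promotion pass exercised below promotes gpu.func
// arguments annotated with the gpu.test_promote_workgroup attribute into
// workgroup (memory space 3) attributions, rewrites their uses, and emits
// loop nests that copy the data into the promoted buffer before the uses and
// back to the original memref afterwards.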
gpu.module @foo {

  // Verify that the attribution was indeed introduced
  // CHECK-LABEL: @memref3d
  // CHECK-SAME: (%[[arg:.*]]: memref<5x4xf32>
  // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<5x4xf32, 3>)
  gpu.func @memref3d(%arg0: memref<5x4xf32> {gpu.test_promote_workgroup}) kernel {
    // Verify that loop bounds are emitted; the order does not matter.
    // CHECK-DAG: %[[c1:.*]] = constant 1
    // CHECK-DAG: %[[c4:.*]] = constant 4
    // CHECK-DAG: %[[c5:.*]] = constant 5
    // CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"}
    // CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"}
    // CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"}
    // CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"}
    // CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"}
    // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"}

    // Verify that loops for the copy are emitted. We only check the number of
    // loops here since their bounds are produced by mapLoopToProcessorIds,
    // tested separately.
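    // (For reference: mapLoopToProcessorIds roughly rewrites each copy loop to
    // start at the corresponding gpu.thread_id and advance by the matching
    // gpu.block_dim, which is why those values are materialized above.)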
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =

    // Verify that the copy-in is emitted and uses only the last two loops.
    // CHECK:       %[[v:.*]] = load %[[arg]][%[[i1]], %[[i2]]]
    // CHECK:       store %[[v]], %[[promoted]][%[[i1]], %[[i2]]]

    // Verify that the use has been rewritten.
    // CHECK: "use"(%[[promoted]]) : (memref<5x4xf32, 3>)
    "use"(%arg0) : (memref<5x4xf32>) -> ()

    // Verify that loops for the copy back are emitted. We only check the
    // number of loops here since their bounds are produced by
    // mapLoopToProcessorIds, tested separately.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =

    // Verify that the copy-back is emitted and uses only the last two loops.
    // CHECK:       %[[v:.*]] = load %[[promoted]][%[[i1]], %[[i2]]]
    // CHECK:       store %[[v]], %[[arg]][%[[i1]], %[[i2]]]
    gpu.return
  }
}

// -----
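// Same promotion applied to a 5-D memref: the rank now exceeds the three
// thread dimensions, so the copy loop nests below have five loops and every
// loop index participates in the load/store addressing.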
gpu.module @foo {

  // Verify that the attribution was indeed introduced
  // CHECK-LABEL: @memref5d
  // CHECK-SAME: (%[[arg:.*]]: memref<8x7x6x5x4xf32>
  // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<8x7x6x5x4xf32, 3>)
  gpu.func @memref5d(%arg0: memref<8x7x6x5x4xf32> {gpu.test_promote_workgroup}) kernel {
    // Verify that loop bounds are emitted; the order does not matter.
    // CHECK-DAG: %[[c0:.*]] = constant 0
    // CHECK-DAG: %[[c1:.*]] = constant 1
    // CHECK-DAG: %[[c4:.*]] = constant 4
    // CHECK-DAG: %[[c5:.*]] = constant 5
    // CHECK-DAG: %[[c6:.*]] = constant 6
    // CHECK-DAG: %[[c7:.*]] = constant 7
    // CHECK-DAG: %[[c8:.*]] = constant 8
    // CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"}
    // CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"}
    // CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"}
    // CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"}
    // CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"}
    // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"}

    // Verify that loops for the copy are emitted.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =
    // CHECK:       scf.for %[[i3:.*]] =
    // CHECK:         scf.for %[[i4:.*]] =

    // Verify that the copy-in is emitted.
    // CHECK:           %[[v:.*]] = load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    // CHECK:           store %[[v]], %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]

    // Verify that the use has been rewritten.
    // CHECK: "use"(%[[promoted]]) : (memref<8x7x6x5x4xf32, 3>)
    "use"(%arg0) : (memref<8x7x6x5x4xf32>) -> ()

    // Verify that loops for the copy back are emitted.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =
    // CHECK:       scf.for %[[i3:.*]] =
    // CHECK:         scf.for %[[i4:.*]] =

    // Verify that the copy-back is emitted.
    // CHECK:           %[[v:.*]] = load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    // CHECK:           store %[[v]], %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    gpu.return
  }
}
// -----

gpu.module @foo {

  // Check that the promoted attribution is appended after the existing
  // workgroup attributions and that the private attributions are preserved.
  // CHECK-LABEL: @insert
  // CHECK-SAME: (%{{.*}}: memref<4xf32>
  // CHECK-SAME: workgroup(%{{.*}}: memref<1x1xf64, 3>
  // CHECK-SAME: %[[wg2:.*]] : memref<4xf32, 3>)
  // CHECK-SAME: private(%{{.*}}: memref<1x1xi64, 5>)
  gpu.func @insert(%arg0: memref<4xf32> {gpu.test_promote_workgroup})
      workgroup(%arg1: memref<1x1xf64, 3>)
      private(%arg2: memref<1x1xi64, 5>)
      kernel {
    // CHECK: "use"(%[[wg2]])
    "use"(%arg0) : (memref<4xf32>) -> ()
    gpu.return
  }
}