// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=1 gpu-thread-dims=1" %s | FileCheck --check-prefix=CHECK-11 %s
// RUN: mlir-opt -convert-affine-for-to-gpu="gpu-block-dims=2 gpu-thread-dims=2" %s | FileCheck --check-prefix=CHECK-22 %s

// Test overview: a perfect nest of four unit-step affine.for loops copies one
// element of %A into %B per iteration. The pass is run in two configurations:
//   * 1 block dim + 1 thread dim: the checks expect the two outermost loops to
//     be replaced by a gpu.launch while the two innermost loops stay as
//     affine.for inside the launch body;
//   * 2 block dims + 2 thread dims: the checks expect the bounds of all four
//     loops to be materialized before a single gpu.launch, with all four
//     induction variables recomputed through addi inside the launch body.

// CHECK-11-LABEL: @step_1
// CHECK-22-LABEL: @step_1
func @step_1(%A : memref<?x?x?x?xf32>, %B : memref<?x?x?x?xf32>) {
  // Bounds of the loop, its range and step.
  // CHECK-11-NEXT: %{{.*}} = constant 0 : index
  // CHECK-11-NEXT: %{{.*}} = constant 42 : index
  // CHECK-11-NEXT: %{{.*}} = subi %{{.*}}, %{{.*}} : index
  // CHECK-11-NEXT: %{{.*}} = constant 1 : index
  //
  // CHECK-22-NEXT: %{{.*}} = constant 0 : index
  // CHECK-22-NEXT: %{{.*}} = constant 42 : index
  // CHECK-22-NEXT: %{{.*}} = subi %{{.*}}, %{{.*}} : index
  // CHECK-22-NEXT: %{{.*}} = constant 1 : index
  affine.for %i = 0 to 42 {

    // Bounds of the loop, its range and step.
    // CHECK-11-NEXT: %{{.*}} = constant 0 : index
    // CHECK-11-NEXT: %{{.*}} = constant 10 : index
    // CHECK-11-NEXT: %{{.*}} = subi %{{.*}}, %{{.*}} : index
    // CHECK-11-NEXT: %{{.*}} = constant 1 : index
    //
    // CHECK-22-NEXT: %{{.*}} = constant 0 : index
    // CHECK-22-NEXT: %{{.*}} = constant 10 : index
    // CHECK-22-NEXT: %{{.*}} = subi %{{.*}}, %{{.*}} : index
    // CHECK-22-NEXT: %{{.*}} = constant 1 : index
    affine.for %j = 0 to 10 {
      // CHECK-11: gpu.launch
      // CHECK-11-SAME: blocks
      // CHECK-11-SAME: threads

      // Remapping of the loop induction variables.
      // CHECK-11: %[[i:.*]] = addi %{{.*}}, %{{.*}} : index
      // CHECK-11-NEXT: %[[j:.*]] = addi %{{.*}}, %{{.*}} : index

      // This loop is not converted if mapping to 1, 1 dimensions.
      // CHECK-11-NEXT: affine.for %[[ii:.*]] = 2 to 16
      //
      // Bounds of the loop, its range and step.
      // CHECK-22-NEXT: %{{.*}} = constant 2 : index
      // CHECK-22-NEXT: %{{.*}} = constant 16 : index
      // CHECK-22-NEXT: %{{.*}} = subi %{{.*}}, %{{.*}} : index
      // CHECK-22-NEXT: %{{.*}} = constant 1 : index
      affine.for %ii = 2 to 16 {
        // This loop is not converted if mapping to 1, 1 dimensions.
        // CHECK-11-NEXT: affine.for %[[jj:.*]] = 5 to 17
        //
        // Bounds of the loop, its range and step.
        // CHECK-22-NEXT: %{{.*}} = constant 5 : index
        // CHECK-22-NEXT: %{{.*}} = constant 17 : index
        // CHECK-22-NEXT: %{{.*}} = subi %{{.*}}, %{{.*}} : index
        // CHECK-22-NEXT: %{{.*}} = constant 1 : index
        affine.for %jj = 5 to 17 {
          // CHECK-22: gpu.launch
          // CHECK-22-SAME: blocks
          // CHECK-22-SAME: threads

          // Remapping of the loop induction variables in the last mapped loop.
          // CHECK-22: %[[i:.*]] = addi %{{.*}}, %{{.*}} : index
          // CHECK-22-NEXT: %[[j:.*]] = addi %{{.*}}, %{{.*}} : index
          // CHECK-22-NEXT: %[[ii:.*]] = addi %{{.*}}, %{{.*}} : index
          // CHECK-22-NEXT: %[[jj:.*]] = addi %{{.*}}, %{{.*}} : index

          // Using remapped values instead of loop iterators.
          // CHECK-11: {{.*}} = load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
          // CHECK-22: {{.*}} = load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
          %0 = load %A[%i, %j, %ii, %jj] : memref<?x?x?x?xf32>
          // CHECK-11-NEXT: store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
          // CHECK-22-NEXT: store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
          store %0, %B[%i, %j, %ii, %jj] : memref<?x?x?x?xf32>

          // The launch body is expected to end with the GPU terminator.
          // CHECK-11: gpu.terminator
          // CHECK-22: gpu.terminator
        }
      }
    }
  }
  return
}