// RUN: mlir-opt -test-gpu-rewrite %s | FileCheck %s

// Regression test for the rewrite exercised by -test-gpu-rewrite on a
// "gpu.all_reduce" op with {op = "add"} on an f32 value. The only input op is
// at the bottom of @kernel; every FileCheck directive above it describes the
// IR expected after the rewrite. The expected output:
//   * adds a workgroup attribution of type memref<32xf32, 3> to the kernel,
//   * runs two rounds of five gpu.shuffle (xor) + addf steps with offsets
//     1, 2, 4, 8, 16, each round followed by a store and a gpu.barrier.
// The directives are autogenerated; regenerate them with the script named in
// the note below rather than editing them by hand.

// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// CHECK: gpu.module @kernels {
gpu.module @kernels {

  // CHECK-LABEL: gpu.func @kernel(
  // CHECK-SAME: [[VAL_0:%.*]]: f32) workgroup([[VAL_1:%.*]] : memref<32xf32, 3>) kernel {
  gpu.func @kernel(%arg0 : f32) kernel {
    // Expected prologue: constants, then block dims and thread ids cast to
    // i32. VAL_27 = (tid.z * bdim.y + tid.y) * bdim.x + tid.x and
    // VAL_28 = bdim.x * bdim.y * bdim.z.
    // CHECK: [[VAL_2:%.*]] = constant 31 : i32
    // CHECK: [[VAL_3:%.*]] = constant 0 : i32
    // CHECK: [[VAL_4:%.*]] = constant 0 : index
    // CHECK: [[VAL_5:%.*]] = constant 32 : i32
    // CHECK: [[VAL_6:%.*]] = constant 1 : i32
    // CHECK: [[VAL_7:%.*]] = constant 2 : i32
    // CHECK: [[VAL_8:%.*]] = constant 4 : i32
    // CHECK: [[VAL_9:%.*]] = constant 8 : i32
    // CHECK: [[VAL_10:%.*]] = constant 16 : i32
    // CHECK: [[VAL_11:%.*]] = "gpu.block_dim"() {dimension = "x"} : () -> index
    // CHECK: [[VAL_12:%.*]] = index_cast [[VAL_11]] : index to i32
    // CHECK: [[VAL_13:%.*]] = "gpu.block_dim"() {dimension = "y"} : () -> index
    // CHECK: [[VAL_14:%.*]] = index_cast [[VAL_13]] : index to i32
    // CHECK: [[VAL_15:%.*]] = "gpu.block_dim"() {dimension = "z"} : () -> index
    // CHECK: [[VAL_16:%.*]] = index_cast [[VAL_15]] : index to i32
    // CHECK: [[VAL_17:%.*]] = "gpu.thread_id"() {dimension = "x"} : () -> index
    // CHECK: [[VAL_18:%.*]] = index_cast [[VAL_17]] : index to i32
    // CHECK: [[VAL_19:%.*]] = "gpu.thread_id"() {dimension = "y"} : () -> index
    // CHECK: [[VAL_20:%.*]] = index_cast [[VAL_19]] : index to i32
    // CHECK: [[VAL_21:%.*]] = "gpu.thread_id"() {dimension = "z"} : () -> index
    // CHECK: [[VAL_22:%.*]] = index_cast [[VAL_21]] : index to i32
    // CHECK: [[VAL_23:%.*]] = muli [[VAL_22]], [[VAL_14]] : i32
    // CHECK: [[VAL_24:%.*]] = addi [[VAL_23]], [[VAL_20]] : i32
    // CHECK: [[VAL_25:%.*]] = muli [[VAL_24]], [[VAL_12]] : i32
    // CHECK: [[VAL_26:%.*]] = muli [[VAL_12]], [[VAL_14]] : i32
    // CHECK: [[VAL_27:%.*]] = addi [[VAL_25]], [[VAL_18]] : i32
    // CHECK: [[VAL_28:%.*]] = muli [[VAL_26]], [[VAL_16]] : i32
    // VAL_29 = VAL_27 & 31, VAL_30 = (VAL_29 == 0), and
    // VAL_32 = VAL_28 - (VAL_27 - VAL_29) is the width passed to the shuffles
    // in the first round when it is below 32.
    // CHECK: [[VAL_29:%.*]] = and [[VAL_27]], [[VAL_2]] : i32
    // CHECK: [[VAL_30:%.*]] = cmpi "eq", [[VAL_29]], [[VAL_3]] : i32
    // CHECK: [[VAL_31:%.*]] = subi [[VAL_27]], [[VAL_29]] : i32
    // CHECK: [[VAL_32:%.*]] = subi [[VAL_28]], [[VAL_31]] : i32
    // CHECK: [[VAL_33:%.*]] = cmpi "slt", [[VAL_32]], [[VAL_5]] : i32
    // CHECK: cond_br [[VAL_33]], ^bb1, ^bb17
    // First round, width VAL_32 < 32: each shuffle result is only accumulated
    // when the second shuffle result (the i1 flag) is true.
    // CHECK: ^bb1:
    // CHECK: [[VAL_34:%.*]], [[VAL_35:%.*]] = gpu.shuffle [[VAL_0]], [[VAL_6]], [[VAL_32]] xor : f32
    // CHECK: cond_br [[VAL_35]], ^bb2, ^bb3
    // CHECK: ^bb2:
    // CHECK: [[VAL_36:%.*]] = addf [[VAL_0]], [[VAL_34]] : f32
    // CHECK: br ^bb4([[VAL_36]] : f32)
    // CHECK: ^bb3:
    // CHECK: br ^bb4([[VAL_0]] : f32)
    // CHECK: ^bb4([[VAL_37:%.*]]: f32):
    // CHECK: [[VAL_38:%.*]], [[VAL_39:%.*]] = gpu.shuffle [[VAL_37]], [[VAL_7]], [[VAL_32]] xor : f32
    // CHECK: cond_br [[VAL_39]], ^bb5, ^bb6
    // CHECK: ^bb5:
    // CHECK: [[VAL_40:%.*]] = addf [[VAL_37]], [[VAL_38]] : f32
    // CHECK: br ^bb7([[VAL_40]] : f32)
    // CHECK: ^bb6:
    // CHECK: br ^bb7([[VAL_37]] : f32)
    // CHECK: ^bb7([[VAL_41:%.*]]: f32):
    // CHECK: [[VAL_42:%.*]], [[VAL_43:%.*]] = gpu.shuffle [[VAL_41]], [[VAL_8]], [[VAL_32]] xor : f32
    // CHECK: cond_br [[VAL_43]], ^bb8, ^bb9
    // CHECK: ^bb8:
    // CHECK: [[VAL_44:%.*]] = addf [[VAL_41]], [[VAL_42]] : f32
    // CHECK: br ^bb10([[VAL_44]] : f32)
    // CHECK: ^bb9:
    // CHECK: br ^bb10([[VAL_41]] : f32)
    // CHECK: ^bb10([[VAL_45:%.*]]: f32):
    // CHECK: [[VAL_46:%.*]], [[VAL_47:%.*]] = gpu.shuffle [[VAL_45]], [[VAL_9]], [[VAL_32]] xor : f32
    // CHECK: cond_br [[VAL_47]], ^bb11, ^bb12
    // CHECK: ^bb11:
    // CHECK: [[VAL_48:%.*]] = addf [[VAL_45]], [[VAL_46]] : f32
    // CHECK: br ^bb13([[VAL_48]] : f32)
    // CHECK: ^bb12:
    // CHECK: br ^bb13([[VAL_45]] : f32)
    // CHECK: ^bb13([[VAL_49:%.*]]: f32):
    // CHECK: [[VAL_50:%.*]], [[VAL_51:%.*]] = gpu.shuffle [[VAL_49]], [[VAL_10]], [[VAL_32]] xor : f32
    // CHECK: cond_br [[VAL_51]], ^bb14, ^bb15
    // CHECK: ^bb14:
    // CHECK: [[VAL_52:%.*]] = addf [[VAL_49]], [[VAL_50]] : f32
    // CHECK: br ^bb16([[VAL_52]] : f32)
    // CHECK: ^bb15:
    // CHECK: br ^bb16([[VAL_49]] : f32)
    // CHECK: ^bb16([[VAL_53:%.*]]: f32):
    // CHECK: br ^bb18([[VAL_53]] : f32)
    // First round, width 32: shuffle results are accumulated unconditionally.
    // CHECK: ^bb17:
    // CHECK: [[VAL_54:%.*]], [[VAL_55:%.*]] = gpu.shuffle [[VAL_0]], [[VAL_6]], [[VAL_5]] xor : f32
    // CHECK: [[VAL_56:%.*]] = addf [[VAL_0]], [[VAL_54]] : f32
    // CHECK: [[VAL_57:%.*]], [[VAL_58:%.*]] = gpu.shuffle [[VAL_56]], [[VAL_7]], [[VAL_5]] xor : f32
    // CHECK: [[VAL_59:%.*]] = addf [[VAL_56]], [[VAL_57]] : f32
    // CHECK: [[VAL_60:%.*]], [[VAL_61:%.*]] = gpu.shuffle [[VAL_59]], [[VAL_8]], [[VAL_5]] xor : f32
    // CHECK: [[VAL_62:%.*]] = addf [[VAL_59]], [[VAL_60]] : f32
    // CHECK: [[VAL_63:%.*]], [[VAL_64:%.*]] = gpu.shuffle [[VAL_62]], [[VAL_9]], [[VAL_5]] xor : f32
    // CHECK: [[VAL_65:%.*]] = addf [[VAL_62]], [[VAL_63]] : f32
    // CHECK: [[VAL_66:%.*]], [[VAL_67:%.*]] = gpu.shuffle [[VAL_65]], [[VAL_10]], [[VAL_5]] xor : f32
    // CHECK: [[VAL_68:%.*]] = addf [[VAL_65]], [[VAL_66]] : f32
    // CHECK: br ^bb18([[VAL_68]] : f32)
    // When VAL_30 is true, the first-round result is stored into the
    // workgroup buffer at index VAL_27 / 32; a barrier follows.
    // CHECK: ^bb18([[VAL_69:%.*]]: f32):
    // CHECK: cond_br [[VAL_30]], ^bb19, ^bb20
    // CHECK: ^bb19:
    // CHECK: [[VAL_70:%.*]] = divi_signed [[VAL_27]], [[VAL_5]] : i32
    // CHECK: [[VAL_71:%.*]] = index_cast [[VAL_70]] : i32 to index
    // CHECK: store [[VAL_69]], [[VAL_1]]{{\[}}[[VAL_71]]] : memref<32xf32, 3>
    // CHECK: br ^bb21
    // CHECK: ^bb20:
    // CHECK: br ^bb21
    // CHECK: ^bb21:
    // CHECK: gpu.barrier
    // Second round: VAL_73 = (VAL_28 + 31) / 32. Only invocations with
    // VAL_27 < VAL_73 load from the workgroup buffer and reduce again.
    // CHECK: [[VAL_72:%.*]] = addi [[VAL_28]], [[VAL_2]] : i32
    // CHECK: [[VAL_73:%.*]] = divi_signed [[VAL_72]], [[VAL_5]] : i32
    // CHECK: [[VAL_74:%.*]] = cmpi "slt", [[VAL_27]], [[VAL_73]] : i32
    // CHECK: cond_br [[VAL_74]], ^bb22, ^bb41
    // CHECK: ^bb22:
    // CHECK: [[VAL_75:%.*]] = index_cast [[VAL_27]] : i32 to index
    // CHECK: [[VAL_76:%.*]] = load [[VAL_1]]{{\[}}[[VAL_75]]] : memref<32xf32, 3>
    // CHECK: [[VAL_77:%.*]] = cmpi "slt", [[VAL_73]], [[VAL_5]] : i32
    // CHECK: cond_br [[VAL_77]], ^bb23, ^bb39
    // Second round, width VAL_73 < 32: guarded accumulation, same shape as
    // the guarded path of the first round.
    // CHECK: ^bb23:
    // CHECK: [[VAL_78:%.*]], [[VAL_79:%.*]] = gpu.shuffle [[VAL_76]], [[VAL_6]], [[VAL_73]] xor : f32
    // CHECK: cond_br [[VAL_79]], ^bb24, ^bb25
    // CHECK: ^bb24:
    // CHECK: [[VAL_80:%.*]] = addf [[VAL_76]], [[VAL_78]] : f32
    // CHECK: br ^bb26([[VAL_80]] : f32)
    // CHECK: ^bb25:
    // CHECK: br ^bb26([[VAL_76]] : f32)
    // CHECK: ^bb26([[VAL_81:%.*]]: f32):
    // CHECK: [[VAL_82:%.*]], [[VAL_83:%.*]] = gpu.shuffle [[VAL_81]], [[VAL_7]], [[VAL_73]] xor : f32
    // CHECK: cond_br [[VAL_83]], ^bb27, ^bb28
    // CHECK: ^bb27:
    // CHECK: [[VAL_84:%.*]] = addf [[VAL_81]], [[VAL_82]] : f32
    // CHECK: br ^bb29([[VAL_84]] : f32)
    // CHECK: ^bb28:
    // CHECK: br ^bb29([[VAL_81]] : f32)
    // CHECK: ^bb29([[VAL_85:%.*]]: f32):
    // CHECK: [[VAL_86:%.*]], [[VAL_87:%.*]] = gpu.shuffle [[VAL_85]], [[VAL_8]], [[VAL_73]] xor : f32
    // CHECK: cond_br [[VAL_87]], ^bb30, ^bb31
    // CHECK: ^bb30:
    // CHECK: [[VAL_88:%.*]] = addf [[VAL_85]], [[VAL_86]] : f32
    // CHECK: br ^bb32([[VAL_88]] : f32)
    // CHECK: ^bb31:
    // CHECK: br ^bb32([[VAL_85]] : f32)
    // CHECK: ^bb32([[VAL_89:%.*]]: f32):
    // CHECK: [[VAL_90:%.*]], [[VAL_91:%.*]] = gpu.shuffle [[VAL_89]], [[VAL_9]], [[VAL_73]] xor : f32
    // CHECK: cond_br [[VAL_91]], ^bb33, ^bb34
    // CHECK: ^bb33:
    // CHECK: [[VAL_92:%.*]] = addf [[VAL_89]], [[VAL_90]] : f32
    // CHECK: br ^bb35([[VAL_92]] : f32)
    // CHECK: ^bb34:
    // CHECK: br ^bb35([[VAL_89]] : f32)
    // CHECK: ^bb35([[VAL_93:%.*]]: f32):
    // CHECK: [[VAL_94:%.*]], [[VAL_95:%.*]] = gpu.shuffle [[VAL_93]], [[VAL_10]], [[VAL_73]] xor : f32
    // CHECK: cond_br [[VAL_95]], ^bb36, ^bb37
    // CHECK: ^bb36:
    // CHECK: [[VAL_96:%.*]] = addf [[VAL_93]], [[VAL_94]] : f32
    // CHECK: br ^bb38([[VAL_96]] : f32)
    // CHECK: ^bb37:
    // CHECK: br ^bb38([[VAL_93]] : f32)
    // CHECK: ^bb38([[VAL_97:%.*]]: f32):
    // CHECK: br ^bb40([[VAL_97]] : f32)
    // Second round, width 32: unconditional accumulation.
    // CHECK: ^bb39:
    // CHECK: [[VAL_98:%.*]], [[VAL_99:%.*]] = gpu.shuffle [[VAL_76]], [[VAL_6]], [[VAL_5]] xor : f32
    // CHECK: [[VAL_100:%.*]] = addf [[VAL_76]], [[VAL_98]] : f32
    // CHECK: [[VAL_101:%.*]], [[VAL_102:%.*]] = gpu.shuffle [[VAL_100]], [[VAL_7]], [[VAL_5]] xor : f32
    // CHECK: [[VAL_103:%.*]] = addf [[VAL_100]], [[VAL_101]] : f32
    // CHECK: [[VAL_104:%.*]], [[VAL_105:%.*]] = gpu.shuffle [[VAL_103]], [[VAL_8]], [[VAL_5]] xor : f32
    // CHECK: [[VAL_106:%.*]] = addf [[VAL_103]], [[VAL_104]] : f32
    // CHECK: [[VAL_107:%.*]], [[VAL_108:%.*]] = gpu.shuffle [[VAL_106]], [[VAL_9]], [[VAL_5]] xor : f32
    // CHECK: [[VAL_109:%.*]] = addf [[VAL_106]], [[VAL_107]] : f32
    // CHECK: [[VAL_110:%.*]], [[VAL_111:%.*]] = gpu.shuffle [[VAL_109]], [[VAL_10]], [[VAL_5]] xor : f32
    // CHECK: [[VAL_112:%.*]] = addf [[VAL_109]], [[VAL_110]] : f32
    // CHECK: br ^bb40([[VAL_112]] : f32)
    // The second-round result is stored at index 0 of the workgroup buffer,
    // followed by a final barrier.
    // CHECK: ^bb40([[VAL_113:%.*]]: f32):
    // CHECK: store [[VAL_113]], [[VAL_1]]{{\[}}[[VAL_4]]] : memref<32xf32, 3>
    // CHECK: br ^bb42
    // CHECK: ^bb41:
    // CHECK: br ^bb42
    // CHECK: ^bb42:
    // CHECK: gpu.barrier
    // Input under test: an all-reduce with the "add" op attribute and an
    // empty body region.
    %sum = "gpu.all_reduce"(%arg0) ({}) {op = "add"} : (f32) -> (f32)
    gpu.return
  }

}