• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// RUN: hlo_to_llvm_ir %s | FileCheck %s
2
3// CHECK-LABEL: entry:
4// CHECK:         %[[VAL_0:.*]] = alloca i32, align 4
5// CHECK:         %[[VAL_1:.*]] = alloca i32, align 4
6// CHECK:         %[[VAL_2:.*]] = getelementptr inbounds i8, i8* %[[VAL_3:.*]], i64 0
7// CHECK:         %[[VAL_4:.*]] = bitcast i8* %[[VAL_2]] to [100 x [200 x float]]*
8// CHECK:         %[[VAL_5:.*]] = getelementptr inbounds i8, i8* %[[VAL_6:.*]], i64 0
9// CHECK:         %[[VAL_7:.*]] = bitcast i8* %[[VAL_5]] to [200 x [100 x float]]*
10// CHECK:         %[[VAL_8:.*]] = bitcast [100 x [200 x float]]* %[[VAL_4]] to [1 x [100 x [200 x float]]]*
11// CHECK:         %[[VAL_9:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !2
12// CHECK:         %[[VAL_10:.*]] = urem i32 %[[VAL_9]], 32
13// CHECK:         %[[VAL_11:.*]] = udiv i32 %[[VAL_9]], 32
14// CHECK:         %[[VAL_12:.*]] = urem i32 %[[VAL_9]], 32
15// CHECK:         %[[VAL_13:.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range !3
16// CHECK:         %[[VAL_14:.*]] = udiv i32 %[[VAL_13]], 1
17// CHECK:         %[[VAL_15:.*]] = urem i32 %[[VAL_14]], 4
18// CHECK:         %[[VAL_16:.*]] = udiv i32 %[[VAL_13]], 4
19// CHECK:         %[[VAL_17:.*]] = urem i32 %[[VAL_16]], 7
20// CHECK:         %[[VAL_18:.*]] = udiv i32 %[[VAL_13]], 28
21// CHECK:         %[[VAL_19:.*]] = mul i32 %[[VAL_18]], 1
22// CHECK:         %[[VAL_20:.*]] = icmp eq i32 %[[VAL_17]], 6
23// CHECK:         %[[VAL_21:.*]] = select i1 %[[VAL_20]], i32 8, i32 32
24// CHECK:         %[[VAL_22:.*]] = icmp eq i32 %[[VAL_15]], 3
25// CHECK:         %[[VAL_23:.*]] = select i1 %[[VAL_22]], i32 4, i32 32
26// CHECK:         %[[VAL_24:.*]] = mul i32 %[[VAL_17]], 32
27// CHECK:         %[[VAL_25:.*]] = mul i32 %[[VAL_15]], 32
28// CHECK:         %[[VAL_26:.*]] = mul i32 %[[VAL_10]], 1
29// CHECK:         %[[VAL_27:.*]] = add i32 %[[VAL_24]], %[[VAL_26]]
30// CHECK:         %[[VAL_28:.*]] = sub i32 %[[VAL_23]], %[[VAL_11]]
31// CHECK:         %[[VAL_29:.*]] = add i32 %[[VAL_28]], 4
32// CHECK:         %[[VAL_30:.*]] = add i32 %[[VAL_29]], -1
33// CHECK:         %[[VAL_31:.*]] = udiv i32 %[[VAL_30]], 4
34// CHECK:         store i32 0, i32* %[[VAL_1]], align 4
35// CHECK:         br label %[[VAL_32:.*]]
36// CHECK:       input_y_in_tile.loop_header:                      ; preds = %[[VAL_33:.*]], %[[VAL_34:.*]]
37// CHECK:         %[[VAL_35:.*]] = load i32, i32* %[[VAL_1]], align 4
38// CHECK:         %[[VAL_36:.*]] = icmp uge i32 %[[VAL_35]], %[[VAL_31]]
39// CHECK:         br i1 %[[VAL_36]], label %[[VAL_37:.*]], label %[[VAL_38:.*]]
40// CHECK:       input_y_in_tile.loop_body:                        ; preds = %[[VAL_32]]
41// CHECK:         %[[VAL_39:.*]] = add nuw nsw i32 %[[VAL_35]], 1
42// CHECK:         store i32 %[[VAL_39]], i32* %[[VAL_1]], align 4
43// CHECK:         %[[VAL_40:.*]] = icmp eq i32 %[[VAL_35]], 0
44// CHECK:         %[[VAL_41:.*]] = mul i32 %[[VAL_35]], 4
45// CHECK:         %[[VAL_42:.*]] = add i32 %[[VAL_11]], %[[VAL_41]]
46// CHECK:         %[[VAL_43:.*]] = add i32 %[[VAL_25]], %[[VAL_42]]
47// CHECK:         %[[VAL_44:.*]] = add i32 0, %[[VAL_26]]
48// CHECK:         %[[VAL_45:.*]] = add i32 %[[VAL_27]], 0
49// CHECK:         %[[VAL_46:.*]] = icmp ult i32 %[[VAL_44]], %[[VAL_21]]
50// CHECK:         br i1 %[[VAL_46]], label %[[VAL_47:.*]], label %[[VAL_33]]
51// CHECK:       input_x_in_tile-after:                            ; preds = %[[VAL_47]], %[[VAL_38]]
52// CHECK:         br label %[[VAL_32]], !llvm.loop !4
53// CHECK:       input_y_in_tile.loop_exit:                        ; preds = %[[VAL_32]]
54// CHECK:         call void @llvm.nvvm.barrier0()
55// CHECK:         %[[VAL_48:.*]] = mul i32 %[[VAL_10]], 1
56// CHECK:         %[[VAL_49:.*]] = add i32 %[[VAL_25]], %[[VAL_48]]
57// CHECK:         %[[VAL_50:.*]] = sub i32 %[[VAL_21]], %[[VAL_11]]
58// CHECK:         %[[VAL_51:.*]] = add i32 %[[VAL_50]], 4
59// CHECK:         %[[VAL_52:.*]] = add i32 %[[VAL_51]], -1
60// CHECK:         %[[VAL_53:.*]] = udiv i32 %[[VAL_52]], 4
61// CHECK:         store i32 0, i32* %[[VAL_0]], align 4
62// CHECK:         br label %[[VAL_54:.*]]
63// CHECK:       output_y_in_tile.loop_header:                     ; preds = %[[VAL_55:.*]], %[[VAL_37]]
64// CHECK:         %[[VAL_56:.*]] = load i32, i32* %[[VAL_0]], align 4
65// CHECK:         %[[VAL_57:.*]] = icmp uge i32 %[[VAL_56]], %[[VAL_53]]
66// CHECK:         br i1 %[[VAL_57]], label %[[VAL_58:.*]], label %[[VAL_59:.*]]
67// CHECK:       output_y_in_tile.loop_body:                       ; preds = %[[VAL_54]]
68// CHECK:         %[[VAL_60:.*]] = add nuw nsw i32 %[[VAL_56]], 1
69// CHECK:         store i32 %[[VAL_60]], i32* %[[VAL_0]], align 4
70// CHECK:         %[[VAL_61:.*]] = icmp eq i32 %[[VAL_56]], 0
71// CHECK:         %[[VAL_62:.*]] = mul i32 %[[VAL_56]], 4
72// CHECK:         %[[VAL_63:.*]] = add i32 %[[VAL_11]], %[[VAL_62]]
73// CHECK:         %[[VAL_64:.*]] = add i32 %[[VAL_24]], %[[VAL_63]]
74// CHECK:         %[[VAL_65:.*]] = add i32 0, %[[VAL_48]]
75// CHECK:         %[[VAL_66:.*]] = add i32 %[[VAL_49]], 0
76// CHECK:         %[[VAL_67:.*]] = icmp ult i32 %[[VAL_65]], %[[VAL_23]]
77// CHECK:         br i1 %[[VAL_67]], label %[[VAL_68:.*]], label %[[VAL_55]]
78// CHECK:       output_x_in_tile-after:                           ; preds = %[[VAL_68]], %[[VAL_59]]
79// CHECK:         br label %[[VAL_54]], !llvm.loop !6
80// CHECK:       output_y_in_tile.loop_exit:                       ; preds = %[[VAL_54]]
81// CHECK:         ret void
82// CHECK:       input_x_in_tile-true:                             ; preds = %[[VAL_38]]
83// CHECK:         %[[VAL_69:.*]] = getelementptr inbounds [1 x [100 x [200 x float]]], [1 x [100 x [200 x float]]]* %[[VAL_8]], i32 0, i32 0, i32 %[[VAL_43]], i32 %[[VAL_45]]
84// CHECK:         %[[VAL_70:.*]] = load float, float* %[[VAL_69]], align 4, !invariant.load !7
85// CHECK:         %[[VAL_71:.*]] = getelementptr [32 x [33 x float]], [32 x [33 x float]] addrspace(3)* @b.tile0, i32 0, i32 %[[VAL_42]], i32 %[[VAL_44]]
86// CHECK:         store float %[[VAL_70]], float addrspace(3)* %[[VAL_71]], align 4
87// CHECK:         br label %[[VAL_33]]
88// CHECK:       output_x_in_tile-true:                            ; preds = %[[VAL_59]]
89// CHECK:         %[[VAL_72:.*]] = getelementptr [32 x [33 x float]], [32 x [33 x float]] addrspace(3)* @b.tile0, i64 0, i32 %[[VAL_65]], i32 %[[VAL_63]]
90// CHECK:         %[[VAL_73:.*]] = load float, float addrspace(3)* %[[VAL_72]], align 4
91// CHECK:         %[[VAL_74:.*]] = bitcast [200 x [100 x float]]* %[[VAL_7]] to [1 x [200 x [100 x float]]]*
92// CHECK:         %[[VAL_75:.*]] = getelementptr inbounds [1 x [200 x [100 x float]]], [1 x [200 x [100 x float]]]* %[[VAL_74]], i32 0, i32 0, i32 %[[VAL_64]], i32 %[[VAL_66]]
93// CHECK:         store float %[[VAL_73]], float* %[[VAL_75]], align 4
94// CHECK:         br label %[[VAL_55]]
95
96HloModule Test
97
// Entry computation: reads the f32[100, 200] parameter `a` (layout {1,0}) and
// returns `b`, a copy of `a` with the same shape but layout {0,1}.
// NOTE(review): the CHECK lines above match `@b.tile0`, which appears to be
// derived from the name of the ROOT instruction -- confirm before renaming `b`.
98ENTRY main {
99  a = f32[100, 200]{1,0} parameter(0)
100  ROOT b = f32[100, 200]{0,1} copy(a)
101}
102