; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-fix-function-bitcasts < %s | FileCheck -check-prefix=OPT %s

; Calls made through constant-expression bitcasts of a function. The opt
; checks expect a direct call to the underlying callee with explicit bitcasts
; on the mismatched arguments and/or return value; the llc checks expect a
; regular call sequence (s_getpc_b64 / s_add_u32 / s_addc_u32 / s_swappc_b64)
; that references the callee symbol.

; Return type differs (callee returns i32, call site expects float); callee is
; marked noinline, so a real call is expected in the llc output.
; GCN-LABEL: {{^}}test_bitcast_return_type_noinline:
; GCN: s_getpc_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_noinline@rel32@hi+12
; GCN: s_swappc_b64
; OPT-LABEL: @test_bitcast_return_type_noinline(
; OPT: %val = call i32 @ret_i32_noinline()
; OPT: bitcast i32 %val to float
define amdgpu_kernel void @test_bitcast_return_type_noinline() #0 {
  %val = call float bitcast (i32()* @ret_i32_noinline to float()*)()
  %op = fadd float %val, 1.0
  store volatile float %op, float addrspace(1)* undef
  ret void
}

; Same return-type mismatch, but the callee is alwaysinline: the llc output
; must not contain any part of a call sequence, while the opt-only run still
; shows the rewritten direct call.
; GCN-LABEL: {{^}}test_bitcast_return_type_alwaysinline:
; GCN-NOT: s_getpc_b64
; GCN-NOT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_alwaysinline@rel32@lo+4
; GCN-NOT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ret_i32_alwaysinline@rel32@hi+12
; GCN-NOT: s_swappc_b64
; OPT-LABEL: @test_bitcast_return_type_alwaysinline(
; OPT: %val = call i32 @ret_i32_alwaysinline()
; OPT: bitcast i32 %val to float
define amdgpu_kernel void @test_bitcast_return_type_alwaysinline() #0 {
  %val = call float bitcast (i32()* @ret_i32_alwaysinline to float()*)()
  %op = fadd float %val, 1.0
  store volatile float %op, float addrspace(1)* undef
  ret void
}

; Only the argument type differs (float passed where the callee takes i32):
; the argument is bitcast, and no bitcast of the result is expected.
; GCN-LABEL: {{^}}test_bitcast_argument_type:
; GCN: s_getpc_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
; GCN: s_swappc_b64
; OPT-LABEL: @test_bitcast_argument_type(
; OPT: %1 = bitcast float 2.000000e+00 to i32
; OPT: %val = call i32 @ident_i32(i32 %1)
; OPT-NOT: bitcast i32 %val to float
define amdgpu_kernel void @test_bitcast_argument_type() #0 {
  %val = call i32 bitcast (i32(i32)* @ident_i32 to i32(float)*)(float 2.0)
  %op = add i32 %val, 1
  store volatile i32 %op, i32 addrspace(1)* undef
  ret void
}

; Both the argument and the return type differ: bitcasts are expected on the
; way in and on the way out.
; GCN-LABEL: {{^}}test_bitcast_argument_and_return_types:
; GCN: s_getpc_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
; GCN: s_swappc_b64
; OPT-LABEL: @test_bitcast_argument_and_return_types(
; OPT: %1 = bitcast float 2.000000e+00 to i32
; OPT: %val = call i32 @ident_i32(i32 %1)
; OPT: bitcast i32 %val to float
define amdgpu_kernel void @test_bitcast_argument_and_return_types() #0 {
  %val = call float bitcast (i32(i32)* @ident_i32 to float(float)*)(float 2.0)
  %op = fadd float %val, 1.0
  store volatile float %op, float addrspace(1)* undef
  ret void
}

; Callee that reads the workitem id; used by the kernel below.
; GCN-LABEL: {{^}}use_workitem_id_x:
; GCN: s_waitcnt
; GCN-NEXT: v_and_b32_e32 v1, 0x3ff, v1
; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GCN-NEXT: s_setpc_b64
define hidden i32 @use_workitem_id_x(i32 %arg0) #0 {
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %op = add i32 %id, %arg0
  ret i32 %op
}

; Return-type bitcast of a callee that uses llvm.amdgcn.workitem.id.x. The
; llc checks expect v0 to be copied to v1 and the constant 9 moved into v0
; before the call.
; GCN-LABEL: {{^}}test_bitcast_use_workitem_id_x:
; GCN: v_mov_b32_e32 v1, v0
; GCN: s_getpc_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, use_workitem_id_x@rel32@hi+12
; GCN: v_mov_b32_e32 v0, 9
; GCN: s_swappc_b64
; GCN: v_add_f32_e32
; OPT-LABEL: @test_bitcast_use_workitem_id_x(
; OPT: %val = call i32 @use_workitem_id_x(i32 9)
; OPT: bitcast i32 %val to float
define amdgpu_kernel void @test_bitcast_use_workitem_id_x() #0 {
  %val = call float bitcast (i32(i32)* @use_workitem_id_x to float(i32)*)(i32 9)
  %op = fadd float %val, 1.0
  store volatile float %op, float addrspace(1)* undef
  ret void
}

; Same argument-and-return mismatch, but through an invoke. The opt checks
; expect the result bitcast to be placed in a new block named continue.split.
; GCN-LABEL: {{^}}test_invoke:
; GCN: s_getpc_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ident_i32@rel32@hi+12
; GCN: s_swappc_b64
; OPT-LABEL: @test_invoke(
; OPT: %1 = bitcast float 2.000000e+00 to i32
; OPT: %val = invoke i32 @ident_i32(i32 %1)
; OPT-NEXT: to label %continue unwind label %broken
; OPT-LABEL: continue.split:
; OPT: bitcast i32 %val to float
@_ZTIi = external global i8*
declare i32 @__gxx_personality_v0(...)
define amdgpu_kernel void @test_invoke() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
  %val = invoke float bitcast (i32(i32)* @ident_i32 to float(float)*)(float 2.0)
          to label %continue unwind label %broken

broken:
  landingpad { i8*, i32 } catch i8** @_ZTIi
  ret void

continue:
  %op = fadd float %val, 1.0
  store volatile float %op, float addrspace(1)* undef
  ret void
}

; Callees appear last in the source file to test that we still lower their
; arguments before we lower any calls to them.

define hidden i32 @ret_i32_noinline() #0 {
  ret i32 4
}

define hidden i32 @ret_i32_alwaysinline() #1 {
  ret i32 4
}

define hidden i32 @ident_i32(i32 %i) #0 {
  ret i32 %i
}

declare i32 @llvm.amdgcn.workitem.id.x() #2

attributes #0 = { nounwind noinline }
attributes #1 = { alwaysinline nounwind }
attributes #2 = { nounwind readnone speculatable }