; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN-ISEL %s

; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CISI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s

; Selection of add/sub instructions that produce or consume a carry.
;
; Every function is checked twice: once on the MIR dumped right after
; instruction selection (first command above), and once on the final assembly
; for each of the four subtargets listed above.
;
; The 64-bit immediate used by the *ri tests, 20015998343286, is
; 0x123456789876, so its low half is 0x56789876 and its high half is 0x1234.

;===------------------------------------------------------------------------===
; Addition
;===------------------------------------------------------------------------===

; 64-bit add of two kernel arguments: scalar pseudo after isel, then an
; s_add_u32 / s_addc_u32 pair in the final assembly.
; GCN-ISEL-LABEL: name: sadd64rr
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: S_ADD_U64_PSEUDO

; GCN-LABEL: @sadd64rr
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @sadd64rr(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %add = add i64 %a, %b
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; 64-bit add of a kernel argument and a constant: both halves of the constant
; are expected as immediates of the scalar add / add-with-carry.
; GCN-ISEL-LABEL: name: sadd64ri
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: S_ADD_U64_PSEUDO

; GCN-LABEL: @sadd64ri
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x56789876
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1234
define amdgpu_kernel void @sadd64ri(i64 addrspace(1)* %out, i64 %a) {
entry:
  %add = add i64 20015998343286, %a
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; 64-bit add where one operand is derived from the workitem id: vector pseudo
; after isel, then a VALU add / add-with-carry pair.
; GCN-ISEL-LABEL: name: vadd64rr
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: V_ADD_U64_PSEUDO

; GCN-LABEL: @vadd64rr
;
; CISI: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; CISI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; VI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
define amdgpu_kernel void @vadd64rr(i64 addrspace(1)* %out, i64 %a) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %add = add i64 %a, %tid.ext
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; 64-bit add of a constant and a value derived from the workitem id.
; GCN-ISEL-LABEL: name: vadd64ri
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: V_ADD_U64_PSEUDO

; GCN-LABEL: @vadd64ri
;
; CISI: v_add_i32_e32 v0, vcc, 0x56789876, v0
; CISI: v_mov_b32_e32 v1, 0x1234
; CISI: v_addc_u32_e32 v1, vcc, 0, v1, vcc
;
; VI: v_add_u32_e32 v0, vcc, 0x56789876, v0
; VI: v_mov_b32_e32 v1, 0x1234
; VI: v_addc_u32_e32 v1, vcc, 0, v1, vcc
;
; GFX9: v_add_co_u32_e32 v0, vcc, 0x56789876, v0
; GFX9: v_mov_b32_e32 v1, 0x1234
; GFX9: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
;
; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0, 0x1234, [[CARRY]]
define amdgpu_kernel void @vadd64ri(i64 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %add = add i64 20015998343286, %tid.ext
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; 32-bit uadd.with.overflow whose carry is extracted but never stored; only
; the post-isel MIR is checked and a plain scalar add is expected.
; GCN-ISEL-LABEL: name: suaddo32
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_ADD_I32
define amdgpu_kernel void @suaddo32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue { i32, i1 } %uadd, 0
  %carry = extractvalue { i32, i1 } %uadd, 1
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}


; GCN-ISEL-LABEL: name: uaddo32_vcc_user
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: V_ADD_CO_U32_e64

; Below we check selection to v_add/addc, because the only user of the VCC
; produced by the UADDO is v_cndmask.
; We select the VALU form to avoid an unnecessary s_cselect to copy SCC to VCC.

; GCN-LABEL: @uaddo32_vcc_user
;
; CISI: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; CISI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
; GFX1010: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
define amdgpu_kernel void @uaddo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue { i32, i1 } %uadd, 0
  %carry = extractvalue { i32, i1 } %uadd, 1
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

; 64-bit uadd.with.overflow of two kernel arguments, with both the value and
; the carry stored.
; GCN-ISEL-LABEL: name: suaddo64
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_ADD_U64_PSEUDO

; GCN-LABEL: @suaddo64
;
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @suaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 {
  %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue { i64, i1 } %uadd, 0
  %carry = extractvalue { i64, i1 } %uadd, 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

; 64-bit uadd.with.overflow where one operand is derived from the workitem id.
; GCN-ISEL-LABEL: name: vuaddo64
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: V_ADD_U64_PSEUDO

; GCN-LABEL: @vuaddo64
;
; CISI: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; CISI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; VI: v_add_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; VI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX1010: v_add_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
define amdgpu_kernel void @vuaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %tid.ext)
  %val = extractvalue { i64, i1 } %uadd, 0
  %carry = extractvalue { i64, i1 } %uadd, 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

;===------------------------------------------------------------------------===
; Subtraction
;===------------------------------------------------------------------------===

; 64-bit sub of two kernel arguments: scalar pseudo after isel, then an
; s_sub_u32 / s_subb_u32 pair in the final assembly.
; GCN-ISEL-LABEL: name: ssub64rr
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: S_SUB_U64_PSEUDO

; GCN-LABEL: @ssub64rr
; GCN: s_sub_u32
; GCN: s_subb_u32
define amdgpu_kernel void @ssub64rr(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %sub = sub i64 %a, %b
  store i64 %sub, i64 addrspace(1)* %out
  ret void
}

; 64-bit sub of a kernel argument from a constant: both halves of the constant
; are expected as the first source operand of the scalar sub / sub-with-borrow.
; GCN-ISEL-LABEL: name: ssub64ri
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: S_SUB_U64_PSEUDO

; GCN-LABEL: @ssub64ri
; GCN: s_sub_u32 s{{[0-9]+}}, 0x56789876, s{{[0-9]+}}
; GCN: s_subb_u32 s{{[0-9]+}}, 0x1234, s{{[0-9]+}}
define amdgpu_kernel void @ssub64ri(i64 addrspace(1)* %out, i64 %a) {
entry:
  %sub = sub i64 20015998343286, %a
  store i64 %sub, i64 addrspace(1)* %out
  ret void
}

; 64-bit sub where the subtrahend is derived from the workitem id: vector
; pseudo after isel, then a VALU sub / sub-with-borrow pair.
; GCN-ISEL-LABEL: name: vsub64rr
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: V_SUB_U64_PSEUDO

; GCN-LABEL: @vsub64rr
;
; CISI: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; CISI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; VI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
define amdgpu_kernel void @vsub64rr(i64 addrspace(1)* %out, i64 %a) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %sub = sub i64 %a, %tid.ext
  store i64 %sub, i64 addrspace(1)* %out
  ret void
}

; 64-bit sub of a value derived from the workitem id from a constant.
; GCN-ISEL-LABEL: name: vsub64ri
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0.entry:
; GCN-ISEL: V_SUB_U64_PSEUDO

; GCN-LABEL: @vsub64ri
;
; CISI: v_sub_i32_e32 v0, vcc, 0x56789876, v0
; CISI: v_mov_b32_e32 v1, 0x1234
; CISI: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
;
; VI: v_sub_u32_e32 v0, vcc, 0x56789876, v0
; VI: v_mov_b32_e32 v1, 0x1234
; VI: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
;
; GFX9: v_sub_co_u32_e32 v0, vcc, 0x56789876, v0
; GFX9: v_mov_b32_e32 v1, 0x1234
; GFX9: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
;
; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0x1234, 0, [[CARRY]]
define amdgpu_kernel void @vsub64ri(i64 addrspace(1)* %out) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %sub = sub i64 20015998343286, %tid.ext
  store i64 %sub, i64 addrspace(1)* %out
  ret void
}

; 32-bit usub.with.overflow whose borrow is extracted but never stored; only
; the post-isel MIR is checked and a plain scalar sub is expected.
; GCN-ISEL-LABEL: name: susubo32
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_SUB_I32
define amdgpu_kernel void @susubo32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
  %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue { i32, i1 } %usub, 0
  %carry = extractvalue { i32, i1 } %usub, 1
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}


; GCN-ISEL-LABEL: name: usubo32_vcc_user
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: V_SUB_CO_U32_e64

; Below we check selection to v_sub/subb, because the only user of the VCC
; produced by the USUBO is v_cndmask.
; We select the VALU form to avoid an unnecessary s_cselect to copy SCC to VCC.

; GCN-LABEL: @usubo32_vcc_user
;
; CISI: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; CISI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; VI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc
;
; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, s{{[0-9]+}}
; GFX1010: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, [[CARRY]]
define amdgpu_kernel void @usubo32_vcc_user(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) #0 {
  %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
  %val = extractvalue { i32, i1 } %usub, 0
  %carry = extractvalue { i32, i1 } %usub, 1
  store i32 %val, i32 addrspace(1)* %out, align 4
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

; 64-bit usub.with.overflow of two kernel arguments, with both the value and
; the borrow stored.
; GCN-ISEL-LABEL: name: susubo64
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: S_SUB_U64_PSEUDO

; GCN-LABEL: @susubo64
;
; GCN: s_sub_u32
; GCN: s_subb_u32
define amdgpu_kernel void @susubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) #0 {
  %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
  %val = extractvalue { i64, i1 } %usub, 0
  %carry = extractvalue { i64, i1 } %usub, 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

; 64-bit usub.with.overflow where the subtrahend is derived from the workitem
; id.
; GCN-ISEL-LABEL: name: vusubo64
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.0
; GCN-ISEL: V_SUB_U64_PSEUDO

; GCN-LABEL: @vusubo64
;
; CISI: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; CISI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; VI: v_subbrev_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v0
; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
;
; GFX1010: v_sub_co_u32_e64 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
define amdgpu_kernel void @vusubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %tid.ext)
  %val = extractvalue { i64, i1 } %usub, 0
  %carry = extractvalue { i64, i1 } %usub, 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  store i1 %carry, i1 addrspace(1)* %carryout
  ret void
}

;===------------------------------------------------------------------------===
; Carry consumers inside the 64-bit division expansion
;===------------------------------------------------------------------------===

; 64-bit udiv of two kernel arguments. Only the post-isel MIR is checked: in
; bb.3 the carry defined by a VALU add must feed S_ADD_CO_PSEUDO, and the
; carry defined by a VALU sub must feed S_SUB_CO_PSEUDO.
; GCN-ISEL-LABEL: name: sudiv64
; GCN-ISEL-LABEL: body:
; GCN-ISEL-LABEL: bb.3
; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64
; GCN-ISEL: S_ADD_CO_PSEUDO %{{[0-9]+}}, %{{[0-9]+}}, %[[CARRY]]
; GCN-ISEL: %[[CARRY:[0-9]+]]:sreg_64_xexec = V_SUB_CO_U32_e64
; GCN-ISEL: S_SUB_CO_PSEUDO %{{[0-9]+}}, %{{[0-9]+}}, %[[CARRY]]
define amdgpu_kernel void @sudiv64(i64 addrspace(1)* %out, i64 %x, i64 %y) {
  %result = udiv i64 %x, %y
  store i64 %result, i64 addrspace(1)* %out
  ret void
}



declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1

declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1

declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #1

declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }