; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire < %s | FileCheck -check-prefix=GFX78 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefix=GFX78 %s

; GlobalISel code generation for llvm.floor.f64 on AMDGPU.
;
; The tahiti run expects an expansion built from v_fract_f64, a v_min_f64
; clamp against the constant 0x3fefffffffffffff (held in an SGPR pair) and a
; final v_add_f64. The bonaire and fiji runs share one set of checks and
; expect a single v_floor_f64.
;
; Functions tagged #1 carry "amdgpu-ieee"="false". The nnan variants expect the
; tahiti expansion without the v_cmp_o_f64 / v_cndmask_b32 pair. The fneg and
; fabs variants expect the source modifiers folded into the instructions.

; --- VGPR inputs, default IEEE mode ---

define double @v_floor_f64_ieee(double %x) {
; GFX6-LABEL: v_floor_f64_ieee:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT:    s_setpc_b64 s[30:31]
  %result = call double @llvm.floor.f64(double %x)
  ret double %result
}

define double @v_floor_f64_ieee_nnan(double %x) {
; GFX6-LABEL: v_floor_f64_ieee_nnan:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee_nnan:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT:    s_setpc_b64 s[30:31]
  %result = call nnan double @llvm.floor.f64(double %x)
  ret double %result
}

define double @v_floor_f64_ieee_fneg(double %x) {
; GFX6-LABEL: v_floor_f64_ieee_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_fract_f64_e64 v[2:3], -v[0:1]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_ieee_fneg:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -v[0:1]
; GFX78-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg double %x
  %result = call double @llvm.floor.f64(double %neg.x)
  ret double %result
}

; --- VGPR inputs, "amdgpu-ieee"="false" (attribute group #1) ---

define double @v_floor_f64_nonieee(double %x) #1 {
; GFX6-LABEL: v_floor_f64_nonieee:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_nonieee:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT:    s_setpc_b64 s[30:31]
  %result = call double @llvm.floor.f64(double %x)
  ret double %result
}

define double @v_floor_f64_nonieee_nnan(double %x) #1 {
; GFX6-LABEL: v_floor_f64_nonieee_nnan:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_fract_f64_e32 v[2:3], v[0:1]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_nonieee_nnan:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_floor_f64_e32 v[0:1], v[0:1]
; GFX78-NEXT:    s_setpc_b64 s[30:31]
  %result = call nnan double @llvm.floor.f64(double %x)
  ret double %result
}

define double @v_floor_f64_non_ieee_fneg(double %x) #1 {
; GFX6-LABEL: v_floor_f64_non_ieee_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_fract_f64_e64 v[2:3], -v[0:1]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_non_ieee_fneg:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -v[0:1]
; GFX78-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg double %x
  %result = call double @llvm.floor.f64(double %neg.x)
  ret double %result
}

; --- VGPR inputs with fabs / fneg(fabs) source modifiers ---

define double @v_floor_f64_fabs(double %x) {
; GFX6-LABEL: v_floor_f64_fabs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_fract_f64_e64 v[2:3], |v[0:1]|
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT:    v_add_f64 v[0:1], |v[0:1]|, -v[2:3]
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_fabs:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_floor_f64_e64 v[0:1], |v[0:1]|
; GFX78-NEXT:    s_setpc_b64 s[30:31]
  %abs.x = call double @llvm.fabs.f64(double %x)
  %result = call double @llvm.floor.f64(double %abs.x)
  ret double %result
}

define double @v_floor_f64_fneg_fabs(double %x) {
; GFX6-LABEL: v_floor_f64_fneg_fabs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_fract_f64_e64 v[2:3], -|v[0:1]|
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[2:3], v[2:3], s[4:5]
; GFX6-NEXT:    v_cmp_o_f64_e32 vcc, v[0:1], v[0:1]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc
; GFX6-NEXT:    v_add_f64 v[0:1], -|v[0:1]|, -v[2:3]
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX78-LABEL: v_floor_f64_fneg_fabs:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -|v[0:1]|
; GFX78-NEXT:    s_setpc_b64 s[30:31]
  %abs.x = call double @llvm.fabs.f64(double %x)
  %neg.abs.x = fneg double %abs.x
  %result = call double @llvm.floor.f64(double %neg.abs.x)
  ret double %result
}

; --- SGPR (inreg) inputs in amdgpu_ps shaders; the result is returned as
; <2 x float> via a bitcast ---

define amdgpu_ps <2 x float> @s_floor_f64(double inreg %x) {
; GFX6-LABEL: s_floor_f64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_fract_f64_e32 v[0:1], s[2:3]
; GFX6-NEXT:    s_mov_b32 s0, -1
; GFX6-NEXT:    s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT:    v_mov_b32_e32 v2, s2
; GFX6-NEXT:    v_mov_b32_e32 v3, s3
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT:    v_add_f64 v[0:1], s[2:3], -v[0:1]
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    v_floor_f64_e32 v[0:1], s[2:3]
; GFX78-NEXT:    ; return to shader part epilog
  %result = call double @llvm.floor.f64(double %x)
  %cast = bitcast double %result to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x float> @s_floor_f64_fneg(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_fract_f64_e64 v[0:1], -s[2:3]
; GFX6-NEXT:    s_mov_b32 s0, -1
; GFX6-NEXT:    s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT:    v_mov_b32_e32 v2, s2
; GFX6-NEXT:    v_mov_b32_e32 v3, s3
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT:    v_add_f64 v[0:1], -s[2:3], -v[0:1]
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fneg:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -s[2:3]
; GFX78-NEXT:    ; return to shader part epilog
  %neg.x = fneg double %x
  %result = call double @llvm.floor.f64(double %neg.x)
  %cast = bitcast double %result to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x float> @s_floor_f64_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fabs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_fract_f64_e64 v[0:1], |s[2:3]|
; GFX6-NEXT:    s_mov_b32 s0, -1
; GFX6-NEXT:    s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT:    v_mov_b32_e32 v2, s2
; GFX6-NEXT:    v_mov_b32_e32 v3, s3
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT:    v_add_f64 v[0:1], |s[2:3]|, -v[0:1]
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fabs:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    v_floor_f64_e64 v[0:1], |s[2:3]|
; GFX78-NEXT:    ; return to shader part epilog
  %abs.x = call double @llvm.fabs.f64(double %x)
  %result = call double @llvm.floor.f64(double %abs.x)
  %cast = bitcast double %result to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
; GFX6-LABEL: s_floor_f64_fneg_fabs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_fract_f64_e64 v[0:1], -|s[2:3]|
; GFX6-NEXT:    s_mov_b32 s0, -1
; GFX6-NEXT:    s_mov_b32 s1, 0x3fefffff
; GFX6-NEXT:    v_min_f64 v[0:1], v[0:1], s[0:1]
; GFX6-NEXT:    v_cmp_o_f64_e64 vcc, s[2:3], s[2:3]
; GFX6-NEXT:    v_mov_b32_e32 v2, s2
; GFX6-NEXT:    v_mov_b32_e32 v3, s3
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GFX6-NEXT:    v_add_f64 v[0:1], -|s[2:3]|, -v[0:1]
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX78-LABEL: s_floor_f64_fneg_fabs:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    v_floor_f64_e64 v[0:1], -|s[2:3]|
; GFX78-NEXT:    ; return to shader part epilog
  %abs.x = call double @llvm.fabs.f64(double %x)
  %neg.abs.x = fneg double %abs.x
  %result = call double @llvm.floor.f64(double %neg.abs.x)
  %cast = bitcast double %result to <2 x float>
  ret <2 x float> %cast
}

declare double @llvm.floor.f64(double) #0
declare double @llvm.fabs.f64(double) #0

attributes #0 = { nounwind readnone speculatable willreturn }
attributes #1 = { "amdgpu-ieee"="false" }