; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s

; Use a 64-bit value with lo bits that can be represented as an inline constant
define amdgpu_kernel void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
; SI-LABEL: i64_imm_inline_lo:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 5
; SI-NEXT:    v_mov_b32_e32 v1, 0x12345678
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: i64_imm_inline_lo:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 5
; VI-NEXT:    v_mov_b32_e32 v1, 0x12345678
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
entry:
  store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
  ret void
}

; Use a 64-bit value with hi bits that can be represented as an inline constant
define amdgpu_kernel void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
; SI-LABEL: i64_imm_inline_hi:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x12345678
; SI-NEXT:    v_mov_b32_e32 v1, 5
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: i64_imm_inline_hi:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x12345678
; VI-NEXT:    v_mov_b32_e32 v1, 5
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
entry:
  store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
  ret void
}

define amdgpu_kernel void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) {
; SI-LABEL: store_imm_neg_0.0_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    v_bfrev_b32_e32 v1, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_imm_neg_0.0_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    v_bfrev_b32_e32 v1, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store i64 -9223372036854775808, i64 addrspace(1) *%out
  ret void
}

define amdgpu_kernel void @store_inline_imm_neg_0.0_i32(i32 addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_neg_0.0_i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_bfrev_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_neg_0.0_i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_bfrev_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store i32 -2147483648, i32 addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_0.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_0.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 0.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_imm_neg_0.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_imm_neg_0.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_bfrev_b32_e32 v0, 1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_imm_neg_0.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_bfrev_b32_e32 v0, 1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float -0.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_0.5_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0.5
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_0.5_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0.5
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 0.5, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_0.5_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, -0.5
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_0.5_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, -0.5
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float -0.5, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_1.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 1.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_1.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 1.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 1.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_1.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, -1.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_1.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, -1.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float -1.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_2.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 2.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_2.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 2.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 2.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_2.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, -2.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_2.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, -2.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float -2.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_4.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 4.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_4.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 4.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 4.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_4.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, -4.0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_4.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, -4.0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float -4.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_inv_2pi_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_inv_2pi_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x3e22f983
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_inv_2pi_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0.15915494
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 0x3FC45F3060000000, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f32(float addrspace(1)* %out) {
; SI-LABEL: store_inline_imm_m_inv_2pi_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0xbe22f983
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_inline_imm_m_inv_2pi_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0xbe22f983
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 0xBFC45F3060000000, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @store_literal_imm_f32(float addrspace(1)* %out) {
; SI-LABEL: store_literal_imm_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x45800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: store_literal_imm_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    v_mov_b32_e32 v0, 0x45800000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  store float 4096.0, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_0.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_0.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_0.5_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 0.5
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_0.5_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 0.5
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0.5
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_0.5_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, -0.5
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_0.5_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, -0.5
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, -0.5
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_1.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 1.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_1.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 1.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 1.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_1.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, -1.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_1.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, -1.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, -1.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_2.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 2.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_2.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 2.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 2.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_2.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, -2.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_2.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, -2.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, -2.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_4.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 4.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_4.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 4.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 4.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_4.0_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, -4.0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_4.0_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, -4.0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, -4.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
; SI-LABEL: commute_add_inline_imm_0.5_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s7
; SI-NEXT:    s_mov_b32 s6, s2
; SI-NEXT:    s_mov_b32 s7, s3
; SI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_f32_e32 v0, 0.5, v0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: commute_add_inline_imm_0.5_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_f32_e32 v0, 0.5, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %x = load float, float addrspace(1)* %in
  %y = fadd float %x, 0.5
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
; SI-LABEL: commute_add_literal_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s7
; SI-NEXT:    s_mov_b32 s6, s2
; SI-NEXT:    s_mov_b32 s7, s3
; SI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_add_f32_e32 v0, 0x44800000, v0
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: commute_add_literal_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_mov_b32 s4, s6
; VI-NEXT:    s_mov_b32 s5, s7
; VI-NEXT:    s_mov_b32 s6, s2
; VI-NEXT:    s_mov_b32 s7, s3
; VI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_f32_e32 v0, 0x44800000, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %x = load float, float addrspace(1)* %in
  %y = fadd float %x, 1024.0
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_1_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 1
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_1_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0x36a0000000000000
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_2_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 2
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_2_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 2
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0x36b0000000000000
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_16_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 16
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_16_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 16
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0x36e0000000000000
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_1_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_add_i32 s0, s0, -1
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_1_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_add_i32 s0, s0, -1
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %xbc = bitcast float %x to i32
  %y = add i32 %xbc, -1
  %ybc = bitcast i32 %y to float
  store float %ybc, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_2_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_add_i32 s0, s0, -2
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_2_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_add_i32 s0, s0, -2
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %xbc = bitcast float %x to i32
  %y = add i32 %xbc, -2
  %ybc = bitcast i32 %y to float
  store float %ybc, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_neg_16_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_add_i32 s0, s0, -16
; SI-NEXT:    v_mov_b32_e32 v0, s0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_16_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_add_i32 s0, s0, -16
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %xbc = bitcast float %x to i32
  %y = add i32 %xbc, -16
  %ybc = bitcast i32 %y to float
  store float %ybc, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_63_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 63
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_63_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 63
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0x36ff800000000000
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
; SI-LABEL: add_inline_imm_64_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT:    s_load_dword s0, s[0:1], 0xb
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f32_e64 v0, s0, 64
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_64_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT:    s_load_dword s0, s[0:1], 0x2c
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f32_e64 v0, s0, 64
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
  %y = fadd float %x, 0x3700000000000000
  store float %y, float addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_0.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_0.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_0.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 0.0
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_0.5_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 0.5
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_0.5_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 0.5
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 0.5
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_neg_0.5_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], -0.5
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_neg_0.5_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], -0.5
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, -0.5
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_1.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_add_f64 v[0:1], s[2:3], 1.0
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: add_inline_imm_1.0_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_f64 v[0:1], s[2:3], 1.0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
  %y = fadd double %x, 1.0
  store double %y, double addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, [8 x i32], double %x) {
; SI-LABEL: add_inline_imm_neg_1.0_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x13
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:
s_waitcnt lgkmcnt(0) 1092; SI-NEXT: v_add_f64 v[0:1], s[2:3], -1.0 1093; SI-NEXT: s_mov_b32 s3, 0xf000 1094; SI-NEXT: s_mov_b32 s2, -1 1095; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1096; SI-NEXT: s_endpgm 1097; 1098; VI-LABEL: add_inline_imm_neg_1.0_f64: 1099; VI: ; %bb.0: 1100; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1101; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1102; VI-NEXT: s_waitcnt lgkmcnt(0) 1103; VI-NEXT: v_add_f64 v[0:1], s[2:3], -1.0 1104; VI-NEXT: s_mov_b32 s3, 0xf000 1105; VI-NEXT: s_mov_b32 s2, -1 1106; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1107; VI-NEXT: s_endpgm 1108 %y = fadd double %x, -1.0 1109 store double %y, double addrspace(1)* %out 1110 ret void 1111} 1112 1113define amdgpu_kernel void @add_inline_imm_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1114; SI-LABEL: add_inline_imm_2.0_f64: 1115; SI: ; %bb.0: 1116; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 1117; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1118; SI-NEXT: s_waitcnt lgkmcnt(0) 1119; SI-NEXT: v_add_f64 v[0:1], s[2:3], 2.0 1120; SI-NEXT: s_mov_b32 s3, 0xf000 1121; SI-NEXT: s_mov_b32 s2, -1 1122; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1123; SI-NEXT: s_endpgm 1124; 1125; VI-LABEL: add_inline_imm_2.0_f64: 1126; VI: ; %bb.0: 1127; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1128; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1129; VI-NEXT: s_waitcnt lgkmcnt(0) 1130; VI-NEXT: v_add_f64 v[0:1], s[2:3], 2.0 1131; VI-NEXT: s_mov_b32 s3, 0xf000 1132; VI-NEXT: s_mov_b32 s2, -1 1133; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1134; VI-NEXT: s_endpgm 1135 %y = fadd double %x, 2.0 1136 store double %y, double addrspace(1)* %out 1137 ret void 1138} 1139 1140define amdgpu_kernel void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1141; SI-LABEL: add_inline_imm_neg_2.0_f64: 1142; SI: ; %bb.0: 1143; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 1144; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 
1145; SI-NEXT: s_waitcnt lgkmcnt(0) 1146; SI-NEXT: v_add_f64 v[0:1], s[2:3], -2.0 1147; SI-NEXT: s_mov_b32 s3, 0xf000 1148; SI-NEXT: s_mov_b32 s2, -1 1149; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1150; SI-NEXT: s_endpgm 1151; 1152; VI-LABEL: add_inline_imm_neg_2.0_f64: 1153; VI: ; %bb.0: 1154; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1155; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1156; VI-NEXT: s_waitcnt lgkmcnt(0) 1157; VI-NEXT: v_add_f64 v[0:1], s[2:3], -2.0 1158; VI-NEXT: s_mov_b32 s3, 0xf000 1159; VI-NEXT: s_mov_b32 s2, -1 1160; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1161; VI-NEXT: s_endpgm 1162 %y = fadd double %x, -2.0 1163 store double %y, double addrspace(1)* %out 1164 ret void 1165} 1166 1167define amdgpu_kernel void @add_inline_imm_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1168; SI-LABEL: add_inline_imm_4.0_f64: 1169; SI: ; %bb.0: 1170; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 1171; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1172; SI-NEXT: s_waitcnt lgkmcnt(0) 1173; SI-NEXT: v_add_f64 v[0:1], s[2:3], 4.0 1174; SI-NEXT: s_mov_b32 s3, 0xf000 1175; SI-NEXT: s_mov_b32 s2, -1 1176; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1177; SI-NEXT: s_endpgm 1178; 1179; VI-LABEL: add_inline_imm_4.0_f64: 1180; VI: ; %bb.0: 1181; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1182; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1183; VI-NEXT: s_waitcnt lgkmcnt(0) 1184; VI-NEXT: v_add_f64 v[0:1], s[2:3], 4.0 1185; VI-NEXT: s_mov_b32 s3, 0xf000 1186; VI-NEXT: s_mov_b32 s2, -1 1187; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1188; VI-NEXT: s_endpgm 1189 %y = fadd double %x, 4.0 1190 store double %y, double addrspace(1)* %out 1191 ret void 1192} 1193 1194define amdgpu_kernel void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1195; SI-LABEL: add_inline_imm_neg_4.0_f64: 1196; SI: ; %bb.0: 1197; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 1198; SI-NEXT: s_load_dwordx2 
s[0:1], s[0:1], 0x9 1199; SI-NEXT: s_waitcnt lgkmcnt(0) 1200; SI-NEXT: v_add_f64 v[0:1], s[2:3], -4.0 1201; SI-NEXT: s_mov_b32 s3, 0xf000 1202; SI-NEXT: s_mov_b32 s2, -1 1203; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1204; SI-NEXT: s_endpgm 1205; 1206; VI-LABEL: add_inline_imm_neg_4.0_f64: 1207; VI: ; %bb.0: 1208; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1209; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1210; VI-NEXT: s_waitcnt lgkmcnt(0) 1211; VI-NEXT: v_add_f64 v[0:1], s[2:3], -4.0 1212; VI-NEXT: s_mov_b32 s3, 0xf000 1213; VI-NEXT: s_mov_b32 s2, -1 1214; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1215; VI-NEXT: s_endpgm 1216 %y = fadd double %x, -4.0 1217 store double %y, double addrspace(1)* %out 1218 ret void 1219} 1220 1221define amdgpu_kernel void @add_inline_imm_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1222; SI-LABEL: add_inline_imm_inv_2pi_f64: 1223; SI: ; %bb.0: 1224; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 1225; SI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882 1226; SI-NEXT: v_mov_b32_e32 v1, 0x3fc45f30 1227; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1228; SI-NEXT: s_waitcnt lgkmcnt(0) 1229; SI-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] 1230; SI-NEXT: s_mov_b32 s3, 0xf000 1231; SI-NEXT: s_mov_b32 s2, -1 1232; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1233; SI-NEXT: s_endpgm 1234; 1235; VI-LABEL: add_inline_imm_inv_2pi_f64: 1236; VI: ; %bb.0: 1237; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1238; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1239; VI-NEXT: s_waitcnt lgkmcnt(0) 1240; VI-NEXT: v_add_f64 v[0:1], s[2:3], 0.15915494309189532 1241; VI-NEXT: s_mov_b32 s3, 0xf000 1242; VI-NEXT: s_mov_b32 s2, -1 1243; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1244; VI-NEXT: s_endpgm 1245 %y = fadd double %x, 0x3fc45f306dc9c882 1246 store double %y, double addrspace(1)* %out 1247 ret void 1248} 1249 1250define amdgpu_kernel void @add_m_inv_2pi_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1251; 
SI-LABEL: add_m_inv_2pi_f64: 1252; SI: ; %bb.0: 1253; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 1254; SI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882 1255; SI-NEXT: v_mov_b32_e32 v1, 0xbfc45f30 1256; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1257; SI-NEXT: s_waitcnt lgkmcnt(0) 1258; SI-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] 1259; SI-NEXT: s_mov_b32 s3, 0xf000 1260; SI-NEXT: s_mov_b32 s2, -1 1261; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1262; SI-NEXT: s_endpgm 1263; 1264; VI-LABEL: add_m_inv_2pi_f64: 1265; VI: ; %bb.0: 1266; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1267; VI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882 1268; VI-NEXT: v_mov_b32_e32 v1, 0xbfc45f30 1269; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1270; VI-NEXT: s_waitcnt lgkmcnt(0) 1271; VI-NEXT: v_add_f64 v[0:1], s[2:3], v[0:1] 1272; VI-NEXT: s_mov_b32 s3, 0xf000 1273; VI-NEXT: s_mov_b32 s2, -1 1274; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1275; VI-NEXT: s_endpgm 1276 %y = fadd double %x, 0xbfc45f306dc9c882 1277 store double %y, double addrspace(1)* %out 1278 ret void 1279} 1280 1281define amdgpu_kernel void @add_inline_imm_1_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1282; SI-LABEL: add_inline_imm_1_f64: 1283; SI: ; %bb.0: 1284; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 1285; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1286; SI-NEXT: s_waitcnt lgkmcnt(0) 1287; SI-NEXT: v_add_f64 v[0:1], s[2:3], 1 1288; SI-NEXT: s_mov_b32 s3, 0xf000 1289; SI-NEXT: s_mov_b32 s2, -1 1290; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1291; SI-NEXT: s_endpgm 1292; 1293; VI-LABEL: add_inline_imm_1_f64: 1294; VI: ; %bb.0: 1295; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1296; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1297; VI-NEXT: s_waitcnt lgkmcnt(0) 1298; VI-NEXT: v_add_f64 v[0:1], s[2:3], 1 1299; VI-NEXT: s_mov_b32 s3, 0xf000 1300; VI-NEXT: s_mov_b32 s2, -1 1301; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1302; VI-NEXT: s_endpgm 1303 %y = fadd double %x, 
0x0000000000000001 1304 store double %y, double addrspace(1)* %out 1305 ret void 1306} 1307 1308define amdgpu_kernel void @add_inline_imm_2_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1309; SI-LABEL: add_inline_imm_2_f64: 1310; SI: ; %bb.0: 1311; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 1312; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1313; SI-NEXT: s_waitcnt lgkmcnt(0) 1314; SI-NEXT: v_add_f64 v[0:1], s[2:3], 2 1315; SI-NEXT: s_mov_b32 s3, 0xf000 1316; SI-NEXT: s_mov_b32 s2, -1 1317; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1318; SI-NEXT: s_endpgm 1319; 1320; VI-LABEL: add_inline_imm_2_f64: 1321; VI: ; %bb.0: 1322; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1323; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1324; VI-NEXT: s_waitcnt lgkmcnt(0) 1325; VI-NEXT: v_add_f64 v[0:1], s[2:3], 2 1326; VI-NEXT: s_mov_b32 s3, 0xf000 1327; VI-NEXT: s_mov_b32 s2, -1 1328; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1329; VI-NEXT: s_endpgm 1330 %y = fadd double %x, 0x0000000000000002 1331 store double %y, double addrspace(1)* %out 1332 ret void 1333} 1334 1335define amdgpu_kernel void @add_inline_imm_16_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1336; SI-LABEL: add_inline_imm_16_f64: 1337; SI: ; %bb.0: 1338; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 1339; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1340; SI-NEXT: s_waitcnt lgkmcnt(0) 1341; SI-NEXT: v_add_f64 v[0:1], s[2:3], 16 1342; SI-NEXT: s_mov_b32 s3, 0xf000 1343; SI-NEXT: s_mov_b32 s2, -1 1344; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1345; SI-NEXT: s_endpgm 1346; 1347; VI-LABEL: add_inline_imm_16_f64: 1348; VI: ; %bb.0: 1349; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1350; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1351; VI-NEXT: s_waitcnt lgkmcnt(0) 1352; VI-NEXT: v_add_f64 v[0:1], s[2:3], 16 1353; VI-NEXT: s_mov_b32 s3, 0xf000 1354; VI-NEXT: s_mov_b32 s2, -1 1355; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1356; VI-NEXT: s_endpgm 1357 %y = 
fadd double %x, 0x0000000000000010 1358 store double %y, double addrspace(1)* %out 1359 ret void 1360} 1361 1362define amdgpu_kernel void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1363; SI-LABEL: add_inline_imm_neg_1_f64: 1364; SI: ; %bb.0: 1365; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1366; SI-NEXT: v_mov_b32_e32 v0, -1 1367; SI-NEXT: s_mov_b32 s3, 0xf000 1368; SI-NEXT: s_mov_b32 s2, -1 1369; SI-NEXT: v_mov_b32_e32 v1, v0 1370; SI-NEXT: s_waitcnt lgkmcnt(0) 1371; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1372; SI-NEXT: s_endpgm 1373; 1374; VI-LABEL: add_inline_imm_neg_1_f64: 1375; VI: ; %bb.0: 1376; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1377; VI-NEXT: v_mov_b32_e32 v0, -1 1378; VI-NEXT: s_mov_b32 s3, 0xf000 1379; VI-NEXT: s_mov_b32 s2, -1 1380; VI-NEXT: v_mov_b32_e32 v1, v0 1381; VI-NEXT: s_waitcnt lgkmcnt(0) 1382; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1383; VI-NEXT: s_endpgm 1384 %y = fadd double %x, 0xffffffffffffffff 1385 store double %y, double addrspace(1)* %out 1386 ret void 1387} 1388 1389define amdgpu_kernel void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1390; SI-LABEL: add_inline_imm_neg_2_f64: 1391; SI: ; %bb.0: 1392; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1393; SI-NEXT: s_mov_b32 s3, 0xf000 1394; SI-NEXT: s_mov_b32 s2, -1 1395; SI-NEXT: v_mov_b32_e32 v0, -2 1396; SI-NEXT: v_mov_b32_e32 v1, -1 1397; SI-NEXT: s_waitcnt lgkmcnt(0) 1398; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1399; SI-NEXT: s_endpgm 1400; 1401; VI-LABEL: add_inline_imm_neg_2_f64: 1402; VI: ; %bb.0: 1403; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1404; VI-NEXT: s_mov_b32 s3, 0xf000 1405; VI-NEXT: s_mov_b32 s2, -1 1406; VI-NEXT: v_mov_b32_e32 v0, -2 1407; VI-NEXT: v_mov_b32_e32 v1, -1 1408; VI-NEXT: s_waitcnt lgkmcnt(0) 1409; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1410; VI-NEXT: s_endpgm 1411 %y = fadd double %x, 0xfffffffffffffffe 1412 store double 
%y, double addrspace(1)* %out 1413 ret void 1414} 1415 1416define amdgpu_kernel void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1417; SI-LABEL: add_inline_imm_neg_16_f64: 1418; SI: ; %bb.0: 1419; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1420; SI-NEXT: s_mov_b32 s3, 0xf000 1421; SI-NEXT: s_mov_b32 s2, -1 1422; SI-NEXT: v_mov_b32_e32 v0, -16 1423; SI-NEXT: v_mov_b32_e32 v1, -1 1424; SI-NEXT: s_waitcnt lgkmcnt(0) 1425; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1426; SI-NEXT: s_endpgm 1427; 1428; VI-LABEL: add_inline_imm_neg_16_f64: 1429; VI: ; %bb.0: 1430; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1431; VI-NEXT: s_mov_b32 s3, 0xf000 1432; VI-NEXT: s_mov_b32 s2, -1 1433; VI-NEXT: v_mov_b32_e32 v0, -16 1434; VI-NEXT: v_mov_b32_e32 v1, -1 1435; VI-NEXT: s_waitcnt lgkmcnt(0) 1436; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1437; VI-NEXT: s_endpgm 1438 %y = fadd double %x, 0xfffffffffffffff0 1439 store double %y, double addrspace(1)* %out 1440 ret void 1441} 1442 1443define amdgpu_kernel void @add_inline_imm_63_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1444; SI-LABEL: add_inline_imm_63_f64: 1445; SI: ; %bb.0: 1446; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 1447; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1448; SI-NEXT: s_waitcnt lgkmcnt(0) 1449; SI-NEXT: v_add_f64 v[0:1], s[2:3], 63 1450; SI-NEXT: s_mov_b32 s3, 0xf000 1451; SI-NEXT: s_mov_b32 s2, -1 1452; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1453; SI-NEXT: s_endpgm 1454; 1455; VI-LABEL: add_inline_imm_63_f64: 1456; VI: ; %bb.0: 1457; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1458; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1459; VI-NEXT: s_waitcnt lgkmcnt(0) 1460; VI-NEXT: v_add_f64 v[0:1], s[2:3], 63 1461; VI-NEXT: s_mov_b32 s3, 0xf000 1462; VI-NEXT: s_mov_b32 s2, -1 1463; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1464; VI-NEXT: s_endpgm 1465 %y = fadd double %x, 0x000000000000003F 1466 store double %y, double 
addrspace(1)* %out 1467 ret void 1468} 1469 1470define amdgpu_kernel void @add_inline_imm_64_f64(double addrspace(1)* %out, [8 x i32], double %x) { 1471; SI-LABEL: add_inline_imm_64_f64: 1472; SI: ; %bb.0: 1473; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x13 1474; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1475; SI-NEXT: s_waitcnt lgkmcnt(0) 1476; SI-NEXT: v_add_f64 v[0:1], s[2:3], 64 1477; SI-NEXT: s_mov_b32 s3, 0xf000 1478; SI-NEXT: s_mov_b32 s2, -1 1479; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1480; SI-NEXT: s_endpgm 1481; 1482; VI-LABEL: add_inline_imm_64_f64: 1483; VI: ; %bb.0: 1484; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c 1485; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1486; VI-NEXT: s_waitcnt lgkmcnt(0) 1487; VI-NEXT: v_add_f64 v[0:1], s[2:3], 64 1488; VI-NEXT: s_mov_b32 s3, 0xf000 1489; VI-NEXT: s_mov_b32 s2, -1 1490; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1491; VI-NEXT: s_endpgm 1492 %y = fadd double %x, 0x0000000000000040 1493 store double %y, double addrspace(1)* %out 1494 ret void 1495} 1496 1497define amdgpu_kernel void @store_inline_imm_0.0_f64(double addrspace(1)* %out) { 1498; SI-LABEL: store_inline_imm_0.0_f64: 1499; SI: ; %bb.0: 1500; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1501; SI-NEXT: v_mov_b32_e32 v0, 0 1502; SI-NEXT: s_mov_b32 s3, 0xf000 1503; SI-NEXT: s_mov_b32 s2, -1 1504; SI-NEXT: v_mov_b32_e32 v1, v0 1505; SI-NEXT: s_waitcnt lgkmcnt(0) 1506; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1507; SI-NEXT: s_endpgm 1508; 1509; VI-LABEL: store_inline_imm_0.0_f64: 1510; VI: ; %bb.0: 1511; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1512; VI-NEXT: v_mov_b32_e32 v0, 0 1513; VI-NEXT: s_mov_b32 s3, 0xf000 1514; VI-NEXT: s_mov_b32 s2, -1 1515; VI-NEXT: v_mov_b32_e32 v1, v0 1516; VI-NEXT: s_waitcnt lgkmcnt(0) 1517; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1518; VI-NEXT: s_endpgm 1519 store double 0.0, double addrspace(1)* %out 1520 ret void 1521} 1522 1523define amdgpu_kernel void 
@store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) { 1524; SI-LABEL: store_literal_imm_neg_0.0_f64: 1525; SI: ; %bb.0: 1526; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1527; SI-NEXT: s_mov_b32 s3, 0xf000 1528; SI-NEXT: s_mov_b32 s2, -1 1529; SI-NEXT: v_mov_b32_e32 v0, 0 1530; SI-NEXT: v_bfrev_b32_e32 v1, 1 1531; SI-NEXT: s_waitcnt lgkmcnt(0) 1532; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1533; SI-NEXT: s_endpgm 1534; 1535; VI-LABEL: store_literal_imm_neg_0.0_f64: 1536; VI: ; %bb.0: 1537; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1538; VI-NEXT: s_mov_b32 s3, 0xf000 1539; VI-NEXT: s_mov_b32 s2, -1 1540; VI-NEXT: v_mov_b32_e32 v0, 0 1541; VI-NEXT: v_bfrev_b32_e32 v1, 1 1542; VI-NEXT: s_waitcnt lgkmcnt(0) 1543; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1544; VI-NEXT: s_endpgm 1545 store double -0.0, double addrspace(1)* %out 1546 ret void 1547} 1548 1549define amdgpu_kernel void @store_inline_imm_0.5_f64(double addrspace(1)* %out) { 1550; SI-LABEL: store_inline_imm_0.5_f64: 1551; SI: ; %bb.0: 1552; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1553; SI-NEXT: s_mov_b32 s3, 0xf000 1554; SI-NEXT: s_mov_b32 s2, -1 1555; SI-NEXT: v_mov_b32_e32 v0, 0 1556; SI-NEXT: v_mov_b32_e32 v1, 0x3fe00000 1557; SI-NEXT: s_waitcnt lgkmcnt(0) 1558; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1559; SI-NEXT: s_endpgm 1560; 1561; VI-LABEL: store_inline_imm_0.5_f64: 1562; VI: ; %bb.0: 1563; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1564; VI-NEXT: s_mov_b32 s3, 0xf000 1565; VI-NEXT: s_mov_b32 s2, -1 1566; VI-NEXT: v_mov_b32_e32 v0, 0 1567; VI-NEXT: v_mov_b32_e32 v1, 0x3fe00000 1568; VI-NEXT: s_waitcnt lgkmcnt(0) 1569; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1570; VI-NEXT: s_endpgm 1571 store double 0.5, double addrspace(1)* %out 1572 ret void 1573} 1574 1575define amdgpu_kernel void @store_inline_imm_m_0.5_f64(double addrspace(1)* %out) { 1576; SI-LABEL: store_inline_imm_m_0.5_f64: 1577; SI: ; %bb.0: 1578; SI-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0x9 1579; SI-NEXT: s_mov_b32 s3, 0xf000 1580; SI-NEXT: s_mov_b32 s2, -1 1581; SI-NEXT: v_mov_b32_e32 v0, 0 1582; SI-NEXT: v_mov_b32_e32 v1, 0xbfe00000 1583; SI-NEXT: s_waitcnt lgkmcnt(0) 1584; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1585; SI-NEXT: s_endpgm 1586; 1587; VI-LABEL: store_inline_imm_m_0.5_f64: 1588; VI: ; %bb.0: 1589; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1590; VI-NEXT: s_mov_b32 s3, 0xf000 1591; VI-NEXT: s_mov_b32 s2, -1 1592; VI-NEXT: v_mov_b32_e32 v0, 0 1593; VI-NEXT: v_mov_b32_e32 v1, 0xbfe00000 1594; VI-NEXT: s_waitcnt lgkmcnt(0) 1595; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1596; VI-NEXT: s_endpgm 1597 store double -0.5, double addrspace(1)* %out 1598 ret void 1599} 1600 1601define amdgpu_kernel void @store_inline_imm_1.0_f64(double addrspace(1)* %out) { 1602; SI-LABEL: store_inline_imm_1.0_f64: 1603; SI: ; %bb.0: 1604; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1605; SI-NEXT: s_mov_b32 s3, 0xf000 1606; SI-NEXT: s_mov_b32 s2, -1 1607; SI-NEXT: v_mov_b32_e32 v0, 0 1608; SI-NEXT: v_mov_b32_e32 v1, 0x3ff00000 1609; SI-NEXT: s_waitcnt lgkmcnt(0) 1610; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1611; SI-NEXT: s_endpgm 1612; 1613; VI-LABEL: store_inline_imm_1.0_f64: 1614; VI: ; %bb.0: 1615; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1616; VI-NEXT: s_mov_b32 s3, 0xf000 1617; VI-NEXT: s_mov_b32 s2, -1 1618; VI-NEXT: v_mov_b32_e32 v0, 0 1619; VI-NEXT: v_mov_b32_e32 v1, 0x3ff00000 1620; VI-NEXT: s_waitcnt lgkmcnt(0) 1621; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1622; VI-NEXT: s_endpgm 1623 store double 1.0, double addrspace(1)* %out 1624 ret void 1625} 1626 1627define amdgpu_kernel void @store_inline_imm_m_1.0_f64(double addrspace(1)* %out) { 1628; SI-LABEL: store_inline_imm_m_1.0_f64: 1629; SI: ; %bb.0: 1630; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1631; SI-NEXT: s_mov_b32 s3, 0xf000 1632; SI-NEXT: s_mov_b32 s2, -1 1633; SI-NEXT: v_mov_b32_e32 v0, 0 1634; SI-NEXT: v_mov_b32_e32 v1, 
0xbff00000 1635; SI-NEXT: s_waitcnt lgkmcnt(0) 1636; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1637; SI-NEXT: s_endpgm 1638; 1639; VI-LABEL: store_inline_imm_m_1.0_f64: 1640; VI: ; %bb.0: 1641; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1642; VI-NEXT: s_mov_b32 s3, 0xf000 1643; VI-NEXT: s_mov_b32 s2, -1 1644; VI-NEXT: v_mov_b32_e32 v0, 0 1645; VI-NEXT: v_mov_b32_e32 v1, 0xbff00000 1646; VI-NEXT: s_waitcnt lgkmcnt(0) 1647; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1648; VI-NEXT: s_endpgm 1649 store double -1.0, double addrspace(1)* %out 1650 ret void 1651} 1652 1653define amdgpu_kernel void @store_inline_imm_2.0_f64(double addrspace(1)* %out) { 1654; SI-LABEL: store_inline_imm_2.0_f64: 1655; SI: ; %bb.0: 1656; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1657; SI-NEXT: s_mov_b32 s3, 0xf000 1658; SI-NEXT: s_mov_b32 s2, -1 1659; SI-NEXT: v_mov_b32_e32 v0, 0 1660; SI-NEXT: v_mov_b32_e32 v1, 2.0 1661; SI-NEXT: s_waitcnt lgkmcnt(0) 1662; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1663; SI-NEXT: s_endpgm 1664; 1665; VI-LABEL: store_inline_imm_2.0_f64: 1666; VI: ; %bb.0: 1667; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1668; VI-NEXT: s_mov_b32 s3, 0xf000 1669; VI-NEXT: s_mov_b32 s2, -1 1670; VI-NEXT: v_mov_b32_e32 v0, 0 1671; VI-NEXT: v_mov_b32_e32 v1, 2.0 1672; VI-NEXT: s_waitcnt lgkmcnt(0) 1673; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1674; VI-NEXT: s_endpgm 1675 store double 2.0, double addrspace(1)* %out 1676 ret void 1677} 1678 1679define amdgpu_kernel void @store_inline_imm_m_2.0_f64(double addrspace(1)* %out) { 1680; SI-LABEL: store_inline_imm_m_2.0_f64: 1681; SI: ; %bb.0: 1682; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1683; SI-NEXT: s_mov_b32 s3, 0xf000 1684; SI-NEXT: s_mov_b32 s2, -1 1685; SI-NEXT: v_mov_b32_e32 v0, 0 1686; SI-NEXT: v_mov_b32_e32 v1, -2.0 1687; SI-NEXT: s_waitcnt lgkmcnt(0) 1688; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1689; SI-NEXT: s_endpgm 1690; 1691; VI-LABEL: 
store_inline_imm_m_2.0_f64: 1692; VI: ; %bb.0: 1693; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1694; VI-NEXT: s_mov_b32 s3, 0xf000 1695; VI-NEXT: s_mov_b32 s2, -1 1696; VI-NEXT: v_mov_b32_e32 v0, 0 1697; VI-NEXT: v_mov_b32_e32 v1, -2.0 1698; VI-NEXT: s_waitcnt lgkmcnt(0) 1699; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1700; VI-NEXT: s_endpgm 1701 store double -2.0, double addrspace(1)* %out 1702 ret void 1703} 1704 1705define amdgpu_kernel void @store_inline_imm_4.0_f64(double addrspace(1)* %out) { 1706; SI-LABEL: store_inline_imm_4.0_f64: 1707; SI: ; %bb.0: 1708; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1709; SI-NEXT: s_mov_b32 s3, 0xf000 1710; SI-NEXT: s_mov_b32 s2, -1 1711; SI-NEXT: v_mov_b32_e32 v0, 0 1712; SI-NEXT: v_mov_b32_e32 v1, 0x40100000 1713; SI-NEXT: s_waitcnt lgkmcnt(0) 1714; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1715; SI-NEXT: s_endpgm 1716; 1717; VI-LABEL: store_inline_imm_4.0_f64: 1718; VI: ; %bb.0: 1719; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1720; VI-NEXT: s_mov_b32 s3, 0xf000 1721; VI-NEXT: s_mov_b32 s2, -1 1722; VI-NEXT: v_mov_b32_e32 v0, 0 1723; VI-NEXT: v_mov_b32_e32 v1, 0x40100000 1724; VI-NEXT: s_waitcnt lgkmcnt(0) 1725; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1726; VI-NEXT: s_endpgm 1727 store double 4.0, double addrspace(1)* %out 1728 ret void 1729} 1730 1731define amdgpu_kernel void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) { 1732; SI-LABEL: store_inline_imm_m_4.0_f64: 1733; SI: ; %bb.0: 1734; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1735; SI-NEXT: s_mov_b32 s3, 0xf000 1736; SI-NEXT: s_mov_b32 s2, -1 1737; SI-NEXT: v_mov_b32_e32 v0, 0 1738; SI-NEXT: v_mov_b32_e32 v1, 0xc0100000 1739; SI-NEXT: s_waitcnt lgkmcnt(0) 1740; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1741; SI-NEXT: s_endpgm 1742; 1743; VI-LABEL: store_inline_imm_m_4.0_f64: 1744; VI: ; %bb.0: 1745; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1746; VI-NEXT: s_mov_b32 s3, 0xf000 1747; VI-NEXT: s_mov_b32 
s2, -1 1748; VI-NEXT: v_mov_b32_e32 v0, 0 1749; VI-NEXT: v_mov_b32_e32 v1, 0xc0100000 1750; VI-NEXT: s_waitcnt lgkmcnt(0) 1751; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1752; VI-NEXT: s_endpgm 1753 store double -4.0, double addrspace(1)* %out 1754 ret void 1755} 1756 1757define amdgpu_kernel void @store_inv_2pi_f64(double addrspace(1)* %out) { 1758; SI-LABEL: store_inv_2pi_f64: 1759; SI: ; %bb.0: 1760; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1761; SI-NEXT: s_mov_b32 s3, 0xf000 1762; SI-NEXT: s_mov_b32 s2, -1 1763; SI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882 1764; SI-NEXT: v_mov_b32_e32 v1, 0x3fc45f30 1765; SI-NEXT: s_waitcnt lgkmcnt(0) 1766; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1767; SI-NEXT: s_endpgm 1768; 1769; VI-LABEL: store_inv_2pi_f64: 1770; VI: ; %bb.0: 1771; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1772; VI-NEXT: s_mov_b32 s3, 0xf000 1773; VI-NEXT: s_mov_b32 s2, -1 1774; VI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882 1775; VI-NEXT: v_mov_b32_e32 v1, 0x3fc45f30 1776; VI-NEXT: s_waitcnt lgkmcnt(0) 1777; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1778; VI-NEXT: s_endpgm 1779 store double 0x3fc45f306dc9c882, double addrspace(1)* %out 1780 ret void 1781} 1782 1783define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f64(double addrspace(1)* %out) { 1784; SI-LABEL: store_inline_imm_m_inv_2pi_f64: 1785; SI: ; %bb.0: 1786; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1787; SI-NEXT: s_mov_b32 s3, 0xf000 1788; SI-NEXT: s_mov_b32 s2, -1 1789; SI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882 1790; SI-NEXT: v_mov_b32_e32 v1, 0xbfc45f30 1791; SI-NEXT: s_waitcnt lgkmcnt(0) 1792; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1793; SI-NEXT: s_endpgm 1794; 1795; VI-LABEL: store_inline_imm_m_inv_2pi_f64: 1796; VI: ; %bb.0: 1797; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1798; VI-NEXT: s_mov_b32 s3, 0xf000 1799; VI-NEXT: s_mov_b32 s2, -1 1800; VI-NEXT: v_mov_b32_e32 v0, 0x6dc9c882 1801; VI-NEXT: v_mov_b32_e32 v1, 0xbfc45f30 1802; VI-NEXT: s_waitcnt 
lgkmcnt(0) 1803; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1804; VI-NEXT: s_endpgm 1805 store double 0xbfc45f306dc9c882, double addrspace(1)* %out 1806 ret void 1807} 1808 1809define amdgpu_kernel void @store_literal_imm_f64(double addrspace(1)* %out) { 1810; SI-LABEL: store_literal_imm_f64: 1811; SI: ; %bb.0: 1812; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1813; SI-NEXT: s_mov_b32 s3, 0xf000 1814; SI-NEXT: s_mov_b32 s2, -1 1815; SI-NEXT: v_mov_b32_e32 v0, 0 1816; SI-NEXT: v_mov_b32_e32 v1, 0x40b00000 1817; SI-NEXT: s_waitcnt lgkmcnt(0) 1818; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1819; SI-NEXT: s_endpgm 1820; 1821; VI-LABEL: store_literal_imm_f64: 1822; VI: ; %bb.0: 1823; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1824; VI-NEXT: s_mov_b32 s3, 0xf000 1825; VI-NEXT: s_mov_b32 s2, -1 1826; VI-NEXT: v_mov_b32_e32 v0, 0 1827; VI-NEXT: v_mov_b32_e32 v1, 0x40b00000 1828; VI-NEXT: s_waitcnt lgkmcnt(0) 1829; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1830; VI-NEXT: s_endpgm 1831 store double 4096.0, double addrspace(1)* %out 1832 ret void 1833} 1834 1835define amdgpu_vs void @literal_folding(float %arg) { 1836; GCN-LABEL: literal_folding: 1837; GCN: ; %bb.0: ; %main_body 1838; GCN-NEXT: v_mul_f32_e32 v1, 0x3f4353f8, v0 1839; GCN-NEXT: v_mul_f32_e32 v0, 0xbf4353f8, v0 1840; GCN-NEXT: exp pos0 v1, v1, v0, v0 done 1841; GCN-NEXT: s_endpgm 1842main_body: 1843 %tmp = fmul float %arg, 0x3FE86A7F00000000 1844 %tmp1 = fmul float %arg, 0xBFE86A7F00000000 1845 call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %tmp, float %tmp, float %tmp1, float %tmp1, i1 true, i1 false) #0 1846 ret void 1847} 1848 1849declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 1850 1851attributes #0 = { nounwind } 1852