; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s

; FIXME: Merge into imm.ll

; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_i16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) {
  store volatile i16 -32768, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_0.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
  store half 0.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_imm_neg_0.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
  store half -0.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_0.5_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3800{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
  store half 0.5, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_0.5_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb800{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
  store half -0.5, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_1.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
  store half 1.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_1.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
  store half -1.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_2.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
  store half 2.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_2.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc000{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
  store half -2.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_4.0_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4400{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
  store half 4.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_4.0_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc400{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
  store half -4.0, half addrspace(1)* %out
  ret void
}

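; The SI and VI checks for the negative constants above differ in how the same
; 16-bit bit pattern is materialized: SI shows it zero-extended (e.g. 0x8000)
; while VI shows it sign-extended into the 32-bit move (e.g. 0xffff8000),
; presumably because VI has native 16-bit operations while SI promotes them to
; 32 bits. The remaining store tests below follow the same pattern.
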
; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3118{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) {
  store half 0xH3118, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f16:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118{{$}}
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb118{{$}}
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) {
  store half 0xHB118, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}store_literal_imm_f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c00
; GCN: buffer_store_short [[REG]]
define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) {
  store half 4096.0, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_0.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_0.5_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0.5
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -0.5
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_1.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 1.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -1.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_2.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 2.0
  store half %y, half addrspace(1)* %out
  ret void
}

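; On VI, f16 values such as 0.0, +/-0.5, +/-1.0, +/-2.0 and +/-4.0 are inline
; constants and appear directly in the v_add_f16 checks, whereas non-inline
; values such as 1024.0 (0x6400) and 4096.0 (0x6c00) have to be emitted as
; literal constants.
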
; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -2.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_4.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 4.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, -4.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_f16:
; VI: buffer_load_ushort [[VAL:v[0-9]+]]
; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
  %x = load half, half addrspace(1)* %in
  %y = fadd half %x, 0.5
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}commute_add_literal_f16:
; VI: buffer_load_ushort [[VAL:v[0-9]+]]
; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0x6400, [[VAL]]
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
  %x = load half, half addrspace(1)* %in
  %y = fadd half %x, 1024.0
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_1_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 1{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0001
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_2_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 2{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0002
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_16_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 16{{$}}
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0010
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_1_f16:
; VI: v_add_u32_e32 [[REG:v[0-9]+]], vcc, -1
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
  %x = load i16, i16 addrspace(1)* %in
  %y = add i16 %x, -1
  %ybc = bitcast i16 %y to half
  store half %ybc, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_2_f16:
; VI: v_add_u32_e32 [[REG:v[0-9]+]], vcc, 0xfffe
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
  %x = load i16, i16 addrspace(1)* %in
  %y = add i16 %x, -2
  %ybc = bitcast i16 %y to half
  store half %ybc, half addrspace(1)* %out
  ret void
}

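; In the add_inline_imm_neg_{1,2,16}_f16 tests the i16 add is selected as a
; 32-bit v_add_u32 on VI; only -1 is still printed as an inline constant, while
; -2 and -16 show up as the zero-extended literals 0xfffe and 0xfff0.
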
; GCN-LABEL: {{^}}add_inline_imm_neg_16_f16:
; VI: v_add_u32_e32 [[REG:v[0-9]+]], vcc, 0xfff0
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
  %x = load i16, i16 addrspace(1)* %in
  %y = add i16 %x, -16
  %ybc = bitcast i16 %y to half
  store half %ybc, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_63_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 63
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH003F
  store half %y, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_64_f16:
; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: v_add_f16_e64 [[REG:v[0-9]+]], [[VAL]], 64
; VI: buffer_store_short [[REG]]
define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) {
  %y = fadd half %x, 0xH0040
  store half %y, half addrspace(1)* %out
  ret void
}