; Signed-remainder (srem) code generation tests for the amdgcn and r600 targets.
;
; The three amdgcn invocations below pipe the llc output through FileCheck.
; The r600 invocation has no FileCheck stage, so it only verifies that llc
; compiles every function in this file without failing.

; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,GFX9 -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s

; i16 remainder by the constant 7. Only the gfx900 run checks the output: it
; expects the constant 0x4925 to be materialized into a scalar register and an
; SDWA 24-bit multiply to be emitted.
; FUNC-LABEL: {{^}}srem_i16_7:
; GFX9: s_movk_i32 {{s[0-9]+}}, 0x4925
; GFX9: v_mul_i32_i24_sdwa
define amdgpu_kernel void @srem_i16_7(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %num = load i16, i16 addrspace(1) * %in
  %result = srem i16 %num, 7
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; i32 remainder with a variable divisor; numerator and divisor are adjacent
; elements of %in. No output checks: this only has to compile.
define amdgpu_kernel void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %num = load i32, i32 addrspace(1) * %in
  %den = load i32, i32 addrspace(1) * %den_ptr
  %result = srem i32 %num, %den
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; i32 remainder by the power-of-two constant 4. No output checks.
define amdgpu_kernel void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %num = load i32, i32 addrspace(1) * %in
  %result = srem i32 %num, 4
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; i32 remainder by the constant 7. All amdgcn runs expect the multiply-based
; expansion: the constant 0x92492493 in a scalar register, a signed high
; multiply by it, then a low multiply and a subtract (either the _i32 or the
; _u32 spelling of the subtract is accepted).
; FUNC-LABEL: {{^}}srem_i32_7:
; SI: s_mov_b32 [[MAGIC:s[0-9]+]], 0x92492493
; SI: v_mul_hi_i32 {{v[0-9]+}}, {{v[0-9]+}}, [[MAGIC]]
; SI: v_mul_lo_u32
; SI: v_sub_{{[iu]}}32
; SI: s_endpgm
define amdgpu_kernel void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %num = load i32, i32 addrspace(1) * %in
  %result = srem i32 %num, 7
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; The remaining functions carry no output checks; they cover vector and 64-bit
; srem with variable divisors and with the constant divisor 4.

; <2 x i32> remainder with a variable divisor vector.
define amdgpu_kernel void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
  %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
  %result = srem <2 x i32> %num, %den
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; <2 x i32> remainder by a splat of 4.
define amdgpu_kernel void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
  %result = srem <2 x i32> %num, <i32 4, i32 4>
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> remainder with a variable divisor vector.
define amdgpu_kernel void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
  %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
  %result = srem <4 x i32> %num, %den
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> remainder by a splat of 4.
define amdgpu_kernel void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
  %result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; i64 remainder with a variable divisor.
define amdgpu_kernel void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  %den_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
  %num = load i64, i64 addrspace(1) * %in
  %den = load i64, i64 addrspace(1) * %den_ptr
  %result = srem i64 %num, %den
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; i64 remainder by the constant 4.
define amdgpu_kernel void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  %num = load i64, i64 addrspace(1) * %in
  %result = srem i64 %num, 4
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; <2 x i64> remainder with a variable divisor vector.
define amdgpu_kernel void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
  %den_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
  %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
  %den = load <2 x i64>, <2 x i64> addrspace(1) * %den_ptr
  %result = srem <2 x i64> %num, %den
  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
  ret void
}

; <2 x i64> remainder by a splat of 4.
define amdgpu_kernel void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
  %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
  %result = srem <2 x i64> %num, <i64 4, i64 4>
  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
  ret void
}

; <4 x i64> remainder with a variable divisor vector.
define amdgpu_kernel void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
  %den_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
  %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
  %den = load <4 x i64>, <4 x i64> addrspace(1) * %den_ptr
  %result = srem <4 x i64> %num, %den
  store <4 x i64> %result, <4 x i64> addrspace(1)* %out
  ret void
}

; <4 x i64> remainder by a splat of 4.
define amdgpu_kernel void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
  %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
  %result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4>
  store <4 x i64> %result, <4 x i64> addrspace(1)* %out
  ret void
}