; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s

; Code-generation test for loads of double and <N x double> values through
; addrspace(1) pointers (global memory on AMDGPU). Every llc invocation above
; passes -amdgpu-scalarize-global-loads=false and -verify-machineinstrs.
;
; Three configurations are exercised:
;   1. -march=amdgcn with no -mcpu                  (GCN-NOHSA checks)
;   2. -mtriple=amdgcn-amdhsa -mcpu=kaveri          (GCN-HSA checks)
;   3. -march=amdgcn -mcpu=tonga, flat-for-global
;      turned off via -mattr                        (GCN-NOHSA checks)
;
; The NOHSA checks look for buffer_* memory instructions and the HSA checks
; look for flat_* memory instructions. Each kernel below performs exactly one
; IR load from %in followed by one IR store of that value to %out.

; Scalar double: a single dwordx2 load is expected. The loaded register pair
; is captured as VAL and must reappear as an operand of the matching dwordx2
; store. In the flat form another register pair (presumably the address)
; precedes VAL.
; FUNC-LABEL: {{^}}global_load_f64:
; GCN-NOHSA: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN-NOHSA: buffer_store_dwordx2 [[VAL]]

; GCN-HSA: flat_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
; GCN-HSA: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
define amdgpu_kernel void @global_load_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
  %ld = load double, double addrspace(1)* %in
  store double %ld, double addrspace(1)* %out
  ret void
}

; <2 x double>: one dwordx4 load is expected.
; FUNC-LABEL: {{^}}global_load_v2f64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
define amdgpu_kernel void @global_load_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) #0 {
entry:
  %ld = load <2 x double>, <2 x double> addrspace(1)* %in
  store <2 x double> %ld, <2 x double> addrspace(1)* %out
  ret void
}

; <3 x double>: two dwordx4 loads are expected, the same pattern that is
; checked for <4 x double> below.
; FUNC-LABEL: {{^}}global_load_v3f64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
define amdgpu_kernel void @global_load_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x double>, <3 x double> addrspace(1)* %in
  store <3 x double> %ld, <3 x double> addrspace(1)* %out
  ret void
}

; <4 x double>: two dwordx4 loads are expected.
; FUNC-LABEL: {{^}}global_load_v4f64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
define amdgpu_kernel void @global_load_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) #0 {
entry:
  %ld = load <4 x double>, <4 x double> addrspace(1)* %in
  store <4 x double> %ld, <4 x double> addrspace(1)* %out
  ret void
}

; <8 x double>: four dwordx4 loads are expected.
; FUNC-LABEL: {{^}}global_load_v8f64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
define amdgpu_kernel void @global_load_v8f64(<8 x double> addrspace(1)* %out, <8 x double> addrspace(1)* %in) #0 {
entry:
  %ld = load <8 x double>, <8 x double> addrspace(1)* %in
  store <8 x double> %ld, <8 x double> addrspace(1)* %out
  ret void
}

; <16 x double>: eight dwordx4 loads are expected.
; FUNC-LABEL: {{^}}global_load_v16f64:
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
; GCN-HSA: flat_load_dwordx4
define amdgpu_kernel void @global_load_v16f64(<16 x double> addrspace(1)* %out, <16 x double> addrspace(1)* %in) #0 {
entry:
  %ld = load <16 x double>, <16 x double> addrspace(1)* %in
  store <16 x double> %ld, <16 x double> addrspace(1)* %out
  ret void
}

; Attribute group shared by every kernel in this file.
attributes #0 = { nounwind }