; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Checks the instruction sequences selected for small, constant-size (32 byte)
; llvm.memcpy calls at various alignments, for addrspace(3) (the *_lds_to_lds_*
; tests) and addrspace(1) (the *_global_to_global_* tests) pointers.
; Both llc invocations above use the same check prefixes, so the expected
; output is identical for the two -mcpu values.

declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind


; 32 bytes, alignment argument 1: expect 32 single-byte reads and 32
; single-byte writes, in any order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8
; SI-DAG: ds_read_u8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8
; SI-DAG: ds_write_b8

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
  ret void
}

; 32 bytes, alignment argument 2: expect 16 16-bit reads and 16 16-bit
; writes, in any order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16

; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16
; SI-DAG: ds_read_u16

; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16

; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16
; SI-DAG: ds_write_b16

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
  ret void
}

; 32 bytes, alignment argument 4: expect four ds_read2_b32 followed by four
; ds_write2_b32, in that order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
; SI: ds_read2_b32
; SI: ds_read2_b32
; SI: ds_read2_b32
; SI: ds_read2_b32

; SI: ds_write2_b32
; SI: ds_write2_b32
; SI: ds_write2_b32
; SI: ds_write2_b32

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
  ret void
}

; 32 bytes, alignment argument 8: expect two ds_read2_b64 followed by two
; ds_write2_b64, in that order (the 64-bit forms are already what is checked).
; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:

; SI: ds_read2_b64
; SI: ds_read2_b64

; SI: ds_write2_b64
; SI: ds_write2_b64

; SI: s_endpgm
define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
  ret void
}

; 32 bytes, alignment argument 1: expect 32 byte loads and 32 byte stores,
; in any order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte
; SI-DAG: buffer_load_ubyte
; SI-DAG: buffer_store_byte

; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
  ret void
}

; 32 bytes, alignment argument 2: expect 16 16-bit loads and 16 16-bit
; stores, in any order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort
; SI-DAG: buffer_load_ushort

; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short
; SI-DAG: buffer_store_short

; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
  ret void
}

; 32 bytes, alignment argument 4: expect two dwordx4 loads followed by two
; dwordx4 stores, in that order.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
  ret void
}

; 32 bytes, alignment argument 8: same expected sequence as the align4 case.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
  ret void
}

; 32 bytes, alignment argument 16: same expected sequence as the align4 case.
; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
  ret void
}