1; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s 2 3; The main purpose of this test is to find out whether copyPhysReg can deal with 4; the memmove-like situation arising in tuples, where an early copy can clobber 5; the value needed by a later one if the tuples overlap. 6 7; We use dummy inline asm to force LLVM to generate a COPY between the registers 8; we want by clobbering all the others. 9 10define void @test_D1D2_from_D0D1(i8* %addr) #0 { 11; CHECK-LABEL: test_D1D2_from_D0D1: 12; CHECK: mov.8b v2, v1 13; CHECK: mov.8b v1, v0 14entry: 15 %addr_v8i8 = bitcast i8* %addr to <8 x i8>* 16 %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) 17 %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 18 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 19 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 20 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 21 22 tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 23 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 24 ret void 25} 26 27define void @test_D0D1_from_D1D2(i8* %addr) #0 { 28; CHECK-LABEL: test_D0D1_from_D1D2: 29; CHECK: mov.8b v0, v1 30; CHECK: mov.8b v1, v2 31entry: 32 %addr_v8i8 = bitcast i8* %addr to <8 x i8>* 33 %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) 34 %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 35 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 36 tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 37 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 38 39 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 40 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 41 ret void 42} 43 44define void @test_D0D1_from_D31D0(i8* %addr) #0 { 45; CHECK-LABEL: test_D0D1_from_D31D0: 46; CHECK: mov.8b v1, v0 47; CHECK: mov.8b v0, v31 48entry: 49 %addr_v8i8 = bitcast i8* %addr to <8 x i8>* 50 %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) 51 %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 52 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 53 tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() 54 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 55 56 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 57 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 58 ret void 59} 60 61define void @test_D31D0_from_D0D1(i8* %addr) #0 { 62; CHECK-LABEL: test_D31D0_from_D0D1: 63; CHECK: mov.8b v31, v0 64; CHECK: mov.8b v0, v1 65entry: 66 %addr_v8i8 = bitcast i8* %addr to <8 x i8>* 67 %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) 68 %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0 69 %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1 70 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 71 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 72 73 tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"() 74 tail call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, i8* %addr) 75 ret void 76} 77 78define void @test_D2D3D4_from_D0D1D2(i8* %addr) #0 { 79; CHECK-LABEL: test_D2D3D4_from_D0D1D2: 80; CHECK: mov.8b v4, v2 81; CHECK: mov.8b v3, v1 82; CHECK: mov.8b v2, v0 83entry: 84 %addr_v8i8 = bitcast i8* %addr to <8 x i8>* 85 %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>* %addr_v8i8) 86 %vec0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 0 87 %vec1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 1 88 %vec2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 2 89 90 tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 91 tail call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) 92 93 tail call void asm sideeffect "", "~{v0},~{v1},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 94 tail call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, i8* %addr) 95 ret void 96} 97 98define void @test_Q0Q1Q2_from_Q1Q2Q3(i8* %addr) #0 { 99; CHECK-LABEL: test_Q0Q1Q2_from_Q1Q2Q3: 100; CHECK: mov.16b v0, v1 101; CHECK: mov.16b v1, v2 102; CHECK: mov.16b v2, v3 103entry: 104 %addr_v16i8 = bitcast i8* %addr to <16 x i8>* 105 %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) 106 %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 107 %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 108 %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 109 tail call void asm sideeffect "", "~{v0},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 110 tail call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) 111 112 tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 113 tail call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, i8* %addr) 114 ret void 115} 116 117define void @test_Q1Q2Q3Q4_from_Q30Q31Q0Q1(i8* %addr) #0 { 118; CHECK-LABEL: test_Q1Q2Q3Q4_from_Q30Q31Q0Q1: 119; CHECK: mov.16b v4, v1 120; CHECK: mov.16b v3, v0 121; CHECK: mov.16b v2, v31 122; CHECK: mov.16b v1, v30 123 %addr_v16i8 = bitcast i8* %addr to <16 x i8>* 124 %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>* %addr_v16i8) 125 %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0 126 %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1 127 %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2 128 %vec3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 3 129 130 tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}"() 131 tail call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) 132 133 tail call void asm sideeffect "", "~{v0},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() 134 tail call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, i8* %addr) 135 ret void 136} 137 138declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0v8i8(<8 x i8>*) 139declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0v8i8(<8 x i8>*) 140declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0v16i8(<16 x i8>*) 141declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0v16i8(<16 x i8>*) 142 143declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) 144declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) 145declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) 146declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) 147