; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX

; Test ADDSUB ISel patterns.

; Functions below are obtained from the following source:
;
; typedef double double2 __attribute__((ext_vector_type(2)));
; typedef double double4 __attribute__((ext_vector_type(4)));
; typedef float float4 __attribute__((ext_vector_type(4)));
; typedef float float8 __attribute__((ext_vector_type(8)));
;
; float4 test1(float4 A, float4 B) {
;   float4 X = A - B;
;   float4 Y = A + B;
;   return (float4){X[0], Y[1], X[2], Y[3]};
; }
;
; float8 test2(float8 A, float8 B) {
;   float8 X = A - B;
;   float8 Y = A + B;
;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
; }
;
; double4 test3(double4 A, double4 B) {
;   double4 X = A - B;
;   double4 Y = A + B;
;   return (double4){X[0], Y[1], X[2], Y[3]};
; }
;
; double2 test4(double2 A, double2 B) {
;   double2 X = A - B;
;   double2 Y = A + B;
;   return (double2){X[0], Y[1]};
; }

define <4 x float> @test1(<4 x float> %A, <4 x float> %B) {
; SSE-LABEL: test1:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sub = fsub <4 x float> %A, %B
  %add = fadd <4 x float> %A, %B
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2(<8 x float> %A, <8 x float> %B) {
; SSE-LABEL: test2:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps %xmm2, %xmm0
; SSE-NEXT:    addsubps %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <8 x float> %A, %B
  %add = fadd <8 x float> %A, %B
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3(<4 x double> %A, <4 x double> %B) {
; SSE-LABEL: test3:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd %xmm2, %xmm0
; SSE-NEXT:    addsubpd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %sub = fsub <4 x double> %A, %B
  %add = fadd <4 x double> %A, %B
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4(<2 x double> %A, <2 x double> %B) #0 {
; SSE-LABEL: test4:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %add = fadd <2 x double> %A, %B
  %sub = fsub <2 x double> %A, %B
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4b:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4b:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %vecinit2
}

define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
; SSE-LABEL: test1c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test1c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <4 x float>, <4 x float>* %B
  %add = fadd <4 x float> %A, %1
  %sub = fsub <4 x float> %A, %1
  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x float> %vecinit6
}

define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
; SSE-LABEL: test2c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubps (%rdi), %xmm0
; SSE-NEXT:    addsubps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test2c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <8 x float>, <8 x float>* %B
  %add = fadd <8 x float> %A, %1
  %sub = fsub <8 x float> %A, %1
  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x float> %vecinit14
}

define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
; SSE-LABEL: test3c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test3c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = load <4 x double>, <4 x double>* %B
  %add = fadd <4 x double> %A, %1
  %sub = fsub <4 x double> %A, %1
  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %vecinit6
}

define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
; SSE-LABEL: test4c:
; SSE:       # BB#0:
; SSE-NEXT:    addsubpd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test4c:
; AVX:       # BB#0:
; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = load <2 x double>, <2 x double>* %B
  %sub = fsub <2 x double> %A, %1
  %add = fadd <2 x double> %A, %1
  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 2, i32 1>
  ret <2 x double> %vecinit2
}