; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s

; Verify that the backend correctly combines AVX2 builtin intrinsics.

; Constant-shift combines: back-to-back arithmetic right shifts by
; constants should fold into a single shift by the summed amount, and
; shifts by zero should be eliminated entirely.

define <8 x i32> @test_psra_1(<8 x i32> %A) {
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 3)
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 2)
  ret <8 x i32> %3
}
; CHECK-LABEL: test_psra_1
; CHECK: vpsrad $8, %ymm0, %ymm0
; CHECK-NEXT: ret

define <16 x i16> @test_psra_2(<16 x i16> %A) {
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 3)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 2)
  ret <16 x i16> %3
}
; CHECK-LABEL: test_psra_2
; CHECK: vpsraw $8, %ymm0, %ymm0
; CHECK-NEXT: ret

define <16 x i16> @test_psra_3(<16 x i16> %A) {
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
  ret <16 x i16> %3
}
; CHECK-LABEL: test_psra_3
; CHECK-NOT: vpsraw
; CHECK: ret

define <8 x i32> @test_psra_4(<8 x i32> %A) {
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
  ret <8 x i32> %3
}
; CHECK-LABEL: test_psra_4
; CHECK-NOT: vpsrad
; CHECK: ret

; Blend combines: a blend of a value with itself is a no-op, so no
; blend instruction should be emitted.

define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %res
}
; CHECK-LABEL: test_x86_avx2_pblendvb
; CHECK-NOT: vpblendvb
; CHECK: ret

define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0) {
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a0, i32 7)
  ret <16 x i16> %res
}
; CHECK-LABEL: test_x86_avx2_pblendw
; CHECK-NOT: vpblendw
; CHECK: ret

define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0) {
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}
; CHECK-LABEL: test_x86_avx2_pblendd_128
; CHECK-NOT: vpblendd
; CHECK: ret

define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0) {
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a0, i32 7)
  ret <8 x i32> %res
}
; CHECK-LABEL: test_x86_avx2_pblendd_256
; CHECK-NOT: vpblendd
; CHECK: ret

; Blend with an all-zero mask/immediate selects the first operand, so
; the blend should fold away.

define <32 x i8> @test2_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> zeroinitializer)
  ret <32 x i8> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendvb
; CHECK-NOT: vpblendvb
; CHECK: ret

define <16 x i16> @test2_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 0)
  ret <16 x i16> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendw
; CHECK-NOT: vpblendw
; CHECK: ret

define <4 x i32> @test2_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 0)
  ret <4 x i32> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendd_128
; CHECK-NOT: vpblendd
; CHECK: ret

define <8 x i32> @test2_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 0)
  ret <8 x i32> %res
}
; CHECK-LABEL: test2_x86_avx2_pblendd_256
; CHECK-NOT: vpblendd
; CHECK: ret

; Blend with an all-ones mask/immediate selects the second operand, so
; the blend should fold away.

define <32 x i8> @test3_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1) {
  %1 = bitcast <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1> to <32 x i8>
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %1)
  ret <32 x i8> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendvb
; CHECK-NOT: vpblendvb
; CHECK: ret

define <16 x i16> @test3_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 -1)
  ret <16 x i16> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendw
; CHECK-NOT: vpblendw
; CHECK: ret

define <4 x i32> @test3_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 -1)
  ret <4 x i32> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendd_128
; CHECK-NOT: vpblendd
; CHECK: ret

define <8 x i32> @test3_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 -1)
  ret <8 x i32> %res
}
; CHECK-LABEL: test3_x86_avx2_pblendd_256
; CHECK-NOT: vpblendd
; CHECK: ret

declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32)
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32)
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32)
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>)
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32)
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>)
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32)