; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; CHECK-LABEL: 'add'
define i32 @add(i32 %arg) {
  ; SSSE3: cost of 1 {{.*}} %A = add
  ; SSE42: cost of 1 {{.*}} %A = add
  ; AVX: cost of 1 {{.*}} %A = add
  ; AVX2: cost of 1 {{.*}} %A = add
  %A = add <4 x i32> undef, undef
  ; SSSE3: cost of 2 {{.*}} %B = add
  ; SSE42: cost of 2 {{.*}} %B = add
  ; AVX: cost of 4 {{.*}} %B = add
  ; AVX2: cost of 1 {{.*}} %B = add
  %B = add <8 x i32> undef, undef
  ; SSSE3: cost of 1 {{.*}} %C = add
  ; SSE42: cost of 1 {{.*}} %C = add
  ; AVX: cost of 1 {{.*}} %C = add
  ; AVX2: cost of 1 {{.*}} %C = add
  %C = add <2 x i64> undef, undef
  ; SSSE3: cost of 2 {{.*}} %D = add
  ; SSE42: cost of 2 {{.*}} %D = add
  ; AVX: cost of 4 {{.*}} %D = add
  ; AVX2: cost of 1 {{.*}} %D = add
  %D = add <4 x i64> undef, undef
  ; SSSE3: cost of 4 {{.*}} %E = add
  ; SSE42: cost of 4 {{.*}} %E = add
  ; AVX: cost of 8 {{.*}} %E = add
  ; AVX2: cost of 2 {{.*}} %E = add
  %E = add <8 x i64> undef, undef
  ret i32 undef
}

; CHECK-LABEL: 'xor'
define i32 @xor(i32 %arg) {
  ; SSSE3: cost of 1 {{.*}} %A = xor
  ; SSE42: cost of 1 {{.*}} %A = xor
  ; AVX: cost of 1 {{.*}} %A = xor
  ; AVX2: cost of 1 {{.*}} %A = xor
  %A = xor <4 x i32> undef, undef
  ; SSSE3: cost of 2 {{.*}} %B = xor
  ; SSE42: cost of 2 {{.*}} %B = xor
  ; AVX: cost of 1 {{.*}} %B = xor
  ; AVX2: cost of 1 {{.*}} %B = xor
  %B = xor <8 x i32> undef, undef
  ; SSSE3: cost of 1 {{.*}} %C = xor
  ; SSE42: cost of 1 {{.*}} %C = xor
  ; AVX: cost of 1 {{.*}} %C = xor
  ; AVX2: cost of 1 {{.*}} %C = xor
  %C = xor <2 x i64> undef, undef
  ; SSSE3: cost of 2 {{.*}} %D = xor
  ; SSE42: cost of 2 {{.*}} %D = xor
  ; AVX: cost of 1 {{.*}} %D = xor
  ; AVX2: cost of 1 {{.*}} %D = xor
  %D = xor <4 x i64> undef, undef
  ret i32 undef
}

; CHECK-LABEL: 'mul'
define void @mul() {
  ; A <2 x i32> gets expanded to a <2 x i64> vector.
  ; A <2 x i64> vector multiply is implemented using
  ; 3 PMULUDQ and 2 PADDS and 4 shifts.
  ; SSSE3: cost of 9 {{.*}} %A0 = mul
  ; SSE42: cost of 9 {{.*}} %A0 = mul
  ; AVX: cost of 9 {{.*}} %A0 = mul
  ; AVX2: cost of 9 {{.*}} %A0 = mul
  %A0 = mul <2 x i32> undef, undef
  ; SSSE3: cost of 6 {{.*}} %A1 = mul
  ; SSE42: cost of 1 {{.*}} %A1 = mul
  ; AVX: cost of 1 {{.*}} %A1 = mul
  ; AVX2: cost of 1 {{.*}} %A1 = mul
  %A1 = mul <4 x i32> undef, undef
  ; SSSE3: cost of 9 {{.*}} %A2 = mul
  ; SSE42: cost of 9 {{.*}} %A2 = mul
  ; AVX: cost of 9 {{.*}} %A2 = mul
  ; AVX2: cost of 9 {{.*}} %A2 = mul
  %A2 = mul <2 x i64> undef, undef
  ; SSSE3: cost of 18 {{.*}} %A3 = mul
  ; SSE42: cost of 18 {{.*}} %A3 = mul
  ; AVX: cost of 18 {{.*}} %A3 = mul
  ; AVX2: cost of 9 {{.*}} %A3 = mul
  %A3 = mul <4 x i64> undef, undef
  ret void
}

; CHECK-LABEL: 'fmul'
define i32 @fmul(i32 %arg) {
  ; SSSE3: cost of 2 {{.*}} %A = fmul
  ; SSE42: cost of 2 {{.*}} %A = fmul
  ; AVX: cost of 2 {{.*}} %A = fmul
  ; AVX2: cost of 2 {{.*}} %A = fmul
  %A = fmul <4 x float> undef, undef
  ; SSSE3: cost of 4 {{.*}} %B = fmul
  ; SSE42: cost of 4 {{.*}} %B = fmul
  ; AVX: cost of 2 {{.*}} %B = fmul
  ; AVX2: cost of 2 {{.*}} %B = fmul
  %B = fmul <8 x float> undef, undef
  ret i32 undef
}

; CHECK-LABEL: 'shift'
define void @shift() {
  ; SSSE3: cost of 10 {{.*}} %A0 = shl
  ; SSE42: cost of 10 {{.*}} %A0 = shl
  ; AVX: cost of 10 {{.*}} %A0 = shl
  ; AVX2: cost of 1 {{.*}} %A0 = shl
  %A0 = shl <4 x i32> undef, undef
  ; SSSE3: cost of 4 {{.*}} %A1 = shl
  ; SSE42: cost of 4 {{.*}} %A1 = shl
  ; AVX: cost of 4 {{.*}} %A1 = shl
  ; AVX2: cost of 1 {{.*}} %A1 = shl
  %A1 = shl <2 x i64> undef, undef
  ; SSSE3: cost of 20 {{.*}} %A2 = shl
  ; SSE42: cost of 20 {{.*}} %A2 = shl
  ; AVX: cost of 20 {{.*}} %A2 = shl
  ; AVX2: cost of 1 {{.*}} %A2 = shl
  %A2 = shl <8 x i32> undef, undef
  ; SSSE3: cost of 8 {{.*}} %A3 = shl
  ; SSE42: cost of 8 {{.*}} %A3 = shl
  ; AVX: cost of 8 {{.*}} %A3 = shl
  ; AVX2: cost of 1 {{.*}} %A3 = shl
  %A3 = shl <4 x i64> undef, undef

  ; SSSE3: cost of 16 {{.*}} %B0 = lshr
  ; SSE42: cost of 16 {{.*}} %B0 = lshr
  ; AVX: cost of 16 {{.*}} %B0 = lshr
  ; AVX2: cost of 1 {{.*}} %B0 = lshr
  %B0 = lshr <4 x i32> undef, undef
  ; SSSE3: cost of 4 {{.*}} %B1 = lshr
  ; SSE42: cost of 4 {{.*}} %B1 = lshr
  ; AVX: cost of 4 {{.*}} %B1 = lshr
  ; AVX2: cost of 1 {{.*}} %B1 = lshr
  %B1 = lshr <2 x i64> undef, undef
  ; SSSE3: cost of 32 {{.*}} %B2 = lshr
  ; SSE42: cost of 32 {{.*}} %B2 = lshr
  ; AVX: cost of 32 {{.*}} %B2 = lshr
  ; AVX2: cost of 1 {{.*}} %B2 = lshr
  %B2 = lshr <8 x i32> undef, undef
  ; SSSE3: cost of 8 {{.*}} %B3 = lshr
  ; SSE42: cost of 8 {{.*}} %B3 = lshr
  ; AVX: cost of 8 {{.*}} %B3 = lshr
  ; AVX2: cost of 1 {{.*}} %B3 = lshr
  %B3 = lshr <4 x i64> undef, undef

  ; SSSE3: cost of 16 {{.*}} %C0 = ashr
  ; SSE42: cost of 16 {{.*}} %C0 = ashr
  ; AVX: cost of 16 {{.*}} %C0 = ashr
  ; AVX2: cost of 1 {{.*}} %C0 = ashr
  %C0 = ashr <4 x i32> undef, undef
  ; SSSE3: cost of 12 {{.*}} %C1 = ashr
  ; SSE42: cost of 12 {{.*}} %C1 = ashr
  ; AVX: cost of 12 {{.*}} %C1 = ashr
  ; AVX2: cost of 4 {{.*}} %C1 = ashr
  %C1 = ashr <2 x i64> undef, undef
  ; SSSE3: cost of 32 {{.*}} %C2 = ashr
  ; SSE42: cost of 32 {{.*}} %C2 = ashr
  ; AVX: cost of 32 {{.*}} %C2 = ashr
  ; AVX2: cost of 1 {{.*}} %C2 = ashr
  %C2 = ashr <8 x i32> undef, undef
  ; SSSE3: cost of 24 {{.*}} %C3 = ashr
  ; SSE42: cost of 24 {{.*}} %C3 = ashr
  ; AVX: cost of 24 {{.*}} %C3 = ashr
  ; AVX2: cost of 4 {{.*}} %C3 = ashr
  %C3 = ashr <4 x i64> undef, undef

  ret void
}