; Cost-model test for vector compares on x86-64.
;
; Each invocation below runs the cost-model analysis for a different -mcpu
; value and verifies the estimated cost printed for the vector fcmp/icmp
; instructions in @cmp. Every invocation enables the common prefix, one
; family prefix (SSE or AVX) and one CPU-level prefix (SSE2 ... AVX512).
;
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=pentium4 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=yonah | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE3 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSSE3 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=penryn | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE41 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE42 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2 %s
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; All compares operate on undef operands; only the operand type matters to
; the cost query. The check directives are matched in file order, so each
; group of expectations must stay directly above the instruction it covers.
define i32 @cmp(i32 %arg) {
  ;  -- floats --
  ;SSE2: cost of 3 {{.*}} fcmp
  ;SSE3: cost of 3 {{.*}} fcmp
  ;SSSE3: cost of 3 {{.*}} fcmp
  ;SSE41: cost of 3 {{.*}} fcmp
  ;SSE42: cost of 1 {{.*}} fcmp
  ;AVX: cost of 1 {{.*}} fcmp
  %A = fcmp olt <2 x float> undef, undef

  ;SSE2: cost of 7 {{.*}} fcmp
  ;SSE3: cost of 7 {{.*}} fcmp
  ;SSSE3: cost of 7 {{.*}} fcmp
  ;SSE41: cost of 7 {{.*}} fcmp
  ;SSE42: cost of 1 {{.*}} fcmp
  ;AVX: cost of 1 {{.*}} fcmp
  %B = fcmp olt <4 x float> undef, undef

  ;SSE2: cost of 14 {{.*}} fcmp
  ;SSE3: cost of 14 {{.*}} fcmp
  ;SSSE3: cost of 14 {{.*}} fcmp
  ;SSE41: cost of 14 {{.*}} fcmp
  ;SSE42: cost of 2 {{.*}} fcmp
  ;AVX: cost of 1 {{.*}} fcmp
  %C = fcmp olt <8 x float> undef, undef

  ;SSE2: cost of 3 {{.*}} fcmp
  ;SSE3: cost of 3 {{.*}} fcmp
  ;SSSE3: cost of 3 {{.*}} fcmp
  ;SSE41: cost of 3 {{.*}} fcmp
  ;SSE42: cost of 1 {{.*}} fcmp
  ;AVX: cost of 1 {{.*}} fcmp
  %D = fcmp olt <2 x double> undef, undef

  ;SSE2: cost of 6 {{.*}} fcmp
  ;SSE3: cost of 6 {{.*}} fcmp
  ;SSSE3: cost of 6 {{.*}} fcmp
  ;SSE41: cost of 6 {{.*}} fcmp
  ;SSE42: cost of 2 {{.*}} fcmp
  ;AVX: cost of 1 {{.*}} fcmp
  %E = fcmp olt <4 x double> undef, undef

  ; The next three compares are only verified for the knl run; their
  ; patterns name the result value to pin the match to that instruction.
  ; AVX512: cost of 1 {{.*}} %E1 = fcmp
  %E1 = fcmp olt <16 x float> undef, undef

  ; AVX512: cost of 1 {{.*}} %E2 = fcmp
  %E2 = fcmp olt <8 x double> undef, undef

  ; AVX512: cost of 2 {{.*}} %E3 = fcmp
  %E3 = fcmp olt <16 x double> undef, undef

  ;  -- integers --

  ;SSE2: cost of 1 {{.*}} icmp
  ;SSE3: cost of 1 {{.*}} icmp
  ;SSSE3: cost of 1 {{.*}} icmp
  ;SSE41: cost of 1 {{.*}} icmp
  ;SSE42: cost of 1 {{.*}} icmp
  ;AVX: cost of 1 {{.*}} icmp
  %F = icmp eq <16 x i8> undef, undef

  ;SSE2: cost of 1 {{.*}} icmp
  ;SSE3: cost of 1 {{.*}} icmp
  ;SSSE3: cost of 1 {{.*}} icmp
  ;SSE41: cost of 1 {{.*}} icmp
  ;SSE42: cost of 1 {{.*}} icmp
  ;AVX: cost of 1 {{.*}} icmp
  %G = icmp eq <8 x i16> undef, undef

  ;SSE2: cost of 1 {{.*}} icmp
  ;SSE3: cost of 1 {{.*}} icmp
  ;SSSE3: cost of 1 {{.*}} icmp
  ;SSE41: cost of 1 {{.*}} icmp
  ;SSE42: cost of 1 {{.*}} icmp
  ;AVX: cost of 1 {{.*}} icmp
  %H = icmp eq <4 x i32> undef, undef

  ;SSE2: cost of 8 {{.*}} icmp
  ;SSE3: cost of 8 {{.*}} icmp
  ;SSSE3: cost of 8 {{.*}} icmp
  ;SSE41: cost of 8 {{.*}} icmp
  ;SSE42: cost of 1 {{.*}} icmp
  ;AVX: cost of 1 {{.*}} icmp
  %I = icmp eq <2 x i64> undef, undef

  ; From here on the 256-bit integer compares carry separate expectations
  ; for the corei7-avx and core-avx2 runs; the knl run enables neither of
  ; those two prefixes, so it does not verify these four instructions.
  ;SSE2: cost of 16 {{.*}} icmp
  ;SSE3: cost of 16 {{.*}} icmp
  ;SSSE3: cost of 16 {{.*}} icmp
  ;SSE41: cost of 16 {{.*}} icmp
  ;SSE42: cost of 2 {{.*}} icmp
  ;AVX1: cost of 4 {{.*}} icmp
  ;AVX2: cost of 1 {{.*}} icmp
  %J = icmp eq <4 x i64> undef, undef

  ;SSE2: cost of 2 {{.*}} icmp
  ;SSE3: cost of 2 {{.*}} icmp
  ;SSSE3: cost of 2 {{.*}} icmp
  ;SSE41: cost of 2 {{.*}} icmp
  ;SSE42: cost of 2 {{.*}} icmp
  ;AVX1: cost of 4 {{.*}} icmp
  ;AVX2: cost of 1 {{.*}} icmp
  %K = icmp eq <8 x i32> undef, undef

  ;SSE2: cost of 2 {{.*}} icmp
  ;SSE3: cost of 2 {{.*}} icmp
  ;SSSE3: cost of 2 {{.*}} icmp
  ;SSE41: cost of 2 {{.*}} icmp
  ;SSE42: cost of 2 {{.*}} icmp
  ;AVX1: cost of 4 {{.*}} icmp
  ;AVX2: cost of 1 {{.*}} icmp
  %L = icmp eq <16 x i16> undef, undef

  ;SSE2: cost of 2 {{.*}} icmp
  ;SSE3: cost of 2 {{.*}} icmp
  ;SSSE3: cost of 2 {{.*}} icmp
  ;SSE41: cost of 2 {{.*}} icmp
  ;SSE42: cost of 2 {{.*}} icmp
  ;AVX1: cost of 4 {{.*}} icmp
  ;AVX2: cost of 1 {{.*}} icmp
  %M = icmp eq <32 x i8> undef, undef

  ; 512-bit and wider integer compares, verified for the knl run only.
  ; AVX512: cost of 1 {{.*}} %M1 = icmp
  %M1 = icmp eq <16 x i32> undef, undef

  ; AVX512: cost of 1 {{.*}} %M2 = icmp
  %M2 = icmp eq <8 x i64> undef, undef

  ; AVX512: cost of 2 {{.*}} %M3 = icmp
  %M3 = icmp eq <16 x i64> undef, undef

  ; The return is expected to be free in every configuration.
  ;CHECK: cost of 0 {{.*}} ret
  ret i32 undef
}