; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=pentium4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=SSE42
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX1
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX1
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2

; Verify the cost of vector population count instructions.

; Population-count intrinsics under test: the four 128-bit vector types first,
; then the four 256-bit vector types.
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)

declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)

; Each function below wraps exactly one ctpop call so that the reported cost
; of the %ctpop instruction can be matched in isolation. The function header
; is matched with a label directive so that the cost checks of one function
; can never be satisfied by the output printed for a different function.
;
; Expectations encoded below: every 128-bit case costs 2 under all three
; prefix groups; every 256-bit case costs 4 under the SSE prefix and 2 under
; the AVX and XOP prefixes.

; 128-bit, 2 x i64.
define <2 x i64> @var_ctpop_v2i64(<2 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctpop_v2i64':
; SSE: Found an estimated cost of 2 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  ret <2 x i64> %ctpop
}

; 256-bit, 4 x i64.
define <4 x i64> @var_ctpop_v4i64(<4 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctpop_v4i64':
; SSE: Found an estimated cost of 4 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a)
  ret <4 x i64> %ctpop
}

; 128-bit, 4 x i32.
define <4 x i32> @var_ctpop_v4i32(<4 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctpop_v4i32':
; SSE: Found an estimated cost of 2 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
  ret <4 x i32> %ctpop
}

; 256-bit, 8 x i32.
define <8 x i32> @var_ctpop_v8i32(<8 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctpop_v8i32':
; SSE: Found an estimated cost of 4 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a)
  ret <8 x i32> %ctpop
}

; 128-bit, 8 x i16.
define <8 x i16> @var_ctpop_v8i16(<8 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctpop_v8i16':
; SSE: Found an estimated cost of 2 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
  ret <8 x i16> %ctpop
}

; 256-bit, 16 x i16.
define <16 x i16> @var_ctpop_v16i16(<16 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctpop_v16i16':
; SSE: Found an estimated cost of 4 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %a)
  ret <16 x i16> %ctpop
}

; 128-bit, 16 x i8.
define <16 x i8> @var_ctpop_v16i8(<16 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctpop_v16i8':
; SSE: Found an estimated cost of 2 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
  ret <16 x i8> %ctpop
}

; 256-bit, 32 x i8.
define <32 x i8> @var_ctpop_v32i8(<32 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctpop_v32i8':
; SSE: Found an estimated cost of 4 for instruction:   %ctpop
; AVX: Found an estimated cost of 2 for instruction:   %ctpop
; XOP: Found an estimated cost of 2 for instruction:   %ctpop
  %ctpop = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %a)
  ret <32 x i8> %ctpop
}

; Verify the cost of vector leading zero count instructions.

; Leading-zero-count intrinsics under test: the four 128-bit vector types
; first, then the four 256-bit vector types. Each takes the vector operand
; plus an i1 flag.
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)

declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)

; Each function below wraps exactly one ctlz call so that the reported cost
; of the %ctlz instruction can be matched in isolation. Functions come in
; pairs: the plain name passes i1 0 as the flag, the name with the "u" suffix
; passes i1 1 (per the LangRef this flag says whether a zero input yields an
; undefined result). The function header is matched with a label directive so
; that the cost checks of one function can never be satisfied by the output
; printed for a different function.
;
; Expectations encoded below: the cost is the same under all three prefix
; groups, is unaffected by the flag, and equals three times the number of
; vector elements (2 -> 6, 4 -> 12, 8 -> 24, 16 -> 48, 32 -> 96).

define <2 x i64> @var_ctlz_v2i64(<2 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v2i64':
; SSE: Found an estimated cost of 6 for instruction:   %ctlz
; AVX: Found an estimated cost of 6 for instruction:   %ctlz
; XOP: Found an estimated cost of 6 for instruction:   %ctlz
  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 0)
  ret <2 x i64> %ctlz
}

define <2 x i64> @var_ctlz_v2i64u(<2 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v2i64u':
; SSE: Found an estimated cost of 6 for instruction:   %ctlz
; AVX: Found an estimated cost of 6 for instruction:   %ctlz
; XOP: Found an estimated cost of 6 for instruction:   %ctlz
  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 1)
  ret <2 x i64> %ctlz
}

define <4 x i64> @var_ctlz_v4i64(<4 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v4i64':
; SSE: Found an estimated cost of 12 for instruction:   %ctlz
; AVX: Found an estimated cost of 12 for instruction:   %ctlz
; XOP: Found an estimated cost of 12 for instruction:   %ctlz
  %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 0)
  ret <4 x i64> %ctlz
}

define <4 x i64> @var_ctlz_v4i64u(<4 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v4i64u':
; SSE: Found an estimated cost of 12 for instruction:   %ctlz
; AVX: Found an estimated cost of 12 for instruction:   %ctlz
; XOP: Found an estimated cost of 12 for instruction:   %ctlz
  %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 1)
  ret <4 x i64> %ctlz
}

define <4 x i32> @var_ctlz_v4i32(<4 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v4i32':
; SSE: Found an estimated cost of 12 for instruction:   %ctlz
; AVX: Found an estimated cost of 12 for instruction:   %ctlz
; XOP: Found an estimated cost of 12 for instruction:   %ctlz
  %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 0)
  ret <4 x i32> %ctlz
}

define <4 x i32> @var_ctlz_v4i32u(<4 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v4i32u':
; SSE: Found an estimated cost of 12 for instruction:   %ctlz
; AVX: Found an estimated cost of 12 for instruction:   %ctlz
; XOP: Found an estimated cost of 12 for instruction:   %ctlz
  %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 1)
  ret <4 x i32> %ctlz
}

define <8 x i32> @var_ctlz_v8i32(<8 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v8i32':
; SSE: Found an estimated cost of 24 for instruction:   %ctlz
; AVX: Found an estimated cost of 24 for instruction:   %ctlz
; XOP: Found an estimated cost of 24 for instruction:   %ctlz
  %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 0)
  ret <8 x i32> %ctlz
}

define <8 x i32> @var_ctlz_v8i32u(<8 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v8i32u':
; SSE: Found an estimated cost of 24 for instruction:   %ctlz
; AVX: Found an estimated cost of 24 for instruction:   %ctlz
; XOP: Found an estimated cost of 24 for instruction:   %ctlz
  %ctlz = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 1)
  ret <8 x i32> %ctlz
}

define <8 x i16> @var_ctlz_v8i16(<8 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v8i16':
; SSE: Found an estimated cost of 24 for instruction:   %ctlz
; AVX: Found an estimated cost of 24 for instruction:   %ctlz
; XOP: Found an estimated cost of 24 for instruction:   %ctlz
  %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 0)
  ret <8 x i16> %ctlz
}

define <8 x i16> @var_ctlz_v8i16u(<8 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v8i16u':
; SSE: Found an estimated cost of 24 for instruction:   %ctlz
; AVX: Found an estimated cost of 24 for instruction:   %ctlz
; XOP: Found an estimated cost of 24 for instruction:   %ctlz
  %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 1)
  ret <8 x i16> %ctlz
}

define <16 x i16> @var_ctlz_v16i16(<16 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v16i16':
; SSE: Found an estimated cost of 48 for instruction:   %ctlz
; AVX: Found an estimated cost of 48 for instruction:   %ctlz
; XOP: Found an estimated cost of 48 for instruction:   %ctlz
  %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 0)
  ret <16 x i16> %ctlz
}

define <16 x i16> @var_ctlz_v16i16u(<16 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v16i16u':
; SSE: Found an estimated cost of 48 for instruction:   %ctlz
; AVX: Found an estimated cost of 48 for instruction:   %ctlz
; XOP: Found an estimated cost of 48 for instruction:   %ctlz
  %ctlz = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %a, i1 1)
  ret <16 x i16> %ctlz
}

define <16 x i8> @var_ctlz_v16i8(<16 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v16i8':
; SSE: Found an estimated cost of 48 for instruction:   %ctlz
; AVX: Found an estimated cost of 48 for instruction:   %ctlz
; XOP: Found an estimated cost of 48 for instruction:   %ctlz
  %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 0)
  ret <16 x i8> %ctlz
}

define <16 x i8> @var_ctlz_v16i8u(<16 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v16i8u':
; SSE: Found an estimated cost of 48 for instruction:   %ctlz
; AVX: Found an estimated cost of 48 for instruction:   %ctlz
; XOP: Found an estimated cost of 48 for instruction:   %ctlz
  %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 1)
  ret <16 x i8> %ctlz
}

define <32 x i8> @var_ctlz_v32i8(<32 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v32i8':
; SSE: Found an estimated cost of 96 for instruction:   %ctlz
; AVX: Found an estimated cost of 96 for instruction:   %ctlz
; XOP: Found an estimated cost of 96 for instruction:   %ctlz
  %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 0)
  ret <32 x i8> %ctlz
}

define <32 x i8> @var_ctlz_v32i8u(<32 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_ctlz_v32i8u':
; SSE: Found an estimated cost of 96 for instruction:   %ctlz
; AVX: Found an estimated cost of 96 for instruction:   %ctlz
; XOP: Found an estimated cost of 96 for instruction:   %ctlz
  %ctlz = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %a, i1 1)
  ret <32 x i8> %ctlz
}

; Verify the cost of vector trailing zero count instructions.

; Trailing-zero-count intrinsics under test, declared in the same order and
; with the same operand shape as the ctlz declarations earlier in this file.
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)

declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1)
declare <8 x i32> @llvm.cttz.v8i32(<8 x i32>, i1)
declare <16 x i16> @llvm.cttz.v16i16(<16 x i16>, i1)
declare <32 x i8> @llvm.cttz.v32i8(<32 x i8>, i1)

; The cttz functions mirror the ctlz ones: one call per function, a plain
; variant passing i1 0 and a "u" variant passing i1 1, and the same expected
; costs (three times the element count under every prefix group).

define <2 x i64> @var_cttz_v2i64(<2 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v2i64':
; SSE: Found an estimated cost of 6 for instruction:   %cttz
; AVX: Found an estimated cost of 6 for instruction:   %cttz
; XOP: Found an estimated cost of 6 for instruction:   %cttz
  %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 0)
  ret <2 x i64> %cttz
}

define <2 x i64> @var_cttz_v2i64u(<2 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v2i64u':
; SSE: Found an estimated cost of 6 for instruction:   %cttz
; AVX: Found an estimated cost of 6 for instruction:   %cttz
; XOP: Found an estimated cost of 6 for instruction:   %cttz
  %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 1)
  ret <2 x i64> %cttz
}

define <4 x i64> @var_cttz_v4i64(<4 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v4i64':
; SSE: Found an estimated cost of 12 for instruction:   %cttz
; AVX: Found an estimated cost of 12 for instruction:   %cttz
; XOP: Found an estimated cost of 12 for instruction:   %cttz
  %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 0)
  ret <4 x i64> %cttz
}

define <4 x i64> @var_cttz_v4i64u(<4 x i64> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v4i64u':
; SSE: Found an estimated cost of 12 for instruction:   %cttz
; AVX: Found an estimated cost of 12 for instruction:   %cttz
; XOP: Found an estimated cost of 12 for instruction:   %cttz
  %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 1)
  ret <4 x i64> %cttz
}

define <4 x i32> @var_cttz_v4i32(<4 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v4i32':
; SSE: Found an estimated cost of 12 for instruction:   %cttz
; AVX: Found an estimated cost of 12 for instruction:   %cttz
; XOP: Found an estimated cost of 12 for instruction:   %cttz
  %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 0)
  ret <4 x i32> %cttz
}

define <4 x i32> @var_cttz_v4i32u(<4 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v4i32u':
; SSE: Found an estimated cost of 12 for instruction:   %cttz
; AVX: Found an estimated cost of 12 for instruction:   %cttz
; XOP: Found an estimated cost of 12 for instruction:   %cttz
  %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 1)
  ret <4 x i32> %cttz
}

define <8 x i32> @var_cttz_v8i32(<8 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v8i32':
; SSE: Found an estimated cost of 24 for instruction:   %cttz
; AVX: Found an estimated cost of 24 for instruction:   %cttz
; XOP: Found an estimated cost of 24 for instruction:   %cttz
  %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 0)
  ret <8 x i32> %cttz
}

define <8 x i32> @var_cttz_v8i32u(<8 x i32> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v8i32u':
; SSE: Found an estimated cost of 24 for instruction:   %cttz
; AVX: Found an estimated cost of 24 for instruction:   %cttz
; XOP: Found an estimated cost of 24 for instruction:   %cttz
  %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 1)
  ret <8 x i32> %cttz
}

define <8 x i16> @var_cttz_v8i16(<8 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v8i16':
; SSE: Found an estimated cost of 24 for instruction:   %cttz
; AVX: Found an estimated cost of 24 for instruction:   %cttz
; XOP: Found an estimated cost of 24 for instruction:   %cttz
  %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 0)
  ret <8 x i16> %cttz
}

define <8 x i16> @var_cttz_v8i16u(<8 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v8i16u':
; SSE: Found an estimated cost of 24 for instruction:   %cttz
; AVX: Found an estimated cost of 24 for instruction:   %cttz
; XOP: Found an estimated cost of 24 for instruction:   %cttz
  %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 1)
  ret <8 x i16> %cttz
}

define <16 x i16> @var_cttz_v16i16(<16 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v16i16':
; SSE: Found an estimated cost of 48 for instruction:   %cttz
; AVX: Found an estimated cost of 48 for instruction:   %cttz
; XOP: Found an estimated cost of 48 for instruction:   %cttz
  %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 0)
  ret <16 x i16> %cttz
}

define <16 x i16> @var_cttz_v16i16u(<16 x i16> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v16i16u':
; SSE: Found an estimated cost of 48 for instruction:   %cttz
; AVX: Found an estimated cost of 48 for instruction:   %cttz
; XOP: Found an estimated cost of 48 for instruction:   %cttz
  %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 1)
  ret <16 x i16> %cttz
}

define <16 x i8> @var_cttz_v16i8(<16 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v16i8':
; SSE: Found an estimated cost of 48 for instruction:   %cttz
; AVX: Found an estimated cost of 48 for instruction:   %cttz
; XOP: Found an estimated cost of 48 for instruction:   %cttz
  %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 0)
  ret <16 x i8> %cttz
}

define <16 x i8> @var_cttz_v16i8u(<16 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v16i8u':
; SSE: Found an estimated cost of 48 for instruction:   %cttz
; AVX: Found an estimated cost of 48 for instruction:   %cttz
; XOP: Found an estimated cost of 48 for instruction:   %cttz
  %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 1)
  ret <16 x i8> %cttz
}

define <32 x i8> @var_cttz_v32i8(<32 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v32i8':
; SSE: Found an estimated cost of 96 for instruction:   %cttz
; AVX: Found an estimated cost of 96 for instruction:   %cttz
; XOP: Found an estimated cost of 96 for instruction:   %cttz
  %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 0)
  ret <32 x i8> %cttz
}

define <32 x i8> @var_cttz_v32i8u(<32 x i8> %a) {
; CHECK-LABEL: 'Cost Model Analysis' for function 'var_cttz_v32i8u':
; SSE: Found an estimated cost of 96 for instruction:   %cttz
; AVX: Found an estimated cost of 96 for instruction:   %cttz
; XOP: Found an estimated cost of 96 for instruction:   %cttz
  %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 1)
  ret <32 x i8> %cttz
}