; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
;
; Cost-model test for the llvm.vector.reduce.xor intrinsic on x86_64-apple-darwin.
; Every lit invocation below runs opt's cost-model analysis with one -mattr
; feature set and pipes the result to FileCheck with two prefixes: a family
; prefix shared by related feature sets and a prefix unique to that feature set.
;
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ

; xor-reduction of i64 vectors with 1, 2, 4, 8 and 16 elements.
; %arg is unused and the function returns undef; only the per-instruction
; cost lines printed by the analysis are checked. The expectations here are
; written against the three family prefixes, so they apply to every feature
; set within a family.
define i32 @reduce_i64(i32 %arg) {
; SSE-LABEL: 'reduce_i64'
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'reduce_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i64'
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V1 = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> undef)
  %V2 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> undef)
  %V4 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> undef)
  %V8 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> undef)
  %V16 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> undef)
  ret i32 undef
}

; xor-reduction of i32 vectors with 2, 4, 8, 16 and 32 elements.
; Same layout as reduce_i64: one group of expectations per family prefix.
define i32 @reduce_i32(i32 %arg) {
; SSE-LABEL: 'reduce_i32'
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'reduce_i32'
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i32'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V2 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> undef)
  %V4 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> undef)
  %V8 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> undef)
  %V16 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> undef)
  %V32 = call i32 @llvm.vector.reduce.xor.v32i32(<32 x i32> undef)
  ret i32 undef
}

; xor-reduction of i16 vectors with 2, 4, 8, 16, 32 and 64 elements.
; Same layout as reduce_i64: one group of expectations per family prefix.
define i32 @reduce_i16(i32 %arg) {
; SSE-LABEL: 'reduce_i16'
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'reduce_i16'
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i16'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V2 = call i16 @llvm.vector.reduce.xor.v2i16(<2 x i16> undef)
  %V4 = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> undef)
  %V8 = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> undef)
  %V16 = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> undef)
  %V32 = call i16 @llvm.vector.reduce.xor.v32i16(<32 x i16> undef)
  %V64 = call i16 @llvm.vector.reduce.xor.v64i16(<64 x i16> undef)
  ret i32 undef
}
; xor-reduction of i8 vectors with 2, 4, 8, 16, 32, 64 and 128 elements.
; %arg is unused and the function returns undef; only the per-instruction
; cost lines printed by the analysis are checked. Expectations are grouped
; under the three family prefixes, one group per family.
define i32 @reduce_i8(i32 %arg) {
; SSE-LABEL: 'reduce_i8'
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'reduce_i8'
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'reduce_i8'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V2 = call i8 @llvm.vector.reduce.xor.v2i8(<2 x i8> undef)
  %V4 = call i8 @llvm.vector.reduce.xor.v4i8(<4 x i8> undef)
  %V8 = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> undef)
  %V16 = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> undef)
  %V32 = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> undef)
  %V64 = call i8 @llvm.vector.reduce.xor.v64i8(<64 x i8> undef)
  %V128 = call i8 @llvm.vector.reduce.xor.v128i8(<128 x i8> undef)
  ret i32 undef
}

; xor-reduction of i1 (mask) vectors with 1, 2, 4, 8, 16, 32, 64 and 128
; elements. Unlike the wider element types in this file, the expectations
; here are written against the eight feature-specific prefixes rather than
; the family prefixes, because the recorded costs differ between feature
; sets of the same family (for example the v8i1 and v16i1 entries of the
; sse2 group versus the ssse3 group).
define i32 @reduce_i1(i32 %arg) {
; SSE2-LABEL: 'reduce_i1'
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i1'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i1'
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i1'
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'reduce_i1'
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i1'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'reduce_i1'
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 775 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 776 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'reduce_i1'
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %V1 = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> undef)
  %V2 = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> undef)
  %V4 = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> undef)
  %V8 = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> undef)
  %V16 = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> undef)
  %V32 = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> undef)
  %V64 = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> undef)
  %V128 = call i1 @llvm.vector.reduce.xor.v128i1(<128 x i1> undef)
  ret i32 undef
}

; Declarations of every llvm.vector.reduce.xor overload called by the test
; functions in this file, grouped by element type (i64, i32, i16, i8, i1).
declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>)
declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>)
declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>)

declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>)
declare i32 @llvm.vector.reduce.xor.v32i32(<32 x i32>)

declare i16 @llvm.vector.reduce.xor.v2i16(<2 x i16>)
declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.xor.v32i16(<32 x i16>)
declare i16 @llvm.vector.reduce.xor.v64i16(<64 x i16>)

declare i8 @llvm.vector.reduce.xor.v2i8(<2 x i8>)
declare i8 @llvm.vector.reduce.xor.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.xor.v64i8(<64 x i8>)
declare i8 @llvm.vector.reduce.xor.v128i8(<128 x i8>)

declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1>)
declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>)
declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>)
declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>)
declare i1 @llvm.vector.reduce.xor.v64i1(<64 x i1>)
declare i1 @llvm.vector.reduce.xor.v128i1(<128 x i1>)