; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2

; Verify the cost of vector shift left instructions.

; NOTE(review): the constant shift-amount operands in the "Constant Shifts",
; "Uniform Constant Shifts" and "Special Cases" functions were missing from the
; text under review (each shl had no second operand). The vectors used below
; are a reconstruction: non-uniform where the section or comment says so, a
; splat otherwise. Confirm the exact values against the upstream test.

;
;
; Variable Shifts
;

define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <2 x i64> %a, %b
  ret <2 x i64> %shift
}

define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
; SSE2: Found an estimated cost of 8 for instruction: %shift
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i64> %a, %b
  ret <4 x i64> %shift
}

define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 10 for instruction: %shift
; SSE41: Found an estimated cost of 10 for instruction: %shift
; AVX: Found an estimated cost of 10 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i32> %a, %b
  ret <4 x i32> %shift
}

define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 20 for instruction: %shift
; SSE41: Found an estimated cost of 20 for instruction: %shift
; AVX: Found an estimated cost of 20 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i32> %a, %b
  ret <8 x i32> %shift
}

define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 32 for instruction: %shift
; AVX: Found an estimated cost of 32 for instruction: %shift
; AVX2: Found an estimated cost of 32 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i16> %a, %b
  ret <8 x i16> %shift
}

define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
; SSE41: Found an estimated cost of 64 for instruction: %shift
; AVX: Found an estimated cost of 64 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = shl <16 x i16> %a, %b
  ret <16 x i16> %shift
}

define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 26 for instruction: %shift
; AVX: Found an estimated cost of 26 for instruction: %shift
; AVX2: Found an estimated cost of 26 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <16 x i8> %a, %b
  ret <16 x i8> %shift
}

define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
; SSE41: Found an estimated cost of 52 for instruction: %shift
; AVX: Found an estimated cost of 52 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = shl <32 x i8> %a, %b
  ret <32 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
  %shift = shl <2 x i64> %a, %splat
  ret <2 x i64> %shift
}

define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
; SSE2: Found an estimated cost of 8 for instruction: %shift
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
  %shift = shl <4 x i64> %a, %splat
  ret <4 x i64> %shift
}

define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 10 for instruction: %shift
; SSE41: Found an estimated cost of 10 for instruction: %shift
; AVX: Found an estimated cost of 10 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
  %shift = shl <4 x i32> %a, %splat
  ret <4 x i32> %shift
}

define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 20 for instruction: %shift
; SSE41: Found an estimated cost of 20 for instruction: %shift
; AVX: Found an estimated cost of 20 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i32> %a, %splat
  ret <8 x i32> %shift
}

define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
; SSE41: Found an estimated cost of 32 for instruction: %shift
; AVX: Found an estimated cost of 32 for instruction: %shift
; AVX2: Found an estimated cost of 32 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i16> %a, %splat
  ret <8 x i16> %shift
}

define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
; SSE41: Found an estimated cost of 64 for instruction: %shift
; AVX: Found an estimated cost of 64 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i16> %a, %splat
  ret <16 x i16> %shift
}

define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 26 for instruction: %shift
; AVX: Found an estimated cost of 26 for instruction: %shift
; AVX2: Found an estimated cost of 26 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i8> %a, %splat
  ret <16 x i8> %shift
}

define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
; SSE41: Found an estimated cost of 52 for instruction: %shift
; AVX: Found an estimated cost of 52 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i8> %a, %splat
  ret <32 x i8> %shift
}

;
; Constant Shifts
;

define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <2 x i64> %a, <i64 1, i64 7>
  ret <2 x i64> %shift
}

define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
; SSE2: Found an estimated cost of 8 for instruction: %shift
; SSE41: Found an estimated cost of 8 for instruction: %shift
; AVX: Found an estimated cost of 8 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
  ret <4 x i64> %shift
}

define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 6 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %shift
}

define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
; SSE2: Found an estimated cost of 12 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 0, i32 7>
  ret <8 x i32> %shift
}

define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
  ret <8 x i16> %shift
}

define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <16 x i16> %shift
}

define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
; SSE41: Found an estimated cost of 26 for instruction: %shift
; AVX: Found an estimated cost of 26 for instruction: %shift
; AVX2: Found an estimated cost of 26 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <16 x i8> %shift
}

define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
; SSE41: Found an estimated cost of 52 for instruction: %shift
; AVX: Found an estimated cost of 52 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <32 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <2 x i64> %a, <i64 7, i64 7>
  ret <2 x i64> %shift
}

define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  ret <4 x i64> %shift
}

define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 1 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %shift
}

define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x i32> %shift
}

define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %shift
}

define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <16 x i16> %shift
}

define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
; SSE2: Found an estimated cost of 1 for instruction: %shift
; SSE41: Found an estimated cost of 1 for instruction: %shift
; AVX: Found an estimated cost of 1 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
  %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %shift
}

define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
  %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %shift
}

;
; Special Cases
;

; We always emit a single pmullw in the case of v8i16 vector shifts by
; non-uniform constant.

define <8 x i16> @test1(<8 x i16> %a) {
  %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
  ret <8 x i16> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test1':
; CHECK: Found an estimated cost of 1 for instruction: %shl

define <8 x i16> @test2(<8 x i16> %a) {
  %shl = shl <8 x i16> %a, <i16 0, i16 undef, i16 0, i16 0, i16 1, i16 undef, i16 -1, i16 1>
  ret <8 x i16> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test2':
; CHECK: Found an estimated cost of 1 for instruction: %shl

; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction.
; Make sure that the estimated cost is always 1 except for the case where
; we only have SSE2 support. With SSE2, we are forced to special lower the
; v4i32 mul as a 2x shuffle, 2x pmuludq, 2x shuffle.

; NOTE(review): the constant shift-amount operands of test3, test4 and test5
; were missing from the text under review (each shl had no second operand).
; The non-uniform vectors below are a reconstruction; confirm the exact values
; against the upstream test.

define <4 x i32> @test3(<4 x i32> %a) {
  %shl = shl <4 x i32> %a, <i32 1, i32 -1, i32 2, i32 -3>
  ret <4 x i32> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test3':
; SSE2: Found an estimated cost of 6 for instruction: %shl
; SSE41: Found an estimated cost of 1 for instruction: %shl
; AVX: Found an estimated cost of 1 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOP: Found an estimated cost of 1 for instruction: %shl

define <4 x i32> @test4(<4 x i32> %a) {
  %shl = shl <4 x i32> %a, <i32 0, i32 0, i32 1, i32 1>
  ret <4 x i32> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test4':
; SSE2: Found an estimated cost of 6 for instruction: %shl
; SSE41: Found an estimated cost of 1 for instruction: %shl
; AVX: Found an estimated cost of 1 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOP: Found an estimated cost of 1 for instruction: %shl

; On AVX2 we are able to lower the following shift into a single
; vpsllvq. Therefore, the expected cost is only 1.
; In all other cases, this shift is scalarized as the target does not support
; vpsllv instructions.

define <2 x i64> @test5(<2 x i64> %a) {
  %shl = shl <2 x i64> %a, <i64 2, i64 3>
  ret <2 x i64> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test5':
; SSE2: Found an estimated cost of 4 for instruction: %shl
; SSE41: Found an estimated cost of 4 for instruction: %shl
; AVX: Found an estimated cost of 4 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOP: Found an estimated cost of 1 for instruction: %shl

; v16i16 and v8i32 shift left by non-uniform constant are lowered into
; vector multiply instructions. With AVX (but not AVX2), the vector multiply
; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert.
;
; With AVX2, instruction vpmullw works with 256bit quantities and
; therefore there is no need to split the resulting vector multiply into
; a sequence of two multiply.
;
; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice
; the cost computed in the case of 'test1'. That is because the backend
; simply emits 2 pmullw with no extract/insert.

; NOTE(review): the constant shift-amount operands of test6 and test7 were
; missing from the text under review (each shl had no second operand). The
; non-uniform vectors below are a reconstruction; confirm the exact values
; against the upstream test.

define <16 x i16> @test6(<16 x i16> %a) {
  %shl = shl <16 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
  ret <16 x i16> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test6':
; SSE2: Found an estimated cost of 2 for instruction: %shl
; SSE41: Found an estimated cost of 2 for instruction: %shl
; AVX: Found an estimated cost of 4 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOPAVX: Found an estimated cost of 2 for instruction: %shl
; XOPAVX2: Found an estimated cost of 1 for instruction: %shl

; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice
; the cost computed in the case of 'test3'. That is because the multiply
; is type-legalized into two 4i32 vector multiply.

define <8 x i32> @test7(<8 x i32> %a) {
  %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
  ret <8 x i32> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test7':
; SSE2: Found an estimated cost of 12 for instruction: %shl
; SSE41: Found an estimated cost of 2 for instruction: %shl
; AVX: Found an estimated cost of 4 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOPAVX: Found an estimated cost of 2 for instruction: %shl
; XOPAVX2: Found an estimated cost of 1 for instruction: %shl

; On AVX2 we are able to lower the following shift into a single
; vpsllvq. Therefore, the expected cost is only 1.
; In all other cases, this shift is scalarized as the target does not support
; vpsllv instructions.

; NOTE(review): the constant shift-amount operands of test8, test9 and test10
; were missing from the text under review (each shl had no second operand).
; The non-uniform vectors below are a reconstruction; confirm the exact values
; against the upstream test.

define <4 x i64> @test8(<4 x i64> %a) {
  %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
  ret <4 x i64> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test8':
; SSE2: Found an estimated cost of 8 for instruction: %shl
; SSE41: Found an estimated cost of 8 for instruction: %shl
; AVX: Found an estimated cost of 8 for instruction: %shl
; AVX2: Found an estimated cost of 1 for instruction: %shl
; XOPAVX: Found an estimated cost of 2 for instruction: %shl
; XOPAVX2: Found an estimated cost of 1 for instruction: %shl

; Same as 'test6', with the difference that the cost is double.

define <32 x i16> @test9(<32 x i16> %a) {
  %shl = shl <32 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
  ret <32 x i16> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test9':
; SSE2: Found an estimated cost of 4 for instruction: %shl
; SSE41: Found an estimated cost of 4 for instruction: %shl
; AVX: Found an estimated cost of 8 for instruction: %shl
; AVX2: Found an estimated cost of 2 for instruction: %shl
; XOPAVX: Found an estimated cost of 4 for instruction: %shl
; XOPAVX2: Found an estimated cost of 2 for instruction: %shl

; Same as 'test7', except that now the cost is double.

define <16 x i32> @test10(<16 x i32> %a) {
  %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
  ret <16 x i32> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test10':
; SSE2: Found an estimated cost of 24 for instruction: %shl
; SSE41: Found an estimated cost of 4 for instruction: %shl
; AVX: Found an estimated cost of 8 for instruction: %shl
; AVX2: Found an estimated cost of 2 for instruction: %shl
; XOPAVX: Found an estimated cost of 4 for instruction: %shl
; XOPAVX2: Found an estimated cost of 2 for instruction: %shl

; On AVX2 we are able to lower the following shift into a sequence of
; two vpsllvq instructions. Therefore, the expected cost is only 2.
; In all other cases, this shift is scalarized as we don't have vpsllv
; instructions.

; NOTE(review): the constant shift-amount operand of test11 was missing from
; the text under review (the shl had no second operand). The non-uniform
; vector below is a reconstruction; confirm the exact values against the
; upstream test.

define <8 x i64> @test11(<8 x i64> %a) {
  %shl = shl <8 x i64> %a, <i64 2, i64 3, i64 2, i64 3, i64 2, i64 3, i64 2, i64 3>
  ret <8 x i64> %shl
}
; CHECK: 'Cost Model Analysis' for function 'test11':
; SSE2: Found an estimated cost of 16 for instruction: %shl
; SSE41: Found an estimated cost of 16 for instruction: %shl
; AVX: Found an estimated cost of 16 for instruction: %shl
; AVX2: Found an estimated cost of 2 for instruction: %shl
; XOPAVX: Found an estimated cost of 4 for instruction: %shl
; XOPAVX2: Found an estimated cost of 2 for instruction: %shl