; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon -verify-machineinstrs < %s | FileCheck %s

; The DAG combiner folds
;   (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)
; which produces nodes such as
;   v1i32 trunc v1i64, v1i16 trunc v1i64, v1i8 trunc v1i64.
; By default, type legalization would scalarize these nodes, but that triggers
; an assertion failure because v1i64 is a legal type on AArch64. The default
; action for them is therefore changed from scalarizing to widening.

; FIXME: XTN is still emitted for the v1i32 cases below; it could be optimized
; away, as it already is for the v1i16 and v1i8 cases.

; v1i64 placed in lane 0 of a two-lane vector, then narrowed to i32 lanes.
define <2 x i32> @test_v1i32_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i32_0:
; CHECK: xtn v0.2s, v0.2d
  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 0, i32 undef>
  %narrow = trunc <2 x i64> %wide to <2 x i32>
  ret <2 x i32> %narrow
}

; Same as above, but the v1i64 element is placed in lane 1, so the narrowed
; value is expected to be broadcast with a dup right after the xtn.
define <2 x i32> @test_v1i32_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i32_1:
; CHECK: xtn v0.2s, v0.2d
; CHECK-NEXT: dup v0.2s, v0.s[0]
  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <2 x i32> <i32 undef, i32 0>
  %narrow = trunc <2 x i64> %wide to <2 x i32>
  ret <2 x i32> %narrow
}

; v1i64 placed in lane 0 of a four-lane vector, then narrowed to i16 lanes.
; No xtn may appear in the output.
define <4 x i16> @test_v1i16_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i16_0:
; CHECK-NOT: xtn
  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %narrow = trunc <4 x i64> %wide to <4 x i16>
  ret <4 x i16> %narrow
}

; v1i64 placed in lane 2 of a four-lane vector, then narrowed to i16 lanes.
; No xtn may precede the expected dup.
define <4 x i16> @test_v1i16_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i16_1:
; CHECK-NOT: xtn
; CHECK: dup v0.4h, v0.h[0]
  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 undef>
  %narrow = trunc <4 x i64> %wide to <4 x i16>
  ret <4 x i16> %narrow
}

; v1i64 placed in lane 0 of an eight-lane vector, then narrowed to i8 lanes.
; No xtn may appear in the output.
define <8 x i8> @test_v1i8_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i8_0:
; CHECK-NOT: xtn
  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %narrow = trunc <8 x i64> %wide to <8 x i8>
  ret <8 x i8> %narrow
}

; v1i64 placed in lane 2 of an eight-lane vector, then narrowed to i8 lanes.
; No xtn may precede the expected dup.
define <8 x i8> @test_v1i8_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i8_1:
; CHECK-NOT: xtn
; CHECK: dup v0.8b, v0.b[0]
  %wide = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %narrow = trunc <8 x i64> %wide to <8 x i8>
  ret <8 x i8> %narrow
}

; PR20777: v1i1 is problematic as well, but it cannot be widened. Instead the
; i64 is extracted (extract_elt) from the v1i64 operand and that scalar is
; truncated.

; Direct <1 x i64> -> <1 x i1> truncation, returned as a vector.
define <1 x i1> @test_v1i1_0(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i1_0:
; CHECK: fmov w0, s0
  %bit = trunc <1 x i64> %in0 to <1 x i1>
  ret <1 x i1> %bit
}

; Same truncation, but the single i1 lane is extracted and returned as a
; scalar; the value moved out of the vector register is expected to be masked
; down to its low bit.
define i1 @test_v1i1_1(<1 x i64> %in0) {
; CHECK-LABEL: test_v1i1_1:
; CHECK: fmov [[REG:w[0-9]+]], s0
; CHECK: and w0, [[REG]], #0x1
  %vec = trunc <1 x i64> %in0 to <1 x i1>
  %bit = extractelement <1 x i1> %vec, i32 0
  ret i1 %bit
}