; RUN: opt -S -instcombine < %s | FileCheck %s

; InstCombine tests for the NEON widening-multiply intrinsics
; (llvm.arm.neon.vmulls / vmullu and llvm.aarch64.neon.smull / umull).
; Each function below pins one expected simplification:
;   * multiply by a zero vector       -> zeroinitializer
;   * signed multiply by a ones vector -> sext of the other operand
;   * both operands constant          -> constant-folded result
;   * non-foldable call               -> call is kept, trivial add is removed
; Every function is anchored by its own label directive so that a failed match
; in one function cannot be satisfied by output belonging to a later function.

; Signed widening multiply by zero folds to a zero vector.
define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
; CHECK-LABEL: @mulByZero(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i32> zeroinitializer
entry:
  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
  ret <4 x i32> %a
}

; Signed widening multiply by one becomes a plain sign extension.
define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
; CHECK-LABEL: @mulByOne(
; CHECK-NEXT: entry:
; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %a
entry:
  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %a
}

; Two constant operands are folded at compile time (3 * 2 = 6 per lane).
define <4 x i32> @constantMul() nounwind readnone ssp {
; CHECK-LABEL: @constantMul(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
entry:
  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
  ret <4 x i32> %a
}

; Signed variant: i16 -1 is sign-extended, so -1 * 1 folds to i32 -1.
define <4 x i32> @constantMulS() nounwind readnone ssp {
; CHECK-LABEL: @constantMulS(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
entry:
  %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %b
}

; Unsigned variant: i16 -1 is zero-extended, so the same operands fold to 65535.
define <4 x i32> @constantMulU() nounwind readnone ssp {
; CHECK-LABEL: @constantMulU(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
entry:
  %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %b
}

; The multiply by 2 is expected to stay as a call; only the add of zero is
; removed. The call-site attribute group is captured here as NUW and verified
; by the attribute checks at the end of the file.
define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
; CHECK-LABEL: @complex1(
; CHECK-NEXT: entry:
; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
; CHECK-NEXT: ret <4 x i32> %a
entry:
  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
  %b = add <4 x i32> zeroinitializer, %a
  ret <4 x i32> %b
}

; The constant multiply is folded to 6 and then used directly by the add.
define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
; CHECK-LABEL: @complex2(
; CHECK-NEXT: entry:
; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
; CHECK-NEXT: ret <4 x i32> %b
entry:
  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
  %b = add <4 x i32> %x, %a
  ret <4 x i32> %b
}

declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone

; ARM64 variants - <rdar://problem/12349617>
; Same expectations as above, using the llvm.aarch64.neon.smull / umull
; intrinsics in place of llvm.arm.neon.vmulls / vmullu.

define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp {
; CHECK-LABEL: @mulByZeroARM64(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i32> zeroinitializer
entry:
  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
  ret <4 x i32> %a
}

define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp {
; CHECK-LABEL: @mulByOneARM64(
; CHECK-NEXT: entry:
; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
; CHECK-NEXT: ret <4 x i32> %a
entry:
  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %a
}

define <4 x i32> @constantMulARM64() nounwind readnone ssp {
; CHECK-LABEL: @constantMulARM64(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
entry:
  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
  ret <4 x i32> %a
}

define <4 x i32> @constantMulSARM64() nounwind readnone ssp {
; CHECK-LABEL: @constantMulSARM64(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
entry:
  %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %b
}

define <4 x i32> @constantMulUARM64() nounwind readnone ssp {
; CHECK-LABEL: @constantMulUARM64(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
entry:
  %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
  ret <4 x i32> %b
}

; The call site here carries the same attributes as the one in @complex1, so
; it reuses the NUW capture instead of redefining it; that ties both surviving
; calls to the attribute group verified at the end of the file.
define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp {
; CHECK-LABEL: @complex1ARM64(
; CHECK-NEXT: entry:
; CHECK-NEXT: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW]]
; CHECK-NEXT: ret <4 x i32> %a
entry:
  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
  %b = add <4 x i32> zeroinitializer, %a
  ret <4 x i32> %b
}

define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp {
; CHECK-LABEL: @complex2ARM64(
; CHECK-NEXT: entry:
; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
; CHECK-NEXT: ret <4 x i32> %b
entry:
  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
  %b = add <4 x i32> %x, %a
  ret <4 x i32> %b
}

declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone

; Attribute groups expected in the output: #0 for the function definitions,
; #1 for the intrinsic declarations, and the captured NUW group for the call
; sites that survive in @complex1 and @complex1ARM64.
; CHECK: attributes #0 = { nounwind readnone ssp }
; CHECK: attributes #1 = { nounwind readnone }
; CHECK: attributes [[NUW]] = { nounwind }