; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s

; Integer vector multiply-accumulate / multiply-subtract selection.
; Every function multiplies %A by %B and combines the product with %C; the
; expected assembly shows the result being built in v2 (the register that
; holds %C) and then copied to v0 for the return.

;------------------------------------------------------------------------------
; C + (A * B): a single MLA is expected.
;------------------------------------------------------------------------------

define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
; CHECK-LABEL: mla8xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mla v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <8 x i8> %A, %B
  %acc = add <8 x i8> %C, %prod
  ret <8 x i8> %acc
}

define <16 x i8> @mla16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
; CHECK-LABEL: mla16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mla v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <16 x i8> %A, %B
  %acc = add <16 x i8> %C, %prod
  ret <16 x i8> %acc
}

define <4 x i16> @mla4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
; CHECK-LABEL: mla4xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mla v2.4h, v0.4h, v1.4h
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <4 x i16> %A, %B
  %acc = add <4 x i16> %C, %prod
  ret <4 x i16> %acc
}

define <8 x i16> @mla8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
; CHECK-LABEL: mla8xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mla v2.8h, v0.8h, v1.8h
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <8 x i16> %A, %B
  %acc = add <8 x i16> %C, %prod
  ret <8 x i16> %acc
}

define <2 x i32> @mla2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
; CHECK-LABEL: mla2xi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mla v2.2s, v0.2s, v1.2s
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <2 x i32> %A, %B
  %acc = add <2 x i32> %C, %prod
  ret <2 x i32> %acc
}

define <4 x i32> @mla4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: mla4xi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <4 x i32> %A, %B
  %acc = add <4 x i32> %C, %prod
  ret <4 x i32> %acc
}

;------------------------------------------------------------------------------
; C - (A * B): a single MLS is expected.
;------------------------------------------------------------------------------

define <8 x i8> @mls8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
; CHECK-LABEL: mls8xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mls v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <8 x i8> %A, %B
  %diff = sub <8 x i8> %C, %prod
  ret <8 x i8> %diff
}

define <16 x i8> @mls16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
; CHECK-LABEL: mls16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mls v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <16 x i8> %A, %B
  %diff = sub <16 x i8> %C, %prod
  ret <16 x i8> %diff
}

define <4 x i16> @mls4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
; CHECK-LABEL: mls4xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mls v2.4h, v0.4h, v1.4h
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <4 x i16> %A, %B
  %diff = sub <4 x i16> %C, %prod
  ret <4 x i16> %diff
}

define <8 x i16> @mls8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
; CHECK-LABEL: mls8xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mls v2.8h, v0.8h, v1.8h
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <8 x i16> %A, %B
  %diff = sub <8 x i16> %C, %prod
  ret <8 x i16> %diff
}

define <2 x i32> @mls2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
; CHECK-LABEL: mls2xi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mls v2.2s, v0.2s, v1.2s
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <2 x i32> %A, %B
  %diff = sub <2 x i32> %C, %prod
  ret <2 x i32> %diff
}

define <4 x i32> @mls4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: mls4xi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mls v2.4s, v0.4s, v1.4s
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <4 x i32> %A, %B
  %diff = sub <4 x i32> %C, %prod
  ret <4 x i32> %diff
}

;------------------------------------------------------------------------------
; (A * B) - C: the operands of the subtraction are swapped relative to the
; group above, and the expected code negates C and then accumulates the
; product into it with MLA.
;------------------------------------------------------------------------------

define <8 x i8> @mls2v8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
; CHECK-LABEL: mls2v8xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg v2.8b, v2.8b
; CHECK-NEXT:    mla v2.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <8 x i8> %A, %B
  %diff = sub <8 x i8> %prod, %C
  ret <8 x i8> %diff
}

define <16 x i8> @mls2v16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
; CHECK-LABEL: mls2v16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg v2.16b, v2.16b
; CHECK-NEXT:    mla v2.16b, v0.16b, v1.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <16 x i8> %A, %B
  %diff = sub <16 x i8> %prod, %C
  ret <16 x i8> %diff
}

define <4 x i16> @mls2v4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) {
; CHECK-LABEL: mls2v4xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg v2.4h, v2.4h
; CHECK-NEXT:    mla v2.4h, v0.4h, v1.4h
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <4 x i16> %A, %B
  %diff = sub <4 x i16> %prod, %C
  ret <4 x i16> %diff
}

define <8 x i16> @mls2v8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) {
; CHECK-LABEL: mls2v8xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg v2.8h, v2.8h
; CHECK-NEXT:    mla v2.8h, v0.8h, v1.8h
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <8 x i16> %A, %B
  %diff = sub <8 x i16> %prod, %C
  ret <8 x i16> %diff
}

define <2 x i32> @mls2v2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) {
; CHECK-LABEL: mls2v2xi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg v2.2s, v2.2s
; CHECK-NEXT:    mla v2.2s, v0.2s, v1.2s
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <2 x i32> %A, %B
  %diff = sub <2 x i32> %prod, %C
  ret <2 x i32> %diff
}

define <4 x i32> @mls2v4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) {
; CHECK-LABEL: mls2v4xi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    neg v2.4s, v2.4s
; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %prod = mul <4 x i32> %A, %B
  %diff = sub <4 x i32> %prod, %C
  ret <4 x i32> %diff
}