; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
; CHECK: test_vshr_n_s8
; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
; CHECK: test_vshr_n_s16
; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
; CHECK: test_vshr_n_s32
; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
; CHECK: test_vshrq_n_s8
; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
; CHECK: test_vshrq_n_s16
; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
; CHECK: test_vshrq_n_s32
; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
; CHECK: test_vshrq_n_s64
; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
; CHECK: test_vshr_n_u8
; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
; CHECK: test_vshr_n_u16
; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
; CHECK: test_vshr_n_u32
; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
; CHECK: test_vshrq_n_u8
; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
; CHECK: test_vshrq_n_u16
; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
; CHECK: test_vshrq_n_u32
; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
; CHECK: test_vshrq_n_u64
; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_s8
; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_s16
; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_s32
; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_s8
; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_s16
; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_s32
; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_s64
; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_u8
; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_u16
; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_u32
; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_u8
; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_u16
; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_u32
; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_u64
; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vshrn_n_s16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vshrn_n_s32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vshrn_n_s64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vshrn_n_u16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vshrn_n_u32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vshrn_n_u64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_s16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_s32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_s64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_u16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_u32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_u64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrun_high_n_s16
; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrun_high_n_s32
; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrun_high_n_s64
; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vrshrn_high_n_s16
; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vrshrn_high_n_s32
; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vrshrn_high_n_s64
; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrun_high_n_s16
; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrun_high_n_s32
; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrun_high_n_s64
; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_s16
; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_s32
; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_s64
; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_u16
; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_u32
; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_u64
; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_s16
; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_s32
; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_s64
; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_u16
; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_u32
; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_u64
; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}


declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32)

declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32)

declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32)

declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)

declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)

declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)

declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)

declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)

declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)

declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)

declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)

declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)

declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)

declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)

declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)

define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_s64_f64
; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_u64_f64
; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_s64
; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_u64
; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)