; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefixes=AVX,AVXONLY
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl < %s | FileCheck %s --check-prefixes=AVX,AVX512

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.

define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}

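; Unaligned 128-bit integer loads (align 4): the aligned forms would fault, so
; fast-isel must select movdqu for SSE and vmovdqu for AVX, as the checks below
; show.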
define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqu (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqu (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqu (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i16_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqu (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqu (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i32_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqu (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqu (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i64_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqu (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32(<4 x float>* %V) {
; SSE-LABEL: test_v4f32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovaps (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(<2 x double>* %V) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movapd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovapd (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f32_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovups (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movupd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovupd (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}

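; Loads with no explicit alignment: the IR alignment defaults to the ABI
; alignment of the vector type (16 bytes for these 128-bit vectors), so the
; aligned forms are still selected.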
define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_abi_alignment:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8_abi_alignment:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_abi_alignment:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i16_abi_alignment:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_abi_alignment:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i32_abi_alignment:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_abi_alignment:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i64_abi_alignment:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_abi_alignment:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f32_abi_alignment:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovaps (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_abi_alignment:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movapd (%rdi), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64_abi_alignment:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovapd (%rdi), %xmm0
; AVX-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}

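; 256-bit loads: with only SSE available the load is legalized into two 128-bit
; loads, while AVX targets can use a single 256-bit (ymm) load.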
define <32 x i8> @test_v32i8(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movaps 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v32i8:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 32
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movaps 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 32
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movaps 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 32
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movaps 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqa (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 32
  ret <4 x i64> %0
}

define <32 x i8> @test_v32i8_unaligned(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: movups 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v32i8_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqu (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 4
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16_unaligned(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: movups 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i16_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqu (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 4
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32_unaligned(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: movups 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i32_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqu (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 4
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64_unaligned(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: movups 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i64_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovdqu (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 4
  ret <4 x i64> %0
}

define <8 x float> @test_v8f32(<8 x float>* %V) {
; SSE-LABEL: test_v8f32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movaps 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovaps (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 32
  ret <8 x float> %0
}

define <4 x double> @test_v4f64(<4 x double>* %V) {
; SSE-LABEL: test_v4f64:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movapd (%rdi), %xmm0
; SSE-NEXT: movapd 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovapd (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 32
  ret <4 x double> %0
}

define <8 x float> @test_v8f32_unaligned(<8 x float>* %V) {
; SSE-LABEL: test_v8f32_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: movups 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f32_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovups (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 4
  ret <8 x float> %0
}

define <4 x double> @test_v4f64_unaligned(<4 x double>* %V) {
; SSE-LABEL: test_v4f64_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movupd (%rdi), %xmm0
; SSE-NEXT: movupd 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64_unaligned:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovupd (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 4
  ret <4 x double> %0
}

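; 512-bit loads: SSE splits them into four 128-bit loads and AVX without
; AVX512 into two 256-bit loads, while AVX512 targets use a single zmm load.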
define <64 x i8> @test_v64i8(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movaps 16(%rdi), %xmm1
; SSE-NEXT: movaps 32(%rdi), %xmm2
; SSE-NEXT: movaps 48(%rdi), %xmm3
; SSE-NEXT: retq
;
; AVXONLY-LABEL: test_v64i8:
; AVXONLY: # %bb.0: # %entry
; AVXONLY-NEXT: vmovaps (%rdi), %ymm0
; AVXONLY-NEXT: vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT: retq
;
; AVX512-LABEL: test_v64i8:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 64
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movaps 16(%rdi), %xmm1
; SSE-NEXT: movaps 32(%rdi), %xmm2
; SSE-NEXT: movaps 48(%rdi), %xmm3
; SSE-NEXT: retq
;
; AVXONLY-LABEL: test_v32i16:
; AVXONLY: # %bb.0: # %entry
; AVXONLY-NEXT: vmovaps (%rdi), %ymm0
; AVXONLY-NEXT: vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT: retq
;
; AVX512-LABEL: test_v32i16:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 64
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movaps 16(%rdi), %xmm1
; SSE-NEXT: movaps 32(%rdi), %xmm2
; SSE-NEXT: movaps 48(%rdi), %xmm3
; SSE-NEXT: retq
;
; AVXONLY-LABEL: test_v16i32:
; AVXONLY: # %bb.0: # %entry
; AVXONLY-NEXT: vmovaps (%rdi), %ymm0
; AVXONLY-NEXT: vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT: retq
;
; AVX512-LABEL: test_v16i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 64
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movaps 16(%rdi), %xmm1
; SSE-NEXT: movaps 32(%rdi), %xmm2
; SSE-NEXT: movaps 48(%rdi), %xmm3
; SSE-NEXT: retq
;
; AVXONLY-LABEL: test_v8i64:
; AVXONLY: # %bb.0: # %entry
; AVXONLY-NEXT: vmovaps (%rdi), %ymm0
; AVXONLY-NEXT: vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT: retq
;
; AVX512-LABEL: test_v8i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT: retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 64
  ret <8 x i64> %0
}

define <64 x i8> @test_v64i8_unaligned(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: movups 16(%rdi), %xmm1
; SSE-NEXT: movups 32(%rdi), %xmm2
; SSE-NEXT: movups 48(%rdi), %xmm3
; SSE-NEXT: retq
;
; AVXONLY-LABEL: test_v64i8_unaligned:
; AVXONLY: # %bb.0: # %entry
; AVXONLY-NEXT: vmovups (%rdi), %ymm0
; AVXONLY-NEXT: vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT: retq
;
; AVX512-LABEL: test_v64i8_unaligned:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT: retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 4
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16_unaligned(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: movups 16(%rdi), %xmm1
; SSE-NEXT: movups 32(%rdi), %xmm2
; SSE-NEXT: movups 48(%rdi), %xmm3
; SSE-NEXT: retq
;
; AVXONLY-LABEL: test_v32i16_unaligned:
; AVXONLY: # %bb.0: # %entry
; AVXONLY-NEXT: vmovups (%rdi), %ymm0
; AVXONLY-NEXT: vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT: retq
;
; AVX512-LABEL: test_v32i16_unaligned:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT: retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 4
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32_unaligned(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: movups 16(%rdi), %xmm1
; SSE-NEXT: movups 32(%rdi), %xmm2
; SSE-NEXT: movups 48(%rdi), %xmm3
; SSE-NEXT: retq
;
; AVXONLY-LABEL: test_v16i32_unaligned:
; AVXONLY: # %bb.0: # %entry
; AVXONLY-NEXT: vmovups (%rdi), %ymm0
; AVXONLY-NEXT: vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT: retq
;
; AVX512-LABEL: test_v16i32_unaligned:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT: retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 4
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64_unaligned(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: movups 16(%rdi), %xmm1
; SSE-NEXT: movups 32(%rdi), %xmm2
; SSE-NEXT: movups 48(%rdi), %xmm3
; SSE-NEXT: retq
;
; AVXONLY-LABEL: test_v8i64_unaligned:
; AVXONLY: # %bb.0: # %entry
; AVXONLY-NEXT: vmovups (%rdi), %ymm0
; AVXONLY-NEXT: vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT: retq
;
; AVX512-LABEL: test_v8i64_unaligned:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT: retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 4
  ret <8 x i64> %0
}

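; The remaining tests check that floating-point loads stay in the FP domain
; (movaps/movapd/movups/movupd and their VEX/EVEX forms), including the
; 512-bit cases on AVX512.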
; Note: despite its name, this test loads an <8 x float> vector with 64-byte
; alignment; the <16 x float> type is only exercised unaligned, by
; test_v16f32_unaligned below.
define <8 x float> @test_v16f32(<8 x float>* %V) {
; SSE-LABEL: test_v16f32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movaps (%rdi), %xmm0
; SSE-NEXT: movaps 16(%rdi), %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vmovaps (%rdi), %ymm0
; AVX-NEXT: retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 64
  ret <8 x float> %0
}

define <8 x double> @test_v8f64(<8 x double>* %V) {
; SSE-LABEL: test_v8f64:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movapd (%rdi), %xmm0
; SSE-NEXT: movapd 16(%rdi), %xmm1
; SSE-NEXT: movapd 32(%rdi), %xmm2
; SSE-NEXT: movapd 48(%rdi), %xmm3
; SSE-NEXT: retq
;
; AVXONLY-LABEL: test_v8f64:
; AVXONLY: # %bb.0: # %entry
; AVXONLY-NEXT: vmovapd (%rdi), %ymm0
; AVXONLY-NEXT: vmovapd 32(%rdi), %ymm1
; AVXONLY-NEXT: retq
;
; AVX512-LABEL: test_v8f64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovapd (%rdi), %zmm0
; AVX512-NEXT: retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 64
  ret <8 x double> %0
}

define <16 x float> @test_v16f32_unaligned(<16 x float>* %V) {
; SSE-LABEL: test_v16f32_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movups (%rdi), %xmm0
; SSE-NEXT: movups 16(%rdi), %xmm1
; SSE-NEXT: movups 32(%rdi), %xmm2
; SSE-NEXT: movups 48(%rdi), %xmm3
; SSE-NEXT: retq
;
; AVXONLY-LABEL: test_v16f32_unaligned:
; AVXONLY: # %bb.0: # %entry
; AVXONLY-NEXT: vmovups (%rdi), %ymm0
; AVXONLY-NEXT: vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT: retq
;
; AVX512-LABEL: test_v16f32_unaligned:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovups (%rdi), %zmm0
; AVX512-NEXT: retq
entry:
  %0 = load <16 x float>, <16 x float>* %V, align 4
  ret <16 x float> %0
}

define <8 x double> @test_v8f64_unaligned(<8 x double>* %V) {
; SSE-LABEL: test_v8f64_unaligned:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movupd (%rdi), %xmm0
; SSE-NEXT: movupd 16(%rdi), %xmm1
; SSE-NEXT: movupd 32(%rdi), %xmm2
; SSE-NEXT: movupd 48(%rdi), %xmm3
; SSE-NEXT: retq
;
; AVXONLY-LABEL: test_v8f64_unaligned:
; AVXONLY: # %bb.0: # %entry
; AVXONLY-NEXT: vmovupd (%rdi), %ymm0
; AVXONLY-NEXT: vmovupd 32(%rdi), %ymm1
; AVXONLY-NEXT: retq
;
; AVX512-LABEL: test_v8f64_unaligned:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovupd (%rdi), %zmm0
; AVX512-NEXT: retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 4
  ret <8 x double> %0
}