; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVXONLY
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.

define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}

define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32(<4 x float>* %V) {
; SSE-LABEL: test_v4f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(<2 x double>* %V) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}

define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i8_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i8_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i8_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i16_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i16_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i16_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i32_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i32_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i32_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v2i64_abi_alignment:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %xmm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v2i64_abi_alignment:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v2i64_abi_alignment:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %xmm0
; SKX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_abi_alignment:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_abi_alignment:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}

define <32 x i8> @test_v32i8(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i8:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i8:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 32
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i16:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i16:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 32
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i32:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i32:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 32
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqa (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i64:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqa (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i64:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 32
  ret <4 x i64> %0
}

define <32 x i8> @test_v32i8_unaligned(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i8_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i8_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 4
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16_unaligned(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v16i16_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v16i16_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 4
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32_unaligned(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v8i32_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v8i32_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 4
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64_unaligned(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v4i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovdqu (%rdi), %ymm0
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v4i64_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovdqu (%rdi), %ymm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v4i64_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %ymm0
; SKX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 4
  ret <4 x i64> %0
}

define <8 x float> @test_v8f32(<8 x float>* %V) {
; SSE-LABEL: test_v8f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 32
  ret <8 x float> %0
}

define <4 x double> @test_v4f64(<4 x double>* %V) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 32
  ret <4 x double> %0
}

define <8 x float> @test_v8f32_unaligned(<8 x float>* %V) {
; SSE-LABEL: test_v8f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovups (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 4
  ret <8 x float> %0
}

define <4 x double> @test_v4f64_unaligned(<4 x double>* %V) {
; SSE-LABEL: test_v4f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_unaligned:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 4
  ret <4 x double> %0
}

define <64 x i8> @test_v64i8(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v64i8:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovaps (%rdi), %ymm0
; KNL-NEXT:    vmovaps 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v64i8:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 64
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i16:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovaps (%rdi), %ymm0
; KNL-NEXT:    vmovaps 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i16:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqa64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 64
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 64
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 64
  ret <8 x i64> %0
}

define <64 x i8> @test_v64i8_unaligned(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v64i8_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovups (%rdi), %ymm0
; KNL-NEXT:    vmovups 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v64i8_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 4
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16_unaligned(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i16_unaligned:
; KNL:       # %bb.0: # %entry
; KNL-NEXT:    vmovups (%rdi), %ymm0
; KNL-NEXT:    vmovups 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i16_unaligned:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 4
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32_unaligned(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 4
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64_unaligned(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 4
  ret <8 x i64> %0
}

define <8 x float> @test_v16f32(<8 x float>* %V) {
; SSE-LABEL: test_v16f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 64
  ret <8 x float> %0
}

define <8 x double> @test_v8f64(<8 x double>* %V) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    movapd 32(%rdi), %xmm2
; SSE-NEXT:    movapd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovapd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovapd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovapd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 64
  ret <8 x double> %0
}

define <16 x float> @test_v16f32_unaligned(<16 x float>* %V) {
; SSE-LABEL: test_v16f32_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16f32_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovups (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x float>, <16 x float>* %V, align 4
  ret <16 x float> %0
}

define <8 x double> @test_v8f64_unaligned(<8 x double>* %V) {
; SSE-LABEL: test_v8f64_unaligned:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    movupd 32(%rdi), %xmm2
; SSE-NEXT:    movupd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64_unaligned:
; AVXONLY:       # %bb.0: # %entry
; AVXONLY-NEXT:    vmovupd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovupd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_unaligned:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovupd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 4
  ret <8 x double> %0
}