; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512

; Each function below performs 16-byte memcpy calls whose source region
; [A-16, A) is partially overwritten by narrower stores, some of which overlap
; one another. The expected output is compared for four llc configurations
; - default x86-64, core-avx2 and skx, where a copy that follows such stores is
;   expected to be emitted as a sequence of narrower load/store pairs;
; - x86-64 with --x86-disable-avoid-SFB, where every copy is expected to stay a
;   single 16-byte movups load/store pair.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; ModuleID = '../testSFB/testOverlapBlocks.c'
source_filename = "../testSFB/testOverlapBlocks.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; test_overlap_1 stores an i32 at A-8 and copies 16 bytes from A-16 to A. It
; then stores an i64 at A-9, an i64 at A-16 and an i8 at A-1 (the two i64
; stores overlap) and copies the same 16 bytes to A+16.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_1(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl $7, -8(%rdi)
; CHECK-NEXT:    movq -16(%rdi), %rax
; CHECK-NEXT:    movq %rax, (%rdi)
; CHECK-NEXT:    movl -8(%rdi), %eax
; CHECK-NEXT:    movl %eax, 8(%rdi)
; CHECK-NEXT:    movl -4(%rdi), %eax
; CHECK-NEXT:    movl %eax, 12(%rdi)
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movq %rax, -9(%rdi)
; CHECK-NEXT:    movq %rax, -16(%rdi)
; CHECK-NEXT:    movb $0, -1(%rdi)
; CHECK-NEXT:    movq -16(%rdi), %rax
; CHECK-NEXT:    movq %rax, 16(%rdi)
; CHECK-NEXT:    movl -8(%rdi), %eax
; CHECK-NEXT:    movl %eax, 24(%rdi)
; CHECK-NEXT:    movzwl -4(%rdi), %eax
; CHECK-NEXT:    movw %ax, 28(%rdi)
; CHECK-NEXT:    movb -2(%rdi), %al
; CHECK-NEXT:    movb %al, 30(%rdi)
; CHECK-NEXT:    movb -1(%rdi), %al
; CHECK-NEXT:    movb %al, 31(%rdi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_overlap_1:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movl $7, -8(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rdi)
; DISABLED-NEXT:    movslq %esi, %rax
; DISABLED-NEXT:    movq %rax, -9(%rdi)
; DISABLED-NEXT:    movq %rax, -16(%rdi)
; DISABLED-NEXT:    movb $0, -1(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_overlap_1:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    movl $7, -8(%rdi)
; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, (%rdi)
; CHECK-AVX2-NEXT:    movl -8(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 8(%rdi)
; CHECK-AVX2-NEXT:    movl -4(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 12(%rdi)
; CHECK-AVX2-NEXT:    movslq %esi, %rax
; CHECK-AVX2-NEXT:    movq %rax, -9(%rdi)
; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT:    movb $0, -1(%rdi)
; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
; CHECK-AVX2-NEXT:    movl -8(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 24(%rdi)
; CHECK-AVX2-NEXT:    movzwl -4(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 28(%rdi)
; CHECK-AVX2-NEXT:    movb -2(%rdi), %al
; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_overlap_1:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    movl $7, -8(%rdi)
; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, (%rdi)
; CHECK-AVX512-NEXT:    movl -8(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 8(%rdi)
; CHECK-AVX512-NEXT:    movl -4(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 12(%rdi)
; CHECK-AVX512-NEXT:    movslq %esi, %rax
; CHECK-AVX512-NEXT:    movq %rax, -9(%rdi)
; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT:    movb $0, -1(%rdi)
; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
; CHECK-AVX512-NEXT:    movl -8(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 24(%rdi)
; CHECK-AVX512-NEXT:    movzwl -4(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 28(%rdi)
; CHECK-AVX512-NEXT:    movb -2(%rdi), %al
; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX512-NEXT:    retq
entry:
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
  %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
  %0 = bitcast i8* %add.ptr1 to i32*
  store i32 7, i32* %0, align 4
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
  %1 = bitcast i8* %add.ptr2 to i64*
  store i64 %conv, i64* %1, align 8
  %2 = bitcast i8* %add.ptr to i64*
  store i64 %conv, i64* %2, align 8
  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
  store i8 0, i8* %add.ptr5, align 1
  %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1

; test_overlap_2 stores an i64 at A-16 and copies 16 bytes from A-16 to A. It
; then stores an i64 at A-8 and an i32 at A-12 (the i32 lands inside the
; earlier i64 at A-16) and copies the same 16 bytes to A+16.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_2(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movq %rax, -16(%rdi)
; CHECK-NEXT:    movq -16(%rdi), %rcx
; CHECK-NEXT:    movq %rcx, (%rdi)
; CHECK-NEXT:    movq -8(%rdi), %rcx
; CHECK-NEXT:    movq %rcx, 8(%rdi)
; CHECK-NEXT:    movq %rax, -8(%rdi)
; CHECK-NEXT:    movl $7, -12(%rdi)
; CHECK-NEXT:    movl -16(%rdi), %eax
; CHECK-NEXT:    movl %eax, 16(%rdi)
; CHECK-NEXT:    movl -12(%rdi), %eax
; CHECK-NEXT:    movl %eax, 20(%rdi)
; CHECK-NEXT:    movq -8(%rdi), %rax
; CHECK-NEXT:    movq %rax, 24(%rdi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_overlap_2:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movslq %esi, %rax
; DISABLED-NEXT:    movq %rax, -16(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rdi)
; DISABLED-NEXT:    movq %rax, -8(%rdi)
; DISABLED-NEXT:    movl $7, -12(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_overlap_2:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    movslq %esi, %rax
; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT:    movq -16(%rdi), %rcx
; CHECK-AVX2-NEXT:    movq %rcx, (%rdi)
; CHECK-AVX2-NEXT:    movq -8(%rdi), %rcx
; CHECK-AVX2-NEXT:    movq %rcx, 8(%rdi)
; CHECK-AVX2-NEXT:    movq %rax, -8(%rdi)
; CHECK-AVX2-NEXT:    movl $7, -12(%rdi)
; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 16(%rdi)
; CHECK-AVX2-NEXT:    movl -12(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 20(%rdi)
; CHECK-AVX2-NEXT:    movq -8(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 24(%rdi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_overlap_2:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    movslq %esi, %rax
; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT:    movq -16(%rdi), %rcx
; CHECK-AVX512-NEXT:    movq %rcx, (%rdi)
; CHECK-AVX512-NEXT:    movq -8(%rdi), %rcx
; CHECK-AVX512-NEXT:    movq %rcx, 8(%rdi)
; CHECK-AVX512-NEXT:    movq %rax, -8(%rdi)
; CHECK-AVX512-NEXT:    movl $7, -12(%rdi)
; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 16(%rdi)
; CHECK-AVX512-NEXT:    movl -12(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 20(%rdi)
; CHECK-AVX512-NEXT:    movq -8(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 24(%rdi)
; CHECK-AVX512-NEXT:    retq
entry:
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
  %conv = sext i32 %x to i64
  %0 = bitcast i8* %add.ptr to i64*
  store i64 %conv, i64* %0, align 8
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -8
  %1 = bitcast i8* %add.ptr3 to i64*
  store i64 %conv, i64* %1, align 8
  %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -12
  %2 = bitcast i8* %add.ptr4 to i32*
  store i32 7, i32* %2, align 4
  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_3 has the same shape as test_overlap_1, except that the initial
; i32 store goes to A-10 instead of A-8, so it is not at a 4-byte offset
; within the copied region.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_3(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl $7, -10(%rdi)
; CHECK-NEXT:    movl -16(%rdi), %eax
; CHECK-NEXT:    movl %eax, (%rdi)
; CHECK-NEXT:    movzwl -12(%rdi), %eax
; CHECK-NEXT:    movw %ax, 4(%rdi)
; CHECK-NEXT:    movl -10(%rdi), %eax
; CHECK-NEXT:    movl %eax, 6(%rdi)
; CHECK-NEXT:    movl -6(%rdi), %eax
; CHECK-NEXT:    movl %eax, 10(%rdi)
; CHECK-NEXT:    movzwl -2(%rdi), %eax
; CHECK-NEXT:    movw %ax, 14(%rdi)
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movq %rax, -9(%rdi)
; CHECK-NEXT:    movq %rax, -16(%rdi)
; CHECK-NEXT:    movb $0, -1(%rdi)
; CHECK-NEXT:    movq -16(%rdi), %rax
; CHECK-NEXT:    movq %rax, 16(%rdi)
; CHECK-NEXT:    movzwl -8(%rdi), %eax
; CHECK-NEXT:    movw %ax, 24(%rdi)
; CHECK-NEXT:    movl -6(%rdi), %eax
; CHECK-NEXT:    movl %eax, 26(%rdi)
; CHECK-NEXT:    movb -2(%rdi), %al
; CHECK-NEXT:    movb %al, 30(%rdi)
; CHECK-NEXT:    movb -1(%rdi), %al
; CHECK-NEXT:    movb %al, 31(%rdi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_overlap_3:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movl $7, -10(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rdi)
; DISABLED-NEXT:    movslq %esi, %rax
; DISABLED-NEXT:    movq %rax, -9(%rdi)
; DISABLED-NEXT:    movq %rax, -16(%rdi)
; DISABLED-NEXT:    movb $0, -1(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_overlap_3:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    movl $7, -10(%rdi)
; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, (%rdi)
; CHECK-AVX2-NEXT:    movzwl -12(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 4(%rdi)
; CHECK-AVX2-NEXT:    movl -10(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 6(%rdi)
; CHECK-AVX2-NEXT:    movl -6(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 10(%rdi)
; CHECK-AVX2-NEXT:    movzwl -2(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 14(%rdi)
; CHECK-AVX2-NEXT:    movslq %esi, %rax
; CHECK-AVX2-NEXT:    movq %rax, -9(%rdi)
; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT:    movb $0, -1(%rdi)
; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
; CHECK-AVX2-NEXT:    movzwl -8(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 24(%rdi)
; CHECK-AVX2-NEXT:    movl -6(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 26(%rdi)
; CHECK-AVX2-NEXT:    movb -2(%rdi), %al
; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_overlap_3:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    movl $7, -10(%rdi)
; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, (%rdi)
; CHECK-AVX512-NEXT:    movzwl -12(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 4(%rdi)
; CHECK-AVX512-NEXT:    movl -10(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 6(%rdi)
; CHECK-AVX512-NEXT:    movl -6(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 10(%rdi)
; CHECK-AVX512-NEXT:    movzwl -2(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 14(%rdi)
; CHECK-AVX512-NEXT:    movslq %esi, %rax
; CHECK-AVX512-NEXT:    movq %rax, -9(%rdi)
; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT:    movb $0, -1(%rdi)
; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
; CHECK-AVX512-NEXT:    movzwl -8(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 24(%rdi)
; CHECK-AVX512-NEXT:    movl -6(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 26(%rdi)
; CHECK-AVX512-NEXT:    movb -2(%rdi), %al
; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX512-NEXT:    retq
entry:
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
  %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -10
  %0 = bitcast i8* %add.ptr1 to i32*
  store i32 7, i32* %0, align 4
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  %add.ptr2 = getelementptr inbounds i8, i8* %A, i64 -9
  %1 = bitcast i8* %add.ptr2 to i64*
  store i64 %conv, i64* %1, align 8
  %2 = bitcast i8* %add.ptr to i64*
  store i64 %conv, i64* %2, align 8
  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 -1
  store i8 0, i8* %add.ptr5, align 1
  %add.ptr6 = getelementptr inbounds i8, i8* %A, i64 16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr6, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_4 copies 16 bytes from A-16 to A before any store in the
; function, so that first copy is expected to stay one 16-byte load/store pair
; in every configuration. It then stores an i64 at A-8, an i32 at A-16 and an
; i32 at A-11 (the last one overlaps both earlier stores) and copies the same
; 16 bytes to A+16.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_4(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movups -16(%rdi), %xmm0
; CHECK-NEXT:    movups %xmm0, (%rdi)
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movq %rax, -8(%rdi)
; CHECK-NEXT:    movl %eax, -16(%rdi)
; CHECK-NEXT:    movl $0, -11(%rdi)
; CHECK-NEXT:    movl -16(%rdi), %eax
; CHECK-NEXT:    movl %eax, 16(%rdi)
; CHECK-NEXT:    movb -12(%rdi), %al
; CHECK-NEXT:    movb %al, 20(%rdi)
; CHECK-NEXT:    movl -11(%rdi), %eax
; CHECK-NEXT:    movl %eax, 21(%rdi)
; CHECK-NEXT:    movl -7(%rdi), %eax
; CHECK-NEXT:    movl %eax, 25(%rdi)
; CHECK-NEXT:    movzwl -3(%rdi), %eax
; CHECK-NEXT:    movw %ax, 29(%rdi)
; CHECK-NEXT:    movb -1(%rdi), %al
; CHECK-NEXT:    movb %al, 31(%rdi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_overlap_4:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rdi)
; DISABLED-NEXT:    movslq %esi, %rax
; DISABLED-NEXT:    movq %rax, -8(%rdi)
; DISABLED-NEXT:    movl %eax, -16(%rdi)
; DISABLED-NEXT:    movl $0, -11(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_overlap_4:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    vmovups -16(%rdi), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
; CHECK-AVX2-NEXT:    movslq %esi, %rax
; CHECK-AVX2-NEXT:    movq %rax, -8(%rdi)
; CHECK-AVX2-NEXT:    movl %eax, -16(%rdi)
; CHECK-AVX2-NEXT:    movl $0, -11(%rdi)
; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 16(%rdi)
; CHECK-AVX2-NEXT:    movb -12(%rdi), %al
; CHECK-AVX2-NEXT:    movb %al, 20(%rdi)
; CHECK-AVX2-NEXT:    movl -11(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 21(%rdi)
; CHECK-AVX2-NEXT:    movl -7(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 25(%rdi)
; CHECK-AVX2-NEXT:    movzwl -3(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 29(%rdi)
; CHECK-AVX2-NEXT:    movb -1(%rdi), %al
; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_overlap_4:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    vmovups -16(%rdi), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
; CHECK-AVX512-NEXT:    movslq %esi, %rax
; CHECK-AVX512-NEXT:    movq %rax, -8(%rdi)
; CHECK-AVX512-NEXT:    movl %eax, -16(%rdi)
; CHECK-AVX512-NEXT:    movl $0, -11(%rdi)
; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 16(%rdi)
; CHECK-AVX512-NEXT:    movb -12(%rdi), %al
; CHECK-AVX512-NEXT:    movb %al, 20(%rdi)
; CHECK-AVX512-NEXT:    movl -11(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 21(%rdi)
; CHECK-AVX512-NEXT:    movl -7(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 25(%rdi)
; CHECK-AVX512-NEXT:    movzwl -3(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 29(%rdi)
; CHECK-AVX512-NEXT:    movb -1(%rdi), %al
; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
; CHECK-AVX512-NEXT:    retq
entry:
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  %add.ptr1 = getelementptr inbounds i8, i8* %A, i64 -8
  %0 = bitcast i8* %add.ptr1 to i64*
  store i64 %conv, i64* %0, align 8
  %1 = bitcast i8* %add.ptr to i32*
  store i32 %x, i32* %1, align 4
  %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -11
  %2 = bitcast i8* %add.ptr3 to i32*
  store i32 0, i32* %2, align 4
  %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr4, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_5 copies 16 bytes from A-16 to A before any store in the
; function. It then stores an i64 at A-16 and two single bytes at A-14 and
; A-11 (both inside that i64) and copies the same 16 bytes to A+16.
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_5(i8* nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movups -16(%rdi), %xmm0
; CHECK-NEXT:    movups %xmm0, (%rdi)
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movq %rax, -16(%rdi)
; CHECK-NEXT:    movb %al, -14(%rdi)
; CHECK-NEXT:    movb $0, -11(%rdi)
; CHECK-NEXT:    movzwl -16(%rdi), %eax
; CHECK-NEXT:    movw %ax, 16(%rdi)
; CHECK-NEXT:    movb -14(%rdi), %al
; CHECK-NEXT:    movb %al, 18(%rdi)
; CHECK-NEXT:    movzwl -13(%rdi), %eax
; CHECK-NEXT:    movw %ax, 19(%rdi)
; CHECK-NEXT:    movb -11(%rdi), %al
; CHECK-NEXT:    movb %al, 21(%rdi)
; CHECK-NEXT:    movq -10(%rdi), %rax
; CHECK-NEXT:    movq %rax, 22(%rdi)
; CHECK-NEXT:    movzwl -2(%rdi), %eax
; CHECK-NEXT:    movw %ax, 30(%rdi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_overlap_5:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rdi)
; DISABLED-NEXT:    movslq %esi, %rax
; DISABLED-NEXT:    movq %rax, -16(%rdi)
; DISABLED-NEXT:    movb %al, -14(%rdi)
; DISABLED-NEXT:    movb $0, -11(%rdi)
; DISABLED-NEXT:    movups -16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_overlap_5:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    vmovups -16(%rdi), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
; CHECK-AVX2-NEXT:    movslq %esi, %rax
; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT:    movb %al, -14(%rdi)
; CHECK-AVX2-NEXT:    movb $0, -11(%rdi)
; CHECK-AVX2-NEXT:    movzwl -16(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 16(%rdi)
; CHECK-AVX2-NEXT:    movb -14(%rdi), %al
; CHECK-AVX2-NEXT:    movb %al, 18(%rdi)
; CHECK-AVX2-NEXT:    movzwl -13(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 19(%rdi)
; CHECK-AVX2-NEXT:    movb -11(%rdi), %al
; CHECK-AVX2-NEXT:    movb %al, 21(%rdi)
; CHECK-AVX2-NEXT:    movq -10(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 22(%rdi)
; CHECK-AVX2-NEXT:    movzwl -2(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 30(%rdi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_overlap_5:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    vmovups -16(%rdi), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
; CHECK-AVX512-NEXT:    movslq %esi, %rax
; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT:    movb %al, -14(%rdi)
; CHECK-AVX512-NEXT:    movb $0, -11(%rdi)
; CHECK-AVX512-NEXT:    movzwl -16(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 16(%rdi)
; CHECK-AVX512-NEXT:    movb -14(%rdi), %al
; CHECK-AVX512-NEXT:    movb %al, 18(%rdi)
; CHECK-AVX512-NEXT:    movzwl -13(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 19(%rdi)
; CHECK-AVX512-NEXT:    movb -11(%rdi), %al
; CHECK-AVX512-NEXT:    movb %al, 21(%rdi)
; CHECK-AVX512-NEXT:    movq -10(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 22(%rdi)
; CHECK-AVX512-NEXT:    movzwl -2(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 30(%rdi)
; CHECK-AVX512-NEXT:    retq
entry:
  %add.ptr = getelementptr inbounds i8, i8* %A, i64 -16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %A, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  %0 = bitcast i8* %add.ptr to i64*
  store i64 %conv, i64* %0, align 8
  %conv2 = trunc i32 %x to i8
  %add.ptr3 = getelementptr inbounds i8, i8* %A, i64 -14
  store i8 %conv2, i8* %add.ptr3, align 1
  %add.ptr4 = getelementptr inbounds i8, i8* %A, i64 -11
  store i8 0, i8* %add.ptr4, align 1
  %add.ptr5 = getelementptr inbounds i8, i8* %A, i64 16
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 %add.ptr5, i8* nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

attributes #1 = { argmemonly nounwind }