; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64

; Codegen tests for the SSE4.2 string-compare intrinsics declared below, run
; for both an i686 and an x86-64 triple. Each family is exercised with register
; and memory operands and with three kinds of result use: flag only ("eq"),
; index only ("idx"), and index followed by element extraction ("diff").
;
; NOTE(review): the *_reg_*_i16 tests now use control immediate 25 (imm 24 with
; bit 0 set, i.e. 16-bit elements per the Intel SDM imm8 encoding) like their
; *_mem_*_i16 counterparts, and their "diff" variants compare against 8. The
; corresponding assertions were adjusted to match; re-run
; utils/update_llc_test_checks.py to confirm them.

declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32, <16 x i8>, i32, i8)
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32, <16 x i8>, i32, i8)
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32, <16 x i8>, i32, i8)
declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8>, i8)
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8>, i8)
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8>, i8)

; Explicit-length compare, register operands: only the carry-flag result is
; used (tested against zero).
define i1 @pcmpestri_reg_eq_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_eq_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Explicit-length compare, register operands: the index result is returned
; unchanged.
define i32 @pcmpestri_reg_idx_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_idx_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  ret i32 %idx
}

; Explicit-length compare, register operands: returns 0 when the index is 16,
; otherwise the zero-extended difference of the two elements at that index.
define i32 @pcmpestri_reg_diff_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_diff_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    movl 12(%ebp), %edx
; X32-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X32-NEXT:    cmpl $16, %ecx
; X32-NEXT:    jne .LBB2_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB2_3
; X32-NEXT:  .LBB2_2: # %compare
; X32-NEXT:    movdqa %xmm0, (%esp)
; X32-NEXT:    andl $15, %ecx
; X32-NEXT:    movb (%esp,%ecx), %al
; X32-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    subb 16(%esp,%ecx), %al
; X32-NEXT:  .LBB2_3: # %exit
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB2_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB2_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movb -24(%rsp,%rcx), %al
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

; Same as pcmpestri_reg_eq_i8, but both vectors are loaded (align 1) from
; pointers.
define i1 @pcmpestri_mem_eq_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_eq_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movdqu (%esi), %xmm0
; X32-NEXT:    pcmpestri $24, (%ecx), %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    popl %esi
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $24, (%r8), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Same as pcmpestri_reg_idx_i8, but both vectors are loaded (align 1) from
; pointers.
define i32 @pcmpestri_mem_idx_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_idx_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movdqu (%esi), %xmm0
; X32-NEXT:    pcmpestri $24, (%ecx), %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    popl %esi
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $24, (%r8), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  ret i32 %idx
}

; Same as pcmpestri_reg_diff_i8, but both vectors are loaded (align 1) from
; pointers; the loaded values are reused for the element extraction.
define i32 @pcmpestri_mem_diff_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_diff_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    pushl %esi
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 12(%ebp), %eax
; X32-NEXT:    movl 20(%ebp), %edx
; X32-NEXT:    movl 16(%ebp), %ecx
; X32-NEXT:    movl 8(%ebp), %esi
; X32-NEXT:    movdqu (%esi), %xmm1
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpestri $24, %xmm0, %xmm1
; X32-NEXT:    cmpl $16, %ecx
; X32-NEXT:    jne .LBB5_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB5_3
; X32-NEXT:  .LBB5_2: # %compare
; X32-NEXT:    movdqa %xmm1, (%esp)
; X32-NEXT:    andl $15, %ecx
; X32-NEXT:    movb (%esp,%ecx), %al
; X32-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    subb 16(%esp,%ecx), %al
; X32-NEXT:  .LBB5_3: # %exit
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    leal -4(%ebp), %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rdx), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $24, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB5_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB5_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movb -24(%rsp,%rcx), %al
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

; <8 x i16> variant of pcmpestri_reg_eq_i8: operands are bitcast to <16 x i8>
; for the intrinsic. Uses control immediate 25, matching pcmpestri_mem_eq_i16.
define i1 @pcmpestri_reg_eq_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_eq_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; <8 x i16> variant of pcmpestri_reg_idx_i8. Uses control immediate 25,
; matching pcmpestri_mem_idx_i16.
define i32 @pcmpestri_reg_idx_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_idx_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  ret i32 %idx
}

; <8 x i16> variant of pcmpestri_reg_diff_i8. The index is compared against 8
; (the element count of <8 x i16>) before it is used to index the i16 vectors,
; matching pcmpestri_mem_diff_i16.
define i32 @pcmpestri_reg_diff_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_reg_diff_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    movl 12(%ebp), %edx
; X32-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X32-NEXT:    cmpl $8, %ecx
; X32-NEXT:    jne .LBB8_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB8_3
; X32-NEXT:  .LBB8_2: # %compare
; X32-NEXT:    movdqa %xmm0, (%esp)
; X32-NEXT:    addl %ecx, %ecx
; X32-NEXT:    andl $14, %ecx
; X32-NEXT:    movzwl (%esp,%ecx), %eax
; X32-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    subw 16(%esp,%ecx), %ax
; X32-NEXT:  .LBB8_3: # %exit
; X32-NEXT:    movzwl %ax, %eax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $25, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $8, %ecx
; X64-NEXT:    jne .LBB8_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB8_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  %eq = icmp eq i32 %idx, 8
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

; <8 x i16> variant of pcmpestri_mem_eq_i8 (control immediate 25).
define i1 @pcmpestri_mem_eq_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_eq_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movdqu (%esi), %xmm0
; X32-NEXT:    pcmpestri $25, (%ecx), %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    popl %esi
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $25, (%r8), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; <8 x i16> variant of pcmpestri_mem_idx_i8 (control immediate 25).
define i32 @pcmpestri_mem_idx_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_idx_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movdqu (%esi), %xmm0
; X32-NEXT:    pcmpestri $25, (%ecx), %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    popl %esi
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $25, (%r8), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  ret i32 %idx
}

; <8 x i16> variant of pcmpestri_mem_diff_i8 (control immediate 25, index
; compared against 8).
define i32 @pcmpestri_mem_diff_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i32 %rhs_len) nounwind {
; X32-LABEL: pcmpestri_mem_diff_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    pushl %esi
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 12(%ebp), %eax
; X32-NEXT:    movl 20(%ebp), %edx
; X32-NEXT:    movl 16(%ebp), %ecx
; X32-NEXT:    movl 8(%ebp), %esi
; X32-NEXT:    movdqu (%esi), %xmm1
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpestri $25, %xmm0, %xmm1
; X32-NEXT:    cmpl $8, %ecx
; X32-NEXT:    jne .LBB11_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB11_3
; X32-NEXT:  .LBB11_2: # %compare
; X32-NEXT:    movdqa %xmm1, (%esp)
; X32-NEXT:    addl %ecx, %ecx
; X32-NEXT:    andl $14, %ecx
; X32-NEXT:    movzwl (%esp,%ecx), %eax
; X32-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    subw 16(%esp,%ecx), %ax
; X32-NEXT:  .LBB11_3: # %exit
; X32-NEXT:    movzwl %ax, %eax
; X32-NEXT:    leal -4(%ebp), %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rdx), %xmm0
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $25, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $8, %ecx
; X64-NEXT:    jne .LBB11_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB11_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  %eq = icmp eq i32 %idx, 8
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

; Implicit-length compare, register operands: only the carry-flag result is
; used (tested against zero).
define i1 @pcmpistri_reg_eq_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_eq_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Implicit-length compare, register operands: the index result is returned
; unchanged.
define i32 @pcmpistri_reg_idx_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_idx_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  ret i32 %idx
}

; Implicit-length compare, register operands: returns 0 when the index is 16,
; otherwise the zero-extended difference of the two elements at that index.
define i32 @pcmpistri_reg_diff_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_diff_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X32-NEXT:    cmpl $16, %ecx
; X32-NEXT:    jne .LBB14_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB14_2: # %compare
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movdqa %xmm0, (%esp)
; X32-NEXT:    andl $15, %ecx
; X32-NEXT:    movb (%esp,%ecx), %al
; X32-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    subb 16(%esp,%ecx), %al
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB14_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB14_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movb -24(%rsp,%rcx), %al
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

; Same as pcmpistri_reg_eq_i8, but both vectors are loaded (align 1) from
; pointers.
define i1 @pcmpistri_mem_eq_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_eq_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpistri $24, (%eax), %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $24, (%rsi), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Same as pcmpistri_reg_idx_i8, but both vectors are loaded (align 1) from
; pointers.
define i32 @pcmpistri_mem_idx_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_idx_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpistri $24, (%eax), %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $24, (%rsi), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  ret i32 %idx
}

; Same as pcmpistri_reg_diff_i8, but both vectors are loaded (align 1) from
; pointers; the loaded values are reused for the element extraction.
define i32 @pcmpistri_mem_diff_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_diff_i8:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 12(%ebp), %eax
; X32-NEXT:    movl 8(%ebp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm1
; X32-NEXT:    movdqu (%eax), %xmm0
; X32-NEXT:    pcmpistri $24, %xmm0, %xmm1
; X32-NEXT:    cmpl $16, %ecx
; X32-NEXT:    jne .LBB17_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB17_3
; X32-NEXT:  .LBB17_2: # %compare
; X32-NEXT:    movdqa %xmm1, (%esp)
; X32-NEXT:    andl $15, %ecx
; X32-NEXT:    movb (%esp,%ecx), %al
; X32-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    subb 16(%esp,%ecx), %al
; X32-NEXT:  .LBB17_3: # %exit
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rsi), %xmm0
; X64-NEXT:    pcmpistri $24, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB17_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB17_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movb -24(%rsp,%rcx), %al
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i8* %lhs_ptr to <16 x i8>*
  %lhs = load <16 x i8>, <16 x i8>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i8* %rhs_ptr to <16 x i8>*
  %rhs = load <16 x i8>, <16 x i8>* %rhs_vptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

; <8 x i16> variant of pcmpistri_reg_eq_i8: operands are bitcast to <16 x i8>
; for the intrinsic. Uses control immediate 25, matching pcmpistri_mem_eq_i16.
define i1 @pcmpistri_reg_eq_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_eq_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; <8 x i16> variant of pcmpistri_reg_idx_i8. Uses control immediate 25,
; matching pcmpistri_mem_idx_i16.
define i32 @pcmpistri_reg_idx_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_idx_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  ret i32 %idx
}

; <8 x i16> variant of pcmpistri_reg_diff_i8. The index is compared against 8
; (the element count of <8 x i16>) before it is used to index the i16 vectors,
; matching pcmpistri_mem_diff_i16.
define i32 @pcmpistri_reg_diff_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind {
; X32-LABEL: pcmpistri_reg_diff_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X32-NEXT:    cmpl $8, %ecx
; X32-NEXT:    jne .LBB20_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    movzwl %ax, %eax
; X32-NEXT:    retl
; X32-NEXT:  .LBB20_2: # %compare
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movdqa %xmm0, (%esp)
; X32-NEXT:    addl %ecx, %ecx
; X32-NEXT:    andl $14, %ecx
; X32-NEXT:    movzwl (%esp,%ecx), %eax
; X32-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT:    subw 16(%esp,%ecx), %ax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    movzwl %ax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $25, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $8, %ecx
; X64-NEXT:    jne .LBB20_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB20_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  %eq = icmp eq i32 %idx, 8
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

; <8 x i16> variant of pcmpistri_mem_eq_i8 (control immediate 25).
define i1 @pcmpistri_mem_eq_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_eq_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpistri $25, (%eax), %xmm0
; X32-NEXT:    setae %al
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $25, (%rsi), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; <8 x i16> variant of pcmpistri_mem_idx_i8 (control immediate 25).
define i32 @pcmpistri_mem_idx_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_idx_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm0
; X32-NEXT:    pcmpistri $25, (%eax), %xmm0
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $25, (%rsi), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  ret i32 %idx
}

; <8 x i16> variant of pcmpistri_mem_diff_i8 (control immediate 25, index
; compared against 8).
define i32 @pcmpistri_mem_diff_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind {
; X32-LABEL: pcmpistri_mem_diff_i16:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-16, %esp
; X32-NEXT:    subl $48, %esp
; X32-NEXT:    movl 12(%ebp), %eax
; X32-NEXT:    movl 8(%ebp), %ecx
; X32-NEXT:    movdqu (%ecx), %xmm1
; X32-NEXT:    movdqu (%eax), %xmm0
; X32-NEXT:    pcmpistri $25, %xmm0, %xmm1
; X32-NEXT:    cmpl $8, %ecx
; X32-NEXT:    jne .LBB23_2
; X32-NEXT:  # %bb.1:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    jmp .LBB23_3
; X32-NEXT:  .LBB23_2: # %compare
; X32-NEXT:    movdqa %xmm1, (%esp)
; X32-NEXT:    addl %ecx, %ecx
; X32-NEXT:    andl $14, %ecx
; X32-NEXT:    movzwl (%esp,%ecx), %eax
; X32-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    subw 16(%esp,%ecx), %ax
; X32-NEXT:  .LBB23_3: # %exit
; X32-NEXT:    movzwl %ax, %eax
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rsi), %xmm0
; X64-NEXT:    pcmpistri $25, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $8, %ecx
; X64-NEXT:    jne .LBB23_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB23_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx,2), %eax
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx,2), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs_vptr = bitcast i16* %lhs_ptr to <8 x i16>*
  %lhs = load <8 x i16>, <8 x i16>* %lhs_vptr, align 1
  %rhs_vptr = bitcast i16* %rhs_ptr to <8 x i16>*
  %rhs = load <8 x i16>, <8 x i16>* %rhs_vptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  %eq = icmp eq i32 %idx, 8
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

; Calls both the flag and the index intrinsic with identical operands and
; stores each result; the assertions expect a single pcmpestri to serve both.
define void @pcmpestr_index_flag(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i32* %iptr, i32* %fptr) nounwind {
; X32-LABEL: pcmpestr_index_flag:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X32-NEXT:    setb %bl
; X32-NEXT:    movl %ecx, (%edi)
; X32-NEXT:    movl %ebx, (%esi)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestr_index_flag:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rcx, %r8
; X64-NEXT:    movq %rdx, %r9
; X64-NEXT:    xorl %r10d, %r10d
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    setb %r10b
; X64-NEXT:    movl %ecx, (%r9)
; X64-NEXT:    movl %r10d, (%r8)
; X64-NEXT:    retq
entry:
  %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %index = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  store i32 %index, i32* %iptr
  store i32 %flag, i32* %fptr
  ret void
}

; Calls both the flag and the mask intrinsic with identical operands and
; stores each result; the assertions expect a single pcmpestrm to serve both.
define void @pcmpestr_mask_flag(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, <16 x i8>* %mptr, i32* %fptr) nounwind {
; X32-LABEL: pcmpestr_mask_flag:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    pcmpestrm $24, %xmm1, %xmm0
; X32-NEXT:    setb %bl
; X32-NEXT:    movdqa %xmm0, (%esi)
; X32-NEXT:    movl %ebx, (%ecx)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestr_mask_flag:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    xorl %r9d, %r9d
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestrm $24, %xmm1, %xmm0
; X64-NEXT:    setb %r9b
; X64-NEXT:    movdqa %xmm0, (%r8)
; X64-NEXT:    movl %r9d, (%rcx)
; X64-NEXT:    retq
entry:
  %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %mask = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  store <16 x i8> %mask, <16 x i8>* %mptr
  store i32 %flag, i32* %fptr
  ret void
}

define void @pcmpestr_mask_index(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, <16 x i8>* %mptr, i32* %iptr) nounwind {
; X32-LABEL: pcmpestr_mask_index:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movdqa %xmm0, %xmm2
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    pcmpestrm $24, %xmm1, %xmm0
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    pcmpestri $24, %xmm1, %xmm2
; X32-NEXT:    movdqa %xmm0, (%edi)
; X32-NEXT:    movl %ecx, (%esi)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    retl
;
; X64-LABEL: pcmpestr_mask_index:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rcx, %r8
; X64-NEXT:    movq %rdx, %r9
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    pcmpestrm $24, %xmm1, %xmm0
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm2
; X64-NEXT:    movdqa %xmm0, (%r9)
; X64-NEXT:    movl %ecx, (%r8)
; X64-NEXT:    retq
entry:
  %index = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %mask = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
store <16 x i8> %mask, <16 x i8>* %mptr 1079 store i32 %index, i32* %iptr 1080 ret void 1081} 1082 1083define void @pcmpestr_mask_index_flag(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, <16 x i8>* %mptr, i32* %iptr, i32* %fptr) nounwind { 1084; X32-LABEL: pcmpestr_mask_index_flag: 1085; X32: # %bb.0: # %entry 1086; X32-NEXT: pushl %ebp 1087; X32-NEXT: pushl %ebx 1088; X32-NEXT: pushl %edi 1089; X32-NEXT: pushl %esi 1090; X32-NEXT: movdqa %xmm0, %xmm2 1091; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1092; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 1093; X32-NEXT: pcmpestrm $24, %xmm1, %xmm0 1094; X32-NEXT: movl {{[0-9]+}}(%esp), %esi 1095; X32-NEXT: movl {{[0-9]+}}(%esp), %edi 1096; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp 1097; X32-NEXT: xorl %ebx, %ebx 1098; X32-NEXT: pcmpestri $24, %xmm1, %xmm2 1099; X32-NEXT: setb %bl 1100; X32-NEXT: movdqa %xmm0, (%ebp) 1101; X32-NEXT: movl %ecx, (%edi) 1102; X32-NEXT: movl %ebx, (%esi) 1103; X32-NEXT: popl %esi 1104; X32-NEXT: popl %edi 1105; X32-NEXT: popl %ebx 1106; X32-NEXT: popl %ebp 1107; X32-NEXT: retl 1108; 1109; X64-LABEL: pcmpestr_mask_index_flag: 1110; X64: # %bb.0: # %entry 1111; X64-NEXT: movq %rcx, %r9 1112; X64-NEXT: movq %rdx, %r10 1113; X64-NEXT: movdqa %xmm0, %xmm2 1114; X64-NEXT: movl %edi, %eax 1115; X64-NEXT: movl %esi, %edx 1116; X64-NEXT: pcmpestrm $24, %xmm1, %xmm0 1117; X64-NEXT: xorl %esi, %esi 1118; X64-NEXT: pcmpestri $24, %xmm1, %xmm2 1119; X64-NEXT: setb %sil 1120; X64-NEXT: movdqa %xmm0, (%r10) 1121; X64-NEXT: movl %ecx, (%r9) 1122; X64-NEXT: movl %esi, (%r8) 1123; X64-NEXT: retq 1124entry: 1125 %index = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1126 %mask = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1127 %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1128 store <16 x i8> %mask, <16 x i8>* %mptr 1129 
store i32 %index, i32* %iptr 1130 store i32 %flag, i32* %fptr 1131 ret void 1132} 1133 1134define void @pcmpistr_index_flag(<16 x i8> %lhs, <16 x i8> %rhs, i32* %iptr, i32* %fptr) nounwind { 1135; X32-LABEL: pcmpistr_index_flag: 1136; X32: # %bb.0: # %entry 1137; X32-NEXT: pushl %ebx 1138; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1139; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 1140; X32-NEXT: xorl %ebx, %ebx 1141; X32-NEXT: pcmpistri $24, %xmm1, %xmm0 1142; X32-NEXT: setb %bl 1143; X32-NEXT: movl %ecx, (%edx) 1144; X32-NEXT: movl %ebx, (%eax) 1145; X32-NEXT: popl %ebx 1146; X32-NEXT: retl 1147; 1148; X64-LABEL: pcmpistr_index_flag: 1149; X64: # %bb.0: # %entry 1150; X64-NEXT: xorl %eax, %eax 1151; X64-NEXT: pcmpistri $24, %xmm1, %xmm0 1152; X64-NEXT: setb %al 1153; X64-NEXT: movl %ecx, (%rdi) 1154; X64-NEXT: movl %eax, (%rsi) 1155; X64-NEXT: retq 1156entry: 1157 %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1158 %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1159 store i32 %index, i32* %iptr 1160 store i32 %flag, i32* %fptr 1161 ret void 1162} 1163 1164define void @pcmpistr_mask_flag(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8>* %mptr, i32* %fptr) nounwind { 1165; X32-LABEL: pcmpistr_mask_flag: 1166; X32: # %bb.0: # %entry 1167; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1168; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1169; X32-NEXT: xorl %edx, %edx 1170; X32-NEXT: pcmpistrm $24, %xmm1, %xmm0 1171; X32-NEXT: setb %dl 1172; X32-NEXT: movdqa %xmm0, (%ecx) 1173; X32-NEXT: movl %edx, (%eax) 1174; X32-NEXT: retl 1175; 1176; X64-LABEL: pcmpistr_mask_flag: 1177; X64: # %bb.0: # %entry 1178; X64-NEXT: xorl %eax, %eax 1179; X64-NEXT: pcmpistrm $24, %xmm1, %xmm0 1180; X64-NEXT: setb %al 1181; X64-NEXT: movdqa %xmm0, (%rdi) 1182; X64-NEXT: movl %eax, (%rsi) 1183; X64-NEXT: retq 1184entry: 1185 %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1186 %mask = call <16 x i8> 
@llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1187 store <16 x i8> %mask, <16 x i8>* %mptr 1188 store i32 %flag, i32* %fptr 1189 ret void 1190} 1191 1192define void @pcmpistr_mask_index(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8>* %mptr, i32* %iptr) nounwind { 1193; X32-LABEL: pcmpistr_mask_index: 1194; X32: # %bb.0: # %entry 1195; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1196; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 1197; X32-NEXT: pcmpistri $24, %xmm1, %xmm0 1198; X32-NEXT: pcmpistrm $24, %xmm1, %xmm0 1199; X32-NEXT: movdqa %xmm0, (%edx) 1200; X32-NEXT: movl %ecx, (%eax) 1201; X32-NEXT: retl 1202; 1203; X64-LABEL: pcmpistr_mask_index: 1204; X64: # %bb.0: # %entry 1205; X64-NEXT: pcmpistri $24, %xmm1, %xmm0 1206; X64-NEXT: pcmpistrm $24, %xmm1, %xmm0 1207; X64-NEXT: movdqa %xmm0, (%rdi) 1208; X64-NEXT: movl %ecx, (%rsi) 1209; X64-NEXT: retq 1210entry: 1211 %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1212 %mask = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1213 store <16 x i8> %mask, <16 x i8>* %mptr 1214 store i32 %index, i32* %iptr 1215 ret void 1216} 1217 1218define void @pcmpistr_mask_index_flag(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8>* %mptr, i32* %iptr, i32* %fptr) nounwind { 1219; X32-LABEL: pcmpistr_mask_index_flag: 1220; X32: # %bb.0: # %entry 1221; X32-NEXT: pushl %ebx 1222; X32-NEXT: pushl %esi 1223; X32-NEXT: movdqa %xmm0, %xmm2 1224; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1225; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 1226; X32-NEXT: movl {{[0-9]+}}(%esp), %esi 1227; X32-NEXT: pcmpistrm $24, %xmm1, %xmm0 1228; X32-NEXT: xorl %ebx, %ebx 1229; X32-NEXT: pcmpistri $24, %xmm1, %xmm2 1230; X32-NEXT: setb %bl 1231; X32-NEXT: movdqa %xmm0, (%esi) 1232; X32-NEXT: movl %ecx, (%edx) 1233; X32-NEXT: movl %ebx, (%eax) 1234; X32-NEXT: popl %esi 1235; X32-NEXT: popl %ebx 1236; X32-NEXT: retl 1237; 1238; X64-LABEL: pcmpistr_mask_index_flag: 1239; X64: # %bb.0: # %entry 1240; 
X64-NEXT: movdqa %xmm0, %xmm2 1241; X64-NEXT: pcmpistrm $24, %xmm1, %xmm0 1242; X64-NEXT: xorl %eax, %eax 1243; X64-NEXT: pcmpistri $24, %xmm1, %xmm2 1244; X64-NEXT: setb %al 1245; X64-NEXT: movdqa %xmm0, (%rdi) 1246; X64-NEXT: movl %ecx, (%rsi) 1247; X64-NEXT: movl %eax, (%rdx) 1248; X64-NEXT: retq 1249entry: 1250 %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1251 %mask = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1252 %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1253 store <16 x i8> %mask, <16 x i8>* %mptr 1254 store i32 %index, i32* %iptr 1255 store i32 %flag, i32* %fptr 1256 ret void 1257} 1258 1259; Make sure we don't fold loads when we need to emit pcmpistrm and pcmpistri. 1260define void @pcmpistr_mask_index_flag_load(<16 x i8> %lhs, <16 x i8>* %rhsptr, <16 x i8>* %mptr, i32* %iptr, i32* %fptr) nounwind { 1261; X32-LABEL: pcmpistr_mask_index_flag_load: 1262; X32: # %bb.0: # %entry 1263; X32-NEXT: pushl %ebx 1264; X32-NEXT: pushl %esi 1265; X32-NEXT: movdqa %xmm0, %xmm1 1266; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1267; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 1268; X32-NEXT: movl {{[0-9]+}}(%esp), %esi 1269; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1270; X32-NEXT: movdqu (%ecx), %xmm2 1271; X32-NEXT: pcmpistrm $24, %xmm2, %xmm0 1272; X32-NEXT: xorl %ebx, %ebx 1273; X32-NEXT: pcmpistri $24, %xmm2, %xmm1 1274; X32-NEXT: setb %bl 1275; X32-NEXT: movdqa %xmm0, (%esi) 1276; X32-NEXT: movl %ecx, (%edx) 1277; X32-NEXT: movl %ebx, (%eax) 1278; X32-NEXT: popl %esi 1279; X32-NEXT: popl %ebx 1280; X32-NEXT: retl 1281; 1282; X64-LABEL: pcmpistr_mask_index_flag_load: 1283; X64: # %bb.0: # %entry 1284; X64-NEXT: movq %rcx, %rax 1285; X64-NEXT: movdqa %xmm0, %xmm1 1286; X64-NEXT: movdqu (%rdi), %xmm2 1287; X64-NEXT: pcmpistrm $24, %xmm2, %xmm0 1288; X64-NEXT: xorl %edi, %edi 1289; X64-NEXT: pcmpistri $24, %xmm2, %xmm1 1290; X64-NEXT: setb %dil 1291; X64-NEXT: 
movdqa %xmm0, (%rsi) 1292; X64-NEXT: movl %ecx, (%rdx) 1293; X64-NEXT: movl %edi, (%rax) 1294; X64-NEXT: retq 1295entry: 1296 %rhs = load <16 x i8>, <16 x i8>* %rhsptr, align 1 1297 %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1298 %mask = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1299 %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1300 store <16 x i8> %mask, <16 x i8>* %mptr 1301 store i32 %index, i32* %iptr 1302 store i32 %flag, i32* %fptr 1303 ret void 1304} 1305 1306; Make sure we don't fold nontemporal loads. 1307define i32 @pcmpestri_nontemporal(<16 x i8> %lhs, i32 %lhs_len, <16 x i8>* %rhsptr, i32 %rhs_len) nounwind { 1308; X32-LABEL: pcmpestri_nontemporal: 1309; X32: # %bb.0: # %entry 1310; X32-NEXT: pushl %ebx 1311; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1312; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 1313; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 1314; X32-NEXT: movntdqa (%ecx), %xmm1 1315; X32-NEXT: xorl %ebx, %ebx 1316; X32-NEXT: pcmpestri $24, %xmm1, %xmm0 1317; X32-NEXT: setb %bl 1318; X32-NEXT: movl %ebx, %eax 1319; X32-NEXT: popl %ebx 1320; X32-NEXT: retl 1321; 1322; X64-LABEL: pcmpestri_nontemporal: 1323; X64: # %bb.0: # %entry 1324; X64-NEXT: movntdqa (%rsi), %xmm1 1325; X64-NEXT: xorl %esi, %esi 1326; X64-NEXT: movl %edi, %eax 1327; X64-NEXT: pcmpestri $24, %xmm1, %xmm0 1328; X64-NEXT: setb %sil 1329; X64-NEXT: movl %esi, %eax 1330; X64-NEXT: retq 1331entry: 1332 %rhs = load <16 x i8>, <16 x i8>* %rhsptr, align 16, !nontemporal !0 1333 %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1334 ret i32 %flag 1335} 1336 1337!0 = !{ i32 1 } 1338