; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c

; Every test below follows the same shape:
;   * the <N x i64> arguments are bitcast to byte/word vectors,
;   * element 0 is splatted with a shufflevector whose mask is all zeros,
;   * the mask/maskz variants bitcast an integer argument to <N x i1> and
;     select between the splat and either a pass-through vector (mask) or
;     zeroinitializer (maskz),
;   * the result is bitcast back to <N x i64>.
; The X32 checks expect the integer mask to be loaded from the stack; the X64
; checks expect it to arrive in %edi.

; 128-bit byte splat: expects a single unmasked vpbroadcastb xmm -> xmm.
define <2 x i64> @test_mm_broadcastb_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: vpbroadcastb %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: vpbroadcastb %xmm0, %xmm0
; X64-NEXT: retq
  %src.bytes = bitcast <2 x i64> %a0 to <16 x i8>
  %splat = shufflevector <16 x i8> %src.bytes, <16 x i8> undef, <16 x i32> zeroinitializer
  %out = bitcast <16 x i8> %splat to <2 x i64>
  ret <2 x i64> %out
}

; 128-bit byte splat of %a2 merged into %a0 under the 16-bit mask %a1:
; expects vpbroadcastb with a {%k1} merge mask.
define <2 x i64> @test_mm_mask_broadcastb_epi8(<2 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_mask_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastb %xmm1, %xmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastb %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
  %passthru = bitcast <2 x i64> %a0 to <16 x i8>
  %lane.mask = bitcast i16 %a1 to <16 x i1>
  %src.bytes = bitcast <2 x i64> %a2 to <16 x i8>
  %splat = shufflevector <16 x i8> %src.bytes, <16 x i8> undef, <16 x i32> zeroinitializer
  %merged = select <16 x i1> %lane.mask, <16 x i8> %splat, <16 x i8> %passthru
  %out = bitcast <16 x i8> %merged to <2 x i64>
  ret <2 x i64> %out
}

; 128-bit byte splat of %a1 with unselected lanes zeroed under the 16-bit
; mask %a0: expects vpbroadcastb with {%k1} {z}.
define <2 x i64> @test_mm_maskz_broadcastb_epi8(i16 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maskz_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
  %lane.mask = bitcast i16 %a0 to <16 x i1>
  %src.bytes = bitcast <2 x i64> %a1 to <16 x i8>
  %splat = shufflevector <16 x i8> %src.bytes, <16 x i8> undef, <16 x i32> zeroinitializer
  %zeroed = select <16 x i1> %lane.mask, <16 x i8> %splat, <16 x i8> zeroinitializer
  %out = bitcast <16 x i8> %zeroed to <2 x i64>
  ret <2 x i64> %out
}

; 256-bit byte splat from a 128-bit source: expects vpbroadcastb xmm -> ymm.
define <4 x i64> @test_mm256_broadcastb_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm256_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: vpbroadcastb %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: vpbroadcastb %xmm0, %ymm0
; X64-NEXT: retq
  %src.bytes = bitcast <2 x i64> %a0 to <16 x i8>
  %splat = shufflevector <16 x i8> %src.bytes, <16 x i8> undef, <32 x i32> zeroinitializer
  %out = bitcast <32 x i8> %splat to <4 x i64>
  ret <4 x i64> %out
}

; 256-bit byte splat of %a2 merged into %a0 under the 32-bit mask %a1:
; expects kmovd for the mask and vpbroadcastb with a {%k1} merge mask.
define <4 x i64> @test_mm256_mask_broadcastb_epi8(<4 x i64> %a0, i32 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm256_mask_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpbroadcastb %xmm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastb %xmm1, %ymm0 {%k1}
; X64-NEXT: retq
  %passthru = bitcast <4 x i64> %a0 to <32 x i8>
  %lane.mask = bitcast i32 %a1 to <32 x i1>
  %src.bytes = bitcast <2 x i64> %a2 to <16 x i8>
  %splat = shufflevector <16 x i8> %src.bytes, <16 x i8> undef, <32 x i32> zeroinitializer
  %merged = select <32 x i1> %lane.mask, <32 x i8> %splat, <32 x i8> %passthru
  %out = bitcast <32 x i8> %merged to <4 x i64>
  ret <4 x i64> %out
}

; 256-bit byte splat of %a1 with unselected lanes zeroed under the 32-bit
; mask %a0: expects kmovd for the mask and vpbroadcastb with {%k1} {z}.
define <4 x i64> @test_mm256_maskz_broadcastb_epi8(i32 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastb_epi8:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovd %eax, %k1
; X32-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_broadcastb_epi8:
; X64: # BB#0:
; X64-NEXT: kmovd %edi, %k1
; X64-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
  %lane.mask = bitcast i32 %a0 to <32 x i1>
  %src.bytes = bitcast <2 x i64> %a1 to <16 x i8>
  %splat = shufflevector <16 x i8> %src.bytes, <16 x i8> undef, <32 x i32> zeroinitializer
  %zeroed = select <32 x i1> %lane.mask, <32 x i8> %splat, <32 x i8> zeroinitializer
  %out = bitcast <32 x i8> %zeroed to <4 x i64>
  ret <4 x i64> %out
}

; 128-bit word splat: expects a single unmasked vpbroadcastw xmm -> xmm.
define <2 x i64> @test_mm_broadcastw_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: vpbroadcastw %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: vpbroadcastw %xmm0, %xmm0
; X64-NEXT: retq
  %src.words = bitcast <2 x i64> %a0 to <8 x i16>
  %splat = shufflevector <8 x i16> %src.words, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = bitcast <8 x i16> %splat to <2 x i64>
  ret <2 x i64> %out
}

; 128-bit word splat of %a2 merged into %a0 under the 8-bit mask %a1:
; expects vpbroadcastw with a {%k1} merge mask.
define <2 x i64> @test_mm_mask_broadcastw_epi16(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_mask_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastw %xmm1, %xmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastw %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
  %passthru = bitcast <2 x i64> %a0 to <8 x i16>
  %lane.mask = bitcast i8 %a1 to <8 x i1>
  %src.words = bitcast <2 x i64> %a2 to <8 x i16>
  %splat = shufflevector <8 x i16> %src.words, <8 x i16> undef, <8 x i32> zeroinitializer
  %merged = select <8 x i1> %lane.mask, <8 x i16> %splat, <8 x i16> %passthru
  %out = bitcast <8 x i16> %merged to <2 x i64>
  ret <2 x i64> %out
}

; 128-bit word splat of %a1 with unselected lanes zeroed under the 8-bit
; mask %a0: expects vpbroadcastw with {%k1} {z}.
define <2 x i64> @test_mm_maskz_broadcastw_epi16(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maskz_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
  %lane.mask = bitcast i8 %a0 to <8 x i1>
  %src.words = bitcast <2 x i64> %a1 to <8 x i16>
  %splat = shufflevector <8 x i16> %src.words, <8 x i16> undef, <8 x i32> zeroinitializer
  %zeroed = select <8 x i1> %lane.mask, <8 x i16> %splat, <8 x i16> zeroinitializer
  %out = bitcast <8 x i16> %zeroed to <2 x i64>
  ret <2 x i64> %out
}

; 256-bit word splat from a 128-bit source: expects vpbroadcastw xmm -> ymm.
define <4 x i64> @test_mm256_broadcastw_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm256_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: vpbroadcastw %xmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: vpbroadcastw %xmm0, %ymm0
; X64-NEXT: retq
  %src.words = bitcast <2 x i64> %a0 to <8 x i16>
  %splat = shufflevector <8 x i16> %src.words, <8 x i16> undef, <16 x i32> zeroinitializer
  %out = bitcast <16 x i16> %splat to <4 x i64>
  ret <4 x i64> %out
}

; 256-bit word splat of %a2 merged into %a0 under the 16-bit mask %a1:
; expects vpbroadcastw with a {%k1} merge mask.
define <4 x i64> @test_mm256_mask_broadcastw_epi16(<4 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm256_mask_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastw %xmm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastw %xmm1, %ymm0 {%k1}
; X64-NEXT: retq
  %passthru = bitcast <4 x i64> %a0 to <16 x i16>
  %lane.mask = bitcast i16 %a1 to <16 x i1>
  %src.words = bitcast <2 x i64> %a2 to <8 x i16>
  %splat = shufflevector <8 x i16> %src.words, <8 x i16> undef, <16 x i32> zeroinitializer
  %merged = select <16 x i1> %lane.mask, <16 x i16> %splat, <16 x i16> %passthru
  %out = bitcast <16 x i16> %merged to <4 x i64>
  ret <4 x i64> %out
}

; 256-bit word splat of %a1 with unselected lanes zeroed under the 16-bit
; mask %a0: expects vpbroadcastw with {%k1} {z}.
define <4 x i64> @test_mm256_maskz_broadcastw_epi16(i16 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastw_epi16:
; X32: # BB#0:
; X32-NEXT: movw {{[0-9]+}}(%esp), %ax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_broadcastw_epi16:
; X64: # BB#0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
  %lane.mask = bitcast i16 %a0 to <16 x i1>
  %src.words = bitcast <2 x i64> %a1 to <8 x i16>
  %splat = shufflevector <8 x i16> %src.words, <8 x i16> undef, <16 x i32> zeroinitializer
  %zeroed = select <16 x i1> %lane.mask, <16 x i16> %splat, <16 x i16> zeroinitializer
  %out = bitcast <16 x i16> %zeroed to <4 x i64>
  ret <4 x i64> %out
}

!0 = !{i32 1}