; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=+avx2 | FileCheck %s

define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpackssdw
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone


define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpacksswb
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpackuswb
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpaddsb
  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpaddsw
  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpaddusb
  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpaddusw
  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpavgb
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpavgw
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmaddwd
  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmaxs_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmaxsw
  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pmaxu_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpmaxub
  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmins_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpminsw
  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminu_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpminub
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone


define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
  ; CHECK: vpmovmskb
  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmulhw
  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmulhuw
  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpmuludq
  %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpsadbw
  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpslld
  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsllq
  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsllw
  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
  ; CHECK: vpslld
  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
  ; CHECK: vpsllq
  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
  ; CHECK: vpsllw
  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrad
  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsraw
  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
  ; CHECK: vpsrad
  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
  ; CHECK: vpsraw
  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrld
  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsrlq
  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsrlw
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
  ; CHECK: vpsrld
  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
  ; CHECK: vpsrlq
  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
  ; CHECK: vpsrlw
  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone


define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpsubsb
  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpsubsw
  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpsubusb
  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpsubusw
  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
  ; CHECK: vpabsb
  %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
  ; CHECK: vpabsd
  %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
  ; CHECK: vpabsw
  %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vphaddd
  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vphaddsw
  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vphaddw
  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vphsubd
  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vphsubsw
  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vphsubw
  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpmaddubsw
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmulhrsw
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpshufb
  %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone


define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpsignb
  %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpsignd
  %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpsignw
  %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vmovntdqa
  %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly


define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vmpsadbw
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone


define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpackusdw
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone


define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
  ; CHECK: vpblendvb
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpblendw
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone


define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpmaxsb
  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpmaxsd
  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpmaxud
  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpmaxuw
  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
  ; CHECK: vpminsb
  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpminsd
  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pminud(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpminud
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
  ; CHECK: vpminuw
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
  ; CHECK: vpmovsxbd
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
  ; CHECK: vpmovsxbq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
  ; CHECK: vpmovsxbw
  %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
  ; CHECK: vpmovsxdq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
  ; CHECK: vpmovsxwd
  %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
  ; CHECK: vpmovsxwq
  %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
  ; CHECK: vpmovzxbd
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
  ; CHECK: vpmovzxbq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
  ; CHECK: vpmovzxbw
  %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
  ; CHECK: vpmovzxdq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
  ; CHECK: vpmovzxwd
  %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
  ; CHECK: vpmovzxwq
  %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_pmul_dq(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpmuldq
  %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone


define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
  ; CHECK: vbroadcastsd
  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly


define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
  ; CHECK: vbroadcastss
  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly


define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
  ; CHECK: vbroadcastss
  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpblendd
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpblendd
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone


define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
  ; CHECK: vpbroadcastb
  %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly


define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
  ; CHECK: vpbroadcastb
  %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly


define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
  ; CHECK: vpbroadcastw
  %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly


define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
  ; CHECK: vpbroadcastw
  %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly


define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
  ; CHECK: vbroadcastss
  %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
  ; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}}
  %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly


define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
  ; CHECK: vpbroadcastq
  %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
  ; CHECK: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
  %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly


define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
  ; Check that the arguments are swapped between the intrinsic definition
  ; and its lowering. Indeed, the offsets are the first source in
  ; the instruction.
  ; CHECK: vpermd %ymm0, %ymm1, %ymm0
  %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly


define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
  ; Check that the arguments are swapped between the intrinsic definition
  ; and its lowering. Indeed, the offsets are the first source in
  ; the instruction.
  ; CHECK: vpermps %ymm0, %ymm1, %ymm0
  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
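
; Illustrative sketch (an addition for exposition, not part of the original
; test and not covered by any CHECK line; the function name is hypothetical):
; when the permutation indices are constants, the same cross-lane reordering
; can be written as a plain shufflevector, which the AVX2 backend can lower
; to vpermd with a constant index vector.
define <8 x i32> @example_permd_constant_indices(<8 x i32> %v) {
  ; Reverse the eight 32-bit lanes of %v.
  %res = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i32> %res
}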

define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vperm2i128
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly


define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
  ; CHECK: vpmaskmovq
  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) {
  ; CHECK: vpmaskmovq
  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly


define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) {
  ; CHECK: vpmaskmovd
  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) {
  ; CHECK: vpmaskmovd
  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly


define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
  ; CHECK: vpmaskmovq
  call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind


define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
  ; CHECK: vpmaskmovq
  call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind


define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
  ; CHECK: vpmaskmovd
  call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind


define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
  ; CHECK: vpmaskmovd
  call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind


define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsllvd
  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpsllvd
  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsllvq
  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vpsllvq
  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone


define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrlvd
  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpsrlvd
  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsrlvq
  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
  ; CHECK: vpsrlvq
  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone


define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsravd
  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
  ; CHECK: vpsravd
  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone

; This is checked here because the execution dependency fix pass makes it hard
; to test in AVX mode, since we don't have 256-bit integer instructions there.
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
  ; CHECK: vmovdqu
  ; The add operation forces the execution domain to integer.
  %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
  ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
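
; Illustrative companion (an addition for exposition, not part of the original
; test and not covered by any CHECK line; the function name is hypothetical):
; the same store without an integer op feeding it leaves the execution domain
; unconstrained, so the backend may legally emit the store in the
; floating-point domain (e.g. vmovups) instead of vmovdqu.
define void @example_storeu_dq_256_any_domain(i8* %a0, <32 x i8> %a1) {
  call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1)
  ret void
}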

define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1,
                     <4 x i32> %idx, <2 x double> %mask) {
  ; CHECK: vgatherdpd
  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
                     i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
                     <4 x i32>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1,
                     <4 x i32> %idx, <4 x double> %mask) {
  ; CHECK: vgatherdpd
  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
                     i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
                     <4 x i32>, <4 x double>, i8) nounwind readonly

define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1,
                     <2 x i64> %idx, <2 x double> %mask) {
  ; CHECK: vgatherqpd
  %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
                     i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*,
                     <2 x i64>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1,
                     <4 x i64> %idx, <4 x double> %mask) {
  ; CHECK: vgatherqpd
  %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
                     i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*,
                     <4 x i64>, <4 x double>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1,
                     <4 x i32> %idx, <4 x float> %mask) {
  ; CHECK: vgatherdps
  %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
                     i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
                     <4 x i32>, <4 x float>, i8) nounwind readonly

define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1,
                     <8 x i32> %idx, <8 x float> %mask) {
  ; CHECK: vgatherdps
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                     i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
                     <8 x i32>, <8 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1,
                     <2 x i64> %idx, <4 x float> %mask) {
  ; CHECK: vgatherqps
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
                     i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*,
                     <2 x i64>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1,
                     <4 x i64> %idx, <4 x float> %mask) {
  ; CHECK: vgatherqps
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
                     i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*,
                     <4 x i64>, <4 x float>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1,
                     <4 x i32> %idx, <2 x i64> %mask) {
  ; CHECK: vpgatherdq
  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
                     i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*,
                     <4 x i32>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1,
                     <4 x i32> %idx, <4 x i64> %mask) {
  ; CHECK: vpgatherdq
  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
                     i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*,
                     <4 x i32>, <4 x i64>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1,
                     <2 x i64> %idx, <2 x i64> %mask) {
  ; CHECK: vpgatherqq
  %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
                     i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*,
                     <2 x i64>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1,
                     <4 x i64> %idx, <4 x i64> %mask) {
  ; CHECK: vpgatherqq
  %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
                     i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*,
                     <4 x i64>, <4 x i64>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1,
                     <4 x i32> %idx, <4 x i32> %mask) {
  ; CHECK: vpgatherdd
  %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
                     i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*,
                     <4 x i32>, <4 x i32>, i8) nounwind readonly

define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1,
                     <8 x i32> %idx, <8 x i32> %mask) {
  ; CHECK: vpgatherdd
  %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
                     i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*,
                     <8 x i32>, <8 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1,
                     <2 x i64> %idx, <4 x i32> %mask) {
  ; CHECK: vpgatherqd
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
                     i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*,
                     <2 x i64>, <4 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1,
                     <4 x i64> %idx, <4 x i32> %mask) {
  ; CHECK: vpgatherqd
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
                     i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
                     <4 x i64>, <4 x i32>, i8) nounwind readonly

; PR13298
define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a,
                     <8 x i32> %idx, <8 x float> %mask,
                     float* nocapture %out) {
; CHECK: test_gather_mask
; CHECK: vmovaps %ymm2, [[DEST:%.*]]
; CHECK: vgatherdps [[DEST]]
;; gather with mask
  %a_i8 = bitcast float* %a to i8*
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                     i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4)

;; for debugging, we'll just dump out the mask
  %out_ptr = bitcast float* %out to <8 x float>*
  store <8 x float> %mask, <8 x float>* %out_ptr, align 4

  ret <8 x float> %res
}
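
; Illustrative scalar sketch (an addition for exposition, not part of the
; original test and not covered by any CHECK line; names are hypothetical):
; per element, a gather loads from base + index * scale when the sign bit of
; the corresponding mask element is set, and otherwise keeps the pass-through
; value. Unlike the real instruction, this sketch loads unconditionally, so
; it models only the selection, not the fault suppression of masked-off
; elements.
define float @example_gather_one_element(float %src, i8* %base, i32 %idx, float %mask) {
  %m = bitcast float %mask to i32
  %neg = icmp slt i32 %m, 0                     ; sign bit of the mask element
  %scaled = mul i32 %idx, 4                     ; scale of 4 bytes per float
  %off = sext i32 %scaled to i64
  %addr = getelementptr i8* %base, i64 %off
  %ptr = bitcast i8* %addr to float*
  %val = load float* %ptr
  %res = select i1 %neg, float %val, float %src ; masked-off element keeps %src
  ret float %res
}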