; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s

; Each function below feeds its arguments to one XOP intrinsic (or, for the
; vpcmov group, to a plain xor/and/or sequence) and the assertions pin the
; single instruction expected in the generated x86-64 assembly.  Variants whose
; names end in _mr, _rm or _mem load one operand through a pointer argument and
; expect that operand to show up as (%rdi) in the instruction.

;
; vpermil2pd, 128-bit.  _mr loads the second vector operand, _rm the third.
;

define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $1, %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 1)
  ret <2 x double> %result
}

define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_mr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $1, %xmm1, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %mem = load <2 x double>, <2 x double>* %a1
  %result = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %mem, <2 x i64> %a2, i8 1)
  ret <2 x double> %result
}

define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x i64>* %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_rm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $1, (%rdi), %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %mem = load <2 x i64>, <2 x i64>* %a2
  %result = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %mem, i8 1)
  ret <2 x double> %result
}

declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone

;
; vpermil2pd, 256-bit.  Same three forms as above on ymm registers.
;

define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $2, %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %result = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 2)
  ret <4 x double> %result
}

define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_mr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $2, %ymm1, (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %mem = load <4 x double>, <4 x double>* %a1
  %result = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %mem, <4 x i64> %a2, i8 2)
  ret <4 x double> %result
}

define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x i64>* %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2pd_256_rm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2pd $2, (%rdi), %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %mem = load <4 x i64>, <4 x i64>* %a2
  %result = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %mem, i8 2)
  ret <4 x double> %result
}

declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone

;
; vpermil2ps, 128-bit and 256-bit, register operands only.
;

define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2, i8 3)
  ret <4 x float> %result
}

declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone

define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpermil2ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermil2ps $4, %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %result = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2, i8 4)
  ret <8 x float> %result
}

declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone

;
; vpcmov.  No intrinsic is called here: each test spells out
; (a0 & a2) | (a1 & ~a2) with generic xor/and/or and expects one vpcmov.
;

define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %inv = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %keep = and <2 x i64> %a0, %a2
  %fill = and <2 x i64> %a1, %inv
  %blend = or <2 x i64> %keep, %fill
  ret <2 x i64> %blend
}

define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %inv = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %keep = and <4 x i64> %a0, %a2
  %fill = and <4 x i64> %a1, %inv
  %blend = or <4 x i64> %keep, %fill
  ret <4 x i64> %blend
}

define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov_256_mr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmov %ymm1, (%rdi), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %mem = load <4 x i64>, <4 x i64>* %a1
  %inv = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %keep = and <4 x i64> %a0, %a2
  %fill = and <4 x i64> %mem, %inv
  %blend = or <4 x i64> %keep, %fill
  ret <4 x i64> %blend
}

define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
; CHECK-LABEL: test_int_x86_xop_vpcmov_256_rm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmov (%rdi), %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %mem = load <4 x i64>, <4 x i64>* %a2
  %inv = xor <4 x i64> %mem, <i64 -1, i64 -1, i64 -1, i64 -1>
  %keep = and <4 x i64> %a0, %mem
  %fill = and <4 x i64> %a1, %inv
  %blend = or <4 x i64> %keep, %fill
  ret <4 x i64> %blend
}

;
; vphadd* family: one vector in, one vector with wider elements out.
;

define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddbd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddbd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddbq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddbq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone

define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddbw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0)
  ret <8 x i16> %result
}

declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphadddq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphadddq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone

define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddubd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddubd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddubq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddubq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone

define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddubw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddubw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0)
  ret <8 x i16> %result
}

declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddudq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddudq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone

define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphadduwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphadduwd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphadduwq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphadduwq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone

define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddwd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphaddwq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddwq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone

;
; vphsub* family.  The _mem variants load the source vector from a pointer.
;

define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphsubbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubbw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0)
  ret <8 x i16> %result
}

declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphsubdq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubdq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0)
  ret <2 x i64> %result
}

define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vphsubdq_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubdq (%rdi), %xmm0
; CHECK-NEXT:    retq
  %mem = load <4 x i32>, <4 x i32>* %a0
  %result = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %mem)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone

define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
; CHECK-LABEL: test_int_x86_xop_vphsubwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubwd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0)
  ret <4 x i32> %result
}

define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vphsubwd_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubwd (%rdi), %xmm0
; CHECK-NEXT:    retq
  %mem = load <8 x i16>, <8 x i16>* %a0
  %result = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %mem)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone

;
; vpmacs* / vpmadcs* family: three vector operands, all in registers unless
; the test name ends in _mem.
;

define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacsdd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacsdqh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacsdql:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacssdd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacssdqh:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacssdql:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacsswd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacssww:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %result
}

declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacswd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmacsww:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %result
}

declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmadcsswd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmadcswd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %result
}

define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpmadcswd_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmadcswd %xmm1, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %mem = load <8 x i16>, <8 x i16>* %a1
  %result = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %mem, <4 x i32> %a2)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

;
; vpperm.  _rm loads the third operand, _mr the second.
;

define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpperm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2)
  ret <16 x i8> %result
}

define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
; CHECK-LABEL: test_int_x86_xop_vpperm_rm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpperm (%rdi), %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %mem = load <16 x i8>, <16 x i8>* %a2
  %result = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %mem)
  ret <16 x i8> %result
}

define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_int_x86_xop_vpperm_mr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpperm %xmm1, (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %mem = load <16 x i8>, <16 x i8>* %a1
  %result = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %mem, <16 x i8> %a2)
  ret <16 x i8> %result
}

declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

;
; vpsha* / vpshl* family: two vector operands of the same type.
;

define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshab:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshab %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}

declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshad:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshad %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshaq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshaw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshlb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %result
}

declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshld:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %result
}

declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshlq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %result
}

declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshlw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %result
}

; _rm loads the second operand of vpshlw, _mr loads the first.
define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshlw_rm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshlw (%rdi), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %mem = load <8 x i16>, <8 x i16>* %a1
  %result = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %mem)
  ret <8 x i16> %result
}

define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_int_x86_xop_vpshlw_mr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshlw %xmm0, (%rdi), %xmm0
; CHECK-NEXT:    retq
  %mem = load <8 x i16>, <8 x i16>* %a0
  %result = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %mem, <8 x i16> %a1)
  ret <8 x i16> %result
}

declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone

;
; vfrcz* family.  The scalar _mem variants load a single float/double and
; insert it into element 0 of an undef vector before calling the intrinsic;
; the packed _mem variants load the whole vector.
;

define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ss:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0)
  ret <4 x float> %result
}

define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ss_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczss (%rdi), %xmm0
; CHECK-NEXT:    retq
  %scalar = load float, float* %a0
  %lane0 = insertelement <4 x float> undef, float %scalar, i32 0
  %result = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %lane0)
  ret <4 x float> %result
}

declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone

define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_sd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczsd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0)
  ret <2 x double> %result
}

define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_sd_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczsd (%rdi), %xmm0
; CHECK-NEXT:    retq
  %scalar = load double, double* %a0
  %lane0 = insertelement <2 x double> undef, double %scalar, i32 0
  %result = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %lane0)
  ret <2 x double> %result
}

declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone

define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczpd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0)
  ret <2 x double> %result
}

define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczpd (%rdi), %xmm0
; CHECK-NEXT:    retq
  %mem = load <2 x double>, <2 x double>* %a0
  %result = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %mem)
  ret <2 x double> %result
}

declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone

define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczpd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %result = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0)
  ret <4 x double> %result
}

define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_pd_256_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczpd (%rdi), %ymm0
; CHECK-NEXT:    retq
  %mem = load <4 x double>, <4 x double>* %a0
  %result = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %mem)
  ret <4 x double> %result
}

declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone

define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %result = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0)
  ret <4 x float> %result
}

define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczps (%rdi), %xmm0
; CHECK-NEXT:    retq
  %mem = load <4 x float>, <4 x float>* %a0
  %result = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %mem)
  ret <4 x float> %result
}

declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone

define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczps %ymm0, %ymm0
; CHECK-NEXT:    retq
  %result = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0)
  ret <8 x float> %result
}

define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
; CHECK-LABEL: test_int_x86_xop_vfrcz_ps_256_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfrczps (%rdi), %ymm0
; CHECK-NEXT:    retq
  %mem = load <8 x float>, <8 x float>* %a0
  %result = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %mem)
  ret <8 x float> %result
}

declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone