; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s


; SSE2 scalar double -> i64 conversion intrinsic. With +avx512f enabled the
; expected assembly is the v-prefixed form, vcvtsd2si, writing %rax.
define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_sse2_cvtsd2si64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtsd2si %xmm0, %rax
; CHECK-NEXT:    retq
  %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

; SSE2 i64 -> scalar double conversion intrinsic. The integer argument arrives
; in %rdi and the expected assembly is vcvtsi2sd with %xmm0 as both the
; pass-through source and the destination.
define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
; CHECK-LABEL: test_x86_sse2_cvtsi642sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtsi2sd %rdi, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

; AVX-512 truncating double -> signed i64 conversion. The intrinsic is called
; with the i32 operand set to 4 and then to 8, and the two results are summed.
; Per the expected assembly, operand 4 yields the plain instruction and
; operand 8 yields the {sae} form.
define i64 @test_x86_avx512_cvttsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2si64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvttsd2si %xmm0, %rcx
; CHECK-NEXT:    vcvttsd2si {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res0 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 4)
  %res1 = call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %a0, i32 8)
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32) nounwind readnone

; AVX-512 truncating double -> unsigned i64 conversion. Same shape as the
; signed test above: operand 4 gives the plain vcvttsd2usi, operand 8 gives
; the {sae} form, and the two results are added.
define i64 @test_x86_avx512_cvttsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttsd2usi64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvttsd2usi %xmm0, %rcx
; CHECK-NEXT:    vcvttsd2usi {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res0 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 4)
  %res1 = call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %a0, i32 8)
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32) nounwind readnone

; SSE scalar float -> i64 conversion intrinsic. With +avx512f enabled the
; expected assembly is the v-prefixed form, vcvtss2si, writing %rax.
define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_sse_cvtss2si64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtss2si %xmm0, %rax
; CHECK-NEXT:    retq
  %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone


; SSE i64 -> scalar float conversion intrinsic. The integer argument arrives
; in %rdi and the expected assembly is vcvtsi2ss with %xmm0 as both the
; pass-through source and the destination.
define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
; CHECK-LABEL: test_x86_sse_cvtsi642ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone


; AVX-512 truncating float -> signed i64 conversion. Called with the i32
; operand set to 4 (plain instruction expected) and to 8 ({sae} form
; expected); the two results are summed.
define i64 @test_x86_avx512_cvttss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2si64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvttss2si %xmm0, %rcx
; CHECK-NEXT:    vcvttss2si {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res0 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 4)
  %res1 = call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %a0, i32 8)
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32) nounwind readnone

; AVX-512 truncating float -> unsigned i32 conversion (the only 32-bit result
; in this file). Unlike the neighbouring tests the operand-8 call comes first,
; so the expected assembly lists the {sae} form before the plain one.
define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2usi:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvttss2usi {sae}, %xmm0, %ecx
; CHECK-NEXT:    vcvttss2usi %xmm0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    retq
  %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8)
  %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4)
  %res2 = add i32 %res0, %res1
  ret i32 %res2
}
declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone

; AVX-512 truncating float -> unsigned i64 conversion. Operand 4 gives the
; plain vcvttss2usi, operand 8 gives the {sae} form; the results are added.
define i64 @test_x86_avx512_cvttss2usi64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvttss2usi64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvttss2usi %xmm0, %rcx
; CHECK-NEXT:    vcvttss2usi {sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
  %res0 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 4)
  %res1 = call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %a0, i32 8)
  %res2 = add i64 %res0, %res1
  ret i64 %res2
}
declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone

; AVX-512 rounding double -> unsigned i64 conversion. The intrinsic is called
; three times with the i32 operand set to 4, 11 and 9. Per the expected
; assembly these select the plain instruction, the {rz-sae} form and the
; {rd-sae} form respectively; the three results are summed.
define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2usi64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtsd2usi %xmm0, %rax
; CHECK-NEXT:    vcvtsd2usi {rz-sae}, %xmm0, %rcx
; CHECK-NEXT:    addq %rax, %rcx
; CHECK-NEXT:    vcvtsd2usi {rd-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq

  %res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4)
  %res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 11)
  %res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 9)
  %res3 = add i64 %res, %res1
  %res4 = add i64 %res3, %res2
  ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone

; AVX-512 rounding double -> signed i64 conversion. Same three-call pattern as
; the unsigned test above: operands 4, 11 and 9 map to the plain, {rz-sae}
; and {rd-sae} forms of vcvtsd2si in the expected assembly.
define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtsd2si64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtsd2si %xmm0, %rax
; CHECK-NEXT:    vcvtsd2si {rz-sae}, %xmm0, %rcx
; CHECK-NEXT:    addq %rax, %rcx
; CHECK-NEXT:    vcvtsd2si {rd-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq

  %res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4)
  %res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 11)
  %res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 9)
  %res3 = add i64 %res, %res1
  %res4 = add i64 %res3, %res2
  ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone

; AVX-512 rounding float -> unsigned i64 conversion. The intrinsic is called
; three times with the i32 operand set to 4, 11 and 9. Per the expected
; assembly these select the plain instruction, the {rz-sae} form and the
; {rd-sae} form respectively; the three results are summed.
define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2usi64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtss2usi %xmm0, %rax
; CHECK-NEXT:    vcvtss2usi {rz-sae}, %xmm0, %rcx
; CHECK-NEXT:    addq %rax, %rcx
; CHECK-NEXT:    vcvtss2usi {rd-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq

  %res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4)
  %res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 11)
  %res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 9)
  %res3 = add i64 %res, %res1
  %res4 = add i64 %res3, %res2
  ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone

; AVX-512 rounding float -> signed i64 conversion. Same three-call pattern as
; the unsigned test above: operands 4, 11 and 9 map to the plain, {rz-sae}
; and {rd-sae} forms of vcvtss2si in the expected assembly.
define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) {
; CHECK-LABEL: test_x86_avx512_cvtss2si64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtss2si %xmm0, %rax
; CHECK-NEXT:    vcvtss2si {rz-sae}, %xmm0, %rcx
; CHECK-NEXT:    addq %rax, %rcx
; CHECK-NEXT:    vcvtss2si {rd-sae}, %xmm0, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq

  %res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4)
  %res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 11)
  %res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 9)
  %res3 = add i64 %res, %res1
  %res4 = add i64 %res3, %res2
  ret i64 %res4
}
declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone

; AVX-512 signed i64 -> scalar double conversion with the i32 operand set to
; 11; the expected assembly is vcvtsi2sd carrying the {rz-sae} modifier.
define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2sd64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtsi2sd %rdi, {rz-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 11) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double>, i64, i32) nounwind readnone

; AVX-512 signed i64 -> scalar float conversion with the i32 operand set to
; 11; the expected assembly is vcvtsi2ss carrying the {rz-sae} modifier.
define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) {
; CHECK-LABEL: test_x86_avx512_cvtsi2ss64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtsi2ss %rdi, {rz-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 11) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float>, i64, i32) nounwind readnone

; AVX-512 unsigned i64 -> scalar float conversion with the i32 operand set to
; 9; the expected assembly is vcvtusi2ss carrying the {rd-sae} modifier.
define <4 x float> @_mm_cvt_roundu64_ss(<4 x float> %a, i64 %b) {
; CHECK-LABEL: _mm_cvt_roundu64_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtusi2ss %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 9) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}

; AVX-512 unsigned i64 -> scalar float conversion with the i32 operand set to
; 4; the expected assembly is the plain vcvtusi2ss without a rounding modifier.
define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b) {
; CHECK-LABEL: _mm_cvtu64_ss:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtusi2ss %rdi, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float>, i64, i32) nounwind readnone

; AVX-512 unsigned i64 -> scalar double conversion with the i32 operand set to
; 9; the expected assembly is vcvtusi2sd carrying the {rd-sae} modifier.
; NOTE(review): despite the name, this is the explicit-rounding variant. The
; names of this test and the next one look swapped relative to the _ss pair
; above (where "cvt_roundu64" uses operand 9 and "cvtu64" uses operand 4).
; Names are left unchanged here; confirm before renaming.
define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b) {
; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtusi2sd %rdi, {rd-sae}, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 9) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}

; AVX-512 unsigned i64 -> scalar double conversion with the i32 operand set to
; 4; the expected assembly is the plain vcvtusi2sd without a rounding modifier.
; NOTE(review): despite "cvt_round" in the name, no rounding modifier is
; requested or expected here; see the naming note on the previous test.
define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b) {
; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vcvtusi2sd %rdi, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64, i32) nounwind readnone