• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
3; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
4; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX
5; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
6; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
7; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512
8;
9; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SSE42
10; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
11; RUN: opt < %s -enable-no-nans-fp-math  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX
12
13target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
14target triple = "x86_64-apple-macosx10.8.0"
15
16define i32 @ceil(i32 %arg) {
17; SSE2-LABEL: 'ceil'
18; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
19; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
20; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
21; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
22; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
23; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
24; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
25; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
26; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
27;
28; SSE42-LABEL: 'ceil'
29; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
30; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
31; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
32; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
33; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
34; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
35; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
36; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
37; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
38;
39; AVX-LABEL: 'ceil'
40; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
41; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
42; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
43; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
44; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
45; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
46; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
47; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
48; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
49;
50; AVX512-LABEL: 'ceil'
51; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
52; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
53; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
54; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
55; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
56; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
57; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
58; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
59; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
60;
61  %F32 = call float @llvm.ceil.f32(float undef)
62  %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
63  %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
64  %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
65
66  %F64 = call double @llvm.ceil.f64(double undef)
67  %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
68  %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
69  %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
70
71  ret i32 undef
72}
73
74define i32 @floor(i32 %arg) {
75; SSE2-LABEL: 'floor'
76; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.floor.f32(float undef)
77; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
78; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
79; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
80; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.floor.f64(double undef)
81; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
82; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
83; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
84; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
85;
86; SSE42-LABEL: 'floor'
87; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
88; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
89; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
90; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
91; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
92; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
93; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
94; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
95; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
96;
97; AVX-LABEL: 'floor'
98; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
99; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
100; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
101; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
102; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
103; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
104; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
105; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
106; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
107;
108; AVX512-LABEL: 'floor'
109; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
110; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
111; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
112; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
113; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
114; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
115; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
116; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
117; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
118;
119  %F32 = call float @llvm.floor.f32(float undef)
120  %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
121  %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
122  %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
123
124  %F64 = call double @llvm.floor.f64(double undef)
125  %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
126  %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
127  %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
128
129  ret i32 undef
130}
131
132define i32 @nearbyint(i32 %arg) {
133; SSE2-LABEL: 'nearbyint'
134; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
135; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
136; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
137; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
138; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
139; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
140; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
141; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
142; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
143;
144; SSE42-LABEL: 'nearbyint'
145; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
146; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
147; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
148; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
149; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
150; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
151; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
152; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
153; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
154;
155; AVX-LABEL: 'nearbyint'
156; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
157; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
158; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
159; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
160; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
161; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
162; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
163; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
164; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
165;
166; AVX512-LABEL: 'nearbyint'
167; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
168; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
169; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
170; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
171; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
172; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
173; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
174; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
175; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
176;
177  %F32 = call float @llvm.nearbyint.f32(float undef)
178  %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
179  %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
180  %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
181
182  %F64 = call double @llvm.nearbyint.f64(double undef)
183  %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
184  %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
185  %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
186
187  ret i32 undef
188}
189
190define i32 @rint(i32 %arg) {
191; SSE2-LABEL: 'rint'
192; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.rint.f32(float undef)
193; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
194; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
195; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
196; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.rint.f64(double undef)
197; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
198; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
199; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
200; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
201;
202; SSE42-LABEL: 'rint'
203; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
204; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
205; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
206; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
207; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
208; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
209; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
210; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
211; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
212;
213; AVX-LABEL: 'rint'
214; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
215; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
216; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
217; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
218; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
219; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
220; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
221; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
222; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
223;
224; AVX512-LABEL: 'rint'
225; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
226; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
227; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
228; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
229; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
230; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
231; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
232; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
233; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
234;
235  %F32 = call float @llvm.rint.f32(float undef)
236  %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
237  %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
238  %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
239
240  %F64 = call double @llvm.rint.f64(double undef)
241  %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
242  %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
243  %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
244
245  ret i32 undef
246}
247
248define i32 @trunc(i32 %arg) {
249; SSE2-LABEL: 'trunc'
250; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
251; SSE2-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
252; SSE2-NEXT:  Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
253; SSE2-NEXT:  Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
254; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
255; SSE2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
256; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
257; SSE2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
258; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
259;
260; SSE42-LABEL: 'trunc'
261; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
262; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
263; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
264; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
265; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
266; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
267; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
268; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
269; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
270;
271; AVX-LABEL: 'trunc'
272; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
273; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
274; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
275; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
276; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
277; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
278; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
279; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
280; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
281;
282; AVX512-LABEL: 'trunc'
283; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
284; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
285; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
286; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
287; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
288; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
289; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
290; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
291; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
292;
293  %F32 = call float @llvm.trunc.f32(float undef)
294  %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
295  %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
296  %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
297
298  %F64 = call double @llvm.trunc.f64(double undef)
299  %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
300  %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
301  %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
302
303  ret i32 undef
304}
305
306declare float @llvm.ceil.f32(float)
307declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
308declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
309declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
310
311declare double @llvm.ceil.f64(double)
312declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
313declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
314declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
315
316declare float @llvm.floor.f32(float)
317declare <4 x float> @llvm.floor.v4f32(<4 x float>)
318declare <8 x float> @llvm.floor.v8f32(<8 x float>)
319declare <16 x float> @llvm.floor.v16f32(<16 x float>)
320
321declare double @llvm.floor.f64(double)
322declare <2 x double> @llvm.floor.v2f64(<2 x double>)
323declare <4 x double> @llvm.floor.v4f64(<4 x double>)
324declare <8 x double> @llvm.floor.v8f64(<8 x double>)
325
326declare float @llvm.nearbyint.f32(float)
327declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
328declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
329declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
330
331declare double @llvm.nearbyint.f64(double)
332declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
333declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
334declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
335
336declare float @llvm.rint.f32(float)
337declare <4 x float> @llvm.rint.v4f32(<4 x float>)
338declare <8 x float> @llvm.rint.v8f32(<8 x float>)
339declare <16 x float> @llvm.rint.v16f32(<16 x float>)
340
341declare double @llvm.rint.f64(double)
342declare <2 x double> @llvm.rint.v2f64(<2 x double>)
343declare <4 x double> @llvm.rint.v4f64(<4 x double>)
344declare <8 x double> @llvm.rint.v8f64(<8 x double>)
345
346declare float @llvm.trunc.f32(float)
347declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
348declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
349declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
350
351declare double @llvm.trunc.f64(double)
352declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
353declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
354declare <8 x double> @llvm.trunc.v8f64(<8 x double>)
355