; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512
;
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SSE42
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX

; Cost-model expectations for the ceil, floor, nearbyint, rint and trunc
; intrinsics. Each function below calls one intrinsic family on scalar and
; vector float (f32, v4f32, v8f32, v16f32) and double (f64, v2f64, v4f64,
; v8f64) operands; the expected per-instruction costs are listed once per
; check prefix selected by the run lines above.
;
; The check lines are autogenerated (see the note on the first line), so
; they should be regenerated with that script rather than edited by hand.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; Costs of the llvm.ceil family for every tested scalar and vector width.
define i32 @ceil(i32 %arg) {
; SSE2-LABEL: 'ceil'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'ceil'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'ceil'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'ceil'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %F32 = call float @llvm.ceil.f32(float undef)
  %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
  %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
  %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)

  %F64 = call double @llvm.ceil.f64(double undef)
  %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
  %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
  %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)

  ret i32 undef
}

; Costs of the llvm.floor family for every tested scalar and vector width.
define i32 @floor(i32 %arg) {
; SSE2-LABEL: 'floor'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.floor.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.floor.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'floor'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'floor'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'floor'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %F32 = call float @llvm.floor.f32(float undef)
  %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
  %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
  %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)

  %F64 = call double @llvm.floor.f64(double undef)
  %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
  %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
  %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)

  ret i32 undef
}

; Costs of the llvm.nearbyint family for every tested scalar and vector width.
define i32 @nearbyint(i32 %arg) {
; SSE2-LABEL: 'nearbyint'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'nearbyint'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'nearbyint'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'nearbyint'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %F32 = call float @llvm.nearbyint.f32(float undef)
  %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
  %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
  %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)

  %F64 = call double @llvm.nearbyint.f64(double undef)
  %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
  %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
  %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)

  ret i32 undef
}

; Costs of the llvm.rint family for every tested scalar and vector width.
define i32 @rint(i32 %arg) {
; SSE2-LABEL: 'rint'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.rint.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.rint.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'rint'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'rint'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'rint'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %F32 = call float @llvm.rint.f32(float undef)
  %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
  %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
  %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)

  %F64 = call double @llvm.rint.f64(double undef)
  %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
  %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
  %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)

  ret i32 undef
}

; Costs of the llvm.trunc family for every tested scalar and vector width.
define i32 @trunc(i32 %arg) {
; SSE2-LABEL: 'trunc'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'trunc'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'trunc'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'trunc'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
  %F32 = call float @llvm.trunc.f32(float undef)
  %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
  %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
  %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)

  %F64 = call double @llvm.trunc.f64(double undef)
  %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
  %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
  %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)

  ret i32 undef
}

; Declarations of every intrinsic overload called by the functions above.
declare float @llvm.ceil.f32(float)
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
declare <16 x float> @llvm.ceil.v16f32(<16 x float>)

declare double @llvm.ceil.f64(double)
declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
declare <8 x double> @llvm.ceil.v8f64(<8 x double>)

declare float @llvm.floor.f32(float)
declare <4 x float> @llvm.floor.v4f32(<4 x float>)
declare <8 x float> @llvm.floor.v8f32(<8 x float>)
declare <16 x float> @llvm.floor.v16f32(<16 x float>)

declare double @llvm.floor.f64(double)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
declare <4 x double> @llvm.floor.v4f64(<4 x double>)
declare <8 x double> @llvm.floor.v8f64(<8 x double>)

declare float @llvm.nearbyint.f32(float)
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)

declare double @llvm.nearbyint.f64(double)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)

declare float @llvm.rint.f32(float)
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
declare <8 x float> @llvm.rint.v8f32(<8 x float>)
declare <16 x float> @llvm.rint.v16f32(<16 x float>)

declare double @llvm.rint.f64(double)
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
declare <4 x double> @llvm.rint.v4f64(<4 x double>)
declare <8 x double> @llvm.rint.v8f64(<8 x double>)

declare float @llvm.trunc.f32(float)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
declare <16 x float> @llvm.trunc.v16f32(<16 x float>)

declare double @llvm.trunc.f64(double)
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
declare <8 x double> @llvm.trunc.v8f64(<8 x double>)