1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py 2; RUN: opt -cost-model -analyze -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP 3; RUN: opt -cost-model -analyze -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP 4; RUN: opt -cost-model -analyze -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-RECIP 5; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE 6; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE 7; RUN: opt -cost-model -analyze -cost-kind=code-size -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE-SIZE 8 9declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) 10declare {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64>, <2 x i64>) 11declare {<4 x i64>, <4 x i1>} @llvm.sadd.with.overflow.v4i64(<4 x i64>, <4 x i64>) 12declare {<8 x i64>, <8 x i1>} @llvm.sadd.with.overflow.v8i64(<8 x i64>, <8 x i64>) 13 14declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) 15declare {<4 x i32>, <4 x i1>} @llvm.sadd.with.overflow.v4i32(<4 x i32>, <4 x i32>) 16declare {<8 x i32>, <8 x i1>} @llvm.sadd.with.overflow.v8i32(<8 x i32>, <8 x i32>) 17declare {<16 x i32>, <16 x i1>} @llvm.sadd.with.overflow.v16i32(<16 x i32>, <16 x i32>) 18 19declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) 20declare {<8 x i16>, <8 x i1>} @llvm.sadd.with.overflow.v8i16(<8 x i16>, <8 x i16>) 21declare {<16 x i16>, <16 x i1>} @llvm.sadd.with.overflow.v16i16(<16 x i16>, <16 x i16>) 22declare {<32 x i16>, <32 x i1>} @llvm.sadd.with.overflow.v32i16(<32 x i16>, <32 x i16>) 23 24declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) 25declare {<16 x i8>, <16 x i1>} @llvm.sadd.with.overflow.v16i8(<16 x i8>, <16 x i8>) 26declare {<32 x i8>, <32 x i1>} @llvm.sadd.with.overflow.v32i8(<32 x i8>, <32 x i8>) 27declare {<64 x i8>, <64 x i1>} @llvm.sadd.with.overflow.v64i8(<64 x i8>, <64 x i8>) 28 29define i32 @sadd(i32 %arg) { 30; V8M-RECIP-LABEL: 'sadd' 31; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) 32; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 33; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 34; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 35; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) 36; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 37; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 38; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 39; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) 40; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 41; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 42; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 43; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) 44; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 45; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 46; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 47; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 48; 49; NEON-RECIP-LABEL: 'sadd' 50; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) 51; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 52; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 53; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 54; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) 55; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 56; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 57; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 58; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) 59; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 60; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 61; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 62; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) 63; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 64; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 65; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 66; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 67; 68; MVE-RECIP-LABEL: 'sadd' 69; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) 70; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 71; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 72; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 582 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 73; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) 74; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 75; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 76; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 77; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) 78; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 79; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 80; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 81; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) 82; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 83; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 84; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 85; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 86; 87; V8M-SIZE-LABEL: 'sadd' 88; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) 89; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 90; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 91; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 92; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) 93; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 94; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 95; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 96; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) 97; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 98; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 99; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 100; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) 101; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 102; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 103; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 104; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 105; 106; NEON-SIZE-LABEL: 'sadd' 107; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) 108; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 109; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 110; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 111; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) 112; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 113; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 114; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 115; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) 116; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 117; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 118; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 119; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) 120; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 121; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 122; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 123; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 124; 125; MVE-SIZE-LABEL: 'sadd' 126; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) 127; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 128; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 129; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 130; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) 131; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 132; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 133; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 134; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) 135; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 136; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 137; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 138; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) 139; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 140; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 141; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 142; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 143; 144 %I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef) 145 %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 146 %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 147 %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 148 149 %I32 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 undef, i32 undef) 150 %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 151 %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 152 %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 153 154 %I16 = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 undef, i16 undef) 155 %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 156 %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 157 %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 158 159 %I8 = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 undef, i8 undef) 160 %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 161 %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 162 %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 163 164 ret i32 undef 165} 166 167declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) 168declare {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64>, <2 x i64>) 169declare {<4 x i64>, <4 x i1>} @llvm.uadd.with.overflow.v4i64(<4 x i64>, <4 x i64>) 170declare {<8 x i64>, <8 x i1>} @llvm.uadd.with.overflow.v8i64(<8 x i64>, <8 x i64>) 171 172declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) 173declare {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32>, <4 x i32>) 174declare {<8 x i32>, <8 x i1>} @llvm.uadd.with.overflow.v8i32(<8 x i32>, <8 x i32>) 175declare {<16 x i32>, <16 x i1>} @llvm.uadd.with.overflow.v16i32(<16 x i32>, <16 x i32>) 176 177declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16) 178declare {<8 x i16>, <8 x i1>} @llvm.uadd.with.overflow.v8i16(<8 x i16>, <8 x i16>) 179declare {<16 x i16>, <16 x i1>} @llvm.uadd.with.overflow.v16i16(<16 x i16>, <16 x i16>) 180declare {<32 x i16>, <32 x i1>} @llvm.uadd.with.overflow.v32i16(<32 x i16>, <32 x i16>) 181 182declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8) 183declare {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8>, <16 x i8>) 184declare {<32 x i8>, <32 x i1>} @llvm.uadd.with.overflow.v32i8(<32 x i8>, <32 x i8>) 185declare {<64 x i8>, <64 x i1>} @llvm.uadd.with.overflow.v64i8(<64 x i8>, <64 x i8>) 186 187define i32 @uadd(i32 %arg) { 188; V8M-RECIP-LABEL: 'uadd' 189; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) 190; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 191; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 192; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 193; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) 194; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 195; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 196; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 197; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) 198; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 199; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 200; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 201; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) 202; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 203; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 204; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 205; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 206; 207; NEON-RECIP-LABEL: 'uadd' 208; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) 209; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 210; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 211; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 212; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) 213; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 214; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 215; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 216; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) 217; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 218; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 219; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 220; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) 221; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 222; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 223; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 224; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 225; 226; MVE-RECIP-LABEL: 'uadd' 227; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) 228; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 229; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 230; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 231; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) 232; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 233; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 234; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 235; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) 236; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 237; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 238; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 239; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) 240; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 241; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 242; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 243; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 244; 245; V8M-SIZE-LABEL: 'uadd' 246; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) 247; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 248; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 249; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 250; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) 251; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 252; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 253; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 254; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) 255; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 256; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 257; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 258; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) 259; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 260; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 261; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 262; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 263; 264; NEON-SIZE-LABEL: 'uadd' 265; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) 266; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 267; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 268; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 269; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) 270; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 271; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 272; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 273; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) 274; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 275; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 276; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 277; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) 278; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 279; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 280; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 281; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 282; 283; MVE-SIZE-LABEL: 'uadd' 284; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) 285; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 286; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 287; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 288; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) 289; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 290; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 291; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 292; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) 293; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 294; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 295; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 296; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) 297; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 298; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 299; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 300; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 301; 302 %I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef) 303 %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 304 %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 305 %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 306 307 %I32 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) 308 %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 309 %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 310 %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 311 312 %I16 = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 undef, i16 undef) 313 %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 314 %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 315 %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 316 317 %I8 = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 undef, i8 undef) 318 %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 319 %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 320 %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 321 322 ret i32 undef 323} 324 325declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) 326declare {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x i64>, <2 x i64>) 327declare {<4 x i64>, <4 x i1>} @llvm.ssub.with.overflow.v4i64(<4 x i64>, <4 x i64>) 328declare {<8 x i64>, <8 x i1>} @llvm.ssub.with.overflow.v8i64(<8 x i64>, <8 x i64>) 329 330declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) 331declare {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32>, <4 x i32>) 332declare {<8 x i32>, <8 x i1>} @llvm.ssub.with.overflow.v8i32(<8 x i32>, <8 x i32>) 333declare {<16 x i32>, <16 x i1>} @llvm.ssub.with.overflow.v16i32(<16 x i32>, <16 x i32>) 334 335declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16) 336declare {<8 x i16>, <8 x i1>} @llvm.ssub.with.overflow.v8i16(<8 x i16>, <8 x i16>) 337declare {<16 x i16>, <16 x i1>} @llvm.ssub.with.overflow.v16i16(<16 x i16>, <16 x i16>) 338declare {<32 x i16>, <32 x i1>} @llvm.ssub.with.overflow.v32i16(<32 x i16>, <32 x i16>) 339 340declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8) 341declare {<16 x i8>, <16 x i1>} @llvm.ssub.with.overflow.v16i8(<16 x i8>, <16 x i8>) 342declare {<32 x i8>, <32 x i1>} @llvm.ssub.with.overflow.v32i8(<32 x i8>, <32 x i8>) 343declare {<64 x i8>, <64 x i1>} @llvm.ssub.with.overflow.v64i8(<64 x i8>, <64 x i8>) 344 345define i32 @ssub(i32 %arg) { 346; V8M-RECIP-LABEL: 'ssub' 347; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) 348; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 349; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 350; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 351; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) 352; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 353; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 354; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 355; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) 356; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 357; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 358; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 359; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) 360; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 361; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 362; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 363; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 364; 365; NEON-RECIP-LABEL: 'ssub' 366; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) 367; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 368; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 369; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 370; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) 371; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 372; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 373; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 374; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) 375; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 376; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 377; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 378; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) 379; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 380; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 381; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 382; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 383; 384; MVE-RECIP-LABEL: 'ssub' 385; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) 386; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 387; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 388; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 582 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 389; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) 390; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 391; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 392; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 393; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) 394; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 395; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 396; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 397; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) 398; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 399; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 400; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 401; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 402; 403; V8M-SIZE-LABEL: 'ssub' 404; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) 405; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 406; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 407; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 408; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) 409; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 410; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 411; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 412; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) 413; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 414; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 415; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 416; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) 417; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 418; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 419; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 420; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 421; 422; NEON-SIZE-LABEL: 'ssub' 423; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) 424; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 425; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 426; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 427; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) 428; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 429; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 430; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 431; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) 432; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 433; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 434; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 435; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) 436; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 437; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 438; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 439; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 440; 441; MVE-SIZE-LABEL: 'ssub' 442; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) 443; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 444; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 445; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 446; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) 447; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 448; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 449; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 450; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) 451; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 452; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 453; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 454; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) 455; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 456; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 457; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 458; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 459; 460 %I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef) 461 %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 462 %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 463 %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 464 465 %I32 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 undef, i32 undef) 466 %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 467 %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 468 %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 469 470 %I16 = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 undef, i16 undef) 471 %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 472 %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 473 %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 474 475 %I8 = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 undef, i8 undef) 476 %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 477 %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 478 %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 479 480 ret i32 undef 481} 482 483declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) 484declare {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64>, <2 x i64>) 485declare {<4 x i64>, <4 x i1>} @llvm.usub.with.overflow.v4i64(<4 x i64>, <4 x i64>) 486declare {<8 x i64>, <8 x i1>} @llvm.usub.with.overflow.v8i64(<8 x i64>, <8 x i64>) 487 488declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) 489declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>) 490declare {<8 x i32>, <8 x i1>} @llvm.usub.with.overflow.v8i32(<8 x i32>, <8 x i32>) 491declare {<16 x i32>, <16 x i1>} @llvm.usub.with.overflow.v16i32(<16 x i32>, <16 x i32>) 492 493declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16) 494declare {<8 x i16>, <8 x i1>} @llvm.usub.with.overflow.v8i16(<8 x i16>, <8 x i16>) 495declare {<16 x i16>, <16 x i1>} @llvm.usub.with.overflow.v16i16(<16 x i16>, <16 x i16>) 496declare {<32 x i16>, <32 x i1>} @llvm.usub.with.overflow.v32i16(<32 x i16>, <32 x i16>) 497 498declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8) 499declare {<16 x i8>, <16 x i1>} @llvm.usub.with.overflow.v16i8(<16 x i8>, <16 x i8>) 500declare {<32 x i8>, <32 x i1>} @llvm.usub.with.overflow.v32i8(<32 x i8>, <32 x i8>) 501declare {<64 x i8>, <64 x i1>} @llvm.usub.with.overflow.v64i8(<64 x i8>, <64 x i8>) 502 503define i32 @usub(i32 %arg) { 504; V8M-RECIP-LABEL: 'usub' 505; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) 506; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 507; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 508; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 509; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) 510; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 511; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 512; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 513; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) 514; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 515; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 516; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 517; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) 518; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 519; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 520; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 521; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 522; 523; NEON-RECIP-LABEL: 'usub' 524; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) 525; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 526; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 527; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 528; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) 529; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 530; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 531; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 532; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) 533; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 534; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 535; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 536; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) 537; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 538; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 539; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 540; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 541; 542; MVE-RECIP-LABEL: 'usub' 543; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) 544; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 545; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 546; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 547; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) 548; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 549; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 550; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 551; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) 552; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 553; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 554; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 555; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) 556; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 557; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 558; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 559; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 560; 561; V8M-SIZE-LABEL: 'usub' 562; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) 563; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 564; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 565; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 566; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) 567; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 568; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 569; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 570; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) 571; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 572; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 573; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 574; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) 575; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 576; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 577; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 578; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 579; 580; NEON-SIZE-LABEL: 'usub' 581; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) 582; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 583; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 584; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 585; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) 586; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 587; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 588; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 589; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) 590; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 591; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 592; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 593; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) 594; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 595; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 596; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 597; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 598; 599; MVE-SIZE-LABEL: 'usub' 600; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef) 601; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 602; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 603; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 604; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef) 605; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 606; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 607; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 608; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef) 609; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 610; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 611; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 612; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef) 613; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 614; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 615; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 616; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 617; 618 %I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef) 619 %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 620 %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 621 %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 622 623 %I32 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 undef, i32 undef) 624 %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 625 %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 626 %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 627 628 %I16 = call {i16, i1} @llvm.usub.with.overflow.i16(i16 undef, i16 undef) 629 %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 630 %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 631 %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 632 633 %I8 = call {i8, i1} @llvm.usub.with.overflow.i8(i8 undef, i8 undef) 634 %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 635 %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 636 %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 637 638 ret i32 undef 639} 640 641declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) 642declare {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64>, <2 x i64>) 643declare {<4 x i64>, <4 x i1>} @llvm.smul.with.overflow.v4i64(<4 x i64>, <4 x i64>) 644declare {<8 x i64>, <8 x i1>} @llvm.smul.with.overflow.v8i64(<8 x i64>, <8 x i64>) 645 646declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) 647declare {<4 x i32>, <4 x i1>} @llvm.smul.with.overflow.v4i32(<4 x i32>, <4 x i32>) 648declare {<8 x i32>, <8 x i1>} @llvm.smul.with.overflow.v8i32(<8 x i32>, <8 x i32>) 649declare {<16 x i32>, <16 x i1>} @llvm.smul.with.overflow.v16i32(<16 x i32>, <16 x i32>) 650 651declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) 652declare {<8 x i16>, <8 x i1>} @llvm.smul.with.overflow.v8i16(<8 x i16>, <8 x i16>) 653declare {<16 x i16>, <16 x i1>} @llvm.smul.with.overflow.v16i16(<16 x i16>, <16 x i16>) 654declare {<32 x i16>, <32 x i1>} @llvm.smul.with.overflow.v32i16(<32 x i16>, <32 x i16>) 655 656declare {i8, i1} @llvm.smul.with.overflow.i8(i8, i8) 657declare {<16 x i8>, <16 x i1>} @llvm.smul.with.overflow.v16i8(<16 x i8>, <16 x i8>) 658declare {<32 x i8>, <32 x i1>} @llvm.smul.with.overflow.v32i8(<32 x i8>, <32 x i8>) 659declare {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x i8>) 660 661define i32 @smul(i32 %arg) { 662; V8M-RECIP-LABEL: 'smul' 663; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) 664; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 665; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 666; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 667; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) 668; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 669; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 670; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 671; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) 672; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 673; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 674; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 675; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) 676; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 677; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 678; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 679; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 680; 681; NEON-RECIP-LABEL: 'smul' 682; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) 683; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 684; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 154 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 685; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 300 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 686; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) 687; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 688; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 689; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 690; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) 691; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 692; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 693; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 694; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) 695; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 696; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 697; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 698; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 699; 700; MVE-RECIP-LABEL: 'smul' 701; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) 702; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 703; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 704; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 705; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) 706; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 707; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 708; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1208 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 709; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) 710; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 711; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 712; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 424 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 713; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) 714; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 715; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 716; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 424 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 717; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 718; 719; V8M-SIZE-LABEL: 'smul' 720; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) 721; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 722; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 723; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 724; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) 725; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 726; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 727; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 728; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) 729; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 730; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 731; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 732; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) 733; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 734; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 735; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 736; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 737; 738; NEON-SIZE-LABEL: 'smul' 739; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) 740; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 741; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 742; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 743; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) 744; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 745; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 746; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 747; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) 748; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 749; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 750; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 751; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) 752; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 753; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 754; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 755; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 756; 757; MVE-SIZE-LABEL: 'smul' 758; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef) 759; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 760; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 761; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 325 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 762; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef) 763; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 764; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 765; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 557 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 766; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef) 767; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 768; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 769; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 770; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef) 771; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 772; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 773; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 774; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 775; 776 %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef) 777 %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 778 %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 779 %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 780 781 %I32 = call {i32, i1} @llvm.smul.with.overflow.i32(i32 undef, i32 undef) 782 %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 783 %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 784 %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 785 786 %I16 = call {i16, i1} @llvm.smul.with.overflow.i16(i16 undef, i16 undef) 787 %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 788 %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 789 %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 790 791 %I8 = call {i8, i1} @llvm.smul.with.overflow.i8(i8 undef, i8 undef) 792 %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 793 %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 794 %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 795 796 ret i32 undef 797} 798 799declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) 800declare {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x i64>, <2 x i64>) 801declare {<4 x i64>, <4 x i1>} @llvm.umul.with.overflow.v4i64(<4 x i64>, <4 x i64>) 802declare {<8 x i64>, <8 x i1>} @llvm.umul.with.overflow.v8i64(<8 x i64>, <8 x i64>) 803 804declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) 805declare {<4 x i32>, <4 x i1>} @llvm.umul.with.overflow.v4i32(<4 x i32>, <4 x i32>) 806declare {<8 x i32>, <8 x i1>} @llvm.umul.with.overflow.v8i32(<8 x i32>, <8 x i32>) 807declare {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32>, <16 x i32>) 808 809declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) 810declare {<8 x i16>, <8 x i1>} @llvm.umul.with.overflow.v8i16(<8 x i16>, <8 x i16>) 811declare {<16 x i16>, <16 x i1>} @llvm.umul.with.overflow.v16i16(<16 x i16>, <16 x i16>) 812declare {<32 x i16>, <32 x i1>} @llvm.umul.with.overflow.v32i16(<32 x i16>, <32 x i16>) 813 814declare {i8, i1} @llvm.umul.with.overflow.i8(i8, i8) 815declare {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8>, <16 x i8>) 816declare {<32 x i8>, <32 x i1>} @llvm.umul.with.overflow.v32i8(<32 x i8>, <32 x i8>) 817declare {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64 x i8>, <64 x i8>) 818 819define i32 @umul(i32 %arg) { 820; V8M-RECIP-LABEL: 'umul' 821; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) 822; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 823; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 824; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 825; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) 826; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 827; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 828; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 304 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 829; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) 830; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 831; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 832; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 833; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) 834; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 835; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 836; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 837; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 838; 839; NEON-RECIP-LABEL: 'umul' 840; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) 841; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 842; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 843; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 844; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) 845; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 846; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 847; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 848; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) 849; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 850; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 851; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 852; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) 853; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 854; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 855; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 856; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 857; 858; MVE-RECIP-LABEL: 'umul' 859; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) 860; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 861; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 862; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 863; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) 864; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 865; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 866; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1200 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 867; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) 868; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 869; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 870; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 871; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) 872; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 873; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 874; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 875; MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef 876; 877; V8M-SIZE-LABEL: 'umul' 878; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) 879; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 880; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 881; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 882; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) 883; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 884; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 885; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 886; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) 887; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 888; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 889; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 890; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) 891; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 892; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 893; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 894; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 895; 896; NEON-SIZE-LABEL: 'umul' 897; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) 898; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 899; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 900; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 901; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) 902; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 903; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 904; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 905; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) 906; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 907; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 908; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 909; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) 910; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 911; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 912; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 913; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 914; 915; MVE-SIZE-LABEL: 'umul' 916; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef) 917; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 918; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 919; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 920; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef) 921; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 922; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 923; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 553 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 924; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef) 925; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 926; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 927; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 928; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef) 929; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 930; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 931; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 932; MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 933; 934 %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef) 935 %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef) 936 %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef) 937 %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef) 938 939 %I32 = call {i32, i1} @llvm.umul.with.overflow.i32(i32 undef, i32 undef) 940 %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef) 941 %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef) 942 %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef) 943 944 %I16 = call {i16, i1} @llvm.umul.with.overflow.i16(i16 undef, i16 undef) 945 %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef) 946 %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef) 947 %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef) 948 949 %I8 = call {i8, i1} @llvm.umul.with.overflow.i8(i8 undef, i8 undef) 950 %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef) 951 %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef) 952 %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef) 953 954 ret i32 undef 955} 956