; RUN: llc -mtriple=arm64-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64_be-linux-gnu -mcpu=cyclone %s -o - | FileCheck --check-prefix=CHECK-BE %s

; Codegen test for 128-bit integer multiplication. The file is compiled twice
; with -mcpu=cyclone by the two llc invocations above: once for
; arm64-apple-ios7.0, verified with the default FileCheck prefix, and once for
; aarch64_be-linux-gnu, verified with the big-endian prefix.

; Plain i128 multiply with no size attributes.
; Default-prefix expectations: a mul of x0 by x3 and a umulh of x0 by x2 (the
; DAG form lets these two appear in either order), then a mul of x1 by x2, and
; finally the mul that writes x0 from x0 and x2.
; The PART1, CARRY and PART2 captures are not referenced by any later
; directive, so how the partial results are summed is not verified here.
; Big-endian expectations: the same shape with x0/x1 swapped and x2/x3 swapped.
define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
; CHECK-LABEL: test_128bitmul:
; CHECK-DAG: mul [[PART1:x[0-9]+]], x0, x3
; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
; CHECK: mul [[PART2:x[0-9]+]], x1, x2
; CHECK: mul x0, x0, x2

; CHECK-BE-LABEL: test_128bitmul:
; CHECK-BE-DAG: mul [[PART1:x[0-9]+]], x1, x2
; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
; CHECK-BE: mul [[PART2:x[0-9]+]], x0, x3
; CHECK-BE: mul x1, x1, x3

  %prod = mul i128 %lhs, %rhs
  ret i128 %prod
}

; The machine combiner should create madd instructions when
; optimizing for size because that's smaller than mul + add.

; Same multiply with the optsize attribute.
; Expected output is exactly five consecutive instructions: umulh, two madd
; instructions that chain through the HI and TEMP1 captures, the mul that
; writes x0, and ret. The NEXT-form directives forbid anything in between.
; Only the default prefix has directives for this function; the big-endian
; run does not verify it.
define i128 @test_128bitmul_optsize(i128 %lhs, i128 %rhs) optsize {
; CHECK-LABEL: test_128bitmul_optsize:
; CHECK: umulh [[HI:x[0-9]+]], x0, x2
; CHECK-NEXT: madd [[TEMP1:x[0-9]+]], x0, x3, [[HI]]
; CHECK-NEXT: madd x1, x1, x2, [[TEMP1]]
; CHECK-NEXT: mul x0, x0, x2
; CHECK-NEXT: ret

  %prod = mul i128 %lhs, %rhs
  ret i128 %prod
}

; Same multiply with the minsize attribute. The expected instruction sequence
; is identical to the optsize variant above, and again only the default prefix
; has directives for it.
define i128 @test_128bitmul_minsize(i128 %lhs, i128 %rhs) minsize {
; CHECK-LABEL: test_128bitmul_minsize:
; CHECK: umulh [[HI:x[0-9]+]], x0, x2
; CHECK-NEXT: madd [[TEMP1:x[0-9]+]], x0, x3, [[HI]]
; CHECK-NEXT: madd x1, x1, x2, [[TEMP1]]
; CHECK-NEXT: mul x0, x0, x2
; CHECK-NEXT: ret

  %prod = mul i128 %lhs, %rhs
  ret i128 %prod
}
