; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s

; The fundamental problem: an add separated from other arithmetic by a sext can't
; be combined with the later instructions. However, if the first add is 'nsw',
; then we can promote the sext ahead of that add to allow optimizations.

define i64 @add_nsw_consts(i32 %i) {
; CHECK-LABEL: add_nsw_consts:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    addq $12, %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = add i64 %ext, 7
  ret i64 %idx
}

; An x86 bonus: if we promote the sext ahead of the 'add nsw',
; we allow LEA formation and eliminate an add instruction.

define i64 @add_nsw_sext_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_add:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq 5(%rsi,%rax), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = add i64 %x, %ext
  ret i64 %idx
}

; Throw in a scale (left shift) because an LEA can do that too.
; Use a negative constant (LEA displacement) to verify that's handled correctly.

define i64 @add_nsw_sext_lsh_add(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext_lsh_add:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq -40(%rsi,%rax,8), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %shl = shl i64 %ext, 3
  %idx = add i64 %x, %shl
  ret i64 %idx
}

; Don't promote the sext if the extended value has no later arithmetic use to
; combine with. The wider 64-bit add instruction needs an extra byte (REX
; prefix) to encode.

define i64 @add_nsw_sext(i32 %i, i64 %x) {
; CHECK-LABEL: add_nsw_sext:
; CHECK:       # BB#0:
; CHECK-NEXT:    addl $5, %edi
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  ret i64 %ext
}

; The typical use case: a 64-bit system where an 'int' is used as an index into
; an array.

define i8* @gep8(i32 %i, i8* %x) {
; CHECK-LABEL: gep8:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq 5(%rsi,%rax), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i8, i8* %x, i64 %ext
  ret i8* %idx
}

define i16* @gep16(i32 %i, i16* %x) {
; CHECK-LABEL: gep16:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq -10(%rsi,%rax,2), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %idx = getelementptr i16, i16* %x, i64 %ext
  ret i16* %idx
}

define i32* @gep32(i32 %i, i32* %x) {
; CHECK-LABEL: gep32:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq 20(%rsi,%rax,4), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i32, i32* %x, i64 %ext
  ret i32* %idx
}

define i64* @gep64(i32 %i, i64* %x) {
; CHECK-LABEL: gep64:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    leaq -40(%rsi,%rax,8), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, -5
  %ext = sext i32 %add to i64
  %idx = getelementptr i64, i64* %x, i64 %ext
  ret i64* %idx
}
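
; A hedged sketch of the rewrite the tests above rely on (illustrative, not
; autogenerated): promoting the sext ahead of the 'add nsw' turns
;   %add = add nsw i32 %i, 5
;   %ext = sext i32 %add to i64
; into
;   %ext = sext i32 %i to i64
;   %add = add nsw i64 %ext, 5
; so the constant (scaled by the element size in the gep tests) can fold into
; an LEA displacement or combine with later arithmetic.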

; LEA can't scale by 16, but the adds can still be combined into an LEA.

define i128* @gep128(i32 %i, i128* %x) {
; CHECK-LABEL: gep128:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %edi, %rax
; CHECK-NEXT:    shlq $4, %rax
; CHECK-NEXT:    leaq 80(%rsi,%rax), %rax
; CHECK-NEXT:    retq

  %add = add nsw i32 %i, 5
  %ext = sext i32 %add to i64
  %idx = getelementptr i128, i128* %x, i64 %ext
  ret i128* %idx
}

; A bigger win can be achieved when there is more than one use of the
; sign-extended value. In this case, we can eliminate the extra sign extension
; instructions and use more efficient addressing modes for the memory ops.

define void @PR20134(i32* %a, i32 %i) {
; CHECK-LABEL: PR20134:
; CHECK:       # BB#0:
; CHECK-NEXT:    movslq %esi, %rax
; CHECK-NEXT:    movl 4(%rdi,%rax,4), %ecx
; CHECK-NEXT:    addl 8(%rdi,%rax,4), %ecx
; CHECK-NEXT:    movl %ecx, (%rdi,%rax,4)
; CHECK-NEXT:    retq

  %add1 = add nsw i32 %i, 1
  %idx1 = sext i32 %add1 to i64
  %gep1 = getelementptr i32, i32* %a, i64 %idx1
  %load1 = load i32, i32* %gep1, align 4

  %add2 = add nsw i32 %i, 2
  %idx2 = sext i32 %add2 to i64
  %gep2 = getelementptr i32, i32* %a, i64 %idx2
  %load2 = load i32, i32* %gep2, align 4

  %add3 = add i32 %load1, %load2
  %idx3 = sext i32 %i to i64
  %gep3 = getelementptr i32, i32* %a, i64 %idx3
  store i32 %add3, i32* %gep3, align 4
  ret void
}
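
; A hedged sketch of the multi-use case above (illustrative, not autogenerated):
; after both sexts are promoted past their 'add nsw', the three addresses share
; a single sign extension:
;   %idx = sext i32 %i to i64
;   %idx1 = add nsw i64 %idx, 1
;   %idx2 = add nsw i64 %idx, 2
; so codegen emits one movslq and folds the +1/+2 (scaled by 4) into the
; addressing modes 4(%rdi,%rax,4) and 8(%rdi,%rax,4).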