; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; bswap should be constant folded when it is passed a constant argument

; RUN: llc < %s -mtriple=i686-- -mcpu=i686 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=CHECK64

declare i16 @llvm.bswap.i16(i16)

declare i32 @llvm.bswap.i32(i32)

declare i64 @llvm.bswap.i64(i64)

; Direct i16 bswap intrinsic call; selected as a 16-bit rotate-by-8.
define i16 @W(i16 %A) {
; CHECK-LABEL: W:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    rolw $8, %ax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: W:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rolw $8, %di
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    retq
  %Z = call i16 @llvm.bswap.i16( i16 %A )         ; <i16> [#uses=1]
  ret i16 %Z
}

; Direct i32 bswap intrinsic call; selected as a single bswapl.
define i32 @X(i32 %A) {
; CHECK-LABEL: X:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: X:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    bswapl %edi
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    retq
  %Z = call i32 @llvm.bswap.i32( i32 %A )         ; <i32> [#uses=1]
  ret i32 %Z
}

; Direct i64 bswap intrinsic call; on i686 this needs two bswapl with the
; register halves exchanged, on x86_64 a single bswapq.
define i64 @Y(i64 %A) {
; CHECK-LABEL: Y:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    bswapl %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: Y:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    bswapq %rdi
; CHECK64-NEXT:    movq %rdi, %rax
; CHECK64-NEXT:    retq
  %Z = call i64 @llvm.bswap.i64( i64 %A )         ; <i64> [#uses=1]
  ret i64 %Z
}

; rdar://9164521
; The shift-and-mask pattern below swaps the two low bytes of %a (zero
; everywhere else), so it should be recognized as bswap(%a) >> 16.
define i32 @test1(i32 %a) nounwind readnone {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    shrl $16, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: test1:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    bswapl %edi
; CHECK64-NEXT:    shrl $16, %edi
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    retq
entry:

  %and = lshr i32 %a, 8
  %shr3 = and i32 %and, 255
  %and2 = shl i32 %a, 8
  %shl = and i32 %and2, 65280
  %or = or i32 %shr3, %shl
  ret i32 %or
}

; Same low-16-bit byte swap as test1, but the shl-16/ashr-16 pair
; sign-extends the i16 result, so the shift becomes arithmetic (sarl).
define i32 @test2(i32 %a) nounwind readnone {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    sarl $16, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: test2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    bswapl %edi
; CHECK64-NEXT:    sarl $16, %edi
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    retq
entry:

  %and = lshr i32 %a, 8
  %shr4 = and i32 %and, 255
  %and2 = shl i32 %a, 8
  %or = or i32 %shr4, %and2
  %sext = shl i32 %or, 16
  %conv3 = ashr exact i32 %sext, 16
  ret i32 %conv3
}

@var8 = global i8 0
@var16 = global i16 0

; The "shl" below can move bits into the high parts of the value, so the
; operation is not a "bswap, shr" pair.

; rdar://problem/14814049
define i64 @not_bswap() {
; CHECK-LABEL: not_bswap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzwl var16, %eax
; CHECK-NEXT:    movl %eax, %ecx
; CHECK-NEXT:    shrl $8, %ecx
; CHECK-NEXT:    shll $8, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: not_bswap:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzwl {{.*}}(%rip), %eax
; CHECK64-NEXT:    movq %rax, %rcx
; CHECK64-NEXT:    shrq $8, %rcx
; CHECK64-NEXT:    shlq $8, %rax
; CHECK64-NEXT:    orq %rcx, %rax
; CHECK64-NEXT:    retq
  %init = load i16, i16* @var16
  %big = zext i16 %init to i64

  %hishifted = lshr i64 %big, 8
  %loshifted = shl i64 %big, 8

  %notswapped = or i64 %hishifted, %loshifted

  ret i64 %notswapped
}

; This time, the lshr (and subsequent or) is completely useless. While it's
; technically correct to convert this into a "bswap, shr", it's suboptimal. A
; simple shl works better.

define i64 @not_useful_bswap() {
; CHECK-LABEL: not_useful_bswap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzbl var8, %eax
; CHECK-NEXT:    shll $8, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: not_useful_bswap:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl {{.*}}(%rip), %eax
; CHECK64-NEXT:    shlq $8, %rax
; CHECK64-NEXT:    retq
  %init = load i8, i8* @var8
  %big = zext i8 %init to i64

  %hishifted = lshr i64 %big, 8
  %loshifted = shl i64 %big, 8

  %notswapped = or i64 %hishifted, %loshifted

  ret i64 %notswapped
}

; Finally, it *is* OK to just mask off the shl if we know that the value is zero
; beyond 16 bits anyway. This is a legitimate bswap.

define i64 @finally_useful_bswap() {
; CHECK-LABEL: finally_useful_bswap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzwl var16, %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    shrl $16, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: finally_useful_bswap:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzwl {{.*}}(%rip), %eax
; CHECK64-NEXT:    bswapq %rax
; CHECK64-NEXT:    shrq $48, %rax
; CHECK64-NEXT:    retq
  %init = load i16, i16* @var16
  %big = zext i16 %init to i64

  %hishifted = lshr i64 %big, 8
  %lomasked = and i64 %big, 255
  %loshifted = shl i64 %lomasked, 8

  %swapped = or i64 %hishifted, %loshifted

  ret i64 %swapped
}
