; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s

; Check that the operand loads are folded into the MMX instructions as
; memory operands rather than being loaded into a register first.

define i64 @t0(<1 x i64>* %a, i32* %b) {
; CHECK-LABEL: t0:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: movq (%[[REG1:[a-z]+]]), %mm0
; CHECK-NEXT: psllq (%[[REG2:[a-z]+]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32)

define i64 @t1(<1 x i64>* %a, i32* %b) {
; CHECK-LABEL: t1:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: movq (%[[REG1]]), %mm0
; CHECK-NEXT: psrlq (%[[REG2]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32)

define i64 @t2(<1 x i64>* %a, i32* %b) {
; CHECK-LABEL: t2:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: movq (%[[REG1]]), %mm0
; CHECK-NEXT: psllw (%[[REG2]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32)

define i64 @t3(<1 x i64>* %a, i32* %b) {
; CHECK-LABEL: t3:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: movq (%[[REG1]]), %mm0
; CHECK-NEXT: psrlw (%[[REG2]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32)

define i64 @t4(<1 x i64>* %a, i32* %b) {
; CHECK-LABEL: t4:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: movq (%[[REG1]]), %mm0
; CHECK-NEXT: pslld (%[[REG2]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32)

define i64 @t5(<1 x i64>* %a, i32* %b) {
; CHECK-LABEL: t5:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: movq (%[[REG1]]), %mm0
; CHECK-NEXT: psrld (%[[REG2]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32)

define i64 @t6(<1 x i64>* %a, i32* %b) {
; CHECK-LABEL: t6:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: movq (%[[REG1]]), %mm0
; CHECK-NEXT: psraw (%[[REG2]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32)

define i64 @t7(<1 x i64>* %a, i32* %b) {
; CHECK-LABEL: t7:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: movq (%[[REG1]]), %mm0
; CHECK-NEXT: psrad (%[[REG2]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: retq
entry:
  %0 = bitcast <1 x i64>* %a to x86_mmx*
  %1 = load x86_mmx, x86_mmx* %0, align 8
  %2 = load i32, i32* %b, align 4
  %3 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %1, i32 %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32)

define i64 @tt0(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt0:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddb (%[[REG3:[a-z]+]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()

define i64 @tt1(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt1:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddw (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)

define i64 @tt2(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt2:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddd (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)

define i64 @tt3(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt3:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddq (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx)

define i64 @tt4(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt4:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddusb (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)

define i64 @tt5(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt5:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: paddusw (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)

define i64 @tt6(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt6:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: psrlw (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx)

define i64 @tt7(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt7:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: psrld (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx)

define i64 @tt8(x86_mmx %t, x86_mmx* %q) {
; CHECK-LABEL: tt8:
; CHECK: # BB#0:{{.*}} %entry
; CHECK: psrlq (%[[REG3]]), %mm0
; CHECK-NEXT: movd %mm0, %rax
; CHECK-NEXT: emms
; CHECK-NEXT: retq
entry:
  %v = load x86_mmx, x86_mmx* %q
  %u = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %t, x86_mmx %v)
  %s = bitcast x86_mmx %u to i64
  call void @llvm.x86.mmx.emms()
  ret i64 %s
}
declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx)