; RUN: llc -mcpu=pwr6 -mattr=+altivec -code-model=small < %s | FileCheck %s

; Verify how in-register vector sign/zero extension (a trunc immediately
; followed by sext/zext back to the original type) is lowered with altivec
; enabled.

target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; Altivec does not provide a vector sext instruction. Sign-extending the
; low 4 bits of every byte lane is expected to be done entirely in
; registers: a per-byte shift left (vslb) followed by a per-byte
; arithmetic shift right (vsrab).
define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) {
  %narrow = trunc <16 x i8> %a to <16 x i4>
  %wide = sext <16 x i4> %narrow to <16 x i8>
  ret <16 x i8> %wide
}
; CHECK-LABEL: v16si8_sext_in_reg:
; CHECK: vslb
; CHECK: vsrab
; CHECK: blr

; Zero extension only needs the high bits of each lane cleared: the mask
; (15 in every byte) is materialised with a splat-immediate (vspltisb) and
; applied with a vector and (vand).
define <16 x i8> @v16si8_zext_in_reg(<16 x i8> %a) {
  %narrow = trunc <16 x i8> %a to <16 x i4>
  %wide = zext <16 x i4> %narrow to <16 x i8>
  ret <16 x i8> %wide
}
; CHECK-LABEL: v16si8_zext_in_reg:
; CHECK: vspltisb [[VMASK:[0-9]+]], 15
; CHECK-NEXT: vand 2, 2, [[VMASK]]

; Halfword counterpart of v16si8_sext_in_reg: the low 8 bits of every
; halfword lane are sign-extended with a shift left (vslh) followed by an
; arithmetic shift right (vsrah).
define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) {
  %narrow = trunc <8 x i16> %a to <8 x i8>
  %wide = sext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
; CHECK-LABEL: v8si16_sext_in_reg:
; CHECK: vslh
; CHECK: vsrah
; CHECK: blr

; Same as v16si8_zext_in_reg, but instead of creating the mask
; with a splat, loads it from memory (through the TOC).
define <8 x i16> @v8si16_zext_in_reg(<8 x i16> %a) {
  %narrow = trunc <8 x i16> %a to <8 x i8>
  %wide = zext <8 x i8> %narrow to <8 x i16>
  ret <8 x i16> %wide
}
; The mask is expected to be fetched from a constant reached through the
; TOC (ld of a .LC entry, then lvx) and applied with vand.
; CHECK-LABEL: v8si16_zext_in_reg:
; CHECK: ld [[RMASKTOC:[0-9]+]], .LC{{[0-9]+}}@toc(2)
; CHECK-NEXT: lvx [[VMASK:[0-9]+]], {{[0-9]+}}, [[RMASKTOC]]
; CHECK-NEXT: vand 2, 2, [[VMASK]]

; Word counterpart of v16si8_sext_in_reg: the low 16 bits of every word
; lane are sign-extended with a shift left (vslw) followed by an
; arithmetic shift right (vsraw).
define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) {
  %narrow = trunc <4 x i32> %a to <4 x i16>
  %wide = sext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
; CHECK-LABEL: v4si32_sext_in_reg:
; CHECK: vslw
; CHECK: vsraw
; CHECK: blr

; Word zero extension, masking like v16si8_zext_in_reg. Here the mask is
; built in a register: -16 is splatted into every word (vspltisw), the
; result is shifted right by itself (vsrw), and the mask is applied with
; vand.
define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {
  %narrow = trunc <4 x i32> %a to <4 x i16>
  %wide = zext <4 x i16> %narrow to <4 x i32>
  ret <4 x i32> %wide
}
; CHECK-LABEL: v4si32_zext_in_reg:
; CHECK: vspltisw [[VMASK:[0-9]+]], -16
; CHECK-NEXT: vsrw [[VMASK]], [[VMASK]], [[VMASK]]
; CHECK-NEXT: vand 2, 2, [[VMASK]]