; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s

; Codegen test for AArch64 vector loads and stores (Apple NEON syntax).
; Every function carries the FileCheck patterns that describe the load/store
; instructions and addressing modes expected in the generated assembly.

; rdar://9428579

%type1 = type { <16 x i8> }
%type2 = type { <8 x i8> }
%type3 = type { <4 x i16> }


; t1 / t2: store a zero vector through a pointer loaded from %argtable.
; The patterns expect a q-register store for the 128-bit vector (t1) and a
; d-register store for the 64-bit vector (t2).
define hidden fastcc void @t1(%type1** %argtable) nounwind {
entry:
; CHECK-LABEL: t1:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str q0, [x[[REG]]]
  %tmp1 = load %type1*, %type1** %argtable, align 8
  %tmp2 = getelementptr inbounds %type1, %type1* %tmp1, i64 0, i32 0
  store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
  ret void
}

define hidden fastcc void @t2(%type2** %argtable) nounwind {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str d0, [x[[REG]]]
  %tmp1 = load %type2*, %type2** %argtable, align 8
  %tmp2 = getelementptr inbounds %type2, %type2* %tmp1, i64 0, i32 0
  store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
  ret void
}

; add a bunch of tests for rdar://11246289
;
; fct1_<type>: copy the vector at %array[%offset] to the same index of the
; matching global array. The patterns expect the index to be shifted once
; (#4 for 128-bit vectors, #3 for 64-bit vectors) and that shifted register to
; be used as the register offset of both the load and the store.
;
; fct2_<type>: same copy with the constant indices 3 (load) and 5 (store),
; expected as immediate byte offsets (#48/#80 for 128-bit vectors, #24/#40 for
; 64-bit vectors).

@globalArray64x2 = common global <2 x i64>* null, align 8
@globalArray32x4 = common global <4 x i32>* null, align 8
@globalArray16x8 = common global <8 x i16>* null, align 8
@globalArray8x16 = common global <16 x i8>* null, align 8
@globalArray64x1 = common global <1 x i64>* null, align 8
@globalArray32x2 = common global <2 x i32>* null, align 8
@globalArray16x4 = common global <4 x i16>* null, align 8
@globalArray8x8 = common global <8 x i8>* null, align 8
@floatglobalArray64x2 = common global <2 x double>* null, align 8
@floatglobalArray32x4 = common global <4 x float>* null, align 8
@floatglobalArray64x1 = common global <1 x double>* null, align 8
@floatglobalArray32x2 = common global <2 x float>* null, align 8

define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 %offset
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x2:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 5
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 %offset
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x4:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 5
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 %offset
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x8:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 5
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x16:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 %offset
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_8x16:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 5
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x1:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 %offset
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x1:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 5
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 %offset
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x2:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 5
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 %offset
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x4:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 5
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
  %tmp = load <8 x i8>, <8 x i8>* %arrayidx, align 8
  %tmp1 = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i8>, <8 x i8>* %tmp1, i64 %offset
  store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
  ret void
}

; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
; registers for unscaled vector accesses
;
; fct0-fct7 load a vector from byte offset 3 of @str; the patterns expect an
; ldur with immediate #3 into a d register (64-bit vectors) or a q register
; (128-bit vectors).
; fct8-fct15 additionally store the loaded vector to byte offset 4 of @str; the
; patterns expect an ldur/stur pair sharing one base register.
@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1

define <1 x i64> @fct0() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct0:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
  ret <1 x i64> %0
}

define <2 x i32> @fct1() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct1:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
  ret <2 x i32> %0
}

define <4 x i16> @fct2() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct2:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
  ret <4 x i16> %0
}

define <8 x i8> @fct3() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct3:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
  ret <8 x i8> %0
}

define <2 x i64> @fct4() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
  ret <2 x i64> %0
}

define <4 x i32> @fct5() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
  ret <4 x i32> %0
}

define <8 x i16> @fct6() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
  ret <8 x i16> %0
}

define <16 x i8> @fct7() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
  ret <16 x i8> %0
}

define void @fct8() nounwind ssp {
entry:
; CHECK-LABEL: fct8:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
  store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
  ret void
}

define void @fct9() nounwind ssp {
entry:
; CHECK-LABEL: fct9:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
  store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
  ret void
}

define void @fct10() nounwind ssp {
entry:
; CHECK-LABEL: fct10:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
  store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
  ret void
}

define void @fct11() nounwind ssp {
entry:
; CHECK-LABEL: fct11:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
  store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
  ret void
}

define void @fct12() nounwind ssp {
entry:
; CHECK-LABEL: fct12:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
  store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
  ret void
}

define void @fct13() nounwind ssp {
entry:
; CHECK-LABEL: fct13:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
  store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
  ret void
}

define void @fct14() nounwind ssp {
entry:
; CHECK-LABEL: fct14:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
  store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
  ret void
}

define void @fct15() nounwind ssp {
entry:
; CHECK-LABEL: fct15:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
  store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8], [63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
  ret void
}

; Check the building of vector from a single loaded value.
; Part of <rdar://problem/14170854>
;
; Single loads with immediate offset.
;
; fct16-fct23 read the scalar element at index 1 of %sp0 and insert it into
; lane 0 of an otherwise undef vector. The patterns expect the scalar to be
; loaded straight into a b/h/s/d FP register with the element-sized immediate
; offset (#1, #2, #4 or #8).

; i8 element into <8 x i8>, then squared lane-wise.
define <8 x i8> @fct16(i8* nocapture %sp0) {
; CHECK-LABEL: fct16:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i8, i8* %sp0, i64 1
  %scalar = load i8, i8* %elt.addr, align 1
  %lane0 = insertelement <8 x i8> undef, i8 %scalar, i32 0
  %squared = mul <8 x i8> %lane0, %lane0
  ret <8 x i8> %squared
}

; i8 element into <16 x i8>, then squared lane-wise.
define <16 x i8> @fct17(i8* nocapture %sp0) {
; CHECK-LABEL: fct17:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i8, i8* %sp0, i64 1
  %scalar = load i8, i8* %elt.addr, align 1
  %lane0 = insertelement <16 x i8> undef, i8 %scalar, i32 0
  %squared = mul <16 x i8> %lane0, %lane0
  ret <16 x i8> %squared
}

; i16 element into <4 x i16>, then squared lane-wise.
define <4 x i16> @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i16, i16* %sp0, i64 1
  %scalar = load i16, i16* %elt.addr, align 1
  %lane0 = insertelement <4 x i16> undef, i16 %scalar, i32 0
  %squared = mul <4 x i16> %lane0, %lane0
  ret <4 x i16> %squared
}

; i16 element into <8 x i16>, then squared lane-wise.
define <8 x i16> @fct19(i16* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i16, i16* %sp0, i64 1
  %scalar = load i16, i16* %elt.addr, align 1
  %lane0 = insertelement <8 x i16> undef, i16 %scalar, i32 0
  %squared = mul <8 x i16> %lane0, %lane0
  ret <8 x i16> %squared
}

; i32 element into <2 x i32>, then squared lane-wise.
define <2 x i32> @fct20(i32* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i32, i32* %sp0, i64 1
  %scalar = load i32, i32* %elt.addr, align 1
  %lane0 = insertelement <2 x i32> undef, i32 %scalar, i32 0
  %squared = mul <2 x i32> %lane0, %lane0
  ret <2 x i32> %squared
}

; i32 element into <4 x i32>, then squared lane-wise.
define <4 x i32> @fct21(i32* nocapture %sp0) {
; CHECK-LABEL: fct21:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i32, i32* %sp0, i64 1
  %scalar = load i32, i32* %elt.addr, align 1
  %lane0 = insertelement <4 x i32> undef, i32 %scalar, i32 0
  %squared = mul <4 x i32> %lane0, %lane0
  ret <4 x i32> %squared
}

; i64 element into <1 x i64>, returned without further arithmetic.
define <1 x i64> @fct22(i64* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldr d0, [x0, #8]
entry:
  %elt.addr = getelementptr i64, i64* %sp0, i64 1
  %scalar = load i64, i64* %elt.addr, align 1
  %lane0 = insertelement <1 x i64> undef, i64 %scalar, i32 0
  ret <1 x i64> %lane0
}

; i64 element into lane 0 of <2 x i64>, returned without further arithmetic.
define <2 x i64> @fct23(i64* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
entry:
  %elt.addr = getelementptr i64, i64* %sp0, i64 1
  %scalar = load i64, i64* %elt.addr, align 1
  %lane0 = insertelement <2 x i64> undef, i64 %scalar, i32 0
  ret <2 x i64> %lane0
}

;
; Single loads with register offset.
; fct24-fct31 read the scalar element at index %offset of %sp0 and insert it
; into lane 0 of an otherwise undef vector. The patterns expect the scalar to
; be loaded straight into a b/h/s/d FP register using x1 as register offset,
; scaled by the element size (no shift, lsl #1, lsl #2 or lsl #3).

; i8 element into <8 x i8>, then squared lane-wise.
define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct24:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i8, i8* %sp0, i64 %offset
  %scalar = load i8, i8* %elt.addr, align 1
  %lane0 = insertelement <8 x i8> undef, i8 %scalar, i32 0
  %squared = mul <8 x i8> %lane0, %lane0
  ret <8 x i8> %squared
}

; i8 element into <16 x i8>, then squared lane-wise.
define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct25:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i8, i8* %sp0, i64 %offset
  %scalar = load i8, i8* %elt.addr, align 1
  %lane0 = insertelement <16 x i8> undef, i8 %scalar, i32 0
  %squared = mul <16 x i8> %lane0, %lane0
  ret <16 x i8> %squared
}

; i16 element into <4 x i16>, then squared lane-wise.
define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct26:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i16, i16* %sp0, i64 %offset
  %scalar = load i16, i16* %elt.addr, align 1
  %lane0 = insertelement <4 x i16> undef, i16 %scalar, i32 0
  %squared = mul <4 x i16> %lane0, %lane0
  ret <4 x i16> %squared
}

; i16 element into <8 x i16>, then squared lane-wise.
define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct27:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i16, i16* %sp0, i64 %offset
  %scalar = load i16, i16* %elt.addr, align 1
  %lane0 = insertelement <8 x i16> undef, i16 %scalar, i32 0
  %squared = mul <8 x i16> %lane0, %lane0
  ret <8 x i16> %squared
}

; i32 element into <2 x i32>, then squared lane-wise.
define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct28:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i32, i32* %sp0, i64 %offset
  %scalar = load i32, i32* %elt.addr, align 1
  %lane0 = insertelement <2 x i32> undef, i32 %scalar, i32 0
  %squared = mul <2 x i32> %lane0, %lane0
  ret <2 x i32> %squared
}

; i32 element into <4 x i32>, then squared lane-wise.
define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct29:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %elt.addr = getelementptr i32, i32* %sp0, i64 %offset
  %scalar = load i32, i32* %elt.addr, align 1
  %lane0 = insertelement <4 x i32> undef, i32 %scalar, i32 0
  %squared = mul <4 x i32> %lane0, %lane0
  ret <4 x i32> %squared
}

; i64 element into <1 x i64>, returned without further arithmetic.
define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct30:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
  %elt.addr = getelementptr i64, i64* %sp0, i64 %offset
  %scalar = load i64, i64* %elt.addr, align 1
  %lane0 = insertelement <1 x i64> undef, i64 %scalar, i32 0
  ret <1 x i64> %lane0
}

; i64 element into lane 0 of <2 x i64>, returned without further arithmetic.
define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct31:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
  %elt.addr = getelementptr i64, i64* %sp0, i64 %offset
  %scalar = load i64, i64* %elt.addr, align 1
  %lane0 = insertelement <2 x i64> undef, i64 %scalar, i32 0
  ret <2 x i64> %lane0
}