• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
3; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
4
; Loads <4 x i32> from %x (align 4), stores it to %y, and returns %x + 4 bytes.
; Expected codegen folds the pointer bump into the load as a post-indexed
; vldrw.u32 with writeback #4.
5define i8* @ldrwu32_4(i8* %x, i8* %y) {
6; CHECK-LABEL: ldrwu32_4:
7; CHECK:       @ %bb.0: @ %entry
8; CHECK-NEXT:    vldrw.u32 q0, [r0], #4
9; CHECK-NEXT:    vstrw.32 q0, [r1]
10; CHECK-NEXT:    bx lr
11entry:
12  %z = getelementptr inbounds i8, i8* %x, i32 4
13  %0 = bitcast i8* %x to <4 x i32>*
14  %1 = load <4 x i32>, <4 x i32>* %0, align 4
15  %2 = bitcast i8* %y to <4 x i32>*
16  store <4 x i32> %1, <4 x i32>* %2, align 4
17  ret i8* %z
18}
19
; Loads <4 x i32> from %x (align 4), stores it to %y, and returns %x + 3 bytes.
; The LE checks expect a post-indexed byte load (vldrb.u8 with writeback #3);
; the BE checks expect a plain vldrw.u32 followed by a separate adds.
; NOTE(review): presumably 3 is not encodable as a word-scaled post-index
; offset, and the byte-load substitution is only valid on LE - confirm.
20define i8* @ldrwu32_3(i8* %x, i8* %y) {
21; CHECK-LE-LABEL: ldrwu32_3:
22; CHECK-LE:       @ %bb.0: @ %entry
23; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
24; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
25; CHECK-LE-NEXT:    bx lr
26;
27; CHECK-BE-LABEL: ldrwu32_3:
28; CHECK-BE:       @ %bb.0: @ %entry
29; CHECK-BE-NEXT:    vldrw.u32 q0, [r0]
30; CHECK-BE-NEXT:    adds r0, #3
31; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
32; CHECK-BE-NEXT:    bx lr
33entry:
34  %z = getelementptr inbounds i8, i8* %x, i32 3
35  %0 = bitcast i8* %x to <4 x i32>*
36  %1 = load <4 x i32>, <4 x i32>* %0, align 4
37  %2 = bitcast i8* %y to <4 x i32>*
38  store <4 x i32> %1, <4 x i32>* %2, align 4
39  ret i8* %z
40}
41
; Loads <4 x i32> from %x (align 4), stores it to %y, and returns %x - 4 bytes.
; Expected codegen is a post-indexed vldrw.u32 with a negative writeback (#-4).
42define i8* @ldrwu32_m4(i8* %x, i8* %y) {
43; CHECK-LABEL: ldrwu32_m4:
44; CHECK:       @ %bb.0: @ %entry
45; CHECK-NEXT:    vldrw.u32 q0, [r0], #-4
46; CHECK-NEXT:    vstrw.32 q0, [r1]
47; CHECK-NEXT:    bx lr
48entry:
49  %z = getelementptr inbounds i8, i8* %x, i32 -4
50  %0 = bitcast i8* %x to <4 x i32>*
51  %1 = load <4 x i32>, <4 x i32>* %0, align 4
52  %2 = bitcast i8* %y to <4 x i32>*
53  store <4 x i32> %1, <4 x i32>* %2, align 4
54  ret i8* %z
55}
56
; Loads <4 x i32> from %x (align 4), stores it to %y, and returns %x + 508 bytes.
; Expected codegen still folds the bump into a post-indexed vldrw.u32 (#508);
; compare with the 512-byte variant, where the checks expect a separate add.
57define i8* @ldrwu32_508(i8* %x, i8* %y) {
58; CHECK-LABEL: ldrwu32_508:
59; CHECK:       @ %bb.0: @ %entry
60; CHECK-NEXT:    vldrw.u32 q0, [r0], #508
61; CHECK-NEXT:    vstrw.32 q0, [r1]
62; CHECK-NEXT:    bx lr
63entry:
64  %z = getelementptr inbounds i8, i8* %x, i32 508
65  %0 = bitcast i8* %x to <4 x i32>*
66  %1 = load <4 x i32>, <4 x i32>* %0, align 4
67  %2 = bitcast i8* %y to <4 x i32>*
68  store <4 x i32> %1, <4 x i32>* %2, align 4
69  ret i8* %z
70}
71
; Loads <4 x i32> from %x (align 4), stores it to %y, and returns %x + 512 bytes.
; Expected codegen is a plain vldrw.u32 plus a separate add.w (no post-index).
; NOTE(review): presumably 512 is outside the post-index immediate range for
; word accesses - confirm against the MVE encoding.
72define i8* @ldrwu32_512(i8* %x, i8* %y) {
73; CHECK-LABEL: ldrwu32_512:
74; CHECK:       @ %bb.0: @ %entry
75; CHECK-NEXT:    vldrw.u32 q0, [r0]
76; CHECK-NEXT:    add.w r0, r0, #512
77; CHECK-NEXT:    vstrw.32 q0, [r1]
78; CHECK-NEXT:    bx lr
79entry:
80  %z = getelementptr inbounds i8, i8* %x, i32 512
81  %0 = bitcast i8* %x to <4 x i32>*
82  %1 = load <4 x i32>, <4 x i32>* %0, align 4
83  %2 = bitcast i8* %y to <4 x i32>*
84  store <4 x i32> %1, <4 x i32>* %2, align 4
85  ret i8* %z
86}
87
; Loads <4 x i32> from %x (align 4), stores it to %y, and returns %x - 508 bytes.
; Expected codegen is a post-indexed vldrw.u32 with writeback #-508.
88define i8* @ldrwu32_m508(i8* %x, i8* %y) {
89; CHECK-LABEL: ldrwu32_m508:
90; CHECK:       @ %bb.0: @ %entry
91; CHECK-NEXT:    vldrw.u32 q0, [r0], #-508
92; CHECK-NEXT:    vstrw.32 q0, [r1]
93; CHECK-NEXT:    bx lr
94entry:
95  %z = getelementptr inbounds i8, i8* %x, i32 -508
96  %0 = bitcast i8* %x to <4 x i32>*
97  %1 = load <4 x i32>, <4 x i32>* %0, align 4
98  %2 = bitcast i8* %y to <4 x i32>*
99  store <4 x i32> %1, <4 x i32>* %2, align 4
100  ret i8* %z
101}
102
; Loads <4 x i32> from %x (align 4), stores it to %y, and returns %x - 512 bytes.
; Expected codegen is a plain vldrw.u32 plus a separate sub.w (no post-index).
; NOTE(review): presumably -512 is outside the post-index immediate range for
; word accesses - confirm against the MVE encoding.
103define i8* @ldrwu32_m512(i8* %x, i8* %y) {
104; CHECK-LABEL: ldrwu32_m512:
105; CHECK:       @ %bb.0: @ %entry
106; CHECK-NEXT:    vldrw.u32 q0, [r0]
107; CHECK-NEXT:    sub.w r0, r0, #512
108; CHECK-NEXT:    vstrw.32 q0, [r1]
109; CHECK-NEXT:    bx lr
110entry:
111  %z = getelementptr inbounds i8, i8* %x, i32 -512
112  %0 = bitcast i8* %x to <4 x i32>*
113  %1 = load <4 x i32>, <4 x i32>* %0, align 4
114  %2 = bitcast i8* %y to <4 x i32>*
115  store <4 x i32> %1, <4 x i32>* %2, align 4
116  ret i8* %z
117}
118
119
; Loads <4 x i16> from %x (align 2), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x + 4 bytes. Expected codegen is a single
; post-indexed widening load, vldrh.u32 with writeback #4.
120define i8* @ldrhu32_4(i8* %x, i8* %y) {
121; CHECK-LABEL: ldrhu32_4:
122; CHECK:       @ %bb.0: @ %entry
123; CHECK-NEXT:    vldrh.u32 q0, [r0], #4
124; CHECK-NEXT:    vstrw.32 q0, [r1]
125; CHECK-NEXT:    bx lr
126entry:
127  %z = getelementptr inbounds i8, i8* %x, i32 4
128  %0 = bitcast i8* %x to <4 x i16>*
129  %1 = load <4 x i16>, <4 x i16>* %0, align 2
130  %2 = zext <4 x i16> %1 to <4 x i32>
131  %3 = bitcast i8* %y to <4 x i32>*
132  store <4 x i32> %2, <4 x i32>* %3, align 4
133  ret i8* %z
134}
135
; Loads <4 x i16> from %x (align 2), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x + 3 bytes. Expected codegen keeps the bump as a
; separate adds after a plain vldrh.u32.
; NOTE(review): presumably 3 is rejected because it is not a multiple of the
; 2-byte access size - confirm.
136define i8* @ldrhu32_3(i8* %x, i8* %y) {
137; CHECK-LABEL: ldrhu32_3:
138; CHECK:       @ %bb.0: @ %entry
139; CHECK-NEXT:    vldrh.u32 q0, [r0]
140; CHECK-NEXT:    adds r0, #3
141; CHECK-NEXT:    vstrw.32 q0, [r1]
142; CHECK-NEXT:    bx lr
143entry:
144  %z = getelementptr inbounds i8, i8* %x, i32 3
145  %0 = bitcast i8* %x to <4 x i16>*
146  %1 = load <4 x i16>, <4 x i16>* %0, align 2
147  %2 = zext <4 x i16> %1 to <4 x i32>
148  %3 = bitcast i8* %y to <4 x i32>*
149  store <4 x i32> %2, <4 x i32>* %3, align 4
150  ret i8* %z
151}
152
; Loads <4 x i16> from %x (align 2), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x + 2 bytes. Expected codegen is a post-indexed
; vldrh.u32 with writeback #2.
153define i8* @ldrhu32_2(i8* %x, i8* %y) {
154; CHECK-LABEL: ldrhu32_2:
155; CHECK:       @ %bb.0: @ %entry
156; CHECK-NEXT:    vldrh.u32 q0, [r0], #2
157; CHECK-NEXT:    vstrw.32 q0, [r1]
158; CHECK-NEXT:    bx lr
159entry:
160  %z = getelementptr inbounds i8, i8* %x, i32 2
161  %0 = bitcast i8* %x to <4 x i16>*
162  %1 = load <4 x i16>, <4 x i16>* %0, align 2
163  %2 = zext <4 x i16> %1 to <4 x i32>
164  %3 = bitcast i8* %y to <4 x i32>*
165  store <4 x i32> %2, <4 x i32>* %3, align 4
166  ret i8* %z
167}
168
; Loads <4 x i16> from %x (align 2), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x + 254 bytes. Expected codegen is a post-indexed
; vldrh.u32 with writeback #254.
169define i8* @ldrhu32_254(i8* %x, i8* %y) {
170; CHECK-LABEL: ldrhu32_254:
171; CHECK:       @ %bb.0: @ %entry
172; CHECK-NEXT:    vldrh.u32 q0, [r0], #254
173; CHECK-NEXT:    vstrw.32 q0, [r1]
174; CHECK-NEXT:    bx lr
175entry:
176  %z = getelementptr inbounds i8, i8* %x, i32 254
177  %0 = bitcast i8* %x to <4 x i16>*
178  %1 = load <4 x i16>, <4 x i16>* %0, align 2
179  %2 = zext <4 x i16> %1 to <4 x i32>
180  %3 = bitcast i8* %y to <4 x i32>*
181  store <4 x i32> %2, <4 x i32>* %3, align 4
182  ret i8* %z
183}
184
; Loads <4 x i16> from %x (align 2), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x + 256 bytes. Expected codegen is a plain
; vldrh.u32 plus a separate add.w (no post-index).
; NOTE(review): presumably 256 is outside the post-index immediate range for
; halfword accesses - confirm against the MVE encoding.
185define i8* @ldrhu32_256(i8* %x, i8* %y) {
186; CHECK-LABEL: ldrhu32_256:
187; CHECK:       @ %bb.0: @ %entry
188; CHECK-NEXT:    vldrh.u32 q0, [r0]
189; CHECK-NEXT:    add.w r0, r0, #256
190; CHECK-NEXT:    vstrw.32 q0, [r1]
191; CHECK-NEXT:    bx lr
192entry:
193  %z = getelementptr inbounds i8, i8* %x, i32 256
194  %0 = bitcast i8* %x to <4 x i16>*
195  %1 = load <4 x i16>, <4 x i16>* %0, align 2
196  %2 = zext <4 x i16> %1 to <4 x i32>
197  %3 = bitcast i8* %y to <4 x i32>*
198  store <4 x i32> %2, <4 x i32>* %3, align 4
199  ret i8* %z
200}
201
; Loads <4 x i16> from %x (align 2), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x - 254 bytes. Expected codegen is a post-indexed
; vldrh.u32 with writeback #-254.
202define i8* @ldrhu32_m254(i8* %x, i8* %y) {
203; CHECK-LABEL: ldrhu32_m254:
204; CHECK:       @ %bb.0: @ %entry
205; CHECK-NEXT:    vldrh.u32 q0, [r0], #-254
206; CHECK-NEXT:    vstrw.32 q0, [r1]
207; CHECK-NEXT:    bx lr
208entry:
209  %z = getelementptr inbounds i8, i8* %x, i32 -254
210  %0 = bitcast i8* %x to <4 x i16>*
211  %1 = load <4 x i16>, <4 x i16>* %0, align 2
212  %2 = zext <4 x i16> %1 to <4 x i32>
213  %3 = bitcast i8* %y to <4 x i32>*
214  store <4 x i32> %2, <4 x i32>* %3, align 4
215  ret i8* %z
216}
217
; Loads <4 x i16> from %x (align 2), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x - 256 bytes. Expected codegen is a plain
; vldrh.u32 plus a separate sub.w (no post-index).
; NOTE(review): presumably -256 is outside the post-index immediate range for
; halfword accesses - confirm against the MVE encoding.
218define i8* @ldrhu32_m256(i8* %x, i8* %y) {
219; CHECK-LABEL: ldrhu32_m256:
220; CHECK:       @ %bb.0: @ %entry
221; CHECK-NEXT:    vldrh.u32 q0, [r0]
222; CHECK-NEXT:    sub.w r0, r0, #256
223; CHECK-NEXT:    vstrw.32 q0, [r1]
224; CHECK-NEXT:    bx lr
225entry:
226  %z = getelementptr inbounds i8, i8* %x, i32 -256
227  %0 = bitcast i8* %x to <4 x i16>*
228  %1 = load <4 x i16>, <4 x i16>* %0, align 2
229  %2 = zext <4 x i16> %1 to <4 x i32>
230  %3 = bitcast i8* %y to <4 x i32>*
231  store <4 x i32> %2, <4 x i32>* %3, align 4
232  ret i8* %z
233}
234
235
; Loads <4 x i16> from %x (align 2), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x + 4 bytes. Expected codegen is a single
; post-indexed sign-extending load, vldrh.s32 with writeback #4.
236define i8* @ldrhs32_4(i8* %x, i8* %y) {
237; CHECK-LABEL: ldrhs32_4:
238; CHECK:       @ %bb.0: @ %entry
239; CHECK-NEXT:    vldrh.s32 q0, [r0], #4
240; CHECK-NEXT:    vstrw.32 q0, [r1]
241; CHECK-NEXT:    bx lr
242entry:
243  %z = getelementptr inbounds i8, i8* %x, i32 4
244  %0 = bitcast i8* %x to <4 x i16>*
245  %1 = load <4 x i16>, <4 x i16>* %0, align 2
246  %2 = sext <4 x i16> %1 to <4 x i32>
247  %3 = bitcast i8* %y to <4 x i32>*
248  store <4 x i32> %2, <4 x i32>* %3, align 4
249  ret i8* %z
250}
251
; Loads <4 x i16> from %x (align 2), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x + 3 bytes. Expected codegen keeps the bump as a
; separate adds after a plain vldrh.s32.
; NOTE(review): presumably 3 is rejected because it is not a multiple of the
; 2-byte access size - confirm.
252define i8* @ldrhs32_3(i8* %x, i8* %y) {
253; CHECK-LABEL: ldrhs32_3:
254; CHECK:       @ %bb.0: @ %entry
255; CHECK-NEXT:    vldrh.s32 q0, [r0]
256; CHECK-NEXT:    adds r0, #3
257; CHECK-NEXT:    vstrw.32 q0, [r1]
258; CHECK-NEXT:    bx lr
259entry:
260  %z = getelementptr inbounds i8, i8* %x, i32 3
261  %0 = bitcast i8* %x to <4 x i16>*
262  %1 = load <4 x i16>, <4 x i16>* %0, align 2
263  %2 = sext <4 x i16> %1 to <4 x i32>
264  %3 = bitcast i8* %y to <4 x i32>*
265  store <4 x i32> %2, <4 x i32>* %3, align 4
266  ret i8* %z
267}
268
; Loads <4 x i16> from %x (align 2), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x + 2 bytes. Expected codegen is a post-indexed
; vldrh.s32 with writeback #2.
269define i8* @ldrhs32_2(i8* %x, i8* %y) {
270; CHECK-LABEL: ldrhs32_2:
271; CHECK:       @ %bb.0: @ %entry
272; CHECK-NEXT:    vldrh.s32 q0, [r0], #2
273; CHECK-NEXT:    vstrw.32 q0, [r1]
274; CHECK-NEXT:    bx lr
275entry:
276  %z = getelementptr inbounds i8, i8* %x, i32 2
277  %0 = bitcast i8* %x to <4 x i16>*
278  %1 = load <4 x i16>, <4 x i16>* %0, align 2
279  %2 = sext <4 x i16> %1 to <4 x i32>
280  %3 = bitcast i8* %y to <4 x i32>*
281  store <4 x i32> %2, <4 x i32>* %3, align 4
282  ret i8* %z
283}
284
; Loads <4 x i16> from %x (align 2), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x + 254 bytes. Expected codegen is a post-indexed
; vldrh.s32 with writeback #254.
285define i8* @ldrhs32_254(i8* %x, i8* %y) {
286; CHECK-LABEL: ldrhs32_254:
287; CHECK:       @ %bb.0: @ %entry
288; CHECK-NEXT:    vldrh.s32 q0, [r0], #254
289; CHECK-NEXT:    vstrw.32 q0, [r1]
290; CHECK-NEXT:    bx lr
291entry:
292  %z = getelementptr inbounds i8, i8* %x, i32 254
293  %0 = bitcast i8* %x to <4 x i16>*
294  %1 = load <4 x i16>, <4 x i16>* %0, align 2
295  %2 = sext <4 x i16> %1 to <4 x i32>
296  %3 = bitcast i8* %y to <4 x i32>*
297  store <4 x i32> %2, <4 x i32>* %3, align 4
298  ret i8* %z
299}
300
; Loads <4 x i16> from %x (align 2), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x + 256 bytes. Expected codegen is a plain
; vldrh.s32 plus a separate add.w (no post-index).
; NOTE(review): presumably 256 is outside the post-index immediate range for
; halfword accesses - confirm against the MVE encoding.
301define i8* @ldrhs32_256(i8* %x, i8* %y) {
302; CHECK-LABEL: ldrhs32_256:
303; CHECK:       @ %bb.0: @ %entry
304; CHECK-NEXT:    vldrh.s32 q0, [r0]
305; CHECK-NEXT:    add.w r0, r0, #256
306; CHECK-NEXT:    vstrw.32 q0, [r1]
307; CHECK-NEXT:    bx lr
308entry:
309  %z = getelementptr inbounds i8, i8* %x, i32 256
310  %0 = bitcast i8* %x to <4 x i16>*
311  %1 = load <4 x i16>, <4 x i16>* %0, align 2
312  %2 = sext <4 x i16> %1 to <4 x i32>
313  %3 = bitcast i8* %y to <4 x i32>*
314  store <4 x i32> %2, <4 x i32>* %3, align 4
315  ret i8* %z
316}
317
; Loads <4 x i16> from %x (align 2), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x - 254 bytes. Expected codegen is a post-indexed
; vldrh.s32 with writeback #-254.
318define i8* @ldrhs32_m254(i8* %x, i8* %y) {
319; CHECK-LABEL: ldrhs32_m254:
320; CHECK:       @ %bb.0: @ %entry
321; CHECK-NEXT:    vldrh.s32 q0, [r0], #-254
322; CHECK-NEXT:    vstrw.32 q0, [r1]
323; CHECK-NEXT:    bx lr
324entry:
325  %z = getelementptr inbounds i8, i8* %x, i32 -254
326  %0 = bitcast i8* %x to <4 x i16>*
327  %1 = load <4 x i16>, <4 x i16>* %0, align 2
328  %2 = sext <4 x i16> %1 to <4 x i32>
329  %3 = bitcast i8* %y to <4 x i32>*
330  store <4 x i32> %2, <4 x i32>* %3, align 4
331  ret i8* %z
332}
333
; Loads <4 x i16> from %x (align 2), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x - 256 bytes. Expected codegen is a plain
; vldrh.s32 plus a separate sub.w (no post-index).
; NOTE(review): presumably -256 is outside the post-index immediate range for
; halfword accesses - confirm against the MVE encoding.
334define i8* @ldrhs32_m256(i8* %x, i8* %y) {
335; CHECK-LABEL: ldrhs32_m256:
336; CHECK:       @ %bb.0: @ %entry
337; CHECK-NEXT:    vldrh.s32 q0, [r0]
338; CHECK-NEXT:    sub.w r0, r0, #256
339; CHECK-NEXT:    vstrw.32 q0, [r1]
340; CHECK-NEXT:    bx lr
341entry:
342  %z = getelementptr inbounds i8, i8* %x, i32 -256
343  %0 = bitcast i8* %x to <4 x i16>*
344  %1 = load <4 x i16>, <4 x i16>* %0, align 2
345  %2 = sext <4 x i16> %1 to <4 x i32>
346  %3 = bitcast i8* %y to <4 x i32>*
347  store <4 x i32> %2, <4 x i32>* %3, align 4
348  ret i8* %z
349}
350
351
; Loads <8 x i16> from %x (align 2), stores it to %y (align 2), and returns
; %x + 4 bytes. Expected codegen is a post-indexed vldrh.u16 with writeback #4.
352define i8* @ldrhu16_4(i8* %x, i8* %y) {
353; CHECK-LABEL: ldrhu16_4:
354; CHECK:       @ %bb.0: @ %entry
355; CHECK-NEXT:    vldrh.u16 q0, [r0], #4
356; CHECK-NEXT:    vstrh.16 q0, [r1]
357; CHECK-NEXT:    bx lr
358entry:
359  %z = getelementptr inbounds i8, i8* %x, i32 4
360  %0 = bitcast i8* %x to <8 x i16>*
361  %1 = load <8 x i16>, <8 x i16>* %0, align 2
362  %2 = bitcast i8* %y to <8 x i16>*
363  store <8 x i16> %1, <8 x i16>* %2, align 2
364  ret i8* %z
365}
366
; Loads <8 x i16> from %x (align 2), stores it to %y (align 2), and returns
; %x + 3 bytes. The LE checks expect a post-indexed byte load (vldrb.u8 with
; writeback #3); the BE checks expect a plain vldrh.u16 plus a separate adds.
; NOTE(review): presumably 3 is not encodable as a halfword-scaled post-index
; offset, and the byte-load substitution is only valid on LE - confirm.
367define i8* @ldrhu16_3(i8* %x, i8* %y) {
368; CHECK-LE-LABEL: ldrhu16_3:
369; CHECK-LE:       @ %bb.0: @ %entry
370; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
371; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
372; CHECK-LE-NEXT:    bx lr
373;
374; CHECK-BE-LABEL: ldrhu16_3:
375; CHECK-BE:       @ %bb.0: @ %entry
376; CHECK-BE-NEXT:    vldrh.u16 q0, [r0]
377; CHECK-BE-NEXT:    adds r0, #3
378; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
379; CHECK-BE-NEXT:    bx lr
380entry:
381  %z = getelementptr inbounds i8, i8* %x, i32 3
382  %0 = bitcast i8* %x to <8 x i16>*
383  %1 = load <8 x i16>, <8 x i16>* %0, align 2
384  %2 = bitcast i8* %y to <8 x i16>*
385  store <8 x i16> %1, <8 x i16>* %2, align 2
386  ret i8* %z
387}
388
; Loads <8 x i16> from %x (align 2), stores it to %y (align 2), and returns
; %x + 2 bytes. Expected codegen is a post-indexed vldrh.u16 with writeback #2.
389define i8* @ldrhu16_2(i8* %x, i8* %y) {
390; CHECK-LABEL: ldrhu16_2:
391; CHECK:       @ %bb.0: @ %entry
392; CHECK-NEXT:    vldrh.u16 q0, [r0], #2
393; CHECK-NEXT:    vstrh.16 q0, [r1]
394; CHECK-NEXT:    bx lr
395entry:
396  %z = getelementptr inbounds i8, i8* %x, i32 2
397  %0 = bitcast i8* %x to <8 x i16>*
398  %1 = load <8 x i16>, <8 x i16>* %0, align 2
399  %2 = bitcast i8* %y to <8 x i16>*
400  store <8 x i16> %1, <8 x i16>* %2, align 2
401  ret i8* %z
402}
403
; Loads <8 x i16> from %x (align 2), stores it to %y (align 2), and returns
; %x + 254 bytes. Expected codegen is a post-indexed vldrh.u16 with
; writeback #254.
404define i8* @ldrhu16_254(i8* %x, i8* %y) {
405; CHECK-LABEL: ldrhu16_254:
406; CHECK:       @ %bb.0: @ %entry
407; CHECK-NEXT:    vldrh.u16 q0, [r0], #254
408; CHECK-NEXT:    vstrh.16 q0, [r1]
409; CHECK-NEXT:    bx lr
410entry:
411  %z = getelementptr inbounds i8, i8* %x, i32 254
412  %0 = bitcast i8* %x to <8 x i16>*
413  %1 = load <8 x i16>, <8 x i16>* %0, align 2
414  %2 = bitcast i8* %y to <8 x i16>*
415  store <8 x i16> %1, <8 x i16>* %2, align 2
416  ret i8* %z
417}
418
; Loads <8 x i16> from %x (align 2), stores it to %y (align 2), and returns
; %x + 256 bytes. Expected codegen is a plain vldrh.u16 plus a separate add.w.
; NOTE(review): presumably 256 is outside the post-index immediate range for
; halfword accesses - confirm against the MVE encoding.
419define i8* @ldrhu16_256(i8* %x, i8* %y) {
420; CHECK-LABEL: ldrhu16_256:
421; CHECK:       @ %bb.0: @ %entry
422; CHECK-NEXT:    vldrh.u16 q0, [r0]
423; CHECK-NEXT:    add.w r0, r0, #256
424; CHECK-NEXT:    vstrh.16 q0, [r1]
425; CHECK-NEXT:    bx lr
426entry:
427  %z = getelementptr inbounds i8, i8* %x, i32 256
428  %0 = bitcast i8* %x to <8 x i16>*
429  %1 = load <8 x i16>, <8 x i16>* %0, align 2
430  %2 = bitcast i8* %y to <8 x i16>*
431  store <8 x i16> %1, <8 x i16>* %2, align 2
432  ret i8* %z
433}
434
; Loads <8 x i16> from %x (align 2), stores it to %y (align 2), and returns
; %x - 254 bytes. Expected codegen is a post-indexed vldrh.u16 with
; writeback #-254.
435define i8* @ldrhu16_m254(i8* %x, i8* %y) {
436; CHECK-LABEL: ldrhu16_m254:
437; CHECK:       @ %bb.0: @ %entry
438; CHECK-NEXT:    vldrh.u16 q0, [r0], #-254
439; CHECK-NEXT:    vstrh.16 q0, [r1]
440; CHECK-NEXT:    bx lr
441entry:
442  %z = getelementptr inbounds i8, i8* %x, i32 -254
443  %0 = bitcast i8* %x to <8 x i16>*
444  %1 = load <8 x i16>, <8 x i16>* %0, align 2
445  %2 = bitcast i8* %y to <8 x i16>*
446  store <8 x i16> %1, <8 x i16>* %2, align 2
447  ret i8* %z
448}
449
; Loads <8 x i16> from %x (align 2), stores it to %y (align 2), and returns
; %x - 256 bytes. Expected codegen is a plain vldrh.u16 plus a separate sub.w.
; NOTE(review): presumably -256 is outside the post-index immediate range for
; halfword accesses - confirm against the MVE encoding.
450define i8* @ldrhu16_m256(i8* %x, i8* %y) {
451; CHECK-LABEL: ldrhu16_m256:
452; CHECK:       @ %bb.0: @ %entry
453; CHECK-NEXT:    vldrh.u16 q0, [r0]
454; CHECK-NEXT:    sub.w r0, r0, #256
455; CHECK-NEXT:    vstrh.16 q0, [r1]
456; CHECK-NEXT:    bx lr
457entry:
458  %z = getelementptr inbounds i8, i8* %x, i32 -256
459  %0 = bitcast i8* %x to <8 x i16>*
460  %1 = load <8 x i16>, <8 x i16>* %0, align 2
461  %2 = bitcast i8* %y to <8 x i16>*
462  store <8 x i16> %1, <8 x i16>* %2, align 2
463  ret i8* %z
464}
465
466
; Loads <4 x i8> from %x (align 1), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x + 4 bytes. Expected codegen is a single
; post-indexed widening load, vldrb.u32 with writeback #4.
467define i8* @ldrbu32_4(i8* %x, i8* %y) {
468; CHECK-LABEL: ldrbu32_4:
469; CHECK:       @ %bb.0: @ %entry
470; CHECK-NEXT:    vldrb.u32 q0, [r0], #4
471; CHECK-NEXT:    vstrw.32 q0, [r1]
472; CHECK-NEXT:    bx lr
473entry:
474  %z = getelementptr inbounds i8, i8* %x, i32 4
475  %0 = bitcast i8* %x to <4 x i8>*
476  %1 = load <4 x i8>, <4 x i8>* %0, align 1
477  %2 = zext <4 x i8> %1 to <4 x i32>
478  %3 = bitcast i8* %y to <4 x i32>*
479  store <4 x i32> %2, <4 x i32>* %3, align 4
480  ret i8* %z
481}
482
; Loads <4 x i8> from %x (align 1), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x + 3 bytes. Expected codegen folds the odd
; offset into a post-indexed vldrb.u32 with writeback #3.
483define i8* @ldrbu32_3(i8* %x, i8* %y) {
484; CHECK-LABEL: ldrbu32_3:
485; CHECK:       @ %bb.0: @ %entry
486; CHECK-NEXT:    vldrb.u32 q0, [r0], #3
487; CHECK-NEXT:    vstrw.32 q0, [r1]
488; CHECK-NEXT:    bx lr
489entry:
490  %z = getelementptr inbounds i8, i8* %x, i32 3
491  %0 = bitcast i8* %x to <4 x i8>*
492  %1 = load <4 x i8>, <4 x i8>* %0, align 1
493  %2 = zext <4 x i8> %1 to <4 x i32>
494  %3 = bitcast i8* %y to <4 x i32>*
495  store <4 x i32> %2, <4 x i32>* %3, align 4
496  ret i8* %z
497}
498
; Loads <4 x i8> from %x (align 1), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x + 127 bytes. Expected codegen is a post-indexed
; vldrb.u32 with writeback #127.
499define i8* @ldrbu32_127(i8* %x, i8* %y) {
500; CHECK-LABEL: ldrbu32_127:
501; CHECK:       @ %bb.0: @ %entry
502; CHECK-NEXT:    vldrb.u32 q0, [r0], #127
503; CHECK-NEXT:    vstrw.32 q0, [r1]
504; CHECK-NEXT:    bx lr
505entry:
506  %z = getelementptr inbounds i8, i8* %x, i32 127
507  %0 = bitcast i8* %x to <4 x i8>*
508  %1 = load <4 x i8>, <4 x i8>* %0, align 1
509  %2 = zext <4 x i8> %1 to <4 x i32>
510  %3 = bitcast i8* %y to <4 x i32>*
511  store <4 x i32> %2, <4 x i32>* %3, align 4
512  ret i8* %z
513}
514
; Loads <4 x i8> from %x (align 1), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x + 128 bytes. Expected codegen is a plain
; vldrb.u32 plus a separate adds (no post-index).
; NOTE(review): presumably 128 is outside the post-index immediate range for
; byte accesses - confirm against the MVE encoding.
515define i8* @ldrbu32_128(i8* %x, i8* %y) {
516; CHECK-LABEL: ldrbu32_128:
517; CHECK:       @ %bb.0: @ %entry
518; CHECK-NEXT:    vldrb.u32 q0, [r0]
519; CHECK-NEXT:    adds r0, #128
520; CHECK-NEXT:    vstrw.32 q0, [r1]
521; CHECK-NEXT:    bx lr
522entry:
523  %z = getelementptr inbounds i8, i8* %x, i32 128
524  %0 = bitcast i8* %x to <4 x i8>*
525  %1 = load <4 x i8>, <4 x i8>* %0, align 1
526  %2 = zext <4 x i8> %1 to <4 x i32>
527  %3 = bitcast i8* %y to <4 x i32>*
528  store <4 x i32> %2, <4 x i32>* %3, align 4
529  ret i8* %z
530}
531
; Loads <4 x i8> from %x (align 1), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x - 127 bytes. Expected codegen is a post-indexed
; vldrb.u32 with writeback #-127.
532define i8* @ldrbu32_m127(i8* %x, i8* %y) {
533; CHECK-LABEL: ldrbu32_m127:
534; CHECK:       @ %bb.0: @ %entry
535; CHECK-NEXT:    vldrb.u32 q0, [r0], #-127
536; CHECK-NEXT:    vstrw.32 q0, [r1]
537; CHECK-NEXT:    bx lr
538entry:
539  %z = getelementptr inbounds i8, i8* %x, i32 -127
540  %0 = bitcast i8* %x to <4 x i8>*
541  %1 = load <4 x i8>, <4 x i8>* %0, align 1
542  %2 = zext <4 x i8> %1 to <4 x i32>
543  %3 = bitcast i8* %y to <4 x i32>*
544  store <4 x i32> %2, <4 x i32>* %3, align 4
545  ret i8* %z
546}
547
; Loads <4 x i8> from %x (align 1), zero-extends to <4 x i32>, stores the
; result to %y, and returns %x - 128 bytes. Expected codegen is a plain
; vldrb.u32 plus a separate subs (no post-index).
; NOTE(review): presumably -128 is outside the post-index immediate range for
; byte accesses - confirm against the MVE encoding.
548define i8* @ldrbu32_m128(i8* %x, i8* %y) {
549; CHECK-LABEL: ldrbu32_m128:
550; CHECK:       @ %bb.0: @ %entry
551; CHECK-NEXT:    vldrb.u32 q0, [r0]
552; CHECK-NEXT:    subs r0, #128
553; CHECK-NEXT:    vstrw.32 q0, [r1]
554; CHECK-NEXT:    bx lr
555entry:
556  %z = getelementptr inbounds i8, i8* %x, i32 -128
557  %0 = bitcast i8* %x to <4 x i8>*
558  %1 = load <4 x i8>, <4 x i8>* %0, align 1
559  %2 = zext <4 x i8> %1 to <4 x i32>
560  %3 = bitcast i8* %y to <4 x i32>*
561  store <4 x i32> %2, <4 x i32>* %3, align 4
562  ret i8* %z
563}
564
565
; Loads <4 x i8> from %x (align 1), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x + 4 bytes. Expected codegen is a single
; post-indexed sign-extending load, vldrb.s32 with writeback #4.
566define i8* @ldrbs32_4(i8* %x, i8* %y) {
567; CHECK-LABEL: ldrbs32_4:
568; CHECK:       @ %bb.0: @ %entry
569; CHECK-NEXT:    vldrb.s32 q0, [r0], #4
570; CHECK-NEXT:    vstrw.32 q0, [r1]
571; CHECK-NEXT:    bx lr
572entry:
573  %z = getelementptr inbounds i8, i8* %x, i32 4
574  %0 = bitcast i8* %x to <4 x i8>*
575  %1 = load <4 x i8>, <4 x i8>* %0, align 1
576  %2 = sext <4 x i8> %1 to <4 x i32>
577  %3 = bitcast i8* %y to <4 x i32>*
578  store <4 x i32> %2, <4 x i32>* %3, align 4
579  ret i8* %z
580}
581
; Loads <4 x i8> from %x (align 1), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x + 3 bytes. Expected codegen folds the odd
; offset into a post-indexed vldrb.s32 with writeback #3.
582define i8* @ldrbs32_3(i8* %x, i8* %y) {
583; CHECK-LABEL: ldrbs32_3:
584; CHECK:       @ %bb.0: @ %entry
585; CHECK-NEXT:    vldrb.s32 q0, [r0], #3
586; CHECK-NEXT:    vstrw.32 q0, [r1]
587; CHECK-NEXT:    bx lr
588entry:
589  %z = getelementptr inbounds i8, i8* %x, i32 3
590  %0 = bitcast i8* %x to <4 x i8>*
591  %1 = load <4 x i8>, <4 x i8>* %0, align 1
592  %2 = sext <4 x i8> %1 to <4 x i32>
593  %3 = bitcast i8* %y to <4 x i32>*
594  store <4 x i32> %2, <4 x i32>* %3, align 4
595  ret i8* %z
596}
597
; Loads <4 x i8> from %x (align 1), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x + 127 bytes. Expected codegen is a post-indexed
; vldrb.s32 with writeback #127.
598define i8* @ldrbs32_127(i8* %x, i8* %y) {
599; CHECK-LABEL: ldrbs32_127:
600; CHECK:       @ %bb.0: @ %entry
601; CHECK-NEXT:    vldrb.s32 q0, [r0], #127
602; CHECK-NEXT:    vstrw.32 q0, [r1]
603; CHECK-NEXT:    bx lr
604entry:
605  %z = getelementptr inbounds i8, i8* %x, i32 127
606  %0 = bitcast i8* %x to <4 x i8>*
607  %1 = load <4 x i8>, <4 x i8>* %0, align 1
608  %2 = sext <4 x i8> %1 to <4 x i32>
609  %3 = bitcast i8* %y to <4 x i32>*
610  store <4 x i32> %2, <4 x i32>* %3, align 4
611  ret i8* %z
612}
613
; Loads <4 x i8> from %x (align 1), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x + 128 bytes. Expected codegen is a plain
; vldrb.s32 plus a separate adds (no post-index).
; NOTE(review): presumably 128 is outside the post-index immediate range for
; byte accesses - confirm against the MVE encoding.
614define i8* @ldrbs32_128(i8* %x, i8* %y) {
615; CHECK-LABEL: ldrbs32_128:
616; CHECK:       @ %bb.0: @ %entry
617; CHECK-NEXT:    vldrb.s32 q0, [r0]
618; CHECK-NEXT:    adds r0, #128
619; CHECK-NEXT:    vstrw.32 q0, [r1]
620; CHECK-NEXT:    bx lr
621entry:
622  %z = getelementptr inbounds i8, i8* %x, i32 128
623  %0 = bitcast i8* %x to <4 x i8>*
624  %1 = load <4 x i8>, <4 x i8>* %0, align 1
625  %2 = sext <4 x i8> %1 to <4 x i32>
626  %3 = bitcast i8* %y to <4 x i32>*
627  store <4 x i32> %2, <4 x i32>* %3, align 4
628  ret i8* %z
629}
630
; Loads <4 x i8> from %x (align 1), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x - 127 bytes. Expected codegen is a post-indexed
; vldrb.s32 with writeback #-127.
631define i8* @ldrbs32_m127(i8* %x, i8* %y) {
632; CHECK-LABEL: ldrbs32_m127:
633; CHECK:       @ %bb.0: @ %entry
634; CHECK-NEXT:    vldrb.s32 q0, [r0], #-127
635; CHECK-NEXT:    vstrw.32 q0, [r1]
636; CHECK-NEXT:    bx lr
637entry:
638  %z = getelementptr inbounds i8, i8* %x, i32 -127
639  %0 = bitcast i8* %x to <4 x i8>*
640  %1 = load <4 x i8>, <4 x i8>* %0, align 1
641  %2 = sext <4 x i8> %1 to <4 x i32>
642  %3 = bitcast i8* %y to <4 x i32>*
643  store <4 x i32> %2, <4 x i32>* %3, align 4
644  ret i8* %z
645}
646
; Loads <4 x i8> from %x (align 1), sign-extends to <4 x i32>, stores the
; result to %y, and returns %x - 128 bytes. Expected codegen is a plain
; vldrb.s32 plus a separate subs (no post-index).
; NOTE(review): presumably -128 is outside the post-index immediate range for
; byte accesses - confirm against the MVE encoding.
647define i8* @ldrbs32_m128(i8* %x, i8* %y) {
648; CHECK-LABEL: ldrbs32_m128:
649; CHECK:       @ %bb.0: @ %entry
650; CHECK-NEXT:    vldrb.s32 q0, [r0]
651; CHECK-NEXT:    subs r0, #128
652; CHECK-NEXT:    vstrw.32 q0, [r1]
653; CHECK-NEXT:    bx lr
654entry:
655  %z = getelementptr inbounds i8, i8* %x, i32 -128
656  %0 = bitcast i8* %x to <4 x i8>*
657  %1 = load <4 x i8>, <4 x i8>* %0, align 1
658  %2 = sext <4 x i8> %1 to <4 x i32>
659  %3 = bitcast i8* %y to <4 x i32>*
660  store <4 x i32> %2, <4 x i32>* %3, align 4
661  ret i8* %z
662}
663
664
; Loads <8 x i8> from %x (align 1), zero-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x + 4 bytes. Expected codegen is a
; post-indexed widening load, vldrb.u16 with writeback #4.
665define i8* @ldrbu16_4(i8* %x, i8* %y) {
666; CHECK-LABEL: ldrbu16_4:
667; CHECK:       @ %bb.0: @ %entry
668; CHECK-NEXT:    vldrb.u16 q0, [r0], #4
669; CHECK-NEXT:    vstrh.16 q0, [r1]
670; CHECK-NEXT:    bx lr
671entry:
672  %z = getelementptr inbounds i8, i8* %x, i32 4
673  %0 = bitcast i8* %x to <8 x i8>*
674  %1 = load <8 x i8>, <8 x i8>* %0, align 1
675  %2 = zext <8 x i8> %1 to <8 x i16>
676  %3 = bitcast i8* %y to <8 x i16>*
677  store <8 x i16> %2, <8 x i16>* %3, align 2
678  ret i8* %z
679}
680
; Loads <8 x i8> from %x (align 1), zero-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x + 3 bytes. Expected codegen folds the
; odd offset into a post-indexed vldrb.u16 with writeback #3.
681define i8* @ldrbu16_3(i8* %x, i8* %y) {
682; CHECK-LABEL: ldrbu16_3:
683; CHECK:       @ %bb.0: @ %entry
684; CHECK-NEXT:    vldrb.u16 q0, [r0], #3
685; CHECK-NEXT:    vstrh.16 q0, [r1]
686; CHECK-NEXT:    bx lr
687entry:
688  %z = getelementptr inbounds i8, i8* %x, i32 3
689  %0 = bitcast i8* %x to <8 x i8>*
690  %1 = load <8 x i8>, <8 x i8>* %0, align 1
691  %2 = zext <8 x i8> %1 to <8 x i16>
692  %3 = bitcast i8* %y to <8 x i16>*
693  store <8 x i16> %2, <8 x i16>* %3, align 2
694  ret i8* %z
695}
696
; Loads <8 x i8> from %x (align 1), zero-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x + 127 bytes. Expected codegen is a
; post-indexed vldrb.u16 with writeback #127.
697define i8* @ldrbu16_127(i8* %x, i8* %y) {
698; CHECK-LABEL: ldrbu16_127:
699; CHECK:       @ %bb.0: @ %entry
700; CHECK-NEXT:    vldrb.u16 q0, [r0], #127
701; CHECK-NEXT:    vstrh.16 q0, [r1]
702; CHECK-NEXT:    bx lr
703entry:
704  %z = getelementptr inbounds i8, i8* %x, i32 127
705  %0 = bitcast i8* %x to <8 x i8>*
706  %1 = load <8 x i8>, <8 x i8>* %0, align 1
707  %2 = zext <8 x i8> %1 to <8 x i16>
708  %3 = bitcast i8* %y to <8 x i16>*
709  store <8 x i16> %2, <8 x i16>* %3, align 2
710  ret i8* %z
711}
712
; Loads <8 x i8> from %x (align 1), zero-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x + 128 bytes. Expected codegen is a
; plain vldrb.u16 plus a separate adds (no post-index).
; NOTE(review): presumably 128 is outside the post-index immediate range for
; byte accesses - confirm against the MVE encoding.
713define i8* @ldrbu16_128(i8* %x, i8* %y) {
714; CHECK-LABEL: ldrbu16_128:
715; CHECK:       @ %bb.0: @ %entry
716; CHECK-NEXT:    vldrb.u16 q0, [r0]
717; CHECK-NEXT:    adds r0, #128
718; CHECK-NEXT:    vstrh.16 q0, [r1]
719; CHECK-NEXT:    bx lr
720entry:
721  %z = getelementptr inbounds i8, i8* %x, i32 128
722  %0 = bitcast i8* %x to <8 x i8>*
723  %1 = load <8 x i8>, <8 x i8>* %0, align 1
724  %2 = zext <8 x i8> %1 to <8 x i16>
725  %3 = bitcast i8* %y to <8 x i16>*
726  store <8 x i16> %2, <8 x i16>* %3, align 2
727  ret i8* %z
728}
729
; Loads <8 x i8> from %x (align 1), zero-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x - 127 bytes. Expected codegen is a
; post-indexed vldrb.u16 with writeback #-127.
730define i8* @ldrbu16_m127(i8* %x, i8* %y) {
731; CHECK-LABEL: ldrbu16_m127:
732; CHECK:       @ %bb.0: @ %entry
733; CHECK-NEXT:    vldrb.u16 q0, [r0], #-127
734; CHECK-NEXT:    vstrh.16 q0, [r1]
735; CHECK-NEXT:    bx lr
736entry:
737  %z = getelementptr inbounds i8, i8* %x, i32 -127
738  %0 = bitcast i8* %x to <8 x i8>*
739  %1 = load <8 x i8>, <8 x i8>* %0, align 1
740  %2 = zext <8 x i8> %1 to <8 x i16>
741  %3 = bitcast i8* %y to <8 x i16>*
742  store <8 x i16> %2, <8 x i16>* %3, align 2
743  ret i8* %z
744}
745
; Loads <8 x i8> from %x (align 1), zero-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x - 128 bytes. Expected codegen is a
; plain vldrb.u16 plus a separate subs (no post-index).
; NOTE(review): presumably -128 is outside the post-index immediate range for
; byte accesses - confirm against the MVE encoding.
746define i8* @ldrbu16_m128(i8* %x, i8* %y) {
747; CHECK-LABEL: ldrbu16_m128:
748; CHECK:       @ %bb.0: @ %entry
749; CHECK-NEXT:    vldrb.u16 q0, [r0]
750; CHECK-NEXT:    subs r0, #128
751; CHECK-NEXT:    vstrh.16 q0, [r1]
752; CHECK-NEXT:    bx lr
753entry:
754  %z = getelementptr inbounds i8, i8* %x, i32 -128
755  %0 = bitcast i8* %x to <8 x i8>*
756  %1 = load <8 x i8>, <8 x i8>* %0, align 1
757  %2 = zext <8 x i8> %1 to <8 x i16>
758  %3 = bitcast i8* %y to <8 x i16>*
759  store <8 x i16> %2, <8 x i16>* %3, align 2
760  ret i8* %z
761}
762
763
; Loads <8 x i8> from %x (align 1), sign-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x + 4 bytes. Expected codegen is a
; post-indexed sign-extending load, vldrb.s16 with writeback #4.
764define i8* @ldrbs16_4(i8* %x, i8* %y) {
765; CHECK-LABEL: ldrbs16_4:
766; CHECK:       @ %bb.0: @ %entry
767; CHECK-NEXT:    vldrb.s16 q0, [r0], #4
768; CHECK-NEXT:    vstrh.16 q0, [r1]
769; CHECK-NEXT:    bx lr
770entry:
771  %z = getelementptr inbounds i8, i8* %x, i32 4
772  %0 = bitcast i8* %x to <8 x i8>*
773  %1 = load <8 x i8>, <8 x i8>* %0, align 1
774  %2 = sext <8 x i8> %1 to <8 x i16>
775  %3 = bitcast i8* %y to <8 x i16>*
776  store <8 x i16> %2, <8 x i16>* %3, align 2
777  ret i8* %z
778}
779
; Loads <8 x i8> from %x (align 1), sign-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x + 3 bytes. Expected codegen folds the
; odd offset into a post-indexed vldrb.s16 with writeback #3.
780define i8* @ldrbs16_3(i8* %x, i8* %y) {
781; CHECK-LABEL: ldrbs16_3:
782; CHECK:       @ %bb.0: @ %entry
783; CHECK-NEXT:    vldrb.s16 q0, [r0], #3
784; CHECK-NEXT:    vstrh.16 q0, [r1]
785; CHECK-NEXT:    bx lr
786entry:
787  %z = getelementptr inbounds i8, i8* %x, i32 3
788  %0 = bitcast i8* %x to <8 x i8>*
789  %1 = load <8 x i8>, <8 x i8>* %0, align 1
790  %2 = sext <8 x i8> %1 to <8 x i16>
791  %3 = bitcast i8* %y to <8 x i16>*
792  store <8 x i16> %2, <8 x i16>* %3, align 2
793  ret i8* %z
794}
795
; Loads <8 x i8> from %x (align 1), sign-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x + 127 bytes. Expected codegen is a
; post-indexed vldrb.s16 with writeback #127.
796define i8* @ldrbs16_127(i8* %x, i8* %y) {
797; CHECK-LABEL: ldrbs16_127:
798; CHECK:       @ %bb.0: @ %entry
799; CHECK-NEXT:    vldrb.s16 q0, [r0], #127
800; CHECK-NEXT:    vstrh.16 q0, [r1]
801; CHECK-NEXT:    bx lr
802entry:
803  %z = getelementptr inbounds i8, i8* %x, i32 127
804  %0 = bitcast i8* %x to <8 x i8>*
805  %1 = load <8 x i8>, <8 x i8>* %0, align 1
806  %2 = sext <8 x i8> %1 to <8 x i16>
807  %3 = bitcast i8* %y to <8 x i16>*
808  store <8 x i16> %2, <8 x i16>* %3, align 2
809  ret i8* %z
810}
811
; Loads <8 x i8> from %x (align 1), sign-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x + 128 bytes. Expected codegen is a
; plain vldrb.s16 plus a separate adds (no post-index).
; NOTE(review): presumably 128 is outside the post-index immediate range for
; byte accesses - confirm against the MVE encoding.
812define i8* @ldrbs16_128(i8* %x, i8* %y) {
813; CHECK-LABEL: ldrbs16_128:
814; CHECK:       @ %bb.0: @ %entry
815; CHECK-NEXT:    vldrb.s16 q0, [r0]
816; CHECK-NEXT:    adds r0, #128
817; CHECK-NEXT:    vstrh.16 q0, [r1]
818; CHECK-NEXT:    bx lr
819entry:
820  %z = getelementptr inbounds i8, i8* %x, i32 128
821  %0 = bitcast i8* %x to <8 x i8>*
822  %1 = load <8 x i8>, <8 x i8>* %0, align 1
823  %2 = sext <8 x i8> %1 to <8 x i16>
824  %3 = bitcast i8* %y to <8 x i16>*
825  store <8 x i16> %2, <8 x i16>* %3, align 2
826  ret i8* %z
827}
828
; Loads <8 x i8> from %x (align 1), sign-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x - 127 bytes. Expected codegen is a
; post-indexed vldrb.s16 with writeback #-127.
829define i8* @ldrbs16_m127(i8* %x, i8* %y) {
830; CHECK-LABEL: ldrbs16_m127:
831; CHECK:       @ %bb.0: @ %entry
832; CHECK-NEXT:    vldrb.s16 q0, [r0], #-127
833; CHECK-NEXT:    vstrh.16 q0, [r1]
834; CHECK-NEXT:    bx lr
835entry:
836  %z = getelementptr inbounds i8, i8* %x, i32 -127
837  %0 = bitcast i8* %x to <8 x i8>*
838  %1 = load <8 x i8>, <8 x i8>* %0, align 1
839  %2 = sext <8 x i8> %1 to <8 x i16>
840  %3 = bitcast i8* %y to <8 x i16>*
841  store <8 x i16> %2, <8 x i16>* %3, align 2
842  ret i8* %z
843}
844
; Loads <8 x i8> from %x (align 1), sign-extends to <8 x i16>, stores the
; result to %y (align 2), and returns %x - 128 bytes. Expected codegen is a
; plain vldrb.s16 plus a separate subs (no post-index).
; NOTE(review): presumably -128 is outside the post-index immediate range for
; byte accesses - confirm against the MVE encoding.
845define i8* @ldrbs16_m128(i8* %x, i8* %y) {
846; CHECK-LABEL: ldrbs16_m128:
847; CHECK:       @ %bb.0: @ %entry
848; CHECK-NEXT:    vldrb.s16 q0, [r0]
849; CHECK-NEXT:    subs r0, #128
850; CHECK-NEXT:    vstrh.16 q0, [r1]
851; CHECK-NEXT:    bx lr
852entry:
853  %z = getelementptr inbounds i8, i8* %x, i32 -128
854  %0 = bitcast i8* %x to <8 x i8>*
855  %1 = load <8 x i8>, <8 x i8>* %0, align 1
856  %2 = sext <8 x i8> %1 to <8 x i16>
857  %3 = bitcast i8* %y to <8 x i16>*
858  store <8 x i16> %2, <8 x i16>* %3, align 2
859  ret i8* %z
860}
861
862
; Loads <16 x i8> from %x (align 1), stores it to %y (align 1), and returns
; %x + 4 bytes. Expected codegen is a post-indexed vldrb.u8 with writeback #4.
863define i8* @ldrbu8_4(i8* %x, i8* %y) {
864; CHECK-LABEL: ldrbu8_4:
865; CHECK:       @ %bb.0: @ %entry
866; CHECK-NEXT:    vldrb.u8 q0, [r0], #4
867; CHECK-NEXT:    vstrb.8 q0, [r1]
868; CHECK-NEXT:    bx lr
869entry:
870  %z = getelementptr inbounds i8, i8* %x, i32 4
871  %0 = bitcast i8* %x to <16 x i8>*
872  %1 = load <16 x i8>, <16 x i8>* %0, align 1
873  %2 = bitcast i8* %y to <16 x i8>*
874  store <16 x i8> %1, <16 x i8>* %2, align 1
875  ret i8* %z
876}
877
; Loads <16 x i8> from %x (align 1), stores it to %y (align 1), and returns
; %x + 3 bytes. Expected codegen is a post-indexed vldrb.u8 with writeback #3.
878define i8* @ldrbu8_3(i8* %x, i8* %y) {
879; CHECK-LABEL: ldrbu8_3:
880; CHECK:       @ %bb.0: @ %entry
881; CHECK-NEXT:    vldrb.u8 q0, [r0], #3
882; CHECK-NEXT:    vstrb.8 q0, [r1]
883; CHECK-NEXT:    bx lr
884entry:
885  %z = getelementptr inbounds i8, i8* %x, i32 3
886  %0 = bitcast i8* %x to <16 x i8>*
887  %1 = load <16 x i8>, <16 x i8>* %0, align 1
888  %2 = bitcast i8* %y to <16 x i8>*
889  store <16 x i8> %1, <16 x i8>* %2, align 1
890  ret i8* %z
891}
892
; Loads <16 x i8> from %x (align 1), stores it to %y (align 1), and returns
; %x + 127 bytes. Expected codegen is a post-indexed vldrb.u8 with
; writeback #127.
893define i8* @ldrbu8_127(i8* %x, i8* %y) {
894; CHECK-LABEL: ldrbu8_127:
895; CHECK:       @ %bb.0: @ %entry
896; CHECK-NEXT:    vldrb.u8 q0, [r0], #127
897; CHECK-NEXT:    vstrb.8 q0, [r1]
898; CHECK-NEXT:    bx lr
899entry:
900  %z = getelementptr inbounds i8, i8* %x, i32 127
901  %0 = bitcast i8* %x to <16 x i8>*
902  %1 = load <16 x i8>, <16 x i8>* %0, align 1
903  %2 = bitcast i8* %y to <16 x i8>*
904  store <16 x i8> %1, <16 x i8>* %2, align 1
905  ret i8* %z
906}
907
; Loads <16 x i8> from %x (align 1), stores it to %y (align 1), and returns
; %x + 128 bytes. Expected codegen is a plain vldrb.u8 plus a separate adds.
; NOTE(review): presumably 128 is outside the post-index immediate range for
; byte accesses - confirm against the MVE encoding.
908define i8* @ldrbu8_128(i8* %x, i8* %y) {
909; CHECK-LABEL: ldrbu8_128:
910; CHECK:       @ %bb.0: @ %entry
911; CHECK-NEXT:    vldrb.u8 q0, [r0]
912; CHECK-NEXT:    adds r0, #128
913; CHECK-NEXT:    vstrb.8 q0, [r1]
914; CHECK-NEXT:    bx lr
915entry:
916  %z = getelementptr inbounds i8, i8* %x, i32 128
917  %0 = bitcast i8* %x to <16 x i8>*
918  %1 = load <16 x i8>, <16 x i8>* %0, align 1
919  %2 = bitcast i8* %y to <16 x i8>*
920  store <16 x i8> %1, <16 x i8>* %2, align 1
921  ret i8* %z
922}
923
; Loads <16 x i8> from %x (align 1), stores it to %y (align 1), and returns
; %x - 127 bytes. Expected codegen is a post-indexed vldrb.u8 with
; writeback #-127.
924define i8* @ldrbu8_m127(i8* %x, i8* %y) {
925; CHECK-LABEL: ldrbu8_m127:
926; CHECK:       @ %bb.0: @ %entry
927; CHECK-NEXT:    vldrb.u8 q0, [r0], #-127
928; CHECK-NEXT:    vstrb.8 q0, [r1]
929; CHECK-NEXT:    bx lr
930entry:
931  %z = getelementptr inbounds i8, i8* %x, i32 -127
932  %0 = bitcast i8* %x to <16 x i8>*
933  %1 = load <16 x i8>, <16 x i8>* %0, align 1
934  %2 = bitcast i8* %y to <16 x i8>*
935  store <16 x i8> %1, <16 x i8>* %2, align 1
936  ret i8* %z
937}
938
; Loads <16 x i8> from %x (align 1), stores it to %y (align 1), and returns
; %x - 128 bytes. Expected codegen is a plain vldrb.u8 plus a separate subs.
; NOTE(review): presumably -128 is outside the post-index immediate range for
; byte accesses - confirm against the MVE encoding.
939define i8* @ldrbu8_m128(i8* %x, i8* %y) {
940; CHECK-LABEL: ldrbu8_m128:
941; CHECK:       @ %bb.0: @ %entry
942; CHECK-NEXT:    vldrb.u8 q0, [r0]
943; CHECK-NEXT:    subs r0, #128
944; CHECK-NEXT:    vstrb.8 q0, [r1]
945; CHECK-NEXT:    bx lr
946entry:
947  %z = getelementptr inbounds i8, i8* %x, i32 -128
948  %0 = bitcast i8* %x to <16 x i8>*
949  %1 = load <16 x i8>, <16 x i8>* %0, align 1
950  %2 = bitcast i8* %y to <16 x i8>*
951  store <16 x i8> %1, <16 x i8>* %2, align 1
952  ret i8* %z
953}
954
955
; Floating-point variant: loads <4 x float> from %x (align 4), stores it to
; %y, and returns %x + 4 bytes. Expected codegen uses the same post-indexed
; vldrw.u32 (writeback #4) and vstrw.32 as an integer word vector would.
956define i8* @ldrwf32_4(i8* %x, i8* %y) {
957; CHECK-LABEL: ldrwf32_4:
958; CHECK:       @ %bb.0: @ %entry
959; CHECK-NEXT:    vldrw.u32 q0, [r0], #4
960; CHECK-NEXT:    vstrw.32 q0, [r1]
961; CHECK-NEXT:    bx lr
962entry:
963  %z = getelementptr inbounds i8, i8* %x, i32 4
964  %0 = bitcast i8* %x to <4 x float>*
965  %1 = load <4 x float>, <4 x float>* %0, align 4
966  %2 = bitcast i8* %y to <4 x float>*
967  store <4 x float> %1, <4 x float>* %2, align 4
968  ret i8* %z
969}
970
; Half-precision variant: loads <8 x half> from %x (align 2), stores it to
; %y (align 2), and returns %x + 4 bytes. Despite the "ldrw" in the name, the
; expected codegen is a halfword access: post-indexed vldrh.u16 (#4) + vstrh.16.
971define i8* @ldrwf16_4(i8* %x, i8* %y) {
972; CHECK-LABEL: ldrwf16_4:
973; CHECK:       @ %bb.0: @ %entry
974; CHECK-NEXT:    vldrh.u16 q0, [r0], #4
975; CHECK-NEXT:    vstrh.16 q0, [r1]
976; CHECK-NEXT:    bx lr
977entry:
978  %z = getelementptr inbounds i8, i8* %x, i32 4
979  %0 = bitcast i8* %x to <8 x half>*
980  %1 = load <8 x half>, <8 x half>* %0, align 2
981  %2 = bitcast i8* %y to <8 x half>*
982  store <8 x half> %1, <8 x half>* %2, align 2
983  ret i8* %z
984}
985
; Under-aligned variant: loads <4 x i32> from %x with align 1, stores it to %y
; with align 4, and returns %x + 3 bytes. Both endiannesses are expected to
; use a post-indexed byte load (vldrb.u8 with writeback #3); the BE checks
; additionally expect a vrev32.8 before the word store.
; NOTE(review): the vrev32.8 presumably restores big-endian lane byte order
; after the byte-wise load - confirm.
986define i8* @ldrwi32_align1(i8* %x, i8* %y) {
987; CHECK-LE-LABEL: ldrwi32_align1:
988; CHECK-LE:       @ %bb.0: @ %entry
989; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
990; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
991; CHECK-LE-NEXT:    bx lr
992;
993; CHECK-BE-LABEL: ldrwi32_align1:
994; CHECK-BE:       @ %bb.0: @ %entry
995; CHECK-BE-NEXT:    vldrb.u8 q0, [r0], #3
996; CHECK-BE-NEXT:    vrev32.8 q0, q0
997; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
998; CHECK-BE-NEXT:    bx lr
999entry:
1000  %z = getelementptr inbounds i8, i8* %x, i32 3
1001  %0 = bitcast i8* %x to <4 x i32>*
1002  %1 = load <4 x i32>, <4 x i32>* %0, align 1
1003  %2 = bitcast i8* %y to <4 x i32>*
1004  store <4 x i32> %1, <4 x i32>* %2, align 4
1005  ret i8* %z
1006}
1007
; Loads <8 x i16> from %x with only byte alignment (align 1), stores it to %y
; (align 2), and returns %x + 3.
; Little-endian checks expect a post-indexed vldrb.u8 with #3; big-endian
; checks expect the same load followed by vrev16.8 before the halfword store.
1008define i8* @ldrhi16_align1(i8* %x, i8* %y) {
1009; CHECK-LE-LABEL: ldrhi16_align1:
1010; CHECK-LE:       @ %bb.0: @ %entry
1011; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
1012; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
1013; CHECK-LE-NEXT:    bx lr
1014;
1015; CHECK-BE-LABEL: ldrhi16_align1:
1016; CHECK-BE:       @ %bb.0: @ %entry
1017; CHECK-BE-NEXT:    vldrb.u8 q0, [r0], #3
1018; CHECK-BE-NEXT:    vrev16.8 q0, q0
1019; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
1020; CHECK-BE-NEXT:    bx lr
1021entry:
1022  %z = getelementptr inbounds i8, i8* %x, i32 3
1023  %0 = bitcast i8* %x to <8 x i16>*
1024  %1 = load <8 x i16>, <8 x i16>* %0, align 1
1025  %2 = bitcast i8* %y to <8 x i16>*
1026  store <8 x i16> %1, <8 x i16>* %2, align 2
1027  ret i8* %z
1028}
1029
; Loads <4 x i16> from %x with only byte alignment (align 1), sign-extends it
; to <4 x i32>, stores that to %y (align 4), and returns %x + 3.
; The checks expect no post-indexed vector load here: the 8 bytes are read
; with two scalar ldr instructions, copied to an 8-byte stack slot, and then
; loaded with vldrh.s32 from the stack; the +3 update is a separate adds.
1030define i8* @ldrhi32_align1(i8* %x, i8* %y) {
1031; CHECK-LABEL: ldrhi32_align1:
1032; CHECK:       @ %bb.0: @ %entry
1033; CHECK-NEXT:    .pad #8
1034; CHECK-NEXT:    sub sp, #8
1035; CHECK-NEXT:    ldr r3, [r0, #4]
1036; CHECK-NEXT:    ldr r2, [r0]
1037; CHECK-NEXT:    adds r0, #3
1038; CHECK-NEXT:    strd r2, r3, [sp]
1039; CHECK-NEXT:    mov r2, sp
1040; CHECK-NEXT:    vldrh.s32 q0, [r2]
1041; CHECK-NEXT:    vstrw.32 q0, [r1]
1042; CHECK-NEXT:    add sp, #8
1043; CHECK-NEXT:    bx lr
1044entry:
1045  %z = getelementptr inbounds i8, i8* %x, i32 3
1046  %0 = bitcast i8* %x to <4 x i16>*
1047  %1 = load <4 x i16>, <4 x i16>* %0, align 1
1048  %2 = bitcast i8* %y to <4 x i32>*
1049  %3 = sext <4 x i16> %1 to <4 x i32>
1050  store <4 x i32> %3, <4 x i32>* %2, align 4
1051  ret i8* %z
1052}
1053
; Loads <4 x float> from %x with only byte alignment (align 1), stores it to
; %y (align 4), and returns %x + 3.
; Little-endian checks expect a post-indexed vldrb.u8 with #3; big-endian
; checks expect the same load followed by vrev32.8 before the word store.
1054define i8* @ldrf32_align1(i8* %x, i8* %y) {
1055; CHECK-LE-LABEL: ldrf32_align1:
1056; CHECK-LE:       @ %bb.0: @ %entry
1057; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
1058; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
1059; CHECK-LE-NEXT:    bx lr
1060;
1061; CHECK-BE-LABEL: ldrf32_align1:
1062; CHECK-BE:       @ %bb.0: @ %entry
1063; CHECK-BE-NEXT:    vldrb.u8 q0, [r0], #3
1064; CHECK-BE-NEXT:    vrev32.8 q0, q0
1065; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
1066; CHECK-BE-NEXT:    bx lr
1067entry:
1068  %z = getelementptr inbounds i8, i8* %x, i32 3
1069  %0 = bitcast i8* %x to <4 x float>*
1070  %1 = load <4 x float>, <4 x float>* %0, align 1
1071  %2 = bitcast i8* %y to <4 x float>*
1072  store <4 x float> %1, <4 x float>* %2, align 4
1073  ret i8* %z
1074}
1075
; Loads <8 x half> from %x with only byte alignment (align 1), stores it to
; %y (align 2), and returns %x + 3.
; Little-endian checks expect a post-indexed vldrb.u8 with #3; big-endian
; checks expect the same load followed by vrev16.8 before the halfword store.
1076define i8* @ldrf16_align1(i8* %x, i8* %y) {
1077; CHECK-LE-LABEL: ldrf16_align1:
1078; CHECK-LE:       @ %bb.0: @ %entry
1079; CHECK-LE-NEXT:    vldrb.u8 q0, [r0], #3
1080; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
1081; CHECK-LE-NEXT:    bx lr
1082;
1083; CHECK-BE-LABEL: ldrf16_align1:
1084; CHECK-BE:       @ %bb.0: @ %entry
1085; CHECK-BE-NEXT:    vldrb.u8 q0, [r0], #3
1086; CHECK-BE-NEXT:    vrev16.8 q0, q0
1087; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
1088; CHECK-BE-NEXT:    bx lr
1089entry:
1090  %z = getelementptr inbounds i8, i8* %x, i32 3
1091  %0 = bitcast i8* %x to <8 x half>*
1092  %1 = load <8 x half>, <8 x half>* %0, align 1
1093  %2 = bitcast i8* %y to <8 x half>*
1094  store <8 x half> %1, <8 x half>* %2, align 2
1095  ret i8* %z
1096}
1097
; Loads <8 x i16> from %x with 8-byte alignment, stores it to %y (align 2),
; and returns %x + 4.
; Little-endian checks expect a post-indexed vldrw.u32 with #4; big-endian
; checks expect a post-indexed vldrh.u16 with #4.
1098define i8* @ldrh16_align8(i8* %x, i8* %y) {
1099; CHECK-LE-LABEL: ldrh16_align8:
1100; CHECK-LE:       @ %bb.0: @ %entry
1101; CHECK-LE-NEXT:    vldrw.u32 q0, [r0], #4
1102; CHECK-LE-NEXT:    vstrh.16 q0, [r1]
1103; CHECK-LE-NEXT:    bx lr
1104;
1105; CHECK-BE-LABEL: ldrh16_align8:
1106; CHECK-BE:       @ %bb.0: @ %entry
1107; CHECK-BE-NEXT:    vldrh.u16 q0, [r0], #4
1108; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
1109; CHECK-BE-NEXT:    bx lr
1110entry:
1111  %z = getelementptr inbounds i8, i8* %x, i32 4
1112  %0 = bitcast i8* %x to <8 x i16>*
1113  %1 = load <8 x i16>, <8 x i16>* %0, align 8
1114  %2 = bitcast i8* %y to <8 x i16>*
1115  store <8 x i16> %1, <8 x i16>* %2, align 2
1116  ret i8* %z
1117}
1118
1119
1120
1121
1122
; Store tests take (%y, %x): %y (r0) is the store destination whose updated
; value is returned, %x (r1) is the load source.
; Loads <4 x i32> from %x, stores it to %y (align 4), and returns %y + 4.
; Little-endian checks expect a post-indexed vstrb.8 with #4; big-endian
; checks expect a post-indexed vstrw.32 with #4.
1123define i8* @strw32_4(i8* %y, i8* %x) {
1124; CHECK-LE-LABEL: strw32_4:
1125; CHECK-LE:       @ %bb.0: @ %entry
1126; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
1127; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #4
1128; CHECK-LE-NEXT:    bx lr
1129;
1130; CHECK-BE-LABEL: strw32_4:
1131; CHECK-BE:       @ %bb.0: @ %entry
1132; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
1133; CHECK-BE-NEXT:    vstrw.32 q0, [r0], #4
1134; CHECK-BE-NEXT:    bx lr
1135entry:
1136  %z = getelementptr inbounds i8, i8* %y, i32 4
1137  %0 = bitcast i8* %x to <4 x i32>*
1138  %1 = load <4 x i32>, <4 x i32>* %0, align 4
1139  %2 = bitcast i8* %y to <4 x i32>*
1140  store <4 x i32> %1, <4 x i32>* %2, align 4
1141  ret i8* %z
1142}
1143
; Loads <4 x i32> from %x, stores it to %y (align 4), and returns %y + 3.
; Little-endian checks expect a post-indexed vstrb.8 with #3; big-endian
; checks expect a plain vstrw.32 followed by a separate `adds r0, #3`.
1144define i8* @strw32_3(i8* %y, i8* %x) {
1145; CHECK-LE-LABEL: strw32_3:
1146; CHECK-LE:       @ %bb.0: @ %entry
1147; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
1148; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
1149; CHECK-LE-NEXT:    bx lr
1150;
1151; CHECK-BE-LABEL: strw32_3:
1152; CHECK-BE:       @ %bb.0: @ %entry
1153; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
1154; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
1155; CHECK-BE-NEXT:    adds r0, #3
1156; CHECK-BE-NEXT:    bx lr
1157entry:
1158  %z = getelementptr inbounds i8, i8* %y, i32 3
1159  %0 = bitcast i8* %x to <4 x i32>*
1160  %1 = load <4 x i32>, <4 x i32>* %0, align 4
1161  %2 = bitcast i8* %y to <4 x i32>*
1162  store <4 x i32> %1, <4 x i32>* %2, align 4
1163  ret i8* %z
1164}
1165
; Loads <4 x i32> from %x, stores it to %y (align 4), and returns %y - 4.
; Little-endian checks expect a post-indexed vstrb.8 with #-4; big-endian
; checks expect a post-indexed vstrw.32 with #-4.
1166define i8* @strw32_m4(i8* %y, i8* %x) {
1167; CHECK-LE-LABEL: strw32_m4:
1168; CHECK-LE:       @ %bb.0: @ %entry
1169; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
1170; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #-4
1171; CHECK-LE-NEXT:    bx lr
1172;
1173; CHECK-BE-LABEL: strw32_m4:
1174; CHECK-BE:       @ %bb.0: @ %entry
1175; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
1176; CHECK-BE-NEXT:    vstrw.32 q0, [r0], #-4
1177; CHECK-BE-NEXT:    bx lr
1178entry:
1179  %z = getelementptr inbounds i8, i8* %y, i32 -4
1180  %0 = bitcast i8* %x to <4 x i32>*
1181  %1 = load <4 x i32>, <4 x i32>* %0, align 4
1182  %2 = bitcast i8* %y to <4 x i32>*
1183  store <4 x i32> %1, <4 x i32>* %2, align 4
1184  ret i8* %z
1185}
1186
; Loads <4 x i32> from %x, stores it to %y (align 4), and returns %y + 508.
; The checks expect the +508 update folded into a post-indexed vstrw.32 for
; both endiannesses.
1187define i8* @strw32_508(i8* %y, i8* %x) {
1188; CHECK-LABEL: strw32_508:
1189; CHECK:       @ %bb.0: @ %entry
1190; CHECK-NEXT:    vldrw.u32 q0, [r1]
1191; CHECK-NEXT:    vstrw.32 q0, [r0], #508
1192; CHECK-NEXT:    bx lr
1193entry:
1194  %z = getelementptr inbounds i8, i8* %y, i32 508
1195  %0 = bitcast i8* %x to <4 x i32>*
1196  %1 = load <4 x i32>, <4 x i32>* %0, align 4
1197  %2 = bitcast i8* %y to <4 x i32>*
1198  store <4 x i32> %1, <4 x i32>* %2, align 4
1199  ret i8* %z
1200}
1201
; Loads <4 x i32> from %x, stores it to %y (align 4), and returns %y + 512.
; The checks expect a plain vstrw.32 followed by a separate
; `add.w r0, r0, #512`; the update is not folded into the store.
1202define i8* @strw32_512(i8* %y, i8* %x) {
1203; CHECK-LABEL: strw32_512:
1204; CHECK:       @ %bb.0: @ %entry
1205; CHECK-NEXT:    vldrw.u32 q0, [r1]
1206; CHECK-NEXT:    vstrw.32 q0, [r0]
1207; CHECK-NEXT:    add.w r0, r0, #512
1208; CHECK-NEXT:    bx lr
1209entry:
1210  %z = getelementptr inbounds i8, i8* %y, i32 512
1211  %0 = bitcast i8* %x to <4 x i32>*
1212  %1 = load <4 x i32>, <4 x i32>* %0, align 4
1213  %2 = bitcast i8* %y to <4 x i32>*
1214  store <4 x i32> %1, <4 x i32>* %2, align 4
1215  ret i8* %z
1216}
1217
; Loads <4 x i32> from %x, stores it to %y (align 4), and returns %y - 508.
; The checks expect the -508 update folded into a post-indexed vstrw.32 for
; both endiannesses.
1218define i8* @strw32_m508(i8* %y, i8* %x) {
1219; CHECK-LABEL: strw32_m508:
1220; CHECK:       @ %bb.0: @ %entry
1221; CHECK-NEXT:    vldrw.u32 q0, [r1]
1222; CHECK-NEXT:    vstrw.32 q0, [r0], #-508
1223; CHECK-NEXT:    bx lr
1224entry:
1225  %z = getelementptr inbounds i8, i8* %y, i32 -508
1226  %0 = bitcast i8* %x to <4 x i32>*
1227  %1 = load <4 x i32>, <4 x i32>* %0, align 4
1228  %2 = bitcast i8* %y to <4 x i32>*
1229  store <4 x i32> %1, <4 x i32>* %2, align 4
1230  ret i8* %z
1231}
1232
; Loads <4 x i32> from %x, stores it to %y (align 4), and returns %y - 512.
; The checks expect a plain vstrw.32 followed by a separate
; `sub.w r0, r0, #512`; the update is not folded into the store.
1233define i8* @strw32_m512(i8* %y, i8* %x) {
1234; CHECK-LABEL: strw32_m512:
1235; CHECK:       @ %bb.0: @ %entry
1236; CHECK-NEXT:    vldrw.u32 q0, [r1]
1237; CHECK-NEXT:    vstrw.32 q0, [r0]
1238; CHECK-NEXT:    sub.w r0, r0, #512
1239; CHECK-NEXT:    bx lr
1240entry:
1241  %z = getelementptr inbounds i8, i8* %y, i32 -512
1242  %0 = bitcast i8* %x to <4 x i32>*
1243  %1 = load <4 x i32>, <4 x i32>* %0, align 4
1244  %2 = bitcast i8* %y to <4 x i32>*
1245  store <4 x i32> %1, <4 x i32>* %2, align 4
1246  ret i8* %z
1247}
1248
1249
; Loads <4 x i16> from %x, stores it to %y (align 2), and returns %y + 4.
; The checks expect vldrh.u32 for the load and the +4 update folded into a
; post-indexed vstrh.32.
1250define i8* @strh32_4(i8* %y, i8* %x) {
1251; CHECK-LABEL: strh32_4:
1252; CHECK:       @ %bb.0: @ %entry
1253; CHECK-NEXT:    vldrh.u32 q0, [r1]
1254; CHECK-NEXT:    vstrh.32 q0, [r0], #4
1255; CHECK-NEXT:    bx lr
1256entry:
1257  %z = getelementptr inbounds i8, i8* %y, i32 4
1258  %0 = bitcast i8* %x to <4 x i16>*
1259  %1 = load <4 x i16>, <4 x i16>* %0, align 2
1260  %2 = bitcast i8* %y to <4 x i16>*
1261  store <4 x i16> %1, <4 x i16>* %2, align 2
1262  ret i8* %z
1263}
1264
; Loads <4 x i16> from %x, stores it to %y (align 2), and returns %y + 3.
; The checks expect a plain vstrh.32 followed by a separate `adds r0, #3`;
; the odd offset is not folded into the halfword store.
1265define i8* @strh32_3(i8* %y, i8* %x) {
1266; CHECK-LABEL: strh32_3:
1267; CHECK:       @ %bb.0: @ %entry
1268; CHECK-NEXT:    vldrh.u32 q0, [r1]
1269; CHECK-NEXT:    vstrh.32 q0, [r0]
1270; CHECK-NEXT:    adds r0, #3
1271; CHECK-NEXT:    bx lr
1272entry:
1273  %z = getelementptr inbounds i8, i8* %y, i32 3
1274  %0 = bitcast i8* %x to <4 x i16>*
1275  %1 = load <4 x i16>, <4 x i16>* %0, align 2
1276  %2 = bitcast i8* %y to <4 x i16>*
1277  store <4 x i16> %1, <4 x i16>* %2, align 2
1278  ret i8* %z
1279}
1280
; Loads <4 x i16> from %x, stores it to %y (align 2), and returns %y + 2.
; The checks expect the +2 update folded into a post-indexed vstrh.32.
1281define i8* @strh32_2(i8* %y, i8* %x) {
1282; CHECK-LABEL: strh32_2:
1283; CHECK:       @ %bb.0: @ %entry
1284; CHECK-NEXT:    vldrh.u32 q0, [r1]
1285; CHECK-NEXT:    vstrh.32 q0, [r0], #2
1286; CHECK-NEXT:    bx lr
1287entry:
1288  %z = getelementptr inbounds i8, i8* %y, i32 2
1289  %0 = bitcast i8* %x to <4 x i16>*
1290  %1 = load <4 x i16>, <4 x i16>* %0, align 2
1291  %2 = bitcast i8* %y to <4 x i16>*
1292  store <4 x i16> %1, <4 x i16>* %2, align 2
1293  ret i8* %z
1294}
1295
; Loads <4 x i16> from %x, stores it to %y (align 2), and returns %y + 254.
; The checks expect the +254 update folded into a post-indexed vstrh.32.
1296define i8* @strh32_254(i8* %y, i8* %x) {
1297; CHECK-LABEL: strh32_254:
1298; CHECK:       @ %bb.0: @ %entry
1299; CHECK-NEXT:    vldrh.u32 q0, [r1]
1300; CHECK-NEXT:    vstrh.32 q0, [r0], #254
1301; CHECK-NEXT:    bx lr
1302entry:
1303  %z = getelementptr inbounds i8, i8* %y, i32 254
1304  %0 = bitcast i8* %x to <4 x i16>*
1305  %1 = load <4 x i16>, <4 x i16>* %0, align 2
1306  %2 = bitcast i8* %y to <4 x i16>*
1307  store <4 x i16> %1, <4 x i16>* %2, align 2
1308  ret i8* %z
1309}
1310
; Loads <4 x i16> from %x, stores it to %y (align 2), and returns %y + 256.
; The checks expect a plain vstrh.32 followed by a separate
; `add.w r0, r0, #256`; the update is not folded into the store.
1311define i8* @strh32_256(i8* %y, i8* %x) {
1312; CHECK-LABEL: strh32_256:
1313; CHECK:       @ %bb.0: @ %entry
1314; CHECK-NEXT:    vldrh.u32 q0, [r1]
1315; CHECK-NEXT:    vstrh.32 q0, [r0]
1316; CHECK-NEXT:    add.w r0, r0, #256
1317; CHECK-NEXT:    bx lr
1318entry:
1319  %z = getelementptr inbounds i8, i8* %y, i32 256
1320  %0 = bitcast i8* %x to <4 x i16>*
1321  %1 = load <4 x i16>, <4 x i16>* %0, align 2
1322  %2 = bitcast i8* %y to <4 x i16>*
1323  store <4 x i16> %1, <4 x i16>* %2, align 2
1324  ret i8* %z
1325}
1326
; Loads <4 x i16> from %x, stores it to %y (align 2), and returns %y - 254.
; The checks expect the -254 update folded into a post-indexed vstrh.32.
1327define i8* @strh32_m254(i8* %y, i8* %x) {
1328; CHECK-LABEL: strh32_m254:
1329; CHECK:       @ %bb.0: @ %entry
1330; CHECK-NEXT:    vldrh.u32 q0, [r1]
1331; CHECK-NEXT:    vstrh.32 q0, [r0], #-254
1332; CHECK-NEXT:    bx lr
1333entry:
1334  %z = getelementptr inbounds i8, i8* %y, i32 -254
1335  %0 = bitcast i8* %x to <4 x i16>*
1336  %1 = load <4 x i16>, <4 x i16>* %0, align 2
1337  %2 = bitcast i8* %y to <4 x i16>*
1338  store <4 x i16> %1, <4 x i16>* %2, align 2
1339  ret i8* %z
1340}
1341
; Loads <4 x i16> from %x, stores it to %y (align 2), and returns %y - 256.
; The checks expect a plain vstrh.32 followed by a separate
; `sub.w r0, r0, #256`; the update is not folded into the store.
1342define i8* @strh32_m256(i8* %y, i8* %x) {
1343; CHECK-LABEL: strh32_m256:
1344; CHECK:       @ %bb.0: @ %entry
1345; CHECK-NEXT:    vldrh.u32 q0, [r1]
1346; CHECK-NEXT:    vstrh.32 q0, [r0]
1347; CHECK-NEXT:    sub.w r0, r0, #256
1348; CHECK-NEXT:    bx lr
1349entry:
1350  %z = getelementptr inbounds i8, i8* %y, i32 -256
1351  %0 = bitcast i8* %x to <4 x i16>*
1352  %1 = load <4 x i16>, <4 x i16>* %0, align 2
1353  %2 = bitcast i8* %y to <4 x i16>*
1354  store <4 x i16> %1, <4 x i16>* %2, align 2
1355  ret i8* %z
1356}
1357
1358
; Loads <8 x i16> from %x, stores it to %y (align 2), and returns %y + 4.
; Little-endian checks expect a post-indexed vstrb.8 with #4; big-endian
; checks expect a post-indexed vstrh.16 with #4.
1359define i8* @strh16_4(i8* %y, i8* %x) {
1360; CHECK-LE-LABEL: strh16_4:
1361; CHECK-LE:       @ %bb.0: @ %entry
1362; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
1363; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #4
1364; CHECK-LE-NEXT:    bx lr
1365;
1366; CHECK-BE-LABEL: strh16_4:
1367; CHECK-BE:       @ %bb.0: @ %entry
1368; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
1369; CHECK-BE-NEXT:    vstrh.16 q0, [r0], #4
1370; CHECK-BE-NEXT:    bx lr
1371entry:
1372  %z = getelementptr inbounds i8, i8* %y, i32 4
1373  %0 = bitcast i8* %x to <8 x i16>*
1374  %1 = load <8 x i16>, <8 x i16>* %0, align 2
1375  %2 = bitcast i8* %y to <8 x i16>*
1376  store <8 x i16> %1, <8 x i16>* %2, align 2
1377  ret i8* %z
1378}
1379
; Loads <8 x i16> from %x, stores it to %y (align 2), and returns %y + 3.
; Little-endian checks expect a post-indexed vstrb.8 with #3; big-endian
; checks expect a plain vstrh.16 followed by a separate `adds r0, #3`.
1380define i8* @strh16_3(i8* %y, i8* %x) {
1381; CHECK-LE-LABEL: strh16_3:
1382; CHECK-LE:       @ %bb.0: @ %entry
1383; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
1384; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
1385; CHECK-LE-NEXT:    bx lr
1386;
1387; CHECK-BE-LABEL: strh16_3:
1388; CHECK-BE:       @ %bb.0: @ %entry
1389; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
1390; CHECK-BE-NEXT:    vstrh.16 q0, [r0]
1391; CHECK-BE-NEXT:    adds r0, #3
1392; CHECK-BE-NEXT:    bx lr
1393entry:
1394  %z = getelementptr inbounds i8, i8* %y, i32 3
1395  %0 = bitcast i8* %x to <8 x i16>*
1396  %1 = load <8 x i16>, <8 x i16>* %0, align 2
1397  %2 = bitcast i8* %y to <8 x i16>*
1398  store <8 x i16> %1, <8 x i16>* %2, align 2
1399  ret i8* %z
1400}
1401
; Loads <8 x i16> from %x, stores it to %y (align 2), and returns %y + 2.
; Little-endian checks expect a post-indexed vstrb.8 with #2; big-endian
; checks expect a post-indexed vstrh.16 with #2.
1402define i8* @strh16_2(i8* %y, i8* %x) {
1403; CHECK-LE-LABEL: strh16_2:
1404; CHECK-LE:       @ %bb.0: @ %entry
1405; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
1406; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #2
1407; CHECK-LE-NEXT:    bx lr
1408;
1409; CHECK-BE-LABEL: strh16_2:
1410; CHECK-BE:       @ %bb.0: @ %entry
1411; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
1412; CHECK-BE-NEXT:    vstrh.16 q0, [r0], #2
1413; CHECK-BE-NEXT:    bx lr
1414entry:
1415  %z = getelementptr inbounds i8, i8* %y, i32 2
1416  %0 = bitcast i8* %x to <8 x i16>*
1417  %1 = load <8 x i16>, <8 x i16>* %0, align 2
1418  %2 = bitcast i8* %y to <8 x i16>*
1419  store <8 x i16> %1, <8 x i16>* %2, align 2
1420  ret i8* %z
1421}
1422
; Loads <8 x i16> from %x, stores it to %y (align 2), and returns %y + 254.
; The checks expect the +254 update folded into a post-indexed vstrh.16 for
; both endiannesses.
1423define i8* @strh16_254(i8* %y, i8* %x) {
1424; CHECK-LABEL: strh16_254:
1425; CHECK:       @ %bb.0: @ %entry
1426; CHECK-NEXT:    vldrh.u16 q0, [r1]
1427; CHECK-NEXT:    vstrh.16 q0, [r0], #254
1428; CHECK-NEXT:    bx lr
1429entry:
1430  %z = getelementptr inbounds i8, i8* %y, i32 254
1431  %0 = bitcast i8* %x to <8 x i16>*
1432  %1 = load <8 x i16>, <8 x i16>* %0, align 2
1433  %2 = bitcast i8* %y to <8 x i16>*
1434  store <8 x i16> %1, <8 x i16>* %2, align 2
1435  ret i8* %z
1436}
1437
; Loads <8 x i16> from %x, stores it to %y (align 2), and returns %y + 256.
; The checks expect a plain vstrh.16 followed by a separate
; `add.w r0, r0, #256`; the update is not folded into the store.
1438define i8* @strh16_256(i8* %y, i8* %x) {
1439; CHECK-LABEL: strh16_256:
1440; CHECK:       @ %bb.0: @ %entry
1441; CHECK-NEXT:    vldrh.u16 q0, [r1]
1442; CHECK-NEXT:    vstrh.16 q0, [r0]
1443; CHECK-NEXT:    add.w r0, r0, #256
1444; CHECK-NEXT:    bx lr
1445entry:
1446  %z = getelementptr inbounds i8, i8* %y, i32 256
1447  %0 = bitcast i8* %x to <8 x i16>*
1448  %1 = load <8 x i16>, <8 x i16>* %0, align 2
1449  %2 = bitcast i8* %y to <8 x i16>*
1450  store <8 x i16> %1, <8 x i16>* %2, align 2
1451  ret i8* %z
1452}
1453
; Loads <8 x i16> from %x, stores it to %y (align 2), and returns %y - 254.
; The checks expect the -254 update folded into a post-indexed vstrh.16 for
; both endiannesses.
1454define i8* @strh16_m254(i8* %y, i8* %x) {
1455; CHECK-LABEL: strh16_m254:
1456; CHECK:       @ %bb.0: @ %entry
1457; CHECK-NEXT:    vldrh.u16 q0, [r1]
1458; CHECK-NEXT:    vstrh.16 q0, [r0], #-254
1459; CHECK-NEXT:    bx lr
1460entry:
1461  %z = getelementptr inbounds i8, i8* %y, i32 -254
1462  %0 = bitcast i8* %x to <8 x i16>*
1463  %1 = load <8 x i16>, <8 x i16>* %0, align 2
1464  %2 = bitcast i8* %y to <8 x i16>*
1465  store <8 x i16> %1, <8 x i16>* %2, align 2
1466  ret i8* %z
1467}
1468
; Loads <8 x i16> from %x, stores it to %y (align 2), and returns %y - 256.
; The checks expect a plain vstrh.16 followed by a separate
; `sub.w r0, r0, #256`; the update is not folded into the store.
1469define i8* @strh16_m256(i8* %y, i8* %x) {
1470; CHECK-LABEL: strh16_m256:
1471; CHECK:       @ %bb.0: @ %entry
1472; CHECK-NEXT:    vldrh.u16 q0, [r1]
1473; CHECK-NEXT:    vstrh.16 q0, [r0]
1474; CHECK-NEXT:    sub.w r0, r0, #256
1475; CHECK-NEXT:    bx lr
1476entry:
1477  %z = getelementptr inbounds i8, i8* %y, i32 -256
1478  %0 = bitcast i8* %x to <8 x i16>*
1479  %1 = load <8 x i16>, <8 x i16>* %0, align 2
1480  %2 = bitcast i8* %y to <8 x i16>*
1481  store <8 x i16> %1, <8 x i16>* %2, align 2
1482  ret i8* %z
1483}
1484
1485
; Loads <4 x i8> from %x, stores it to %y (align 1), and returns %y + 4.
; The checks expect vldrb.u32 for the load and the +4 update folded into a
; post-indexed vstrb.32.
1486define i8* @strb32_4(i8* %y, i8* %x) {
1487; CHECK-LABEL: strb32_4:
1488; CHECK:       @ %bb.0: @ %entry
1489; CHECK-NEXT:    vldrb.u32 q0, [r1]
1490; CHECK-NEXT:    vstrb.32 q0, [r0], #4
1491; CHECK-NEXT:    bx lr
1492entry:
1493  %z = getelementptr inbounds i8, i8* %y, i32 4
1494  %0 = bitcast i8* %x to <4 x i8>*
1495  %1 = load <4 x i8>, <4 x i8>* %0, align 1
1496  %2 = bitcast i8* %y to <4 x i8>*
1497  store <4 x i8> %1, <4 x i8>* %2, align 1
1498  ret i8* %z
1499}
1500
; Loads <4 x i8> from %x, stores it to %y (align 1), and returns %y + 3.
; The checks expect the +3 update folded into a post-indexed vstrb.32.
1501define i8* @strb32_3(i8* %y, i8* %x) {
1502; CHECK-LABEL: strb32_3:
1503; CHECK:       @ %bb.0: @ %entry
1504; CHECK-NEXT:    vldrb.u32 q0, [r1]
1505; CHECK-NEXT:    vstrb.32 q0, [r0], #3
1506; CHECK-NEXT:    bx lr
1507entry:
1508  %z = getelementptr inbounds i8, i8* %y, i32 3
1509  %0 = bitcast i8* %x to <4 x i8>*
1510  %1 = load <4 x i8>, <4 x i8>* %0, align 1
1511  %2 = bitcast i8* %y to <4 x i8>*
1512  store <4 x i8> %1, <4 x i8>* %2, align 1
1513  ret i8* %z
1514}
1515
; Loads <4 x i8> from %x, stores it to %y (align 1), and returns %y + 127.
; The checks expect the +127 update folded into a post-indexed vstrb.32.
1516define i8* @strb32_127(i8* %y, i8* %x) {
1517; CHECK-LABEL: strb32_127:
1518; CHECK:       @ %bb.0: @ %entry
1519; CHECK-NEXT:    vldrb.u32 q0, [r1]
1520; CHECK-NEXT:    vstrb.32 q0, [r0], #127
1521; CHECK-NEXT:    bx lr
1522entry:
1523  %z = getelementptr inbounds i8, i8* %y, i32 127
1524  %0 = bitcast i8* %x to <4 x i8>*
1525  %1 = load <4 x i8>, <4 x i8>* %0, align 1
1526  %2 = bitcast i8* %y to <4 x i8>*
1527  store <4 x i8> %1, <4 x i8>* %2, align 1
1528  ret i8* %z
1529}
1530
; Loads <4 x i8> from %x, stores it to %y (align 1), and returns %y + 128.
; The checks expect a plain vstrb.32 followed by a separate `adds r0, #128`;
; the update is not folded into the store.
1531define i8* @strb32_128(i8* %y, i8* %x) {
1532; CHECK-LABEL: strb32_128:
1533; CHECK:       @ %bb.0: @ %entry
1534; CHECK-NEXT:    vldrb.u32 q0, [r1]
1535; CHECK-NEXT:    vstrb.32 q0, [r0]
1536; CHECK-NEXT:    adds r0, #128
1537; CHECK-NEXT:    bx lr
1538entry:
1539  %z = getelementptr inbounds i8, i8* %y, i32 128
1540  %0 = bitcast i8* %x to <4 x i8>*
1541  %1 = load <4 x i8>, <4 x i8>* %0, align 1
1542  %2 = bitcast i8* %y to <4 x i8>*
1543  store <4 x i8> %1, <4 x i8>* %2, align 1
1544  ret i8* %z
1545}
1546
; Loads <4 x i8> from %x, stores it to %y (align 1), and returns %y - 127.
; The checks expect the -127 update folded into a post-indexed vstrb.32.
1547define i8* @strb32_m127(i8* %y, i8* %x) {
1548; CHECK-LABEL: strb32_m127:
1549; CHECK:       @ %bb.0: @ %entry
1550; CHECK-NEXT:    vldrb.u32 q0, [r1]
1551; CHECK-NEXT:    vstrb.32 q0, [r0], #-127
1552; CHECK-NEXT:    bx lr
1553entry:
1554  %z = getelementptr inbounds i8, i8* %y, i32 -127
1555  %0 = bitcast i8* %x to <4 x i8>*
1556  %1 = load <4 x i8>, <4 x i8>* %0, align 1
1557  %2 = bitcast i8* %y to <4 x i8>*
1558  store <4 x i8> %1, <4 x i8>* %2, align 1
1559  ret i8* %z
1560}
1561
; Loads <4 x i8> from %x, stores it to %y (align 1), and returns %y - 128.
; The checks expect a plain vstrb.32 followed by a separate `subs r0, #128`;
; the update is not folded into the store.
1562define i8* @strb32_m128(i8* %y, i8* %x) {
1563; CHECK-LABEL: strb32_m128:
1564; CHECK:       @ %bb.0: @ %entry
1565; CHECK-NEXT:    vldrb.u32 q0, [r1]
1566; CHECK-NEXT:    vstrb.32 q0, [r0]
1567; CHECK-NEXT:    subs r0, #128
1568; CHECK-NEXT:    bx lr
1569entry:
1570  %z = getelementptr inbounds i8, i8* %y, i32 -128
1571  %0 = bitcast i8* %x to <4 x i8>*
1572  %1 = load <4 x i8>, <4 x i8>* %0, align 1
1573  %2 = bitcast i8* %y to <4 x i8>*
1574  store <4 x i8> %1, <4 x i8>* %2, align 1
1575  ret i8* %z
1576}
1577
1578
; Loads <8 x i8> from %x, stores it to %y (align 1), and returns %y + 4.
; The checks expect vldrb.u16 for the load and the +4 update folded into a
; post-indexed vstrb.16.
1579define i8* @strb16_4(i8* %y, i8* %x) {
1580; CHECK-LABEL: strb16_4:
1581; CHECK:       @ %bb.0: @ %entry
1582; CHECK-NEXT:    vldrb.u16 q0, [r1]
1583; CHECK-NEXT:    vstrb.16 q0, [r0], #4
1584; CHECK-NEXT:    bx lr
1585entry:
1586  %z = getelementptr inbounds i8, i8* %y, i32 4
1587  %0 = bitcast i8* %x to <8 x i8>*
1588  %1 = load <8 x i8>, <8 x i8>* %0, align 1
1589  %2 = bitcast i8* %y to <8 x i8>*
1590  store <8 x i8> %1, <8 x i8>* %2, align 1
1591  ret i8* %z
1592}
1593
; Loads <8 x i8> from %x, stores it to %y (align 1), and returns %y + 3.
; The checks expect the +3 update folded into a post-indexed vstrb.16.
1594define i8* @strb16_3(i8* %y, i8* %x) {
1595; CHECK-LABEL: strb16_3:
1596; CHECK:       @ %bb.0: @ %entry
1597; CHECK-NEXT:    vldrb.u16 q0, [r1]
1598; CHECK-NEXT:    vstrb.16 q0, [r0], #3
1599; CHECK-NEXT:    bx lr
1600entry:
1601  %z = getelementptr inbounds i8, i8* %y, i32 3
1602  %0 = bitcast i8* %x to <8 x i8>*
1603  %1 = load <8 x i8>, <8 x i8>* %0, align 1
1604  %2 = bitcast i8* %y to <8 x i8>*
1605  store <8 x i8> %1, <8 x i8>* %2, align 1
1606  ret i8* %z
1607}
1608
; Loads <8 x i8> from %x, stores it to %y (align 1), and returns %y + 127.
; The checks expect the +127 update folded into a post-indexed vstrb.16.
1609define i8* @strb16_127(i8* %y, i8* %x) {
1610; CHECK-LABEL: strb16_127:
1611; CHECK:       @ %bb.0: @ %entry
1612; CHECK-NEXT:    vldrb.u16 q0, [r1]
1613; CHECK-NEXT:    vstrb.16 q0, [r0], #127
1614; CHECK-NEXT:    bx lr
1615entry:
1616  %z = getelementptr inbounds i8, i8* %y, i32 127
1617  %0 = bitcast i8* %x to <8 x i8>*
1618  %1 = load <8 x i8>, <8 x i8>* %0, align 1
1619  %2 = bitcast i8* %y to <8 x i8>*
1620  store <8 x i8> %1, <8 x i8>* %2, align 1
1621  ret i8* %z
1622}
1623
; Loads <8 x i8> from %x, stores it to %y (align 1), and returns %y + 128.
; The checks expect a plain vstrb.16 followed by a separate `adds r0, #128`;
; the update is not folded into the store.
1624define i8* @strb16_128(i8* %y, i8* %x) {
1625; CHECK-LABEL: strb16_128:
1626; CHECK:       @ %bb.0: @ %entry
1627; CHECK-NEXT:    vldrb.u16 q0, [r1]
1628; CHECK-NEXT:    vstrb.16 q0, [r0]
1629; CHECK-NEXT:    adds r0, #128
1630; CHECK-NEXT:    bx lr
1631entry:
1632  %z = getelementptr inbounds i8, i8* %y, i32 128
1633  %0 = bitcast i8* %x to <8 x i8>*
1634  %1 = load <8 x i8>, <8 x i8>* %0, align 1
1635  %2 = bitcast i8* %y to <8 x i8>*
1636  store <8 x i8> %1, <8 x i8>* %2, align 1
1637  ret i8* %z
1638}
1639
; Loads <8 x i8> from %x, stores it to %y (align 1), and returns %y - 127.
; The checks expect the -127 update folded into a post-indexed vstrb.16.
1640define i8* @strb16_m127(i8* %y, i8* %x) {
1641; CHECK-LABEL: strb16_m127:
1642; CHECK:       @ %bb.0: @ %entry
1643; CHECK-NEXT:    vldrb.u16 q0, [r1]
1644; CHECK-NEXT:    vstrb.16 q0, [r0], #-127
1645; CHECK-NEXT:    bx lr
1646entry:
1647  %z = getelementptr inbounds i8, i8* %y, i32 -127
1648  %0 = bitcast i8* %x to <8 x i8>*
1649  %1 = load <8 x i8>, <8 x i8>* %0, align 1
1650  %2 = bitcast i8* %y to <8 x i8>*
1651  store <8 x i8> %1, <8 x i8>* %2, align 1
1652  ret i8* %z
1653}
1654
; Loads <8 x i8> from %x, stores it to %y (align 1), and returns %y - 128.
; The checks expect a plain vstrb.16 followed by a separate `subs r0, #128`;
; the update is not folded into the store.
1655define i8* @strb16_m128(i8* %y, i8* %x) {
1656; CHECK-LABEL: strb16_m128:
1657; CHECK:       @ %bb.0: @ %entry
1658; CHECK-NEXT:    vldrb.u16 q0, [r1]
1659; CHECK-NEXT:    vstrb.16 q0, [r0]
1660; CHECK-NEXT:    subs r0, #128
1661; CHECK-NEXT:    bx lr
1662entry:
1663  %z = getelementptr inbounds i8, i8* %y, i32 -128
1664  %0 = bitcast i8* %x to <8 x i8>*
1665  %1 = load <8 x i8>, <8 x i8>* %0, align 1
1666  %2 = bitcast i8* %y to <8 x i8>*
1667  store <8 x i8> %1, <8 x i8>* %2, align 1
1668  ret i8* %z
1669}
1670
1671
; Loads <16 x i8> from %x, stores it to %y (align 1), and returns %y + 4.
; The checks expect the +4 update folded into a post-indexed vstrb.8.
1672define i8* @strb8_4(i8* %y, i8* %x) {
1673; CHECK-LABEL: strb8_4:
1674; CHECK:       @ %bb.0: @ %entry
1675; CHECK-NEXT:    vldrb.u8 q0, [r1]
1676; CHECK-NEXT:    vstrb.8 q0, [r0], #4
1677; CHECK-NEXT:    bx lr
1678entry:
1679  %z = getelementptr inbounds i8, i8* %y, i32 4
1680  %0 = bitcast i8* %x to <16 x i8>*
1681  %1 = load <16 x i8>, <16 x i8>* %0, align 1
1682  %2 = bitcast i8* %y to <16 x i8>*
1683  store <16 x i8> %1, <16 x i8>* %2, align 1
1684  ret i8* %z
1685}
1686
; Loads <16 x i8> from %x, stores it to %y (align 1), and returns %y + 3.
; The checks expect the +3 update folded into a post-indexed vstrb.8.
1687define i8* @strb8_3(i8* %y, i8* %x) {
1688; CHECK-LABEL: strb8_3:
1689; CHECK:       @ %bb.0: @ %entry
1690; CHECK-NEXT:    vldrb.u8 q0, [r1]
1691; CHECK-NEXT:    vstrb.8 q0, [r0], #3
1692; CHECK-NEXT:    bx lr
1693entry:
1694  %z = getelementptr inbounds i8, i8* %y, i32 3
1695  %0 = bitcast i8* %x to <16 x i8>*
1696  %1 = load <16 x i8>, <16 x i8>* %0, align 1
1697  %2 = bitcast i8* %y to <16 x i8>*
1698  store <16 x i8> %1, <16 x i8>* %2, align 1
1699  ret i8* %z
1700}
1701
; Loads <16 x i8> from %x, stores it to %y (align 1), and returns %y + 127.
; The checks expect the +127 update folded into a post-indexed vstrb.8.
1702define i8* @strb8_127(i8* %y, i8* %x) {
1703; CHECK-LABEL: strb8_127:
1704; CHECK:       @ %bb.0: @ %entry
1705; CHECK-NEXT:    vldrb.u8 q0, [r1]
1706; CHECK-NEXT:    vstrb.8 q0, [r0], #127
1707; CHECK-NEXT:    bx lr
1708entry:
1709  %z = getelementptr inbounds i8, i8* %y, i32 127
1710  %0 = bitcast i8* %x to <16 x i8>*
1711  %1 = load <16 x i8>, <16 x i8>* %0, align 1
1712  %2 = bitcast i8* %y to <16 x i8>*
1713  store <16 x i8> %1, <16 x i8>* %2, align 1
1714  ret i8* %z
1715}
1716
; Loads <16 x i8> from %x, stores it to %y (align 1), and returns %y + 128.
; The checks expect a plain vstrb.8 followed by a separate `adds r0, #128`;
; the update is not folded into the store.
1717define i8* @strb8_128(i8* %y, i8* %x) {
1718; CHECK-LABEL: strb8_128:
1719; CHECK:       @ %bb.0: @ %entry
1720; CHECK-NEXT:    vldrb.u8 q0, [r1]
1721; CHECK-NEXT:    vstrb.8 q0, [r0]
1722; CHECK-NEXT:    adds r0, #128
1723; CHECK-NEXT:    bx lr
1724entry:
1725  %z = getelementptr inbounds i8, i8* %y, i32 128
1726  %0 = bitcast i8* %x to <16 x i8>*
1727  %1 = load <16 x i8>, <16 x i8>* %0, align 1
1728  %2 = bitcast i8* %y to <16 x i8>*
1729  store <16 x i8> %1, <16 x i8>* %2, align 1
1730  ret i8* %z
1731}
1732
; Loads <16 x i8> from %x, stores it to %y (align 1), and returns %y - 127.
; The checks expect the -127 update folded into a post-indexed vstrb.8.
1733define i8* @strb8_m127(i8* %y, i8* %x) {
1734; CHECK-LABEL: strb8_m127:
1735; CHECK:       @ %bb.0: @ %entry
1736; CHECK-NEXT:    vldrb.u8 q0, [r1]
1737; CHECK-NEXT:    vstrb.8 q0, [r0], #-127
1738; CHECK-NEXT:    bx lr
1739entry:
1740  %z = getelementptr inbounds i8, i8* %y, i32 -127
1741  %0 = bitcast i8* %x to <16 x i8>*
1742  %1 = load <16 x i8>, <16 x i8>* %0, align 1
1743  %2 = bitcast i8* %y to <16 x i8>*
1744  store <16 x i8> %1, <16 x i8>* %2, align 1
1745  ret i8* %z
1746}
1747
; Loads <16 x i8> from %x, stores it to %y (align 1), and returns %y - 128.
; The checks expect a plain vstrb.8 followed by a separate `subs r0, #128`;
; the update is not folded into the store.
1748define i8* @strb8_m128(i8* %y, i8* %x) {
1749; CHECK-LABEL: strb8_m128:
1750; CHECK:       @ %bb.0: @ %entry
1751; CHECK-NEXT:    vldrb.u8 q0, [r1]
1752; CHECK-NEXT:    vstrb.8 q0, [r0]
1753; CHECK-NEXT:    subs r0, #128
1754; CHECK-NEXT:    bx lr
1755entry:
1756  %z = getelementptr inbounds i8, i8* %y, i32 -128
1757  %0 = bitcast i8* %x to <16 x i8>*
1758  %1 = load <16 x i8>, <16 x i8>* %0, align 1
1759  %2 = bitcast i8* %y to <16 x i8>*
1760  store <16 x i8> %1, <16 x i8>* %2, align 1
1761  ret i8* %z
1762}
1763
1764
; Loads <4 x float> from %x, stores it to %y (align 4), and returns %y + 4.
; Little-endian checks expect a post-indexed vstrb.8 with #4; big-endian
; checks expect a post-indexed vstrw.32 with #4.
1765define i8* @strf32_4(i8* %y, i8* %x) {
1766; CHECK-LE-LABEL: strf32_4:
1767; CHECK-LE:       @ %bb.0: @ %entry
1768; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
1769; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #4
1770; CHECK-LE-NEXT:    bx lr
1771;
1772; CHECK-BE-LABEL: strf32_4:
1773; CHECK-BE:       @ %bb.0: @ %entry
1774; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
1775; CHECK-BE-NEXT:    vstrw.32 q0, [r0], #4
1776; CHECK-BE-NEXT:    bx lr
1777entry:
1778  %z = getelementptr inbounds i8, i8* %y, i32 4
1779  %0 = bitcast i8* %x to <4 x float>*
1780  %1 = load <4 x float>, <4 x float>* %0, align 4
1781  %2 = bitcast i8* %y to <4 x float>*
1782  store <4 x float> %1, <4 x float>* %2, align 4
1783  ret i8* %z
1784}
1785
; Loads <8 x half> from %x, stores it to %y (align 2), and returns %y + 4.
; Little-endian checks expect a post-indexed vstrb.8 with #4; big-endian
; checks expect a post-indexed vstrh.16 with #4.
1786define i8* @strf16_4(i8* %y, i8* %x) {
1787; CHECK-LE-LABEL: strf16_4:
1788; CHECK-LE:       @ %bb.0: @ %entry
1789; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
1790; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #4
1791; CHECK-LE-NEXT:    bx lr
1792;
1793; CHECK-BE-LABEL: strf16_4:
1794; CHECK-BE:       @ %bb.0: @ %entry
1795; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
1796; CHECK-BE-NEXT:    vstrh.16 q0, [r0], #4
1797; CHECK-BE-NEXT:    bx lr
1798entry:
1799  %z = getelementptr inbounds i8, i8* %y, i32 4
1800  %0 = bitcast i8* %x to <8 x half>*
1801  %1 = load <8 x half>, <8 x half>* %0, align 2
1802  %2 = bitcast i8* %y to <8 x half>*
1803  store <8 x half> %1, <8 x half>* %2, align 2
1804  ret i8* %z
1805}
1806
; Loads <4 x i32> from %x (align 4), stores it to %y with only byte alignment
; (align 1), and returns %y + 3.
; Little-endian checks expect a post-indexed vstrb.8 with #3; big-endian
; checks expect vrev32.8 on the loaded value before the same byte store.
1807define i8* @strwi32_align1(i8* %y, i8* %x) {
1808; CHECK-LE-LABEL: strwi32_align1:
1809; CHECK-LE:       @ %bb.0: @ %entry
1810; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
1811; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
1812; CHECK-LE-NEXT:    bx lr
1813;
1814; CHECK-BE-LABEL: strwi32_align1:
1815; CHECK-BE:       @ %bb.0: @ %entry
1816; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
1817; CHECK-BE-NEXT:    vrev32.8 q0, q0
1818; CHECK-BE-NEXT:    vstrb.8 q0, [r0], #3
1819; CHECK-BE-NEXT:    bx lr
1820entry:
1821  %z = getelementptr inbounds i8, i8* %y, i32 3
1822  %0 = bitcast i8* %x to <4 x i32>*
1823  %1 = load <4 x i32>, <4 x i32>* %0, align 4
1824  %2 = bitcast i8* %y to <4 x i32>*
1825  store <4 x i32> %1, <4 x i32>* %2, align 1
1826  ret i8* %z
1827}
1828
; Loads <8 x i16> from %x (align 2), stores it to %y with only byte alignment
; (align 1), and returns %y + 3.
; Little-endian checks expect a post-indexed vstrb.8 with #3; big-endian
; checks expect vrev16.8 on the loaded value before the same byte store.
1829define i8* @strhi16_align1(i8* %y, i8* %x) {
1830; CHECK-LE-LABEL: strhi16_align1:
1831; CHECK-LE:       @ %bb.0: @ %entry
1832; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
1833; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
1834; CHECK-LE-NEXT:    bx lr
1835;
1836; CHECK-BE-LABEL: strhi16_align1:
1837; CHECK-BE:       @ %bb.0: @ %entry
1838; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
1839; CHECK-BE-NEXT:    vrev16.8 q0, q0
1840; CHECK-BE-NEXT:    vstrb.8 q0, [r0], #3
1841; CHECK-BE-NEXT:    bx lr
1842entry:
1843  %z = getelementptr inbounds i8, i8* %y, i32 3
1844  %0 = bitcast i8* %x to <8 x i16>*
1845  %1 = load <8 x i16>, <8 x i16>* %0, align 2
1846  %2 = bitcast i8* %y to <8 x i16>*
1847  store <8 x i16> %1, <8 x i16>* %2, align 1
1848  ret i8* %z
1849}
1850
; Loads <4 x i32> from %x (align 4), truncates it to <4 x i16>, stores that
; to %y with only byte alignment (align 1), and returns %y + 3.
; The checks expect no post-indexed vector store here: the value is written
; with vstrh.32 to an 8-byte stack slot, read back with ldrd, and stored to
; [r0] and [r0, #4] with two scalar str; the +3 update is a separate adds.
1851define i8* @strhi32_align1(i8* %y, i8* %x) {
1852; CHECK-LABEL: strhi32_align1:
1853; CHECK:       @ %bb.0: @ %entry
1854; CHECK-NEXT:    .pad #8
1855; CHECK-NEXT:    sub sp, #8
1856; CHECK-NEXT:    vldrw.u32 q0, [r1]
1857; CHECK-NEXT:    mov r1, sp
1858; CHECK-NEXT:    vstrh.32 q0, [r1]
1859; CHECK-NEXT:    ldrd r1, r2, [sp]
1860; CHECK-NEXT:    str r1, [r0]
1861; CHECK-NEXT:    str r2, [r0, #4]
1862; CHECK-NEXT:    adds r0, #3
1863; CHECK-NEXT:    add sp, #8
1864; CHECK-NEXT:    bx lr
1865entry:
1866  %z = getelementptr inbounds i8, i8* %y, i32 3
1867  %0 = bitcast i8* %x to <4 x i32>*
1868  %1 = load <4 x i32>, <4 x i32>* %0, align 4
1869  %2 = bitcast i8* %y to <4 x i16>*
1870  %3 = trunc <4 x i32> %1 to <4 x i16>
1871  store <4 x i16> %3, <4 x i16>* %2, align 1
1872  ret i8* %z
1873}
1874
; Loads <4 x float> from %x (align 4), stores it to %y with only byte
; alignment (align 1), and returns %y + 3.
; Little-endian checks expect a post-indexed vstrb.8 with #3; big-endian
; checks expect vrev32.8 on the loaded value before the same byte store.
1875define i8* @strf32_align1(i8* %y, i8* %x) {
1876; CHECK-LE-LABEL: strf32_align1:
1877; CHECK-LE:       @ %bb.0: @ %entry
1878; CHECK-LE-NEXT:    vldrw.u32 q0, [r1]
1879; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
1880; CHECK-LE-NEXT:    bx lr
1881;
1882; CHECK-BE-LABEL: strf32_align1:
1883; CHECK-BE:       @ %bb.0: @ %entry
1884; CHECK-BE-NEXT:    vldrw.u32 q0, [r1]
1885; CHECK-BE-NEXT:    vrev32.8 q0, q0
1886; CHECK-BE-NEXT:    vstrb.8 q0, [r0], #3
1887; CHECK-BE-NEXT:    bx lr
1888entry:
1889  %z = getelementptr inbounds i8, i8* %y, i32 3
1890  %0 = bitcast i8* %x to <4 x float>*
1891  %1 = load <4 x float>, <4 x float>* %0, align 4
1892  %2 = bitcast i8* %y to <4 x float>*
1893  store <4 x float> %1, <4 x float>* %2, align 1
1894  ret i8* %z
1895}
1896
; Loads <8 x half> from %x (align 2), stores it to %y with only byte
; alignment (align 1), and returns %y + 3.
; Little-endian checks expect a post-indexed vstrb.8 with #3; big-endian
; checks expect vrev16.8 on the loaded value before the same byte store.
1897define i8* @strf16_align1(i8* %y, i8* %x) {
1898; CHECK-LE-LABEL: strf16_align1:
1899; CHECK-LE:       @ %bb.0: @ %entry
1900; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
1901; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #3
1902; CHECK-LE-NEXT:    bx lr
1903;
1904; CHECK-BE-LABEL: strf16_align1:
1905; CHECK-BE:       @ %bb.0: @ %entry
1906; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
1907; CHECK-BE-NEXT:    vrev16.8 q0, q0
1908; CHECK-BE-NEXT:    vstrb.8 q0, [r0], #3
1909; CHECK-BE-NEXT:    bx lr
1910entry:
1911  %z = getelementptr inbounds i8, i8* %y, i32 3
1912  %0 = bitcast i8* %x to <8 x half>*
1913  %1 = load <8 x half>, <8 x half>* %0, align 1
1914  %2 = bitcast i8* %y to <8 x half>*
1915  store <8 x half> %1, <8 x half>* %2, align 1
1916  ret i8* %z
1917}
1918
; Loads <8 x i16> from %x (align 2), stores it to %y with 8-byte alignment,
; and returns %y + 16.
; Little-endian checks expect a post-indexed vstrb.8 with #16; big-endian
; checks expect a post-indexed vstrh.16 with #16.
; NOTE(review): the name says "f16" but the IR uses <8 x i16>, not
; <8 x half>; the load-side counterpart is named ldrh16_align8. Confirm
; whether the name or the element type is what was intended.
1919define i8* @strf16_align8(i8* %y, i8* %x) {
1920; CHECK-LE-LABEL: strf16_align8:
1921; CHECK-LE:       @ %bb.0: @ %entry
1922; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
1923; CHECK-LE-NEXT:    vstrb.8 q0, [r0], #16
1924; CHECK-LE-NEXT:    bx lr
1925;
1926; CHECK-BE-LABEL: strf16_align8:
1927; CHECK-BE:       @ %bb.0: @ %entry
1928; CHECK-BE-NEXT:    vldrh.u16 q0, [r1]
1929; CHECK-BE-NEXT:    vstrh.16 q0, [r0], #16
1930; CHECK-BE-NEXT:    bx lr
1931entry:
1932  %z = getelementptr inbounds i8, i8* %y, i32 16
1933  %0 = bitcast i8* %x to <8 x i16>*
1934  %1 = load <8 x i16>, <8 x i16>* %0, align 2
1935  %2 = bitcast i8* %y to <8 x i16>*
1936  store <8 x i16> %1, <8 x i16>* %2, align 8
1937  ret i8* %z
1938}
1939