; RUN: llc < %s -mtriple=arm64-unknown | FileCheck %s

; i8* p; // p is 1 byte aligned
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8_unaligned(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_unaligned:
; CHECK: ldr w0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p is 4 byte aligned
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8_aligned(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_aligned:
; CHECK: ldr w0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p is 4 byte aligned
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: ldr w8, [x0]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 4
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; i8* p; // p is 8 byte aligned
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: ldr x0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 8
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; i8* p; // p is 8 byte aligned
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: ldr x8, [x0]
; CHECK-NEXT: rev x0, x8
; CHECK-NEXT: ret
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 8
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; i8* p; // p[1] is 4 byte aligned
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: ldur w0, [x0, #1]
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p[-4] is 4 byte aligned
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: ldur w0, [x0, #-4]
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p[1] is 4 byte aligned
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: ldur w8, [x0, #1]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 4
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p[-4] is 4 byte aligned
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: ldur w8, [x0, #-4]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 4
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

declare i16 @llvm.bswap.i16(i16)

; i16* p; // p is 4 byte aligned
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: ldr w8, [x0]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; i16* p; // p is 4 byte aligned
; (i32) p[0] | ((sext(p[1]) to i32) << 16)
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK: ldr w0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: add x8, x0, w1, uxtw
; CHECK-NEXT: ldr w0, [x8, #12]
; CHECK-NEXT: ret
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 4
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: add x8, x0, w1, uxtw
; CHECK-NEXT: ldur w0, [x8, #13]
; CHECK-NEXT: ret
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 4
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

; i8* p; // p is 2 byte aligned
; (i32) p[0] | ((i32) p[1] << 8)
define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: ldrb w8, [x0]
; CHECK-NEXT: ldrb w9, [x0, #1]
; CHECK-NEXT: bfi w8, w9, #8, #8
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[0] << 8) | ((i32) p[1] << 16)
define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK: ldrb w8, [x0]
; CHECK-NEXT: ldrb w9, [x0, #1]
; CHECK-NEXT: lsl w0, w8, #8
; CHECK-NEXT: bfi w0, w9, #16, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[0] << 16) | ((i32) p[1] << 24)
define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK: ldrb w8, [x0]
; CHECK-NEXT: ldrb w9, [x0, #1]
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: bfi w0, w9, #24, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: ldrb w8, [x0, #1]
; CHECK-NEXT: ldrb w9, [x0]
; CHECK-NEXT: bfi w8, w9, #8, #8
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK: ldrb w8, [x0, #1]
; CHECK-NEXT: ldrb w9, [x0]
; CHECK-NEXT: lsl w0, w8, #8
; CHECK-NEXT: bfi w0, w9, #16, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK: ldrb w8, [x0, #1]
; CHECK-NEXT: ldrb w9, [x0]
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: bfi w0, w9, #24, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}