; RUN: llc < %s -mtriple=arm64eb-unknown | FileCheck %s

; i8* p; // p is 4 byte aligned
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_big_endian(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_big_endian:
; CHECK: ldr w0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 4
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; i8* p; // p is 4 byte aligned
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
define i32 @load_i32_by_i16_by_i8_big_endian(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_by_i8_big_endian:
; CHECK: ldr w0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 4
  %tmp2 = zext i8 %tmp1 to i16
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i16
  %tmp6 = shl nuw nsw i16 %tmp2, 8
  %tmp7 = or i16 %tmp6, %tmp5
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i16
  %tmp11 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp12 = load i8, i8* %tmp11, align 1
  %tmp13 = zext i8 %tmp12 to i16
  %tmp14 = shl nuw nsw i16 %tmp10, 8
  %tmp15 = or i16 %tmp14, %tmp13
  %tmp16 = zext i16 %tmp7 to i32
  %tmp17 = zext i16 %tmp15 to i32
  %tmp18 = shl nuw nsw i32 %tmp16, 16
  %tmp19 = or i32 %tmp18, %tmp17
  ret i32 %tmp19
}

; i16* p; // p is 4 byte aligned
; ((i32) p[0] << 16) | (i32) p[1]
define i32 @load_i32_by_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK: ldr w0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; i16* p_16; // p_16 is 4 byte aligned
; i8* p_8 = (i8*) p_16;
; ((i32) p_16[0] << 16) | ((i32) p_8[2] << 8) | (i32) p_8[3]
define i32 @load_i32_by_i16_i8(i32* %arg) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK: ldr w0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = bitcast i32* %arg to i8*
  %tmp2 = load i16, i16* %tmp, align 4
  %tmp3 = zext i16 %tmp2 to i32
  %tmp4 = shl nuw nsw i32 %tmp3, 16
  %tmp5 = getelementptr inbounds i8, i8* %tmp1, i32 2
  %tmp6 = load i8, i8* %tmp5, align 1
  %tmp7 = zext i8 %tmp6 to i32
  %tmp8 = shl nuw nsw i32 %tmp7, 8
  %tmp9 = getelementptr inbounds i8, i8* %tmp1, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = or i32 %tmp8, %tmp11
  %tmp13 = or i32 %tmp12, %tmp4
  ret i32 %tmp13
}
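
; The next two tests repeat the byte-by-byte pattern for a 64-bit value. The
; first assembles the bytes in little-endian order, so on this big-endian
; target it is expected to become an ldr followed by a rev; the second uses
; big-endian order and should fold to a plain ldr.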

; i8* p; // p is 8 byte aligned
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: ldr x8, [x0]
; CHECK-NEXT: rev x0, x8
; CHECK-NEXT: ret
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 8
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; i8* p; // p is 8 byte aligned
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: ldr x0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 8
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}
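
; The following tests start the byte sequence at a non-zero (positive or
; negative) offset from the base pointer; the combined load is expected to be
; an unscaled ldur with the matching immediate offset.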

; i8* p; // p[1] is 4 byte aligned
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: ldur w8, [x0, #1]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p[-4] is 4 byte aligned
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: ldur w8, [x0, #-4]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}
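
; Same offsets as above, but with the bytes combined in big-endian order, so
; no rev should be needed on this target.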

; i8* p; // p[1] is 4 byte aligned
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: ldur w0, [x0, #1]
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 4
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p[-4] is 4 byte aligned
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: ldur w0, [x0, #-4]
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 4
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

declare i16 @llvm.bswap.i16(i16)

; i16* p; // p is 4 byte aligned
; (i32) bswap(p[0]) | ((i32) bswap(p[1]) << 16)
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: ldr w8, [x0]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i16* p; // p is 4 byte aligned
; (i32) p[1] | ((sext(p[0]) to i32) << 16)
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK: ldr w0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp2 = sext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = zext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}
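
; In the next two tests the address is base + 12 + variable index; the expected
; code keeps a single add for the index, folds the constant (12 or 13) into the
; load's immediate offset, and needs a rev because the bytes are combined in
; little-endian order.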

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: add x8, x0, w1, uxtw
; CHECK-NEXT: ldr w8, [x8, #12]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 4
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: add x8, x0, w1, uxtw
; CHECK-NEXT: ldur w8, [x8, #13]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 4
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}
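
; The zext_load tests below combine only two of the bytes, so they are not
; turned into a single 32-bit load; the expected code keeps two ldrb loads
; merged with bfi (plus an lsl for the shifted variants).
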
; i8* p; // p is 2 byte aligned
; (i32) p[0] | ((i32) p[1] << 8)
define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: ldrb w8, [x0]
; CHECK-NEXT: ldrb w9, [x0, #1]
; CHECK-NEXT: bfi w8, w9, #8, #8
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[0] << 8) | ((i32) p[1] << 16)
define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK: ldrb w8, [x0]
; CHECK-NEXT: ldrb w9, [x0, #1]
; CHECK-NEXT: lsl w0, w8, #8
; CHECK-NEXT: bfi w0, w9, #16, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[0] << 16) | ((i32) p[1] << 24)
define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK: ldrb w8, [x0]
; CHECK-NEXT: ldrb w9, [x0, #1]
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: bfi w0, w9, #24, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: ldrb w8, [x0, #1]
; CHECK-NEXT: ldrb w9, [x0]
; CHECK-NEXT: bfi w8, w9, #8, #8
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK: ldrb w8, [x0, #1]
; CHECK-NEXT: ldrb w9, [x0]
; CHECK-NEXT: lsl w0, w8, #8
; CHECK-NEXT: bfi w0, w9, #16, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK: ldrb w8, [x0, #1]
; CHECK-NEXT: ldrb w9, [x0]
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: bfi w0, w9, #24, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p;
; i16* p1.i16 = (i16*) p;
; (p1.i16[0] << 8) | ((i16) p[2])
;
; This is essentially an i16 load from p[1], but we don't fold the pattern now
; because the address of p[1] is not available in the original DAG
define i16 @load_i16_from_nonzero_offset(i8* %p) {
; CHECK-LABEL: load_i16_from_nonzero_offset:
; CHECK: ldrh w8, [x0]
; CHECK-NEXT: ldrb w0, [x0, #2]
; CHECK-NEXT: bfi w0, w8, #8, #24
; CHECK-NEXT: ret

  %p1.i16 = bitcast i8* %p to i16*
  %p2.i8 = getelementptr i8, i8* %p, i64 2
  %v1 = load i16, i16* %p1.i16
  %v2.i8 = load i8, i8* %p2.i8
  %v2 = zext i8 %v2.i8 to i16
  %v1.shl = shl i16 %v1, 8
  %res = or i16 %v1.shl, %v2
  ret i16 %res
}