/external/libxaac/decoder/armv8/ |
D | ixheaacd_sbr_qmfsyn64_winadd.s | 54 LD1 {v0.4h}, [x0], #8 58 LD1 {v1.4h}, [x2], #8 81 LD1 {v2.4h}, [x0], x8 86 LD1 {v3.4h}, [x2], x9 88 LD1 {v4.4h}, [x0], x8 91 LD1 {v5.4h}, [x2], x9 93 LD1 {v6.4h}, [x0], x8 96 LD1 {v7.4h}, [x2], x9 98 LD1 {v8.4h}, [x0], x8 101 LD1 {v9.4h}, [x2], x9 [all …]
|
D | ixheaacd_sbr_qmf_analysis32_neon.s | 143 LD1 {v0.4h}, [x0], #8 154 LD1 {v2.4h}, [x0], x6 168 LD1 {v4.4h}, [x0], x6 175 LD1 {v6.4h}, [x0], x6 182 LD1 {v8.4h}, [x0], x6 190 LD1 {v10.4h}, [x1], #8 201 LD1 {v12.4h}, [x1], x6 211 LD1 {v14.4h}, [x1], x6 216 LD1 {v16.4h}, [x1], x6 223 LD1 {v18.4h}, [x1], x6 [all …]
|
D | ixheaacd_inv_dit_fft_8pt.s | 33 LD1 {v1.s}[0], [x0], x5 34 LD1 {v2.s}[0], [x6], x5 35 LD1 {v1.s}[1], [x0], x5 36 LD1 {v2.s}[1], [x6], x5 37 LD1 {v3.s}[0], [x0], x5 38 LD1 {v4.s}[0], [x6], x5 39 LD1 {v3.s}[1], [x0], x5 40 LD1 {v4.s}[1], [x6], x5 41 LD1 {v5.s}[0], [x0], x5 42 LD1 {v6.s}[0], [x6], x5 [all …]
|
D | ixheaacd_cos_sin_mod_loop2.s | 58 LD1 {v2.s}[0], [x3] 69 LD1 {v3.s}[0], [x3] // im = *psubband1; 77 LD1 {v2.s}[1], [x11] //re = *psubband12; 84 LD1 {v4.s}[0], [x10] 101 LD1 {v3.s}[1], [x11] 132 LD1 {v2.2s}, [x0] 133 LD1 {v3.2s}, [x10] 179 LD1 {v2.s}[0], [x3] 180 LD1 {v2.s}[1], [x11]
|
D | ixheaacd_no_lap1.s | 61 LD1 {V0.4S}, [X6], X8 64 LD1 {V6.4S}, [X6], X8 78 LD1 {V0.4S}, [X6], X8 92 LD1 {V6.4S}, [X6], X8
|
/external/XNNPACK/src/f32-dwconv/ |
D | up4x9-minmax-aarch64-neonfma-cortex-a55.S | 96 LD1 {v0.2S}, [x17], 8 99 LD1 {v1.2S}, [x17], 8 102 LD1 {v4.2S}, [x7], 8 105 LD1 {v5.2S}, [x17], 8 108 LD1 {v6.2S}, [x7], 8 111 LD1 {v7.2S}, [x17], 8 114 LD1 {v28.2S}, [x8], 8 117 LD1 {v29.2S}, [x17], 8 120 LD1 {v10.2S}, [x8], 8 123 LD1 {v11.2S}, [x17], 8 [all …]
|
/external/libhevc/common/arm64/ |
D | ihevc_inter_pred_chroma_copy.s | 121 LD1 {v0.s}[0],[x0] //vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp, 0) 125 LD1 {v0.s}[0],[x7],x2 //vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp, 0) 128 LD1 {v0.s}[0],[x7],x2 //vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp, 0) 131 LD1 {v0.s}[0],[x7],x2 //vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp, 0) 152 LD1 {v0.s}[0],[x0] //vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp, 0) 156 LD1 {v0.s}[0],[x7],x2 //vld1_lane_u32((uint32_t *)pu1_src_tmp, src_tmp, 0) 176 LD1 {v0.8b},[x0],#8 //vld1_u8(pu1_src_tmp) 179 LD1 {v1.8b},[x7],x2 //vld1_u8(pu1_src_tmp) 182 LD1 {v2.8b},[x7],x2 //vld1_u8(pu1_src_tmp) 184 LD1 {v3.8b},[x7],x2 //vld1_u8(pu1_src_tmp) [all …]
|
D | ihevc_sao_band_offset_luma.s | 100 LD1 {v1.8b},[x14],#8 //band_table.val[0] 103 LD1 {v2.8b},[x14],#8 //band_table.val[1] 110 LD1 {v3.8b},[x14],#8 //band_table.val[2] 114 LD1 {v4.8b},[x14],#8 //band_table.val[3] 118 LD1 {v0.8b},[x4],#8 //Load pu1_src[(ht - 1) * src_strd + col] 123 LD1 {v30.8b},[x6] //pi1_sao_offset load 205 LD1 {v13.8b},[x4] //au1_cur_row = vld1_u8(pu1_src_cpy) 208 LD1 {v15.8b},[x5] //au1_cur_row = vld1_u8(pu1_src_cpy) 210 LD1 {v17.8b},[x6] //au1_cur_row = vld1_u8(pu1_src_cpy) 212 LD1 {v19.8b},[x10] //au1_cur_row = vld1_u8(pu1_src_cpy)
|
D | ihevc_sao_edge_offset_class1.s | 116 LD1 {v6.8b},[x14] //edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 117 LD1 {v7.8b},[x6] //offset_tbl = vld1_s8(pi1_sao_offset) 131 …LD1 {v1.16b},[x9],#16 //pu1_top_row = vld1q_u8(pu1_src_top_cpy || pu1_src - src_… 132 LD1 {v3.16b},[x0],#16 //pu1_cur_row = vld1q_u8(pu1_src) 134 LD1 {v30.16b},[x12],#16 //vld1q_u8(pu1_src[(ht - 1) * src_strd]) 145 LD1 {v18.16b},[x10] //pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 149 LD1 {v30.16b},[x6] //II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 222 LD1 {v18.16b},[x10] //pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 267 …LD1 {v1.16b},[x9],#16 //pu1_top_row = vld1q_u8(pu1_src_top_cpy || pu1_src - src_… 268 LD1 {v3.16b},[x0],#16 //pu1_cur_row = vld1q_u8(pu1_src) [all …]
|
D | ihevc_sao_edge_offset_class1_chroma.s | 136 LD1 {v6.8b},[x14] //edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 137 LD1 {v7.8b},[x6] //offset_tbl_u = vld1_s8(pi1_sao_offset_u) 138 LD1 {v1.8b},[x7] //offset_tbl_v = vld1_s8(pi1_sao_offset_v) 152 …LD1 {v28.16b},[x11],#16 //pu1_top_row = vld1q_u8(pu1_src_top_cpy || pu1_src - src_… 154 LD1 {v3.16b},[x0],#16 //pu1_cur_row = vld1q_u8(pu1_src) 157 LD1 {v30.16b},[x12],#16 //vld1q_u8(pu1_src[(ht - 1) * src_strd]) 169 LD1 {v18.16b},[x10] //pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 176 LD1 {v30.16b},[x6] //II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 270 LD1 {v18.16b},[x10] //pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 327 …LD1 {v28.16b},[x11] //pu1_top_row = vld1q_u8(pu1_src_top_cpy || pu1_src - src_… [all …]
|
D | ihevc_sao_band_offset_chroma.s | 114 LD1 {v1.8b},[x14],#8 //band_table_u.val[0] 119 LD1 {v2.8b},[x14],#8 //band_table_u.val[1] 123 LD1 {v3.8b},[x14],#8 //band_table_u.val[2] 131 LD1 {v4.8b},[x14],#8 //band_table_u.val[3] 135 LD1 {v0.8b},[x4],#8 //Load pu1_src[(ht - 1) * src_strd + col] 140 LD1 {v30.8b},[x7] //pi1_sao_offset_u load 160 LD1 {v9.8b},[x14],#8 //band_table_v.val[0] 163 LD1 {v10.8b},[x14],#8 //band_table_v.val[1] 219 LD1 {v11.8b},[x14],#8 //band_table_v.val[2] 222 LD1 {v12.8b},[x14],#8 //band_table_v.val[3] [all …]
|
D | ihevc_sao_edge_offset_class2.s | 108 LD1 {v0.8b},[x11],#8 //pu1_src[(ht - 1) * src_strd + col] 215 LD1 {v7.8b},[x6] //offset_tbl = vld1_s8(pi1_sao_offset) 222 LD1 {v6.8b},[x11] //edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 264 …LD1 {v3.16b},[x8] //pu1_top_row = vld1q_u8(pu1_src - src_strd - 1) || vld1q_… 268 LD1 {v5.16b},[x0] //pu1_cur_row = vld1q_u8(pu1_src) 294 LD1 {v16.16b},[x8] //I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 360 LD1 {v16.16b},[x8] //II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 364 LD1 {v30.16b},[x11] //III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 476 LD1 {v16.16b},[x8] //pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 581 …LD1 {v3.16b},[x8] //pu1_top_row = vld1q_u8(pu1_src - src_strd - 1) || vld1q_… [all …]
|
D | ihevc_sao_edge_offset_class2_chroma.s | 122 LD1 {v0.8b},[x11],#8 //pu1_src[(ht - 1) * src_strd + col] 323 LD1 {v6.8b},[x6] //offset_tbl_u = vld1_s8(pi1_sao_offset_u) 332 LD1 {v7.8b},[x6] //offset_tbl_v = vld1_s8(pi1_sao_offset_v) 365 LD1 {v5.16b},[x0] //pu1_cur_row = vld1q_u8(pu1_src) 381 …LD1 {v3.16b},[x8] //pu1_top_row = vld1q_u8(pu1_src - src_strd - 2) || vld1q_… 406 LD1 {v16.16b},[x8] //I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 454 LD1 {v30.8b},[x2] //edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 502 LD1 {v16.16b},[x8] //II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 505 LD1 {v30.16b},[x11] //III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 556 LD1 {v22.8b},[x2] //edge_idx_tbl = vld1_s8(gi1_table_edge_idx) [all …]
|
D | ihevc_sao_edge_offset_class3_chroma.s | 119 LD1 {v0.8b},[x11],#8 //pu1_src[(ht - 1) * src_strd + col] 318 LD1 {v6.8b},[x6] //offset_tbl_u = vld1_s8(pi1_sao_offset_u) 320 LD1 {v7.8b},[x6] //offset_tbl_v = vld1_s8(pi1_sao_offset_v) 356 LD1 {v5.16b},[x0] //pu1_cur_row = vld1q_u8(pu1_src) 367 LD1 {v3.16b},[x8] //pu1_top_row = vld1q_u8(pu1_src - src_strd + 2) 398 LD1 {v16.16b},[x11] //I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 440 LD1 {v28.8b},[x2] //edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 492 LD1 {v16.16b},[x11] //II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 502 LD1 {v30.16b},[x4] //III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 517 LD1 {v21.8b},[x2] //edge_idx_tbl = vld1_s8(gi1_table_edge_idx) [all …]
|
D | ihevc_sao_edge_offset_class3.s | 108 LD1 {v0.8b},[x11],#8 //pu1_src[(ht - 1) * src_strd + col] 227 LD1 {v7.8b},[x6] //offset_tbl = vld1_s8(pi1_sao_offset) 239 LD1 {v6.8b},[x6] //edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 278 LD1 {v3.16b},[x8] //pu1_top_row = vld1q_u8(pu1_src - src_strd + 1) 282 LD1 {v5.16b},[x0] //pu1_cur_row = vld1q_u8(pu1_src) 304 LD1 {v16.16b},[x8] //I pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 374 LD1 {v16.16b},[x8] //II pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 378 LD1 {v30.16b},[x2] //III pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 500 LD1 {v16.16b},[x8] //pu1_next_row = vld1q_u8(pu1_src_cpy + src_strd) 606 LD1 {v3.16b},[x8] //pu1_top_row = vld1q_u8(pu1_src - src_strd + 1) [all …]
|
D | ihevc_sao_edge_offset_class0.s | 100 LD1 {v5.8b},[x14] //edge_idx_tbl = vld1_s8(gi1_table_edge_idx) 104 LD1 {v7.8b},[x8] //offset_tbl = vld1_s8(pi1_sao_offset) 110 LD1 {v0.8b},[x4],#8 //Load pu1_src[(ht - 1) * src_strd + col] 145 LD1 {v17.16b},[x12],x1 //pu1_cur_row = vld1q_u8(pu1_src_cpy) 152 LD1 {v26.16b},[x12] //II Iteration pu1_cur_row = vld1q_u8(pu1_src_cpy) 290 LD1 {v17.16b},[x12] //pu1_cur_row = vld1q_u8(pu1_src_cpy)
|
/external/libhevc/decoder/arm64/ |
D | ihevcd_fmt_conv_420sp_to_420sp.s | 114 LD1 {v0.8b},[x0],#8 115 LD1 {v1.8b},[x0],#8 116 LD1 {v2.8b},[x0],#8 117 LD1 {v3.8b},[x0],#8 134 LD1 {v0.8b},[x0],#8 135 LD1 {v1.8b},[x0],#8 136 LD1 {v2.8b},[x0],#8 137 LD1 {v3.8b},[x0],#8 172 LD1 {v0.8b},[x1],#8 173 LD1 {v1.8b},[x1],#8 [all …]
|
/external/llvm/test/CodeGen/ARM/ |
D | vcombine.ll | 7 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] 10 ; CHECK-LE-DAG: vmov r2, r3, [[LD1]] 23 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] 26 ; CHECK-LE-DAG: vmov r2, r3, [[LD1]] 40 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] 43 ; CHECK-LE: vmov r2, r3, [[LD1]] 57 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] 60 ; CHECK-LE: vmov r2, r3, [[LD1]] 73 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] 76 ; CHECK-LE: vmov r2, r3, [[LD1]] [all …]
|
/external/llvm/docs/ |
D | BigEndianNEON.rst | 52 ``LDR`` and ``LD1`` 66 Big endian vector load using ``LD1``. Note that the lanes retain the correct ordering. 69 Because of this, the instruction ``LD1`` performs a vector load but performs byte swapping not on t… 71 It may seem that ``LD1`` should suffice to peform vector loads on a big endian machine. However the… 76 …2. The content of a vector register is the same *as if* it had been loaded with an ``LD1`` instruc… 78 Because ``LD1 == LDR + REV`` and similarly ``LDR == LD1 + REV`` (on a big endian system), we can si… 109 …t one advantage over ``LD1`` and ``ST1``. ``LDR`` and ``STR`` are oblivious to the size of the ind… 124 …``uint16x4_t``, which is equivalent in register content, if we passed as ``LD1`` we'd break this c… 133 …LD1`` only requires it to be as aligned as the lane size. If we canonicalised on using ``LDR``, we… 143 | | ``LDR`` layout | ``LD1`` layout | [all …]
|
/external/llvm-project/llvm/docs/ |
D | BigEndianNEON.rst | 52 ``LDR`` and ``LD1`` 66 Big endian vector load using ``LD1``. Note that the lanes retain the correct ordering. 69 Because of this, the instruction ``LD1`` performs a vector load but performs byte swapping not on t… 71 It may seem that ``LD1`` should suffice to peform vector loads on a big endian machine. However the… 76 …2. The content of a vector register is the same *as if* it had been loaded with an ``LD1`` instruc… 78 Because ``LD1 == LDR + REV`` and similarly ``LDR == LD1 + REV`` (on a big endian system), we can si… 109 …t one advantage over ``LD1`` and ``ST1``. ``LDR`` and ``STR`` are oblivious to the size of the ind… 124 …``uint16x4_t``, which is equivalent in register content, if we passed as ``LD1`` we'd break this c… 133 …LD1`` only requires it to be as aligned as the lane size. If we canonicalised on using ``LDR``, we… 143 | | ``LDR`` layout | ``LD1`` layout | [all …]
|
/external/llvm-project/llvm/test/CodeGen/ARM/ |
D | vcombine.ll | 7 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] 10 ; CHECK-LE-DAG: vmov r2, r3, [[LD1]] 23 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] 26 ; CHECK-LE-DAG: vmov r2, r3, [[LD1]] 40 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] 42 ; CHECK-LE: vmov r2, r3, [[LD1]] 57 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] 59 ; CHECK-LE: vmov r2, r3, [[LD1]] 73 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] 75 ; CHECK-LE: vmov r2, r3, [[LD1]] [all …]
|
/external/llvm-project/llvm/test/CodeGen/NVPTX/ |
D | proxy-reg-erasure-ptx.ll | 79 ; PTX-DAG: ld.param.v2.b64 {[[LD0:%rd[0-9]+]], [[LD1:%rd[0-9]+]]}, [retval0+0]; 83 ; PTX-WITHOUT-DAG: mov.b64 [[PROXY1:%rd[0-9]+]], [[LD1]]; 85 ; PTX-WITH-DAG: st.param.v2.b64 [func_retval0+0], {[[LD0]], [[LD1]]}; 140 …; PTX-DAG: ld.param.v4.b32 {[[LD0:%r[0-9]+]], [[LD1:%r[0-9]+]], [[LD2:%r[0-9]+]], [[LD3:%r[0-9]+]]… 144 ; PTX-WITHOUT-DAG: mov.b32 [[PROXY1:%r[0-9]+]], [[LD1]]; 148 ; PTX-WITH-DAG: st.param.v4.b32 [func_retval0+0], {[[LD0]], [[LD1]], [[LD2]], [[LD3]]}; 173 ; PTX-DAG: ld.param.v2.f64 {[[LD0:%fd[0-9]+]], [[LD1:%fd[0-9]+]]}, [retval0+0]; 177 ; PTX-WITHOUT-DAG: mov.f64 [[PROXY1:%fd[0-9]+]], [[LD1]]; 179 ; PTX-WITH-DAG: st.param.v2.f64 [func_retval0+0], {[[LD0]], [[LD1]]};
|
/external/llvm/test/CodeGen/X86/ |
D | merge-store-partially-alias-loads.ll | 22 ; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load<LD1[%tmp12]> [[ENTRYTOKEN]], [[ADDPTR]], undef:i64 24 ; DBGDAG: [[LOADTOKEN:t[0-9]+]]: ch = TokenFactor [[LD2]]:1, [[LD1]]:1 27 ; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store<ST1[%tmp14]> [[ST2]], [[LD1]], t{{[0-9]+}}, undef:i64
|
/external/llvm-project/llvm/test/Transforms/InstCombine/ |
D | multiple-uses-load-bitcast-select.ll | 8 ; CHECK-NEXT: [[LD1:%.*]] = load double, double* [[Y1]], align 8 10 ; CHECK-NEXT: [[TMP10:%.*]] = fcmp olt double [[LD1]], [[LD2]] 11 ; CHECK-NEXT: [[TMP121:%.*]] = select i1 [[TMP10]], double [[LD1]], double [[LD2]]
|
/external/llvm-project/llvm/test/CodeGen/X86/ |
D | merge-store-partially-alias-loads.ll | 22 ; DBGDAG-DAG: [[LD1:t[0-9]+]]: i8,ch = load<(load 1 from %ir.tmp12)> [[ENTRYTOKEN]], [[ADDPTR]], un… 24 ; DBGDAG-DAG: [[ST1:t[0-9]+]]: ch = store<(store 1 into %ir.tmp14)> [[ENTRYTOKEN]], [[LD1]], t{{[0-… 25 ; DBGDAG-DAG: [[LOADTOKEN:t[0-9]+]]: ch = TokenFactor [[LD2]]:1, [[LD1]]:1
|