/external/gemmlowp/meta/generators/ |
D | qnt_Nx8_neon.py | 101 for lane in lanes: 102 emitter.EmitVAdd('i32', lane[0], lane[0], lane[1]) 104 for lane in lanes: 105 emitter.EmitVMul('i32', lane[0], lane[0], multiplicative_offset) 107 for lane in lanes: 108 emitter.EmitVAdd('i32', lane[0], lane[0], rounding_offset) 110 for lane in lanes: 111 emitter.EmitVShl('s32', lane[0], lane[0], shift) 113 for lane in lanes: 114 emitter.EmitVQmovn('s32', lane[2], lane[0]) [all …]
|
D | zip_Nx8_neon.py | 68 for lane in lanes: 69 emitter.EmitVMov('i16', lane.aggregator, emitter.ImmediateConstant(0)) 77 for lane in lanes: 79 '1.8', lane.load, 80 emitter.DereferenceIncrement(lane.input_address, alignment)) 83 for lane in lanes: 84 emitter.EmitVAddw('u8', lane.aggregator, lane.aggregator, lane.load) 85 store_registers.append(lane.load) 98 for lane in lanes: 99 emitter.EmitVMov('i8', lane.load, emitter.ImmediateConstant(0)) [all …]
|
D | neon_emitter.py | 350 def Lane(self, value, lane): argument 351 return '%s[%d]' % (value, lane)
|
/external/opencv3/modules/core/include/opencv2/core/cuda/ |
D | warp.hpp | 97 unsigned int lane = laneId(); in transform() local 99 InIt1 t1 = beg1 + lane; in transform() 100 InIt2 t2 = beg2 + lane; in transform() 109 const unsigned int lane = laneId(); in reduce() local 111 if (lane < 16) in reduce() 113 T partial = ptr[lane]; in reduce() 115 ptr[lane] = partial = op(partial, ptr[lane + 16]); in reduce() 116 ptr[lane] = partial = op(partial, ptr[lane + 8]); in reduce() 117 ptr[lane] = partial = op(partial, ptr[lane + 4]); in reduce() 118 ptr[lane] = partial = op(partial, ptr[lane + 2]); in reduce() [all …]
|
D | scan.hpp | 68 const unsigned int lane = idx & 31; in operator ()() local 71 if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]); in operator ()() 72 if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]); in operator ()() 73 if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]); in operator ()() 74 if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]); in operator ()() 75 if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]); in operator ()() 80 return (lane > 0) ? ptr [idx - 1] : 0; in operator ()() 102 const unsigned int lane = threadIdx.x & 31; in operator ()() local 114 return (lane > 0) ? ptr [idx - 1] : 0; in operator ()() 143 const unsigned int lane = tid & warp_mask; in operator ()() local [all …]
|
D | warp_reduce.hpp | 57 const unsigned int lane = tid & 31; // index of thread in warp (0..31) in warp_reduce() local 59 if (lane < 16) in warp_reduce() 70 return ptr[tid - lane]; in warp_reduce()
|
/external/opencv3/modules/cudev/include/opencv2/cudev/warp/ |
D | warp.hpp | 104 uint lane = Warp::laneId(); in warpTransform() local 106 InIt1 t1 = beg1 + lane; in warpTransform() 107 InIt2 t2 = beg2 + lane; in warpTransform() 116 uint lane = Warp::laneId(); in warpYota() local 117 value += lane; in warpYota() 119 for(OutIt t = beg + lane; t < end; t += WARP_SIZE, value += WARP_SIZE) in warpYota()
|
/external/llvm/lib/Target/ARM/ |
D | ARMInstrNEON.td | 267 // Register list of one D register, with byte lane subscripting. 277 // ...with half-word lane subscripting. 287 // ...with word lane subscripting. 298 // Register list of two D registers with byte lane subscripting. 308 // ...with half-word lane subscripting. 318 // ...with word lane subscripting. 328 // Register list of two Q registers with half-word lane subscripting. 338 // ...with word lane subscripting. 350 // Register list of three D registers with byte lane subscripting. 360 // ...with half-word lane subscripting. [all …]
|
/external/opencv3/modules/videoio/src/ |
D | cap_mjpeg_encoder.cpp | 1290 uint16x8_t lane = vld1q_u16((unsigned short*)(pix_data+v_plane_ofs)); in writeFrameData() local 1291 … uint16x8_t t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo)); in writeFrameData() 1292 lane = vld1q_u16((unsigned short*)(pix_data + v_plane_ofs + step)); in writeFrameData() 1293 … uint16x8_t t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo)); in writeFrameData() 1297 lane = vld1q_u16((unsigned short*)(pix_data+u_plane_ofs)); in writeFrameData() 1298 t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo)); in writeFrameData() 1299 lane = vld1q_u16((unsigned short*)(pix_data + u_plane_ofs + step)); in writeFrameData() 1300 t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo)); in writeFrameData() 1306 int16x8_t lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data))); in writeFrameData() local 1308 lane = vsubq_s16(lane, delta); in writeFrameData() [all …]
|
/external/vixl/test/ |
D | test-simulator-a64.cc | 1295 for (unsigned lane = 0; lane < vd_lane_count; lane++) { in Test1OpNEON() local 1296 unsigned index = lane + (iteration * vd_lane_count); in Test1OpNEON() 1318 for (unsigned lane = 0; lane < vd_lane_count; lane++) { in Test1OpNEON() local 1319 unsigned output_index = (n * vd_lane_count) + lane; in Test1OpNEON() 1336 for (unsigned lane = 0; in Test1OpNEON() local 1337 lane < std::max(vd_lane_count, vn_lane_count); in Test1OpNEON() 1338 lane++) { in Test1OpNEON() 1339 unsigned output_index = (n * vd_lane_count) + lane; in Test1OpNEON() 1340 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; in Test1OpNEON() 1486 for (unsigned lane = 0; lane < vd_lane_count; lane++) { in Test1OpAcrossNEON() local [all …]
|
/external/llvm/test/CodeGen/ARM/ |
D | 2012-05-04-vmov.ll | 14 ; vmov.32 should not be used to get a lane: 15 ; vmov.32 <dst>, <src>[<lane>]. 16 ; but vmov.32 <dst>[<lane>], <src> is fine.
|
D | a15-partial-update.ll | 6 ; to write the lane 1 of a D register containing the value of
|
D | coalesce-subregs.ll | 71 ; This function has lane insertions that span basic blocks. 120 ; This function inserts a lane into a fully defined vector. 121 ; The destination lane isn't read, so the subregs can coalesce. 152 ; It is inserting the %add value into a dead lane, but %add causes interference 153 ; in the entry block, and we don't do dead lane checks across basic blocks.
|
/external/llvm/test/CodeGen/AArch64/ |
D | arm64-neon-simd-ldst-one.ll | 131 %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer 132 ret <16 x i8> %lane 141 %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer 142 ret <8 x i16> %lane 151 %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer 152 ret <4 x i32> %lane 161 %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer 162 ret <2 x i64> %lane 171 %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer 172 ret <4 x float> %lane [all …]
|
D | fp16-vector-load-store.ll | 43 ; Load to one lane of v4f16 53 ; Load to one lane of v8f16 81 ; Store from one lane of v4f16 91 ; Store from one lane of v8f16 286 ; NEON intrinsics - loads and stores to/from one lane 300 ; Load one lane of 2 x v4f16 309 ; Load one lane of 3 x v4f16 318 ; Load one lane of 4 x v4f16 327 ; Store one lane of 2 x v4f16 336 ; Store one lane of 3 x v4f16 [all …]
|
D | arm64-build-vector.ll | 3 ; Check that building up a vector w/ only one non-zero lane initializes 22 ; copy for lane zero.
|
D | arm64-neon-2velem.ll | 386 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> 387 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 398 %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 399 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 410 %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> 411 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) 420 %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 421 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) 431 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1> 432 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) [all …]
|
/external/llvm/docs/ |
D | BigEndianNEON.rst | 22 This trivial C function takes a vector of four ints and sets the zero'th lane to the value "42":: 61 …use of the byte swapping the lane indices end up being swapped! The zero'th item as laid out in me… 95 Use of ``LDR`` would break this lane ordering property. This doesn't preclude the use of ``LDR``, b… 97 1. Insert a ``REV`` instruction to reverse the lane order after every ``LDR``. 98 …hat rely on lane layout, and for every access to an individual lane (``insertelement``/``extractel… 109 …re not - the lane size is encoded within them. This is important across an ABI boundary, because i… 126 … should be undefined. But there may be functions that are agnostic to the lane layout of the vecto… 128 So to preserve ABI compatibility, we need to use the ``LDR`` lane layout across function calls. 133 …128-bit aligned, whereas ``LD1`` only requires it to be as aligned as the lane size. If we canonic… 152 …issue with lane ordering, it was decided, would have to change target-agnostic compiler passes and… [all …]
|
/external/llvm/test/CodeGen/Thumb2/ |
D | 2013-03-02-vduplane-nonconstant-source-index.ll | 3 define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind { 8 %val = extractelement <4 x i32> %phitmp, i32 %lane
|
/external/vixl/src/vixl/a64/ |
D | simulator-a64.cc | 650 for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) { in PrintVRegisterFPHelper() local 652 (lane_size_in_bytes == kSRegSizeInBytes) ? vreg(code).Get<float>(lane) in PrintVRegisterFPHelper() 653 : vreg(code).Get<double>(lane); in PrintVRegisterFPHelper() 745 unsigned lane) { in PrintVRead() argument 752 GetPrintRegLaneCount(format), lane); in PrintVRead() 776 unsigned lane) { in PrintVWrite() argument 788 PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane); in PrintVWrite() 790 PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane); in PrintVWrite() 3212 int lane = instr->NEONLSIndex(index_shift); in NEONLoadStoreSingleStructHelper() local 3221 ld1(vf, vreg(rt), lane, addr); in NEONLoadStoreSingleStructHelper() [all …]
|
D | simulator-a64.h | 291 void Insert(int lane, T new_value) { in Insert() argument 292 VIXL_ASSERT(lane >= 0); in Insert() 294 (lane * sizeof(new_value))) <= kSizeInBytes); in Insert() 295 memcpy(&value_[lane * sizeof(new_value)], &new_value, sizeof(new_value)); in Insert() 301 T Get(int lane = 0) const { 303 VIXL_ASSERT(lane >= 0); 304 VIXL_ASSERT((sizeof(result) + (lane * sizeof(result))) <= kSizeInBytes); 305 memcpy(&result, &value_[lane * sizeof(result)], sizeof(result)); 1157 PrintRegisterFormat format, unsigned lane); 1159 PrintRegisterFormat format, unsigned lane); [all …]
|
D | macro-assembler-a64.h | 2555 int lane, in Ld1() argument 2559 ld1(vt, lane, src); in Ld1() 2576 int lane, in Ld2() argument 2580 ld2(vt, vt2, lane, src); in Ld2() 2600 int lane, in Ld3() argument 2604 ld3(vt, vt2, vt3, lane, src); in Ld3() 2627 int lane, in Ld4() argument 2631 ld4(vt, vt2, vt3, vt4, lane, src); in Ld4() 2750 int lane, in St1() argument 2754 st1(vt, lane, dst); in St1() [all …]
|
/external/llvm/lib/Target/PowerPC/ |
D | README_ALTIVEC.txt | 325 big-endian lane 0, using xscvspdpn to produce a double-precision 327 double-precision lane 0, and reinterpreting lane 0 as an FPR or 334 element into big-endian lane 1, using a direct move to a GPR, and 338 element into big-endian lane 3, using a direct move to a GPR, and 342 element into big-endian lane 7, using a direct move to a GPR, and
|
/external/v8/src/js/ |
D | harmony-simd.js | 101 function NAMEExtractLaneJS(instance, lane) { argument 102 return %NAMEExtractLane(instance, lane); 150 function NAMEReplaceLaneJS(instance, lane, value) { argument 151 return %NAMEReplaceLane(instance, lane, value); 172 function NAMEReplaceLaneJS(instance, lane, value) { 173 return %NAMEReplaceLane(instance, lane, TO_NUMBER(value));
|
/external/llvm/include/llvm/IR/ |
D | IntrinsicsARM.td | 422 // Vector load N-element structure to one lane. 424 // lane is assigned), the lane number, and the alignment. 461 // Vector store N-element structure from one lane. 462 // Source operands are: the address, the N vectors, the lane number, and
|