Home
last modified time | relevance | path

Searched refs:lane (Results 1 – 25 of 69) sorted by relevance

123

/external/gemmlowp/meta/generators/
Dqnt_Nx8_neon.py101 for lane in lanes:
102 emitter.EmitVAdd('i32', lane[0], lane[0], lane[1])
104 for lane in lanes:
105 emitter.EmitVMul('i32', lane[0], lane[0], multiplicative_offset)
107 for lane in lanes:
108 emitter.EmitVAdd('i32', lane[0], lane[0], rounding_offset)
110 for lane in lanes:
111 emitter.EmitVShl('s32', lane[0], lane[0], shift)
113 for lane in lanes:
114 emitter.EmitVQmovn('s32', lane[2], lane[0])
[all …]
Dzip_Nx8_neon.py68 for lane in lanes:
69 emitter.EmitVMov('i16', lane.aggregator, emitter.ImmediateConstant(0))
77 for lane in lanes:
79 '1.8', lane.load,
80 emitter.DereferenceIncrement(lane.input_address, alignment))
83 for lane in lanes:
84 emitter.EmitVAddw('u8', lane.aggregator, lane.aggregator, lane.load)
85 store_registers.append(lane.load)
98 for lane in lanes:
99 emitter.EmitVMov('i8', lane.load, emitter.ImmediateConstant(0))
[all …]
Dneon_emitter.py350 def Lane(self, value, lane): argument
351 return '%s[%d]' % (value, lane)
/external/opencv3/modules/core/include/opencv2/core/cuda/
Dwarp.hpp97 unsigned int lane = laneId(); in transform() local
99 InIt1 t1 = beg1 + lane; in transform()
100 InIt2 t2 = beg2 + lane; in transform()
109 const unsigned int lane = laneId(); in reduce() local
111 if (lane < 16) in reduce()
113 T partial = ptr[lane]; in reduce()
115 ptr[lane] = partial = op(partial, ptr[lane + 16]); in reduce()
116 ptr[lane] = partial = op(partial, ptr[lane + 8]); in reduce()
117 ptr[lane] = partial = op(partial, ptr[lane + 4]); in reduce()
118 ptr[lane] = partial = op(partial, ptr[lane + 2]); in reduce()
[all …]
Dscan.hpp68 const unsigned int lane = idx & 31; in operator ()() local
71 if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]); in operator ()()
72 if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]); in operator ()()
73 if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]); in operator ()()
74 if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]); in operator ()()
75 if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]); in operator ()()
80 return (lane > 0) ? ptr [idx - 1] : 0; in operator ()()
102 const unsigned int lane = threadIdx.x & 31; in operator ()() local
114 return (lane > 0) ? ptr [idx - 1] : 0; in operator ()()
143 const unsigned int lane = tid & warp_mask; in operator ()() local
[all …]
Dwarp_reduce.hpp57 const unsigned int lane = tid & 31; // index of thread in warp (0..31) in warp_reduce() local
59 if (lane < 16) in warp_reduce()
70 return ptr[tid - lane]; in warp_reduce()
/external/opencv3/modules/cudev/include/opencv2/cudev/warp/
Dwarp.hpp104 uint lane = Warp::laneId(); in warpTransform() local
106 InIt1 t1 = beg1 + lane; in warpTransform()
107 InIt2 t2 = beg2 + lane; in warpTransform()
116 uint lane = Warp::laneId(); in warpYota() local
117 value += lane; in warpYota()
119 for(OutIt t = beg + lane; t < end; t += WARP_SIZE, value += WARP_SIZE) in warpYota()
/external/llvm/lib/Target/ARM/
DARMInstrNEON.td267 // Register list of one D register, with byte lane subscripting.
277 // ...with half-word lane subscripting.
287 // ...with word lane subscripting.
298 // Register list of two D registers with byte lane subscripting.
308 // ...with half-word lane subscripting.
318 // ...with word lane subscripting.
328 // Register list of two Q registers with half-word lane subscripting.
338 // ...with word lane subscripting.
350 // Register list of three D registers with byte lane subscripting.
360 // ...with half-word lane subscripting.
[all …]
/external/opencv3/modules/videoio/src/
Dcap_mjpeg_encoder.cpp1290 uint16x8_t lane = vld1q_u16((unsigned short*)(pix_data+v_plane_ofs)); in writeFrameData() local
1291 … uint16x8_t t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo)); in writeFrameData()
1292 lane = vld1q_u16((unsigned short*)(pix_data + v_plane_ofs + step)); in writeFrameData()
1293 … uint16x8_t t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo)); in writeFrameData()
1297 lane = vld1q_u16((unsigned short*)(pix_data+u_plane_ofs)); in writeFrameData()
1298 t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo)); in writeFrameData()
1299 lane = vld1q_u16((unsigned short*)(pix_data + u_plane_ofs + step)); in writeFrameData()
1300 t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo)); in writeFrameData()
1306 int16x8_t lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data))); in writeFrameData() local
1308 lane = vsubq_s16(lane, delta); in writeFrameData()
[all …]
/external/vixl/test/
Dtest-simulator-a64.cc1295 for (unsigned lane = 0; lane < vd_lane_count; lane++) { in Test1OpNEON() local
1296 unsigned index = lane + (iteration * vd_lane_count); in Test1OpNEON()
1318 for (unsigned lane = 0; lane < vd_lane_count; lane++) { in Test1OpNEON() local
1319 unsigned output_index = (n * vd_lane_count) + lane; in Test1OpNEON()
1336 for (unsigned lane = 0; in Test1OpNEON() local
1337 lane < std::max(vd_lane_count, vn_lane_count); in Test1OpNEON()
1338 lane++) { in Test1OpNEON()
1339 unsigned output_index = (n * vd_lane_count) + lane; in Test1OpNEON()
1340 unsigned input_index_n = (first_index_n + lane) % inputs_n_length; in Test1OpNEON()
1486 for (unsigned lane = 0; lane < vd_lane_count; lane++) { in Test1OpAcrossNEON() local
[all …]
/external/llvm/test/CodeGen/ARM/
D2012-05-04-vmov.ll14 ; vmov.32 should not be used to get a lane:
15 ; vmov.32 <dst>, <src>[<lane>].
16 ; but vmov.32 <dst>[<lane>], <src> is fine.
Da15-partial-update.ll6 ; to write the lane 1 of a D register containing the value of
Dcoalesce-subregs.ll71 ; This function has lane insertions that span basic blocks.
120 ; This function inserts a lane into a fully defined vector.
121 ; The destination lane isn't read, so the subregs can coalesce.
152 ; It is inserting the %add value into a dead lane, but %add causes interference
153 ; in the entry block, and we don't do dead lane checks across basic blocks.
/external/llvm/test/CodeGen/AArch64/
Darm64-neon-simd-ldst-one.ll131 %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
132 ret <16 x i8> %lane
141 %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
142 ret <8 x i16> %lane
151 %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
152 ret <4 x i32> %lane
161 %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
162 ret <2 x i64> %lane
171 %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
172 ret <4 x float> %lane
[all …]
Dfp16-vector-load-store.ll43 ; Load to one lane of v4f16
53 ; Load to one lane of v8f16
81 ; Store from one lane of v4f16
91 ; Store from one lane of v8f16
286 ; NEON intrinsics - loads and stores to/from one lane
300 ; Load one lane of 2 x v4f16
309 ; Load one lane of 3 x v4f16
318 ; Load one lane of 4 x v4f16
327 ; Store one lane of 2 x v4f16
336 ; Store one lane of 3 x v4f16
[all …]
Darm64-build-vector.ll3 ; Check that building up a vector w/ only one non-zero lane initializes
22 ; copy for lane zero.
Darm64-neon-2velem.ll386 %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1>
387 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
398 %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
399 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
410 %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3>
411 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
420 %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
421 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a)
431 %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1>
432 %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a)
[all …]
/external/llvm/docs/
DBigEndianNEON.rst22 This trivial C function takes a vector of four ints and sets the zero'th lane to the value "42"::
61 …use of the byte swapping the lane indices end up being swapped! The zero'th item as laid out in me…
95 Use of ``LDR`` would break this lane ordering property. This doesn't preclude the use of ``LDR``, b…
97 1. Insert a ``REV`` instruction to reverse the lane order after every ``LDR``.
98 …hat rely on lane layout, and for every access to an individual lane (``insertelement``/``extractel…
109 …re not - the lane size is encoded within them. This is important across an ABI boundary, because i…
126 … should be undefined. But there may be functions that are agnostic to the lane layout of the vecto…
128 So to preserve ABI compatibility, we need to use the ``LDR`` lane layout across function calls.
133 …128-bit aligned, whereas ``LD1`` only requires it to be as aligned as the lane size. If we canonic…
152 …issue with lane ordering, it was decided, would have to change target-agnostic compiler passes and…
[all …]
/external/llvm/test/CodeGen/Thumb2/
D2013-03-02-vduplane-nonconstant-source-index.ll3 define void @bar(<4 x i32>* %p, i32 %lane, <4 x i32> %phitmp) nounwind {
8 %val = extractelement <4 x i32> %phitmp, i32 %lane
/external/vixl/src/vixl/a64/
Dsimulator-a64.cc650 for (int lane = leftmost_lane; lane >= rightmost_lane; lane--) { in PrintVRegisterFPHelper() local
652 (lane_size_in_bytes == kSRegSizeInBytes) ? vreg(code).Get<float>(lane) in PrintVRegisterFPHelper()
653 : vreg(code).Get<double>(lane); in PrintVRegisterFPHelper()
745 unsigned lane) { in PrintVRead() argument
752 GetPrintRegLaneCount(format), lane); in PrintVRead()
776 unsigned lane) { in PrintVWrite() argument
788 PrintVRegisterRawHelper(reg_code, reg_size, lane_size * lane); in PrintVWrite()
790 PrintVRegisterFPHelper(reg_code, lane_size, lane_count, lane); in PrintVWrite()
3212 int lane = instr->NEONLSIndex(index_shift); in NEONLoadStoreSingleStructHelper() local
3221 ld1(vf, vreg(rt), lane, addr); in NEONLoadStoreSingleStructHelper()
[all …]
Dsimulator-a64.h291 void Insert(int lane, T new_value) { in Insert() argument
292 VIXL_ASSERT(lane >= 0); in Insert()
294 (lane * sizeof(new_value))) <= kSizeInBytes); in Insert()
295 memcpy(&value_[lane * sizeof(new_value)], &new_value, sizeof(new_value)); in Insert()
301 T Get(int lane = 0) const {
303 VIXL_ASSERT(lane >= 0);
304 VIXL_ASSERT((sizeof(result) + (lane * sizeof(result))) <= kSizeInBytes);
305 memcpy(&result, &value_[lane * sizeof(result)], sizeof(result));
1157 PrintRegisterFormat format, unsigned lane);
1159 PrintRegisterFormat format, unsigned lane);
[all …]
Dmacro-assembler-a64.h2555 int lane, in Ld1() argument
2559 ld1(vt, lane, src); in Ld1()
2576 int lane, in Ld2() argument
2580 ld2(vt, vt2, lane, src); in Ld2()
2600 int lane, in Ld3() argument
2604 ld3(vt, vt2, vt3, lane, src); in Ld3()
2627 int lane, in Ld4() argument
2631 ld4(vt, vt2, vt3, vt4, lane, src); in Ld4()
2750 int lane, in St1() argument
2754 st1(vt, lane, dst); in St1()
[all …]
/external/llvm/lib/Target/PowerPC/
DREADME_ALTIVEC.txt325 big-endian lane 0, using xscvspdpn to produce a double-precision
327 double-precision lane 0, and reinterpreting lane 0 as an FPR or
334 element into big-endian lane 1, using a direct move to a GPR, and
338 element into big-endian lane 3, using a direct move to a GPR, and
342 element into big-endian lane 7, using a direct move to a GPR, and
/external/v8/src/js/
Dharmony-simd.js101 function NAMEExtractLaneJS(instance, lane) { argument
102 return %NAMEExtractLane(instance, lane);
150 function NAMEReplaceLaneJS(instance, lane, value) { argument
151 return %NAMEReplaceLane(instance, lane, value);
172 function NAMEReplaceLaneJS(instance, lane, value) {
173 return %NAMEReplaceLane(instance, lane, TO_NUMBER(value));
/external/llvm/include/llvm/IR/
DIntrinsicsARM.td422 // Vector load N-element structure to one lane.
424 // lane is assigned), the lane number, and the alignment.
461 // Vector store N-element structure from one lane.
462 // Source operands are: the address, the N vectors, the lane number, and

123