Home
last modified time | relevance | path

Searched refs:lanes (Results 1 – 25 of 173) sorted by relevance

1234567

/external/gemmlowp/meta/generators/
Dzip_Nx8_neon.py40 lanes = []
44 lanes.append(ZipLane(input_address, registers.DoubleRegister(),
48 lanes.append(ZipLane(address_register, registers.DoubleRegister(),
52 return lanes
64 def GenerateClearAggregators(emitter, lanes): argument
65 for lane in lanes:
69 def GenerateLoadAggregateStore(emitter, lanes, output_address, alignment): argument
74 for lane in lanes:
80 for lane in lanes:
88 def GenerateLeftoverLoadAggregateStore(emitter, leftovers, lanes, argument
[all …]
Dqnt_Nx8_neon.py26 def BuildName(lanes, leftovers, aligned): argument
27 name = 'qnt_%dx8' % lanes
35 def LoadAndDuplicateOffsets(emitter, registers, lanes, offsets): argument
36 if lanes == 1 or lanes == 2 or lanes == 3:
38 for unused_i in range(0, lanes):
46 raise ConfigurationError('Unsupported number of lanes: %d' % lanes)
55 lanes = []
60 lanes.append(QntLane(source,
68 lanes.append(QntLane(input_register,
77 return lanes
[all …]
Dmul_Nx8_Mx8_neon.py22 self.lanes = []
25 self.lanes.append(lane)
28 for i in range(0, len(self.lanes)):
29 registers.FreeRegister(self.lanes[i])
30 self.lanes[i] = None
34 lanes = MulLanes(address)
36 lanes.AddLane(registers.DoubleRegister())
37 return lanes
41 lanes = MulLanes(address)
42 lanes.AddLane(registers.Low(quad_register))
[all …]
Dmul_1x8_Mx8_neon.py197 def BuildName(result_type, lhs_add, rhs_add, lanes): argument
198 name = 'mul_1x8_%dx8_%s' % (lanes, result_type)
279 for lanes in range(1, 5):
280 GenerateMul1x8Mx8(emitter, result_type, lhs_add, rhs_add, lanes)
/external/gemmlowp/meta/
Dtest_streams_correctness.cc105 template <int lanes, int leftover>
116 prepare_row_major_data(lanes, all_elements, stride, in); in test_2()
117 Stream<std::uint8_t, lanes, 8, leftover, RowMajorWithSum>::Pack(in, params, in test_2()
119 if (check(out, lanes, all_elements)) { in test_2()
124 std::cout << "Row: " << lanes << "x8x" << leftover << " : " in test_2()
131 for (int stride = lanes; stride < lanes + 4; ++stride) { in test_2()
138 prepare_column_major_data(lanes, all_elements, stride, in); in test_2()
139 Stream<std::uint8_t, lanes, 8, leftover, ColumnMajorWithSum>::Pack(in, params, in test_2()
141 if (check(out, lanes, all_elements)) { in test_2()
146 std::cout << "Column: " << lanes << "x8x" << leftover << " : " in test_2()
[all …]
/external/llvm-project/llvm/test/CodeGen/AMDGPU/
Ddetect-dead-lanes.mir1 # RUN: llc -march=amdgcn -run-pass detect-dead-lanes -o - %s | FileCheck %s
42 # Check defined lanes transfer; Includes checking for some special cases like
122 # Check used lanes transfer; Includes checking for some special cases like
203 # Check that copies to physregs use all lanes, copies from physregs define all
204 # lanes. So we should not get a dead/undef flag here.
296 ; let's swiffle some lanes around for fun...
308 # for the used lanes. The example reads sub3 lane at the end, however with each
349 ; rotate lanes, but skip sub2 lane...
359 # Similar to loop1 test, but check for fixpoint of defined lanes.
392 ; rotate subreg lanes, skipping sub1
Dloop_exit_with_xor.ll3 ; Where the mask of lanes wanting to exit the loop on this iteration is not
35 ; Where the mask of lanes wanting to exit the loop on this iteration is
59 ; Another case where the mask of lanes wanting to exit the loop is not masked
Dscalar-branch-missing-and-exec.ll9 ; without ensuring that the resulting mask has bits clear for inactive lanes.
11 ; set bits for inactive lanes.
Dsubreg-undef-def-with-other-subreg-defs.mir4 # Deciding which lanes are killed needs to account for other defs in the
8 # current vreg uses because it shared no lanes with %0.sub1 use on the
/external/llvm/test/CodeGen/AMDGPU/
Ddetect-dead-lanes.mir1 # RUN: llc -march=amdgcn -run-pass detect-dead-lanes -o - %s | FileCheck %s
53 # Check defined lanes transfer; Includes checking for some special cases like
134 # Check used lanes transfer; Includes checking for some special cases like
216 # Check that copies to physregs use all lanes, copies from physregs define all
217 # lanes. So we should not get a dead/undef flag here.
315 ; let's swiffle some lanes around for fun...
327 # for the used lanes. The example reads sub3 lane at the end, however with each
371 ; rotate lanes, but skip sub2 lane...
381 # Similar to loop1 test, but check for fixpoint of defined lanes.
417 ; rotate subreg lanes, skipping sub1
/external/skqp/src/compute/hs/gen/
Dtarget_opencl.c305 m->warps * config->warp.lanes, in hsg_target_opencl()
334 m->warps * config->warp.lanes, in hsg_target_opencl()
500 ops->b * config->warp.lanes, in hsg_target_opencl()
508 ops->b * config->warp.lanes, in hsg_target_opencl()
518 ops->b * config->warp.lanes); in hsg_target_opencl()
526 ops->b * config->warp.lanes); in hsg_target_opencl()
Dtarget_cuda.c336 m->warps * config->warp.lanes, in hsg_target_cuda()
373 m->warps * config->warp.lanes, in hsg_target_cuda()
569 ops->b * config->warp.lanes, in hsg_target_cuda()
577 ops->b * config->warp.lanes, in hsg_target_cuda()
587 ops->b * config->warp.lanes); in hsg_target_cuda()
595 ops->b * config->warp.lanes); in hsg_target_cuda()
Dtarget_glsl.c324 m->warps * config->warp.lanes, in hsg_target_glsl()
369 m->warps * config->warp.lanes, in hsg_target_glsl()
588 ops->b * config->warp.lanes, in hsg_target_glsl()
596 ops->b * config->warp.lanes, in hsg_target_glsl()
606 ops->b * config->warp.lanes); in hsg_target_glsl()
614 ops->b * config->warp.lanes); in hsg_target_glsl()
/external/llvm-project/llvm/test/MachineVerifier/
Dtest_g_fcmp.mir18 ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of lanes
22 ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of lanes
Dtest_g_icmp.mir18 ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of lanes
22 ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of lanes
/external/llvm-project/llvm/docs/
DAMDGPUModifierSyntax.rst117 all lanes in its group.
129 1, 2, 4, 8 or 16 lanes.
132 … Reverses the lanes for groups of 2, 4, 8, 16 or 32 lanes.
988 Selects which lanes to pull data from, within a group of 8 lanes. This is a mandatory modifier.
1004 dpp8:[{0..7},{0..7},{0..7},{0..7},{0..7},{0..7},{0..7},{0..7}] Select lanes to read from.
1019 Controls interaction with inactive lanes for *dpp8* instructions. The default value is zero.
1021 Note: *inactive* lanes are those whose :ref:`exec<amdgpu_synid_exec>` mask bit is zero.
1028 fi:0 Fetch zero when accessing data from inactive lanes.
1029 fi:1 Fetch pre-exist values from inactive lanes.
1050 Note: the lanes of a wavefront are organized in four *rows* and four *banks*.
[all …]
/external/tensorflow/tensorflow/core/profiler/internal/
Dtfprof_timeline.cc351 for (int64 i = 0, end = p->lanes.size(); i < end; ++i) { in AllocateLanes()
352 const auto& lane = p->lanes[i]; in AllocateLanes()
368 l = p->lanes.size(); in AllocateLanes()
371 p->lanes.push_back(nlane); in AllocateLanes()
373 p->lanes[l][start_time] = end_time; in AllocateLanes()
/external/vixl/src/aarch64/
Dregisters-aarch64.h551 explicit VRegister(int code, int size_in_bits = kQRegSize, int lanes = 1) in VIXL_DECLARE_REGISTER_COMMON()
555 EncodeLaneSizeInBits(size_in_bits, lanes)) { in VIXL_DECLARE_REGISTER_COMMON()
582 static EncodedSize EncodeLaneSizeInBits(int size_in_bits, int lanes) { in EncodeLaneSizeInBits() argument
583 VIXL_ASSERT(lanes >= 1); in EncodeLaneSizeInBits()
584 VIXL_ASSERT((size_in_bits % lanes) == 0); in EncodeLaneSizeInBits()
585 return EncodeSizeInBits(size_in_bits / lanes); in EncodeLaneSizeInBits()
/external/llvm/lib/Target/AArch64/
DAArch64SchedVulcan.td669 // ASIMD load, 1 element, all lanes, D-form, B/H/S
670 // ASIMD load, 1 element, all lanes, D-form, D
671 // ASIMD load, 1 element, all lanes, Q-form
691 // ASIMD load, 2 element, all lanes, D-form, B/H/S
692 // ASIMD load, 2 element, all lanes, D-form, D
693 // ASIMD load, 2 element, all lanes, Q-form
714 // ASIMD load, 3 element, all lanes, D-form, B/H/S
715 // ASIMD load, 3 element, all lanes, D-form, D
716 // ASIMD load, 3 element, all lanes, Q-form, B/H/S
717 // ASIMD load, 3 element, all lanes, Q-form, D
[all …]
/external/rust/crates/ppv-lite86/src/
Dsoft.rs165 fn from_lanes(lanes: [W; 2]) -> Self { in from_lanes()
166 x2::new(lanes)
353 fn from_lanes(lanes: [W; 4]) -> Self { in from_lanes()
354 x4(lanes)
/external/mesa3d/src/gallium/drivers/nouveau/codegen/
Dnv50_ir_lowering_gm107.cpp142 add->lanes = 1; /* abused for .ndv */ in handleManualTXD()
150 add->lanes = 1; /* abused for .ndv */ in handleManualTXD()
190 mov->lanes = 1 << l; in handleManualTXD()
229 insn->lanes = 0; /* abused for !.ndv */ in handleDFDX()
/external/arm-trusted-firmware/plat/brcm/board/stingray/src/
Dpaxb.c267 unsigned int lanes = 0; in pcie_set_default_tx_coeff() local
271 for (lanes = 0; lanes < link_width; lanes = lanes + 2) { in pcie_set_default_tx_coeff()
/external/tensorflow/tensorflow/python/client/
Dtimeline.py416 lanes = [0]
419 for (i, lts) in enumerate(lanes):
422 lanes[l] = ns.all_start_micros + ns.all_end_rel_micros
425 l = len(lanes)
426 lanes.append(ns.all_start_micros + ns.all_end_rel_micros)
/external/llvm-project/clang/include/clang/Basic/
Darm_neon_incl.td87 // - "H" - Halve the number of lanes in the type.
88 // - "D" - Double the number of lanes in the type.
104 // all lanes. The type of the vector is the base type of the intrinsic.
109 // the same type by duplicating the scalar value into all lanes.
168 // is a width in bits to reverse. The lanes this maps to is determined
173 // mask0 - The initial sequence of lanes for shuffle ARG0
175 // mask0 - The initial sequence of lanes for shuffle ARG1
/external/skqp/src/compute/skc/platforms/cl_12/kernels/
Drasters_alloc.cl43 // init with defaults for all lanes
113 // broadcast block pool base to all lanes

1234567