/external/gemmlowp/meta/generators/ |
D | zip_Nx8_neon.py | 40 lanes = [] 44 lanes.append(ZipLane(input_address, registers.DoubleRegister(), 48 lanes.append(ZipLane(address_register, registers.DoubleRegister(), 52 return lanes 64 def GenerateClearAggregators(emitter, lanes): argument 65 for lane in lanes: 69 def GenerateLoadAggregateStore(emitter, lanes, output_address, alignment): argument 74 for lane in lanes: 80 for lane in lanes: 88 def GenerateLeftoverLoadAggregateStore(emitter, leftovers, lanes, argument [all …]
|
D | qnt_Nx8_neon.py | 26 def BuildName(lanes, leftovers, aligned): argument 27 name = 'qnt_%dx8' % lanes 35 def LoadAndDuplicateOffsets(emitter, registers, lanes, offsets): argument 36 if lanes == 1 or lanes == 2 or lanes == 3: 38 for unused_i in range(0, lanes): 46 raise ConfigurationError('Unsupported number of lanes: %d' % lanes) 55 lanes = [] 60 lanes.append(QntLane(source, 68 lanes.append(QntLane(input_register, 77 return lanes [all …]
|
D | mul_Nx8_Mx8_neon.py | 22 self.lanes = [] 25 self.lanes.append(lane) 28 for i in range(0, len(self.lanes)): 29 registers.FreeRegister(self.lanes[i]) 30 self.lanes[i] = None 34 lanes = MulLanes(address) 36 lanes.AddLane(registers.DoubleRegister()) 37 return lanes 41 lanes = MulLanes(address) 42 lanes.AddLane(registers.Low(quad_register)) [all …]
|
D | mul_1x8_Mx8_neon.py | 197 def BuildName(result_type, lhs_add, rhs_add, lanes): argument 198 name = 'mul_1x8_%dx8_%s' % (lanes, result_type) 279 for lanes in range(1, 5): 280 GenerateMul1x8Mx8(emitter, result_type, lhs_add, rhs_add, lanes)
|
/external/gemmlowp/meta/ |
D | test_streams_correctness.cc | 105 template <int lanes, int leftover> 116 prepare_row_major_data(lanes, all_elements, stride, in); in test_2() 117 Stream<std::uint8_t, lanes, 8, leftover, RowMajorWithSum>::Pack(in, params, in test_2() 119 if (check(out, lanes, all_elements)) { in test_2() 124 std::cout << "Row: " << lanes << "x8x" << leftover << " : " in test_2() 131 for (int stride = lanes; stride < lanes + 4; ++stride) { in test_2() 138 prepare_column_major_data(lanes, all_elements, stride, in); in test_2() 139 Stream<std::uint8_t, lanes, 8, leftover, ColumnMajorWithSum>::Pack(in, params, in test_2() 141 if (check(out, lanes, all_elements)) { in test_2() 146 std::cout << "Column: " << lanes << "x8x" << leftover << " : " in test_2() [all …]
|
/external/llvm-project/llvm/test/CodeGen/AMDGPU/ |
D | detect-dead-lanes.mir | 1 # RUN: llc -march=amdgcn -run-pass detect-dead-lanes -o - %s | FileCheck %s 42 # Check defined lanes transfer; Includes checking for some special cases like 122 # Check used lanes transfer; Includes checking for some special cases like 203 # Check that copies to physregs use all lanes, copies from physregs define all 204 # lanes. So we should not get a dead/undef flag here. 296 ; let's swiffle some lanes around for fun... 308 # for the used lanes. The example reads sub3 lane at the end, however with each 349 ; rotate lanes, but skip sub2 lane... 359 # Similar to loop1 test, but check for fixpoint of defined lanes. 392 ; rotate subreg lanes, skipping sub1
|
D | loop_exit_with_xor.ll | 3 ; Where the mask of lanes wanting to exit the loop on this iteration is not 35 ; Where the mask of lanes wanting to exit the loop on this iteration is 59 ; Another case where the mask of lanes wanting to exit the loop is not masked
|
D | scalar-branch-missing-and-exec.ll | 9 ; without ensuring that the resulting mask has bits clear for inactive lanes. 11 ; set bits for inactive lanes.
|
D | subreg-undef-def-with-other-subreg-defs.mir | 4 # Deciding which lanes are killed needs to account for other defs in the 8 # current vreg uses because it shared no lanes with %0.sub1 use on the
|
/external/llvm/test/CodeGen/AMDGPU/ |
D | detect-dead-lanes.mir | 1 # RUN: llc -march=amdgcn -run-pass detect-dead-lanes -o - %s | FileCheck %s 53 # Check defined lanes transfer; Includes checking for some special cases like 134 # Check used lanes transfer; Includes checking for some special cases like 216 # Check that copies to physregs use all lanes, copies from physregs define all 217 # lanes. So we should not get a dead/undef flag here. 315 ; let's swiffle some lanes around for fun... 327 # for the used lanes. The example reads sub3 lane at the end, however with each 371 ; rotate lanes, but skip sub2 lane... 381 # Similar to loop1 test, but check for fixpoint of defined lanes. 417 ; rotate subreg lanes, skipping sub1
|
/external/skqp/src/compute/hs/gen/ |
D | target_opencl.c | 305 m->warps * config->warp.lanes, in hsg_target_opencl() 334 m->warps * config->warp.lanes, in hsg_target_opencl() 500 ops->b * config->warp.lanes, in hsg_target_opencl() 508 ops->b * config->warp.lanes, in hsg_target_opencl() 518 ops->b * config->warp.lanes); in hsg_target_opencl() 526 ops->b * config->warp.lanes); in hsg_target_opencl()
|
D | target_cuda.c | 336 m->warps * config->warp.lanes, in hsg_target_cuda() 373 m->warps * config->warp.lanes, in hsg_target_cuda() 569 ops->b * config->warp.lanes, in hsg_target_cuda() 577 ops->b * config->warp.lanes, in hsg_target_cuda() 587 ops->b * config->warp.lanes); in hsg_target_cuda() 595 ops->b * config->warp.lanes); in hsg_target_cuda()
|
D | target_glsl.c | 324 m->warps * config->warp.lanes, in hsg_target_glsl() 369 m->warps * config->warp.lanes, in hsg_target_glsl() 588 ops->b * config->warp.lanes, in hsg_target_glsl() 596 ops->b * config->warp.lanes, in hsg_target_glsl() 606 ops->b * config->warp.lanes); in hsg_target_glsl() 614 ops->b * config->warp.lanes); in hsg_target_glsl()
|
/external/llvm-project/llvm/test/MachineVerifier/ |
D | test_g_fcmp.mir | 18 ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of lanes 22 ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of lanes
|
D | test_g_icmp.mir | 18 ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of lanes 22 ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of lanes
|
/external/llvm-project/llvm/docs/ |
D | AMDGPUModifierSyntax.rst | 117 all lanes in its group. 129 1, 2, 4, 8 or 16 lanes. 132 … Reverses the lanes for groups of 2, 4, 8, 16 or 32 lanes. 988 Selects which lanes to pull data from, within a group of 8 lanes. This is a mandatory modifier. 1004 dpp8:[{0..7},{0..7},{0..7},{0..7},{0..7},{0..7},{0..7},{0..7}] Select lanes to read from. 1019 Controls interaction with inactive lanes for *dpp8* instructions. The default value is zero. 1021 Note: *inactive* lanes are those whose :ref:`exec<amdgpu_synid_exec>` mask bit is zero. 1028 fi:0 Fetch zero when accessing data from inactive lanes. 1029 fi:1 Fetch pre-exist values from inactive lanes. 1050 Note: the lanes of a wavefront are organized in four *rows* and four *banks*. [all …]
|
/external/tensorflow/tensorflow/core/profiler/internal/ |
D | tfprof_timeline.cc | 351 for (int64 i = 0, end = p->lanes.size(); i < end; ++i) { in AllocateLanes() 352 const auto& lane = p->lanes[i]; in AllocateLanes() 368 l = p->lanes.size(); in AllocateLanes() 371 p->lanes.push_back(nlane); in AllocateLanes() 373 p->lanes[l][start_time] = end_time; in AllocateLanes()
|
/external/vixl/src/aarch64/ |
D | registers-aarch64.h | 551 explicit VRegister(int code, int size_in_bits = kQRegSize, int lanes = 1) in VIXL_DECLARE_REGISTER_COMMON() 555 EncodeLaneSizeInBits(size_in_bits, lanes)) { in VIXL_DECLARE_REGISTER_COMMON() 582 static EncodedSize EncodeLaneSizeInBits(int size_in_bits, int lanes) { in EncodeLaneSizeInBits() argument 583 VIXL_ASSERT(lanes >= 1); in EncodeLaneSizeInBits() 584 VIXL_ASSERT((size_in_bits % lanes) == 0); in EncodeLaneSizeInBits() 585 return EncodeSizeInBits(size_in_bits / lanes); in EncodeLaneSizeInBits()
|
/external/llvm/lib/Target/AArch64/ |
D | AArch64SchedVulcan.td | 669 // ASIMD load, 1 element, all lanes, D-form, B/H/S 670 // ASIMD load, 1 element, all lanes, D-form, D 671 // ASIMD load, 1 element, all lanes, Q-form 691 // ASIMD load, 2 element, all lanes, D-form, B/H/S 692 // ASIMD load, 2 element, all lanes, D-form, D 693 // ASIMD load, 2 element, all lanes, Q-form 714 // ASIMD load, 3 element, all lanes, D-form, B/H/S 715 // ASIMD load, 3 element, all lanes, D-form, D 716 // ASIMD load, 3 element, all lanes, Q-form, B/H/S 717 // ASIMD load, 3 element, all lanes, Q-form, D [all …]
|
/external/rust/crates/ppv-lite86/src/ |
D | soft.rs | 165 fn from_lanes(lanes: [W; 2]) -> Self { in from_lanes() 166 x2::new(lanes) 353 fn from_lanes(lanes: [W; 4]) -> Self { in from_lanes() 354 x4(lanes)
|
/external/mesa3d/src/gallium/drivers/nouveau/codegen/ |
D | nv50_ir_lowering_gm107.cpp | 142 add->lanes = 1; /* abused for .ndv */ in handleManualTXD() 150 add->lanes = 1; /* abused for .ndv */ in handleManualTXD() 190 mov->lanes = 1 << l; in handleManualTXD() 229 insn->lanes = 0; /* abused for !.ndv */ in handleDFDX()
|
/external/arm-trusted-firmware/plat/brcm/board/stingray/src/ |
D | paxb.c | 267 unsigned int lanes = 0; in pcie_set_default_tx_coeff() local 271 for (lanes = 0; lanes < link_width; lanes = lanes + 2) { in pcie_set_default_tx_coeff()
|
/external/tensorflow/tensorflow/python/client/ |
D | timeline.py | 416 lanes = [0] 419 for (i, lts) in enumerate(lanes): 422 lanes[l] = ns.all_start_micros + ns.all_end_rel_micros 425 l = len(lanes) 426 lanes.append(ns.all_start_micros + ns.all_end_rel_micros)
|
/external/llvm-project/clang/include/clang/Basic/ |
D | arm_neon_incl.td | 87 // - "H" - Halve the number of lanes in the type. 88 // - "D" - Double the number of lanes in the type. 104 // all lanes. The type of the vector is the base type of the intrinsic. 109 // the same type by duplicating the scalar value into all lanes. 168 // is a width in bits to reverse. The lanes this maps to is determined 173 // mask0 - The initial sequence of lanes for shuffle ARG0 175 // mask0 - The initial sequence of lanes for shuffle ARG1
|
/external/skqp/src/compute/skc/platforms/cl_12/kernels/ |
D | rasters_alloc.cl | 43 // init with defaults for all lanes 113 // broadcast block pool base to all lanes
|