/external/XNNPACK/scripts/ |
D | generate-f32-gemm.sh | 9 tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/1x4-sca… 10 tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D INC=1 -o src/f32-gemm/gen-inc/1x4… 12 tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/2x4-sca… 13 tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D INC=1 -o src/f32-gemm/gen-inc/2x4… 15 tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/4x2-sca… 17 tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/4x4-sca… 18 tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D INC=1 -o src/f32-gemm/gen-inc/4x4… 21 tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D INC=0 -o src/f32-gemm/gen/1x4-was… 22 tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D INC=1 -o src/f32-gemm/gen-inc/1x4… 24 tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D INC=0 -o src/f32-gemm/gen/2x4-was… [all …]
|
D | generate-f16-gemm.sh | 9 tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=4 -D NR=8 -o src/f16-gemm/gen/4x8-neonfp16ar… 10 tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=6 -D NR=8 -o src/f16-gemm/gen/6x8-neonfp16ar… 11 tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=8 -D NR=8 -o src/f16-gemm/gen/8x8-neonfp16ar… 14 tools/generate-gemm-test.py --spec test/f16-gemm.yaml --output test/f16-gemm.cc
|
D | generate-tests.sh | 8 tools/generate-gemm-test.py --spec test/q8-gemm.yaml --output test/q8-gemm.cc 9 tools/generate-gemm-test.py --spec test/q8-igemm.yaml --output test/q8-igemm.cc
|
D | generate-f32-ppmm.sh | 26 tools/generate-gemm-test.py --spec test/f32-ppmm.yaml --output test/f32-ppmm.cc
|
/external/XNNPACK/test/ |
D | deconvolution-nhwc.cc | 27 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3) 39 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3) 52 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3) 65 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3) 73 …for (size_t output_channels = 1; output_channels <= xnn_params.q8.gemm.nr * 2; output_channels *= … 90 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3) 102 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3) 103 .output_pixel_stride(xnn_params.q8.gemm.nr * 2 + 13) 114 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3) 126 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3) [all …]
|
/external/XNNPACK/src/ |
D | fully-connected-nc.c | 132 const uint32_t nr = xnn_params.q8.gemm.nr; in xnn_create_fully_connected_nc_q8() 133 const uint32_t kr = UINT32_C(1) << xnn_params.q8.gemm.log2_kr; in xnn_create_fully_connected_nc_q8() 177 fully_connected_op->ukernel.gemm = (struct xnn_ukernel_gemm) { in xnn_create_fully_connected_nc_q8() 178 .default_function = xnn_params.q8.gemm.gemm, in xnn_create_fully_connected_nc_q8() 179 .mr = xnn_params.q8.gemm.mr, in xnn_create_fully_connected_nc_q8() 273 const uint32_t nr = xnn_params.f32.gemm.nr; in xnn_create_fully_connected_nc_f32() 274 const uint32_t kr = UINT32_C(1) << xnn_params.f32.gemm.log2_kr; in xnn_create_fully_connected_nc_f32() 275 const uint32_t sr = UINT32_C(1) << xnn_params.f32.gemm.log2_sr; in xnn_create_fully_connected_nc_f32() 312 fully_connected_op->ukernel.gemm = (struct xnn_ukernel_gemm) { in xnn_create_fully_connected_nc_f32() 313 .default_function = xnn_params.f32.gemm.gemm, in xnn_create_fully_connected_nc_f32() [all …]
|
D | init.c | 76 xnn_params.q8.gemm = (struct gemm_parameters) { in init() 77 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon, in init() 139 xnn_params.f32.gemm = (struct gemm_parameters) { in init() 140 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a53, in init() 152 xnn_params.f32.gemm = (struct gemm_parameters) { in init() 153 … .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_pld_cortex_a75, in init() 163 xnn_params.f32.gemm = (struct gemm_parameters) { in init() 164 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, in init() 174 xnn_params.f32.gemm = (struct gemm_parameters) { in init() 175 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_lane_ld128, in init() [all …]
|
D | convolution-nhwc.c | 285 const uint32_t nr = xnn_params.q8.gemm.nr; in xnn_create_convolution2d_nhwc_q8() 286 const uint32_t kr = UINT32_C(1) << xnn_params.q8.gemm.log2_kr; in xnn_create_convolution2d_nhwc_q8() 306 convolution_op->ukernel.gemm = (struct xnn_ukernel_gemm) { in xnn_create_convolution2d_nhwc_q8() 307 .mr = xnn_params.q8.gemm.mr, in xnn_create_convolution2d_nhwc_q8() 310 .default_function = xnn_params.q8.gemm.gemm, in xnn_create_convolution2d_nhwc_q8() 328 .mr = xnn_params.q8.gemm.mr, in xnn_create_convolution2d_nhwc_q8() 331 .default_function = xnn_params.q8.gemm.igemm, in xnn_create_convolution2d_nhwc_q8() 615 const uint32_t nr = xnn_params.f32.gemm.nr; in xnn_create_convolution2d_nhwc_f32() 616 const uint32_t kr = UINT32_C(1) << xnn_params.f32.gemm.log2_kr; in xnn_create_convolution2d_nhwc_f32() 617 const uint32_t sr = UINT32_C(1) << xnn_params.f32.gemm.log2_sr; in xnn_create_convolution2d_nhwc_f32() [all …]
|
D | deconvolution-nhwc.c | 190 const uint32_t mr = xnn_params.q8.gemm.mr; in xnn_create_deconvolution2d_nhwc_q8() 191 const uint32_t nr = xnn_params.q8.gemm.nr; in xnn_create_deconvolution2d_nhwc_q8() 192 const uint32_t kr = UINT32_C(1) << xnn_params.q8.gemm.log2_kr; in xnn_create_deconvolution2d_nhwc_q8() 193 const xnn_igemm_ukernel_function ukernel_function = xnn_params.q8.gemm.igemm; in xnn_create_deconvolution2d_nhwc_q8() 428 uint32_t mr = xnn_params.f32.gemm.mr; in xnn_create_deconvolution2d_nhwc_f32() 429 uint32_t nr = xnn_params.f32.gemm.nr; in xnn_create_deconvolution2d_nhwc_f32() 430 uint32_t kr = UINT32_C(1) << xnn_params.f32.gemm.log2_kr; in xnn_create_deconvolution2d_nhwc_f32() 431 uint32_t sr = UINT32_C(1) << xnn_params.f32.gemm.log2_sr; in xnn_create_deconvolution2d_nhwc_f32() 432 xnn_igemm_ukernel_function ukernel_function = xnn_params.f32.gemm.igemm; in xnn_create_deconvolution2d_nhwc_f32()
|
/external/XNNPACK/ |
D | Android.bp | 76 "src/f32-gemm/gen-inc/1x4-scalar.c", 77 "src/f32-gemm/gen-inc/2x4-scalar.c", 78 "src/f32-gemm/gen-inc/4x4-scalar.c", 79 "src/f32-gemm/gen/1x4-scalar.c", 80 "src/f32-gemm/gen/2x4-scalar.c", 81 "src/f32-gemm/gen/4x2-scalar.c", 82 "src/f32-gemm/gen/4x4-scalar.c", 187 "src/q8-gemm/2x2-scalar.c", 233 "src/f32-gemm/gen/1x8-psimd-loadsplat.c", 234 "src/f32-gemm/gen/1x8-psimd-splat.c", [all …]
|
D | BUILD.bazel | 113 "src/f32-gemm/gen-inc/1x4-scalar.c", 114 "src/f32-gemm/gen-inc/2x4-scalar.c", 115 "src/f32-gemm/gen-inc/4x4-scalar.c", 116 "src/f32-gemm/gen/1x4-scalar.c", 117 "src/f32-gemm/gen/2x4-scalar.c", 118 "src/f32-gemm/gen/4x2-scalar.c", 119 "src/f32-gemm/gen/4x4-scalar.c", 224 "src/q8-gemm/2x2-scalar.c", 265 "src/f32-gemm/gen-inc/1x4-wasm.c", 266 "src/f32-gemm/gen-inc/2x4-wasm.c", [all …]
|
D | CMakeLists.txt | 205 src/f32-gemm/gen/1x4-scalar.c 206 src/f32-gemm/gen/2x4-scalar.c 207 src/f32-gemm/gen/4x2-scalar.c 208 src/f32-gemm/gen/4x4-scalar.c 209 src/f32-gemm/gen-inc/1x4-scalar.c 210 src/f32-gemm/gen-inc/2x4-scalar.c 211 src/f32-gemm/gen-inc/4x4-scalar.c 316 src/q8-gemm/2x2-scalar.c 361 src/f32-gemm/gen/1x8-psimd-loadsplat.c 362 src/f32-gemm/gen/1x8-psimd-splat.c [all …]
|
/external/gemmlowp/test/ |
D | benchmark.cc | 68 for (auto gemm : gemms) { in time_for_gemms() local 69 int rows = gemm.rows; in time_for_gemms() 70 int depth = gemm.depth; in time_for_gemms() 71 int cols = gemm.cols; in time_for_gemms() 135 for (auto gemm : gemms) { in gflops_for_gemms() local 136 ops += 2.0 * gemm.rows * gemm.depth * gemm.cols; in gflops_for_gemms() 175 for (auto gemm : benchmark_gemms) { in benchmark() local 178 unique_gemm.push_back(gemm); in benchmark() 182 benchmark_results[gemm].emplace_back(gflops); in benchmark()
|
/external/tensorflow/tensorflow/compiler/xla/service/gpu/ |
D | gemm_algorithm_picker.cc | 61 const HloInstruction* gemm, se::DeviceMemoryBase lhs_buffer, in DoUncachedGemmAutotune() argument 71 gemm->backend_config<GemmBackendConfig>().ValueOrDie(); in DoUncachedGemmAutotune() 73 gemm->GetModule()->config().debug_options().xla_gpu_autotune_level(); in DoUncachedGemmAutotune() 77 VLOG(3) << "Starting autotune of GemmThunk " << gemm->ToString(); in DoUncachedGemmAutotune() 90 InitializeBuffer(stream, gemm->shape().element_type(), &rng_state, in DoUncachedGemmAutotune() 99 CHECK(RunGemm(gemm, backend_config, lhs_buffer, rhs_buffer, output_buffer, in DoUncachedGemmAutotune() 188 return {best_result->gemm().algorithm()}; in DoUncachedGemmAutotune()
|
D | gemm_thunk.cc | 158 Status RunGemm(const HloInstruction *gemm, in RunGemm() argument 167 CHECK(IsCublasGemm(*gemm)); in RunGemm() 169 const Shape &output_shape = gemm->shape(); in RunGemm() 170 const HloInstruction *lhs = gemm->operand(0); in RunGemm() 171 const HloInstruction *rhs = gemm->operand(1); in RunGemm() 243 implements_whole_instruction ? gemm : nullptr) in RunGemm()
|
D | gemm_thunk.h | 72 const HloInstruction* gemm, const GemmBackendConfig& backend_config,
|
/external/gemmlowp/meta/ |
D | README | 52 Using those multiplication and packing/unpacking primitives 144 gemm function 53 versions were prepared. On top of them one high level gemm function that would 64 The library shows up to 35% faster gemm execution in some cases (e.g. ImageNet 71 -- generated ARM/NEON 8bit x 8bit gemm implementation. Contains all the 73 a single gemm function that switches between the optimized versions based 77 -- a simple parallelization scheme for the gemm function.
|
/external/tensorflow/tensorflow/core/kernels/ |
D | eigen_mkldnn_contraction_kernel_test.cc | 139 Scalar gemm = mkldnn_result(i); in TEST() local 142 Scalar delta = std::abs(gemm - matmul); in TEST() 210 Scalar gemm = res(i, j); in TEST() local 212 EXPECT_EQ(gemm, matmul); in TEST()
|
/external/eigen/bench/perf_monitoring/gemm/ |
D | gemm.cpp | 17 void gemm(const Mat &A, const Mat &B, Mat &C) in gemm() function 47 BENCH(t, tries, rep, gemm(A,B,C)); in bench()
|
/external/tensorflow/third_party/mkl_dnn/ |
D | mkldnn.BUILD | 91 "src/cpu/gemm", 133 "src/cpu/gemm",
|
/external/XNNPACK/src/xnnpack/ |
D | operator.h | 147 struct xnn_ukernel_gemm gemm; member 268 struct gemm_context gemm; member
|
/external/eigen/bench/ |
D | bench_gemm.cpp | 130 EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) in gemm() function 249 gemm(a,b,c); in main() 268 BENCH(tmt, tries, rep, gemm(a,b,c)); in main() 279 BENCH(tmono, tries, rep, gemm(a,b,c)); in main()
|
/external/eigen/bench/btl/libs/BLAS/ |
D | blas_interface_impl.hh | 42 BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); in matrix_matrix_product() 46 BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); in transposed_matrix_matrix_product()
|
/external/XNNPACK/bench/ |
D | f32-gemm-e2e.cc | 27 xnn_f32_gemm_ukernel_function gemm, in GEMMEnd2EndBenchmark() argument 43 xnn_params.f32.gemm = (struct gemm_parameters) { in GEMMEnd2EndBenchmark() 44 .gemm = xnn_gemm_ukernel_function(gemm), in GEMMEnd2EndBenchmark()
|
/external/tensorflow/tensorflow/core/protobuf/ |
D | autotuning.proto | 67 GemmKey gemm = 6; field
|