Home
last modified time | relevance | path

Searched refs:gemm (Results 1 – 25 of 52) sorted by relevance

123

/external/XNNPACK/scripts/
Dgenerate-f32-gemm.sh9 tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/1x4-sca…
10 tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=0 -D INC=1 -o src/f32-gemm/gen-inc/1x4…
12 tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/2x4-sca…
13 tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=0 -D INC=1 -o src/f32-gemm/gen-inc/2x4…
15 tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=2 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/4x2-sca…
17 tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D INC=0 -o src/f32-gemm/gen/4x4-sca…
18 tools/xngen src/f32-gemm/scalar.c.in -D MR=4 -D NR=4 -D WASM=0 -D INC=1 -o src/f32-gemm/gen-inc/4x4…
21 tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D INC=0 -o src/f32-gemm/gen/1x4-was…
22 tools/xngen src/f32-gemm/scalar.c.in -D MR=1 -D NR=4 -D WASM=1 -D INC=1 -o src/f32-gemm/gen-inc/1x4…
24 tools/xngen src/f32-gemm/scalar.c.in -D MR=2 -D NR=4 -D WASM=1 -D INC=0 -o src/f32-gemm/gen/2x4-was…
[all …]
Dgenerate-f16-gemm.sh9 tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=4 -D NR=8 -o src/f16-gemm/gen/4x8-neonfp16ar…
10 tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=6 -D NR=8 -o src/f16-gemm/gen/6x8-neonfp16ar…
11 tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=8 -D NR=8 -o src/f16-gemm/gen/8x8-neonfp16ar…
14 tools/generate-gemm-test.py --spec test/f16-gemm.yaml --output test/f16-gemm.cc
Dgenerate-tests.sh8 tools/generate-gemm-test.py --spec test/q8-gemm.yaml --output test/q8-gemm.cc
9 tools/generate-gemm-test.py --spec test/q8-igemm.yaml --output test/q8-igemm.cc
Dgenerate-f32-ppmm.sh26 tools/generate-gemm-test.py --spec test/f32-ppmm.yaml --output test/f32-ppmm.cc
/external/XNNPACK/test/
Ddeconvolution-nhwc.cc27 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
39 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
52 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
65 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
73 …for (size_t output_channels = 1; output_channels <= xnn_params.q8.gemm.nr * 2; output_channels *= …
90 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
102 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
103 .output_pixel_stride(xnn_params.q8.gemm.nr * 2 + 13)
114 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
126 .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
[all …]
/external/XNNPACK/src/
Dfully-connected-nc.c132 const uint32_t nr = xnn_params.q8.gemm.nr; in xnn_create_fully_connected_nc_q8()
133 const uint32_t kr = UINT32_C(1) << xnn_params.q8.gemm.log2_kr; in xnn_create_fully_connected_nc_q8()
177 fully_connected_op->ukernel.gemm = (struct xnn_ukernel_gemm) { in xnn_create_fully_connected_nc_q8()
178 .default_function = xnn_params.q8.gemm.gemm, in xnn_create_fully_connected_nc_q8()
179 .mr = xnn_params.q8.gemm.mr, in xnn_create_fully_connected_nc_q8()
273 const uint32_t nr = xnn_params.f32.gemm.nr; in xnn_create_fully_connected_nc_f32()
274 const uint32_t kr = UINT32_C(1) << xnn_params.f32.gemm.log2_kr; in xnn_create_fully_connected_nc_f32()
275 const uint32_t sr = UINT32_C(1) << xnn_params.f32.gemm.log2_sr; in xnn_create_fully_connected_nc_f32()
312 fully_connected_op->ukernel.gemm = (struct xnn_ukernel_gemm) { in xnn_create_fully_connected_nc_f32()
313 .default_function = xnn_params.f32.gemm.gemm, in xnn_create_fully_connected_nc_f32()
[all …]
Dinit.c76 xnn_params.q8.gemm = (struct gemm_parameters) { in init()
77 .gemm = (xnn_gemm_ukernel_function) xnn_q8_gemm_ukernel_4x8__neon, in init()
139 xnn_params.f32.gemm = (struct gemm_parameters) { in init()
140 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a53, in init()
152 xnn_params.f32.gemm = (struct gemm_parameters) { in init()
153 … .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_pld_cortex_a75, in init()
163 xnn_params.f32.gemm = (struct gemm_parameters) { in init()
164 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75, in init()
174 xnn_params.f32.gemm = (struct gemm_parameters) { in init()
175 .gemm = (xnn_gemm_ukernel_function) xnn_f32_gemm_ukernel_4x8__neon_lane_ld128, in init()
[all …]
Dconvolution-nhwc.c285 const uint32_t nr = xnn_params.q8.gemm.nr; in xnn_create_convolution2d_nhwc_q8()
286 const uint32_t kr = UINT32_C(1) << xnn_params.q8.gemm.log2_kr; in xnn_create_convolution2d_nhwc_q8()
306 convolution_op->ukernel.gemm = (struct xnn_ukernel_gemm) { in xnn_create_convolution2d_nhwc_q8()
307 .mr = xnn_params.q8.gemm.mr, in xnn_create_convolution2d_nhwc_q8()
310 .default_function = xnn_params.q8.gemm.gemm, in xnn_create_convolution2d_nhwc_q8()
328 .mr = xnn_params.q8.gemm.mr, in xnn_create_convolution2d_nhwc_q8()
331 .default_function = xnn_params.q8.gemm.igemm, in xnn_create_convolution2d_nhwc_q8()
615 const uint32_t nr = xnn_params.f32.gemm.nr; in xnn_create_convolution2d_nhwc_f32()
616 const uint32_t kr = UINT32_C(1) << xnn_params.f32.gemm.log2_kr; in xnn_create_convolution2d_nhwc_f32()
617 const uint32_t sr = UINT32_C(1) << xnn_params.f32.gemm.log2_sr; in xnn_create_convolution2d_nhwc_f32()
[all …]
Ddeconvolution-nhwc.c190 const uint32_t mr = xnn_params.q8.gemm.mr; in xnn_create_deconvolution2d_nhwc_q8()
191 const uint32_t nr = xnn_params.q8.gemm.nr; in xnn_create_deconvolution2d_nhwc_q8()
192 const uint32_t kr = UINT32_C(1) << xnn_params.q8.gemm.log2_kr; in xnn_create_deconvolution2d_nhwc_q8()
193 const xnn_igemm_ukernel_function ukernel_function = xnn_params.q8.gemm.igemm; in xnn_create_deconvolution2d_nhwc_q8()
428 uint32_t mr = xnn_params.f32.gemm.mr; in xnn_create_deconvolution2d_nhwc_f32()
429 uint32_t nr = xnn_params.f32.gemm.nr; in xnn_create_deconvolution2d_nhwc_f32()
430 uint32_t kr = UINT32_C(1) << xnn_params.f32.gemm.log2_kr; in xnn_create_deconvolution2d_nhwc_f32()
431 uint32_t sr = UINT32_C(1) << xnn_params.f32.gemm.log2_sr; in xnn_create_deconvolution2d_nhwc_f32()
432 xnn_igemm_ukernel_function ukernel_function = xnn_params.f32.gemm.igemm; in xnn_create_deconvolution2d_nhwc_f32()
/external/XNNPACK/
DAndroid.bp76 "src/f32-gemm/gen-inc/1x4-scalar.c",
77 "src/f32-gemm/gen-inc/2x4-scalar.c",
78 "src/f32-gemm/gen-inc/4x4-scalar.c",
79 "src/f32-gemm/gen/1x4-scalar.c",
80 "src/f32-gemm/gen/2x4-scalar.c",
81 "src/f32-gemm/gen/4x2-scalar.c",
82 "src/f32-gemm/gen/4x4-scalar.c",
187 "src/q8-gemm/2x2-scalar.c",
233 "src/f32-gemm/gen/1x8-psimd-loadsplat.c",
234 "src/f32-gemm/gen/1x8-psimd-splat.c",
[all …]
DBUILD.bazel113 "src/f32-gemm/gen-inc/1x4-scalar.c",
114 "src/f32-gemm/gen-inc/2x4-scalar.c",
115 "src/f32-gemm/gen-inc/4x4-scalar.c",
116 "src/f32-gemm/gen/1x4-scalar.c",
117 "src/f32-gemm/gen/2x4-scalar.c",
118 "src/f32-gemm/gen/4x2-scalar.c",
119 "src/f32-gemm/gen/4x4-scalar.c",
224 "src/q8-gemm/2x2-scalar.c",
265 "src/f32-gemm/gen-inc/1x4-wasm.c",
266 "src/f32-gemm/gen-inc/2x4-wasm.c",
[all …]
DCMakeLists.txt205 src/f32-gemm/gen/1x4-scalar.c
206 src/f32-gemm/gen/2x4-scalar.c
207 src/f32-gemm/gen/4x2-scalar.c
208 src/f32-gemm/gen/4x4-scalar.c
209 src/f32-gemm/gen-inc/1x4-scalar.c
210 src/f32-gemm/gen-inc/2x4-scalar.c
211 src/f32-gemm/gen-inc/4x4-scalar.c
316 src/q8-gemm/2x2-scalar.c
361 src/f32-gemm/gen/1x8-psimd-loadsplat.c
362 src/f32-gemm/gen/1x8-psimd-splat.c
[all …]
/external/gemmlowp/test/
Dbenchmark.cc68 for (auto gemm : gemms) { in time_for_gemms() local
69 int rows = gemm.rows; in time_for_gemms()
70 int depth = gemm.depth; in time_for_gemms()
71 int cols = gemm.cols; in time_for_gemms()
135 for (auto gemm : gemms) { in gflops_for_gemms() local
136 ops += 2.0 * gemm.rows * gemm.depth * gemm.cols; in gflops_for_gemms()
175 for (auto gemm : benchmark_gemms) { in benchmark() local
178 unique_gemm.push_back(gemm); in benchmark()
182 benchmark_results[gemm].emplace_back(gflops); in benchmark()
/external/tensorflow/tensorflow/compiler/xla/service/gpu/
Dgemm_algorithm_picker.cc61 const HloInstruction* gemm, se::DeviceMemoryBase lhs_buffer, in DoUncachedGemmAutotune() argument
71 gemm->backend_config<GemmBackendConfig>().ValueOrDie(); in DoUncachedGemmAutotune()
73 gemm->GetModule()->config().debug_options().xla_gpu_autotune_level(); in DoUncachedGemmAutotune()
77 VLOG(3) << "Starting autotune of GemmThunk " << gemm->ToString(); in DoUncachedGemmAutotune()
90 InitializeBuffer(stream, gemm->shape().element_type(), &rng_state, in DoUncachedGemmAutotune()
99 CHECK(RunGemm(gemm, backend_config, lhs_buffer, rhs_buffer, output_buffer, in DoUncachedGemmAutotune()
188 return {best_result->gemm().algorithm()}; in DoUncachedGemmAutotune()
Dgemm_thunk.cc158 Status RunGemm(const HloInstruction *gemm, in RunGemm() argument
167 CHECK(IsCublasGemm(*gemm)); in RunGemm()
169 const Shape &output_shape = gemm->shape(); in RunGemm()
170 const HloInstruction *lhs = gemm->operand(0); in RunGemm()
171 const HloInstruction *rhs = gemm->operand(1); in RunGemm()
243 implements_whole_instruction ? gemm : nullptr) in RunGemm()
Dgemm_thunk.h72 const HloInstruction* gemm, const GemmBackendConfig& backend_config,
/external/gemmlowp/meta/
DREADME52 Using those multiplication and packing/unpacking primitives 144 gemm function
53 versions were prepared. On top of them one high level gemm function that would
64 The library shows up to 35% faster gemm execution in some cases (e.g. ImageNet
71 -- generated ARM/NEON 8bit x 8bit gemm implementation. Contains all the
73 a single gemm function that switches between the optimized versions based
77 -- a simple parallelization scheme for the gemm function.
/external/tensorflow/tensorflow/core/kernels/
Deigen_mkldnn_contraction_kernel_test.cc139 Scalar gemm = mkldnn_result(i); in TEST() local
142 Scalar delta = std::abs(gemm - matmul); in TEST()
210 Scalar gemm = res(i, j); in TEST() local
212 EXPECT_EQ(gemm, matmul); in TEST()
/external/eigen/bench/perf_monitoring/gemm/
Dgemm.cpp17 void gemm(const Mat &A, const Mat &B, Mat &C) in gemm() function
47 BENCH(t, tries, rep, gemm(A,B,C)); in bench()
/external/tensorflow/third_party/mkl_dnn/
Dmkldnn.BUILD91 "src/cpu/gemm",
133 "src/cpu/gemm",
/external/XNNPACK/src/xnnpack/
Doperator.h147 struct xnn_ukernel_gemm gemm; member
268 struct gemm_context gemm; member
/external/eigen/bench/
Dbench_gemm.cpp130 EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c) in gemm() function
249 gemm(a,b,c); in main()
268 BENCH(tmt, tries, rep, gemm(a,b,c)); in main()
279 BENCH(tmono, tries, rep, gemm(a,b,c)); in main()
/external/eigen/bench/btl/libs/BLAS/
Dblas_interface_impl.hh42 BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); in matrix_matrix_product()
46 BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N); in transposed_matrix_matrix_product()
/external/XNNPACK/bench/
Df32-gemm-e2e.cc27 xnn_f32_gemm_ukernel_function gemm, in GEMMEnd2EndBenchmark() argument
43 xnn_params.f32.gemm = (struct gemm_parameters) { in GEMMEnd2EndBenchmark()
44 .gemm = xnn_gemm_ukernel_function(gemm), in GEMMEnd2EndBenchmark()
/external/tensorflow/tensorflow/core/protobuf/
Dautotuning.proto67 GemmKey gemm = 6; field

123