Lines Matching refs:mtls
87 static void setupGEMM(MTLaunchStructForEachBlas *mtls, const Allocation **ain, RsBlasCall* call, in setupGEMM() argument
94 memset(mtls, 0, sizeof(MTLaunchStructForEachBlas)); in setupGEMM()
95 mtls->rs = ctx; in setupGEMM()
96 mtls->sc = call; in setupGEMM()
97 mtls->dimPtr = &mtls->fep.dim; in setupGEMM()
98 mtls->fep.dim.x = nn; in setupGEMM()
99 mtls->fep.dim.y = mm; in setupGEMM()
100 mtls->fep.dim.z = kk; in setupGEMM()
102 memcpy(mtls->ains, ain, 3 * sizeof(ain[0])); in setupGEMM()
123 mtls->numTileM = 1; in setupGEMM()
124 mtls->numTileN = 1; in setupGEMM()
125 mtls->tileSizeM = mm; in setupGEMM()
126 mtls->tileSizeN = nn; in setupGEMM()
129 mtls->isThreadable = (tileSizeM > 0 || tileSizeN > 0); in setupGEMM()
131 mtls->numTileM += (mm - 1) / tileSizeM; in setupGEMM()
132 mtls->tileSizeM = tileSizeM; in setupGEMM()
135 mtls->numTileN += (nn - 1) / tileSizeN; in setupGEMM()
136 mtls->tileSizeN = tileSizeN; in setupGEMM()
139 mtls->mSliceNum = 0; in setupGEMM()
145 RsBlasCall* call, MTLaunchStructForEachBlas *mtls) { in walk_tiled_gemm() argument
157 ain[0] = mtls->ains[0]; in walk_tiled_gemm()
158 ain[1] = mtls->ains[1]; in walk_tiled_gemm()
159 ain[2] = mtls->ains[2]; in walk_tiled_gemm()
167 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); in walk_tiled_gemm()
169 uint32_t mStart = (slice % mtls->numTileM) * mtls->tileSizeM; in walk_tiled_gemm()
170 uint32_t mEnd = mStart + mtls->tileSizeM; in walk_tiled_gemm()
176 uint32_t nStart = (slice / mtls->numTileM) * mtls->tileSizeN; in walk_tiled_gemm()
177 uint32_t nEnd = nStart + mtls->tileSizeN; in walk_tiled_gemm()
193 MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr; in walk_2d_sgemm() local
194 RsBlasCall* call = (RsBlasCall*) mtls->sc; in walk_2d_sgemm()
199 walk_tiled_gemm<float, float, FnPtr_cblas_sgemm>(cblas_sgemm, alpha, beta, 1, call, mtls); in walk_2d_sgemm()
204 MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr; in walk_2d_dgemm() local
205 RsBlasCall* call = (RsBlasCall*) mtls->sc; in walk_2d_dgemm()
210 walk_tiled_gemm<double, double, FnPtr_cblas_dgemm>(cblas_dgemm, alpha, beta, 1, call, mtls); in walk_2d_dgemm()
215 MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr; in walk_2d_cgemm() local
216 RsBlasCall* call = (RsBlasCall*) mtls->sc; in walk_2d_cgemm()
221 walk_tiled_gemm<float, void *, FnPtr_cblas_cgemm>(cblas_cgemm, alpha, beta, 2, call, mtls); in walk_2d_cgemm()
226 MTLaunchStructForEachBlas *mtls = (MTLaunchStructForEachBlas *)usr; in walk_2d_zgemm() local
227 RsBlasCall* call = (RsBlasCall*) mtls->sc; in walk_2d_zgemm()
232 walk_tiled_gemm<double, void *, FnPtr_cblas_zgemm>(cblas_zgemm, alpha, beta, 2, call, mtls); in walk_2d_zgemm()
259 MTLaunchStructForEachBlas mtls; in invokeForEach() local
638 setupGEMM(&mtls, ain, call, mCtx); in invokeForEach()
639 if (mtls.isThreadable) { in invokeForEach()
640 mCtx->launchThreads(walk_2d_sgemm, &mtls); in invokeForEach()
675 setupGEMM(&mtls, ain, call, mCtx); in invokeForEach()
676 if (mtls.isThreadable) { in invokeForEach()
677 mCtx->launchThreads(walk_2d_dgemm, &mtls); in invokeForEach()
711 setupGEMM(&mtls, ain, call, mCtx); in invokeForEach()
712 if (mtls.isThreadable) { in invokeForEach()
713 mCtx->launchThreads(walk_2d_cgemm, &mtls); in invokeForEach()
747 setupGEMM(&mtls, ain, call, mCtx); in invokeForEach()
748 if (mtls.isThreadable) { in invokeForEach()
749 mCtx->launchThreads(walk_2d_zgemm, &mtls); in invokeForEach()