1 /**
2 * Copyright 2022 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef MINDSPORE_LITE_SRC_RUNTIME_THREAD_COST_MODEL_H_
18 #define MINDSPORE_LITE_SRC_RUNTIME_THREAD_COST_MODEL_H_
19
20 #include <stdint.h>
21 #include "nnacl/op_base.h"
22 #include "include/api/context.h"
23 #include "schema/ops_generated.h"
24
25 namespace mindspore::lite {
// Workload description consumed by ThreadCostModel's cost formulas.
// Every field is zero-initialized so that a default-constructed context never exposes indeterminate values.
struct ThreadCostContext {
  int64_t total_unit_num_{0};         // number of units; multiplied by the per-unit cost in TotalCost()
  int64_t per_unit_load_num_{0};      // loads per unit; weighted by ThreadCostModel::per_unit_load_cost_
  int64_t per_unit_store_num_{0};     // stores per unit; weighted by ThreadCostModel::per_unit_store_cost_
  float per_unit_compute_cost_{0.0f};  // compute cost per unit; scaled by ThreadCostModel::per_unit_compute_num_
};
32
33 struct ThreadCostModel {
UnitCostThreadCostModel34 static float UnitCost(const ThreadCostContext *thread_cost_context) {
35 return per_unit_load_cost_ * thread_cost_context->per_unit_load_num_ +
36 per_unit_store_cost_ * thread_cost_context->per_unit_store_num_ +
37 thread_cost_context->per_unit_compute_cost_ * per_unit_compute_num_;
38 }
39
TotalCostThreadCostModel40 static float TotalCost(const ThreadCostContext *thread_cost_context) {
41 return thread_cost_context->total_unit_num_ * UnitCost(thread_cost_context);
42 }
43
44 // ThreadNum assesses parallel thread num. Value of 1.0 means ideal parallel task size. Values < 1.0 mean that task
45 // granularity needs to be increased to mitigate parallelization overheads.
ParallelDegreeThreadCostModel46 static float ParallelDegree(const ThreadCostContext *thread_cost_context) {
47 return TotalCost(thread_cost_context) / parallel_thread_cost_;
48 }
49
ThreadNumThreadCostModel50 static int ThreadNum(const ThreadCostContext *thread_cost_context) {
51 return MSMAX(1,
52 static_cast<int>((TotalCost(thread_cost_context) - thread_startup_cost_) / single_thread_cost_ + 0.9));
53 }
54
ThreadBlockSizeThreadCostModel55 static int64_t ThreadBlockSize(const ThreadCostContext *thread_cost_context) {
56 return static_cast<int64_t>(parallel_thread_cost_ / UnitCost(thread_cost_context));
57 }
58 static int GetOptimalThreadNum(const ThreadCostContext *thread_cost_context, const int thread_num);
59
60 static float per_unit_load_cost_; // per unit load cost
61 static float per_unit_store_cost_; // per unit store cost
62 static int64_t per_unit_compute_num_; // per unit compute num
63
64 static float thread_startup_cost_; // thread startup inherent cost
65 static float single_thread_cost_; // Minimum cost of single-threaded
66 static float parallel_thread_cost_; // Minimum cost of per thread in parallel-thread
67 };
68
// Returns a float cost for the given kernel type; the definition is not part of this header.
// NOTE(review): presumably this is the value stored in ThreadCostContext::per_unit_compute_cost_ — confirm.
float GetKernelComputeCost(int32_t kernel_type);
// Defined out of line; takes a cost context and a task count and returns an int.
// NOTE(review): presumably the returned value is a thread count — confirm against the definition.
int ThreadNumUpdateStrategy(const ThreadCostContext *thread_cost_context, int task_num);
71
72 #ifdef DYNAMIC_THREAD_DISTRIBUTE
// Only declared here when DYNAMIC_THREAD_DISTRIBUTE is defined; the definition lives outside this header.
// NOTE(review): presumably adjusts `thread_num` using the kernel's cost figures — confirm against the definition.
int UpdateThreadNum(int32_t kernel_type, int64_t per_unit_load_num, int64_t per_unit_store_num, int64_t unit_num,
                    int thread_num);
75 #else
// Fallback used when DYNAMIC_THREAD_DISTRIBUTE is not defined: the cost-related arguments are ignored and the
// requested thread count is handed back unchanged.
inline int UpdateThreadNum([[maybe_unused]] int32_t kernel_type, [[maybe_unused]] int64_t per_unit_load_num,
                           [[maybe_unused]] int64_t per_unit_store_num, [[maybe_unused]] int64_t unit_num,
                           int thread_num) {
  return thread_num;
}
84 #endif
85 } // namespace mindspore::lite
86
#endif  // MINDSPORE_LITE_SRC_RUNTIME_THREAD_COST_MODEL_H_
88