• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_LITE_SRC_RUNTIME_THREAD_COST_MODEL_H_
18 #define MINDSPORE_LITE_SRC_RUNTIME_THREAD_COST_MODEL_H_
19 
20 #include <stdint.h>
21 #include "nnacl/op_base.h"
22 #include "include/api/context.h"
23 #include "schema/ops_generated.h"
24 
25 namespace mindspore::lite {
// Workload description consumed by ThreadCostModel: how many units there are and what each unit costs.
struct ThreadCostContext {
  int64_t total_unit_num_;       // number of work units in the whole task
  int64_t per_unit_load_num_;    // load count attributed to a single unit
  int64_t per_unit_store_num_;   // store count attributed to a single unit
  float per_unit_compute_cost_;  // compute cost attributed to a single unit
};
32 
33 struct ThreadCostModel {
UnitCostThreadCostModel34   static float UnitCost(const ThreadCostContext *thread_cost_context) {
35     return per_unit_load_cost_ * thread_cost_context->per_unit_load_num_ +
36            per_unit_store_cost_ * thread_cost_context->per_unit_store_num_ +
37            thread_cost_context->per_unit_compute_cost_ * per_unit_compute_num_;
38   }
39 
TotalCostThreadCostModel40   static float TotalCost(const ThreadCostContext *thread_cost_context) {
41     return thread_cost_context->total_unit_num_ * UnitCost(thread_cost_context);
42   }
43 
44   // ThreadNum assesses parallel thread num. Value of 1.0 means ideal parallel task size. Values < 1.0 mean that task
45   // granularity needs to be increased to mitigate parallelization overheads.
ParallelDegreeThreadCostModel46   static float ParallelDegree(const ThreadCostContext *thread_cost_context) {
47     return TotalCost(thread_cost_context) / parallel_thread_cost_;
48   }
49 
ThreadNumThreadCostModel50   static int ThreadNum(const ThreadCostContext *thread_cost_context) {
51     return MSMAX(1,
52                  static_cast<int>((TotalCost(thread_cost_context) - thread_startup_cost_) / single_thread_cost_ + 0.9));
53   }
54 
ThreadBlockSizeThreadCostModel55   static int64_t ThreadBlockSize(const ThreadCostContext *thread_cost_context) {
56     return static_cast<int64_t>(parallel_thread_cost_ / UnitCost(thread_cost_context));
57   }
58   static int GetOptimalThreadNum(const ThreadCostContext *thread_cost_context, const int thread_num);
59 
60   static float per_unit_load_cost_;      // per unit load cost
61   static float per_unit_store_cost_;     // per unit store cost
62   static int64_t per_unit_compute_num_;  // per unit compute num
63 
64   static float thread_startup_cost_;   // thread startup inherent cost
65   static float single_thread_cost_;    // Minimum cost of single-threaded
66   static float parallel_thread_cost_;  // Minimum cost of per thread in parallel-thread
67 };
68 
// Declaration only; the definition is not visible in this header.
// NOTE(review): presumably returns the per-unit compute cost associated with `kernel_type` — confirm against the
// implementation.
float GetKernelComputeCost(int32_t kernel_type);
// Declaration only; the definition is not visible in this header.
// NOTE(review): presumably derives a thread count for `task_num` tasks from the cost context — confirm against the
// implementation.
int ThreadNumUpdateStrategy(const ThreadCostContext *thread_cost_context, int task_num);
71 
#ifndef DYNAMIC_THREAD_DISTRIBUTE
// Fallback used when DYNAMIC_THREAD_DISTRIBUTE is not defined: the requested thread count is returned unchanged
// and every other argument is ignored.
inline int UpdateThreadNum(int32_t kernel_type, int64_t per_unit_load_num, int64_t per_unit_store_num, int64_t unit_num,
                           int thread_num) {
  // Explicitly discard the unused inputs so the signature stays identical to the dynamic variant.
  static_cast<void>(kernel_type);
  static_cast<void>(per_unit_load_num);
  static_cast<void>(per_unit_store_num);
  static_cast<void>(unit_num);
  return thread_num;
}
#else
// With DYNAMIC_THREAD_DISTRIBUTE defined, only the declaration lives here; the definition is not visible in this
// header.
int UpdateThreadNum(int32_t kernel_type, int64_t per_unit_load_num, int64_t per_unit_store_num, int64_t unit_num,
                    int thread_num);
#endif
85 }  // namespace mindspore::lite
86 
#endif  // MINDSPORE_LITE_SRC_RUNTIME_THREAD_COST_MODEL_H_
88