1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/common_runtime/process_util.h"
17
18 #ifdef INTEL_MKL
19 #ifdef _OPENMP
20 #include <omp.h>
21 #endif // _OPENMP
22 #endif // INTEL_MKL
23 #include <string.h>
24
25 #include "tensorflow/core/lib/core/threadpool.h"
26 #include "tensorflow/core/platform/byte_order.h"
27 #include "tensorflow/core/platform/cpu_info.h"
28 #include "tensorflow/core/platform/logging.h"
29 #include "tensorflow/core/platform/tracing.h"
30 #include "tensorflow/core/platform/types.h"
31 #include "tensorflow/core/util/util.h"
32
33 namespace tensorflow {
34
35 namespace {
36
DefaultNumInterOpThreads()37 int32 DefaultNumInterOpThreads() {
38 // Use environment setting if specified (init once)
39 static int env_num_threads = NumInterOpThreadsFromEnvironment();
40 if (env_num_threads > 0) {
41 return env_num_threads;
42 }
43
44 // Default to using the number of cores available in the process.
45 return port::NumSchedulableCPUs();
46 }
47
InitComputePool(const SessionOptions & options)48 static thread::ThreadPool* InitComputePool(const SessionOptions& options) {
49 int32 inter_op_parallelism_threads =
50 options.config.inter_op_parallelism_threads();
51 if (inter_op_parallelism_threads == 0) {
52 inter_op_parallelism_threads = DefaultNumInterOpThreads();
53 }
54 return new thread::ThreadPool(Env::Default(), "Compute",
55 inter_op_parallelism_threads);
56 }
57
58 } // namespace
59
ComputePool(const SessionOptions & options)60 thread::ThreadPool* ComputePool(const SessionOptions& options) {
61 static thread::ThreadPool* compute_pool = InitComputePool(options);
62 return compute_pool;
63 }
64
NumInterOpThreadsFromEnvironment()65 int32 NumInterOpThreadsFromEnvironment() {
66 int32 num;
67 const char* val = std::getenv("TF_NUM_INTEROP_THREADS");
68 return (val && strings::safe_strto32(val, &num)) ? num : 0;
69 }
70
NumIntraOpThreadsFromEnvironment()71 int32 NumIntraOpThreadsFromEnvironment() {
72 int32 num;
73 const char* val = std::getenv("TF_NUM_INTRAOP_THREADS");
74 return (val && strings::safe_strto32(val, &num)) ? num : 0;
75 }
76
NumInterOpThreadsFromSessionOptions(const SessionOptions & options)77 int32 NumInterOpThreadsFromSessionOptions(const SessionOptions& options) {
78 const int32 inter_op = options.config.inter_op_parallelism_threads();
79 if (inter_op != 0) return inter_op;
80 #ifdef INTEL_MKL
81 if (!DisableMKL()) {
82 // MKL library executes ops in parallel using OMP threads
83 // Set inter_op conservatively to avoid thread oversubscription that could
84 // lead to severe perf degradations and OMP resource exhaustion
85 int mkl_intra_op = 1;
86 #ifdef _OPENMP
87 mkl_intra_op = omp_get_max_threads();
88 #endif // _OPENMP
89 DCHECK_GE(mkl_intra_op, 1);
90 const int32 mkl_inter_op = std::max(
91 (DefaultNumInterOpThreads() + mkl_intra_op - 1) / mkl_intra_op, 2);
92 VLOG(0)
93 << "Creating new thread pool with default inter op setting: "
94 << mkl_inter_op
95 << ". Tune using inter_op_parallelism_threads for best performance.";
96 return mkl_inter_op;
97 }
98 #endif // INTEL_MKL
99 return DefaultNumInterOpThreads();
100 }
101
NewThreadPoolFromSessionOptions(const SessionOptions & options)102 thread::ThreadPool* NewThreadPoolFromSessionOptions(
103 const SessionOptions& options) {
104 const int32 num_threads = NumInterOpThreadsFromSessionOptions(options);
105 VLOG(1) << "Direct session inter op parallelism threads: " << num_threads;
106 return new thread::ThreadPool(options.env, "Compute", num_threads);
107 }
108
SchedClosure(std::function<void ()> closure)109 void SchedClosure(std::function<void()> closure) {
110 if (!tracing::EventCollector::IsEnabled()) {
111 return Env::Default()->SchedClosure(std::move(closure));
112 }
113 uint64 id = tracing::GetUniqueArg();
114 tracing::RecordEvent(tracing::EventCategory::kScheduleClosure, id);
115
116 Env::Default()->SchedClosure(std::bind(
117 [id](std::function<void()> closure) {
118 tracing::ScopedRegion region(tracing::EventCategory::kRunClosure, id);
119 closure();
120 },
121 std::move(closure)));
122 }
123
SchedNonBlockingClosureAfter(int64 micros,std::function<void ()> closure)124 void SchedNonBlockingClosureAfter(int64 micros, std::function<void()> closure) {
125 Env::Default()->SchedClosureAfter(micros, std::move(closure));
126 }
127
128 } // namespace tensorflow
129