1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/grappler/costs/virtual_placer.h"
17 #include "tensorflow/core/framework/node_def.pb.h"
18 #include "tensorflow/core/grappler/clusters/cluster.h"
19 #include "tensorflow/core/grappler/devices.h"
20 #include "tensorflow/core/lib/strings/str_util.h"
21 #include "tensorflow/core/util/device_name_utils.h"
22
23 namespace tensorflow {
24 namespace grappler {
25
VirtualPlacer(const std::unordered_map<string,DeviceProperties> & devices)26 VirtualPlacer::VirtualPlacer(
27 const std::unordered_map<string, DeviceProperties>& devices)
28 : devices_(devices),
29 // Default job name for canonical device name. Needs to be set before the
30 // first call to to_lfqn_or_empty()
31 default_job_name_lowercase_("localhost") {
32 lfqn_map_.reserve(devices_.size());
33 for (const auto& kv : devices_) {
34 const auto lfqn = to_lfqn_or_empty(kv.first);
35 if (lfqn.empty()) {
36 LOG(ERROR) << "VirtualPlacer couldn't parse device name from cluster: "
37 << kv.first;
38 } else {
39 lfqn_map_[lfqn] = kv.first;
40 }
41 }
42
43 if (devices_.empty()) {
44 // If there are no devices in the cluster, add a single device, "UNKNOWN" to
45 // the cluster.
46 default_device_name_ = "UNKNOWN";
47 DeviceProperties& prop = devices_["UNKNOWN"];
48 prop.set_type("UNKNOWN");
49 } else if (devices_.size() == 1) {
50 // If there is only one device in the cluster, use it as default device,
51 // whatever it is.
52 default_device_name_ = devices_.begin()->first;
53 } else {
54 // Default device is set from the devices in the cluster in the following
55 // priority: /gpu:0, /cpu:0, or any device.
56 // TODO(dyoon): This logic assumes single machine with CPU and GPU devices.
57 // Make it more general to support multiple machines, job types, and devices
58 // other than CPU and GPU.
59 std::map<int, string> cpu_devices; // CPU device map: id -> device name.
60 std::map<int, string> gpu_devices; // GPU device map: id -> device name.
61 for (const auto& kv : lfqn_map_) {
62 const auto& lfqn = kv.first;
63 const auto& cluster_device_name = kv.second;
64 DeviceNameUtils::ParsedName parsed_name;
65 bool parsed = DeviceNameUtils::ParseFullName(lfqn, &parsed_name);
66 if (parsed) {
67 // Parsed devices are stored to cpu_devices or gpu_devices map,
68 // addressed (and ordered) by device id.
69 const auto type = absl::AsciiStrToLower(parsed_name.type);
70 if (type == "gpu") {
71 gpu_devices[parsed_name.id] = cluster_device_name;
72 } else if (type == "cpu") {
73 cpu_devices[parsed_name.id] = cluster_device_name;
74 }
75 }
76 }
77
78 if (!gpu_devices.empty()) {
79 // GPU:0 (or GPU with smallest device id).
80 default_device_name_ = gpu_devices.begin()->second;
81 } else if (!cpu_devices.empty()) {
82 // CPU:0 (or CPU with smallest device id).
83 default_device_name_ = cpu_devices.begin()->second;
84 } else {
85 default_device_name_ = devices_.begin()->first; // Any device.
86 }
87 }
88 VLOG(3) << "default device name: " << default_device_name_;
89
90 // Scan the device names from the cluster, and if there is one job name used,
91 // use it for canonical device name.
92 std::unordered_set<string> job_names_from_cluster;
93 for (const auto& device : lfqn_map_) {
94 const auto& lfqn = device.first;
95 DeviceNameUtils::ParsedName parsed_name;
96 bool parsed = DeviceNameUtils::ParseFullName(lfqn, &parsed_name);
97 if (parsed && !parsed_name.job.empty()) {
98 job_names_from_cluster.insert(parsed_name.job);
99 if (job_names_from_cluster.size() > 1) {
100 break;
101 }
102 }
103 }
104 // If there is only one type of job name in all the devices in the cluster,
105 // use that one as default job name; otherwise, use localhost.
106 // TODO(dyoon): this should be improved, especially when the cluster is
107 // composed of multiple worker, PS, and other types of jobs.
108 if (job_names_from_cluster.size() == 1) {
109 auto it = job_names_from_cluster.begin();
110 default_job_name_lowercase_ = *it;
111 }
112 VLOG(3) << "default job name: " << default_job_name_lowercase_;
113 }
114
get_device(const NodeDef & node) const115 const DeviceProperties& VirtualPlacer::get_device(const NodeDef& node) const {
116 string device = get_canonical_device_name(node);
117 VLOG(3) << "node.name=" << node.name() << " node.device=" << node.device()
118 << " is placed on: " << device;
119 auto it = devices_.find(device);
120 DCHECK(it != devices_.end());
121 return it->second;
122 }
123
get_canonical_device_name(const NodeDef & node) const124 string VirtualPlacer::get_canonical_device_name(const NodeDef& node) const {
125 if (node.device().empty()) {
126 return default_device_name_;
127 }
128
129 const auto lfqn = to_lfqn_or_empty(node.device());
130 if (lfqn.empty()) {
131 return default_device_name_;
132 }
133
134 const auto it = lfqn_map_.find(lfqn);
135 if (it != lfqn_map_.end()) {
136 return it->second;
137 }
138
139 return default_device_name_;
140 }
141
// Converts `device_name` to the canonical lowercase form
//   /job:<job>/replica:<replica>/task:<task>/device:<type>:<id>
// or returns an empty string when the name cannot be interpreted.
//
// The lowercased name is tried, in order, as:
//   1. a full device name (DeviceNameUtils::ParseFullName),
//   2. a local device name (DeviceNameUtils::ParseLocalName),
//   3. the bare word "gpu" or "cpu".
// Cases 2 and 3 are assigned the job "localhost". A full name without a job
// gets default_job_name_lowercase_.
string VirtualPlacer::to_lfqn_or_empty(const string& device_name) const {
  const auto lowered = absl::AsciiStrToLower(device_name);
  DeviceNameUtils::ParsedName fields;

  if (!DeviceNameUtils::ParseFullName(lowered, &fields)) {
    const bool is_local_name =
        DeviceNameUtils::ParseLocalName(lowered, &fields);
    fields.job = "localhost";
    if (!is_local_name) {
      // Last resort: accept a bare device type with no id.
      if (lowered != "gpu" && lowered != "cpu") {
        return {};
      }
      fields.type = lowered;
    }
  }

  if (fields.job.empty()) {
    fields.job = default_job_name_lowercase_;
  }

  // The type is lowercased again here because the parser returns uppercase
  // types for CPU and GPU.
  fields.type = absl::AsciiStrToLower(fields.type);

  return strings::StrCat("/job:", fields.job, "/replica:", fields.replica,
                         "/task:", fields.task, "/device:", fields.type, ":",
                         fields.id);
}
174
175 } // end namespace grappler
176 } // end namespace tensorflow
177