1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/framework/resource_mgr.h"
17
18 #include <atomic>
19
20 #include "tensorflow/core/framework/device_attributes.pb.h"
21 #include "tensorflow/core/framework/node_def.pb.h"
22 #include "tensorflow/core/framework/node_def_util.h"
23 #include "tensorflow/core/lib/core/errors.h"
24 #include "tensorflow/core/lib/gtl/map_util.h"
25 #include "tensorflow/core/lib/strings/scanner.h"
26 #include "tensorflow/core/lib/strings/str_util.h"
27 #include "tensorflow/core/lib/strings/stringprintf.h"
28 #include "tensorflow/core/platform/demangle.h"
29
30 namespace tensorflow {
31
32 // Used to generate unique names for anonymous variables
33 static std::atomic<int64> current_id_;
34
MakeResourceHandle(const string & container,const string & name,const DeviceBase & device,const TypeIndex & type_index,const std::vector<DtypeAndPartialTensorShape> & dtypes_and_shapes)35 ResourceHandle MakeResourceHandle(
36 const string& container, const string& name, const DeviceBase& device,
37 const TypeIndex& type_index,
38 const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes) {
39 ResourceHandle result;
40 result.set_device(device.name());
41 result.set_container(container);
42 if (name == ResourceHandle::ANONYMOUS_NAME) {
43 result.set_name(strings::StrCat("_AnonymousVar", current_id_.fetch_add(1)));
44 } else {
45 result.set_name(name);
46 }
47 result.set_hash_code(type_index.hash_code());
48 result.set_maybe_type_name(type_index.name());
49 result.set_dtypes_and_shapes(dtypes_and_shapes);
50 return result;
51 }
52
MakeResourceHandleToOutput(OpKernelContext * context,int output_index,const string & container,const string & name,const TypeIndex & type_index)53 Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index,
54 const string& container, const string& name,
55 const TypeIndex& type_index) {
56 Tensor* handle;
57 TF_RETURN_IF_ERROR(
58 context->allocate_output(output_index, TensorShape({}), &handle));
59 handle->scalar<ResourceHandle>()() =
60 MakeResourceHandle(container, name, *context->device(), type_index);
61 return Status::OK();
62 }
63
64 namespace internal {
65
ValidateDevice(OpKernelContext * ctx,const ResourceHandle & p)66 Status ValidateDevice(OpKernelContext* ctx, const ResourceHandle& p) {
67 if (ctx->device()->attributes().name() != p.device()) {
68 return errors::InvalidArgument(
69 "Trying to access resource ", p.name(), " located in device ",
70 p.device(), " from device ", ctx->device()->attributes().name());
71 }
72 return Status::OK();
73 }
74
75 } // end namespace internal
76
InsertDebugTypeName(uint64 hash_code,const string & type_name)77 Status ResourceMgr::InsertDebugTypeName(uint64 hash_code,
78 const string& type_name) {
79 auto iter = debug_type_names_.emplace(hash_code, type_name);
80 if (iter.first->second != type_name) {
81 return errors::AlreadyExists("Duplicate hash code found for type ",
82 type_name);
83 }
84 return Status::OK();
85 }
86
DebugTypeName(uint64 hash_code) const87 const char* ResourceMgr::DebugTypeName(uint64 hash_code) const {
88 auto type_name_iter = debug_type_names_.find(hash_code);
89 if (type_name_iter == debug_type_names_.end()) {
90 return "<unknown>";
91 } else {
92 return type_name_iter->second.c_str();
93 }
94 }
95
ResourceAndName()96 ResourceMgr::ResourceAndName::ResourceAndName()
97 : resource(nullptr), name(nullptr) {}
98
ResourceAndName(ResourceBase * resource,string name)99 ResourceMgr::ResourceAndName::ResourceAndName(ResourceBase* resource,
100 string name)
101 : resource(resource), name(absl::make_unique<string>(std::move(name))) {}
102
ResourceAndName(ResourceAndName && other)103 ResourceMgr::ResourceAndName::ResourceAndName(
104 ResourceAndName&& other) noexcept {
105 resource = std::move(other.resource);
106 name = std::move(other.name);
107 }
108
~ResourceAndName()109 ResourceMgr::ResourceAndName::~ResourceAndName() {}
110
operator =(ResourceAndName && other)111 ResourceMgr::ResourceAndName& ResourceMgr::ResourceAndName::operator=(
112 ResourceAndName&& other) noexcept {
113 resource = std::move(other.resource);
114 name = std::move(other.name);
115 return *this;
116 }
117
ResourceMgr()118 ResourceMgr::ResourceMgr() : default_container_("localhost") {}
119
ResourceMgr(const string & default_container)120 ResourceMgr::ResourceMgr(const string& default_container)
121 : default_container_(default_container) {}
122
~ResourceMgr()123 ResourceMgr::~ResourceMgr() { Clear(); }
124
Clear()125 void ResourceMgr::Clear() {
126 // We do the deallocation outside of the lock to avoid a potential deadlock
127 // in case any of the destructors access the resource manager.
128 std::unordered_map<string, Container*> tmp_containers;
129 {
130 mutex_lock l(mu_);
131 tmp_containers = std::move(containers_);
132 }
133 for (const auto& p : tmp_containers) {
134 delete p.second;
135 }
136 tmp_containers.clear();
137 }
138
DebugString() const139 string ResourceMgr::DebugString() const {
140 mutex_lock l(mu_);
141 struct Line {
142 const string* container;
143 const string type;
144 const string* resource;
145 const string detail;
146 };
147 std::vector<Line> lines;
148 for (const auto& p : containers_) {
149 const string& container = p.first;
150 for (const auto& q : *p.second) {
151 const Key& key = q.first;
152 const char* type = DebugTypeName(key.first);
153 Line l{&container, port::Demangle(type), q.second.name.get(),
154 q.second.resource->DebugString()};
155 lines.push_back(l);
156 }
157 }
158 std::vector<string> text;
159 text.reserve(lines.size());
160 for (const Line& line : lines) {
161 text.push_back(strings::Printf(
162 "%-20s | %-40s | %-40s | %-s", line.container->c_str(),
163 line.type.c_str(), line.resource->c_str(), line.detail.c_str()));
164 }
165 std::sort(text.begin(), text.end());
166 return absl::StrJoin(text, "\n");
167 }
168
DoCreate(const string & container,TypeIndex type,const string & name,ResourceBase * resource)169 Status ResourceMgr::DoCreate(const string& container, TypeIndex type,
170 const string& name, ResourceBase* resource) {
171 Container** b = &containers_[container];
172 if (*b == nullptr) {
173 *b = new Container;
174 }
175
176 // NOTE: Separating out the construction of the map key and value so that the
177 // key can contain a StringPiece that borrows from the string in the value.
178 ResourceAndName resource_and_name(resource, name);
179 StringPiece borrowed_name(*resource_and_name.name);
180 Container::value_type key_and_value(Key(type.hash_code(), borrowed_name),
181 std::move(resource_and_name));
182
183 if ((*b)->insert(std::move(key_and_value)).second) {
184 TF_RETURN_IF_ERROR(InsertDebugTypeName(type.hash_code(), type.name()));
185 return Status::OK();
186 }
187 return errors::AlreadyExists("Resource ", container, "/", name, "/",
188 type.name());
189 }
190
DoLookup(const string & container,TypeIndex type,const string & name,ResourceBase ** resource) const191 Status ResourceMgr::DoLookup(const string& container, TypeIndex type,
192 const string& name,
193 ResourceBase** resource) const {
194 const Container* b = gtl::FindPtrOrNull(containers_, container);
195 if (b == nullptr) {
196 return errors::NotFound("Container ", container,
197 " does not exist. (Could not find resource: ",
198 container, "/", name, ")");
199 }
200 auto iter = b->find({type.hash_code(), name});
201 if (iter == b->end()) {
202 return errors::NotFound("Resource ", container, "/", name, "/", type.name(),
203 " does not exist.");
204 }
205 *resource = const_cast<ResourceBase*>(iter->second.resource.get());
206 (*resource)->Ref();
207 return Status::OK();
208 }
209
DoDelete(const string & container,uint64 type_hash_code,const string & resource_name,const string & type_name)210 Status ResourceMgr::DoDelete(const string& container, uint64 type_hash_code,
211 const string& resource_name,
212 const string& type_name) {
213 ResourceAndName resource_and_name;
214 {
215 mutex_lock l(mu_);
216 Container* b = gtl::FindPtrOrNull(containers_, container);
217 if (b == nullptr) {
218 return errors::NotFound("Container ", container, " does not exist.");
219 }
220 auto iter = b->find({type_hash_code, resource_name});
221 if (iter == b->end()) {
222 return errors::NotFound("Resource ", container, "/", resource_name, "/",
223 type_name, " does not exist.");
224 }
225 std::swap(resource_and_name, iter->second);
226 b->erase(iter);
227 }
228 DCHECK(resource_and_name.resource != nullptr);
229 return Status::OK();
230 }
231
DoDelete(const string & container,TypeIndex type,const string & resource_name)232 Status ResourceMgr::DoDelete(const string& container, TypeIndex type,
233 const string& resource_name) {
234 return DoDelete(container, type.hash_code(), resource_name, type.name());
235 }
236
Delete(const ResourceHandle & handle)237 Status ResourceMgr::Delete(const ResourceHandle& handle) {
238 return DoDelete(handle.container(), handle.hash_code(), handle.name(),
239 "<unknown>");
240 }
241
Cleanup(const string & container)242 Status ResourceMgr::Cleanup(const string& container) {
243 {
244 tf_shared_lock l(mu_);
245 if (!gtl::FindOrNull(containers_, container)) {
246 // Nothing to cleanup.
247 return Status::OK();
248 }
249 }
250 Container* b = nullptr;
251 {
252 mutex_lock l(mu_);
253 auto iter = containers_.find(container);
254 if (iter == containers_.end()) {
255 // Nothing to cleanup, it's OK (concurrent cleanup).
256 return Status::OK();
257 }
258 b = iter->second;
259 containers_.erase(iter);
260 }
261 CHECK(b != nullptr);
262 delete b;
263 return Status::OK();
264 }
265
IsValidContainerName(StringPiece s)266 static bool IsValidContainerName(StringPiece s) {
267 using ::tensorflow::strings::Scanner;
268 return Scanner(s)
269 .One(Scanner::LETTER_DIGIT_DOT)
270 .Any(Scanner::LETTER_DIGIT_DASH_DOT_SLASH)
271 .Eos()
272 .GetResult();
273 }
274
Init(ResourceMgr * rmgr,const NodeDef & ndef,bool use_node_name_as_default)275 Status ContainerInfo::Init(ResourceMgr* rmgr, const NodeDef& ndef,
276 bool use_node_name_as_default) {
277 CHECK(rmgr);
278 rmgr_ = rmgr;
279 string attr_container;
280 TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "container", &attr_container));
281 if (!attr_container.empty() && !IsValidContainerName(attr_container)) {
282 return errors::InvalidArgument("container contains invalid characters: ",
283 attr_container);
284 }
285 string attr_shared_name;
286 TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "shared_name", &attr_shared_name));
287 if (!attr_shared_name.empty() && (attr_shared_name[0] == '_')) {
288 return errors::InvalidArgument("shared_name cannot start with '_':",
289 attr_shared_name);
290 }
291 if (!attr_container.empty()) {
292 container_ = attr_container;
293 } else {
294 container_ = rmgr_->default_container();
295 }
296 if (!attr_shared_name.empty()) {
297 name_ = attr_shared_name;
298 } else if (use_node_name_as_default) {
299 name_ = ndef.name();
300 } else {
301 resource_is_private_to_kernel_ = true;
302 static std::atomic<int64> counter(0);
303 name_ = strings::StrCat("_", counter.fetch_add(1), "_", ndef.name());
304 }
305 return Status::OK();
306 }
307
DebugString() const308 string ContainerInfo::DebugString() const {
309 return strings::StrCat("[", container(), ",", name(), ",",
310 resource_is_private_to_kernel() ? "private" : "public",
311 "]");
312 }
313
HandleFromInput(OpKernelContext * ctx,int input)314 const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) {
315 return ctx->input(input).flat<ResourceHandle>()(0);
316 }
317
HandleFromInput(OpKernelContext * ctx,StringPiece input,ResourceHandle * handle)318 Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
319 ResourceHandle* handle) {
320 const Tensor* tensor;
321 TF_RETURN_IF_ERROR(ctx->input(input, &tensor));
322 *handle = tensor->flat<ResourceHandle>()(0);
323 return Status::OK();
324 }
325
DeleteResource(OpKernelContext * ctx,const ResourceHandle & p)326 Status DeleteResource(OpKernelContext* ctx, const ResourceHandle& p) {
327 TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p));
328 return ctx->resource_manager()->Delete(p);
329 }
330
ResourceHandlesShape(shape_inference::InferenceContext * c)331 Status ResourceHandlesShape(shape_inference::InferenceContext* c) {
332 int n;
333 TF_RETURN_IF_ERROR(c->GetAttr("N", &n));
334 for (int i = 0; i < n; ++i) {
335 c->set_output(i, c->Scalar());
336 }
337 return Status::OK();
338 }
339
340 } // end namespace tensorflow
341