1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/framework/resource_mgr.h"
17
18 #include <atomic>
19
20 #include "tensorflow/core/framework/device_attributes.pb.h"
21 #include "tensorflow/core/framework/node_def.pb.h"
22 #include "tensorflow/core/framework/node_def_util.h"
23 #include "tensorflow/core/lib/core/errors.h"
24 #include "tensorflow/core/lib/gtl/map_util.h"
25 #include "tensorflow/core/lib/strings/scanner.h"
26 #include "tensorflow/core/lib/strings/str_util.h"
27 #include "tensorflow/core/lib/strings/stringprintf.h"
28 #include "tensorflow/core/platform/demangle.h"
29 #include "tensorflow/core/platform/stacktrace.h"
30
31 namespace tensorflow {
32
MakeResourceHandle(const string & container,const string & name,const DeviceBase & device,const TypeIndex & type_index,const std::vector<DtypeAndPartialTensorShape> & dtypes_and_shapes,const absl::optional<ManagedStackTrace> & definition_stack_trace)33 ResourceHandle MakeResourceHandle(
34 const string& container, const string& name, const DeviceBase& device,
35 const TypeIndex& type_index,
36 const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes,
37 const absl::optional<ManagedStackTrace>& definition_stack_trace) {
38 ResourceHandle result;
39 result.set_device(device.name());
40 result.set_container(container);
41 result.set_definition_stack_trace(definition_stack_trace);
42 if (name == ResourceHandle::ANONYMOUS_NAME) {
43 result.set_name(
44 strings::StrCat("_AnonymousVar", ResourceHandle::GenerateUniqueId()));
45 } else {
46 result.set_name(name);
47 }
48 result.set_hash_code(type_index.hash_code());
49 result.set_maybe_type_name(type_index.name());
50 result.set_dtypes_and_shapes(dtypes_and_shapes);
51 return result;
52 }
53
MakeResourceHandleToOutput(OpKernelContext * context,int output_index,const string & container,const string & name,const TypeIndex & type_index)54 Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index,
55 const string& container, const string& name,
56 const TypeIndex& type_index) {
57 Tensor* handle;
58 TF_RETURN_IF_ERROR(
59 context->allocate_output(output_index, TensorShape({}), &handle));
60 handle->scalar<ResourceHandle>()() =
61 MakeResourceHandle(container, name, *context->device(), type_index);
62 return Status::OK();
63 }
64
65 namespace internal {
66
ValidateDevice(OpKernelContext * ctx,const ResourceHandle & p)67 Status ValidateDevice(OpKernelContext* ctx, const ResourceHandle& p) {
68 if (ctx->device()->attributes().name() != p.device()) {
69 return errors::InvalidArgument(
70 "Trying to access resource ", p.name(), " located in device ",
71 p.device(), " from device ", ctx->device()->attributes().name());
72 }
73 return Status::OK();
74 }
75
76 } // end namespace internal
77
InsertDebugTypeName(uint64 hash_code,const string & type_name)78 Status ResourceMgr::InsertDebugTypeName(uint64 hash_code,
79 const string& type_name) {
80 auto iter = debug_type_names_.emplace(hash_code, type_name);
81 if (iter.first->second != type_name) {
82 return errors::AlreadyExists("Duplicate hash code found for type ",
83 type_name);
84 }
85 return Status::OK();
86 }
87
DebugTypeName(uint64 hash_code) const88 const char* ResourceMgr::DebugTypeName(uint64 hash_code) const {
89 auto type_name_iter = debug_type_names_.find(hash_code);
90 if (type_name_iter == debug_type_names_.end()) {
91 return "<unknown>";
92 } else {
93 return type_name_iter->second.c_str();
94 }
95 }
96
ResourceAndName()97 ResourceMgr::ResourceAndName::ResourceAndName()
98 : resource(nullptr), name(nullptr) {}
99
ResourceAndName(ResourceBase * resource,string name)100 ResourceMgr::ResourceAndName::ResourceAndName(ResourceBase* resource,
101 string name)
102 : resource(resource), name(absl::make_unique<string>(std::move(name))) {}
103
ResourceAndName(ResourceAndName && other)104 ResourceMgr::ResourceAndName::ResourceAndName(
105 ResourceAndName&& other) noexcept {
106 resource = std::move(other.resource);
107 name = std::move(other.name);
108 }
109
~ResourceAndName()110 ResourceMgr::ResourceAndName::~ResourceAndName() {}
111
operator =(ResourceAndName && other)112 ResourceMgr::ResourceAndName& ResourceMgr::ResourceAndName::operator=(
113 ResourceAndName&& other) noexcept {
114 resource = std::move(other.resource);
115 name = std::move(other.name);
116 return *this;
117 }
118
ResourceMgr()119 ResourceMgr::ResourceMgr() : default_container_("localhost") {}
120
ResourceMgr(const string & default_container)121 ResourceMgr::ResourceMgr(const string& default_container)
122 : default_container_(default_container) {}
123
~ResourceMgr()124 ResourceMgr::~ResourceMgr() { Clear(); }
125
Clear()126 void ResourceMgr::Clear() {
127 // We do the deallocation outside of the lock to avoid a potential deadlock
128 // in case any of the destructors access the resource manager.
129 std::unordered_map<string, Container*> tmp_containers;
130 {
131 mutex_lock l(mu_);
132 tmp_containers = std::move(containers_);
133 }
134 for (const auto& p : tmp_containers) {
135 delete p.second;
136 }
137 tmp_containers.clear();
138 }
139
DebugString() const140 string ResourceMgr::DebugString() const {
141 mutex_lock l(mu_);
142 struct Line {
143 const string* container;
144 const string type;
145 const string* resource;
146 const string detail;
147 };
148 std::vector<Line> lines;
149 for (const auto& p : containers_) {
150 const string& container = p.first;
151 for (const auto& q : *p.second) {
152 const Key& key = q.first;
153 const char* type = DebugTypeName(key.first);
154 Line l{&container, port::Demangle(type), q.second.name.get(),
155 q.second.resource->DebugString()};
156 lines.push_back(l);
157 }
158 }
159 std::vector<string> text;
160 text.reserve(lines.size());
161 for (const Line& line : lines) {
162 text.push_back(strings::Printf(
163 "%-20s | %-40s | %-40s | %-s", line.container->c_str(),
164 line.type.c_str(), line.resource->c_str(), line.detail.c_str()));
165 }
166 std::sort(text.begin(), text.end());
167 return absl::StrJoin(text, "\n");
168 }
169
DoCreate(const string & container,TypeIndex type,const string & name,ResourceBase * resource)170 Status ResourceMgr::DoCreate(const string& container, TypeIndex type,
171 const string& name, ResourceBase* resource) {
172 Container** b = &containers_[container];
173 if (*b == nullptr) {
174 *b = new Container;
175 }
176
177 // NOTE: Separating out the construction of the map key and value so that the
178 // key can contain a StringPiece that borrows from the string in the value.
179 ResourceAndName resource_and_name(resource, name);
180 StringPiece borrowed_name(*resource_and_name.name);
181 Container::value_type key_and_value(Key(type.hash_code(), borrowed_name),
182 std::move(resource_and_name));
183
184 if ((*b)->insert(std::move(key_and_value)).second) {
185 TF_RETURN_IF_ERROR(InsertDebugTypeName(type.hash_code(), type.name()));
186 return Status::OK();
187 }
188 return errors::AlreadyExists("Resource ", container, "/", name, "/",
189 type.name());
190 }
191
Lookup(const ResourceHandle & handle,ResourceBase ** resource) const192 Status ResourceMgr::Lookup(const ResourceHandle& handle,
193 ResourceBase** resource) const {
194 tf_shared_lock l(mu_);
195 return DoLookup(handle.container(), handle.hash_code(),
196 /*type_name=*/"ResourceBase", handle.name(), resource);
197 }
198
DoLookup(const string & container,TypeIndex type,const string & name,ResourceBase ** resource) const199 Status ResourceMgr::DoLookup(const string& container, TypeIndex type,
200 const string& name,
201 ResourceBase** resource) const {
202 return DoLookup(container, type.hash_code(), type.name(), name, resource);
203 }
204
DoLookup(const string & container,uint64 type_hash_code,const string & type_name,const string & resource_name,ResourceBase ** resource) const205 Status ResourceMgr::DoLookup(const string& container, uint64 type_hash_code,
206 const string& type_name,
207 const string& resource_name,
208 ResourceBase** resource) const {
209 const Container* b = gtl::FindPtrOrNull(containers_, container);
210 if (b == nullptr) {
211 return errors::NotFound("Container ", container,
212 " does not exist. (Could not find resource: ",
213 container, "/", resource_name, ")");
214 }
215 auto iter = b->find({type_hash_code, resource_name});
216 if (iter == b->end()) {
217 return errors::NotFound("Resource ", container, "/", resource_name, "/",
218 type_name, " does not exist.");
219 }
220 *resource = const_cast<ResourceBase*>(iter->second.resource.get());
221 (*resource)->Ref();
222 return Status::OK();
223 }
224
DoDelete(const string & container,uint64 type_hash_code,const string & resource_name,const string & type_name)225 Status ResourceMgr::DoDelete(const string& container, uint64 type_hash_code,
226 const string& resource_name,
227 const string& type_name) {
228 ResourceAndName resource_and_name;
229 {
230 mutex_lock l(mu_);
231 Container* b = gtl::FindPtrOrNull(containers_, container);
232 if (b == nullptr) {
233 return errors::NotFound("Container ", container, " does not exist.");
234 }
235 auto iter = b->find({type_hash_code, resource_name});
236 if (iter == b->end()) {
237 return errors::NotFound("Resource ", container, "/", resource_name, "/",
238 type_name, " does not exist.");
239 }
240 std::swap(resource_and_name, iter->second);
241 b->erase(iter);
242 }
243 DCHECK(resource_and_name.resource != nullptr);
244 return Status::OK();
245 }
246
DoDelete(const string & container,TypeIndex type,const string & resource_name)247 Status ResourceMgr::DoDelete(const string& container, TypeIndex type,
248 const string& resource_name) {
249 return DoDelete(container, type.hash_code(), resource_name, type.name());
250 }
251
Delete(const ResourceHandle & handle)252 Status ResourceMgr::Delete(const ResourceHandle& handle) {
253 return DoDelete(handle.container(), handle.hash_code(), handle.name(),
254 "<unknown>");
255 }
256
Cleanup(const string & container)257 Status ResourceMgr::Cleanup(const string& container) {
258 {
259 tf_shared_lock l(mu_);
260 if (!gtl::FindOrNull(containers_, container)) {
261 // Nothing to cleanup.
262 return Status::OK();
263 }
264 }
265 Container* b = nullptr;
266 {
267 mutex_lock l(mu_);
268 auto iter = containers_.find(container);
269 if (iter == containers_.end()) {
270 // Nothing to cleanup, it's OK (concurrent cleanup).
271 return Status::OK();
272 }
273 b = iter->second;
274 containers_.erase(iter);
275 }
276 CHECK(b != nullptr);
277 delete b;
278 return Status::OK();
279 }
280
IsValidContainerName(StringPiece s)281 static bool IsValidContainerName(StringPiece s) {
282 using ::tensorflow::strings::Scanner;
283 return Scanner(s)
284 .One(Scanner::LETTER_DIGIT_DOT)
285 .Any(Scanner::LETTER_DIGIT_DASH_DOT_SLASH)
286 .Eos()
287 .GetResult();
288 }
289
Init(ResourceMgr * rmgr,const NodeDef & ndef,bool use_node_name_as_default)290 Status ContainerInfo::Init(ResourceMgr* rmgr, const NodeDef& ndef,
291 bool use_node_name_as_default) {
292 CHECK(rmgr);
293 rmgr_ = rmgr;
294 string attr_container;
295 TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "container", &attr_container));
296 if (!attr_container.empty() && !IsValidContainerName(attr_container)) {
297 return errors::InvalidArgument("container contains invalid characters: ",
298 attr_container);
299 }
300 string attr_shared_name;
301 TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "shared_name", &attr_shared_name));
302 if (!attr_shared_name.empty() && (attr_shared_name[0] == '_')) {
303 return errors::InvalidArgument("shared_name cannot start with '_':",
304 attr_shared_name);
305 }
306 if (!attr_container.empty()) {
307 container_ = attr_container;
308 } else {
309 container_ = rmgr_->default_container();
310 }
311 if (!attr_shared_name.empty()) {
312 name_ = attr_shared_name;
313 } else if (use_node_name_as_default) {
314 name_ = ndef.name();
315 } else {
316 resource_is_private_to_kernel_ = true;
317 static std::atomic<int64> counter(0);
318 name_ = strings::StrCat("_", counter.fetch_add(1), "_", ndef.name());
319 }
320 return Status::OK();
321 }
322
DebugString() const323 string ContainerInfo::DebugString() const {
324 return strings::StrCat("[", container(), ",", name(), ",",
325 resource_is_private_to_kernel() ? "private" : "public",
326 "]");
327 }
328
HandleFromInput(OpKernelContext * ctx,int input)329 const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) {
330 return ctx->input(input).flat<ResourceHandle>()(0);
331 }
332
HandleFromInput(OpKernelContext * ctx,StringPiece input,ResourceHandle * handle)333 Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
334 ResourceHandle* handle) {
335 const Tensor* tensor;
336 TF_RETURN_IF_ERROR(ctx->input(input, &tensor));
337 *handle = tensor->flat<ResourceHandle>()(0);
338 return Status::OK();
339 }
340
LookupResource(OpKernelContext * ctx,const ResourceHandle & p,ResourceBase ** value)341 Status LookupResource(OpKernelContext* ctx, const ResourceHandle& p,
342 ResourceBase** value) {
343 TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p));
344 return ctx->resource_manager()->Lookup(p, value);
345 }
346
DeleteResource(OpKernelContext * ctx,const ResourceHandle & p)347 Status DeleteResource(OpKernelContext* ctx, const ResourceHandle& p) {
348 TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p));
349 return ctx->resource_manager()->Delete(p);
350 }
351
352 } // end namespace tensorflow
353