• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/framework/resource_mgr.h"
17 
18 #include <atomic>
19 
20 #include "tensorflow/core/framework/device_attributes.pb.h"
21 #include "tensorflow/core/framework/node_def.pb.h"
22 #include "tensorflow/core/framework/node_def_util.h"
23 #include "tensorflow/core/lib/core/errors.h"
24 #include "tensorflow/core/lib/gtl/map_util.h"
25 #include "tensorflow/core/lib/strings/scanner.h"
26 #include "tensorflow/core/lib/strings/str_util.h"
27 #include "tensorflow/core/lib/strings/stringprintf.h"
28 #include "tensorflow/core/platform/demangle.h"
29 #include "tensorflow/core/platform/stacktrace.h"
30 
31 namespace tensorflow {
32 
MakeResourceHandle(const string & container,const string & name,const DeviceBase & device,const TypeIndex & type_index,const std::vector<DtypeAndPartialTensorShape> & dtypes_and_shapes,const absl::optional<ManagedStackTrace> & definition_stack_trace)33 ResourceHandle MakeResourceHandle(
34     const string& container, const string& name, const DeviceBase& device,
35     const TypeIndex& type_index,
36     const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes,
37     const absl::optional<ManagedStackTrace>& definition_stack_trace) {
38   ResourceHandle result;
39   result.set_device(device.name());
40   result.set_container(container);
41   result.set_definition_stack_trace(definition_stack_trace);
42   if (name == ResourceHandle::ANONYMOUS_NAME) {
43     result.set_name(
44         strings::StrCat("_AnonymousVar", ResourceHandle::GenerateUniqueId()));
45   } else {
46     result.set_name(name);
47   }
48   result.set_hash_code(type_index.hash_code());
49   result.set_maybe_type_name(type_index.name());
50   result.set_dtypes_and_shapes(dtypes_and_shapes);
51   return result;
52 }
53 
MakeResourceHandleToOutput(OpKernelContext * context,int output_index,const string & container,const string & name,const TypeIndex & type_index)54 Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index,
55                                   const string& container, const string& name,
56                                   const TypeIndex& type_index) {
57   Tensor* handle;
58   TF_RETURN_IF_ERROR(
59       context->allocate_output(output_index, TensorShape({}), &handle));
60   handle->scalar<ResourceHandle>()() =
61       MakeResourceHandle(container, name, *context->device(), type_index);
62   return Status::OK();
63 }
64 
65 namespace internal {
66 
ValidateDevice(OpKernelContext * ctx,const ResourceHandle & p)67 Status ValidateDevice(OpKernelContext* ctx, const ResourceHandle& p) {
68   if (ctx->device()->attributes().name() != p.device()) {
69     return errors::InvalidArgument(
70         "Trying to access resource ", p.name(), " located in device ",
71         p.device(), " from device ", ctx->device()->attributes().name());
72   }
73   return Status::OK();
74 }
75 
76 }  // end namespace internal
77 
InsertDebugTypeName(uint64 hash_code,const string & type_name)78 Status ResourceMgr::InsertDebugTypeName(uint64 hash_code,
79                                         const string& type_name) {
80   auto iter = debug_type_names_.emplace(hash_code, type_name);
81   if (iter.first->second != type_name) {
82     return errors::AlreadyExists("Duplicate hash code found for type ",
83                                  type_name);
84   }
85   return Status::OK();
86 }
87 
DebugTypeName(uint64 hash_code) const88 const char* ResourceMgr::DebugTypeName(uint64 hash_code) const {
89   auto type_name_iter = debug_type_names_.find(hash_code);
90   if (type_name_iter == debug_type_names_.end()) {
91     return "<unknown>";
92   } else {
93     return type_name_iter->second.c_str();
94   }
95 }
96 
ResourceAndName()97 ResourceMgr::ResourceAndName::ResourceAndName()
98     : resource(nullptr), name(nullptr) {}
99 
ResourceAndName(ResourceBase * resource,string name)100 ResourceMgr::ResourceAndName::ResourceAndName(ResourceBase* resource,
101                                               string name)
102     : resource(resource), name(absl::make_unique<string>(std::move(name))) {}
103 
ResourceAndName(ResourceAndName && other)104 ResourceMgr::ResourceAndName::ResourceAndName(
105     ResourceAndName&& other) noexcept {
106   resource = std::move(other.resource);
107   name = std::move(other.name);
108 }
109 
~ResourceAndName()110 ResourceMgr::ResourceAndName::~ResourceAndName() {}
111 
operator =(ResourceAndName && other)112 ResourceMgr::ResourceAndName& ResourceMgr::ResourceAndName::operator=(
113     ResourceAndName&& other) noexcept {
114   resource = std::move(other.resource);
115   name = std::move(other.name);
116   return *this;
117 }
118 
ResourceMgr()119 ResourceMgr::ResourceMgr() : default_container_("localhost") {}
120 
ResourceMgr(const string & default_container)121 ResourceMgr::ResourceMgr(const string& default_container)
122     : default_container_(default_container) {}
123 
~ResourceMgr()124 ResourceMgr::~ResourceMgr() { Clear(); }
125 
Clear()126 void ResourceMgr::Clear() {
127   // We do the deallocation outside of the lock to avoid a potential deadlock
128   // in case any of the destructors access the resource manager.
129   std::unordered_map<string, Container*> tmp_containers;
130   {
131     mutex_lock l(mu_);
132     tmp_containers = std::move(containers_);
133   }
134   for (const auto& p : tmp_containers) {
135     delete p.second;
136   }
137   tmp_containers.clear();
138 }
139 
DebugString() const140 string ResourceMgr::DebugString() const {
141   mutex_lock l(mu_);
142   struct Line {
143     const string* container;
144     const string type;
145     const string* resource;
146     const string detail;
147   };
148   std::vector<Line> lines;
149   for (const auto& p : containers_) {
150     const string& container = p.first;
151     for (const auto& q : *p.second) {
152       const Key& key = q.first;
153       const char* type = DebugTypeName(key.first);
154       Line l{&container, port::Demangle(type), q.second.name.get(),
155              q.second.resource->DebugString()};
156       lines.push_back(l);
157     }
158   }
159   std::vector<string> text;
160   text.reserve(lines.size());
161   for (const Line& line : lines) {
162     text.push_back(strings::Printf(
163         "%-20s | %-40s | %-40s | %-s", line.container->c_str(),
164         line.type.c_str(), line.resource->c_str(), line.detail.c_str()));
165   }
166   std::sort(text.begin(), text.end());
167   return absl::StrJoin(text, "\n");
168 }
169 
DoCreate(const string & container,TypeIndex type,const string & name,ResourceBase * resource)170 Status ResourceMgr::DoCreate(const string& container, TypeIndex type,
171                              const string& name, ResourceBase* resource) {
172   Container** b = &containers_[container];
173   if (*b == nullptr) {
174     *b = new Container;
175   }
176 
177   // NOTE: Separating out the construction of the map key and value so that the
178   // key can contain a StringPiece that borrows from the string in the value.
179   ResourceAndName resource_and_name(resource, name);
180   StringPiece borrowed_name(*resource_and_name.name);
181   Container::value_type key_and_value(Key(type.hash_code(), borrowed_name),
182                                       std::move(resource_and_name));
183 
184   if ((*b)->insert(std::move(key_and_value)).second) {
185     TF_RETURN_IF_ERROR(InsertDebugTypeName(type.hash_code(), type.name()));
186     return Status::OK();
187   }
188   return errors::AlreadyExists("Resource ", container, "/", name, "/",
189                                type.name());
190 }
191 
Lookup(const ResourceHandle & handle,ResourceBase ** resource) const192 Status ResourceMgr::Lookup(const ResourceHandle& handle,
193                            ResourceBase** resource) const {
194   tf_shared_lock l(mu_);
195   return DoLookup(handle.container(), handle.hash_code(),
196                   /*type_name=*/"ResourceBase", handle.name(), resource);
197 }
198 
DoLookup(const string & container,TypeIndex type,const string & name,ResourceBase ** resource) const199 Status ResourceMgr::DoLookup(const string& container, TypeIndex type,
200                              const string& name,
201                              ResourceBase** resource) const {
202   return DoLookup(container, type.hash_code(), type.name(), name, resource);
203 }
204 
DoLookup(const string & container,uint64 type_hash_code,const string & type_name,const string & resource_name,ResourceBase ** resource) const205 Status ResourceMgr::DoLookup(const string& container, uint64 type_hash_code,
206                              const string& type_name,
207                              const string& resource_name,
208                              ResourceBase** resource) const {
209   const Container* b = gtl::FindPtrOrNull(containers_, container);
210   if (b == nullptr) {
211     return errors::NotFound("Container ", container,
212                             " does not exist. (Could not find resource: ",
213                             container, "/", resource_name, ")");
214   }
215   auto iter = b->find({type_hash_code, resource_name});
216   if (iter == b->end()) {
217     return errors::NotFound("Resource ", container, "/", resource_name, "/",
218                             type_name, " does not exist.");
219   }
220   *resource = const_cast<ResourceBase*>(iter->second.resource.get());
221   (*resource)->Ref();
222   return Status::OK();
223 }
224 
DoDelete(const string & container,uint64 type_hash_code,const string & resource_name,const string & type_name)225 Status ResourceMgr::DoDelete(const string& container, uint64 type_hash_code,
226                              const string& resource_name,
227                              const string& type_name) {
228   ResourceAndName resource_and_name;
229   {
230     mutex_lock l(mu_);
231     Container* b = gtl::FindPtrOrNull(containers_, container);
232     if (b == nullptr) {
233       return errors::NotFound("Container ", container, " does not exist.");
234     }
235     auto iter = b->find({type_hash_code, resource_name});
236     if (iter == b->end()) {
237       return errors::NotFound("Resource ", container, "/", resource_name, "/",
238                               type_name, " does not exist.");
239     }
240     std::swap(resource_and_name, iter->second);
241     b->erase(iter);
242   }
243   DCHECK(resource_and_name.resource != nullptr);
244   return Status::OK();
245 }
246 
DoDelete(const string & container,TypeIndex type,const string & resource_name)247 Status ResourceMgr::DoDelete(const string& container, TypeIndex type,
248                              const string& resource_name) {
249   return DoDelete(container, type.hash_code(), resource_name, type.name());
250 }
251 
Delete(const ResourceHandle & handle)252 Status ResourceMgr::Delete(const ResourceHandle& handle) {
253   return DoDelete(handle.container(), handle.hash_code(), handle.name(),
254                   "<unknown>");
255 }
256 
Cleanup(const string & container)257 Status ResourceMgr::Cleanup(const string& container) {
258   {
259     tf_shared_lock l(mu_);
260     if (!gtl::FindOrNull(containers_, container)) {
261       // Nothing to cleanup.
262       return Status::OK();
263     }
264   }
265   Container* b = nullptr;
266   {
267     mutex_lock l(mu_);
268     auto iter = containers_.find(container);
269     if (iter == containers_.end()) {
270       // Nothing to cleanup, it's OK (concurrent cleanup).
271       return Status::OK();
272     }
273     b = iter->second;
274     containers_.erase(iter);
275   }
276   CHECK(b != nullptr);
277   delete b;
278   return Status::OK();
279 }
280 
IsValidContainerName(StringPiece s)281 static bool IsValidContainerName(StringPiece s) {
282   using ::tensorflow::strings::Scanner;
283   return Scanner(s)
284       .One(Scanner::LETTER_DIGIT_DOT)
285       .Any(Scanner::LETTER_DIGIT_DASH_DOT_SLASH)
286       .Eos()
287       .GetResult();
288 }
289 
Init(ResourceMgr * rmgr,const NodeDef & ndef,bool use_node_name_as_default)290 Status ContainerInfo::Init(ResourceMgr* rmgr, const NodeDef& ndef,
291                            bool use_node_name_as_default) {
292   CHECK(rmgr);
293   rmgr_ = rmgr;
294   string attr_container;
295   TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "container", &attr_container));
296   if (!attr_container.empty() && !IsValidContainerName(attr_container)) {
297     return errors::InvalidArgument("container contains invalid characters: ",
298                                    attr_container);
299   }
300   string attr_shared_name;
301   TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "shared_name", &attr_shared_name));
302   if (!attr_shared_name.empty() && (attr_shared_name[0] == '_')) {
303     return errors::InvalidArgument("shared_name cannot start with '_':",
304                                    attr_shared_name);
305   }
306   if (!attr_container.empty()) {
307     container_ = attr_container;
308   } else {
309     container_ = rmgr_->default_container();
310   }
311   if (!attr_shared_name.empty()) {
312     name_ = attr_shared_name;
313   } else if (use_node_name_as_default) {
314     name_ = ndef.name();
315   } else {
316     resource_is_private_to_kernel_ = true;
317     static std::atomic<int64> counter(0);
318     name_ = strings::StrCat("_", counter.fetch_add(1), "_", ndef.name());
319   }
320   return Status::OK();
321 }
322 
DebugString() const323 string ContainerInfo::DebugString() const {
324   return strings::StrCat("[", container(), ",", name(), ",",
325                          resource_is_private_to_kernel() ? "private" : "public",
326                          "]");
327 }
328 
HandleFromInput(OpKernelContext * ctx,int input)329 const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) {
330   return ctx->input(input).flat<ResourceHandle>()(0);
331 }
332 
HandleFromInput(OpKernelContext * ctx,StringPiece input,ResourceHandle * handle)333 Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
334                        ResourceHandle* handle) {
335   const Tensor* tensor;
336   TF_RETURN_IF_ERROR(ctx->input(input, &tensor));
337   *handle = tensor->flat<ResourceHandle>()(0);
338   return Status::OK();
339 }
340 
LookupResource(OpKernelContext * ctx,const ResourceHandle & p,ResourceBase ** value)341 Status LookupResource(OpKernelContext* ctx, const ResourceHandle& p,
342                       ResourceBase** value) {
343   TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p));
344   return ctx->resource_manager()->Lookup(p, value);
345 }
346 
DeleteResource(OpKernelContext * ctx,const ResourceHandle & p)347 Status DeleteResource(OpKernelContext* ctx, const ResourceHandle& p) {
348   TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p));
349   return ctx->resource_manager()->Delete(p);
350 }
351 
352 }  //  end namespace tensorflow
353