• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/framework/resource_mgr.h"
17 
18 #include <atomic>
19 
20 #include "tensorflow/core/framework/device_attributes.pb.h"
21 #include "tensorflow/core/framework/node_def.pb.h"
22 #include "tensorflow/core/framework/node_def_util.h"
23 #include "tensorflow/core/lib/core/errors.h"
24 #include "tensorflow/core/lib/gtl/map_util.h"
25 #include "tensorflow/core/lib/strings/scanner.h"
26 #include "tensorflow/core/lib/strings/str_util.h"
27 #include "tensorflow/core/lib/strings/stringprintf.h"
28 #include "tensorflow/core/platform/demangle.h"
29 #include "tensorflow/core/platform/stacktrace.h"
30 
31 namespace tensorflow {
32 
33 // Used to generate unique names for anonymous variables
34 static std::atomic<int64> current_id_;
35 
MakeResourceHandle(const string & container,const string & name,const DeviceBase & device,const TypeIndex & type_index,const std::vector<DtypeAndPartialTensorShape> & dtypes_and_shapes,const absl::optional<ManagedStackTrace> & definition_stack_trace)36 ResourceHandle MakeResourceHandle(
37     const string& container, const string& name, const DeviceBase& device,
38     const TypeIndex& type_index,
39     const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes,
40     const absl::optional<ManagedStackTrace>& definition_stack_trace) {
41   ResourceHandle result;
42   result.set_device(device.name());
43   result.set_container(container);
44   result.set_definition_stack_trace(definition_stack_trace);
45   if (name == ResourceHandle::ANONYMOUS_NAME) {
46     result.set_name(strings::StrCat("_AnonymousVar", current_id_.fetch_add(1)));
47   } else {
48     result.set_name(name);
49   }
50   result.set_hash_code(type_index.hash_code());
51   result.set_maybe_type_name(type_index.name());
52   result.set_dtypes_and_shapes(dtypes_and_shapes);
53   return result;
54 }
55 
MakeResourceHandleToOutput(OpKernelContext * context,int output_index,const string & container,const string & name,const TypeIndex & type_index)56 Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index,
57                                   const string& container, const string& name,
58                                   const TypeIndex& type_index) {
59   Tensor* handle;
60   TF_RETURN_IF_ERROR(
61       context->allocate_output(output_index, TensorShape({}), &handle));
62   handle->scalar<ResourceHandle>()() =
63       MakeResourceHandle(container, name, *context->device(), type_index);
64   return Status::OK();
65 }
66 
67 namespace internal {
68 
ValidateDevice(OpKernelContext * ctx,const ResourceHandle & p)69 Status ValidateDevice(OpKernelContext* ctx, const ResourceHandle& p) {
70   if (ctx->device()->attributes().name() != p.device()) {
71     return errors::InvalidArgument(
72         "Trying to access resource ", p.name(), " located in device ",
73         p.device(), " from device ", ctx->device()->attributes().name());
74   }
75   return Status::OK();
76 }
77 
78 }  // end namespace internal
79 
InsertDebugTypeName(uint64 hash_code,const string & type_name)80 Status ResourceMgr::InsertDebugTypeName(uint64 hash_code,
81                                         const string& type_name) {
82   auto iter = debug_type_names_.emplace(hash_code, type_name);
83   if (iter.first->second != type_name) {
84     return errors::AlreadyExists("Duplicate hash code found for type ",
85                                  type_name);
86   }
87   return Status::OK();
88 }
89 
DebugTypeName(uint64 hash_code) const90 const char* ResourceMgr::DebugTypeName(uint64 hash_code) const {
91   auto type_name_iter = debug_type_names_.find(hash_code);
92   if (type_name_iter == debug_type_names_.end()) {
93     return "<unknown>";
94   } else {
95     return type_name_iter->second.c_str();
96   }
97 }
98 
ResourceAndName()99 ResourceMgr::ResourceAndName::ResourceAndName()
100     : resource(nullptr), name(nullptr) {}
101 
ResourceAndName(ResourceBase * resource,string name)102 ResourceMgr::ResourceAndName::ResourceAndName(ResourceBase* resource,
103                                               string name)
104     : resource(resource), name(absl::make_unique<string>(std::move(name))) {}
105 
ResourceAndName(ResourceAndName && other)106 ResourceMgr::ResourceAndName::ResourceAndName(
107     ResourceAndName&& other) noexcept {
108   resource = std::move(other.resource);
109   name = std::move(other.name);
110 }
111 
~ResourceAndName()112 ResourceMgr::ResourceAndName::~ResourceAndName() {}
113 
operator =(ResourceAndName && other)114 ResourceMgr::ResourceAndName& ResourceMgr::ResourceAndName::operator=(
115     ResourceAndName&& other) noexcept {
116   resource = std::move(other.resource);
117   name = std::move(other.name);
118   return *this;
119 }
120 
ResourceMgr()121 ResourceMgr::ResourceMgr() : default_container_("localhost") {}
122 
ResourceMgr(const string & default_container)123 ResourceMgr::ResourceMgr(const string& default_container)
124     : default_container_(default_container) {}
125 
~ResourceMgr()126 ResourceMgr::~ResourceMgr() { Clear(); }
127 
Clear()128 void ResourceMgr::Clear() {
129   // We do the deallocation outside of the lock to avoid a potential deadlock
130   // in case any of the destructors access the resource manager.
131   std::unordered_map<string, Container*> tmp_containers;
132   {
133     mutex_lock l(mu_);
134     tmp_containers = std::move(containers_);
135   }
136   for (const auto& p : tmp_containers) {
137     delete p.second;
138   }
139   tmp_containers.clear();
140 }
141 
DebugString() const142 string ResourceMgr::DebugString() const {
143   mutex_lock l(mu_);
144   struct Line {
145     const string* container;
146     const string type;
147     const string* resource;
148     const string detail;
149   };
150   std::vector<Line> lines;
151   for (const auto& p : containers_) {
152     const string& container = p.first;
153     for (const auto& q : *p.second) {
154       const Key& key = q.first;
155       const char* type = DebugTypeName(key.first);
156       Line l{&container, port::Demangle(type), q.second.name.get(),
157              q.second.resource->DebugString()};
158       lines.push_back(l);
159     }
160   }
161   std::vector<string> text;
162   text.reserve(lines.size());
163   for (const Line& line : lines) {
164     text.push_back(strings::Printf(
165         "%-20s | %-40s | %-40s | %-s", line.container->c_str(),
166         line.type.c_str(), line.resource->c_str(), line.detail.c_str()));
167   }
168   std::sort(text.begin(), text.end());
169   return absl::StrJoin(text, "\n");
170 }
171 
DoCreate(const string & container,TypeIndex type,const string & name,ResourceBase * resource)172 Status ResourceMgr::DoCreate(const string& container, TypeIndex type,
173                              const string& name, ResourceBase* resource) {
174   Container** b = &containers_[container];
175   if (*b == nullptr) {
176     *b = new Container;
177   }
178 
179   // NOTE: Separating out the construction of the map key and value so that the
180   // key can contain a StringPiece that borrows from the string in the value.
181   ResourceAndName resource_and_name(resource, name);
182   StringPiece borrowed_name(*resource_and_name.name);
183   Container::value_type key_and_value(Key(type.hash_code(), borrowed_name),
184                                       std::move(resource_and_name));
185 
186   if ((*b)->insert(std::move(key_and_value)).second) {
187     TF_RETURN_IF_ERROR(InsertDebugTypeName(type.hash_code(), type.name()));
188     return Status::OK();
189   }
190   return errors::AlreadyExists("Resource ", container, "/", name, "/",
191                                type.name());
192 }
193 
DoLookup(const string & container,TypeIndex type,const string & name,ResourceBase ** resource) const194 Status ResourceMgr::DoLookup(const string& container, TypeIndex type,
195                              const string& name,
196                              ResourceBase** resource) const {
197   const Container* b = gtl::FindPtrOrNull(containers_, container);
198   if (b == nullptr) {
199     return errors::NotFound("Container ", container,
200                             " does not exist. (Could not find resource: ",
201                             container, "/", name, ")");
202   }
203   auto iter = b->find({type.hash_code(), name});
204   if (iter == b->end()) {
205     return errors::NotFound("Resource ", container, "/", name, "/", type.name(),
206                             " does not exist.");
207   }
208   *resource = const_cast<ResourceBase*>(iter->second.resource.get());
209   (*resource)->Ref();
210   return Status::OK();
211 }
212 
DoDelete(const string & container,uint64 type_hash_code,const string & resource_name,const string & type_name)213 Status ResourceMgr::DoDelete(const string& container, uint64 type_hash_code,
214                              const string& resource_name,
215                              const string& type_name) {
216   ResourceAndName resource_and_name;
217   {
218     mutex_lock l(mu_);
219     Container* b = gtl::FindPtrOrNull(containers_, container);
220     if (b == nullptr) {
221       return errors::NotFound("Container ", container, " does not exist.");
222     }
223     auto iter = b->find({type_hash_code, resource_name});
224     if (iter == b->end()) {
225       return errors::NotFound("Resource ", container, "/", resource_name, "/",
226                               type_name, " does not exist.");
227     }
228     std::swap(resource_and_name, iter->second);
229     b->erase(iter);
230   }
231   DCHECK(resource_and_name.resource != nullptr);
232   return Status::OK();
233 }
234 
DoDelete(const string & container,TypeIndex type,const string & resource_name)235 Status ResourceMgr::DoDelete(const string& container, TypeIndex type,
236                              const string& resource_name) {
237   return DoDelete(container, type.hash_code(), resource_name, type.name());
238 }
239 
Delete(const ResourceHandle & handle)240 Status ResourceMgr::Delete(const ResourceHandle& handle) {
241   return DoDelete(handle.container(), handle.hash_code(), handle.name(),
242                   "<unknown>");
243 }
244 
Cleanup(const string & container)245 Status ResourceMgr::Cleanup(const string& container) {
246   {
247     tf_shared_lock l(mu_);
248     if (!gtl::FindOrNull(containers_, container)) {
249       // Nothing to cleanup.
250       return Status::OK();
251     }
252   }
253   Container* b = nullptr;
254   {
255     mutex_lock l(mu_);
256     auto iter = containers_.find(container);
257     if (iter == containers_.end()) {
258       // Nothing to cleanup, it's OK (concurrent cleanup).
259       return Status::OK();
260     }
261     b = iter->second;
262     containers_.erase(iter);
263   }
264   CHECK(b != nullptr);
265   delete b;
266   return Status::OK();
267 }
268 
IsValidContainerName(StringPiece s)269 static bool IsValidContainerName(StringPiece s) {
270   using ::tensorflow::strings::Scanner;
271   return Scanner(s)
272       .One(Scanner::LETTER_DIGIT_DOT)
273       .Any(Scanner::LETTER_DIGIT_DASH_DOT_SLASH)
274       .Eos()
275       .GetResult();
276 }
277 
Init(ResourceMgr * rmgr,const NodeDef & ndef,bool use_node_name_as_default)278 Status ContainerInfo::Init(ResourceMgr* rmgr, const NodeDef& ndef,
279                            bool use_node_name_as_default) {
280   CHECK(rmgr);
281   rmgr_ = rmgr;
282   string attr_container;
283   TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "container", &attr_container));
284   if (!attr_container.empty() && !IsValidContainerName(attr_container)) {
285     return errors::InvalidArgument("container contains invalid characters: ",
286                                    attr_container);
287   }
288   string attr_shared_name;
289   TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "shared_name", &attr_shared_name));
290   if (!attr_shared_name.empty() && (attr_shared_name[0] == '_')) {
291     return errors::InvalidArgument("shared_name cannot start with '_':",
292                                    attr_shared_name);
293   }
294   if (!attr_container.empty()) {
295     container_ = attr_container;
296   } else {
297     container_ = rmgr_->default_container();
298   }
299   if (!attr_shared_name.empty()) {
300     name_ = attr_shared_name;
301   } else if (use_node_name_as_default) {
302     name_ = ndef.name();
303   } else {
304     resource_is_private_to_kernel_ = true;
305     static std::atomic<int64> counter(0);
306     name_ = strings::StrCat("_", counter.fetch_add(1), "_", ndef.name());
307   }
308   return Status::OK();
309 }
310 
DebugString() const311 string ContainerInfo::DebugString() const {
312   return strings::StrCat("[", container(), ",", name(), ",",
313                          resource_is_private_to_kernel() ? "private" : "public",
314                          "]");
315 }
316 
HandleFromInput(OpKernelContext * ctx,int input)317 const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) {
318   return ctx->input(input).flat<ResourceHandle>()(0);
319 }
320 
HandleFromInput(OpKernelContext * ctx,StringPiece input,ResourceHandle * handle)321 Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
322                        ResourceHandle* handle) {
323   const Tensor* tensor;
324   TF_RETURN_IF_ERROR(ctx->input(input, &tensor));
325   *handle = tensor->flat<ResourceHandle>()(0);
326   return Status::OK();
327 }
328 
DeleteResource(OpKernelContext * ctx,const ResourceHandle & p)329 Status DeleteResource(OpKernelContext* ctx, const ResourceHandle& p) {
330   TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p));
331   return ctx->resource_manager()->Delete(p);
332 }
333 
334 }  //  end namespace tensorflow
335