• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/framework/resource_mgr.h"
17 
18 #include <atomic>
19 
20 #include "tensorflow/core/framework/device_attributes.pb.h"
21 #include "tensorflow/core/framework/node_def.pb.h"
22 #include "tensorflow/core/framework/node_def_util.h"
23 #include "tensorflow/core/lib/core/errors.h"
24 #include "tensorflow/core/lib/gtl/map_util.h"
25 #include "tensorflow/core/lib/strings/scanner.h"
26 #include "tensorflow/core/lib/strings/str_util.h"
27 #include "tensorflow/core/lib/strings/stringprintf.h"
28 #include "tensorflow/core/platform/demangle.h"
29 
30 namespace tensorflow {
31 
32 // Used to generate unique names for anonymous variables
33 static std::atomic<int64> current_id_;
34 
MakeResourceHandle(const string & container,const string & name,const DeviceBase & device,const TypeIndex & type_index,const std::vector<DtypeAndPartialTensorShape> & dtypes_and_shapes)35 ResourceHandle MakeResourceHandle(
36     const string& container, const string& name, const DeviceBase& device,
37     const TypeIndex& type_index,
38     const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes) {
39   ResourceHandle result;
40   result.set_device(device.name());
41   result.set_container(container);
42   if (name == ResourceHandle::ANONYMOUS_NAME) {
43     result.set_name(strings::StrCat("_AnonymousVar", current_id_.fetch_add(1)));
44   } else {
45     result.set_name(name);
46   }
47   result.set_hash_code(type_index.hash_code());
48   result.set_maybe_type_name(type_index.name());
49   result.set_dtypes_and_shapes(dtypes_and_shapes);
50   return result;
51 }
52 
MakeResourceHandleToOutput(OpKernelContext * context,int output_index,const string & container,const string & name,const TypeIndex & type_index)53 Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index,
54                                   const string& container, const string& name,
55                                   const TypeIndex& type_index) {
56   Tensor* handle;
57   TF_RETURN_IF_ERROR(
58       context->allocate_output(output_index, TensorShape({}), &handle));
59   handle->scalar<ResourceHandle>()() =
60       MakeResourceHandle(container, name, *context->device(), type_index);
61   return Status::OK();
62 }
63 
64 namespace internal {
65 
ValidateDevice(OpKernelContext * ctx,const ResourceHandle & p)66 Status ValidateDevice(OpKernelContext* ctx, const ResourceHandle& p) {
67   if (ctx->device()->attributes().name() != p.device()) {
68     return errors::InvalidArgument(
69         "Trying to access resource ", p.name(), " located in device ",
70         p.device(), " from device ", ctx->device()->attributes().name());
71   }
72   return Status::OK();
73 }
74 
75 }  // end namespace internal
76 
InsertDebugTypeName(uint64 hash_code,const string & type_name)77 Status ResourceMgr::InsertDebugTypeName(uint64 hash_code,
78                                         const string& type_name) {
79   auto iter = debug_type_names_.emplace(hash_code, type_name);
80   if (iter.first->second != type_name) {
81     return errors::AlreadyExists("Duplicate hash code found for type ",
82                                  type_name);
83   }
84   return Status::OK();
85 }
86 
DebugTypeName(uint64 hash_code) const87 const char* ResourceMgr::DebugTypeName(uint64 hash_code) const {
88   auto type_name_iter = debug_type_names_.find(hash_code);
89   if (type_name_iter == debug_type_names_.end()) {
90     return "<unknown>";
91   } else {
92     return type_name_iter->second.c_str();
93   }
94 }
95 
ResourceAndName()96 ResourceMgr::ResourceAndName::ResourceAndName()
97     : resource(nullptr), name(nullptr) {}
98 
ResourceAndName(ResourceBase * resource,string name)99 ResourceMgr::ResourceAndName::ResourceAndName(ResourceBase* resource,
100                                               string name)
101     : resource(resource), name(absl::make_unique<string>(std::move(name))) {}
102 
ResourceAndName(ResourceAndName && other)103 ResourceMgr::ResourceAndName::ResourceAndName(
104     ResourceAndName&& other) noexcept {
105   resource = std::move(other.resource);
106   name = std::move(other.name);
107 }
108 
~ResourceAndName()109 ResourceMgr::ResourceAndName::~ResourceAndName() {}
110 
operator =(ResourceAndName && other)111 ResourceMgr::ResourceAndName& ResourceMgr::ResourceAndName::operator=(
112     ResourceAndName&& other) noexcept {
113   resource = std::move(other.resource);
114   name = std::move(other.name);
115   return *this;
116 }
117 
ResourceMgr()118 ResourceMgr::ResourceMgr() : default_container_("localhost") {}
119 
ResourceMgr(const string & default_container)120 ResourceMgr::ResourceMgr(const string& default_container)
121     : default_container_(default_container) {}
122 
~ResourceMgr()123 ResourceMgr::~ResourceMgr() { Clear(); }
124 
Clear()125 void ResourceMgr::Clear() {
126   // We do the deallocation outside of the lock to avoid a potential deadlock
127   // in case any of the destructors access the resource manager.
128   std::unordered_map<string, Container*> tmp_containers;
129   {
130     mutex_lock l(mu_);
131     tmp_containers = std::move(containers_);
132   }
133   for (const auto& p : tmp_containers) {
134     delete p.second;
135   }
136   tmp_containers.clear();
137 }
138 
DebugString() const139 string ResourceMgr::DebugString() const {
140   mutex_lock l(mu_);
141   struct Line {
142     const string* container;
143     const string type;
144     const string* resource;
145     const string detail;
146   };
147   std::vector<Line> lines;
148   for (const auto& p : containers_) {
149     const string& container = p.first;
150     for (const auto& q : *p.second) {
151       const Key& key = q.first;
152       const char* type = DebugTypeName(key.first);
153       Line l{&container, port::Demangle(type), q.second.name.get(),
154              q.second.resource->DebugString()};
155       lines.push_back(l);
156     }
157   }
158   std::vector<string> text;
159   text.reserve(lines.size());
160   for (const Line& line : lines) {
161     text.push_back(strings::Printf(
162         "%-20s | %-40s | %-40s | %-s", line.container->c_str(),
163         line.type.c_str(), line.resource->c_str(), line.detail.c_str()));
164   }
165   std::sort(text.begin(), text.end());
166   return absl::StrJoin(text, "\n");
167 }
168 
DoCreate(const string & container,TypeIndex type,const string & name,ResourceBase * resource)169 Status ResourceMgr::DoCreate(const string& container, TypeIndex type,
170                              const string& name, ResourceBase* resource) {
171   Container** b = &containers_[container];
172   if (*b == nullptr) {
173     *b = new Container;
174   }
175 
176   // NOTE: Separating out the construction of the map key and value so that the
177   // key can contain a StringPiece that borrows from the string in the value.
178   ResourceAndName resource_and_name(resource, name);
179   StringPiece borrowed_name(*resource_and_name.name);
180   Container::value_type key_and_value(Key(type.hash_code(), borrowed_name),
181                                       std::move(resource_and_name));
182 
183   if ((*b)->insert(std::move(key_and_value)).second) {
184     TF_RETURN_IF_ERROR(InsertDebugTypeName(type.hash_code(), type.name()));
185     return Status::OK();
186   }
187   return errors::AlreadyExists("Resource ", container, "/", name, "/",
188                                type.name());
189 }
190 
DoLookup(const string & container,TypeIndex type,const string & name,ResourceBase ** resource) const191 Status ResourceMgr::DoLookup(const string& container, TypeIndex type,
192                              const string& name,
193                              ResourceBase** resource) const {
194   const Container* b = gtl::FindPtrOrNull(containers_, container);
195   if (b == nullptr) {
196     return errors::NotFound("Container ", container,
197                             " does not exist. (Could not find resource: ",
198                             container, "/", name, ")");
199   }
200   auto iter = b->find({type.hash_code(), name});
201   if (iter == b->end()) {
202     return errors::NotFound("Resource ", container, "/", name, "/", type.name(),
203                             " does not exist.");
204   }
205   *resource = const_cast<ResourceBase*>(iter->second.resource.get());
206   (*resource)->Ref();
207   return Status::OK();
208 }
209 
DoDelete(const string & container,uint64 type_hash_code,const string & resource_name,const string & type_name)210 Status ResourceMgr::DoDelete(const string& container, uint64 type_hash_code,
211                              const string& resource_name,
212                              const string& type_name) {
213   ResourceAndName resource_and_name;
214   {
215     mutex_lock l(mu_);
216     Container* b = gtl::FindPtrOrNull(containers_, container);
217     if (b == nullptr) {
218       return errors::NotFound("Container ", container, " does not exist.");
219     }
220     auto iter = b->find({type_hash_code, resource_name});
221     if (iter == b->end()) {
222       return errors::NotFound("Resource ", container, "/", resource_name, "/",
223                               type_name, " does not exist.");
224     }
225     std::swap(resource_and_name, iter->second);
226     b->erase(iter);
227   }
228   DCHECK(resource_and_name.resource != nullptr);
229   return Status::OK();
230 }
231 
DoDelete(const string & container,TypeIndex type,const string & resource_name)232 Status ResourceMgr::DoDelete(const string& container, TypeIndex type,
233                              const string& resource_name) {
234   return DoDelete(container, type.hash_code(), resource_name, type.name());
235 }
236 
Delete(const ResourceHandle & handle)237 Status ResourceMgr::Delete(const ResourceHandle& handle) {
238   return DoDelete(handle.container(), handle.hash_code(), handle.name(),
239                   "<unknown>");
240 }
241 
Cleanup(const string & container)242 Status ResourceMgr::Cleanup(const string& container) {
243   {
244     tf_shared_lock l(mu_);
245     if (!gtl::FindOrNull(containers_, container)) {
246       // Nothing to cleanup.
247       return Status::OK();
248     }
249   }
250   Container* b = nullptr;
251   {
252     mutex_lock l(mu_);
253     auto iter = containers_.find(container);
254     if (iter == containers_.end()) {
255       // Nothing to cleanup, it's OK (concurrent cleanup).
256       return Status::OK();
257     }
258     b = iter->second;
259     containers_.erase(iter);
260   }
261   CHECK(b != nullptr);
262   delete b;
263   return Status::OK();
264 }
265 
IsValidContainerName(StringPiece s)266 static bool IsValidContainerName(StringPiece s) {
267   using ::tensorflow::strings::Scanner;
268   return Scanner(s)
269       .One(Scanner::LETTER_DIGIT_DOT)
270       .Any(Scanner::LETTER_DIGIT_DASH_DOT_SLASH)
271       .Eos()
272       .GetResult();
273 }
274 
Init(ResourceMgr * rmgr,const NodeDef & ndef,bool use_node_name_as_default)275 Status ContainerInfo::Init(ResourceMgr* rmgr, const NodeDef& ndef,
276                            bool use_node_name_as_default) {
277   CHECK(rmgr);
278   rmgr_ = rmgr;
279   string attr_container;
280   TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "container", &attr_container));
281   if (!attr_container.empty() && !IsValidContainerName(attr_container)) {
282     return errors::InvalidArgument("container contains invalid characters: ",
283                                    attr_container);
284   }
285   string attr_shared_name;
286   TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "shared_name", &attr_shared_name));
287   if (!attr_shared_name.empty() && (attr_shared_name[0] == '_')) {
288     return errors::InvalidArgument("shared_name cannot start with '_':",
289                                    attr_shared_name);
290   }
291   if (!attr_container.empty()) {
292     container_ = attr_container;
293   } else {
294     container_ = rmgr_->default_container();
295   }
296   if (!attr_shared_name.empty()) {
297     name_ = attr_shared_name;
298   } else if (use_node_name_as_default) {
299     name_ = ndef.name();
300   } else {
301     resource_is_private_to_kernel_ = true;
302     static std::atomic<int64> counter(0);
303     name_ = strings::StrCat("_", counter.fetch_add(1), "_", ndef.name());
304   }
305   return Status::OK();
306 }
307 
DebugString() const308 string ContainerInfo::DebugString() const {
309   return strings::StrCat("[", container(), ",", name(), ",",
310                          resource_is_private_to_kernel() ? "private" : "public",
311                          "]");
312 }
313 
HandleFromInput(OpKernelContext * ctx,int input)314 const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) {
315   return ctx->input(input).flat<ResourceHandle>()(0);
316 }
317 
HandleFromInput(OpKernelContext * ctx,StringPiece input,ResourceHandle * handle)318 Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
319                        ResourceHandle* handle) {
320   const Tensor* tensor;
321   TF_RETURN_IF_ERROR(ctx->input(input, &tensor));
322   *handle = tensor->flat<ResourceHandle>()(0);
323   return Status::OK();
324 }
325 
DeleteResource(OpKernelContext * ctx,const ResourceHandle & p)326 Status DeleteResource(OpKernelContext* ctx, const ResourceHandle& p) {
327   TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p));
328   return ctx->resource_manager()->Delete(p);
329 }
330 
ResourceHandlesShape(shape_inference::InferenceContext * c)331 Status ResourceHandlesShape(shape_inference::InferenceContext* c) {
332   int n;
333   TF_RETURN_IF_ERROR(c->GetAttr("N", &n));
334   for (int i = 0; i < n; ++i) {
335     c->set_output(i, c->Scalar());
336   }
337   return Status::OK();
338 }
339 
340 }  //  end namespace tensorflow
341