1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/framework/resource_mgr.h"
17
18 #include <atomic>
19
20 #include "tensorflow/core/framework/device_attributes.pb.h"
21 #include "tensorflow/core/framework/node_def.pb.h"
22 #include "tensorflow/core/framework/node_def_util.h"
23 #include "tensorflow/core/lib/core/errors.h"
24 #include "tensorflow/core/lib/gtl/map_util.h"
25 #include "tensorflow/core/lib/strings/scanner.h"
26 #include "tensorflow/core/lib/strings/str_util.h"
27 #include "tensorflow/core/lib/strings/stringprintf.h"
28 #include "tensorflow/core/platform/demangle.h"
29 #include "tensorflow/core/platform/stacktrace.h"
30
31 namespace tensorflow {
32
33 // Used to generate unique names for anonymous variables
34 static std::atomic<int64> current_id_;
35
MakeResourceHandle(const string & container,const string & name,const DeviceBase & device,const TypeIndex & type_index,const std::vector<DtypeAndPartialTensorShape> & dtypes_and_shapes,const absl::optional<ManagedStackTrace> & definition_stack_trace)36 ResourceHandle MakeResourceHandle(
37 const string& container, const string& name, const DeviceBase& device,
38 const TypeIndex& type_index,
39 const std::vector<DtypeAndPartialTensorShape>& dtypes_and_shapes,
40 const absl::optional<ManagedStackTrace>& definition_stack_trace) {
41 ResourceHandle result;
42 result.set_device(device.name());
43 result.set_container(container);
44 result.set_definition_stack_trace(definition_stack_trace);
45 if (name == ResourceHandle::ANONYMOUS_NAME) {
46 result.set_name(strings::StrCat("_AnonymousVar", current_id_.fetch_add(1)));
47 } else {
48 result.set_name(name);
49 }
50 result.set_hash_code(type_index.hash_code());
51 result.set_maybe_type_name(type_index.name());
52 result.set_dtypes_and_shapes(dtypes_and_shapes);
53 return result;
54 }
55
MakeResourceHandleToOutput(OpKernelContext * context,int output_index,const string & container,const string & name,const TypeIndex & type_index)56 Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index,
57 const string& container, const string& name,
58 const TypeIndex& type_index) {
59 Tensor* handle;
60 TF_RETURN_IF_ERROR(
61 context->allocate_output(output_index, TensorShape({}), &handle));
62 handle->scalar<ResourceHandle>()() =
63 MakeResourceHandle(container, name, *context->device(), type_index);
64 return Status::OK();
65 }
66
67 namespace internal {
68
ValidateDevice(OpKernelContext * ctx,const ResourceHandle & p)69 Status ValidateDevice(OpKernelContext* ctx, const ResourceHandle& p) {
70 if (ctx->device()->attributes().name() != p.device()) {
71 return errors::InvalidArgument(
72 "Trying to access resource ", p.name(), " located in device ",
73 p.device(), " from device ", ctx->device()->attributes().name());
74 }
75 return Status::OK();
76 }
77
78 } // end namespace internal
79
InsertDebugTypeName(uint64 hash_code,const string & type_name)80 Status ResourceMgr::InsertDebugTypeName(uint64 hash_code,
81 const string& type_name) {
82 auto iter = debug_type_names_.emplace(hash_code, type_name);
83 if (iter.first->second != type_name) {
84 return errors::AlreadyExists("Duplicate hash code found for type ",
85 type_name);
86 }
87 return Status::OK();
88 }
89
DebugTypeName(uint64 hash_code) const90 const char* ResourceMgr::DebugTypeName(uint64 hash_code) const {
91 auto type_name_iter = debug_type_names_.find(hash_code);
92 if (type_name_iter == debug_type_names_.end()) {
93 return "<unknown>";
94 } else {
95 return type_name_iter->second.c_str();
96 }
97 }
98
// Constructs an empty entry: both the resource and the name are null.
ResourceMgr::ResourceAndName::ResourceAndName()
    : resource(nullptr), name(nullptr) {}
101
// Constructs an entry wrapping `resource` together with a heap-allocated copy
// of `name`. The name is stored behind a pointer, so its address does not
// change when the ResourceAndName object itself is moved.
ResourceMgr::ResourceAndName::ResourceAndName(ResourceBase* resource,
                                              string name)
    : resource(resource), name(absl::make_unique<string>(std::move(name))) {}
105
ResourceAndName(ResourceAndName && other)106 ResourceMgr::ResourceAndName::ResourceAndName(
107 ResourceAndName&& other) noexcept {
108 resource = std::move(other.resource);
109 name = std::move(other.name);
110 }
111
~ResourceAndName()112 ResourceMgr::ResourceAndName::~ResourceAndName() {}
113
// Move assignment: replaces this entry's resource and name with those of
// `other`. The resource is assigned first, then the name.
ResourceMgr::ResourceAndName& ResourceMgr::ResourceAndName::operator=(
    ResourceAndName&& other) noexcept {
  resource = std::move(other.resource);
  name = std::move(other.name);
  return *this;
}
120
// Creates a resource manager whose default container is named "localhost".
ResourceMgr::ResourceMgr() : default_container_("localhost") {}
122
// Creates a resource manager that uses `default_container` as the name of its
// default container.
ResourceMgr::ResourceMgr(const string& default_container)
    : default_container_(default_container) {}
125
// Destroys the manager, deleting every container it still holds via Clear().
ResourceMgr::~ResourceMgr() { Clear(); }
127
Clear()128 void ResourceMgr::Clear() {
129 // We do the deallocation outside of the lock to avoid a potential deadlock
130 // in case any of the destructors access the resource manager.
131 std::unordered_map<string, Container*> tmp_containers;
132 {
133 mutex_lock l(mu_);
134 tmp_containers = std::move(containers_);
135 }
136 for (const auto& p : tmp_containers) {
137 delete p.second;
138 }
139 tmp_containers.clear();
140 }
141
DebugString() const142 string ResourceMgr::DebugString() const {
143 mutex_lock l(mu_);
144 struct Line {
145 const string* container;
146 const string type;
147 const string* resource;
148 const string detail;
149 };
150 std::vector<Line> lines;
151 for (const auto& p : containers_) {
152 const string& container = p.first;
153 for (const auto& q : *p.second) {
154 const Key& key = q.first;
155 const char* type = DebugTypeName(key.first);
156 Line l{&container, port::Demangle(type), q.second.name.get(),
157 q.second.resource->DebugString()};
158 lines.push_back(l);
159 }
160 }
161 std::vector<string> text;
162 text.reserve(lines.size());
163 for (const Line& line : lines) {
164 text.push_back(strings::Printf(
165 "%-20s | %-40s | %-40s | %-s", line.container->c_str(),
166 line.type.c_str(), line.resource->c_str(), line.detail.c_str()));
167 }
168 std::sort(text.begin(), text.end());
169 return absl::StrJoin(text, "\n");
170 }
171
DoCreate(const string & container,TypeIndex type,const string & name,ResourceBase * resource)172 Status ResourceMgr::DoCreate(const string& container, TypeIndex type,
173 const string& name, ResourceBase* resource) {
174 Container** b = &containers_[container];
175 if (*b == nullptr) {
176 *b = new Container;
177 }
178
179 // NOTE: Separating out the construction of the map key and value so that the
180 // key can contain a StringPiece that borrows from the string in the value.
181 ResourceAndName resource_and_name(resource, name);
182 StringPiece borrowed_name(*resource_and_name.name);
183 Container::value_type key_and_value(Key(type.hash_code(), borrowed_name),
184 std::move(resource_and_name));
185
186 if ((*b)->insert(std::move(key_and_value)).second) {
187 TF_RETURN_IF_ERROR(InsertDebugTypeName(type.hash_code(), type.name()));
188 return Status::OK();
189 }
190 return errors::AlreadyExists("Resource ", container, "/", name, "/",
191 type.name());
192 }
193
DoLookup(const string & container,TypeIndex type,const string & name,ResourceBase ** resource) const194 Status ResourceMgr::DoLookup(const string& container, TypeIndex type,
195 const string& name,
196 ResourceBase** resource) const {
197 const Container* b = gtl::FindPtrOrNull(containers_, container);
198 if (b == nullptr) {
199 return errors::NotFound("Container ", container,
200 " does not exist. (Could not find resource: ",
201 container, "/", name, ")");
202 }
203 auto iter = b->find({type.hash_code(), name});
204 if (iter == b->end()) {
205 return errors::NotFound("Resource ", container, "/", name, "/", type.name(),
206 " does not exist.");
207 }
208 *resource = const_cast<ResourceBase*>(iter->second.resource.get());
209 (*resource)->Ref();
210 return Status::OK();
211 }
212
DoDelete(const string & container,uint64 type_hash_code,const string & resource_name,const string & type_name)213 Status ResourceMgr::DoDelete(const string& container, uint64 type_hash_code,
214 const string& resource_name,
215 const string& type_name) {
216 ResourceAndName resource_and_name;
217 {
218 mutex_lock l(mu_);
219 Container* b = gtl::FindPtrOrNull(containers_, container);
220 if (b == nullptr) {
221 return errors::NotFound("Container ", container, " does not exist.");
222 }
223 auto iter = b->find({type_hash_code, resource_name});
224 if (iter == b->end()) {
225 return errors::NotFound("Resource ", container, "/", resource_name, "/",
226 type_name, " does not exist.");
227 }
228 std::swap(resource_and_name, iter->second);
229 b->erase(iter);
230 }
231 DCHECK(resource_and_name.resource != nullptr);
232 return Status::OK();
233 }
234
DoDelete(const string & container,TypeIndex type,const string & resource_name)235 Status ResourceMgr::DoDelete(const string& container, TypeIndex type,
236 const string& resource_name) {
237 return DoDelete(container, type.hash_code(), resource_name, type.name());
238 }
239
Delete(const ResourceHandle & handle)240 Status ResourceMgr::Delete(const ResourceHandle& handle) {
241 return DoDelete(handle.container(), handle.hash_code(), handle.name(),
242 "<unknown>");
243 }
244
Cleanup(const string & container)245 Status ResourceMgr::Cleanup(const string& container) {
246 {
247 tf_shared_lock l(mu_);
248 if (!gtl::FindOrNull(containers_, container)) {
249 // Nothing to cleanup.
250 return Status::OK();
251 }
252 }
253 Container* b = nullptr;
254 {
255 mutex_lock l(mu_);
256 auto iter = containers_.find(container);
257 if (iter == containers_.end()) {
258 // Nothing to cleanup, it's OK (concurrent cleanup).
259 return Status::OK();
260 }
261 b = iter->second;
262 containers_.erase(iter);
263 }
264 CHECK(b != nullptr);
265 delete b;
266 return Status::OK();
267 }
268
IsValidContainerName(StringPiece s)269 static bool IsValidContainerName(StringPiece s) {
270 using ::tensorflow::strings::Scanner;
271 return Scanner(s)
272 .One(Scanner::LETTER_DIGIT_DOT)
273 .Any(Scanner::LETTER_DIGIT_DASH_DOT_SLASH)
274 .Eos()
275 .GetResult();
276 }
277
Init(ResourceMgr * rmgr,const NodeDef & ndef,bool use_node_name_as_default)278 Status ContainerInfo::Init(ResourceMgr* rmgr, const NodeDef& ndef,
279 bool use_node_name_as_default) {
280 CHECK(rmgr);
281 rmgr_ = rmgr;
282 string attr_container;
283 TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "container", &attr_container));
284 if (!attr_container.empty() && !IsValidContainerName(attr_container)) {
285 return errors::InvalidArgument("container contains invalid characters: ",
286 attr_container);
287 }
288 string attr_shared_name;
289 TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "shared_name", &attr_shared_name));
290 if (!attr_shared_name.empty() && (attr_shared_name[0] == '_')) {
291 return errors::InvalidArgument("shared_name cannot start with '_':",
292 attr_shared_name);
293 }
294 if (!attr_container.empty()) {
295 container_ = attr_container;
296 } else {
297 container_ = rmgr_->default_container();
298 }
299 if (!attr_shared_name.empty()) {
300 name_ = attr_shared_name;
301 } else if (use_node_name_as_default) {
302 name_ = ndef.name();
303 } else {
304 resource_is_private_to_kernel_ = true;
305 static std::atomic<int64> counter(0);
306 name_ = strings::StrCat("_", counter.fetch_add(1), "_", ndef.name());
307 }
308 return Status::OK();
309 }
310
DebugString() const311 string ContainerInfo::DebugString() const {
312 return strings::StrCat("[", container(), ",", name(), ",",
313 resource_is_private_to_kernel() ? "private" : "public",
314 "]");
315 }
316
HandleFromInput(OpKernelContext * ctx,int input)317 const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) {
318 return ctx->input(input).flat<ResourceHandle>()(0);
319 }
320
HandleFromInput(OpKernelContext * ctx,StringPiece input,ResourceHandle * handle)321 Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
322 ResourceHandle* handle) {
323 const Tensor* tensor;
324 TF_RETURN_IF_ERROR(ctx->input(input, &tensor));
325 *handle = tensor->flat<ResourceHandle>()(0);
326 return Status::OK();
327 }
328
DeleteResource(OpKernelContext * ctx,const ResourceHandle & p)329 Status DeleteResource(OpKernelContext* ctx, const ResourceHandle& p) {
330 TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p));
331 return ctx->resource_manager()->Delete(p);
332 }
333
334 } // end namespace tensorflow
335