1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include <atomic>
17
18 #include "tensorflow/core/framework/resource_mgr.h"
19
20 #include "tensorflow/core/framework/device_attributes.pb.h"
21 #include "tensorflow/core/framework/node_def.pb.h"
22 #include "tensorflow/core/framework/node_def_util.h"
23 #include "tensorflow/core/lib/core/errors.h"
24 #include "tensorflow/core/lib/gtl/map_util.h"
25 #include "tensorflow/core/lib/strings/scanner.h"
26 #include "tensorflow/core/lib/strings/str_util.h"
27 #include "tensorflow/core/lib/strings/stringprintf.h"
28 #include "tensorflow/core/platform/demangle.h"
29
30 namespace tensorflow {
31
32 // Used to generate unique names for anonymous variables
33 static std::atomic<int64> current_id_;
34
MakeResourceHandle(OpKernelContext * ctx,const string & container,const string & name,const TypeIndex & type_index)35 ResourceHandle MakeResourceHandle(OpKernelContext* ctx, const string& container,
36 const string& name,
37 const TypeIndex& type_index) {
38 ResourceHandle result;
39 result.set_device(ctx->device()->attributes().name());
40 string actual_container;
41 if (!container.empty()) {
42 actual_container = container;
43 } else {
44 actual_container = ctx->resource_manager()->default_container();
45 }
46 result.set_container(actual_container);
47 if (name == ResourceHandle::ANONYMOUS_NAME) {
48 result.set_name(strings::StrCat("_AnonymousVar", current_id_.fetch_add(1)));
49 } else {
50 result.set_name(name);
51 }
52 result.set_hash_code(type_index.hash_code());
53 result.set_maybe_type_name(type_index.name());
54 return result;
55 }
56
MakeResourceHandleToOutput(OpKernelContext * context,int output_index,const string & container,const string & name,const TypeIndex & type_index)57 Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index,
58 const string& container, const string& name,
59 const TypeIndex& type_index) {
60 Tensor* handle;
61 TF_RETURN_IF_ERROR(
62 context->allocate_output(output_index, TensorShape({}), &handle));
63 handle->scalar<ResourceHandle>()() =
64 MakeResourceHandle(context, container, name, type_index);
65 return Status::OK();
66 }
67
68 namespace internal {
69
ValidateDevice(OpKernelContext * ctx,const ResourceHandle & p)70 Status ValidateDevice(OpKernelContext* ctx, const ResourceHandle& p) {
71 if (ctx->device()->attributes().name() != p.device()) {
72 return errors::InvalidArgument(
73 "Trying to access resource ", p.name(), " located in device ",
74 p.device(), " from device ", ctx->device()->attributes().name());
75 }
76 return Status::OK();
77 }
78
79 } // end namespace internal
80
InsertDebugTypeName(uint64 hash_code,const string & type_name)81 Status ResourceMgr::InsertDebugTypeName(uint64 hash_code,
82 const string& type_name) {
83 auto iter = debug_type_names_.emplace(hash_code, type_name);
84 if (iter.first->second != type_name) {
85 return errors::AlreadyExists("Duplicate hash code found for type ",
86 type_name);
87 }
88 return Status::OK();
89 }
90
DebugTypeName(uint64 hash_code) const91 const char* ResourceMgr::DebugTypeName(uint64 hash_code) const {
92 auto type_name_iter = debug_type_names_.find(hash_code);
93 if (type_name_iter == debug_type_names_.end()) {
94 return "<unknown>";
95 } else {
96 return type_name_iter->second.c_str();
97 }
98 }
99
ResourceMgr()100 ResourceMgr::ResourceMgr() : default_container_("localhost") {}
101
ResourceMgr(const string & default_container)102 ResourceMgr::ResourceMgr(const string& default_container)
103 : default_container_(default_container) {}
104
~ResourceMgr()105 ResourceMgr::~ResourceMgr() { Clear(); }
106
Clear()107 void ResourceMgr::Clear() {
108 mutex_lock l(mu_);
109 for (const auto& p : containers_) {
110 for (const auto& q : *p.second) {
111 q.second->Unref();
112 }
113 delete p.second;
114 }
115 containers_.clear();
116 }
117
DebugString() const118 string ResourceMgr::DebugString() const {
119 mutex_lock l(mu_);
120 struct Line {
121 const string* container;
122 const string type;
123 const string* resource;
124 const string detail;
125 };
126 std::vector<Line> lines;
127 for (const auto& p : containers_) {
128 const string& container = p.first;
129 for (const auto& q : *p.second) {
130 const Key& key = q.first;
131 const char* type = DebugTypeName(key.first);
132 const string& resource = key.second;
133 Line l{&container, port::Demangle(type), &resource,
134 q.second->DebugString()};
135 lines.push_back(l);
136 }
137 }
138 std::vector<string> text;
139 text.reserve(lines.size());
140 for (const Line& line : lines) {
141 text.push_back(strings::Printf(
142 "%-20s | %-40s | %-40s | %-s", line.container->c_str(),
143 line.type.c_str(), line.resource->c_str(), line.detail.c_str()));
144 }
145 std::sort(text.begin(), text.end());
146 return str_util::Join(text, "\n");
147 }
148
DoCreate(const string & container,TypeIndex type,const string & name,ResourceBase * resource)149 Status ResourceMgr::DoCreate(const string& container, TypeIndex type,
150 const string& name, ResourceBase* resource) {
151 Container** b = &containers_[container];
152 if (*b == nullptr) {
153 *b = new Container;
154 }
155 if ((*b)->insert({{type.hash_code(), name}, resource}).second) {
156 TF_RETURN_IF_ERROR(InsertDebugTypeName(type.hash_code(), type.name()));
157 return Status::OK();
158 }
159 resource->Unref();
160 return errors::AlreadyExists("Resource ", container, "/", name, "/",
161 type.name());
162 }
163
DoLookup(const string & container,TypeIndex type,const string & name,ResourceBase ** resource) const164 Status ResourceMgr::DoLookup(const string& container, TypeIndex type,
165 const string& name,
166 ResourceBase** resource) const {
167 const Container* b = gtl::FindPtrOrNull(containers_, container);
168 if (b == nullptr) {
169 return errors::NotFound("Container ", container,
170 " does not exist. (Could not find resource: ",
171 container, "/", name, ")");
172 }
173 auto r = gtl::FindPtrOrNull(*b, {type.hash_code(), name});
174 if (r == nullptr) {
175 return errors::NotFound("Resource ", container, "/", name, "/", type.name(),
176 " does not exist.");
177 }
178 *resource = const_cast<ResourceBase*>(r);
179 (*resource)->Ref();
180 return Status::OK();
181 }
182
DoDelete(const string & container,uint64 type_hash_code,const string & resource_name,const string & type_name)183 Status ResourceMgr::DoDelete(const string& container, uint64 type_hash_code,
184 const string& resource_name,
185 const string& type_name) {
186 ResourceBase* base = nullptr;
187 {
188 mutex_lock l(mu_);
189 Container* b = gtl::FindPtrOrNull(containers_, container);
190 if (b == nullptr) {
191 return errors::NotFound("Container ", container, " does not exist.");
192 }
193 auto iter = b->find({type_hash_code, resource_name});
194 if (iter == b->end()) {
195 return errors::NotFound("Resource ", container, "/", resource_name, "/",
196 type_name, " does not exist.");
197 }
198 base = iter->second;
199 b->erase(iter);
200 }
201 CHECK(base != nullptr);
202 base->Unref();
203 return Status::OK();
204 }
205
DoDelete(const string & container,TypeIndex type,const string & resource_name)206 Status ResourceMgr::DoDelete(const string& container, TypeIndex type,
207 const string& resource_name) {
208 return DoDelete(container, type.hash_code(), resource_name, type.name());
209 }
210
Delete(const ResourceHandle & handle)211 Status ResourceMgr::Delete(const ResourceHandle& handle) {
212 return DoDelete(handle.container(), handle.hash_code(), handle.name(),
213 "<unknown>");
214 }
215
Cleanup(const string & container)216 Status ResourceMgr::Cleanup(const string& container) {
217 {
218 tf_shared_lock l(mu_);
219 if (!gtl::FindOrNull(containers_, container)) {
220 // Nothing to cleanup.
221 return Status::OK();
222 }
223 }
224 Container* b = nullptr;
225 {
226 mutex_lock l(mu_);
227 auto iter = containers_.find(container);
228 if (iter == containers_.end()) {
229 // Nothing to cleanup, it's OK (concurrent cleanup).
230 return Status::OK();
231 }
232 b = iter->second;
233 containers_.erase(iter);
234 }
235 CHECK(b != nullptr);
236 for (const auto& p : *b) {
237 p.second->Unref();
238 }
239 delete b;
240 return Status::OK();
241 }
242
IsValidContainerName(StringPiece s)243 static bool IsValidContainerName(StringPiece s) {
244 using ::tensorflow::strings::Scanner;
245 return Scanner(s)
246 .One(Scanner::LETTER_DIGIT_DOT)
247 .Any(Scanner::LETTER_DIGIT_DASH_DOT_SLASH)
248 .Eos()
249 .GetResult();
250 }
251
Init(ResourceMgr * rmgr,const NodeDef & ndef,bool use_node_name_as_default)252 Status ContainerInfo::Init(ResourceMgr* rmgr, const NodeDef& ndef,
253 bool use_node_name_as_default) {
254 CHECK(rmgr);
255 rmgr_ = rmgr;
256 string attr_container;
257 TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "container", &attr_container));
258 if (!attr_container.empty() && !IsValidContainerName(attr_container)) {
259 return errors::InvalidArgument("container contains invalid characters: ",
260 attr_container);
261 }
262 string attr_shared_name;
263 TF_RETURN_IF_ERROR(GetNodeAttr(ndef, "shared_name", &attr_shared_name));
264 if (!attr_shared_name.empty() && (attr_shared_name[0] == '_')) {
265 return errors::InvalidArgument("shared_name cannot start with '_':",
266 attr_shared_name);
267 }
268 if (!attr_container.empty()) {
269 container_ = attr_container;
270 } else {
271 container_ = rmgr_->default_container();
272 }
273 if (!attr_shared_name.empty()) {
274 name_ = attr_shared_name;
275 } else if (use_node_name_as_default) {
276 name_ = ndef.name();
277 } else {
278 resource_is_private_to_kernel_ = true;
279 static std::atomic<int64> counter(0);
280 name_ = strings::StrCat("_", counter.fetch_add(1), "_", ndef.name());
281 }
282 return Status::OK();
283 }
284
DebugString() const285 string ContainerInfo::DebugString() const {
286 return strings::StrCat("[", container(), ",", name(), ",",
287 resource_is_private_to_kernel() ? "private" : "public",
288 "]");
289 }
290
HandleFromInput(OpKernelContext * ctx,int input)291 const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) {
292 return ctx->input(input).flat<ResourceHandle>()(0);
293 }
294
HandleFromInput(OpKernelContext * ctx,StringPiece input,ResourceHandle * handle)295 Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
296 ResourceHandle* handle) {
297 const Tensor* tensor;
298 TF_RETURN_IF_ERROR(ctx->input(input, &tensor));
299 *handle = tensor->flat<ResourceHandle>()(0);
300 return Status::OK();
301 }
302
DeleteResource(OpKernelContext * ctx,const ResourceHandle & p)303 Status DeleteResource(OpKernelContext* ctx, const ResourceHandle& p) {
304 TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p));
305 return ctx->resource_manager()->Delete(p);
306 }
307
ResourceHandlesShape(shape_inference::InferenceContext * c)308 Status ResourceHandlesShape(shape_inference::InferenceContext* c) {
309 int n;
310 TF_RETURN_IF_ERROR(c->GetAttr("N", &n));
311 for (int i = 0; i < n; ++i) {
312 c->set_output(i, c->Scalar());
313 }
314 return Status::OK();
315 }
316
317 } // end namespace tensorflow
318