1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_SESSION_H_ 17 #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_SESSION_H_ 18 19 #include <string> 20 21 #include "tensorflow/core/common_runtime/device_mgr.h" 22 #include "tensorflow/core/distributed_runtime/cluster_function_library_runtime.h" 23 #include "tensorflow/core/distributed_runtime/graph_mgr.h" 24 #include "tensorflow/core/distributed_runtime/worker_cache.h" 25 26 namespace tensorflow { 27 28 class ClusterFunctionLibraryRuntime; 29 class GraphMgr; 30 class WorkerCacheInterface; 31 32 // WorkerSession encapsulates all of the state relating to a given session. 33 class WorkerSession { 34 public: 35 // Collection of local devices. These devices are typically 36 // RenamedDevices in all except the SessionMgr.legacy_session_ and 37 // sessions created with `isolate_session_state == false`. In the 38 // those cases, this method returns a pointer to a borrowed 39 // DeviceMgr (typically the `worker_env.device_mgr`). device_mgr()40 DeviceMgr* device_mgr() { 41 return device_mgr_ ? device_mgr_.get() : borrowed_device_mgr_; 42 } 43 remote_device_mgr()44 DynamicDeviceMgr* remote_device_mgr() { return remote_device_mgr_.get(); } 45 session_name()46 const string& session_name() const { return session_name_; } worker_name()47 const string& worker_name() const { return worker_name_; } 48 worker_cache()49 WorkerCacheInterface* worker_cache() const { return worker_cache_.get(); } graph_mgr()50 GraphMgr* graph_mgr() const { return graph_mgr_.get(); } 51 cluster_flr()52 ClusterFunctionLibraryRuntime* cluster_flr() const { 53 return cluster_flr_.get(); 54 } 55 56 WorkerSession(const string& session_name, const string& worker_name, 57 std::unique_ptr<WorkerCacheInterface> worker_cache, 58 std::unique_ptr<DeviceMgr> device_mgr, 59 std::unique_ptr<GraphMgr> graph_mgr, 60 std::unique_ptr<DynamicDeviceMgr> remote_device_mgr); 61 62 static std::shared_ptr<WorkerSession> CreateWithBorrowedDeviceMgr( 63 const string& session_name, const string& worker_name, 64 std::unique_ptr<WorkerCacheInterface> worker_cache, 65 DeviceMgr* borrowed_device_mgr, std::unique_ptr<GraphMgr> graph_mgr, 66 std::unique_ptr<DynamicDeviceMgr> remote_device_mgr); 67 68 // Update an existing worker session with new set of remote workers and 69 // devices. Added devices will be owned by the worker session, and removed 70 // devices will be freed by their names. 71 Status UpdateWorkerCacheAndDevices( 72 std::unique_ptr<WorkerCacheInterface> new_worker_cache, 73 std::vector<std::unique_ptr<Device>> added_remote_devices, 74 const std::vector<Device*>& removed_remote_devices); 75 76 ~WorkerSession(); 77 78 private: 79 WorkerSession(const string& session_name, const string& worker_name, 80 std::unique_ptr<WorkerCacheInterface> worker_cache, 81 DeviceMgr* borrowed_device_mgr, 82 std::unique_ptr<GraphMgr> graph_mgr, 83 std::unique_ptr<DynamicDeviceMgr> remote_device_mgr); 84 85 // The name of the session. 86 const string session_name_; 87 88 // The name of the worker. E.g., /job:mnist/replica:0/task:1. 89 const string worker_name_; 90 91 // Object from which WorkerInterface instances can be obtained. 92 std::unique_ptr<WorkerCacheInterface> worker_cache_; 93 94 // graph_mgr keeps track of the registered graphs of this session. 95 // 96 // Note: graph_mgr must be deleted before rendezvous_mgr! 97 // Note: graph_mgr must be deleted before device_mgr! 98 const std::unique_ptr<GraphMgr> graph_mgr_; 99 100 std::unique_ptr<ClusterFunctionLibraryRuntime> cluster_flr_; 101 102 const std::unique_ptr<DeviceMgr> device_mgr_; 103 DeviceMgr* const borrowed_device_mgr_; // Not owned. 104 std::unique_ptr<DynamicDeviceMgr> remote_device_mgr_; 105 }; 106 107 } // namespace tensorflow 108 109 #endif // TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_SESSION_H_ 110