1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_CACHE_H_ 17 #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_CACHE_H_ 18 19 #include <string> 20 #include <vector> 21 22 #include "tensorflow/core/distributed_runtime/worker_interface.h" 23 #include "tensorflow/core/framework/device_attributes.pb.h" // for DeviceLocality 24 #include "tensorflow/core/lib/core/status.h" 25 26 namespace tensorflow { 27 typedef std::function<void(const Status&)> StatusCallback; 28 29 class ChannelCache; 30 class StepStats; 31 32 class WorkerCacheInterface { 33 public: ~WorkerCacheInterface()34 virtual ~WorkerCacheInterface() {} 35 36 // Updates *workers with strings naming the remote worker tasks to 37 // which open channels have been established. 38 virtual void ListWorkers(std::vector<string>* workers) const = 0; 39 virtual void ListWorkersInJob(const string& job_name, 40 std::vector<string>* workers) const = 0; 41 42 // If "target" names a remote task for which an RPC channel exists 43 // or can be constructed, returns a pointer to a WorkerInterface object 44 // wrapping that channel. The returned value must be destroyed by 45 // calling `this->ReleaseWorker(target, ret)` 46 // TODO(mrry): rename this to GetOrCreateWorker() or something that 47 // makes it more obvious that this method returns a potentially 48 // shared object. 49 virtual WorkerInterface* CreateWorker(const string& target) = 0; 50 51 // Release a worker previously returned by this->CreateWorker(target). 52 // 53 // TODO(jeff,sanjay): Consider moving target into WorkerInterface. 54 // TODO(jeff,sanjay): Unify all worker-cache impls and factor out a 55 // per-rpc-subsystem WorkerInterface creator. ReleaseWorker(const string & target,WorkerInterface * worker)56 virtual void ReleaseWorker(const string& target, WorkerInterface* worker) { 57 // Subclasses may override to reuse worker objects. 58 delete worker; 59 } 60 61 // Set *locality with the DeviceLocality of the specified remote device 62 // within its local environment. Returns true if *locality 63 // was set, using only locally cached data. Returns false 64 // if status data for that device was not available. Never blocks. 65 virtual bool GetDeviceLocalityNonBlocking(const string& device, 66 DeviceLocality* locality) = 0; 67 68 // Set *locality with the DeviceLocality of the specified remote device 69 // within its local environment. Callback gets Status::OK if *locality 70 // was set. 71 virtual void GetDeviceLocalityAsync(const string& device, 72 DeviceLocality* locality, 73 StatusCallback done) = 0; 74 75 // Start/stop logging activity. SetLogging(bool active)76 virtual void SetLogging(bool active) {} 77 78 // Discard any saved log data. ClearLogs()79 virtual void ClearLogs() {} 80 81 // Return logs for the identified step in *ss. Any returned data will no 82 // longer be stored. RetrieveLogs(int64 step_id,StepStats * ss)83 virtual bool RetrieveLogs(int64 step_id, StepStats* ss) { return false; } 84 }; 85 } // namespace tensorflow 86 #endif // TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_WORKER_CACHE_H_ 87