/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_TPU_TPU_GLOBAL_INIT_H_
#define TENSORFLOW_CORE_TPU_TPU_GLOBAL_INIT_H_

#include "absl/strings/string_view.h"
#include "tensorflow/core/common_runtime/device_set.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/protobuf/tpu/topology.pb.h"
#include "tensorflow/core/public/session.h"

namespace tensorflow {

// Initializes the TPU system globally. The initialization state can then be
// shared by different sessions running on these TPUs within the same process.
// This API is provided for multi-tenant use cases where multiple sessions in a
// process share the same set of TPUs.
//
// Returns an error Status if initialization fails; on success, returns the
// TPU topology via the `tpu_topology` output parameter.
//
// REQUIRES:
// * Call this API before running any sessions that use TPUs.
// * If you use this API for initialization, don't also use the TPU
// configuration ops within your graph; otherwise, whichever initialization
// path runs second will return errors.
//
// DISTRIBUTED SETUP:
// To properly initialize a TPU topology that spans more than a single donut,
// the caller must provide the following arguments correctly (see the example
// sketch after the declaration below):
//
// 1. job_name
// The name of the job in the distributed setting. For example, if the job is
// '/job:tpu_worker/replica:0/task:0/...', then "tpu_worker" is the desired
// job_name here.
//
// 2. session_target
// The target string used to create a Session and run the distributed TPU
// initialization graph. Generally this is the master session of the cluster.
//
// 3. device_set
// The GLOBAL set of devices in the distributed setting, including the
// "TPU_SYSTEM" devices across all tasks.
// For example, for a 4x2 setup (2 TPU workers), device_set should contain the
// two "TPU_SYSTEM" devices from the 2 tasks, along with the other
// non-"TPU_SYSTEM" devices.
Status InitializeTPUSystemGlobally(absl::string_view job_name,
                                   absl::string_view session_target,
                                   const DeviceSet& device_set, Env* env,
                                   tpu::TopologyProto* tpu_topology);
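
// Example (a minimal, hypothetical sketch -- the job name, session target,
// and DeviceSet contents below are placeholders for your cluster's actual
// configuration, not values defined by this API):
//
//   tpu::TopologyProto topology;
//   DeviceSet device_set;  // Must hold the cluster's GLOBAL device set.
//   Status status = InitializeTPUSystemGlobally(
//       /*job_name=*/"tpu_worker",
//       /*session_target=*/"grpc://master.example:8470",
//       device_set, Env::Default(), &topology);
//   if (!status.ok()) {
//     // Handle initialization failure before running any TPU sessions.
//   }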

// Convenience overload that omits the distributed-setup arguments; intended
// for single-process (non-distributed) initialization.
Status InitializeTPUSystemGlobally(Env* env, tpu::TopologyProto* tpu_topology);

// Convenience overload that additionally omits the Env and topology output.
Status InitializeTPUSystemGlobally();
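
// Example (a minimal single-process sketch):
//
//   tpu::TopologyProto topology;
//   Status status = InitializeTPUSystemGlobally(Env::Default(), &topology);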

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_TPU_TPU_GLOBAL_INIT_H_