1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_C_EAGER_PARALLEL_DEVICE_PARALLEL_DEVICE_H_ 17 #define TENSORFLOW_C_EAGER_PARALLEL_DEVICE_PARALLEL_DEVICE_H_ 18 19 #include "tensorflow/c/c_api.h" 20 #include "tensorflow/c/eager/c_api.h" 21 #include "tensorflow/c/eager/c_api_experimental.h" 22 23 namespace tensorflow { 24 namespace parallel_device { 25 26 // Allocate a parallel device named `device_name` which forwards operations to 27 // `underlying_devices`, maintaining "parallel tensors" with components placed 28 // on each underlying device. 29 // 30 // For example if `device_name` is 31 // "/job:localhost/replica:0/task:0/device:CUSTOM:0" 32 // and `underlying_devices` is 33 // {"/job:localhost/replica:0/task:0/device:GPU:0", 34 // "/job:localhost/replica:0/task:0/device:GPU:1"} 35 // Then executing an operation on CUSTOM:0 will execute it on GPU:0 and GPU:1. 36 // 37 // Implicit copies onto `device_name` are allowed, replicating the value once 38 // per device in `underlying_devices`. Implicit copies off of the device throw 39 // an error. 40 // 41 // All component tensors must have the same dtype. Currently they must also have 42 // the same shape, although this requirement may be relaxed in the future. 43 // 44 // `device_name` must not name an existing physical or custom device (see 45 // the documentation for TFE_RegisterCustomDevice for more information). 46 // 47 // Tensors may be copied on or off the device explicitly using 48 // TPUReplicatedInput and TPUReplicatedOutput respectively. For example, with 49 // two component devices, running `x = TPUReplicatedInput(inputs=[a, b])` on the 50 // parallel device creates a parallel tensor `x` with `a` on the first of 51 // `underlying_devices` and `b` on the second. Running `a_unpacked, b_unpacked = 52 // TPUReplicatedOutput(input=x, num_replicas=2)` un-packs the parallel tensor 53 // into its components. 54 // 55 // The filled `device` struct and the allocated `device_info` struct may be 56 // passed to TFE_RegisterCustomDevice. The `device_name` arguments must match. 57 void AllocateParallelDevice(const char* device_name, 58 const char* const* underlying_devices, 59 int num_underlying_devices, 60 TFE_CustomDevice* device, void** device_info); 61 62 } // namespace parallel_device 63 } // namespace tensorflow 64 65 #endif // TENSORFLOW_C_EAGER_PARALLEL_DEVICE_PARALLEL_DEVICE_H_ 66