• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 #ifndef TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_
16 #define TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_
17 
18 #include "tensorflow/c/c_api.h"
19 #include "tensorflow/c/eager/c_api.h"
20 
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24 
25 // Resets `op_to_reset` with `op_or_function_name` and `raw_device_name`. This
26 // is for performance optimization by reusing an existing unused op rather than
27 // creating a new op every time. If `raw_device_name` is `NULL` or empty, it
28 // does not set the device name. If it's not `NULL`, then it attempts to parse
29 // and set the device name. It's effectively `TFE_OpSetDevice`, but it is faster
30 // than separately calling it because if the existing op has the same
31 // `raw_device_name`, it skips parsing and just leaves it as it is.
32 TF_CAPI_EXPORT extern void TFE_OpReset(TFE_Op* op_to_reset,
33                                        const char* op_or_function_name,
34                                        const char* raw_device_name,
35                                        TF_Status* status);
36 
37 // Enables only graph collection in RunMetadata on the functions executed from
38 // this context.
39 TF_CAPI_EXPORT extern void TFE_ContextEnableGraphCollection(TFE_Context* ctx);
40 
41 // Disables only graph collection in RunMetadata on the functions executed from
42 // this context.
43 TF_CAPI_EXPORT extern void TFE_ContextDisableGraphCollection(TFE_Context* ctx);
44 
45 // TODO(fishx): Move these monitoring APIs into a separate file.
46 // -----------------------------------------------------------------------------
47 // Monitoring Counter APIs.
48 // These APIs are de-templated versions of the monitoring Counter, for SWIG.
49 
50 typedef struct TFE_MonitoringCounterCell TFE_MonitoringCounterCell;
51 
52 // Atomically increments the value of the cell. The value must be non-negative.
53 TF_CAPI_EXPORT extern void TFE_MonitoringCounterCellIncrementBy(
54     TFE_MonitoringCounterCell* cell, int64_t value);
55 
56 // Retrieves the current value of the cell.
57 TF_CAPI_EXPORT extern int64_t TFE_MonitoringCounterCellValue(
58     TFE_MonitoringCounterCell* cell);
59 
60 // APIs for Counter without label.
61 typedef struct TFE_MonitoringCounter0 TFE_MonitoringCounter0;
62 // Returns a new Counter metric object. The caller should manage lifetime of
63 // the object. Using duplicate metric name will crash the program with fatal
64 // error.
65 TF_CAPI_EXPORT extern TFE_MonitoringCounter0* TFE_MonitoringNewCounter0(
66     const char* name, TF_Status* status, const char* description);
67 // Deletes the Counter object.
68 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteCounter0(
69     TFE_MonitoringCounter0* counter);
70 // Retrieves the cell from the Counter object. The Counter object will manage
71 // lifetime of the cell.
72 TF_CAPI_EXPORT extern TFE_MonitoringCounterCell* TFE_MonitoringGetCellCounter0(
73     TFE_MonitoringCounter0* counter);
74 
75 // APIs for Counter with 1 label.
76 typedef struct TFE_MonitoringCounter1 TFE_MonitoringCounter1;
77 TF_CAPI_EXPORT extern TFE_MonitoringCounter1* TFE_MonitoringNewCounter1(
78     const char* name, TF_Status* status, const char* description,
79     const char* label1);
80 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteCounter1(
81     TFE_MonitoringCounter1* counter);
82 TF_CAPI_EXPORT extern TFE_MonitoringCounterCell* TFE_MonitoringGetCellCounter1(
83     TFE_MonitoringCounter1* counter, const char* label1);
84 
85 // APIs for Counter with 2 labels.
86 typedef struct TFE_MonitoringCounter2 TFE_MonitoringCounter2;
87 TF_CAPI_EXPORT extern TFE_MonitoringCounter2* TFE_MonitoringNewCounter2(
88     const char* name, TF_Status* status, const char* description,
89     const char* label1, const char* label2);
90 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteCounter2(
91     TFE_MonitoringCounter2* counter);
92 TF_CAPI_EXPORT extern TFE_MonitoringCounterCell* TFE_MonitoringGetCellCounter2(
93     TFE_MonitoringCounter2* counter, const char* label1, const char* label2);
94 
95 // -----------------------------------------------------------------------------
96 // Monitoring Gauge APIs.
97 // These APIs are de-templated versions of the monitoring Gauge, for SWIG.
98 
99 typedef struct TFE_MonitoringIntGaugeCell TFE_MonitoringIntGaugeCell;
100 
101 // Atomically sets the value of the cell.
102 TF_CAPI_EXPORT extern void TFE_MonitoringIntGaugeCellSet(
103     TFE_MonitoringIntGaugeCell* cell, int64_t value);
104 
105 // Retrieves the current value of the cell.
106 TF_CAPI_EXPORT extern int64_t TFE_MonitoringIntGaugeCellValue(
107     TFE_MonitoringIntGaugeCell* cell);
108 
109 // APIs for Int Gauge without label.
110 typedef struct TFE_MonitoringIntGauge0 TFE_MonitoringIntGauge0;
111 TF_CAPI_EXPORT extern TFE_MonitoringIntGauge0* TFE_MonitoringNewIntGauge0(
112     const char* name, TF_Status* out_status, const char* description);
113 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteIntGauge0(
114     TFE_MonitoringIntGauge0* gauge);
115 TF_CAPI_EXPORT extern TFE_MonitoringIntGaugeCell*
116 TFE_MonitoringGetCellIntGauge0(TFE_MonitoringIntGauge0* gauge);
117 
118 // APIs for Int Gauge with 1 label.
119 typedef struct TFE_MonitoringIntGauge1 TFE_MonitoringIntGauge1;
120 TF_CAPI_EXPORT extern TFE_MonitoringIntGauge1* TFE_MonitoringNewIntGauge1(
121     const char* name, TF_Status* out_status, const char* description,
122     const char* label1);
123 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteIntGauge1(
124     TFE_MonitoringIntGauge1* gauge);
125 TF_CAPI_EXPORT extern TFE_MonitoringIntGaugeCell*
126 TFE_MonitoringGetCellIntGauge1(TFE_MonitoringIntGauge1* gauge,
127                                const char* label1);
128 
129 // APIs for Int Gauge with 2 labels.
130 typedef struct TFE_MonitoringIntGauge2 TFE_MonitoringIntGauge2;
131 TF_CAPI_EXPORT extern TFE_MonitoringIntGauge2* TFE_MonitoringNewIntGauge2(
132     const char* name, TF_Status* out_status, const char* description,
133     const char* label1, const char* label2);
134 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteIntGauge2(
135     TFE_MonitoringIntGauge2* gauge);
136 TF_CAPI_EXPORT extern TFE_MonitoringIntGaugeCell*
137 TFE_MonitoringGetCellIntGauge2(TFE_MonitoringIntGauge2* gauge,
138                                const char* label1, const char* label2);
139 
140 typedef struct TFE_MonitoringStringGaugeCell TFE_MonitoringStringGaugeCell;
141 TF_CAPI_EXPORT extern void TFE_MonitoringStringGaugeCellSet(
142     TFE_MonitoringStringGaugeCell* cell, const char* value);
143 // Retrieves the string value and saves it in `buf`.
    // NOTE(review): the `const` qualifier on the `void` return type has no
    // effect. It can only be dropped in step with the function's definition,
    // which is not visible from this header.
144 TF_CAPI_EXPORT extern const void TFE_MonitoringStringGaugeCellValue(
145     TFE_MonitoringStringGaugeCell* cell, TF_Buffer* buf);
146 
147 // APIs for String Gauge without label.
148 typedef struct TFE_MonitoringStringGauge0 TFE_MonitoringStringGauge0;
149 TF_CAPI_EXPORT extern TFE_MonitoringStringGauge0* TFE_MonitoringNewStringGauge0(
150     const char* name, TF_Status* out_status, const char* description);
151 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge0(
152     TFE_MonitoringStringGauge0* gauge);
153 TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
154 TFE_MonitoringGetCellStringGauge0(TFE_MonitoringStringGauge0* gauge);
155 
156 // APIs for String Gauge with 1 label.
157 typedef struct TFE_MonitoringStringGauge1 TFE_MonitoringStringGauge1;
158 TF_CAPI_EXPORT extern TFE_MonitoringStringGauge1* TFE_MonitoringNewStringGauge1(
159     const char* name, TF_Status* out_status, const char* description,
160     const char* label1);
161 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge1(
162     TFE_MonitoringStringGauge1* gauge);
163 TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
164 TFE_MonitoringGetCellStringGauge1(TFE_MonitoringStringGauge1* gauge,
165                                   const char* label1);
166 
167 // APIs for String Gauge with 2 labels.
168 typedef struct TFE_MonitoringStringGauge2 TFE_MonitoringStringGauge2;
169 TF_CAPI_EXPORT extern TFE_MonitoringStringGauge2* TFE_MonitoringNewStringGauge2(
170     const char* name, TF_Status* out_status, const char* description,
171     const char* label1, const char* label2);
172 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge2(
173     TFE_MonitoringStringGauge2* gauge);
174 TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
175 TFE_MonitoringGetCellStringGauge2(TFE_MonitoringStringGauge2* gauge,
176                                   const char* label1, const char* label2);
177 
178 // APIs for String Gauge with 3 labels.
179 typedef struct TFE_MonitoringStringGauge3 TFE_MonitoringStringGauge3;
180 TF_CAPI_EXPORT extern TFE_MonitoringStringGauge3* TFE_MonitoringNewStringGauge3(
181     const char* name, TF_Status* out_status, const char* description,
182     const char* label1, const char* label2, const char* label3);
183 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge3(
184     TFE_MonitoringStringGauge3* gauge);
185 TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
186 TFE_MonitoringGetCellStringGauge3(TFE_MonitoringStringGauge3* gauge,
187                                   const char* label1, const char* label2,
188                                   const char* label3);
189 
190 // APIs for String Gauge with 4 labels.
191 typedef struct TFE_MonitoringStringGauge4 TFE_MonitoringStringGauge4;
192 TF_CAPI_EXPORT extern TFE_MonitoringStringGauge4* TFE_MonitoringNewStringGauge4(
193     const char* name, TF_Status* out_status, const char* description,
194     const char* label1, const char* label2, const char* label3,
195     const char* label4);
196 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteStringGauge4(
197     TFE_MonitoringStringGauge4* gauge);
198 TF_CAPI_EXPORT extern TFE_MonitoringStringGaugeCell*
199 TFE_MonitoringGetCellStringGauge4(TFE_MonitoringStringGauge4* gauge,
200                                   const char* label1, const char* label2,
201                                   const char* label3, const char* label4);
202 
203 typedef struct TFE_MonitoringBoolGaugeCell TFE_MonitoringBoolGaugeCell;
204 TF_CAPI_EXPORT extern void TFE_MonitoringBoolGaugeCellSet(
205     TFE_MonitoringBoolGaugeCell* cell, bool value);
206 TF_CAPI_EXPORT extern bool TFE_MonitoringBoolGaugeCellValue(
207     TFE_MonitoringBoolGaugeCell* cell);
208 
209 // APIs for Bool Gauge without label.
210 typedef struct TFE_MonitoringBoolGauge0 TFE_MonitoringBoolGauge0;
211 TF_CAPI_EXPORT extern TFE_MonitoringBoolGauge0* TFE_MonitoringNewBoolGauge0(
212     const char* name, TF_Status* out_status, const char* description);
213 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBoolGauge0(
214     TFE_MonitoringBoolGauge0* gauge);
215 TF_CAPI_EXPORT extern TFE_MonitoringBoolGaugeCell*
216 TFE_MonitoringGetCellBoolGauge0(TFE_MonitoringBoolGauge0* gauge);
217 
218 // APIs for Bool Gauge with 1 label.
219 typedef struct TFE_MonitoringBoolGauge1 TFE_MonitoringBoolGauge1;
220 TF_CAPI_EXPORT extern TFE_MonitoringBoolGauge1* TFE_MonitoringNewBoolGauge1(
221     const char* name, TF_Status* out_status, const char* description,
222     const char* label1);
223 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBoolGauge1(
224     TFE_MonitoringBoolGauge1* gauge);
225 TF_CAPI_EXPORT extern TFE_MonitoringBoolGaugeCell*
226 TFE_MonitoringGetCellBoolGauge1(TFE_MonitoringBoolGauge1* gauge,
227                                 const char* label1);
228 
229 // APIs for Bool Gauge with 2 labels.
230 typedef struct TFE_MonitoringBoolGauge2 TFE_MonitoringBoolGauge2;
231 TF_CAPI_EXPORT extern TFE_MonitoringBoolGauge2* TFE_MonitoringNewBoolGauge2(
232     const char* name, TF_Status* out_status, const char* description,
233     const char* label1, const char* label2);
234 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBoolGauge2(
235     TFE_MonitoringBoolGauge2* gauge);
236 TF_CAPI_EXPORT extern TFE_MonitoringBoolGaugeCell*
237 TFE_MonitoringGetCellBoolGauge2(TFE_MonitoringBoolGauge2* gauge,
238                                 const char* label1, const char* label2);
239 
240 // -----------------------------------------------------------------------------
241 // Monitoring Sampler APIs.
242 // These APIs are de-templated versions of the monitoring Sampler, for SWIG.
243 
244 typedef struct TFE_MonitoringSamplerCell TFE_MonitoringSamplerCell;
245 
246 // Atomically adds the value to the cell.
247 TF_CAPI_EXPORT extern void TFE_MonitoringSamplerCellAdd(
248     TFE_MonitoringSamplerCell* cell, double value);
249 
250 // Retrieves the current value of the cell. The return value is a HistogramProto
251 // saved in buffer.
252 TF_CAPI_EXPORT extern void TFE_MonitoringSamplerCellValue(
253     TFE_MonitoringSamplerCell* cell, TF_Buffer* buf);
254 
255 // APIs for sampler buckets
256 typedef struct TFE_MonitoringBuckets TFE_MonitoringBuckets;
257 TF_CAPI_EXPORT extern TFE_MonitoringBuckets*
258 TFE_MonitoringNewExponentialBuckets(double scale, double growth_factor,
259                                     int bucket_count);
260 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteBuckets(
261     TFE_MonitoringBuckets* buckets);
262 
263 // APIs for Sampler without label.
264 typedef struct TFE_MonitoringSampler0 TFE_MonitoringSampler0;
265 TF_CAPI_EXPORT extern TFE_MonitoringSampler0* TFE_MonitoringNewSampler0(
266     const char* name, TFE_MonitoringBuckets* buckets, TF_Status* out_status,
267     const char* description);
268 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteSampler0(
269     TFE_MonitoringSampler0* sampler);
270 TF_CAPI_EXPORT extern TFE_MonitoringSamplerCell* TFE_MonitoringGetCellSampler0(
271     TFE_MonitoringSampler0* sampler);
272 
273 // APIs for Sampler with 1 label.
274 typedef struct TFE_MonitoringSampler1 TFE_MonitoringSampler1;
275 TF_CAPI_EXPORT extern TFE_MonitoringSampler1* TFE_MonitoringNewSampler1(
276     const char* name, TFE_MonitoringBuckets* buckets, TF_Status* out_status,
277     const char* description, const char* label1);
278 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteSampler1(
279     TFE_MonitoringSampler1* sampler);
280 TF_CAPI_EXPORT extern TFE_MonitoringSamplerCell* TFE_MonitoringGetCellSampler1(
281     TFE_MonitoringSampler1* sampler, const char* label1);
282 
283 // APIs for Sampler with 2 labels.
284 typedef struct TFE_MonitoringSampler2 TFE_MonitoringSampler2;
285 TF_CAPI_EXPORT extern TFE_MonitoringSampler2* TFE_MonitoringNewSampler2(
286     const char* name, TFE_MonitoringBuckets* buckets, TF_Status* out_status,
287     const char* description, const char* label1, const char* label2);
288 TF_CAPI_EXPORT extern void TFE_MonitoringDeleteSampler2(
289     TFE_MonitoringSampler2* sampler);
290 TF_CAPI_EXPORT extern TFE_MonitoringSamplerCell* TFE_MonitoringGetCellSampler2(
291     TFE_MonitoringSampler2* sampler, const char* label1, const char* label2);
292 
293 // Sets whether to use TFRT
294 TF_CAPI_EXPORT extern void TFE_ContextOptionsSetTfrt(TFE_ContextOptions*,
295                                                      bool use_tfrt);
296 
297 // Sets whether to use TFRT distributed runtime
298 TF_CAPI_EXPORT extern void TFE_ContextOptionsSetTfrtDistributedRuntime(
299     TFE_ContextOptions* options, bool use_tfrt_distributed_runtime);
300 
301 // Returns the context_id from the EagerContext which is used by the
302 // EagerService to maintain consistency between client and worker. The
303 // context_id is initialized with a dummy value and is later set when the worker
304 // is initialized (either locally or remotely). The context_id can change during
305 // the process lifetime although this should cause the worker to be
306 // reinitialized (e.g. cleared caches) as well.
307 TF_CAPI_EXPORT extern uint64_t TFE_GetContextId(TFE_Context* ctx);
308 
309 // -----------------------------------------------------------------------------
310 // Cancellation APIs.
311 
312 typedef struct TFE_CancellationManager TFE_CancellationManager;
    // Declarations for creating, querying, cancelling, and deleting a
    // TFE_CancellationManager. `(void)` spells out the empty parameter list so
    // the declaration is a real prototype when this header is parsed as C.
313 TF_CAPI_EXPORT extern TFE_CancellationManager* TFE_NewCancellationManager(void);
314 TF_CAPI_EXPORT extern bool TFE_CancellationManagerIsCancelled(
315     TFE_CancellationManager* cancellation_manager);
316 TF_CAPI_EXPORT extern void TFE_CancellationManagerStartCancel(
317     TFE_CancellationManager* cancellation_manager);
318 TF_CAPI_EXPORT extern void TFE_DeleteCancellationManager(
319     TFE_CancellationManager* cancellation_manager);
320 
321 // Associates the given `cancellation_manager` with `op`, so that invoking
322 // `TFE_CancellationManagerStartCancel(cancellation_manager)` will cancel the
323 // execution of `op`.
325 TF_CAPI_EXPORT extern void TFE_OpSetCancellationManager(
326     TFE_Op* op, TFE_CancellationManager* cancellation_manager,
327     TF_Status* status);
328 
329 // -----------------------------------------------------------------------------
330 // Eager Executor APIs.
331 typedef struct TFE_Executor TFE_Executor;
332 
333 // Creates a new eager Executor. Nodes in one executor are guaranteed to be
334 // executed in sequence. Assigning nodes to different executors allows executing
335 // nodes in parallel.
336 TF_CAPI_EXPORT extern TFE_Executor* TFE_NewExecutor(bool is_async);
337 
338 // Deletes the eager Executor without waiting for enqueued nodes. Please call
339 // TFE_ExecutorWaitForAllPendingNodes before calling this API if you want to
340 // make sure all nodes are finished.
341 TF_CAPI_EXPORT extern void TFE_DeleteExecutor(TFE_Executor*);
342 
343 // Returns true if the executor is in async mode.
344 TF_CAPI_EXPORT extern bool TFE_ExecutorIsAsync(TFE_Executor*);
345 
346 // Causes the calling thread to block till all ops dispatched in this executor
347 // have been executed. Note that "execution" here refers to kernel execution /
348 // scheduling of copies, etc. Similar to sync execution, it doesn't guarantee
349 // that lower level device queues (like GPU streams) have been flushed.
350 //
351 // This call may not block for execution of ops enqueued concurrently with this
352 // call.
353 TF_CAPI_EXPORT extern void TFE_ExecutorWaitForAllPendingNodes(
354     TFE_Executor*, TF_Status* status);
355 
356 // When an error happens, any pending operations are discarded and newly issued
357 // ops return an error. This call clears the error state and re-enables
358 // execution of newly issued ops.
359 //
360 // Note that outputs of discarded ops remain in a corrupt state and should not
361 // be used for future calls.
362 // TODO(agarwal): mark the affected handles and raise errors if they are used.
363 TF_CAPI_EXPORT extern void TFE_ExecutorClearError(TFE_Executor*);
364 
365 // Sets a custom Executor for current thread. All nodes created by this thread
366 // will be added to this Executor. It will override current executor.
367 TF_CAPI_EXPORT extern void TFE_ContextSetExecutorForThread(TFE_Context*,
368                                                            TFE_Executor*);
369 
370 // Returns the Executor for current thread.
371 TF_CAPI_EXPORT extern TFE_Executor* TFE_ContextGetExecutorForThread(
372     TFE_Context*);
373 
374 // -----------------------------------------------------------------------------
375 // Dynamic cluster API.
376 
377 // Update an existing context with a new set of servers defined in a ServerDef
378 // proto. Servers can be added to and removed from the list of remote workers
379 // in the context. New set of servers identified by the ServerDef must be up
380 // when the context is updated.
381 //
382 // This API is for experimental usage and may be subject to change.
383 TF_CAPI_EXPORT extern void TFE_ContextUpdateServerDef(TFE_Context* ctx,
384                                                       int keep_alive_secs,
385                                                       const void* proto,
386                                                       size_t proto_len,
387                                                       TF_Status* status);
388 
389 // Checks whether a remote worker is alive or not. This will return true even if
390 // the context doesn't exist on the remote worker.
391 TF_CAPI_EXPORT extern bool TFE_ContextCheckAlive(TFE_Context* ctx,
392                                                  const char* worker_name,
393                                                  TF_Status* status);
394 
395 // Sync pending nodes in local executors (including the context default executor
396 // and thread executors) and streaming requests to remote executors, and get the
397 // combined status.
398 TF_CAPI_EXPORT extern void TFE_ContextAsyncWait(TFE_Context* ctx,
399                                                 TF_Status* status);
400 
401 // This function will block till the operation that produces `h` has
402 // completed. This is only valid on local TFE_TensorHandles. The pointer
403 // returned will be on the device in which the TFE_TensorHandle resides (so e.g.
404 // for a GPU tensor this will return a pointer to GPU memory). The pointer is
405 // only guaranteed to be valid until TFE_DeleteTensorHandle is called on this
406 // TensorHandle. Only supports POD data types.
407 TF_CAPI_EXPORT extern void* TFE_TensorHandleDevicePointer(TFE_TensorHandle*,
408                                                           TF_Status*);
409 
410 // This function will block till the operation that produces `h` has
411 // completed. This is only valid on local TFE_TensorHandles. Returns the size in
412 // bytes of the memory pointed to by the device pointer returned above.
413 TF_CAPI_EXPORT extern size_t TFE_TensorHandleDeviceMemorySize(TFE_TensorHandle*,
414                                                               TF_Status*);
415 
416 // Creates a new TensorHandle from memory residing in the physical device
417 // device_name. Takes ownership of the memory, and will call deleter to release
418 // it after TF no longer needs it or in case of error.
419 //
420 // Custom devices must use TFE_NewCustomDeviceTensorHandle instead.
421 TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewTensorHandleFromDeviceMemory(
422     TFE_Context* ctx, const char* device_name, TF_DataType, const int64_t* dims,
423     int num_dims, void* data, size_t len,
424     void (*deallocator)(void* data, size_t len, void* arg),
425     void* deallocator_arg, TF_Status* status);
426 
427 // Retrieves the address space (i.e. job, replica, task) of the local host and
428 // saves it in the buffer.
429 TF_CAPI_EXPORT extern void TFE_HostAddressSpace(TFE_Context* ctx,
430                                                 TF_Buffer* buf);
431 
432 // APIs for generically dealing with op attributes (e.g. when forwarding them
433 // through custom device implementations).
434 //
435 // TODO(allenl): Currently these are black boxes, but we should have some way to
436 // inspect values. This would let people e.g. copy over most attributes and then
437 // modify some based on their values.
438 
439 // A reference to an op's name -> attribute mapping
440 typedef struct TFE_OpAttrs TFE_OpAttrs;
441 
442 // Fetch a reference to `op`'s attributes. The returned reference is only valid
443 // while `op` is alive.
444 TF_CAPI_EXPORT extern const TFE_OpAttrs* TFE_OpGetAttrs(const TFE_Op* op);
445 // Add attributes in `attrs` to `op`.
446 //
447 // Does not overwrite or update existing attributes, but adds new ones.
448 TF_CAPI_EXPORT extern void TFE_OpAddAttrs(TFE_Op* op, const TFE_OpAttrs* attrs);
449 
450 // Serialize `attrs` as a tensorflow::NameAttrList protocol buffer (into `buf`),
451 // containing the op name and a map of its attributes.
452 TF_CAPI_EXPORT extern void TFE_OpAttrsSerialize(const TFE_OpAttrs* attrs,
453                                                 TF_Buffer* buf,
454                                                 TF_Status* status);
455 
456 // Set an op's attribute from a serialized AttrValue protocol buffer.
457 //
458 // Analogous to TF_SetAttrValueProto for building graph operations.
    //
    // NOTE(review): `op` is declared `const TFE_Op*` even though this call sets
    // an attribute on it — confirm whether the qualifier is intentional.
459 TF_CAPI_EXPORT extern void TFE_OpSetAttrValueProto(const TFE_Op* op,
460                                                    const char* attr_name,
461                                                    const void* proto,
462                                                    size_t proto_len,
463                                                    TF_Status* status);
464 
465 // TODO(b/166642410): It would be nice, for custom devices and for other users,
466 // to have a non-string representation of devices (TF_Device) extracted from
467 // tensors/ops/etc. and usable in APIs like OpSetDevice/ResetOp/etc.
468 
469 #define TFE_CUSTOM_DEVICE_VERSION 4
470 
471 // Struct to be filled in. Functions are required except where indicated.
472 typedef struct TFE_CustomDevice {
473   int version = TFE_CUSTOM_DEVICE_VERSION;
474   // Method to copy a tensor to the custom device.
475   TFE_TensorHandle* (*copy_tensor_to_device)(TFE_Context* context,
476                                              TFE_TensorHandle* tensor,
477                                              TF_Status* status,
478                                              void* device_info);
479 
480   // Method to copy a tensor from the custom device to a target device.
481   TFE_TensorHandle* (*copy_tensor_from_device)(TFE_Context* context,
482                                                TFE_TensorHandle* tensor,
483                                                const char* target_device_name,
484                                                TF_Status* status,
485                                                void* device_info);
486 
487   // Method to execute an operation.
488   //
489   // Arguments provide enough information to reconstruct the original `TFE_Op`,
490   // or construct a transformed version, by inspecting the passed `op`.
491   //
492   // TFE_OpGetDevice(op) records the original placement of the operation. It may
493   // be an empty string if no device was explicitly requested, but will
494   // otherwise be the name of this custom device. Ops are placed onto a custom
495   // device if any of their inputs are on that custom device, but custom devices
496   // are free to set a bad status in order to require explicit placement.
497   void (*execute)(const TFE_Op* op, int* num_outputs,
498                   TFE_TensorHandle** outputs, TF_Status* s, void* device_info);
499 
500   // Method to delete a device.
501   void (*delete_device)(void* device_info);
502 
503   // Implements TFE_CreatePackedTensorHandle when one of `handles` is on this
504   // custom device.
505   //
506   // Many devices will want to simply return an "unimplemented" status
507   // here. This is the default behavior if `pack` is null when passed to
508   // TFE_RegisterCustomDevice.
509   TFE_TensorHandle* (*pack)(TFE_Context* context, TFE_TensorHandle** handles,
510                             int num_handles, TF_Status* s,
511                             void* device_info) = nullptr;
512 } TFE_CustomDevice;
513 
514 // Registers a custom device for use with eager execution.
515 //
516 // Eager operations may be placed on this device, e.g.  `with
517 // tf.device("CUSTOM"):` from Python if `device_name` for this call is
518 // "/job:localhost/replica:0/task:0/device:CUSTOM:0".
519 //
520 // The custom device defines copy operations for moving TensorHandles on and
521 // off, and an execution operation for named operations. Often execution will
522 // simply wrap op execution on one or more physical devices.
523 //
524 // device_info is an opaque caller-defined type stored with the custom device
525 // which is passed to the functions referenced in the TFE_CustomDevice struct
526 // `device` (execute, delete_device, etc.). It can for example contain the
527 // names of wrapped devices.
528 //
529 // There are currently no graph semantics implemented for registered custom
530 // devices, so executing tf.functions which contain operations placed on custom
531 // devices will fail.
532 //
533 // `device_name` must not name an existing physical or custom device. It must
534 // follow the format:
535 //
536 //    /job:<name>/replica:<replica>/task:<task>/device:<type>:<device_num>
537 //
538 // If the device is successfully registered, `status` is set to TF_OK. Otherwise
539 // the device is not usable. In case of a bad status, `device.delete_device` is
540 // still called on `device_info` (i.e. the caller does not retain ownership).
541 //
542 // This API is highly experimental, and in particular is expected to change when
543 // it starts supporting operations with attributes and when tf.function support
544 // is added.
545 TF_CAPI_EXPORT extern void TFE_RegisterCustomDevice(TFE_Context* ctx,
546                                                     TFE_CustomDevice device,
547                                                     const char* device_name,
548                                                     void* device_info,
549                                                     TF_Status* status);
550 
551 // Struct to be filled in to define a custom device tensor handle. Fields are
552 // required except where indicated.
    //
    // Note that the struct tag is `TFE_CustomDeviceTensorHandleMethods`, while
    // the typedef name used in function signatures is
    // `TFE_CustomDeviceTensorHandle`.
553 typedef struct TFE_CustomDeviceTensorHandleMethods {
554   int version = TFE_CUSTOM_DEVICE_VERSION;
555 
556   // Computes the rank of the tensor handle.
557   //
558   // Shapes are specified via callbacks because retrieving the shape of a tensor
559   // is a blocking operation for async eager; custom devices should avoid
560   // retrieving shapes of tensors they wrap until the custom device tensor's
561   // shape is explicitly requested where possible.
562   int (*num_dims)(void* data, TF_Status* status);
563 
564   // Computes the axis length at `dim_index`.
565   int64_t (*dim)(void* data, int dim_index, TF_Status* status);
566 
      // Releases `data`. Per the TFE_NewCustomDeviceTensorHandle documentation,
      // this is called after TF no longer needs the memory or in case of an
      // error.
567   void (*deallocator)(void* data);
568 
569   // Summarizes the value of this tensor. The caller takes ownership of the
570   // returned buffer. If `status` is not TF_OK, instead returns a null pointer.
571   //
572   // Does not include the shape and dtype of the tensor (which is generally
573   // appended later), but should include any information specific to this custom
574   // device which would be useful for debugging.
575   //
576   // Optional. If null, defaults to resolving the TFE_TensorHandle into a
577   // TF_Tensor and summarizing that.
578   TF_Buffer* (*summarize)(void* data, TF_Status* status) = nullptr;
579 } TFE_CustomDeviceTensorHandle;
580 
581 // Creates a new TensorHandle from memory residing in a custom device. Takes
582 // ownership of the memory pointed to by `tensor_handle_data`, and calls
583 // `methods.deallocator` to release it after TF no longer needs it or in case of
584 // an error.
585 //
586 // This call is similar to `TFE_NewTensorHandleFromDeviceMemory`, but supports
587 // custom devices instead of physical devices and does not require blocking
588 // waiting for exact shapes.
589 TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewCustomDeviceTensorHandle(
590     TFE_Context*, const char* device_name, TF_DataType, void* data,
591     TFE_CustomDeviceTensorHandle methods, TF_Status* status);
592 
// NOTE(review): this declaration is undocumented. Judging by the names only, it
// presumably writes the definition of the function `function_name` known to
// `ctx` into `buf` and reports failures through `status` -- confirm against
// the implementation before relying on this.
593 TF_CAPI_EXPORT extern void TFE_ContextGetFunctionDef(TFE_Context* ctx,
594                                                      const char* function_name,
595                                                      TF_Buffer* buf,
596                                                      TF_Status* status);
597 
598 // Allocates and returns a new Tensor on the host.
599 //
600 // The caller must set the Tensor values by writing them to the buffer returned
601 // by TF_TensorData, whose length is given by TF_TensorByteSize.
602 TF_CAPI_EXPORT extern TF_Tensor* TFE_AllocateHostTensor(TFE_Context* ctx,
603                                                         TF_DataType dtype,
604                                                         const int64_t* dims,
605                                                         int num_dims,
606                                                         TF_Status* status);
607 
608 // Given a Tensor, wraps it with a TensorHandle.
609 //
610 // Similar to TFE_NewTensorHandle, but also takes a pointer to the TFE_Context.
611 // The context should be identical to that of the Tensor.
612 TF_CAPI_EXPORT TFE_TensorHandle* TFE_NewTensorHandleFromTensor(
613     TFE_Context* ctx, TF_Tensor* t, TF_Status* status);
614 
615 // Creates a packed TensorHandle from the given list of TensorHandles.
616 // If `handles` are on the same device, assigns the same device to the packed
617 // handle; if `handles` are on different devices, assigns a CompositeDevice to
618 // it. Note that the handle count `num_handles` is passed by pointer.
619 TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_CreatePackedTensorHandle(
620     TFE_Context* ctx, TFE_TensorHandle** handles, int* num_handles,
621     TF_Status* status);
622 
623 // Configures the soft device placement policy for the eager executor. Note
624 // that this policy is applied to any subsequent op executions.
625 TF_CAPI_EXPORT void TFE_ContextSetSoftDevicePlacement(TFE_Context* ctx,
626                                                       unsigned char enable,
627                                                       TF_Status* status);
628 
629 // Configures device placement policy logging for the eager executor. Note
630 // that this policy is applied to any subsequent op executions.
631 TF_CAPI_EXPORT void TFE_ContextSetLogDevicePlacement(TFE_Context* ctx,
632                                                      unsigned char enable,
633                                                      TF_Status* status);
634 
635 // Returns, as a C string, the device type of the operation that produced `h`.
636 TF_CAPI_EXPORT extern const char* TFE_TensorHandleDeviceType(
637     TFE_TensorHandle* h, TF_Status* status);
638 
639 // Returns, as an int, the device ID of the operation that produced `h`.
640 TF_CAPI_EXPORT extern int TFE_TensorHandleDeviceID(TFE_TensorHandle* h,
641                                                    TF_Status* status);
642 
643 // Gets a comma-separated list of the op names executed in graph functions
644 // dispatched to `ctx`. This feature is currently only enabled for TFRT debug
645 // builds, for performance and simplicity reasons.
646 TF_CAPI_EXPORT extern void TFE_GetExecutedOpNames(TFE_Context* ctx,
647                                                   TF_Buffer* buf,
648                                                   TF_Status* status);
649 
650 // Sets logical devices on the context's device manager.
651 // If logical devices were already configured at context initialization
652 // through TFE_ContextOptions, this function should not be called.
653 TF_CAPI_EXPORT extern void TFE_SetLogicalCpuDevices(TFE_Context* ctx,
654                                                     int num_cpus,
655                                                     const char* prefix,
656                                                     TF_Status* status);
657 
658 // Sets a configuration key and value using the coordination service.
659 // If the coordination service is enabled, the key-value will be stored on the
660 // leader and become accessible to all workers in the cluster.
661 // Currently, a config key can only be set with one value, and subsequently
662 // setting the same key will lead to errors.
663 //
664 // Note that the key-values are only expected to be used for cluster
665 // configuration data, and should not be used for storing large amounts of data
666 // or be accessed very frequently.
667 TF_CAPI_EXPORT extern void TFE_InsertConfigKeyValue(TFE_Context* ctx,
668                                                     const char* key,
669                                                     const char* value,
670                                                     TF_Status* status);
671 
672 // Gets the value of a configuration key using the coordination service.
673 // The config key must be set before getting its value. Getting the value of
674 // a non-existent config key will result in an error.
675 TF_CAPI_EXPORT extern void TFE_GetConfigKeyValue(TFE_Context* ctx,
676                                                  const char* key,
677                                                  TF_Buffer* value_buf,
678                                                  TF_Status* status);
679 
680 // Deletes a configuration key-value. If `key` is a directory, recursively
681 // cleans up all key-values under the path specified by `key`.
682 TF_CAPI_EXPORT extern void TFE_DeleteConfigKeyValue(TFE_Context* ctx,
683                                                     const char* key,
684                                                     TF_Status* status);
685 
686 // Reports an error (specified by `error_code` and `error_message`) to the
687 // other tasks in the cluster.
688 TF_CAPI_EXPORT extern void TFE_ReportErrorToCluster(TFE_Context* ctx,
689                                                     int error_code,
690                                                     const char* error_message,
691                                                     TF_Status* status);
692 
693 #ifdef __cplusplus
694 } /* end extern "C" */
695 #endif
696 
697 #endif  // TENSORFLOW_C_EAGER_C_API_EXPERIMENTAL_H_
698