# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for configuring TensorFlow execution."""

from typing import Union

from tensorflow.python.eager import context
from tensorflow.python.framework import errors
from tensorflow.python.util import _pywrap_determinism
from tensorflow.python.util import _pywrap_tensor_float_32_execution
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export


@tf_export('config.experimental.tensor_float_32_execution_enabled')
def tensor_float_32_execution_enabled():
  """Returns whether TensorFloat-32 is enabled.

  By default, TensorFloat-32 is enabled, but this can be changed with
  `tf.config.experimental.enable_tensor_float_32_execution`.

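  For example:

  ```python
  # TensorFloat-32 is enabled by default, so this prints True unless it was
  # disabled earlier in the program.
  print(tf.config.experimental.tensor_float_32_execution_enabled())  # True
  ```
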
  Returns:
    True if TensorFloat-32 is enabled (the default) and False otherwise
  """
  return _pywrap_tensor_float_32_execution.is_enabled()


@tf_export('config.experimental.enable_tensor_float_32_execution')
def enable_tensor_float_32_execution(enabled):
  """Enable or disable the use of TensorFloat-32 on supported hardware.

  [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format),
  or TF32 for short, is a math mode for NVIDIA Ampere GPUs. TensorFloat-32
  execution causes certain float32 ops, such as matrix multiplications and
  convolutions, to run much faster on Ampere GPUs but with reduced precision.
  This reduced precision should not impact convergence of deep learning models
  in practice.

  TensorFloat-32 is enabled by default. TensorFloat-32 is only supported on
  Ampere and subsequent GPUs, so all other hardware will use the full float32
  precision regardless of whether TensorFloat-32 is enabled or not. If you want
  to use the full float32 precision on such GPUs, you can disable
  TensorFloat-32 execution with this function. For example:

  ```python
  x = tf.fill((2, 2), 1.0001)
  y = tf.fill((2, 2), 1.)
  # TensorFloat-32 is enabled, so matmul is run with reduced precision
  print(tf.linalg.matmul(x, y))  # [[2., 2.], [2., 2.]]
  tf.config.experimental.enable_tensor_float_32_execution(False)
  # Matmul is run with full precision
  print(tf.linalg.matmul(x, y))  # [[2.0002, 2.0002], [2.0002, 2.0002]]
  ```

  To check whether TensorFloat-32 execution is currently enabled, use
  `tf.config.experimental.tensor_float_32_execution_enabled`.

  If TensorFloat-32 is enabled, float32 inputs of supported ops, such as
  `tf.linalg.matmul`, will be rounded from 23 bits of precision to 10 bits of
  precision in most cases. This allows the ops to execute much faster by
  utilizing the GPU's tensor cores. TensorFloat-32 has the same dynamic range as
  float32, meaning it is no more likely to underflow or overflow than float32.
  Ops still use float32 accumulation when TensorFloat-32 is enabled. Enabling or
  disabling TensorFloat-32 only affects Ampere GPUs and subsequent GPUs that
  support TensorFloat-32.

  Note that TensorFloat-32 is not always used in supported ops, as only inputs
  of certain shapes are supported. Support for more input shapes and more ops
  may be added in the future. As a result, the precision of float32 ops may
  decrease in minor versions of TensorFlow.

  TensorFloat-32 is also used for some complex64 ops. Currently, TensorFloat-32
  is used in fewer cases for complex64 than it is for float32.

  Args:
    enabled: Bool indicating whether to enable TensorFloat-32 execution.
  """
  _pywrap_tensor_float_32_execution.enable(enabled)


@tf_export('config.threading.get_intra_op_parallelism_threads')
def get_intra_op_parallelism_threads():
  """Get number of threads used within an individual op for parallelism.

  Certain operations like matrix multiplication and reductions can utilize
  parallel threads for speed ups. A value of 0 means the system picks an
  appropriate number.

  Returns:
    Number of parallel threads
  """
  return context.context().intra_op_parallelism_threads


@tf_export('config.threading.set_intra_op_parallelism_threads')
def set_intra_op_parallelism_threads(num_threads):
  """Set number of threads used within an individual op for parallelism.

  Certain operations like matrix multiplication and reductions can utilize
  parallel threads for speed ups. A value of 0 means the system picks an
  appropriate number.

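  For example (a minimal sketch; note that the thread count must be configured
  before the TensorFlow runtime is initialized, i.e. before any ops execute):

  ```python
  tf.config.threading.set_intra_op_parallelism_threads(2)
  assert tf.config.threading.get_intra_op_parallelism_threads() == 2
  ```
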
  Args:
    num_threads: Number of parallel threads
  """
  context.context().intra_op_parallelism_threads = num_threads


@tf_export('config.threading.get_inter_op_parallelism_threads')
def get_inter_op_parallelism_threads():
  """Get number of threads used for parallelism between independent operations.

  Determines the number of threads used by independent non-blocking operations.
  0 means the system picks an appropriate number.

  Returns:
    Number of parallel threads
  """
  return context.context().inter_op_parallelism_threads


@tf_export('config.threading.set_inter_op_parallelism_threads')
def set_inter_op_parallelism_threads(num_threads):
  """Set number of threads used for parallelism between independent operations.

  Determines the number of threads used by independent non-blocking operations.
  0 means the system picks an appropriate number.

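  For example (a minimal sketch; like the intra-op setting, this must be
  configured before the TensorFlow runtime is initialized):

  ```python
  tf.config.threading.set_inter_op_parallelism_threads(2)
  assert tf.config.threading.get_inter_op_parallelism_threads() == 2
  ```
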
  Args:
    num_threads: Number of parallel threads
  """
  context.context().inter_op_parallelism_threads = num_threads


@tf_export('config.optimizer.get_jit')
def get_optimizer_jit() -> str:
  """Returns JIT compilation configuration for code inside `tf.function`.

  Possible return values:
    - `"autoclustering"` if
      [autoclustering](https://www.tensorflow.org/xla#auto-clustering) is
      enabled.
    - `""` when no default compilation is applied.
  """
  if context.context().optimizer_jit:
    return 'autoclustering'
  return ''


@tf_export('config.optimizer.set_jit')
@deprecation.deprecated_arg_values(
    None,
    '`True` setting is deprecated, use `autoclustering` instead.',
    warn_once=True,
    jit_config=True)
def set_optimizer_jit(enabled: Union[bool, str]):
  """Configure JIT compilation.

  Note: compilation is only applied to code that is compiled into a
  graph (in TF2 that's only code inside `tf.function`).

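  For example, the following enables autoclustering and reads the setting
  back:

  ```python
  tf.config.optimizer.set_jit('autoclustering')
  assert tf.config.optimizer.get_jit() == 'autoclustering'
  ```
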
  Args:
    enabled: JIT compilation configuration.
      Possible values:
      - `"autoclustering"` (`True` is a deprecated alias): perform
        [autoclustering](https://www.tensorflow.org/xla#auto-clustering)
        (automatically identify and compile clusters of nodes) on all graphs
        using [XLA](https://www.tensorflow.org/xla).
      - `False`: do not automatically compile any graphs.
  """
  autoclustering_enabled = enabled in (True, 'autoclustering')
  context.context().optimizer_jit = autoclustering_enabled


@tf_export('config.optimizer.get_experimental_options')
def get_optimizer_experimental_options():
  """Get experimental optimizer options.

  Refer to `tf.config.optimizer.set_experimental_options` for a list of
  current options.

  Note that optimizations are only applied in graph mode (within
  `tf.function`). In addition, as these are experimental options, the list is
  subject to change.

  Returns:
    Dictionary of configured experimental optimizer options
  """
  return context.context().get_optimizer_experimental_options()


@tf_export('config.optimizer.set_experimental_options')
def set_optimizer_experimental_options(options):
  """Set experimental optimizer options.

  Note that optimizations are only applied in graph mode (within
  `tf.function`). In addition, as these are experimental options, the list is
  subject to change.

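  For example (a minimal sketch using one of the options documented below):

  ```python
  tf.config.optimizer.set_experimental_options({'constant_folding': True})
  options = tf.config.optimizer.get_experimental_options()
  assert options['constant_folding']
  ```
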
  Args:
    options: Dictionary of experimental optimizer options to configure.
      Valid keys:
      - layout_optimizer: Optimize tensor layouts, e.g. try to use the NCHW
        layout on GPUs, which is faster.
      - constant_folding: Fold constants: statically infer the value of tensors
        when possible, and materialize the result using constants.
      - shape_optimization: Simplify computations made on shapes.
      - remapping: Remap subgraphs onto more efficient implementations.
      - arithmetic_optimization: Simplify arithmetic ops with common
        sub-expression elimination and arithmetic simplification.
      - dependency_optimization: Control dependency optimizations. Remove
        redundant control dependencies, which may enable other optimizations.
        This optimizer is also essential for pruning Identity and NoOp nodes.
      - loop_optimization: Loop optimizations.
      - function_optimization: Function optimizations and inlining.
      - debug_stripper: Strips debug-related nodes from the graph.
      - disable_model_pruning: Disable removal of unnecessary ops from the
        graph.
      - scoped_allocator_optimization: Try to allocate some independent Op
        outputs contiguously in order to merge or eliminate downstream Ops.
      - pin_to_host_optimization: Force small ops onto the CPU.
      - implementation_selector: Enable the swap of kernel implementations
        based on the device placement.
      - auto_mixed_precision: Change certain float32 ops to float16 on Volta
        GPUs and above. Without the use of loss scaling, this can cause
        numerical underflow (see
        `keras.mixed_precision.experimental.LossScaleOptimizer`).
      - disable_meta_optimizer: Disable the entire meta optimizer.
      - min_graph_nodes: The minimum number of nodes in a graph for the
        optimizer to run. For smaller graphs, optimization is skipped.
  """
  context.context().set_optimizer_experimental_options(options)


@tf_export('config.get_soft_device_placement')
def get_soft_device_placement():
  """Return status of the soft device placement flag.

  If enabled, an op will be placed on CPU if any of the following are true:
    1. there's no GPU implementation for the op
    2. no GPU devices are known or registered
    3. need to co-locate with reftype input(s) which are from CPU

  If disabled, the placement is strict and CPU fallback is not allowed.
  An error is raised when an op cannot be placed onto its intended device.

  Returns:
   A boolean indicating if soft placement is enabled.
  """
  return context.context().soft_device_placement


@tf_export('config.set_soft_device_placement')
def set_soft_device_placement(enabled):
  """Enable or disable soft device placement.

  If enabled, an op will be placed on CPU if any of the following are true:
    1. there's no GPU implementation for the op
    2. no GPU devices are known or registered
    3. need to co-locate with reftype input(s) which are from CPU

  Note: by default soft device placement is enabled when running in eager mode
  (for convenience) and disabled in graph mode (for performance).

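  For example:

  ```python
  tf.config.set_soft_device_placement(True)
  assert tf.config.get_soft_device_placement()
  ```
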
  Args:
    enabled: A boolean indicating whether to enable soft placement.
  """
  context.context().soft_device_placement = enabled


@tf_export('config.experimental.get_device_policy')
def get_device_policy():
  """Gets the current device policy.

  The device policy controls how operations requiring inputs on a specific
  device (e.g., on GPU:0) handle inputs on a different device (e.g. GPU:1).

  This function only gets the device policy for the current thread. Any
  subsequently started thread will again use the default policy.

  Returns:
    Current thread device policy
  """
  device_policy = context.context().device_policy
  if device_policy == context.DEVICE_PLACEMENT_SILENT:
    return 'silent'
  elif device_policy == context.DEVICE_PLACEMENT_SILENT_FOR_INT32:
    return 'silent_for_int32'
  elif device_policy == context.DEVICE_PLACEMENT_WARN:
    return 'warn'
  elif device_policy == context.DEVICE_PLACEMENT_EXPLICIT:
    return 'explicit'
  else:
    # pylint: disable-next=no-value-for-parameter
    raise errors.InternalError(
        f'Got an invalid device policy: {device_policy!r}.')


@tf_export('config.experimental.set_device_policy')
def set_device_policy(device_policy):
  """Sets the current thread device policy.

  The device policy controls how operations requiring inputs on a specific
  device (e.g., on GPU:0) handle inputs on a different device (e.g. GPU:1).

  When using the default, an appropriate policy will be picked automatically.
  The default policy may change over time.

  This function only sets the device policy for the current thread. Any
  subsequently started thread will again use the default policy.

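  For example:

  ```python
  tf.config.experimental.set_device_policy('silent')
  assert tf.config.experimental.get_device_policy() == 'silent'
  ```
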
  Args:
    device_policy: A device policy.
      Valid values:
      - None: Switch to a system default.
      - 'warn': Copies the tensors which are not on the right device and logs a
        warning.
      - 'explicit': Raises an error if the placement is not as required.
      - 'silent': Silently copies the tensors. Note that this may hide
        performance problems as there is no notification provided when
        operations are blocked on the tensor being copied between devices.
      - 'silent_for_int32': silently copies `int32` tensors, raising errors on
        the other ones.

  Raises:
      ValueError: If an invalid `device_policy` is passed.
  """
  if device_policy == 'silent':
    context.context().device_policy = context.DEVICE_PLACEMENT_SILENT
  elif device_policy == 'silent_for_int32':
    context.context().device_policy = context.DEVICE_PLACEMENT_SILENT_FOR_INT32
  elif device_policy == 'warn':
    context.context().device_policy = context.DEVICE_PLACEMENT_WARN
  elif device_policy == 'explicit':
    context.context().device_policy = context.DEVICE_PLACEMENT_EXPLICIT
  elif device_policy is None:
    context.context().device_policy = None
  else:
    raise ValueError(
        f'Invalid argument `device_policy`: {device_policy!r}. Please refer to '
        'https://www.tensorflow.org/api_docs/python/tf/config/experimental/set_device_policy '
        'for valid `device_policy` arguments.')


@tf_export('config.experimental.get_synchronous_execution')
def get_synchronous_execution():
  """Gets whether operations are executed synchronously or asynchronously.

  TensorFlow can execute operations synchronously or asynchronously. If
  asynchronous execution is enabled, operations may return "non-ready" handles.

  Returns:
    Current thread execution mode
  """
  return context.context().execution_mode == context.SYNC


@tf_export('config.experimental.set_synchronous_execution')
def set_synchronous_execution(enable):
  """Specifies whether operations are executed synchronously or asynchronously.

  TensorFlow can execute operations synchronously or asynchronously. If
  asynchronous execution is enabled, operations may return "non-ready" handles.

  When `enable` is set to None, an appropriate value will be picked
  automatically. The value picked may change between TensorFlow releases.

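  For example:

  ```python
  tf.config.experimental.set_synchronous_execution(True)
  assert tf.config.experimental.get_synchronous_execution()
  ```
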
  Args:
    enable: Whether operations should be dispatched synchronously.
      Valid values:
      - None: sets the system default.
      - True: executes each operation synchronously.
      - False: executes each operation asynchronously.
  """
  if enable is None:
    context.context().execution_mode = None
  elif enable:
    context.context().execution_mode = context.SYNC
  else:
    context.context().execution_mode = context.ASYNC


@tf_export('config.list_physical_devices',
           'config.experimental.list_physical_devices')
@deprecation.deprecated_endpoints('config.experimental.list_physical_devices')
def list_physical_devices(device_type=None):
  """Return a list of physical devices visible to the host runtime.

  Physical devices are hardware devices present on the host machine. By default
  all discovered CPU and GPU devices are considered visible.

  This API allows querying the physical hardware resources prior to runtime
  initialization, giving an opportunity to call any additional configuration
  APIs. This is in contrast to `tf.config.list_logical_devices`, which triggers
  runtime initialization in order to list the configured devices.

  The following example lists the number of visible GPUs on the host.

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> print("Num GPUs:", len(physical_devices))
  Num GPUs: ...

  However, the number of GPUs available to the runtime may change during runtime
  initialization due to marking certain devices as not visible or configuring
  multiple logical devices.

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of discovered `tf.config.PhysicalDevice` objects
  """
  return context.context().list_physical_devices(device_type)


@tf_export('config.list_logical_devices',
           'config.experimental.list_logical_devices')
@deprecation.deprecated_endpoints('config.experimental.list_logical_devices')
def list_logical_devices(device_type=None):
  """Return a list of logical devices created by the runtime.

  Logical devices may correspond to physical devices or remote devices in the
  cluster. Operations and tensors may be placed on these devices by using the
  `name` of the `tf.config.LogicalDevice`.

  Calling `tf.config.list_logical_devices` triggers the runtime to configure any
  `tf.config.PhysicalDevice` visible to the runtime, thereby preventing
  further configuration. To avoid runtime initialization, call
  `tf.config.list_physical_devices` instead.

  For example:

  >>> logical_devices = tf.config.list_logical_devices('GPU')
  >>> if len(logical_devices) > 1:
  ...   # Allocate on GPU:0
  ...   with tf.device(logical_devices[0].name):
  ...     one = tf.constant(1)
  ...   # Allocate on GPU:1
  ...   with tf.device(logical_devices[1].name):
  ...     two = tf.constant(2)

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of initialized `LogicalDevice`s
  """
  return context.context().list_logical_devices(device_type=device_type)


@tf_export('config.get_visible_devices',
           'config.experimental.get_visible_devices')
@deprecation.deprecated_endpoints('config.experimental.get_visible_devices')
def get_visible_devices(device_type=None):
  """Get the list of visible physical devices.

  Returns the list of `PhysicalDevice`s currently marked as visible to the
  runtime. A visible device will have at least one `LogicalDevice` associated
  with it once the runtime is initialized.

  The following example verifies all visible GPUs have been disabled:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   # Disable all GPUs
  ...   tf.config.set_visible_devices([], 'GPU')
  ...   visible_devices = tf.config.get_visible_devices()
  ...   for device in visible_devices:
  ...     assert device.device_type != 'GPU'
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device_type: (optional string) Only include devices matching this device
      type. For example "CPU" or "GPU".

  Returns:
    List of visible `PhysicalDevice`s
  """
  return context.context().get_visible_devices(device_type)


@tf_export('config.set_visible_devices',
           'config.experimental.set_visible_devices')
@deprecation.deprecated_endpoints('config.experimental.set_visible_devices')
def set_visible_devices(devices, device_type=None):
  """Set the list of visible devices.

  Specifies which `PhysicalDevice` objects are visible to the runtime.
  TensorFlow will only allocate memory and place operations on visible
  physical devices, as otherwise no `LogicalDevice` will be created on them.
  By default all discovered devices are marked as visible.

  The following example demonstrates disabling the first GPU on the machine.

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   # Disable first GPU
  ...   tf.config.set_visible_devices(physical_devices[1:], 'GPU')
  ...   logical_devices = tf.config.list_logical_devices('GPU')
  ...   # Logical device was not created for first GPU
  ...   assert len(logical_devices) == len(physical_devices) - 1
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    devices: List of `PhysicalDevice`s to make visible
    device_type: (optional) Only configure devices matching this device type.
      For example "CPU" or "GPU". Other devices will be left unaltered.

  Raises:
    ValueError: If argument validation fails.
    RuntimeError: Runtime is already initialized.
  """
  context.context().set_visible_devices(devices, device_type)


# TODO(b/188089869): Redesign memory stats related APIs before moving them out
# of experimental.
@tf_export('config.experimental.get_memory_info')
def get_memory_info(device):
  """Get memory info for the chosen device, as a dict.

  This function returns a dict containing information about the device's memory
  usage. For example:

  >>> if tf.config.list_physical_devices('GPU'):
  ...   # Returns a dict in the form {'current': <current mem usage>,
  ...   #                             'peak': <peak mem usage>}
  ...   tf.config.experimental.get_memory_info('GPU:0')

  Currently returns the following keys:
    - `'current'`: The current memory used by the device, in bytes.
    - `'peak'`: The peak memory used by the device across the run of the
        program, in bytes. Can be reset with
        `tf.config.experimental.reset_memory_stats`.

  More keys may be added in the future, including device-specific keys.

  Currently only supports GPU and TPU. If called on a CPU device, an exception
  will be raised.

  For GPUs, TensorFlow will allocate all the memory by default, unless changed
  with `tf.config.experimental.set_memory_growth`. The dict specifies only the
  current and peak memory that TensorFlow is actually using, not the memory that
  TensorFlow has allocated on the GPU.

  Args:
    device: Device string to get the memory information for, e.g. `"GPU:0"`,
      `"TPU:0"`. See https://www.tensorflow.org/api_docs/python/tf/device for
      specifying device strings.

  Returns:
    A dict with keys `'current'` and `'peak'`, specifying the current and peak
    memory usage respectively.

  Raises:
    ValueError: No device found with the device name, like `"nonexistent"`.
    ValueError: Invalid device name, like `"GPU"`, `"CPU:GPU"`, `"CPU:"`.
    ValueError: Multiple devices matched with the device name.
    ValueError: Memory statistics not tracked, like `"CPU:0"`.
  """
  return context.context().get_memory_info(device)


# TODO(b/188089869): Redesign memory stats related APIs before moving them out
# of experimental.
# TODO(b/189498350): Unify the behavior on CPU, GPU and TPU.
@tf_export('config.experimental.reset_memory_stats')
def reset_memory_stats(device):
  """Resets the tracked memory stats for the chosen device.

  This function sets the tracked peak memory for a device to the device's
  current memory usage. This allows you to measure the peak memory usage for a
  specific part of your program. For example:

  >>> if tf.config.list_physical_devices('GPU'):
  ...   # Sets the peak memory to the current memory.
  ...   tf.config.experimental.reset_memory_stats('GPU:0')
  ...   # Creates the first peak memory usage.
  ...   x1 = tf.ones(1000 * 1000, dtype=tf.float64)
  ...   del x1 # Frees the memory referenced by `x1`.
  ...   peak1 = tf.config.experimental.get_memory_info('GPU:0')['peak']
  ...   # Sets the peak memory to the current memory again.
  ...   tf.config.experimental.reset_memory_stats('GPU:0')
  ...   # Creates the second peak memory usage.
  ...   x2 = tf.ones(1000 * 1000, dtype=tf.float32)
  ...   del x2
  ...   peak2 = tf.config.experimental.get_memory_info('GPU:0')['peak']
  ...   assert peak2 < peak1  # tf.float32 consumes less memory than tf.float64.

  Currently only supports GPU and TPU. If called on a CPU device, an exception
  will be raised.

  Args:
    device: Device string to reset the memory stats for, e.g. `"GPU:0"`,
      `"TPU:0"`. See https://www.tensorflow.org/api_docs/python/tf/device for
      specifying device strings.

  Raises:
    ValueError: No device found with the device name, like `"nonexistent"`.
    ValueError: Invalid device name, like `"GPU"`, `"CPU:GPU"`, `"CPU:"`.
    ValueError: Multiple devices matched with the device name.
    ValueError: Memory statistics not tracked or clearing memory statistics not
      supported, like `"CPU:0"`.
  """
  context.context().reset_memory_stats(device)


@deprecation.deprecated(
    None,
    "Use tf.config.experimental.get_memory_info(device)['current'] instead.")
@tf_export('config.experimental.get_memory_usage')
def get_memory_usage(device):
  """Get the current memory usage, in bytes, for the chosen device.

  This function is deprecated in favor of
  `tf.config.experimental.get_memory_info`. Calling this function is equivalent
  to calling `tf.config.experimental.get_memory_info(device)['current']`.

  See https://www.tensorflow.org/api_docs/python/tf/device for specifying device
  strings.

  For example:

  >>> gpu_devices = tf.config.list_physical_devices('GPU')
  >>> if gpu_devices:
  ...   tf.config.experimental.get_memory_usage('GPU:0')

  Does not work for CPU.

  For GPUs, TensorFlow will allocate all the memory by default, unless changed
  with `tf.config.experimental.set_memory_growth`. This function only returns
  the memory that TensorFlow is actually using, not the memory that TensorFlow
  has allocated on the GPU.

  Args:
    device: Device string to get the bytes in use for, e.g. `"GPU:0"`

  Returns:
    Total memory usage in bytes.

  Raises:
    ValueError: Non-existent or CPU device specified.
  """
  return get_memory_info(device)['current']


@tf_export('config.experimental.get_memory_growth')
def get_memory_growth(device):
  """Get if memory growth is enabled for a `PhysicalDevice`.

  If memory growth is enabled for a `PhysicalDevice`, the runtime initialization
  will not allocate all memory on the device.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.experimental.set_memory_growth(physical_devices[0], True)
  ...   assert tf.config.experimental.get_memory_growth(physical_devices[0])
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to query

  Returns:
    A boolean indicating the memory growth setting for the `PhysicalDevice`.

  Raises:
    ValueError: Invalid `PhysicalDevice` specified.
  """
  return context.context().get_memory_growth(device)


@tf_export('config.experimental.set_memory_growth')
def set_memory_growth(device, enable):
  """Set if memory growth should be enabled for a `PhysicalDevice`.

  If memory growth is enabled for a `PhysicalDevice`, the runtime initialization
  will not allocate all memory on the device. Memory growth cannot be configured
  on a `PhysicalDevice` with virtual devices configured.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.experimental.set_memory_growth(physical_devices[0], True)
  ... except:
  ...   # Invalid device or cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to configure
    enable: (Boolean) Whether to enable or disable memory growth

  Raises:
    ValueError: Invalid `PhysicalDevice` specified.
    RuntimeError: Runtime is already initialized.
  """
  context.context().set_memory_growth(device, enable)


@tf_export('config.experimental.get_device_details')
def get_device_details(device):
  """Returns details about a physical device.

  This API takes in a `tf.config.PhysicalDevice` returned by
  `tf.config.list_physical_devices`. It returns a dict with string keys
  containing various details about the device. Each key is only supported by a
  subset of devices, so you should not assume the returned dict will have any
  particular key.

  >>> gpu_devices = tf.config.list_physical_devices('GPU')
  >>> if gpu_devices:
  ...   details = tf.config.experimental.get_device_details(gpu_devices[0])
  ...   details.get('device_name', 'Unknown GPU')

  Currently, details are only returned for GPUs. This function returns an
  empty dict if passed a non-GPU device.

  The returned dict may have the following keys:
  * `'device_name'`: A human-readable name of the device as a string, e.g.
    "Titan V". Unlike `tf.config.PhysicalDevice.name`, this will be the same for
    multiple devices if each device is the same model. Currently only available
    for GPUs.
  * `'compute_capability'`: The
    [compute capability](https://developer.nvidia.com/cuda-gpus) of the device
    as a tuple of two ints, in the form `(major_version, minor_version)`. Only
    available for NVIDIA GPUs.

  Note: This is similar to `tf.sysconfig.get_build_info` in that both functions
  can return information relating to GPUs. However, this function returns
  run-time information about a specific device (such as a GPU's compute
  capability), while `tf.sysconfig.get_build_info` returns compile-time
  information about how TensorFlow was built (such as what version of CUDA
  TensorFlow was built for).

  Args:
    device: A `tf.config.PhysicalDevice` returned by
      `tf.config.list_physical_devices` or `tf.config.get_visible_devices`.

  Returns:
    A dict with string keys.
  """
  return context.context().get_device_details(device)


@tf_export('config.get_logical_device_configuration',
           'config.experimental.get_virtual_device_configuration')
@deprecation.deprecated_endpoints(
    'config.experimental.get_virtual_device_configuration')
def get_logical_device_configuration(device):
  """Get the virtual device configuration for a `tf.config.PhysicalDevice`.

  Returns the list of `tf.config.LogicalDeviceConfiguration`
  objects previously configured by a call to
  `tf.config.set_logical_device_configuration`.

  For example:

  >>> physical_devices = tf.config.list_physical_devices('CPU')
  >>> assert len(physical_devices) == 1, "No CPUs found"
  >>> configs = tf.config.get_logical_device_configuration(
  ...   physical_devices[0])
  >>> try:
  ...   assert configs is None
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ...   configs = tf.config.get_logical_device_configuration(
  ...     physical_devices[0])
  ...   assert len(configs) == 2
  ... except:
  ...   # Cannot modify virtual devices once initialized.
  ...   pass

  Args:
    device: `PhysicalDevice` to query

  Returns:
    List of `tf.config.LogicalDeviceConfiguration` objects or
    `None` if no virtual device configuration has been set for this physical
    device.
  """
  return context.context().get_logical_device_configuration(device)


@tf_export('config.set_logical_device_configuration',
           'config.experimental.set_virtual_device_configuration')
@deprecation.deprecated_endpoints(
    'config.experimental.set_virtual_device_configuration')
def set_logical_device_configuration(device, logical_devices):
  """Set the logical device configuration for a `tf.config.PhysicalDevice`.

  A visible `tf.config.PhysicalDevice` will by default have a single
  `tf.config.LogicalDevice` associated with it once the runtime is initialized.
  Specifying a list of `tf.config.LogicalDeviceConfiguration` objects allows
  multiple devices to be created on the same `tf.config.PhysicalDevice`.

  Logical device configurations can be modified by calling this function as
  long as the runtime is uninitialized. After the runtime is initialized,
  calling this function raises a RuntimeError.

  The following example splits the CPU into 2 logical devices:

  >>> physical_devices = tf.config.list_physical_devices('CPU')
  >>> assert len(physical_devices) == 1, "No CPUs found"
  >>> # Specify 2 virtual CPUs. Note currently memory limit is not supported.
  >>> try:
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ...   logical_devices = tf.config.list_logical_devices('CPU')
  ...   assert len(logical_devices) == 2
  ...
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration(),
  ...      tf.config.LogicalDeviceConfiguration()])
  ... except:
  ...   # Cannot modify logical devices once initialized.
  ...   pass

  The following example splits the GPU into 2 logical devices with 100 MB each:

  >>> physical_devices = tf.config.list_physical_devices('GPU')
  >>> try:
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(memory_limit=100),
  ...      tf.config.LogicalDeviceConfiguration(memory_limit=100)])
  ...
  ...   logical_devices = tf.config.list_logical_devices('GPU')
  ...   assert len(logical_devices) == len(physical_devices) + 1
  ...
  ...   tf.config.set_logical_device_configuration(
  ...     physical_devices[0],
  ...     [tf.config.LogicalDeviceConfiguration(memory_limit=10),
  ...      tf.config.LogicalDeviceConfiguration(memory_limit=10)])
  ... except:
  ...   # Invalid device or cannot modify logical devices once initialized.
  ...   pass

  Args:
    device: The `PhysicalDevice` to configure.
    logical_devices: (optional) List of `tf.config.LogicalDeviceConfiguration`
      objects to allocate for the specified `PhysicalDevice`. If None, the
      default configuration will be used.

  Raises:
    ValueError: If argument validation fails.
    RuntimeError: Runtime is already initialized.
  """
  context.context().set_logical_device_configuration(device, logical_devices)


@tf_export('config.experimental.enable_mlir_bridge')
def enable_mlir_bridge():
  """Enables experimental MLIR-Based TensorFlow Compiler Bridge.

  DO NOT USE, DEV AND TESTING ONLY AT THE MOMENT.

  NOTE: MLIR-Based TensorFlow Compiler is under active development and has
  missing features, please refrain from using. This API exists for development
  and testing only.

  TensorFlow Compiler Bridge (TF Bridge) is responsible for translating parts
  of the TensorFlow graph into a form that can be accepted as an input by a
  backend compiler such as XLA.
  """
  context.context().enable_mlir_bridge = True


@tf_export('config.experimental.enable_mlir_graph_optimization')
def enable_mlir_graph_optimization():
  """Enables experimental MLIR-Based TensorFlow Compiler Optimizations.

  DO NOT USE, DEV AND TESTING ONLY AT THE MOMENT.

  NOTE: MLIR-Based TensorFlow Compiler is under active development and has
  missing features, please refrain from using. This API exists for development
  and testing only.

  TensorFlow Compiler Optimizations are responsible for general graph-level
  optimizations that in the current stack are mostly done by Grappler graph
  optimizers.
  """
  context.context().enable_mlir_graph_optimization = True


@tf_export('config.experimental.disable_mlir_bridge')
def disable_mlir_bridge():
  """Disables experimental MLIR-Based TensorFlow Compiler Bridge."""
  context.context().enable_mlir_bridge = False


@tf_export('config.experimental.disable_mlir_graph_optimization')
def disable_mlir_graph_optimization():
  """Disables experimental MLIR-Based TensorFlow Compiler Optimizations."""
  context.context().enable_mlir_graph_optimization = False


@tf_export('config.experimental.enable_op_determinism', v1=[])
def enable_op_determinism():
  """Configures TensorFlow ops to run deterministically.

  When op determinism is enabled, TensorFlow ops will be deterministic. This
  means that if an op is run multiple times with the same inputs on the same
  hardware, it will have the exact same outputs each time. This is useful for
  debugging models. Note that determinism in general comes at the expense of
  lower performance and so your model may run slower when op determinism is
  enabled.

  If you want your TensorFlow program to run deterministically, put the
  following code near the start of your program.

  ```python
  tf.keras.utils.set_random_seed(1)
  tf.config.experimental.enable_op_determinism()
  ```

  Calling `tf.keras.utils.set_random_seed` sets the Python seed, the NumPy seed,
  and the TensorFlow seed. Setting these seeds is necessary to ensure any random
  numbers your program generates are also deterministic.

  By default, op determinism is not enabled, so ops might return different
  results when run with the same inputs. These differences are often caused by
  the use of asynchronous threads within the op nondeterministically changing
  the order in which floating-point numbers are added. Most of these cases of
  nondeterminism occur on GPUs, which have thousands of hardware threads that
  are used to run ops. Enabling determinism directs such ops to use a different
  algorithm, one that does not use threads in a nondeterministic way.

  Another potential source of nondeterminism is `tf.data` based data processing.
  Typically, this can introduce nondeterminism due to the use of parallelism in
  methods such as `Dataset.map` producing inputs or running stateful ops in a
  nondeterministic order. Enabling determinism will remove such sources of
  nondeterminism.

  Enabling determinism will likely make your model or your `tf.data` data
  processing slower. For example, `Dataset.map` can become several orders of
  magnitude slower when the map function has random ops or other stateful ops.
  See the “Determinism and tf.data” section below for more details. In future
  TensorFlow releases, we plan on improving the performance of determinism,
  especially for common scenarios such as `Dataset.map`.

  Certain ops will raise an `UnimplementedError` because they do not yet have a
  deterministic implementation. Additionally, due to bugs, some ops might be
  nondeterministic and not raise an `UnimplementedError`. If you encounter such
  ops, please [file an issue](https://github.com/tensorflow/tensorflow/issues).

  An example of enabling determinism follows. The
  `tf.nn.softmax_cross_entropy_with_logits` op is run multiple times and the
  output is shown to be the same each time. This example would likely fail when
  run on a GPU if determinism were not enabled, because
  `tf.nn.softmax_cross_entropy_with_logits` uses a nondeterministic algorithm on
  GPUs by default.

  ```python
  labels = tf.random.normal((1, 10000))
  logits = tf.random.normal((1, 10000))
  output = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                   logits=logits)
  for _ in range(5):
    output2 = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                      logits=logits)
    tf.debugging.assert_equal(output, output2)
  ```

  ## Writing deterministic models

  You can make your models deterministic by enabling op determinism. This
  means that you can train a model and finish each run with exactly the same
  trainable variables. This also means that the inferences of your
  previously-trained model will be exactly the same on each run. Typically,
  models can be made deterministic by simply setting the seeds and enabling
  op determinism, as in the example above. However, to guarantee that your
  model operates deterministically, you must meet all the following
  requirements:

  * Call `tf.config.experimental.enable_op_determinism()`, as mentioned above.
  * Reproducibly reset any pseudorandom number generators (PRNGs) you’re using,
    such as by setting the seeds for the default PRNGs in TensorFlow, Python,
    and NumPy, as mentioned above. Note that certain newer NumPy classes like
    `numpy.random.default_rng` ignore the global NumPy seed, so a seed must be
    explicitly passed to such classes, if used.
  * Use the same hardware configuration in every run.
  * Use the same software environment in every run (OS, checkpoints, version of
    CUDA and TensorFlow, environmental variables, etc). Note that determinism is
    not guaranteed across different versions of TensorFlow.
  * Do not use constructs outside TensorFlow that are nondeterministic, such as
    reading from `/dev/random` or using multiple threads/processes in ways that
    influence TensorFlow’s behavior.
  * Ensure your input pipeline is deterministic. If you use `tf.data`, this is
    done automatically (at the expense of performance). See "Determinism and
    tf.data" below for more information.
  * Do not use `tf.compat.v1.Session` and
    `tf.distribute.experimental.ParameterServerStrategy`, which can introduce
    nondeterminism. Besides ops (including `tf.data` ops), these are the only
    known potential sources of nondeterminism within TensorFlow (if you
    find more, please file an issue). Note that `tf.compat.v1.Session` is
    required to use the TF1 API, so determinism cannot be guaranteed when using
    the TF1 API.
  * Do not use nondeterministic custom ops.

  ## Additional details on determinism

  For stateful ops to be deterministic, the state of the system must be the same
  every time the op is run. For example the output of `tf.Variable.sparse_read`
  (obviously) depends on both the variable value and the `indices` function
  parameter. When determinism is enabled, the side effects of stateful ops are
  deterministic.

  TensorFlow’s random ops, such as `tf.random.normal`, will raise a
  `RuntimeError` if determinism is enabled and a seed has not been set. However,
  attempting to generate nondeterministic random numbers using Python or NumPy
  will not raise such errors. Make sure you remember to set the Python and NumPy
  seeds. Calling `tf.keras.utils.set_random_seed` is an easy way to set all
  three seeds.

  Note that latency, memory consumption, throughput, and other performance
  characteristics are *not* made deterministic by enabling op determinism.
  Only op outputs and side effects are made deterministic. Additionally, a model
  may nondeterministically raise a `tf.errors.ResourceExhaustedError` from a
  lack of memory due to the fact that memory consumption is nondeterministic.

  ## Determinism and tf.data

  Enabling deterministic ops makes `tf.data` deterministic in several ways:

  1. For dataset methods with a `deterministic` argument, such as `Dataset.map`
     and `Dataset.batch`, the `deterministic` argument is overridden to be
     `True` irrespective of its setting.
  2. The `tf.data.Option.experimental_deterministic` option is overridden to be
     `True` irrespective of its setting.
  3. In `Dataset.map` and `Dataset.interleave`, if the map or interleave
     function has stateful random ops or other stateful ops, the function will
     run serially instead of in parallel. This means the `num_parallel_calls`
     argument to `map` and `interleave` is effectively ignored.
  4. Prefetching with `Dataset.prefetch` will be disabled if any function run
     as part of the input pipeline has certain stateful ops. Similarly, any
     dataset method with a `num_parallel_calls` argument will be made to run
     serially if any function in the input pipeline has such stateful ops.
     Legacy random ops such as `tf.random.normal` will *not* cause such datasets
     to be changed, but most other stateful ops will.

  Unfortunately, due to (3), performance can be greatly reduced when stateful
  ops are used in `Dataset.map` due to no longer running the map function in
  parallel. A common example of stateful ops used in `Dataset.map` are random
  ops, such as `tf.random.normal`, which are typically used for distortions. One
  way to work around this is to use stateless random ops instead. Alternatively
  you can hoist all random ops into their own separate `Dataset.map` call,
  making the original `Dataset.map` call stateless and thus avoiding the need to
  serialize its execution.

  (4) can also cause performance to be reduced, but occurs less frequently than
  (3) because legacy random ops do not cause (4) to take effect. However, unlike
  (3), when there are non-random stateful ops in a user-defined function, every
  `map` and `interleave` dataset is affected, instead of just the `map` or
  `interleave` dataset with the function that has stateful ops. Additionally,
  `prefetch` datasets and any dataset with the `num_parallel_calls` argument are
  also affected.
  """
  _pywrap_determinism.enable(True)


def disable_op_determinism():
  """Disables op determinism."""
  _pywrap_determinism.enable(False)


def is_op_determinism_enabled():
  """Returns True if op determinism is enabled."""
  return _pywrap_determinism.is_enabled()