1# Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Functions for configuring TensorFlow execution.""" 16 17from typing import Union 18 19from tensorflow.python.eager import context 20from tensorflow.python.framework import errors 21from tensorflow.python.util import _pywrap_determinism 22from tensorflow.python.util import _pywrap_tensor_float_32_execution 23from tensorflow.python.util import deprecation 24from tensorflow.python.util.tf_export import tf_export 25 26 27@tf_export('config.experimental.tensor_float_32_execution_enabled') 28def tensor_float_32_execution_enabled(): 29 """Returns whether TensorFloat-32 is enabled. 30 31 By default, TensorFloat-32 is enabled, but this can be changed with 32 `tf.config.experimental.enable_tensor_float_32_execution`. 33 34 Returns: 35 True if TensorFloat-32 is enabled (the default) and False otherwise 36 """ 37 return _pywrap_tensor_float_32_execution.is_enabled() 38 39 40@tf_export('config.experimental.enable_tensor_float_32_execution') 41def enable_tensor_float_32_execution(enabled): 42 """Enable or disable the use of TensorFloat-32 on supported hardware. 43 44 [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format), 45 or TF32 for short, is a math mode for NVIDIA Ampere GPUs. TensorFloat-32 46 execution causes certain float32 ops, such as matrix multiplications and 47 convolutions, to run much faster on Ampere GPUs but with reduced precision. 48 This reduced precision should not impact convergence of deep learning models 49 in practice. 50 51 TensorFloat-32 is enabled by default. TensorFloat-32 is only supported on 52 Ampere GPUs, so all other hardware will use the full float32 precision 53 regardless of whether TensorFloat-32 is enabled or not. If you want to use the 54 full float32 precision on Ampere, you can disable TensorFloat-32 execution 55 with this function. For example: 56 57 ```python 58 x = tf.fill((2, 2), 1.0001) 59 y = tf.fill((2, 2), 1.) 60 # TensorFloat-32 is enabled, so matmul is run with reduced precision 61 print(tf.linalg.matmul(x, y)) # [[2., 2.], [2., 2.]] 62 tf.config.experimental.enable_tensor_float_32_execution(False) 63 # Matmul is run with full precision 64 print(tf.linalg.matmul(x, y)) # [[2.0002, 2.0002], [2.0002, 2.0002]] 65 ``` 66 67 To check whether TensorFloat-32 execution is currently enabled, use 68 `tf.config.experimental.tensor_float_32_execution_enabled`. 69 70 If TensorFloat-32 is enabled, float32 inputs of supported ops, such as 71 `tf.linalg.matmul`, will be rounded from 23 bits of precision to 10 bits of 72 precision in most cases. This allows the ops to execute much faster by 73 utilizing the GPU's tensor cores. TensorFloat-32 has the same dynamic range as 74 float32, meaning it is no more likely to underflow or overflow than float32. 75 Ops still use float32 accumulation when TensorFloat-32 is enabled. Enabling or 76 disabling TensorFloat-32 only affects Ampere GPUs and subsequent GPUs that 77 support TensorFloat-32. 78 79 Note TensorFloat-32 is not always used in supported ops, as only inputs of 80 certain shapes are supported. Support for more input shapes and more ops may 81 be added in the future. As a result, precision of float32 ops may decrease in 82 minor versions of TensorFlow. 83 84 TensorFloat-32 is also used for some complex64 ops. Currently, TensorFloat-32 85 is used in fewer cases for complex64 as it is for float32. 86 87 Args: 88 enabled: Bool indicating whether to enable TensorFloat-32 execution. 89 """ 90 _pywrap_tensor_float_32_execution.enable(enabled) 91 92 93@tf_export('config.threading.get_intra_op_parallelism_threads') 94def get_intra_op_parallelism_threads(): 95 """Get number of threads used within an individual op for parallelism. 96 97 Certain operations like matrix multiplication and reductions can utilize 98 parallel threads for speed ups. A value of 0 means the system picks an 99 appropriate number. 100 101 Returns: 102 Number of parallel threads 103 """ 104 return context.context().intra_op_parallelism_threads 105 106 107@tf_export('config.threading.set_intra_op_parallelism_threads') 108def set_intra_op_parallelism_threads(num_threads): 109 """Set number of threads used within an individual op for parallelism. 110 111 Certain operations like matrix multiplication and reductions can utilize 112 parallel threads for speed ups. A value of 0 means the system picks an 113 appropriate number. 114 115 Args: 116 num_threads: Number of parallel threads 117 """ 118 context.context().intra_op_parallelism_threads = num_threads 119 120 121@tf_export('config.threading.get_inter_op_parallelism_threads') 122def get_inter_op_parallelism_threads(): 123 """Get number of threads used for parallelism between independent operations. 124 125 Determines the number of threads used by independent non-blocking operations. 126 0 means the system picks an appropriate number. 127 128 Returns: 129 Number of parallel threads 130 """ 131 return context.context().inter_op_parallelism_threads 132 133 134@tf_export('config.threading.set_inter_op_parallelism_threads') 135def set_inter_op_parallelism_threads(num_threads): 136 """Set number of threads used for parallelism between independent operations. 137 138 Determines the number of threads used by independent non-blocking operations. 139 0 means the system picks an appropriate number. 140 141 Args: 142 num_threads: Number of parallel threads 143 """ 144 context.context().inter_op_parallelism_threads = num_threads 145 146 147@tf_export('config.optimizer.get_jit') 148def get_optimizer_jit() -> str: 149 """Returns JIT compilation configuration for code inside `tf.function`. 150 151 Possible return values: 152 -`"autoclustering"` if 153 [autoclustering](https://www.tensorflow.org/xla#auto-clustering) is enabled 154 - `""` when no default compilation is applied. 155 """ 156 if context.context().optimizer_jit: 157 return 'autoclustering' 158 return '' 159 160 161@tf_export('config.optimizer.set_jit') 162@deprecation.deprecated_arg_values( 163 None, 164 '`True` setting is deprecated, use `autoclustering` instead.', 165 warn_once=True, 166 jit_config=True) 167def set_optimizer_jit(enabled: Union[bool, str]): 168 """Configure JIT compilation. 169 170 Note: compilation is only applied to code that is compiled into a 171 graph (in TF2 that's only a code inside `tf.function`). 172 173 Args: 174 enabled: JIT compilation configuration. 175 Possible values: 176 - `"autoclustering"` (`True` is a deprecated alias): perform 177 [autoclustering](https://www.tensorflow.org/xla#auto-clustering) 178 (automatically identify and compile clusters of nodes) on all graphs 179 using 180 [XLA](https://www.tensorflow.org/xla). 181 - `False`: do not automatically compile any graphs. 182 """ 183 autoclustering_enabled = enabled in (True, 'autoclustering') 184 context.context().optimizer_jit = autoclustering_enabled 185 186 187@tf_export('config.optimizer.get_experimental_options') 188def get_optimizer_experimental_options(): 189 """Get experimental optimizer options. 190 191 Refer to tf.config.optimizer.set_experimental_options for a list of current 192 options. 193 194 Note that optimizations are only applied in graph mode, (within tf.function). 195 In addition, as these are experimental options, the list is subject to change. 196 197 Returns: 198 Dictionary of configured experimental optimizer options 199 """ 200 return context.context().get_optimizer_experimental_options() 201 202 203@tf_export('config.optimizer.set_experimental_options') 204def set_optimizer_experimental_options(options): 205 """Set experimental optimizer options. 206 207 Note that optimizations are only applied in graph mode, (within tf.function). 208 In addition, as these are experimental options, the list is subject to change. 209 210 Args: 211 options: Dictionary of experimental optimizer options to configure. 212 Valid keys: 213 - layout_optimizer: Optimize tensor layouts e.g. This will try to use NCHW 214 layout on GPU which is faster. 215 - constant_folding: Fold constants Statically infer the value of tensors 216 when possible, and materialize the result using constants. 217 - shape_optimization: Simplify computations made on shapes. 218 - remapping: Remap subgraphs onto more efficient implementations. 219 - arithmetic_optimization: Simplify arithmetic ops with common 220 sub-expression elimination and arithmetic simplification. 221 - dependency_optimization: Control dependency optimizations. Remove 222 redundant control dependencies, which may enable other optimization. 223 This optimizer is also essential for pruning Identity and NoOp nodes. 224 - loop_optimization: Loop optimizations. 225 - function_optimization: Function optimizations and inlining. 226 - debug_stripper: Strips debug-related nodes from the graph. 227 - disable_model_pruning: Disable removal of unnecessary ops from the graph 228 - scoped_allocator_optimization: Try to allocate some independent Op 229 outputs contiguously in order to merge or eliminate downstream Ops. 230 - pin_to_host_optimization: Force small ops onto the CPU. 231 - implementation_selector: Enable the swap of kernel implementations based 232 on the device placement. 233 - auto_mixed_precision: Change certain float32 ops to float16 on Volta 234 GPUs and above. Without the use of loss scaling, this can cause 235 numerical underflow (see 236 `keras.mixed_precision.experimental.LossScaleOptimizer`). 237 - disable_meta_optimizer: Disable the entire meta optimizer. 238 - min_graph_nodes: The minimum number of nodes in a graph to optimizer. 239 For smaller graphs, optimization is skipped. 240 """ 241 context.context().set_optimizer_experimental_options(options) 242 243 244@tf_export('config.get_soft_device_placement') 245def get_soft_device_placement(): 246 """Return status of soft device placement flag. 247 248 If enabled, an op will be placed on CPU if any of the following are true 249 1. there's no GPU implementation for the OP 250 2. no GPU devices are known or registered 251 3. need to co-locate with reftype input(s) which are from CPU 252 253 If disabled, the placement is strict and CPU fallback is not allowed. 254 An error is raised when an Op cannot be placed onto its intended device. 255 256 Returns: 257 A boolean indicating if soft placement is enabled. 258 """ 259 return context.context().soft_device_placement 260 261 262@tf_export('config.set_soft_device_placement') 263def set_soft_device_placement(enabled): 264 """Enable or disable soft device placement. 265 266 If enabled, an op will be placed on CPU if any of the following are true 267 1. there's no GPU implementation for the OP 268 2. no GPU devices are known or registered 269 3. need to co-locate with reftype input(s) which are from CPU 270 271 Note: by default soft device placement is enabled when running in eager mode 272 (for convenience) and disabled in graph mode (for performance). 273 274 Args: 275 enabled: A boolean indicating whether to enable soft placement. 276 """ 277 context.context().soft_device_placement = enabled 278 279 280@tf_export('config.experimental.get_device_policy') 281def get_device_policy(): 282 """Gets the current device policy. 283 284 The device policy controls how operations requiring inputs on a specific 285 device (e.g., on GPU:0) handle inputs on a different device (e.g. GPU:1). 286 287 This function only gets the device policy for the current thread. Any 288 subsequently started thread will again use the default policy. 289 290 Returns: 291 Current thread device policy 292 """ 293 device_policy = context.context().device_policy 294 if device_policy == context.DEVICE_PLACEMENT_SILENT: 295 return 'silent' 296 elif device_policy == context.DEVICE_PLACEMENT_SILENT_FOR_INT32: 297 return 'silent_for_int32' 298 elif device_policy == context.DEVICE_PLACEMENT_WARN: 299 return 'warn' 300 elif device_policy == context.DEVICE_PLACEMENT_EXPLICIT: 301 return 'explicit' 302 else: 303 # pylint: disable-next=no-value-for-parameter 304 raise errors.InternalError( 305 f'Got an invalid device policy: {device_policy!r}.') 306 307 308@tf_export('config.experimental.set_device_policy') 309def set_device_policy(device_policy): 310 """Sets the current thread device policy. 311 312 The device policy controls how operations requiring inputs on a specific 313 device (e.g., on GPU:0) handle inputs on a different device (e.g. GPU:1). 314 315 When using the default, an appropriate policy will be picked automatically. 316 The default policy may change over time. 317 318 This function only sets the device policy for the current thread. Any 319 subsequently started thread will again use the default policy. 320 321 Args: 322 device_policy: A device policy. 323 Valid values: 324 - None: Switch to a system default. 325 - 'warn': Copies the tensors which are not on the right device and logs a 326 warning. 327 - 'explicit': Raises an error if the placement is not as required. 328 - 'silent': Silently copies the tensors. Note that this may hide 329 performance problems as there is no notification provided when 330 operations are blocked on the tensor being copied between devices. 331 - 'silent_for_int32': silently copies `int32` tensors, raising errors on 332 the other ones. 333 334 Raises: 335 ValueError: If an invalid `device_policy` is passed. 336 """ 337 if device_policy == 'silent': 338 context.context().device_policy = context.DEVICE_PLACEMENT_SILENT 339 elif device_policy == 'silent_for_int32': 340 context.context().device_policy = context.DEVICE_PLACEMENT_SILENT_FOR_INT32 341 elif device_policy == 'warn': 342 context.context().device_policy = context.DEVICE_PLACEMENT_WARN 343 elif device_policy == 'explicit': 344 context.context().device_policy = context.DEVICE_PLACEMENT_EXPLICIT 345 elif device_policy is None: 346 context.context().device_policy = None 347 else: 348 raise ValueError( 349 f'Invalid argument `device_policy`: {device_policy!r}. Please refer to ' 350 'https://www.tensorflow.org/api_docs/python/tf/config/experimental/set_device_policy ' 351 'for valid `device_policy` arguments.') 352 353 354@tf_export('config.experimental.get_synchronous_execution') 355def get_synchronous_execution(): 356 """Gets whether operations are executed synchronously or asynchronously. 357 358 TensorFlow can execute operations synchronously or asynchronously. If 359 asynchronous execution is enabled, operations may return "non-ready" handles. 360 361 Returns: 362 Current thread execution mode 363 """ 364 return context.context().execution_mode == context.SYNC 365 366 367@tf_export('config.experimental.set_synchronous_execution') 368def set_synchronous_execution(enable): 369 """Specifies whether operations are executed synchronously or asynchronously. 370 371 TensorFlow can execute operations synchronously or asynchronously. If 372 asynchronous execution is enabled, operations may return "non-ready" handles. 373 374 When `enable` is set to None, an appropriate value will be picked 375 automatically. The value picked may change between TensorFlow releases. 376 377 Args: 378 enable: Whether operations should be dispatched synchronously. 379 Valid values: 380 - None: sets the system default. 381 - True: executes each operation synchronously. 382 - False: executes each operation asynchronously. 383 """ 384 if enable is None: 385 context.context().execution_mode = None 386 elif enable: 387 context.context().execution_mode = context.SYNC 388 else: 389 context.context().execution_mode = context.ASYNC 390 391 392@tf_export('config.list_physical_devices', 393 'config.experimental.list_physical_devices') 394@deprecation.deprecated_endpoints('config.experimental.list_physical_devices') 395def list_physical_devices(device_type=None): 396 """Return a list of physical devices visible to the host runtime. 397 398 Physical devices are hardware devices present on the host machine. By default 399 all discovered CPU and GPU devices are considered visible. 400 401 This API allows querying the physical hardware resources prior to runtime 402 initialization. Thus, giving an opportunity to call any additional 403 configuration APIs. This is in contrast to `tf.config.list_logical_devices`, 404 which triggers runtime initialization in order to list the configured devices. 405 406 The following example lists the number of visible GPUs on the host. 407 408 >>> physical_devices = tf.config.list_physical_devices('GPU') 409 >>> print("Num GPUs:", len(physical_devices)) 410 Num GPUs: ... 411 412 However, the number of GPUs available to the runtime may change during runtime 413 initialization due to marking certain devices as not visible or configuring 414 multiple logical devices. 415 416 Args: 417 device_type: (optional string) Only include devices matching this device 418 type. For example "CPU" or "GPU". 419 420 Returns: 421 List of discovered `tf.config.PhysicalDevice` objects 422 """ 423 return context.context().list_physical_devices(device_type) 424 425 426@tf_export('config.list_logical_devices', 427 'config.experimental.list_logical_devices') 428@deprecation.deprecated_endpoints('config.experimental.list_logical_devices') 429def list_logical_devices(device_type=None): 430 """Return a list of logical devices created by runtime. 431 432 Logical devices may correspond to physical devices or remote devices in the 433 cluster. Operations and tensors may be placed on these devices by using the 434 `name` of the `tf.config.LogicalDevice`. 435 436 Calling `tf.config.list_logical_devices` triggers the runtime to configure any 437 `tf.config.PhysicalDevice` visible to the runtime, thereby preventing 438 further configuration. To avoid runtime initialization, call 439 `tf.config.list_physical_devices` instead. 440 441 For example: 442 443 >>> logical_devices = tf.config.list_logical_devices('GPU') 444 >>> if len(logical_devices) > 0: 445 ... # Allocate on GPU:0 446 ... with tf.device(logical_devices[0].name): 447 ... one = tf.constant(1) 448 ... # Allocate on GPU:1 449 ... with tf.device(logical_devices[1].name): 450 ... two = tf.constant(2) 451 452 Args: 453 device_type: (optional string) Only include devices matching this device 454 type. For example "CPU" or "GPU". 455 456 Returns: 457 List of initialized `LogicalDevice`s 458 """ 459 return context.context().list_logical_devices(device_type=device_type) 460 461 462@tf_export('config.get_visible_devices', 463 'config.experimental.get_visible_devices') 464@deprecation.deprecated_endpoints('config.experimental.get_visible_devices') 465def get_visible_devices(device_type=None): 466 """Get the list of visible physical devices. 467 468 Returns the list of `PhysicalDevice`s currently marked as visible to the 469 runtime. A visible device will have at least one `LogicalDevice` associated 470 with it once the runtime is initialized. 471 472 The following example verifies all visible GPUs have been disabled: 473 474 >>> physical_devices = tf.config.list_physical_devices('GPU') 475 >>> try: 476 ... # Disable all GPUS 477 ... tf.config.set_visible_devices([], 'GPU') 478 ... visible_devices = tf.config.get_visible_devices() 479 ... for device in visible_devices: 480 ... assert device.device_type != 'GPU' 481 ... except: 482 ... # Invalid device or cannot modify virtual devices once initialized. 483 ... pass 484 485 Args: 486 device_type: (optional string) Only include devices matching this device 487 type. For example "CPU" or "GPU". 488 489 Returns: 490 List of visible `PhysicalDevice`s 491 """ 492 return context.context().get_visible_devices(device_type) 493 494 495@tf_export('config.set_visible_devices', 496 'config.experimental.set_visible_devices') 497@deprecation.deprecated_endpoints('config.experimental.set_visible_devices') 498def set_visible_devices(devices, device_type=None): 499 """Set the list of visible devices. 500 501 Specifies which `PhysicalDevice` objects are visible to the runtime. 502 TensorFlow will only allocate memory and place operations on visible 503 physical devices, as otherwise no `LogicalDevice` will be created on them. 504 By default all discovered devices are marked as visible. 505 506 The following example demonstrates disabling the first GPU on the machine. 507 508 >>> physical_devices = tf.config.list_physical_devices('GPU') 509 >>> try: 510 ... # Disable first GPU 511 ... tf.config.set_visible_devices(physical_devices[1:], 'GPU') 512 ... logical_devices = tf.config.list_logical_devices('GPU') 513 ... # Logical device was not created for first GPU 514 ... assert len(logical_devices) == len(physical_devices) - 1 515 ... except: 516 ... # Invalid device or cannot modify virtual devices once initialized. 517 ... pass 518 519 Args: 520 devices: List of `PhysicalDevice`s to make visible 521 device_type: (optional) Only configure devices matching this device type. 522 For example "CPU" or "GPU". Other devices will be left unaltered. 523 524 Raises: 525 ValueError: If argument validation fails. 526 RuntimeError: Runtime is already initialized. 527 """ 528 context.context().set_visible_devices(devices, device_type) 529 530 531# TODO(b/188089869): Redesign memory stats related APIs before move them out of 532# experimental. 533@tf_export('config.experimental.get_memory_info') 534def get_memory_info(device): 535 """Get memory info for the chosen device, as a dict. 536 537 This function returns a dict containing information about the device's memory 538 usage. For example: 539 540 >>> if tf.config.list_physical_devices('GPU'): 541 ... # Returns a dict in the form {'current': <current mem usage>, 542 ... # 'peak': <peak mem usage>} 543 ... tf.config.experimental.get_memory_info('GPU:0') 544 545 Currently returns the following keys: 546 - `'current'`: The current memory used by the device, in bytes. 547 - `'peak'`: The peak memory used by the device across the run of the 548 program, in bytes. Can be reset with 549 `tf.config.experimental.reset_memory_stats`. 550 551 More keys may be added in the future, including device-specific keys. 552 553 Currently only supports GPU and TPU. If called on a CPU device, an exception 554 will be raised. 555 556 For GPUs, TensorFlow will allocate all the memory by default, unless changed 557 with `tf.config.experimental.set_memory_growth`. The dict specifies only the 558 current and peak memory that TensorFlow is actually using, not the memory that 559 TensorFlow has allocated on the GPU. 560 561 Args: 562 device: Device string to get the memory information for, e.g. `"GPU:0"`, 563 `"TPU:0"`. See https://www.tensorflow.org/api_docs/python/tf/device for 564 specifying device strings. 565 566 Returns: 567 A dict with keys `'current'` and `'peak'`, specifying the current and peak 568 memory usage respectively. 569 570 Raises: 571 ValueError: No device found with the device name, like '"nonexistent"'. 572 ValueError: Invalid device name, like '"GPU"', '"CPU:GPU"', '"CPU:"'. 573 ValueError: Multiple devices matched with the device name. 574 ValueError: Memory statistics not tracked, like '"CPU:0"'. 575 """ 576 return context.context().get_memory_info(device) 577 578 579# TODO(b/188089869): Redesign memory stats related APIs before move them out of 580# experimental. 581# TODO(b/189498350): Unify the behavior on CPU, GPU and TPU. 582@tf_export('config.experimental.reset_memory_stats') 583def reset_memory_stats(device): 584 """Resets the tracked memory stats for the chosen device. 585 586 This function sets the tracked peak memory for a device to the device's 587 current memory usage. This allows you to measure the peak memory usage for a 588 specific part of your program. For example: 589 590 >>> if tf.config.list_physical_devices('GPU'): 591 ... # Sets the peak memory to the current memory. 592 ... tf.config.experimental.reset_memory_stats('GPU:0') 593 ... # Creates the first peak memory usage. 594 ... x1 = tf.ones(1000 * 1000, dtype=tf.float64) 595 ... del x1 # Frees the memory referenced by `x1`. 596 ... peak1 = tf.config.experimental.get_memory_info('GPU:0')['peak'] 597 ... # Sets the peak memory to the current memory again. 598 ... tf.config.experimental.reset_memory_stats('GPU:0') 599 ... # Creates the second peak memory usage. 600 ... x2 = tf.ones(1000 * 1000, dtype=tf.float32) 601 ... del x2 602 ... peak2 = tf.config.experimental.get_memory_info('GPU:0')['peak'] 603 ... assert peak2 < peak1 # tf.float32 consumes less memory than tf.float64. 604 605 Currently only supports GPU and TPU. If called on a CPU device, an exception 606 will be raised. 607 608 Args: 609 device: Device string to reset the memory stats, e.g. `"GPU:0"`, `"TPU:0"`. 610 See https://www.tensorflow.org/api_docs/python/tf/device for specifying 611 device strings. 612 613 Raises: 614 ValueError: No device found with the device name, like '"nonexistent"'. 615 ValueError: Invalid device name, like '"GPU"', '"CPU:GPU"', '"CPU:"'. 616 ValueError: Multiple devices matched with the device name. 617 ValueError: Memory statistics not tracked or clearing memory statistics not 618 supported, like '"CPU:0"'. 619 """ 620 context.context().reset_memory_stats(device) 621 622 623@deprecation.deprecated( 624 None, 625 "Use tf.config.experimental.get_memory_info(device)['current'] instead.") 626@tf_export('config.experimental.get_memory_usage') 627def get_memory_usage(device): 628 """Get the current memory usage, in bytes, for the chosen device. 629 630 This function is deprecated in favor of 631 `tf.config.experimental.get_memory_info`. Calling this function is equivalent 632 to calling `tf.config.experimental.get_memory_info()['current']`. 633 634 See https://www.tensorflow.org/api_docs/python/tf/device for specifying device 635 strings. 636 637 For example: 638 639 >>> gpu_devices = tf.config.list_physical_devices('GPU') 640 >>> if gpu_devices: 641 ... tf.config.experimental.get_memory_usage('GPU:0') 642 643 Does not work for CPU. 644 645 For GPUs, TensorFlow will allocate all the memory by default, unless changed 646 with `tf.config.experimental.set_memory_growth`. This function only returns 647 the memory that TensorFlow is actually using, not the memory that TensorFlow 648 has allocated on the GPU. 649 650 Args: 651 device: Device string to get the bytes in use for, e.g. `"GPU:0"` 652 653 Returns: 654 Total memory usage in bytes. 655 656 Raises: 657 ValueError: Non-existent or CPU device specified. 658 """ 659 return get_memory_info(device)['current'] 660 661 662@tf_export('config.experimental.get_memory_growth') 663def get_memory_growth(device): 664 """Get if memory growth is enabled for a `PhysicalDevice`. 665 666 If memory growth is enabled for a `PhysicalDevice`, the runtime initialization 667 will not allocate all memory on the device. 668 669 For example: 670 671 >>> physical_devices = tf.config.list_physical_devices('GPU') 672 >>> try: 673 ... tf.config.experimental.set_memory_growth(physical_devices[0], True) 674 ... assert tf.config.experimental.get_memory_growth(physical_devices[0]) 675 ... except: 676 ... # Invalid device or cannot modify virtual devices once initialized. 677 ... pass 678 679 Args: 680 device: `PhysicalDevice` to query 681 682 Returns: 683 A boolean indicating the memory growth setting for the `PhysicalDevice`. 684 685 Raises: 686 ValueError: Invalid `PhysicalDevice` specified. 687 """ 688 return context.context().get_memory_growth(device) 689 690 691@tf_export('config.experimental.set_memory_growth') 692def set_memory_growth(device, enable): 693 """Set if memory growth should be enabled for a `PhysicalDevice`. 694 695 If memory growth is enabled for a `PhysicalDevice`, the runtime initialization 696 will not allocate all memory on the device. Memory growth cannot be configured 697 on a `PhysicalDevice` with virtual devices configured. 698 699 For example: 700 701 >>> physical_devices = tf.config.list_physical_devices('GPU') 702 >>> try: 703 ... tf.config.experimental.set_memory_growth(physical_devices[0], True) 704 ... except: 705 ... # Invalid device or cannot modify virtual devices once initialized. 706 ... pass 707 708 Args: 709 device: `PhysicalDevice` to configure 710 enable: (Boolean) Whether to enable or disable memory growth 711 712 Raises: 713 ValueError: Invalid `PhysicalDevice` specified. 714 RuntimeError: Runtime is already initialized. 715 """ 716 context.context().set_memory_growth(device, enable) 717 718 719@tf_export('config.experimental.get_device_details') 720def get_device_details(device): 721 """Returns details about a physical devices. 722 723 This API takes in a `tf.config.PhysicalDevice` returned by 724 `tf.config.list_physical_devices`. It returns a dict with string keys 725 containing various details about the device. Each key is only supported by a 726 subset of devices, so you should not assume the returned dict will have any 727 particular key. 728 729 >>> gpu_devices = tf.config.list_physical_devices('GPU') 730 >>> if gpu_devices: 731 ... details = tf.config.experimental.get_device_details(gpu_devices[0]) 732 ... details.get('device_name', 'Unknown GPU') 733 734 Currently, details are only returned for GPUs. This function returns an 735 empty dict if passed a non-GPU device. 736 737 The returned dict may have the following keys: 738 * `'device_name'`: A human-readable name of the device as a string, e.g. 739 "Titan V". Unlike `tf.config.PhysicalDevice.name`, this will be the same for 740 multiple devices if each device is the same model. Currently only available 741 for GPUs. 742 * `'compute_capability'`: The 743 [compute capability](https://developer.nvidia.com/cuda-gpus) of the device 744 as a tuple of two ints, in the form `(major_version, minor_version)`. Only 745 available for NVIDIA GPUs 746 747 Note: This is similar to `tf.sysconfig.get_build_info` in that both functions 748 can return information relating to GPUs. However, this function returns 749 run-time information about a specific device (such as a GPU's compute 750 capability), while `tf.sysconfig.get_build_info` returns compile-time 751 information about how TensorFlow was built (such as what version of CUDA 752 TensorFlow was built for). 753 754 Args: 755 device: A `tf.config.PhysicalDevice` returned by 756 `tf.config.list_physical_devices` or `tf.config.get_visible_devices`. 757 758 Returns: 759 A dict with string keys. 760 """ 761 return context.context().get_device_details(device) 762 763 764@tf_export('config.get_logical_device_configuration', 765 'config.experimental.get_virtual_device_configuration') 766@deprecation.deprecated_endpoints( 767 'config.experimental.get_virtual_device_configuration') 768def get_logical_device_configuration(device): 769 """Get the virtual device configuration for a `tf.config.PhysicalDevice`. 770 771 Returns the list of `tf.config.LogicalDeviceConfiguration` 772 objects previously configured by a call to 773 `tf.config.set_logical_device_configuration`. 774 775 For example: 776 777 >>> physical_devices = tf.config.list_physical_devices('CPU') 778 >>> assert len(physical_devices) == 1, "No CPUs found" 779 >>> configs = tf.config.get_logical_device_configuration( 780 ... physical_devices[0]) 781 >>> try: 782 ... assert configs is None 783 ... tf.config.set_logical_device_configuration( 784 ... physical_devices[0], 785 ... [tf.config.LogicalDeviceConfiguration(), 786 ... tf.config.LogicalDeviceConfiguration()]) 787 ... configs = tf.config.get_logical_device_configuration( 788 ... physical_devices[0]) 789 ... assert len(configs) == 2 790 ... except: 791 ... # Cannot modify virtual devices once initialized. 792 ... pass 793 794 Args: 795 device: `PhysicalDevice` to query 796 797 Returns: 798 List of `tf.config.LogicalDeviceConfiguration` objects or 799 `None` if no virtual device configuration has been set for this physical 800 device. 801 """ 802 return context.context().get_logical_device_configuration(device) 803 804 805@tf_export('config.set_logical_device_configuration', 806 'config.experimental.set_virtual_device_configuration') 807@deprecation.deprecated_endpoints( 808 'config.experimental.set_virtual_device_configuration') 809def set_logical_device_configuration(device, logical_devices): 810 """Set the logical device configuration for a `tf.config.PhysicalDevice`. 811 812 A visible `tf.config.PhysicalDevice` will by default have a single 813 `tf.config.LogicalDevice` associated with it once the runtime is initialized. 814 Specifying a list of `tf.config.LogicalDeviceConfiguration` objects allows 815 multiple devices to be created on the same `tf.config.PhysicalDevice`. 816 817 Logical device configurations can be modified by calling this function as 818 long as the runtime is uninitialized. After the runtime is initialized 819 calling this function raises a RuntimeError. 820 821 The following example splits the CPU into 2 logical devices: 822 823 >>> physical_devices = tf.config.list_physical_devices('CPU') 824 >>> assert len(physical_devices) == 1, "No CPUs found" 825 >>> # Specify 2 virtual CPUs. Note currently memory limit is not supported. 826 >>> try: 827 ... tf.config.set_logical_device_configuration( 828 ... physical_devices[0], 829 ... [tf.config.LogicalDeviceConfiguration(), 830 ... tf.config.LogicalDeviceConfiguration()]) 831 ... logical_devices = tf.config.list_logical_devices('CPU') 832 ... assert len(logical_devices) == 2 833 ... 834 ... tf.config.set_logical_device_configuration( 835 ... physical_devices[0], 836 ... [tf.config.LogicalDeviceConfiguration(), 837 ... tf.config.LogicalDeviceConfiguration(), 838 ... tf.config.LogicalDeviceConfiguration(), 839 ... tf.config.LogicalDeviceConfiguration()]) 840 ... except: 841 ... # Cannot modify logical devices once initialized. 842 ... pass 843 844 The following example splits the GPU into 2 logical devices with 100 MB each: 845 846 >>> physical_devices = tf.config.list_physical_devices('GPU') 847 >>> try: 848 ... tf.config.set_logical_device_configuration( 849 ... physical_devices[0], 850 ... [tf.config.LogicalDeviceConfiguration(memory_limit=100), 851 ... tf.config.LogicalDeviceConfiguration(memory_limit=100)]) 852 ... 853 ... logical_devices = tf.config.list_logical_devices('GPU') 854 ... assert len(logical_devices) == len(physical_devices) + 1 855 ... 856 ... tf.config.set_logical_device_configuration( 857 ... physical_devices[0], 858 ... [tf.config.LogicalDeviceConfiguration(memory_limit=10), 859 ... tf.config.LogicalDeviceConfiguration(memory_limit=10)]) 860 ... except: 861 ... # Invalid device or cannot modify logical devices once initialized. 862 ... pass 863 864 Args: 865 device: The `PhysicalDevice` to configure. 866 logical_devices: (optional) List of `tf.config.LogicalDeviceConfiguration` 867 objects to allocate for the specified `PhysicalDevice`. If None, the 868 default configuration will be used. 869 870 Raises: 871 ValueError: If argument validation fails. 872 RuntimeError: Runtime is already initialized. 873 """ 874 context.context().set_logical_device_configuration(device, logical_devices) 875 876 877@tf_export('config.experimental.enable_mlir_bridge') 878def enable_mlir_bridge(): 879 """Enables experimental MLIR-Based TensorFlow Compiler Bridge. 880 881 DO NOT USE, DEV AND TESTING ONLY AT THE MOMENT. 882 883 NOTE: MLIR-Based TensorFlow Compiler is under active development and has 884 missing features, please refrain from using. This API exists for development 885 and testing only. 886 887 TensorFlow Compiler Bridge (TF Bridge) is responsible for translating parts 888 of TensorFlow graph into a form that can be accepted as an input by a backend 889 compiler such as XLA. 890 """ 891 context.context().enable_mlir_bridge = True 892 893 894@tf_export('config.experimental.enable_mlir_graph_optimization') 895def enable_mlir_graph_optimization(): 896 """Enables experimental MLIR-Based TensorFlow Compiler Optimizations. 897 898 DO NOT USE, DEV AND TESTING ONLY AT THE MOMENT. 899 900 NOTE: MLIR-Based TensorFlow Compiler is under active development and has 901 missing features, please refrain from using. This API exists for development 902 and testing only. 903 904 TensorFlow Compiler Optimizations are responsible general graph level 905 optimizations that in the current stack mostly done by Grappler graph 906 optimizers. 907 """ 908 context.context().enable_mlir_graph_optimization = True 909 910 911@tf_export('config.experimental.disable_mlir_bridge') 912def disable_mlir_bridge(): 913 """Disables experimental MLIR-Based TensorFlow Compiler Bridge.""" 914 context.context().enable_mlir_bridge = False 915 916 917@tf_export('config.experimental.disable_mlir_graph_optimization') 918def disable_mlir_graph_optimization(): 919 """Disables experimental MLIR-Based TensorFlow Compiler Optimizations.""" 920 context.context().enable_mlir_graph_optimization = False 921 922 923@tf_export('config.experimental.enable_op_determinism', v1=[]) 924def enable_op_determinism(): 925 """Configures TensorFlow ops to run deterministically. 926 927 When op determinism is enabled, TensorFlow ops will be deterministic. This 928 means that if an op is run multiple times with the same inputs on the same 929 hardware, it will have the exact same outputs each time. This is useful for 930 debugging models. Note that determinism in general comes at the expense of 931 lower performance and so your model may run slower when op determinism is 932 enabled. 933 934 If you want your TensorFlow program to run deterministically, put the 935 following code near the start of your program. 936 937 ```python 938 tf.keras.utils.set_random_seed(1) 939 tf.config.experimental.enable_op_determinism() 940 ``` 941 942 Calling `tf.keras.utils.set_random_seed` sets the Python seed, the NumPy seed, 943 and the TensorFlow seed. Setting these seeds is necessary to ensure any random 944 numbers your program generates are also deterministic. 945 946 By default, op determinism is not enabled, so ops might return different 947 results when run with the same inputs. These differences are often caused by 948 the use of asynchronous threads within the op nondeterministically changing 949 the order in which floating-point numbers are added. Most of these cases of 950 nondeterminism occur on GPUs, which have thousands of hardware threads that 951 are used to run ops. Enabling determinism directs such ops to use a different 952 algorithm, one that does not use threads in a nondeterministic way. 953 954 Another potential source of nondeterminism is `tf.data` based data processing. 955 Typically, this can introduce nondeterminsm due to the use of parallelism in 956 methods such as `Dataset.map` producing inputs or running stateful ops in a 957 nondeterministic order. Enabling determinism will remove such sources of 958 nondeterminism. 959 960 Enabling determinism will likely make your model or your `tf.data` data 961 processing slower. For example, `Dataset.map` can become several orders of 962 magnitude slower when the map function has random ops or other stateful ops. 963 See the “Determinism and tf.data” section below for more details. In future 964 TensorFlow releases, we plan on improving the performance of determinism, 965 especially for common scenarios such as `Dataset.map`. 966 967 Certain ops will raise an `UnimplementedError` because they do not yet have a 968 deterministic implementation. Additionally, due to bugs, some ops might be 969 nondeterministic and not raise an `UnimplementedError`. If you encounter such 970 ops, please [file an issue](https://github.com/tensorflow/tensorflow/issues). 971 972 An example of enabling determinism follows. The 973 `tf.nn.softmax_cross_entropy_with_logits` op is run multiple times and the 974 output is shown to be the same each time. This example would likely fail when 975 run on a GPU if determinism were not enabled, because 976 `tf.nn.softmax_cross_entropy_with_logits` uses a nondeterministic algorithm on 977 GPUs by default. 978 979 ```python 980 labels = tf.random.normal((1, 10000)) 981 logits = tf.random.normal((1, 10000)) 982 output = tf.nn.softmax_cross_entropy_with_logits(labels=labels, 983 logits=logits) 984 for _ in range(5): 985 output2 = tf.nn.softmax_cross_entropy_with_logits(labels=labels, 986 logits=logits) 987 tf.debugging.assert_equal(output, output2) 988 ``` 989 990 ## Writing deterministic models 991 992 You can make your models deterministic by enabling op determinism. This 993 means that you can train a model and finish each run with exactly the same 994 trainable variables. This also means that the inferences of your 995 previously-trained model will be exactly the same on each run. Typically, 996 models can be made deterministic by simply setting the seeds and enabling 997 op determinism, as in the example above. However, to guarantee that your 998 model operates deterministically, you must meet all the following 999 requirements: 1000 1001 * Call `tf.config.experimental.enable_op_determinism()`, as mentioned above. 1002 * Reproducibly reset any pseudorandom number generators (PRNGs) you’re using, 1003 such as by setting the seeds for the default PRNGs in TensorFlow, Python, 1004 and NumPy, as mentioned above. Note that certain newer NumPy classes like 1005 ` numpy.random.default_rng` ignore the global NumPy seed, so a seed must be 1006 explicitly passed to such classes, if used. 1007 * Use the same hardware configuration in every run. 1008 * Use the same software environment in every run (OS, checkpoints, version of 1009 CUDA and TensorFlow, environmental variables, etc). Note that determinism is 1010 not guaranteed across different versions of TensorFlow. 1011 * Do not use constructs outside TensorFlow that are nondeterministic, such as 1012 reading from `/dev/random` or using multiple threads/processes in ways that 1013 influence TensorFlow’s behavior. 1014 * Ensure your input pipeline is deterministic. If you use `tf.data`, this is 1015 done automatically (at the expense of performance). See "Determinism and 1016 tf.data" below for more information. 1017 * Do not use `tf.compat.v1.Session` and 1018 `tf.distribute.experimental.ParameterServerStrategy`, which can introduce 1019 nondeterminism. Besides ops (including `tf.data` ops), these are the only 1020 known potential sources of nondeterminism within TensorFlow, (if you 1021 find more, please file an issue). Note that `tf.compat.v1.Session` is 1022 required to use the TF1 API, so determinism cannot be guaranteed when using 1023 the TF1 API. 1024 * Do not use nondeterministic custom ops. 1025 1026 ## Additional details on determinism 1027 1028 For stateful ops to be deterministic, the state of the system must be the same 1029 every time the op is run. For example the output of `tf.Variable.sparse_read` 1030 (obviously) depends on both the variable value and the `indices` function 1031 parameter. When determinism is enabled, the side effects of stateful ops are 1032 deterministic. 1033 1034 TensorFlow’s random ops, such as `tf.random.normal`, will raise a 1035 `RuntimeError` if determinism is enabled and a seed has not been set. However, 1036 attempting to generate nondeterministic random numbers using Python or NumPy 1037 will not raise such errors. Make sure you remember to set the Python and NumPy 1038 seeds. Calling `tf.keras.utils.set_random_seed` is an easy way to set all 1039 three seeds. 1040 1041 Note that latency, memory consumption, throughput, and other performance 1042 characteristics are *not* made deterministic by enabling op determinism. 1043 Only op outputs and side effects are made deterministic. Additionally, a model 1044 may nondeterministically raise a `tf.errors.ResourceExhaustedError` from a 1045 lack of memory due to the fact that memory consumption is nondeterministic. 1046 1047 ## Determinism and tf.data 1048 1049 Enabling deterministic ops makes `tf.data` deterministic in several ways: 1050 1051 1. For dataset methods with a `deterministic` argument, such as `Dataset.map` 1052 and `Dataset.batch`, the `deterministic` argument is overridden to be 1053 `True` irrespective of its setting. 1054 2. The `tf.data.Option.experimental_deterministic` option is overridden to be 1055 `True` irrespective of its setting.. 1056 3. In `Dataset.map` and `Dataset.interleave`, if the map or interleave 1057 function has stateful random ops or other stateful ops, the function will 1058 run serially instead of in parallel. This means the `num_parallel_calls` 1059 argument to `map` and `interleave` is effectively ignored. 1060 4. Prefetching with `Dataset.prefetch` will be disabled if any function run 1061 as part of the input pipeline has certain stateful ops. Similarly, any 1062 dataset method with a `num_parallel_calls` argument will be made to run 1063 serially if any function in the input pipeline has such stateful ops. 1064 Legacy random ops such as `tf.random.normal` will *not* cause such datasets 1065 to be changed, but most other stateful ops will. 1066 1067 Unfortunately, due to (3), performance can be greatly reduced when stateful 1068 ops are used in `Dataset.map` due to no longer running the map function in 1069 parallel. A common example of stateful ops used in `Dataset.map` are random 1070 ops, such as `tf.random.normal`, which are typically used for distortions. One 1071 way to work around this is to use stateless random ops instead. Alternatively 1072 you can hoist all random ops into its own separate `Dataset.map` call, making 1073 the original `Dataset.map` call stateless and thus avoid the need to serialize 1074 its execution. 1075 1076 (4) can also cause performance to be reduced, but occurs less frequently than 1077 (3) because legacy random ops do not cause (4) to take effect. However, unlike 1078 (3), when there are non-random stateful ops in a user-defined function, every 1079 `map` and `interleave` dataset is affected, instead of just the `map` or 1080 `interleave` dataset with the function that has stateful ops. Additionally, 1081 `prefetch` datasets and any dataset with the `num_parallel_calls` argument are 1082 also affected. 1083 """ 1084 _pywrap_determinism.enable(True) 1085 1086 1087def disable_op_determinism(): 1088 """Disables op determinism.""" 1089 _pywrap_determinism.enable(False) 1090 1091 1092def is_op_determinism_enabled(): 1093 """Returns True if op determinism is enabled.""" 1094 return _pywrap_determinism.is_enabled() 1095