1# Copyright 2019-2023 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""
16The configuration module provides various functions to set and get the supported
17configuration parameters, and read a configuration file.
18
19Common imported modules in corresponding API examples are as follows:
20
21.. code-block::
22
23    import mindspore.dataset as ds
24"""
25from __future__ import absolute_import
26from enum import IntEnum
27import os
28import platform
29import random
30import numpy
31import mindspore._c_dataengine as cde
32from mindspore import log as logger
33from mindspore.dataset.core.validator_helpers import replace_none, type_check
34from mindspore.dataset.debug import DebugHook, PrintMetaDataHook
35
36__all__ = ['set_sending_batches', 'load', '_init_device_info',
37           'set_seed', 'get_seed',
38           'set_prefetch_size', 'get_prefetch_size',
39           'set_num_parallel_workers', 'get_num_parallel_workers',
40           'set_numa_enable', 'get_numa_enable',
41           'set_monitor_sampling_interval', 'get_monitor_sampling_interval',
42           'set_callback_timeout', 'get_callback_timeout',
43           'set_auto_num_workers', 'get_auto_num_workers',
44           'set_enable_shared_mem', 'get_enable_shared_mem',
45           'set_enable_autotune', 'get_enable_autotune',
46           'set_autotune_interval', 'get_autotune_interval',
47           'set_auto_offload', 'get_auto_offload',
48           'set_enable_watchdog', 'get_enable_watchdog',
49           'set_fast_recovery', 'get_fast_recovery',
50           'set_debug_mode', 'get_debug_mode',
51           'set_error_samples_mode', 'get_error_samples_mode', 'ErrorSamplesMode',
52           'set_multiprocessing_timeout_interval', 'get_multiprocessing_timeout_interval']
53
54INT32_MAX = 2147483647
55UINT32_MAX = 4294967295
56
57_config = cde.GlobalContext.config_manager()
58_debug_context = {}
59
60
61def _init_device_info():
62    """
63    INTERNAL USE ONLY!
64    The rank_id needs to be passed into the deep layer for numa and device_queue.
65    Each process works with only one rank_id. In the standalone scenario, rank_id
66    may come from the env 'CUDA_VISIBLE_DEVICES'; in the distributed scenario,
67    rank_id comes from _get_global_rank().
68    """
69    from mindspore import context
70    from mindspore.parallel._auto_parallel_context import auto_parallel_context
71    from mindspore.parallel._utils import _get_global_rank
72    numa_enable = False
73    numa_enable_env = os.getenv("DATASET_ENABLE_NUMA", None)
74    if numa_enable_env and numa_enable_env.strip() == 'True':
75        numa_enable = True
76    numa_enable_env = os.getenv("MS_ENABLE_NUMA", None)
77    if numa_enable_env and numa_enable_env.strip() == 'True':
78        numa_enable = True
79    if context.get_context("device_target") == "GPU":
80        rank_id = _get_global_rank()
81        parallel_mode = auto_parallel_context().get_parallel_mode()
82        if parallel_mode == "stand_alone":
83            rank_id = context.get_context("device_id")
84        if numa_enable:
85            _config.set_numa_enable(True)
86        _config.set_rank_id(rank_id)
87    elif context.get_context("device_target") == "Ascend":
88        # Ascend is a special scenario, we'd better get rank info from env
89        env_rank_size = os.getenv("RANK_SIZE", None)
90        env_rank_id = os.getenv("RANK_ID", None)
91        rank_size = 0
92        rank_id = 0
93        if env_rank_size and env_rank_id:
94            try:
95                rank_size = int(env_rank_size.strip())
96                rank_id = int(env_rank_id.strip())
97            except ValueError:
98                raise ValueError("rank_size or rank_id is not int.")
99        if rank_size > 1:
100            if numa_enable:
101                _config.set_numa_enable(True)
102            _config.set_rank_id(rank_id)
103
104
105def set_seed(seed):
106    """
107    Set the seed for the random number generator in the data pipeline.
108
109    The seed can be set to control the initial state of the random generator
110    for the purpose of fixing the result of random number generation.
111
112    Note:
113        This interface will set the random seed of the `random`, `numpy.random`
114        and `mindspore.dataset` modules to the specified value at the same time.
115
116    Args:
117        seed (int): The desired seed. Must be non-negative.
118
119    Raises:
120        TypeError: If `seed` is not of type int.
121        ValueError: If `seed` is a negative value.
122
123    Examples:
124        >>> # Set a new global configuration value for the seed value.
125        >>> # Operations with randomness will use the seed value to generate random values.
126        >>> import mindspore.dataset as ds
127        >>> ds.config.set_seed(1000)
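        >>> # A minimal sketch of the Note above: the same call also seeds Python's
        >>> # `random` module and `numpy.random`, so the values drawn below are
        >>> # reproducible across runs.
        >>> import random
        >>> import numpy as np
        >>> ds.config.set_seed(1000)
        >>> py_value = random.random()
        >>> np_value = np.random.rand()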
128    """
129    if not isinstance(seed, int) or isinstance(seed, bool):
130        raise TypeError("seed isn't of type int.")
131    if seed < 0 or seed > UINT32_MAX:
132        raise ValueError(
133            "seed given is not within the required range [0, UINT32_MAX(4294967295)].")
134    _config.set_seed(seed)
135    random.seed(seed)
136    # numpy.random isn't thread safe
137    numpy.random.seed(seed)
138
139
140def get_seed():
141    """
142    Get the random number seed. If the seed has been set, the set value will be
143    returned; otherwise, the default seed value will be returned, which equals
144    `std::mt19937::default_seed <http://www.cplusplus.com/reference/random/mt19937/>`_ .
145
146    Returns:
147        int, random number seed.
148
149    Examples:
150        >>> # Get the global configuration of seed.
151        >>> # If set_seed() is never called before, the default value(std::mt19937::default_seed) will be returned.
152        >>> import mindspore.dataset as ds
153        >>> seed = ds.config.get_seed()
154    """
155    return _config.get_seed()
156
157
158def set_prefetch_size(size):
159    """
160    Set the buffer queue size between dataset operations in the pipeline.
161
162    The presence of a buffer queue allows the current operation to start
163    processing subsequent data before the next operation fetches it, so the
164    operations can execute asynchronously and concurrently.
165
166    A larger buffer queue size reduces the overall processing latency when
167    neighboring operations have unbalanced throughput rates, but also consumes
168    more system memory.
169
170    Args:
171        size (int): The size of the buffer queue, must be greater than 0.
172
173    Raises:
174        TypeError: If `size` is not of type int.
175        ValueError: If `size` is not a positive number.
176
177    Note:
178        The total memory consumed by the buffer queue is proportional to the number
179        of worker threads. To avoid overuse of memory, when the number of worker
180        threads is greater than 4, the actual buffer queue size used will be adjusted
181        to the greater of (`size` * 4 / number of worker threads) and 1.
182
183    Examples:
184        >>> # Set a new global configuration value for the prefetch size.
185        >>> import mindspore.dataset as ds
186        >>> ds.config.set_prefetch_size(1000)
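        >>> # A worked example of the Note above (a sketch, not additional API): with
        >>> # size=1000 and 8 worker threads, the buffer queue actually used per
        >>> # operation is max(1000 * 4 / 8, 1) = 500 rows.
        >>> ds.config.set_num_parallel_workers(8)
        >>> ds.config.set_prefetch_size(1000)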
187    """
188    if not isinstance(size, int) or isinstance(size, bool):
189        raise TypeError("size isn't of type int.")
190    if size <= 0 or size > INT32_MAX:
191        raise ValueError(
192            "size is not within the required range (0, INT32_MAX(2147483647)].")
193    _config.set_op_connector_size(size)
194
195
196def get_prefetch_size():
197    """
198    Get the prefetch size in number of rows.
199    If `set_prefetch_size` is never called before, the default value 16 will be returned.
200
201    Returns:
202        int, total number of rows to be prefetched.
203
204    Examples:
205        >>> # Get the global configuration of prefetch size.
206        >>> # If set_prefetch_size() is never called before, the default value(16) will be returned.
207        >>> import mindspore.dataset as ds
208        >>> prefetch_size = ds.config.get_prefetch_size()
209    """
210    return _config.get_op_connector_size()
211
212
213def set_num_parallel_workers(num):
214    """
215    Set a new global configuration default value for the number of parallel workers.
216    This setting will affect the parallelism of all dataset operations.
217
218    Args:
219        num (int): Number of parallel workers to be used as a default for each operation.
220
221    Raises:
222        TypeError: If `num` is not of type int.
223        ValueError: If `num` <= 0 or `num` > INT32_MAX(2147483647).
224
225    Examples:
226        >>> # Set a new global configuration value for the number of parallel workers.
227        >>> # Now parallel dataset operations will run with 8 workers.
228        >>> import mindspore.dataset as ds
229        >>> ds.config.set_num_parallel_workers(8)
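        >>> # This only changes the global default; an individual operation can still
        >>> # override it through its own num_parallel_workers argument (a sketch,
        >>> # `dataset` is assumed to be an existing dataset object):
        >>> # dataset = dataset.map(operations=[...], num_parallel_workers=2)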
230    """
231    if not isinstance(num, int) or isinstance(num, bool):
232        raise TypeError("num isn't of type int.")
233    if num <= 0 or num > INT32_MAX:
234        raise ValueError("Number of parallel workers given is not within the required range"
235                         " (0, INT32_MAX(2147483647)].")
236    _config.set_num_parallel_workers(num)
237
238
239def get_num_parallel_workers():
240    """
241    Get the global configuration of number of parallel workers.
242    This is the DEFAULT num_parallel_workers value used for each operation.
243    If `set_num_parallel_workers` is never called before, the default value(8) will be returned.
244
245    Returns:
246        int, number of parallel workers to be used as a default for each operation.
247
248    Examples:
249        >>> # Get the global configuration of parallel workers.
250        >>> # If set_num_parallel_workers() is never called before, the default value(8) will be returned.
251        >>> import mindspore.dataset as ds
252        >>> num_parallel_workers = ds.config.get_num_parallel_workers()
253    """
254    return _config.get_num_parallel_workers()
255
256
257def set_numa_enable(numa_enable):
258    """
259    Set the default state of numa enabled. If `numa_enable` is ``True``, you need to
260    ensure the `numa library <http://rpmfind.net/linux/rpm2html/search.php?query=libnuma-devel>`_ is installed.
261
262    Args:
263        numa_enable (bool): Whether to use numa bind feature.
264
265    Raises:
266        TypeError: If `numa_enable` is not a boolean data type.
267
268    Examples:
269        >>> # Set a new global configuration value for the state of numa enabled.
270        >>> # Now parallel dataset operations will run with numa bind function
271        >>> import mindspore.dataset as ds
272        >>> ds.config.set_numa_enable(True)
273    """
274    if not isinstance(numa_enable, bool):
275        raise TypeError("numa_enable must be a boolean dtype.")
276    _config.set_numa_enable(numa_enable)
277
278
279def get_numa_enable():
280    """
281    Get the state of numa, indicating whether it is enabled or disabled.
282    This is the numa config used for all processes; it is disabled by default.
283
284    Returns:
285        bool, the default state of numa enabled.
286
287    Examples:
288        >>> # Get the global configuration of numa.
289        >>> import mindspore.dataset as ds
290        >>> numa_state = ds.config.get_numa_enable()
291    """
292    return _config.get_numa_enable()
293
294
295def set_monitor_sampling_interval(interval):
296    """
297    Set the default interval (in milliseconds) for monitor sampling.
298
299    Args:
300        interval (int): Interval (in milliseconds) to be used for performance monitor sampling.
301
302    Raises:
303        TypeError: If `interval` is not type int.
304        ValueError: If `interval` <= 0 or `interval` > INT32_MAX(2147483647).
305
306    Examples:
307        >>> # Set a new global configuration value for the monitor sampling interval.
308        >>> import mindspore.dataset as ds
309        >>> ds.config.set_monitor_sampling_interval(100)
310    """
311    if not isinstance(interval, int) or isinstance(interval, bool):
312        raise TypeError("interval isn't of type int.")
313    if interval <= 0 or interval > INT32_MAX:
314        raise ValueError(
315            "Interval given is not within the required range (0, INT32_MAX(2147483647)].")
316    _config.set_monitor_sampling_interval(interval)
317
318
319def get_monitor_sampling_interval():
320    """
321    Get the global configuration of sampling interval of performance monitor.
322    If `set_monitor_sampling_interval` is never called before, the default value(1000) will be returned.
323
324    Returns:
325        int, interval (in milliseconds) for performance monitor sampling.
326
327    Examples:
328        >>> # Get the global configuration of monitor sampling interval.
329        >>> # If set_monitor_sampling_interval() is never called before, the default value(1000) will be returned.
330        >>> import mindspore.dataset as ds
331        >>> sampling_interval = ds.config.get_monitor_sampling_interval()
332    """
333    return _config.get_monitor_sampling_interval()
334
335
336def set_auto_num_workers(enable):
337    """
338    Set num_parallel_workers for each op automatically (this feature is turned off by default).
339
340    If turned on, the num_parallel_workers in each op will be adjusted automatically, possibly overwriting the
341    num_parallel_workers passed in by the user or the default value (if the user doesn't pass anything) set by
342    :func:`mindspore.dataset.config.set_num_parallel_workers`.
343
344    For now, this function is only optimized for YoloV3 dataset with per_batch_map (running map in batch).
345    This feature aims to provide a baseline for optimized num_workers assignment for each operation.
346    Operations whose num_parallel_workers are adjusted to a new value will be logged.
347
348    Args:
349        enable (bool): Whether to enable auto num_workers feature or not.
350
351    Raises:
352        TypeError: If `enable` is not of boolean type.
353
354    Examples:
355        >>> # Enable the auto_num_worker feature; this might override the num_parallel_workers passed in by the user
356        >>> import mindspore.dataset as ds
357        >>> ds.config.set_auto_num_workers(True)
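        >>> # The flag can be read back, and any operation whose num_parallel_workers is
        >>> # adjusted by this feature is reported in the log (see the description above).
        >>> print(ds.config.get_auto_num_workers())
        True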
358    """
359    if not isinstance(enable, bool):
360        raise TypeError("enable must be of type bool.")
361    _config.set_auto_num_workers(enable)
362
363
364def _set_auto_workers_config(option):
365    """
366    INTERNAL USE ONLY!
367    Select the weight profile of auto_num_workers. Currently, these 7 options are supported.
368    Option #0 leaf_num_workers:batch_num_workers:map_num_workers=1:1:1
369    Option #1 leaf_num_workers:batch_num_workers:map_num_workers=2:1:1
370    Option #2 leaf_num_workers:batch_num_workers:map_num_workers=1:2:1
371    Option #3 leaf_num_workers:batch_num_workers:map_num_workers=1:1:2
372    Option #4 leaf_num_workers:batch_num_workers:map_num_workers=2:2:1
373    Option #5 leaf_num_workers:batch_num_workers:map_num_workers=2:1:2
374    Option #6 leaf_num_workers:batch_num_workers:map_num_workers=1:2:2
375
376    Args:
377        option (int): The id of the profile to use.
378
379    Raises:
380        TypeError: If `option` is not of type int.
381        ValueError: If `option` is not within the range of [0, 6].
382    """
383    if not isinstance(option, int) or isinstance(option, bool):
384        raise TypeError("option isn't of type int.")
385    if option < 0 or option > 6:
386        raise ValueError("option isn't within the required range of [0, 6].")
387    _config.set_auto_worker_config(option)
388
389
390def get_auto_num_workers():
391    """
392    Get the setting (turned on or off) of the automatic number of workers feature; it is disabled by default.
393
394    Returns:
395        bool, whether the auto number worker feature is turned on.
396
397    Examples:
398        >>> # Get the global configuration of auto number worker feature.
399        >>> import mindspore.dataset as ds
400        >>> flag = ds.config.get_auto_num_workers()
401    """
402    return _config.get_auto_num_workers()
403
404
405def set_callback_timeout(timeout):
406    """
407    Set the default timeout (in seconds) for :class:`mindspore.dataset.WaitedDSCallback` .
408
409    Args:
410        timeout (int): Timeout (in seconds) to be used to end the wait in :class:`mindspore.dataset.WaitedDSCallback`
411            in case of a deadlock. The `timeout` must be greater than 0.
412
413    Raises:
414        TypeError: If `timeout` is not type int.
415        ValueError: If `timeout` is not a positive number.
416
417    Examples:
418        >>> # Set a new global configuration value for the timeout value.
419        >>> import mindspore.dataset as ds
420        >>> ds.config.set_callback_timeout(100)
421    """
422    if not isinstance(timeout, int) or isinstance(timeout, bool):
423        raise TypeError("timeout isn't of type int.")
424    if timeout <= 0 or timeout > INT32_MAX:
425        raise ValueError("Timeout given is not within the required range (0, INT32_MAX(2147483647)].")
426    _config.set_callback_timeout(timeout)
427
428
429def get_callback_timeout():
430    """
431    Get the default timeout (in seconds) for :class:`mindspore.dataset.WaitedDSCallback` .
432    If `set_callback_timeout` is never called before, the default value(60) will be returned.
433
434    Returns:
435        int, Timeout (in seconds) to be used to end the wait in :class:`mindspore.dataset.WaitedDSCallback` in case of
436        a deadlock.
437
438    Examples:
439        >>> # Get the global configuration of callback timeout.
440        >>> # If set_callback_timeout() is never called before, the default value(60) will be returned.
441        >>> import mindspore.dataset as ds
442        >>> callback_timeout = ds.config.get_callback_timeout()
443    """
444    return _config.get_callback_timeout()
445
446
447def __str__():
448    """
449    String representation of the configurations.
450
451    Returns:
452        str, configurations.
453    """
454    return str(_config)
455
456
457def load(file):
458    """
459    Load the project configuration from the file.
460
461    Args:
462        file (str): Path of the configuration file to be loaded.
463
464    Raises:
465        RuntimeError: If `file` is invalid and parsing fails.
466
467    Examples:
468        >>> # Set new default configuration according to values in the configuration file.
469        >>> # example config file:
470        >>> # {
471        >>> #     "logFilePath": "/tmp",
472        >>> #     "numParallelWorkers": 4,
473        >>> #     "seed": 5489,
474        >>> #     "monitorSamplingInterval": 30
475        >>> # }
476        >>> import mindspore.dataset as ds
477        >>> config_file = "/path/to/config/file"
478        >>> ds.config.load(config_file)
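        >>> # A minimal sketch that first writes a configuration like the one above to a
        >>> # JSON file and then loads it (the path is hypothetical).
        >>> import json
        >>> cfg = {"numParallelWorkers": 4, "seed": 5489, "monitorSamplingInterval": 30}
        >>> with open(config_file, "w") as f:
        ...     json.dump(cfg, f)
        >>> ds.config.load(config_file)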
479    """
480    _config.load(file)
481
482
483def set_enable_autotune(enable, filepath_prefix=None):
484    """
485    Set whether to enable AutoTune for data pipeline parameters.
486
487    It can be used to automatically adjust the parameter configurations
488    of each operation in the data processing pipeline, such as parallelism
489    and buffer queue size, according to the load of the environment resources
490    during training, so as to improve the overall processing speed.
491
492    AutoTune is not enabled by default.
493
494    Args:
495        enable (bool): Whether to enable AutoTune.
496        filepath_prefix (str, optional): The path where the optimized parameter configuration will be saved.
497            Effective only if `enable` is `True`. The parameter configuration file on each Device will be
498            saved separately, and the final file name will be `filepath_prefix + RANK_ID + ".json"`,
499            where RANK_ID is the Device ID corresponding to the file. Default: ``None`` , no configuration
500            file is saved.
501
502    Raises:
503        TypeError: If `enable` is not of type boolean.
504        TypeError: If `filepath_prefix` is not of type str.
505        RuntimeError: If `filepath_prefix` is an empty string.
506        RuntimeError: If `filepath_prefix` is a directory.
507        RuntimeError: If `filepath_prefix` does not exist.
508        RuntimeError: If `filepath_prefix` does not have write permission.
509
510    Note:
511        - Saved parameter profiles can be loaded via the `mindspore.dataset.deserialize` interface to
512          directly obtain a data processing pipeline object configured with optimal parameters.
513        - The parameter tuning process can be viewed by turning on INFO level logging.
514
515    An example of the generated configuration file is as follows, the "remark" field describes whether or not data
516    processing parameter tuning has been performed, the "summary" field briefly shows each operation in the data
517    processing pipeline and its corresponding optimal configuration, and the "tree" field provides complete
518    information about the structure of the data processing pipeline.
519
520    .. code-block::
521
522        {
523            "remark": "The following file has been auto-generated by the Dataset AutoTune.",
524            "summary": [
525                "CifarOp(ID:5)       (num_parallel_workers: 2, prefetch_size:64)",
526                "MapOp(ID:4)         (num_parallel_workers: 2, prefetch_size:64)",
527                "MapOp(ID:3)         (num_parallel_workers: 2, prefetch_size:64)",
528                "BatchOp(ID:2)       (num_parallel_workers: 8, prefetch_size:64)"
529            ],
530            "tree": {
531                ...
532            }
533        }
534
535    Examples:
536        >>> import mindspore.dataset as ds
537        >>>
538        >>> # enable AutoTune and save optimized data pipeline configuration
539        >>> ds.config.set_enable_autotune(True, "/path/to/autotune_out.json")
540        >>>
541        >>> # enable AutoTune
542        >>> ds.config.set_enable_autotune(True)
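        >>>
        >>> # As mentioned in the Note above, the saved profile can later be used to
        >>> # rebuild a tuned pipeline (a sketch; the file name assumes Device 0):
        >>> # dataset = ds.deserialize(json_filepath="/path/to/autotune_out_0.json")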
543    """
544    if not isinstance(enable, bool):
545        raise TypeError("enable must be of type bool.")
546
547    save_autoconfig = bool(enable and filepath_prefix is not None)
548
549    if filepath_prefix and not isinstance(filepath_prefix, str):
550        raise TypeError(
551            "filepath_prefix must be a str value but was: {}.".format(filepath_prefix))
552
553    if enable and filepath_prefix == "":
554        raise RuntimeError(
555            "The value of filepath_prefix cannot be the empty string.")
556
557    if not enable and filepath_prefix is not None:
558        logger.warning(
559            "The value of filepath_prefix is ignored when enable is False.")
560
561    if enable and filepath_prefix is None:
562        logger.warning(
563            "Dataset AutoTune is enabled but no json path is specified, check INFO log for tuned result.")
564
565    json_filepath = replace_none(filepath_prefix, "")
566    _config.set_enable_autotune(enable, save_autoconfig, json_filepath)
567
568
569def get_enable_autotune():
570    """
571    Get whether AutoTune is currently enabled; it is disabled by default.
572
573    Returns:
574        bool, whether AutoTune is currently enabled.
575
576    Examples:
577        >>> # get the state of AutoTune
578        >>> import mindspore.dataset as ds
579        >>> autotune_flag = ds.config.get_enable_autotune()
580    """
581    return _config.get_enable_autotune()
582
583
584def set_autotune_interval(interval):
585    """
586    Set the configuration adjustment interval (in steps) for AutoTune.
587
588    The default setting is ``0``, which will adjust the configuration after each epoch.
589    Otherwise, the configuration will be adjusted every `interval` steps.
590
591    Args:
592        interval (int): Interval (in steps) to adjust the configuration of the data pipeline.
593
594    Raises:
595        TypeError: If `interval` is not of type int.
596        ValueError: If `interval` < 0 or `interval` > INT32_MAX(2147483647).
597
598    Examples:
599        >>> # set a new interval for AutoTune
600        >>> import mindspore.dataset as ds
601        >>> ds.config.set_autotune_interval(30)
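        >>> # With interval=30, AutoTune re-evaluates the pipeline configuration every 30
        >>> # steps; the default value 0 instead adjusts it once per epoch (see above).
        >>> ds.config.set_autotune_interval(0)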
602    """
603    if not isinstance(interval, int) or isinstance(interval, bool):
604        raise TypeError("interval must be of type int.")
605    if interval < 0 or interval > INT32_MAX:
606        raise ValueError(
607            "Interval given is not within the required range [0, INT32_MAX(2147483647)].")
608    _config.set_autotune_interval(interval)
609
610
611def get_autotune_interval():
612    """
613    Get the current configuration adjustment interval (in steps) for AutoTune.
614    If `set_autotune_interval` is never called before, the default value(0) will be returned.
615
616    Returns:
617        int, the configuration adjustment interval (in steps) for AutoTune.
618
619    Examples:
620        >>> # get the global configuration of the autotuning interval
621        >>> import mindspore.dataset as ds
622        >>> autotune_interval = ds.config.get_autotune_interval()
623    """
624    return _config.get_autotune_interval()
625
626
627def get_enable_shared_mem():
628    """
629    Get the default state of the shared memory enabled variable.
630
631    Note:
632        `get_enable_shared_mem` is not supported on Windows and MacOS platforms yet.
633
634    Returns:
635        bool, the state of shared mem enabled variable.
636
637    Examples:
638        >>> # Get the flag of shared memory feature.
639        >>> import mindspore.dataset as ds
640        >>> shared_mem_flag = ds.config.get_enable_shared_mem()
641    """
642    # For Windows and MacOS we forbid shared mem function temporarily
643    enable_shared_mem = _config.get_enable_shared_mem()
644    if enable_shared_mem and platform.system().lower() in {"windows", "darwin"}:
645        _config.set_enable_shared_mem(False)
646        return False
647    return enable_shared_mem
648
649
650def set_enable_shared_mem(enable):
651    """
652    Set whether to use shared memory for interprocess communication when data processing multiprocessing is turned on.
653
654    Using shared memory can improve the efficiency of data transfer between processes.
655
656    Shared memory is used by default.
657
658    Note:
659        Windows and MacOS systems are not supported yet.
660
661    Args:
662        enable (bool): Whether to use shared memory for interprocess communication.
663
664    Raises:
665        TypeError: If `enable` is not of type bool.
666
667    Examples:
668        >>> # Enable shared memory feature to improve the performance of Python multiprocessing.
669        >>> import mindspore.dataset as ds
670        >>> ds.config.set_enable_shared_mem(True)
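        >>> # Shared memory only takes effect when Python multiprocessing is actually
        >>> # used, e.g. (a sketch, `my_generator` is assumed to exist):
        >>> # dataset = ds.GeneratorDataset(my_generator, ["data"], python_multiprocessing=True)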
671    """
672    if not isinstance(enable, bool):
673        raise TypeError("enable must be of type bool.")
674    if enable:
675        # For Windows and MacOS we forbid shared mem function temporarily
676        if platform.system().lower() in {"windows", "darwin"}:
677            logger.warning("For Windows and MacOS we forbid shared mem function temporarily.")
678            return
679        logger.warning("The shared memory is on, multiprocessing performance will be improved. "
680                       "Note: the required shared memory can't exceed 80% of the available shared memory.")
681    _config.set_enable_shared_mem(enable)
682
683
684def set_sending_batches(batch_num):
685    """
686    Set the upper limit on the number of batches of data that the Host can send to the Device.
687
688    This can be used to implement customized data sending control logic to solve the problem of
689    Device out of memory. In each epoch, when the actual number of batches sent to the Device
690    reaches this value, the Host will stop sending until the user increases this
691    upper limit again through this interface.
692
693    Currently, it is only supported when training in sink mode with Ascend backend, which can
694    be enabled via the :class:`mindspore.train.Model.train` interface.
695
696    Args:
697        batch_num (int): The upper limit on the number of batches of data that the Host can
698            send to the Device. ``0`` indicates that there is no upper limit for sending.
699
700    Raises:
701        TypeError: If `batch_num` is not of type int.
702
703    Examples:
704        >>> # Set a new global configuration value for the sending batches
705        >>> import mindspore.dataset as ds
706        >>> ds.config.set_sending_batches(10)
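        >>> # A sketch of the control logic described above: once the Device has consumed
        >>> # the first 10 batches, raising the upper limit allows 10 more to be sent.
        >>> ds.config.set_sending_batches(20)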
707    """
708    if not isinstance(batch_num, int) or isinstance(batch_num, bool):
709        raise TypeError("batch_num must be an int dtype.")
710    _config.set_sending_batches(batch_num)
711
712
713def set_auto_offload(offload):
714    """
715    Set the automatic offload flag of the dataset. If set_auto_offload is True, as many
716    dataset operations as possible will be automatically offloaded from the CPU to the Device (GPU or Ascend).
717
718    Args:
719        offload (bool): Whether to use the automatic offload feature.
720
721    Raises:
722        TypeError: If offload is not a boolean data type.
723
724    Examples:
725        >>> # Enable automatic offload feature
726        >>> import mindspore.dataset as ds
727        >>> ds.config.set_auto_offload(True)
728    """
729    if not isinstance(offload, bool):
730        raise TypeError("offload must be a bool dtype")
731    _config.set_auto_offload(offload)
732
733
734def get_auto_offload():
735    """
736    Get the state of the automatic offload flag (True or False); it is disabled by default.
737
738    Returns:
739        bool, whether the automatic offload feature is enabled.
740
741    Examples:
742        >>> # Get the global configuration of the automatic offload feature.
743        >>> import mindspore.dataset as ds
744        >>> auto_offload = ds.config.get_auto_offload()
745    """
746    return _config.get_auto_offload()
747
748
749def set_enable_watchdog(enable):
750    """
751    Set whether to enable the watchdog Python thread; it is enabled by default.
752    Watchdog is a thread which cleans up hanging subprocesses.
753
754    Args:
755        enable (bool): Whether to launch a watchdog Python thread.
756
757    Raises:
758        TypeError: If `enable` is not a boolean data type.
759
760    Examples:
761        >>> # Set a new global configuration value for the state of watchdog Python thread as enabled.
762        >>> import mindspore.dataset as ds
763        >>> ds.config.set_enable_watchdog(True)
764    """
765    if not isinstance(enable, bool):
766        raise TypeError("enable must be a boolean dtype.")
767    _config.set_enable_watchdog(enable)
768
769
770def get_enable_watchdog():
771    """
772    Get the state of the watchdog Python thread, indicating whether it is enabled or disabled.
773    This is a Python watchdog thread used for all processes; it is enabled by default.
774
775    Returns:
776        bool, whether the watchdog Python thread is enabled.
777
778    Examples:
779        >>> # Get the global configuration of watchdog Python thread.
780        >>> import mindspore.dataset as ds
781        >>> watchdog_state = ds.config.get_enable_watchdog()
782    """
783    return _config.get_enable_watchdog()
784
785
786def set_multiprocessing_timeout_interval(interval):
787    """
788    Set the default interval (in seconds) for multiprocessing/multithreading timeout when main process/thread gets
789    data from subprocesses/child threads.
790
791    Args:
792        interval (int): Interval (in seconds) to be used for multiprocessing/multithreading timeout when main
793            process/thread gets data from subprocess/child threads.
794
795    Raises:
796        TypeError: If `interval` is not of type int.
797        ValueError: If `interval` <= 0 or `interval` > INT32_MAX(2147483647).
798
799    Examples:
800        >>> # Set a new global configuration value for multiprocessing/multithreading timeout when getting data.
801        >>> import mindspore.dataset as ds
802        >>> ds.config.set_multiprocessing_timeout_interval(300)
803    """
804    if not isinstance(interval, int) or isinstance(interval, bool):
805        raise TypeError("interval isn't of type int.")
806    if interval <= 0 or interval > INT32_MAX:
807        raise ValueError(
808            "Interval given is not within the required range (0, INT32_MAX(2147483647)].")
809    _config.set_multiprocessing_timeout_interval(interval)
810
811
812def get_multiprocessing_timeout_interval():
813    """
814    Get the global configuration of multiprocessing/multithreading timeout when main process/thread gets data from
815    subprocesses/child threads.
816
817    Returns:
818        int, interval (in seconds) for multiprocessing/multithreading timeout when main process/thread gets data from
819        subprocesses/child threads. If `set_multiprocessing_timeout_interval` is never called before,
820        the default value(300) will be returned.
821
822    Examples:
823        >>> # Get the global configuration of multiprocessing/multithreading timeout when main process/thread gets data
824        >>> # from subprocesses/child threads. If set_multiprocessing_timeout_interval() is never called before, the
825        >>> # default value(300) will be returned.
826        >>> import mindspore.dataset as ds
827        >>> multiprocessing_timeout_interval = ds.config.get_multiprocessing_timeout_interval()
828    """
829    return _config.get_multiprocessing_timeout_interval()
830
831
832def set_dynamic_shape(is_dynamic):
833    """
834    Set the dynamic shape flag of the dataset.
835
836    Args:
837        is_dynamic (bool): Whether the dataset has dynamic shape. Default: ``False``.
838
839    Raises:
840        TypeError: If `is_dynamic` is not a boolean data type.
841
842    Examples:
843        >>> import mindspore.dataset as ds
844        >>> ds.config.set_dynamic_shape(True)
845    """
846    if not isinstance(is_dynamic, bool):
847        raise TypeError("is_dynamic must be a boolean dtype.")
848    _config.set_dynamic_shape(is_dynamic)
849
850
851def get_dynamic_shape():
852    """
853    Get the dynamic shape flag of the dataset; it is set to False by default.
854
855    Returns:
856        bool, whether the dataset is dynamic shape.
857
858    Examples:
859        >>> import mindspore.dataset as ds
860        >>> is_dynamic_shape = ds.config.get_dynamic_shape()
861    """
862    return _config.get_dynamic_shape()
863
864
865def set_fast_recovery(fast_recovery):
866    """
867    Set whether the dataset pipeline should recover in fast mode during failover
868    (in fast mode, random augmentations may not produce the same results as before the failure occurred).
869
870    Args:
871        fast_recovery (bool): Whether the dataset pipeline recovers in fast mode.
872
873    Raises:
874        TypeError: If `fast_recovery` is not a boolean data type.
875
876    Examples:
877        >>> import mindspore.dataset as ds
878        >>> ds.config.set_fast_recovery(False)
879    """
880    if not isinstance(fast_recovery, bool):
881        raise TypeError("fast_recovery must be a boolean dtype.")
882    _config.set_fast_recovery(fast_recovery)
883
884
885def get_fast_recovery():
886    """
887    Get whether the fast recovery mode is enabled for the current dataset pipeline.
888    It is set to True by default.
889
890    Returns:
891        bool, whether the dataset recovers fast in failover reset.
892
893    Examples:
894        >>> import mindspore.dataset as ds
895        >>> is_fast_recovery = ds.config.get_fast_recovery()
896    """
897    return _config.get_fast_recovery()
898
899
900def set_debug_mode(debug_mode_flag: bool, debug_hook_list: list = None):
901    """
902    Set the debug_mode flag of the dataset pipeline. When enabled, the dataset pipeline is run synchronously and
903    sequentially with a single thread.
904
905    Note:
906        When debug_mode is enabled,
907
908        - If the random seed has not been set, the seed will internally be set to 1,
909          so that debug mode execution of the dataset pipeline can produce deterministic results.
910
911        - The following configuration settings are ignored:
912
913          - auto_offload (False is used.)
914          - enable_autotune (False is used.)
915          - error_samples_mode (ErrorSamplesMode.RETURN is used.)
916          - num_parallel_workers (Value 1 is used.)
917
918        - The `offload` parameter in `map` operation will be ignored.
919        - The `python_multiprocessing` parameter in `GeneratorDataset`, `map`/`batch` operation will be ignored.
920        - The `cache` parameter in Dataset loading API will be ignored.
921
922    Args:
923        debug_mode_flag (bool): Whether dataset pipeline debug mode is enabled, which forces the pipeline
924            to run synchronously and sequentially.
925        debug_hook_list (list[DebugHook]): a list of debug hook objects to be inserted before and after each
926            transform in a map operation. Default: ``None``, which means the basic print hook is used; it
927            prints the shape/size/type of each input/output data of each transformation.
928
929    Raises:
930        TypeError: If `debug_mode_flag` is not a boolean data type.
931        TypeError: If `debug_hook_list` is not a list type.
932        TypeError: If any item in `debug_hook_list` is not DebugHook type.
933
934    Examples:
935        >>> import mindspore.dataset as ds
936        >>> import mindspore.dataset.vision as vision
937        >>> import mindspore.dataset.debug as debug
938        >>>
939        >>> # 1. Enable dataset pipeline debug mode and use default debug hook.
940        >>> # Print shape and type of input/output data of each transform op in map operator.
941        >>> ds.config.set_debug_mode(True)
942        >>>
943        >>> # 2. Enable dataset pipeline debug mode and use pre-defined debug hook provided by MindData.
944        >>> ds.config.set_debug_mode(True, debug_hook_list=[debug.PrintDataHook()])
945        >>>
946        >>> # 3. Enable dataset pipeline debug mode and use user-defined debug hook. It must define a
947        >>> # class inherited from DebugHook.
948        >>> class CustomizedHook(debug.DebugHook):
949        ...     def __init__(self):
950        ...         super().__init__()
951        ...
952        ...     def compute(self, *args):
953        ...         # Add your debugging code here.
954        ...         return args
955        >>>
956        >>> ds.config.set_debug_mode(True, debug_hook_list=[CustomizedHook()])
957        >>>
958        >>> # 4. Enable dataset pipeline debug mode, use a user-defined debug hook and insert it manually.
959        >>> ds.config.set_debug_mode(True)
960        >>> dataset = ds.ImageFolderDataset(dataset_dir="/path/to/image_folder_dataset_directory")
961        >>>
962        >>> # The debug hook is added after Decode operation.
963        >>> dataset = dataset.map([vision.Decode(), CustomizedHook(), vision.CenterCrop(100)])
964    """
965    if not isinstance(debug_mode_flag, bool):
966        raise TypeError("debug_mode_flag isn't of type boolean.")
967    if debug_hook_list is None:
968        debug_hook_list = [PrintMetaDataHook()]
969    if not isinstance(debug_hook_list, list):
970        raise TypeError("debug_hook_list is not a list.")
971    for debug_func in debug_hook_list:
972        if not isinstance(debug_func, DebugHook):
973            raise TypeError("All items in debug_hook_list must be of type DebugHook.")
974    if debug_mode_flag:
975        logger.warning("Dataset pipeline debug mode is enabled. Performance will be impacted because the pipeline"
976                       " will be running in a single thread.")
977    if debug_hook_list:
978        _debug_context["debug_hook_list"] = debug_hook_list
979
980    _config.set_debug_mode(debug_mode_flag)
981
982
983def get_debug_mode():
984    """
985    Get whether debug mode is currently enabled for the data pipeline.
986
987    Returns:
988        bool, whether data pipeline debug mode is enabled.
989
990    Examples:
991        >>> import mindspore.dataset as ds
992        >>> debug_mode = ds.config.get_debug_mode()
993    """
994    return _config.get_debug_mode()
995
996
997def _get_debug_hook_list():
998    """
999    INTERNAL USE ONLY!
1000    Get value of debug_hook_list.
1001
1002    Returns:
1003        list, the debug hook objects to be inserted in map operation to debug inputs/outputs of each transform.
1004    """
1005    return _debug_context.get("debug_hook_list")
1006
1007
1008class ErrorSamplesMode(IntEnum):
1009    """
1010    An enumeration for `error_samples_mode` .
1011
1012    Possible enumeration values are: ErrorSamplesMode.RETURN, ErrorSamplesMode.REPLACE, ErrorSamplesMode.SKIP.
1013
1014    - ErrorSamplesMode.RETURN: means an erroneous sample results in an error being raised and returned.
1015    - ErrorSamplesMode.REPLACE: means an erroneous sample is replaced with an internally determined sample.
1016    - ErrorSamplesMode.SKIP: means an erroneous sample is skipped.
1017    """
1018
1019    RETURN = 0
1020    REPLACE = 1
1021    SKIP = 2
1022
1023
1024# Convert ErrorSamplesMode from Python enum format to CDE enum format
1025_PYTHON_TO_CDE_ERROR_SAMPLES_MODE = {
1026    ErrorSamplesMode.RETURN: cde.ErrorSamplesMode.DE_ERROR_SAMPLES_MODE_RETURN,
1027    ErrorSamplesMode.REPLACE: cde.ErrorSamplesMode.DE_ERROR_SAMPLES_MODE_REPLACE,
1028    ErrorSamplesMode.SKIP: cde.ErrorSamplesMode.DE_ERROR_SAMPLES_MODE_SKIP
1029}
1030
1031# Convert ErrorSamplesMode from CDE int format to Python enum format
1032_CDE_TO_PYTHON_ERROR_SAMPLES_MODE = {
1033    0: ErrorSamplesMode.RETURN,
1034    1: ErrorSamplesMode.REPLACE,
1035    2: ErrorSamplesMode.SKIP
1036}
1037
1038
1039def set_error_samples_mode(error_samples_mode):
1040    """
1041    Set the method in which erroneous samples should be processed in a dataset pipeline.
1042
1043    Note:
1044        - This error samples feature is only applicable to the Map operation in a dataset pipeline.
1045        - For ``ErrorSamplesMode.REPLACE`` mode, a cache of other samples will be used.
1046        - If ``ErrorSamplesMode.SKIP`` mode is used in a distributed setting, manually ensure that the
1047          number of valid samples is the same for each shard (otherwise one may encounter hangs).
1048          One technique is to manually concat a dataset of all valid samples plus a
1049          take operation for the number of skipped erroneous samples.
1050
1051    Args:
1052        error_samples_mode (ErrorSamplesMode): The method in which erroneous samples should be processed in a dataset
1053            pipeline. It can be any of [ErrorSamplesMode.RETURN, ErrorSamplesMode.REPLACE, ErrorSamplesMode.SKIP].
1054
1055            - ``ErrorSamplesMode.RETURN``: means an erroneous sample results in an error being raised and returned.
1056
1057            - ``ErrorSamplesMode.REPLACE``: means an erroneous sample is replaced with a correct sample.
1058
1059            - ``ErrorSamplesMode.SKIP``: means an erroneous sample is skipped.
1060
1061    Raises:
1062        TypeError: If `error_samples_mode` is not of type ErrorSamplesMode.
1063
1064    Examples:
1065        >>> import mindspore.dataset as ds
1066        >>> ds.config.set_error_samples_mode(ds.config.ErrorSamplesMode.SKIP)
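        >>>
        >>> # A hedged sketch of the effect in a map operation: with SKIP mode, samples
        >>> # for which the transform raises an error are dropped instead of failing the
        >>> # pipeline (`dataset` is assumed to be an existing dataset object).
        >>> def parse(sample):
        ...     if sample is None:
        ...         raise ValueError("corrupted sample")
        ...     return sample
        >>> # dataset = dataset.map(operations=parse)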
1067    """
1068    type_check(error_samples_mode, (ErrorSamplesMode,), "error_samples_mode")
1069    _config.set_error_samples_mode(_PYTHON_TO_CDE_ERROR_SAMPLES_MODE.get(error_samples_mode))
1070
1071
1072def get_error_samples_mode():
1073    """
1074    Get the current strategy for processing erroneous samples in a dataset pipeline.
1075    If `set_error_samples_mode` is never called before, the default setting is ErrorSamplesMode.RETURN.
1076
1077    Returns:
1078        ErrorSamplesMode, the method in which erroneous samples should be processed in a dataset pipeline.
1079
1080        - ErrorSamplesMode.RETURN: means an erroneous sample results in an error being raised and returned.
1081        - ErrorSamplesMode.REPLACE: means an erroneous sample is replaced with an internally determined sample.
1082        - ErrorSamplesMode.SKIP: means an erroneous sample is skipped.
1083
1084    Examples:
1085        >>> import mindspore.dataset as ds
1086        >>> error_samples_mode = ds.config.get_error_samples_mode()
1087    """
1088    return _CDE_TO_PYTHON_ERROR_SAMPLES_MODE.get(_config.get_error_samples_mode())
1089