# Copyright 2020-2024 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Parameter for cell."""
from __future__ import absolute_import

from copy import copy
import time
import os
import sys
import math
import numbers
import numpy as np
from mindspore import log as logger
from mindspore.log import _LogActionOnce
from mindspore._c_expression import ParamInfo
from mindspore.common import dtype as mstype
from mindspore import context
from mindspore.parallel._utils import _get_parallel_mode, _get_global_rank
from mindspore.common._utils import get_slice_num, get_slice_shape
from mindspore.common.initializer import initializer
from mindspore.common.tensor import Tensor
from mindspore import _checkparam as Validator
from mindspore._check_jit_forbidden_api import jit_forbidden_register
from mindspore._c_expression import Tensor as Tensor_
from mindspore.parallel._tensor import _get_slice_index
from mindspore.parallel._auto_parallel_context import auto_parallel_context
from mindspore.parallel._ps_context import _is_role_worker, _is_role_pserver, _is_role_sched, _clone_hash_table, \
                                           _is_ps_mode
from mindspore.parallel._ps_context import _reinsert_hash_table_size, _insert_accumu_init_info, _cache_enable
from mindspore.common._decorator import deprecated
import mindspore.common._monad as monad

__all__ = ['Parameter', 'ParameterTuple']

PARAMETER_NAME_DEFAULT = "Parameter"
PARAMETER_NAME_PREFIX_MAX_LEN = 1024

# Global variable for parameter unique key.
_GLOBAL_PARAMETER_KEY = -1


def _is_in_parallel_mode():
    """Return whether the current parallel mode is semi_auto_parallel or auto_parallel."""
    return auto_parallel_context().get_parallel_mode() in ["semi_auto_parallel", "auto_parallel"]


def init_to_value(init):
    """
    Get value of initializer.

    Returns:
        Value of the initializer.

    Raises:
        ValueError: If `init` is a string other than 'zeros' or 'ones', or is neither a string nor a number.
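
    Examples:
        >>> # Illustrative doctest for this module-level helper (not part of the public API).
        >>> init_to_value('ones')
        1.0
        >>> init_to_value(2)
        2.0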
    """
    if isinstance(init, str):
        if init == 'zeros':
            return 0.0
        if init == 'ones':
            return 1.0
        raise ValueError("The argument 'init' should be one of the values in ['zeros', 'ones'].")
    if isinstance(init, numbers.Number):
        return float(init)
    raise ValueError("The argument 'init' should be a number or a string, but got {}.".format(type(init)))


def _get_unique_parameter_key():
    """
    Get parameter unique key.
    Used to identify the same Parameter for Worker and Server in the embedding cache scenario.

    Returns:
        Integer. The unique parameter key.
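
    Examples:
        >>> # Illustrative only: keys increase monotonically within a process, so the
        >>> # absolute values depend on how many keys were generated before this call.
        >>> key_a = _get_unique_parameter_key()
        >>> key_b = _get_unique_parameter_key()
        >>> key_b == key_a + 1
        True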
    """
    global _GLOBAL_PARAMETER_KEY
    _GLOBAL_PARAMETER_KEY += 1
    return _GLOBAL_PARAMETER_KEY


def _offload_if_config(data):
    """
    Offload the parameter data to a file when memory offload is enabled and parameters are
    configured to be offloaded to disk. Only data of at least 512 bytes is offloaded.

    Args:
        data: The parameter data to offload.
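
    Note:
        The offload file is created under the configured offload directory with a name of the
        form ``<rank id>_<unique parameter key>_<timestamp>.data``. The lines below are a hedged
        sketch of the configuration this helper checks for; the setter calls are assumptions
        inferred from the getters used in the body and are left commented out, not verified here.

        .. code-block::

            # import mindspore as ms
            # ms.set_context(memory_offload="ON")
            # ms.set_offload_context({"offload_param": "disk", "offload_path": "./offload"})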
    """
    if not context.get_context("memory_offload") or data is None:
        return

    offload_context = context.get_offload_context()
    if offload_context.get("offload_param", None) != "disk":
        return

    data_size_threshold = 512
    if data.nbytes < data_size_threshold:
        return

    offload_file_path = data.offload_file_path()
    if offload_file_path is None or offload_file_path == "":
        offload_dir = offload_context.get("offload_path", "./offload")
        offload_dir = os.path.relpath(offload_dir)
        if not os.path.exists(offload_dir):
            os.makedirs(offload_dir)
        offload_file_path = offload_dir + "/" + str(_get_global_rank()) + "_" + str(
            _get_unique_parameter_key()) + "_" + str(time.time()) + ".data"
    data.offload(offload_file_path)


class Parameter(Tensor_):
    """
    `Parameter` is a `Tensor` subclass. When Parameters are assigned as Cell attributes, they are automatically
    added to the list of the Cell's parameters and will appear, e.g., in the `cell.get_parameters()` iterator.

    Note:
        - In the auto-parallel modes `SEMI_AUTO_PARALLEL` and `AUTO_PARALLEL`, if a `Parameter` is initialized
          by a `Tensor`, the type of the Parameter will be `Tensor`. The `Tensor` only saves the shape and type
          info of the tensor, with no memory usage.

        - The shape can be changed while compiling for auto-parallel.
          Calling `init_data` will return a `Parameter` with initialized data.

        - If an operator in the network requires part of its inputs to be Parameters,
          then the Parameters used as that part of the inputs are not allowed to be cast.

        - Give each `Parameter` a unique name to facilitate subsequent operations and updates.
          If two or more `Parameter` objects with the same name exist in a network,
          you will be prompted to set a unique name when defining them.

        - When directly printing a `Parameter`, you cannot view the actual values contained inside it.
          You need to use the `Parameter.asnumpy()` method to access the actual values.

    Args:
        default_input (Union[Tensor, int, float, numpy.ndarray, list]): Parameter data,
            used to initialize the parameter data.
        name (str): Name of the parameter. Default: ``None`` . If two or more `Parameter`
            objects with the same name exist in a network,
            you will be prompted to set a unique name when defining them.

            1) If the parameter is not given a name, the default name is its variable name. For example, the name of
            param_a below is name_a, and the name of param_b is the variable name param_b.

            .. code-block::

                self.param_a = Parameter(Tensor([1], ms.float32), name="name_a")
                self.param_b = Parameter(Tensor([2], ms.float32))

            2) If a parameter in a list or tuple is not given a name, a unique name will be assigned to it. For
            example, the names of the parameters below are **Parameter$1** and **Parameter$2**.

            .. code-block::

                self.param_list = [Parameter(Tensor([3], ms.float32)),
                                   Parameter(Tensor([4], ms.float32))]

            3) If the parameter is given a name, and the same name exists between different parameters, an exception
            will be thrown. For example, "its name 'name_a' already exists." will be thrown.

            .. code-block::

                self.param_a = Parameter(Tensor([1], ms.float32), name="name_a")
                self.param_tuple = (Parameter(Tensor([5], ms.float32), name="name_a"),
                                    Parameter(Tensor([6], ms.float32)))

            4) If a parameter appears multiple times in a list or tuple, its name is checked only once. For
            example, the following example will not throw an exception.

            .. code-block::

                self.param_a = Parameter(Tensor([1], ms.float32), name="name_a")
                self.param_tuple = (self.param_a, self.param_a)

        requires_grad (bool): True if the parameter requires gradient. Default: ``True`` .
        layerwise_parallel (bool): When `layerwise_parallel` is true in data/hybrid parallel mode,
            broadcast and gradient communication will not be applied to the `Parameter`. Default: ``False`` .
        parallel_optimizer (bool): It is used to filter the weight shard operation in `SEMI_AUTO_PARALLEL` or
            `AUTO_PARALLEL` mode. It works only when the parallel optimizer is enabled in
            `mindspore.set_auto_parallel_context()`. Default: ``True`` .
        storage_format (str): Only the Ascend device target is supported. It is used to specify the format of the
            weight loaded to the device. By default, the format is not changed. The optional values are
            ``"FRACTAL_NZ"`` , ``"NC1HWC0"`` , ``"FRACTAL_Z"`` , etc. Default: ``""`` .

    Examples:
        >>> import numpy as np
        >>> import mindspore
        >>> from mindspore import Parameter, Tensor, ops, nn
        >>>
        >>> class Net(nn.Cell):
        ...     def __init__(self):
        ...         super(Net, self).__init__()
        ...         self.matmul = ops.MatMul()
        ...         self.weight = Parameter(Tensor(np.ones((1, 2)), mindspore.float32), name="w", requires_grad=True)
        ...
        ...     def construct(self, x):
        ...         out = self.matmul(self.weight, x)
        ...         return out
        >>> net = Net()
        >>> x = Tensor(np.ones((2, 1)), mindspore.float32)
        >>> print(net(x))
        [[2.]]
        >>> net.weight.set_data(Tensor(np.zeros((1, 2)), mindspore.float32))
        >>> print(net(x))
        [[0.]]
    """
    _base_type = {}

    def __new__(cls, default_input, *args, **kwargs):
        init_data_flag = bool(isinstance(default_input, Tensor) and default_input.has_init)
        rc = sys.getrefcount(default_input)
        input_class, *class_init_args = Parameter._get_parameter_new_args(default_input, rc)
        new_type = Parameter._get_base_class(input_class)
        obj = input_class.__new__(new_type)
        input_class.__init__(obj, *class_init_args)
        # it's better to make the Initializer a kind of tensor.
        obj.init_mode = None
        obj.is_default_input_init = init_data_flag
        obj.from_ckpt = False
        if obj.has_init:
            obj.init_mode = default_input
        else:
            _offload_if_config(obj)
        return obj

    def __reduce_ex__(self, _):
        data = self
        if self.init_mode is not None:
            data = self.init_mode
        else:
            # cast to break deep infinite loop while deepcopy
            data = Tensor(self)
        return (
            Parameter, (data, self.name, self.requires_grad, self.layerwise_parallel))

    def __init__(self, default_input, name=None, requires_grad=True, layerwise_parallel=False, parallel_optimizer=True,
                 storage_format=""):
        self.param_info = ParamInfo()
        self.init_in_server = False
        self.name = name
        self.requires_grad = requires_grad
        self.layerwise_parallel = layerwise_parallel
        self.parallel_optimizer = parallel_optimizer
        # this flag is for tensor copy data.
        self.init_flag = False
        # this flag is for ge variable copy data.
        self.is_init = False
        self._inited_param = None
        self._sliced = False
        self.is_param_ps = False
        self.push_weight_to_server = False
        self.pull_weight_from_server = False
        self.requires_aggr = True
        self._cast_type = None
        self._unique = False
        self.is_in_parallel = _is_in_parallel_mode()
        self.is_in_shard = False
        self._pipeline_stage_list = []
        self.slice_num = 1
        self.from_ckpt = False
        if -1 in self.shape:
            raise ValueError(f"All shape elements of the Parameter must be positive, but got shape {self.shape}.")
        if isinstance(default_input, (Tensor_, Tensor)):
            # In embedding cache scenarios, we need to limit the memory size of the parameter
            # and save the out-of-range data to persistent storage to support TB-level parameters.
            slice_num_of_persistent_data = get_slice_num(default_input.dtype, default_input.shape)
            if slice_num_of_persistent_data > 1:
                data_shape = list(default_input.shape)
                slice_first_dim = math.ceil(data_shape[0] / slice_num_of_persistent_data)
                data_shape[0] = slice_first_dim
                self.param_info.use_persistent_storage = True
                self.param_info.origin_shape = default_input.shape
                self.slice_num = slice_num_of_persistent_data
                Tensor_.__init__(self, default_input.dtype, tuple(data_shape))
            else:
                Tensor_.__init__(self, default_input.dtype, default_input.shape)

        elif isinstance(default_input, int):
            Tensor_.__init__(self, mstype.int64, ())
        elif isinstance(default_input, float):
            Tensor_.__init__(self, mstype.float32, ())
        elif isinstance(default_input, (np.ndarray, list)):
            Tensor_.__init__(self, default_input)
        else:
            raise TypeError(f"The type of the argument 'default_input' must be in ['Tensor', 'int', 'float',"
                            f" 'numpy.ndarray', 'list']. But got type {type(default_input)}.")
        self.param_info.parameter_shape = self.shape
        self.param_info.storage_format = storage_format

        import mindspore.ops.operations.other_ops as other_ops
        self.load = other_ops.Load()

    def __deepcopy__(self, memodict):
        new_obj = Parameter(self)
        new_obj.name = self.name
        new_obj._inited_param = self._inited_param
        return new_obj

    def __str__(self):
        return f'Parameter (name={self.name}, shape={self.shape}, dtype={self.dtype}, ' \
               f'requires_grad={self.requires_grad})'

    def __repr__(self):
        return self.__str__()

    def __parameter__(self):
        """For parse check."""

    @staticmethod
    def _get_base_class(input_class):
        input_class_name = Parameter.__name__
        if input_class_name in Parameter._base_type:
            new_type = Parameter._base_type.get(input_class_name)
        else:
            new_type = type(input_class_name, (Parameter, input_class), {})
            Parameter._base_type[input_class_name] = new_type
        return new_type

    @staticmethod
    def _get_parameter_new_args(data, rc):
        """Get the class and constructor arguments used to build the data of the new `Parameter`."""
        if isinstance(data, bool):
            raise ValueError('Parameter data can not be `bool`')
        if isinstance(data, Tensor):
            if not data.has_init:
                if rc == 4:
                    # when the ref count is 4, the input data is not referenced anywhere else,
                    # so we can make a Tensor without copying the data.
                    return (Tensor, data)
                # make a copy of Tensor to init the parameter.
                if data.dtype == mstype.qint4x2:
                    return (Tensor, data.asnumpy(), mstype.qint4x2)
                return (Tensor, data.asnumpy())

            not_init_data = _is_role_sched() or (_is_role_pserver() and _cache_enable()) or _is_in_parallel_mode()
            if not_init_data:
                # do not init data while in auto parallel.
                return (Tensor, None, data.dtype, get_slice_shape(data.dtype, data.shape), data.init)
            return (Tensor, data.init_data())
        if isinstance(data, int):
            return (Tensor, data, mstype.int32)
        if isinstance(data, float):
            return (Tensor, data, mstype.float32)
        return (Tensor, data)

    def set_param_ps(self, init_in_server=False):
        """
        Set whether the trainable parameter is updated by the parameter server and whether the
        trainable parameter is initialized on the server.

        Note:
            It only works when a running task is in the parameter server mode.
            It is supported only in graph mode.

        Args:
            init_in_server (bool): Whether the trainable parameter updated by the parameter server is
                initialized on the server. Default: ``False``.

        Tutorial Examples:
            - `Parameter Server Mode
              <https://www.mindspore.cn/tutorials/experts/en/master/parallel/parameter_server_training.html>`_
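
        Examples:
            >>> # A hedged sketch: assumes parameter server mode has already been enabled,
            >>> # e.g. via mindspore.set_ps_context(enable_ps=True) and the MS_ROLE environment
            >>> # variable, and that graph mode is used; otherwise this call raises a RuntimeError.
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.set_param_ps(init_in_server=False)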
        """
        if not _is_ps_mode() or not (_is_role_worker() or _is_role_pserver() or _is_role_sched()):
            raise RuntimeError("Must complete the following two steps before calling set_param_ps: \n"
                               "1. context.set_ps_context(enable_ps=True) \n"
                               "2. export MS_ROLE environment variable \n"
                               "Please refer to the official website for detailed usage.")

        if context.get_context("mode") == context.PYNATIVE_MODE:
            raise RuntimeError("Parameter server training is not supported in pynative mode currently. "
                               "Please switch to graph mode and retry.")
        self.is_param_ps = True
        self.init_in_server = init_in_server
        self.param_info.init_in_server = init_in_server

    def copy(self):
        """
        Copy the parameter.

        Returns:
            Parameter, a new parameter.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> y = x.copy()
        """
        return self.clone(init='same')

    @deprecated("1.8", "set_param_fl")
    def set_param_fl(self, push_to_server=False, pull_from_server=False, requires_aggr=True):
        if push_to_server:
            self.push_weight_to_server = True
        if pull_from_server:
            self.pull_weight_from_server = True
        if not requires_aggr:
            self.requires_aggr = False
            self.param_info.requires_aggr = False

    @property
    def inited_param(self):
        """
        Get the new parameter after calling `init_data`.

        Default is ``None``. If `self` is a Parameter without data, the initialized Parameter with data
        will be recorded here after `init_data` is called.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.inited_param
        """
        return self._inited_param

    @property
    def param_info(self):
        return self._param_info

    @param_info.setter
    def param_info(self, param_info_):
        param_info_.obj = self
        self._param_info = param_info_
        Tensor_.param_info.fset(self, param_info_)

    @property
    def name(self):
        """
        Get the name of the parameter.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.name = "param1"
            >>> x.name
            'param1'
        """
        return self.param_info.name

    @name.setter
    def name(self, name_):
        """
        Define a name for the parameter.

        Args:
            name_ (`str` or `None`): The name of the parameter. When the name is ``None`` or an empty string,
                the default value `PARAMETER_NAME_DEFAULT` is used.
        """
        if name_ is None:
            name_ = PARAMETER_NAME_DEFAULT
        elif isinstance(name_, str):
            name_ = name_.strip()
            if name_ == '':
                name_ = PARAMETER_NAME_DEFAULT
            if len(name_) > PARAMETER_NAME_PREFIX_MAX_LEN:
                raise ValueError("The length of the '{}' name should not be greater than {}.".
                                 format(name_, PARAMETER_NAME_PREFIX_MAX_LEN))
        else:
            raise ValueError("The type of the Parameter's name should be 'string' or 'None', "
                             "but got {}.".format(type(name_)))

        if _is_role_worker() and self.cache_enable:
            _reinsert_hash_table_size(name_, self.param_info.name)
        self.param_info.name = name_

    @property
    def sliced(self):
        """
        Get slice status of the parameter.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.sliced = True
            >>> x.sliced
            True
        """
        return self._sliced

    @sliced.setter
    def sliced(self, sliced_):
        self._sliced = sliced_

    @property
    def comm_fusion(self):
        """
        Get the fusion type (int) for communication operators corresponding to this parameter.

        In `AUTO_PARALLEL` and `SEMI_AUTO_PARALLEL` mode, some communication operators used for parameters or
        gradients aggregation are inserted automatically.
        The value of `comm_fusion` must be greater than or equal to 0.
        When the value of `comm_fusion` is ``0`` , operators will not be fused together.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.comm_fusion = 3
            >>> x.comm_fusion
            3
        """
        return self.param_info.comm_fusion

    @comm_fusion.setter
    def comm_fusion(self, comm_fusion_):
        if context.get_context("mode") == context.PYNATIVE_MODE and "auto_parallel" in _get_parallel_mode():
            raise RuntimeError(
                "`comm_fusion` does not support PYNATIVE_MODE in AUTO_PARALLEL and SEMI_AUTO_PARALLEL mode.")
        Validator.check_non_negative_int(comm_fusion_)
        self.param_info.comm_fusion = comm_fusion_

    @property
    def parallel_optimizer_comm_recompute(self):
        """
        Get the communication recompute status (bool) of optimizer parallel for the parameter.

        In `AUTO_PARALLEL` and `SEMI_AUTO_PARALLEL` mode, when applying the parallel optimizer,
        some :class:`mindspore.ops.AllGather` operators
        used for parameter gathering are inserted automatically. It is used to control the recompute attr for those
        :class:`mindspore.ops.AllGather` operators.

        Note:
            - Only `Graph` mode is supported.
            - It is recommended to use cell.recompute(parallel_optimizer_comm_recompute=True/False) to configure
              the AllGather operators introduced by the parallel optimizer rather than using this interface directly.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.parallel_optimizer_comm_recompute = True
            >>> x.parallel_optimizer_comm_recompute
            True
        """
        return self.param_info.parallel_optimizer_comm_recompute

    @parallel_optimizer_comm_recompute.setter
    def parallel_optimizer_comm_recompute(self, parallel_optimizer_comm_recompute_):
        Validator.check_bool(parallel_optimizer_comm_recompute_)
        self.param_info.parallel_optimizer_comm_recompute = parallel_optimizer_comm_recompute_

    @property
    def unique(self):
        """
        Whether the parameter is already unique or not.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.unique = True
            >>> x.unique
            True
        """
        return self._unique

    @unique.setter
    def unique(self, unique_):
        self._unique = unique_

    def clone(self, init='same'):
        """
        Clone the parameter.

        Args:
            init (Union[Tensor, str, numbers.Number]): Initialize the shape and dtype of the parameter.
                If `init` is a `Tensor` or `numbers.Number`, clone a new parameter with the same shape
                and dtype, and the data of the new parameter will be set according to `init`. If `init`
                is a `str`, the `init` should be the alias of the class inheriting from `Initializer`.
                For example, if `init` is ``'same'``, clone a new parameter with the same data, shape, and
                dtype. Default: ``'same'``.

        Returns:
            Parameter, a new parameter.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> y = x.clone()
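            >>> # Hedged extra example: re-initialize the clone's data instead of copying it
            >>> # ('zeros' is an alias of an Initializer subclass).
            >>> z = x.clone(init='zeros')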
        """
        x = copy(self)
        param_info_clone = self.param_info.clone()
        info = self.param_info
        if hasattr(info, "cloned_obj"):
            info.cloned_obj.append(x)
        else:
            info.cloned_obj = [x]
        self.param_info = info
        param_info_clone.obj = x
        x.param_info = param_info_clone
        x.is_init = False
        x.init = self.init
        x.is_param_ps = self.is_param_ps
        x.init_in_server = self.init_in_server
        x.cache_enable = self.cache_enable
        if x.cache_enable:
            x.key = _get_unique_parameter_key()
        x.requires_aggr = self.requires_aggr
        if self.cache_shape:
            x.cache_shape = self.cache_shape
        if init != 'same':
            shape = self.shape if self.slice_num == 1 else self.param_info.origin_shape
            dtype = self.dtype
            x.set_data(initializer(init, shape=shape, dtype=dtype))
        return x

    @property
    def layerwise_parallel(self):
        """
        Get the layerwise parallel status (bool) of the parameter.

        When `layerwise_parallel` is ``True`` in `DATA_PARALLEL` and `HYBRID_PARALLEL` parallel mode,
        broadcast and gradient communication will not be applied to parameters.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.layerwise_parallel = True
            >>> x.layerwise_parallel
            True
        """
        return self.param_info.layerwise_parallel

    @layerwise_parallel.setter
    def layerwise_parallel(self, value=True):
        if not isinstance(value, bool):
            raise TypeError("The argument `layerwise_parallel` must be bool type.")
        self.param_info.layerwise_parallel = value

    @property
    def parallel_optimizer(self):
        """
        Get the optimizer parallel status (bool) of the parameter.

        It is used to filter the weight shard operation in `AUTO_PARALLEL` and `SEMI_AUTO_PARALLEL` mode. It works
        only when the parallel optimizer is enabled in `mindspore.set_auto_parallel_context()`.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.parallel_optimizer = True
            >>> x.parallel_optimizer
            True
        """
        return self.param_info.parallel_optimizer

    @parallel_optimizer.setter
    def parallel_optimizer(self, value=True):
        if not isinstance(value, bool):
            raise TypeError("The argument `parallel_optimizer` must be bool type.")
        self.param_info.parallel_optimizer = value

    @property
    def cache_enable(self):
        """
        Return whether cache is enabled for the parameter.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.cache_enable = True
            >>> x.cache_enable
            True
        """
        return self.param_info.cache_enable

    @cache_enable.setter
    def cache_enable(self, value=True):
        if not isinstance(value, bool):
            raise TypeError("The argument `cache_enable` must be bool type.")
        self.param_info.cache_enable = value

    @property
    def cache_shape(self):
        """
        Return the cache shape corresponding to the parameter if cache is enabled.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.cache_enable = True
            >>> x.cache_shape = [1, 2]
            >>> x.cache_shape
            [1, 2]
        """
        return self.param_info.cache_shape

    @cache_shape.setter
    def cache_shape(self, value):
        if not isinstance(value, (tuple, list)):
            raise TypeError("The argument `cache_shape` must be tuple or list type.")
        self.param_info.cache_shape = value

    @property
    def key(self):
        """
        Return the parameter unique key.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.key = 2
            >>> x.key
            2
        """
        return self.param_info.key

    @key.setter
    def key(self, value=-1):
        """Set the parameter unique key."""
        if not isinstance(value, int):
            raise TypeError("The argument `key` must be int type.")
        self.param_info.key = value

    @property
    def requires_grad(self):
        """
        Return whether the parameter requires gradient.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.requires_grad = True
            >>> x.requires_grad
            True
        """
        return self.param_info.requires_grad

    @requires_grad.setter
    def requires_grad(self, value=True):
        if not isinstance(value, bool):
            raise TypeError("The argument `requires_grad` must be bool type")
        Tensor_.wait_pipeline(self)
        self.param_info.requires_grad = value

    @property
    def data(self):
        """
        Return the parameter object.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([[1, 2], [3, 4]], dtype=np.float32)), name="param")
            >>> x.data
            Parameter (name=param, shape=(2, 2), dtype=Float32, requires_grad=True)
        """
        return self

    def value(self):
        """
        Return the value of parameter object.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x_value = x.value()
            >>> print(x_value)
            [1.  2.]
        """
        return self.load(self, monad.U)

    def _update_tensor_data(self, data):
        """Update the parameter by a Tensor."""
        if isinstance(self, Tensor):
            self.init_flag = False
            self.init = None
            return self.assign_value(data)
        new_param = Parameter(data, self.name, self.requires_grad)
        new_param.param_info = self.param_info
        return new_param

    @_LogActionOnce(logger=logger, key='add_pipeline_stage')
    def add_pipeline_stage(self, stage):
        """
        Add a pipeline stage to the parameter.

        Args:
            stage (int): The pipeline stage to be added.

        Raises:
            TypeError: If `stage` is not an int or is a negative number.
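
        Examples:
            >>> # Illustrative usage; stage 0 is assumed to be a valid pipeline stage id here.
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param")
            >>> x.add_pipeline_stage(0)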
        """
        logger.warning("This interface may be deleted in the future.")
        if not isinstance(stage, int) or stage < 0:
            raise TypeError("`stage` must be a non-negative integer.")
        self._pipeline_stage_list.append(stage)

    def _raise_type_error(self, incoming):
        raise TypeError(f"Incoming Parameter dtype can not be converted to current dtype implicitly. "
                        f"Current dtype is {self.dtype}, and incoming is {incoming}. "
                        f"Use .set_dtype(xxx) to change the dtype.")

    @staticmethod
    def _set_data_check_input_valid(current_shape, data_shape, current_tensor_is_init, incoming_tensor_is_init,
                                    from_ckpt, slice_shape=False, slice_num=1):
        if not from_ckpt and incoming_tensor_is_init and not current_tensor_is_init:
            raise TypeError("The original tensor data is initialized, but the argument 'data' is not initialized. "
                            "Please initialize 'data' before calling this method.")
        if tuple(current_shape) != tuple(data_shape):
            # If the Parameter is created by slicing, its shape is allowed to change.
            if not slice_shape and slice_num == 1:
                raise ValueError(f"Can not change the shape of Parameter which has been initialized."
                                 f" Current shape is {current_shape}, and incoming is {data_shape}.")

    @staticmethod
    def _from_tensor(tensor, *args, **kwargs):
        """Create a `Parameter` whose data is shared from a `Tensor`."""
        if not isinstance(tensor, Tensor_):
            raise TypeError(f"The type of input must be Tensor, but got {type(tensor)}.")
        param = Tensor_.__new__(Parameter)
        Tensor_.__init__(param, tensor)
        param.init = None
        param.init_mode = None
        param.has_init = False
        param.is_default_input_init = False
        Parameter.__init__(param, tensor, *args, **kwargs)
        return param

    @jit_forbidden_register
    def set_data(self, data, slice_shape=False):
        """
        Set Parameter's data.

        Args:
            data (Union[Tensor, int, float]): New data.
            slice_shape (bool): If `slice_shape` is set to ``True``, the shape consistency will not be checked.
                                Default: ``False``. When `slice_shape` is ``False`` and the shapes are not
                                consistent, a ValueError will be thrown.

        Returns:
            Parameter, the parameter after the data is set.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([[1, 2], [3, 4]], dtype=np.float32)), name="param")
            >>> x.set_data(Tensor(np.array([[6, 6], [6, 6]], dtype=np.float32)))
            Parameter (name=param, shape=(2, 2), dtype=Float32, requires_grad=True)
        """
        if not isinstance(data, (Tensor, int, float)):
            raise TypeError(f"Parameter data must be `Tensor`, `int`, or `float`, "
                            f"but got type {type(data)}.")
        if isinstance(data, (int, float)):
            if self.dtype in mstype.int_type and isinstance(data, float):
                self._raise_type_error(mstype.float_)
            data = Tensor(data, self.dtype)
        # both not init.
        incoming_tensor_is_init = isinstance(data, Tensor) and not data.has_init
        current_tensor_is_init = isinstance(self, Tensor) and not self.has_init
        Parameter._set_data_check_input_valid(self.shape, data.shape, current_tensor_is_init, incoming_tensor_is_init,
                                              self.from_ckpt, slice_shape, self.slice_num)
        if self.dtype != data.dtype:
            if mstype.implicit_conversion_seq.get(self.dtype) < mstype.implicit_conversion_seq.get(data.dtype):
                self._raise_type_error(data.dtype)
            else:
                from mindspore.ops import functional as F
                if isinstance(data, Tensor) and data.init is not None:
                    data.init_data()
                data = F.cast(data, self.dtype)
        if isinstance(data, Tensor) and data.has_init:
            # The parameter has been initialized, directly update by the data
            if current_tensor_is_init:
                self._update_tensor_data(data.init_data())
            else:
                # also update the related inited parameter data
                if self.inited_param is not None:
                    self.inited_param.set_data(data)
                self.init_mode = data
        elif incoming_tensor_is_init or current_tensor_is_init:
            self._update_tensor_data(data)
        self.sliced = slice_shape
        return self

    @staticmethod
    def _get_init_data_args(layout=None):
        """Get the data layout args."""
        init_data_args = ()
        if layout:
            if not isinstance(layout, tuple):
                raise TypeError("The argument 'layout' should be tuple, but got {}.".format(type(layout)))
            if len(layout) < 6:
                raise ValueError("The length of 'layout' must be larger than 5, but got {}.".format(len(layout)))
            slice_index = int(_get_slice_index(layout[0], layout[1], layout[5]))
            init_data_args += (slice_index, layout[2], layout[5])
        return init_data_args


    def init_data(self, layout=None, set_sliced=False):
        """
        Initialize the parameter's data.

        Args:
            layout (Union[None, tuple]): The parameter's layout info.
                layout [dev_mat, tensor_map, slice_shape, field_size, uniform_split, opt_shard_group].
                Default: ``None``.
                It is not None only in 'SEMI_AUTO_PARALLEL' or 'AUTO_PARALLEL' mode.

                - dev_mat (list(int)): The parameter's device matrix.
                - tensor_map (list(int)): The parameter's tensor map.
                - slice_shape (list(int)): The parameter's slice shape.
                - field_size (int): The parameter's field size.
                - uniform_split (bool): Whether the parameter is split evenly.
                - opt_shard_group (str): The group of the parameter while running optimizer parallel.

            set_sliced (bool): True if the parameter is set sliced after initializing the data.
                Default: ``False``.

        Returns:
            Parameter, the `Parameter` after initializing data. If the current `Parameter` was already initialized
            before, returns the same initialized `Parameter`.

        Raises:
            RuntimeError: If the data is from an Initializer and the parallel mode has changed after the
                Initializer was created.
            ValueError: If the length of the layout is less than 6.
            TypeError: If `layout` is not tuple.

        Examples:
            >>> from mindspore import Tensor, Parameter
            >>> import numpy as np
            >>> x = Parameter(Tensor(np.array([[1, 2], [3, 4]], dtype=np.float32)), name="param")
            >>> x.init_data()
        """
        if self.is_default_input_init and self.is_in_parallel != _is_in_parallel_mode():
            raise RuntimeError("Must set or change parallel mode before any initializer Tensor created.")
        if self.init_mode is None:
            return self
        if self.inited_param is not None:
            return self.inited_param

        init_data_args = self._get_init_data_args(layout)

        if _is_role_sched():
            return self
        if self.init_in_server and self.is_param_ps and isinstance(self.init_mode, Tensor) and \
                self.init_mode.init is not None and _is_role_worker():
            if self.cache_enable:
                data = self.init_mode.init_data(*init_data_args)
            else:
                data = self.init_mode.init_data(0, [1])
        else:
            data = self.init_mode.init_data(*init_data_args)

        obj = self._update_tensor_data(data)
        if id(obj) != id(self):
            self._inited_param = obj
        obj.init_mode = None
        obj.sliced = set_sliced
        _offload_if_config(obj)
        return obj


class ParameterTuple(tuple):
    """
    Inherited from tuple, ParameterTuple is used to save multiple parameters.

    Note:
        It is used to store the parameters of the network into the parameter tuple collection.

    Examples:
        >>> from mindspore import Tensor, Parameter, ParameterTuple
        >>> import numpy as np
        >>> x = Parameter(Tensor(np.array([[1, 2], [3, 4]], dtype=np.float32)), name="param")
        >>> y = Parameter(Tensor(np.array([[5, 6], [7, 8]], dtype=np.float32)), name="param1")
        >>> pt = ParameterTuple([x, y])
        >>> pt1 = pt.clone(prefix="new")
    """

    def __new__(cls, iterable):
        """Create instance object of ParameterTuple."""
        data = tuple(iterable)
        ids = set()
        names = set()
        for x in data:
            if not isinstance(x, Parameter):
                raise TypeError(f"For ParameterTuple initialization, "
                                f"ParameterTuple input should be a 'Parameter' collection, "
                                f"but got an element of type {type(x)}. ")
            if id(x) not in ids:
                if x.name in names:
                    raise ValueError("The value {}, its name '{}' already exists. "
                                     "Please set a unique name for the parameter.".format(x, x.name))
                names.add(x.name)
                ids.add(id(x))
        return tuple.__new__(ParameterTuple, tuple(data))

    def clone(self, prefix, init='same'):
        """
        Clone the parameters in the ParameterTuple element-wise to generate a new ParameterTuple.

        Args:
            prefix (str): Namespace of the parameters; the prefix string will be added to the names of the
                parameters in the ParameterTuple.

            init (Union[Tensor, str, numbers.Number]): Clone the shape and dtype of the Parameters in the
                ParameterTuple and set data according to `init`. Default: ``'same'``.

                - If `init` is a `Tensor` , set the new Parameter data to the input Tensor.
                - If `init` is `numbers.Number` , set the new Parameter data to the input number.
                - If `init` is a `str`, data will be set according to the initialization method of the same name in
                  the `Initializer`. When it is ``'same'``, the new Parameter will have the same value
                  as the original Parameter.

        Returns:
            Tuple, the new Parameter tuple.

        Tutorial Examples:
            - `Cell and Parameter - Parameter Tuple
              <https://mindspore.cn/tutorials/en/master/advanced/modules/layer.html#parameter-tuple>`_
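
        Examples:
            >>> # A hedged sketch of typical usage: the cloned parameters keep their original
            >>> # names with the given prefix prepended.
            >>> from mindspore import Tensor, Parameter, ParameterTuple
            >>> import numpy as np
            >>> a = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param_a")
            >>> b = Parameter(Tensor(np.array([3, 4], dtype=np.float32)), name="param_b")
            >>> pt = ParameterTuple([a, b])
            >>> pt_clone = pt.clone(prefix="backup")
            >>> [p.name for p in pt_clone]
            ['backup.param_a', 'backup.param_b']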
        """
        Validator.check_str_by_regular(prefix)
        new = []
        for x in self:
            x1 = x.clone(init)
            x1.name = prefix + "." + x1.name
            new.append(x1)

            if not x1.cache_enable:
                continue

            if _is_role_worker():
                _clone_hash_table(x.name, x.key, x1.name, x1.key)
                _insert_accumu_init_info(x1.name, init_to_value(init))
        return ParameterTuple(new)

    def __parameter_tuple__(self):
        """For parse check."""