1# Copyright 2020-2024 Huawei Technologies Co., Ltd 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================ 15 16"""Parameter for cell.""" 17from __future__ import absolute_import 18 19from copy import copy 20import time 21import os 22import sys 23import math 24import numbers 25import numpy as np 26from mindspore import log as logger 27from mindspore.log import _LogActionOnce 28from mindspore._c_expression import ParamInfo 29from mindspore.common import dtype as mstype 30from mindspore import context 31from mindspore.parallel._utils import _get_parallel_mode, _get_global_rank 32from mindspore.common._utils import get_slice_num, get_slice_shape 33from mindspore.common.initializer import initializer 34from mindspore.common.tensor import Tensor 35from mindspore import _checkparam as Validator 36from mindspore._check_jit_forbidden_api import jit_forbidden_register 37from mindspore._c_expression import Tensor as Tensor_ 38from mindspore.parallel._tensor import _get_slice_index 39from mindspore.parallel._auto_parallel_context import auto_parallel_context 40from mindspore.parallel._ps_context import _is_role_worker, _is_role_pserver, _is_role_sched, _clone_hash_table, \ 41 _is_ps_mode 42from mindspore.parallel._ps_context import _reinsert_hash_table_size, _insert_accumu_init_info, _cache_enable 43from mindspore.common._decorator import deprecated 44import mindspore.common._monad as monad 45 46__all__ = ['Parameter', 'ParameterTuple'] 47 48PARAMETER_NAME_DEFAULT = "Parameter" 49PARAMETER_NAME_PREFIX_MAX_LEN = 1024 50 51# Global variable for parameter unique key. 52_GLOBAL_PARAMETER_KEY = -1 53 54 55def _is_in_parallel_mode(): 56 """Get parallel mode.""" 57 return auto_parallel_context().get_parallel_mode() in ["semi_auto_parallel", "auto_parallel"] 58 59 60def init_to_value(init): 61 """ 62 Get value of initializer. 63 64 Returns: 65 Value of the initializer. 66 67 Raises: 68 ValueError: The value of the argument 'init' is not correct. 69 """ 70 if isinstance(init, str): 71 if init == 'zeros': 72 return 0.0 73 if init == 'ones': 74 return 1.0 75 raise ValueError("The argument 'init' should be one of values in ['zeros', 'ones'].") 76 if isinstance(init, numbers.Number): 77 return float(init) 78 raise ValueError("The argument 'init' should be number or string, but got {}.".format(type(init))) 79 80 81def _get_unique_parameter_key(): 82 """ 83 Get parameter unique key. 84 Used to identify the same Parameter for Worker and Server in the embedding cache scenario. 85 86 Returns: 87 Integer. The unique parameter key. 88 """ 89 global _GLOBAL_PARAMETER_KEY 90 _GLOBAL_PARAMETER_KEY += 1 91 return _GLOBAL_PARAMETER_KEY 92 93 94def _offload_if_config(data): 95 """ 96 Offload parameter(data size > 512) to file when enable memory offload and offload parameter to disk. 97 Args: 98 data: The parameter data to offload. 99 """ 100 if not context.get_context("memory_offload") or data is None: 101 return 102 103 offload_context = context.get_offload_context() 104 if offload_context.get("offload_param", None) != "disk": 105 return 106 107 data_size_threshold = 512 108 if data.nbytes < data_size_threshold: 109 return 110 111 offload_file_path = data.offload_file_path() 112 if offload_file_path is None or offload_file_path == "": 113 offload_dir = offload_context.get("offload_path", "./offload") 114 offload_dir = os.path.relpath(offload_dir) 115 if not os.path.exists(offload_dir): 116 os.makedirs(offload_dir) 117 offload_file_path = offload_dir + "/" + str(_get_global_rank()) + "_" + str( 118 _get_unique_parameter_key()) + "_" + str(time.time()) + ".data" 119 data.offload(offload_file_path) 120 121 122class Parameter(Tensor_): 123 """ 124 `Parameter` is a `Tensor` subclass, when they are assigned as Cell attributes they are automatically added to 125 the list of its parameters, and will appear, e.g. in `cell.get_parameters()` iterator. 126 127 Note: 128 - In auto_parallel mode of `SEMI_AUTO_PARALLEL` and `AUTO_PARALLEL`, if init `Parameter` by 129 a `Tensor`, the type of Parameter will be `Tensor`. `Tensor` will save the shape and type info of a tensor 130 with no memory usage. 131 132 - The shape can be changed while 133 compiling for auto-parallel. Call `init_data` will return a Tensor Parameter with initialized data. 134 135 - If there is an operator in the network that requires part of the inputs to be Parameter, 136 then the Parameters as this part of the inputs are not allowed to be cast. 137 138 - Give each `Parameter` a unique name to facilitate subsequent operations and updates. 139 If there are two or more `Parameter` objects with the same name in a network, 140 will be prompted to set a unique name when defining. 141 142 - When directly printing a `Parameter`, you cannot view the actual values contained inside it. 143 You need to use the `Parameter.asnumpy()` method to access the actual values. 144 145 Args: 146 default_input (Union[Tensor, int, float, numpy.ndarray, list]): Parameter data, 147 to initialize the parameter data. 148 name (str): Name of the parameter. Default: ``None`` . If two or more `Parameter` 149 objects with the same name exist in a network, 150 you will be prompted to set a unique name when defining them. 151 152 1) If the parameter is not given a name, the default name is its variable name. For example, the name of 153 param_a below is name_a, and the name of param_b is the variable name param_b. 154 155 .. code-block:: 156 157 self.param_a = Parameter(Tensor([1], ms.float32), name="name_a") 158 self.param_b = Parameter(Tensor([2], ms.float32)) 159 160 2) If parameter in list or tuple is not given a name, will give it a unique name. For example, the names of 161 parameters below are **Parameter$1** and **Parameter$2**. 162 163 .. code-block:: 164 165 self.param_list = [Parameter(Tensor([3], ms.float32)), 166 Parameter(Tensor([4], ms.float32))] 167 168 3) If the parameter is given a name, and the same name exists between different parameters, an exception 169 will be thrown. For example, "its name 'name_a' already exists." will be thrown. 170 171 .. code-block:: 172 173 self.param_a = Parameter(Tensor([1], ms.float32), name="name_a") 174 self.param_tuple = (Parameter(Tensor([5], ms.float32), name="name_a"), 175 Parameter(Tensor([6], ms.float32))) 176 177 4) If a parameter appear multiple times in list or tuple, check the name of the object only once. For 178 example, the following example will not throw an exception. 179 180 .. code-block:: 181 182 self.param_a = Parameter(Tensor([1], ms.float32), name="name_a") 183 self.param_tuple = (self.param_a, self.param_a) 184 185 requires_grad (bool): True if the parameter requires gradient. Default: ``True`` . 186 layerwise_parallel (bool): When `layerwise_parallel` is true in data/hybrid parallel mode, 187 broadcast and gradients communication would not be applied to the `Parameter`. Default: ``False`` . 188 parallel_optimizer (bool): It is used to filter the weight shard operation in `SEMI_AUTO_PARALLEL` or 189 `AUTO_PARALLEL` mode. It works only when enable parallel optimizer in 190 `mindspore.set_auto_parallel_context()`. Default: ``True`` . 191 storage_format (str): Only Ascend device target is supported. It is used to specify the format of the weight 192 loaded to the device. By default, the format is not changed. The optional values are ``"FRACTAL_NZ"`` , 193 ``"NC1HWC0"`` , ``"FRACTAL_Z"`` , etc. Default: ``""`` . 194 195 Examples: 196 >>> import numpy as np 197 >>> import mindspore 198 >>> from mindspore import Parameter, Tensor, ops, nn 199 >>> 200 >>> class Net(nn.Cell): 201 ... def __init__(self): 202 ... super(Net, self).__init__() 203 ... self.matmul = ops.MatMul() 204 ... self.weight = Parameter(Tensor(np.ones((1, 2)), mindspore.float32), name="w", requires_grad=True) 205 ... 206 ... def construct(self, x): 207 ... out = self.matmul(self.weight, x) 208 ... return out 209 >>> net = Net() 210 >>> x = Tensor(np.ones((2, 1)), mindspore.float32) 211 >>> print(net(x)) 212 [[2.]] 213 >>> net.weight.set_data(Tensor(np.zeros((1, 2)), mindspore.float32)) 214 >>> print(net(x)) 215 [[0.]] 216 """ 217 _base_type = {} 218 219 def __new__(cls, default_input, *args, **kwargs): 220 init_data_flag = bool(isinstance(default_input, Tensor) and default_input.has_init) 221 rc = sys.getrefcount(default_input) 222 input_class, *class_init_args = Parameter._get_parameter_new_args(default_input, rc) 223 new_type = Parameter._get_base_class(input_class) 224 obj = input_class.__new__(new_type) 225 input_class.__init__(obj, *class_init_args) 226 # it's better to make the Initializer a kind of tensor. 227 obj.init_mode = None 228 obj.is_default_input_init = init_data_flag 229 obj.from_ckpt = False 230 if obj.has_init: 231 obj.init_mode = default_input 232 else: 233 _offload_if_config(obj) 234 return obj 235 236 def __reduce_ex__(self, _): 237 data = self 238 if self.init_mode is not None: 239 data = self.init_mode 240 else: 241 # cast to break deep infinite loop while deepcopy 242 data = Tensor(self) 243 return ( 244 Parameter, (data, self.name, self.requires_grad, self.layerwise_parallel)) 245 246 def __init__(self, default_input, name=None, requires_grad=True, layerwise_parallel=False, parallel_optimizer=True, 247 storage_format=""): 248 self.param_info = ParamInfo() 249 self.init_in_server = False 250 self.name = name 251 self.requires_grad = requires_grad 252 self.layerwise_parallel = layerwise_parallel 253 self.parallel_optimizer = parallel_optimizer 254 # this flag for tensor copy data. 255 self.init_flag = False 256 # this flag is for ge variable copy data. 257 self.is_init = False 258 self._inited_param = None 259 self._sliced = False 260 self.is_param_ps = False 261 self.push_weight_to_server = False 262 self.pull_weight_from_server = False 263 self.requires_aggr = True 264 self._cast_type = None 265 self._unique = False 266 self.is_in_parallel = _is_in_parallel_mode() 267 self.is_in_shard = False 268 self._pipeline_stage_list = [] 269 self.slice_num = 1 270 self.from_ckpt = False 271 if -1 in self.shape: 272 raise ValueError(f"All shape elements of the Parameter must be positive. But got None.") 273 if isinstance(default_input, (Tensor_, Tensor)): 274 # At embedding cache scenes, we need limit the size of memory for parameter. 275 # And save out range data to persistent storage to support TB-Level size parameter. 276 slice_num_of_persistent_data = get_slice_num(default_input.dtype, default_input.shape) 277 if slice_num_of_persistent_data > 1: 278 data_shape = list(default_input.shape) 279 slice_first_dim = math.ceil(data_shape[0] / slice_num_of_persistent_data) 280 data_shape[0] = slice_first_dim 281 self.param_info.use_persistent_storage = True 282 self.param_info.origin_shape = default_input.shape 283 self.slice_num = slice_num_of_persistent_data 284 Tensor_.__init__(self, default_input.dtype, tuple(data_shape)) 285 else: 286 Tensor_.__init__(self, default_input.dtype, default_input.shape) 287 288 elif isinstance(default_input, int): 289 Tensor_.__init__(self, mstype.int64, ()) 290 elif isinstance(default_input, float): 291 Tensor_.__init__(self, mstype.float32, ()) 292 elif isinstance(default_input, (np.ndarray, list)): 293 Tensor_.__init__(self, default_input) 294 else: 295 raise TypeError(f"The type of the argument 'default_input' must be in ['Tensor', 'int', 'float'," 296 f" 'numpy.ndarray', 'list']. But got type {type(default_input)}.") 297 self.param_info.parameter_shape = self.shape 298 self.param_info.storage_format = storage_format 299 300 import mindspore.ops.operations.other_ops as other_ops 301 self.load = other_ops.Load() 302 303 def __deepcopy__(self, memodict): 304 new_obj = Parameter(self) 305 new_obj.name = self.name 306 new_obj._inited_param = self._inited_param 307 return new_obj 308 309 def __str__(self): 310 return f'Parameter (name={self.name}, shape={self.shape}, dtype={self.dtype}, ' \ 311 f'requires_grad={self.requires_grad})' 312 313 def __repr__(self): 314 return self.__str__() 315 316 def __parameter__(self): 317 """For parse check.""" 318 319 @staticmethod 320 def _get_base_class(input_class): 321 input_class_name = Parameter.__name__ 322 if input_class_name in Parameter._base_type: 323 new_type = Parameter._base_type.get(input_class_name) 324 else: 325 new_type = type(input_class_name, (Parameter, input_class), {}) 326 Parameter._base_type[input_class_name] = new_type 327 return new_type 328 329 @staticmethod 330 def _get_parameter_new_args(data, rc): 331 """Set `set_data` of current `Parameter`.""" 332 if isinstance(data, bool): 333 raise ValueError('Parameter data can not be `bool`') 334 if isinstance(data, Tensor): 335 if not data.has_init: 336 if rc == 4: 337 # when ref count is 4, means the input data is not referenced 338 # in other place, so we can make a Tensor without copy data. 339 return (Tensor, data) 340 # make a copy of Tensor to init the parameter. 341 if data.dtype == mstype.qint4x2: 342 return (Tensor, data.asnumpy(), mstype.qint4x2) 343 return (Tensor, data.asnumpy()) 344 345 not_init_data = _is_role_sched() or (_is_role_pserver() and _cache_enable()) or _is_in_parallel_mode() 346 if not_init_data: 347 # do not init data while in auto parallel. 348 return (Tensor, None, data.dtype, get_slice_shape(data.dtype, data.shape), data.init) 349 return (Tensor, data.init_data()) 350 if isinstance(data, int): 351 return (Tensor, data, mstype.int32) 352 if isinstance(data, float): 353 return (Tensor, data, mstype.float32) 354 return (Tensor, data) 355 356 def set_param_ps(self, init_in_server=False): 357 """ 358 Set whether the trainable parameter is updated by parameter server and whether the 359 trainable parameter is initialized on server. 360 361 Note: 362 It only works when a running task is in the parameter server mode. 363 It is supported only in graph mode. 364 365 Args: 366 init_in_server (bool): Whether trainable parameter updated by parameter server is 367 initialized on server. Default: ``False``. 368 369 Tutorial Examples: 370 - `Parameter Server Mode 371 <https://www.mindspore.cn/tutorials/experts/en/master/parallel/parameter_server_training.html>`_ 372 """ 373 if not _is_ps_mode() or not (_is_role_worker() or _is_role_pserver() or _is_role_sched()): 374 raise RuntimeError("Must complete following two steps before calling set_param_ps: \n" 375 "1. context.set_ps_context(enable_ps=True) \n" 376 "2. export MS_ROLE environment variable \n" 377 "Please refer to the official website for detailed usage.") 378 379 if context.get_context("mode") == context.PYNATIVE_MODE: 380 raise RuntimeError("Parameter server training is not supported in pynative mode currently." 381 "Please switch to graph mode and retry.") 382 self.is_param_ps = True 383 self.init_in_server = init_in_server 384 self.param_info.init_in_server = init_in_server 385 386 def copy(self): 387 """ 388 Copy the parameter. 389 390 Returns: 391 Parameter, a new parameter. 392 393 Examples: 394 >>> from mindspore import Tensor, Parameter 395 >>> import numpy as np 396 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 397 >>> y = x.copy() 398 """ 399 return self.clone(init='same') 400 401 @deprecated("1.8", "set_param_fl") 402 def set_param_fl(self, push_to_server=False, pull_from_server=False, requires_aggr=True): 403 if push_to_server: 404 self.push_weight_to_server = True 405 if pull_from_server: 406 self.pull_weight_from_server = True 407 if not requires_aggr: 408 self.requires_aggr = False 409 self.param_info.requires_aggr = False 410 411 @property 412 def inited_param(self): 413 """ 414 Get the new parameter after call the init_data. 415 416 Default is a None, If `self` is a Parameter without data, after call the 417 `init_data` the initialized Parameter with data will be recorded here. 418 419 Examples: 420 >>> from mindspore import Tensor, Parameter 421 >>> import numpy as np 422 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 423 >>> x.inited_param 424 """ 425 return self._inited_param 426 427 @property 428 def param_info(self): 429 return self._param_info 430 431 @param_info.setter 432 def param_info(self, param_info_): 433 param_info_.obj = self 434 self._param_info = param_info_ 435 Tensor_.param_info.fset(self, param_info_) 436 437 @property 438 def name(self): 439 """ 440 Get the name of the parameter. 441 442 Examples: 443 >>> from mindspore import Tensor, Parameter 444 >>> import numpy as np 445 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 446 >>> x.name = "param1" 447 >>> x.name 448 'param1' 449 """ 450 return self.param_info.name 451 452 @name.setter 453 def name(self, name_): 454 """ 455 Define a name for the parameter. 456 457 Args: 458 name_ (`str` or `None`): The name of the parameter. When the parameter is None or an empty string, 459 the default value `PARAMETER_NAME_DEFAULT` is used. 460 """ 461 if name_ is None: 462 name_ = PARAMETER_NAME_DEFAULT 463 elif isinstance(name_, str): 464 name_ = name_.strip() 465 if name_ == '': 466 name_ = PARAMETER_NAME_DEFAULT 467 if len(name_) > PARAMETER_NAME_PREFIX_MAX_LEN: 468 raise ValueError("The length of the '{}' name should be less than {}.". 469 format(name_, PARAMETER_NAME_PREFIX_MAX_LEN)) 470 else: 471 raise ValueError("The type of the Parameter's name should be 'string' or 'None', " 472 "but got {}.".format(type(name_))) 473 474 if _is_role_worker() and self.cache_enable: 475 _reinsert_hash_table_size(name_, self.param_info.name) 476 self.param_info.name = name_ 477 478 @property 479 def sliced(self): 480 """ 481 Get slice status of the parameter. 482 483 Examples: 484 >>> from mindspore import Tensor, Parameter 485 >>> import numpy as np 486 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 487 >>> x.sliced = True 488 >>> x.sliced 489 True 490 """ 491 return self._sliced 492 493 @sliced.setter 494 def sliced(self, sliced_): 495 self._sliced = sliced_ 496 497 @property 498 def comm_fusion(self): 499 """ 500 Get the fusion type (int) for communication operators corresponding to this parameter. 501 502 In `AUTO_PARALLEL` and `SEMI_AUTO_PARALLEL` mode, some communication operators used for parameters or 503 gradients aggregation are inserted automatically. 504 The value of `comm_fusion` must be greater than or equal to 0. 505 When the value of `comm_fusion` is ``0`` , operators will not be fused together. 506 507 Examples: 508 >>> from mindspore import Tensor, Parameter 509 >>> import numpy as np 510 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 511 >>> x.comm_fusion = 3 512 >>> x.comm_fusion 513 3 514 """ 515 return self.param_info.comm_fusion 516 517 @comm_fusion.setter 518 def comm_fusion(self, comm_fusion_): 519 if context.get_context("mode") == context.PYNATIVE_MODE and "auto_parallel" in _get_parallel_mode(): 520 raise RuntimeError( 521 "`comm_fusion` does not support PYNATIVE_MODE in AUTO_PARALLEL and SEMI_AUTO_PARALLEL mode.") 522 Validator.check_non_negative_int(comm_fusion_) 523 self.param_info.comm_fusion = comm_fusion_ 524 525 @property 526 def parallel_optimizer_comm_recompute(self): 527 """ 528 Get the communication recompute status(bool) of optimizer parallel for the parameter. 529 530 In `AUTO_PARALLEL` and `SEMI_AUTO_PARALLEL` mode, when applying parallel optimizer, 531 some :class:`mindspore.ops.AllGather` operators 532 used for parameters gathering are inserted automatically. It is used to control the recompute attr for those 533 :class:`mindspore.ops.AllGather` operators. 534 535 Note: 536 - Only `Graph` mode is supported. 537 - It is recommended to use cell.recompute(parallel_optimizer_comm_recompute=True/False) to configure 538 the AllGather operators introducing by parallel optimizer rather than using this interface directly. 539 540 Examples: 541 >>> from mindspore import Tensor, Parameter 542 >>> import numpy as np 543 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 544 >>> x.parallel_optimizer_comm_recompute = True 545 >>> x.parallel_optimizer_comm_recompute 546 True 547 """ 548 return self.param_info.parallel_optimizer_comm_recompute 549 550 @parallel_optimizer_comm_recompute.setter 551 def parallel_optimizer_comm_recompute(self, parallel_optimizer_comm_recompute_): 552 Validator.check_bool(parallel_optimizer_comm_recompute_) 553 self.param_info.parallel_optimizer_comm_recompute = parallel_optimizer_comm_recompute_ 554 555 @property 556 def unique(self): 557 """ 558 Whether the parameter is already unique or not. 559 560 Examples: 561 >>> from mindspore import Tensor, Parameter 562 >>> import numpy as np 563 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 564 >>> x.unique = True 565 >>> x.unique 566 True 567 """ 568 return self._unique 569 570 @unique.setter 571 def unique(self, unique_): 572 self._unique = unique_ 573 574 def clone(self, init='same'): 575 """ 576 Clone the parameter. 577 578 Args: 579 init (Union[Tensor, str, numbers.Number]): Initialize the shape and dtype of the parameter. 580 If `init` is a `Tensor` or `numbers.Number`, clone a new parameter with the same shape 581 and dtype, and the data of the new parameter will be set according to `init`. If `init` 582 is a `str`, the `init` should be the alias of the class inheriting from `Initializer`. 583 For example, if `init` is ``'same'``, clone a new parameter with the same data, shape, and 584 dtype. Default: ``'same'``. 585 586 Returns: 587 Parameter, a new parameter. 588 589 Examples: 590 >>> from mindspore import Tensor, Parameter 591 >>> import numpy as np 592 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 593 >>> y = x.clone() 594 """ 595 x = copy(self) 596 param_info_clone = self.param_info.clone() 597 info = self.param_info 598 if hasattr(info, "cloned_obj"): 599 info.cloned_obj.append(x) 600 else: 601 info.cloned_obj = [x] 602 self.param_info = info 603 param_info_clone.obj = x 604 x.param_info = param_info_clone 605 x.is_init = False 606 x.init = self.init 607 x.is_param_ps = self.is_param_ps 608 x.init_in_server = self.init_in_server 609 x.cache_enable = self.cache_enable 610 if x.cache_enable: 611 x.key = _get_unique_parameter_key() 612 x.requires_aggr = self.requires_aggr 613 if self.cache_shape: 614 x.cache_shape = self.cache_shape 615 if init != 'same': 616 shape = self.shape if self.slice_num == 1 else self.param_info.origin_shape 617 dtype = self.dtype 618 x.set_data(initializer(init, shape=shape, dtype=dtype)) 619 return x 620 621 @property 622 def layerwise_parallel(self): 623 """ 624 Get the layerwise parallel status(bool) of the parameter. 625 626 When `layerwise_parallel` is ``True`` in `DATA_PARALLEL` and `HYBRID_PARALLEL` parallel mode, 627 broadcast and gradients communication would not be applied to parameters. 628 629 Examples: 630 >>> from mindspore import Tensor, Parameter 631 >>> import numpy as np 632 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 633 >>> x.layerwise_parallel = True 634 >>> x.layerwise_parallel 635 True 636 """ 637 return self.param_info.layerwise_parallel 638 639 @layerwise_parallel.setter 640 def layerwise_parallel(self, value=True): 641 if not isinstance(value, bool): 642 raise TypeError("The argument `layerwise_parallel` must be bool type.") 643 self.param_info.layerwise_parallel = value 644 645 @property 646 def parallel_optimizer(self): 647 """ 648 Get the optimizer parallel status(bool) of the parameter. 649 650 It is used to filter the weight shard operation in `AUTO_PARALLEL` and `SEMI_AUTO_PARALLEL` mode. It works only 651 when enable parallel optimizer in `mindspore.set_auto_parallel_context()`. 652 653 Examples: 654 >>> from mindspore import Tensor, Parameter 655 >>> import numpy as np 656 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 657 >>> x.parallel_optimizer = True 658 >>> x.parallel_optimizer 659 True 660 """ 661 return self.param_info.parallel_optimizer 662 663 @parallel_optimizer.setter 664 def parallel_optimizer(self, value=True): 665 if not isinstance(value, bool): 666 raise TypeError("The argument `parallel_optimizer` must be bool type.") 667 self.param_info.parallel_optimizer = value 668 669 @property 670 def cache_enable(self): 671 """ 672 Return whether the parameter is cache enable. 673 674 Examples: 675 >>> from mindspore import Tensor, Parameter 676 >>> import numpy as np 677 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 678 >>> x.cache_enable=True 679 >>> x.cache_enable 680 True 681 """ 682 return self.param_info.cache_enable 683 684 @cache_enable.setter 685 def cache_enable(self, value=True): 686 if not isinstance(value, bool): 687 raise TypeError("The argument `cache_enable` must be bool type.") 688 self.param_info.cache_enable = value 689 690 @property 691 def cache_shape(self): 692 """ 693 Return the cache shape corresponding to the parameter if use cache. 694 695 Examples: 696 >>> from mindspore import Tensor, Parameter 697 >>> import numpy as np 698 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 699 >>> x.cache_enable=True 700 >>> x.cache_shape=[1, 2] 701 >>> x.cache_shape 702 [1, 2] 703 """ 704 return self.param_info.cache_shape 705 706 @cache_shape.setter 707 def cache_shape(self, value): 708 if not isinstance(value, (tuple, list)): 709 raise TypeError("The argument `cache_shape` must be tuple or list type.") 710 self.param_info.cache_shape = value 711 712 @property 713 def key(self): 714 """ 715 Return the parameter unique key. 716 717 Examples: 718 >>> from mindspore import Tensor, Parameter 719 >>> import numpy as np 720 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 721 >>> x.key = 2 722 >>> x.key 723 2 724 """ 725 return self.param_info.key 726 727 @key.setter 728 def key(self, value=-1): 729 """Set the parameter unique key.""" 730 if not isinstance(value, int): 731 raise TypeError("The argument `key` must be int type.") 732 self.param_info.key = value 733 734 @property 735 def requires_grad(self): 736 """ 737 Return whether the parameter requires gradient. 738 739 Examples: 740 >>> from mindspore import Tensor, Parameter 741 >>> import numpy as np 742 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 743 >>> x.requires_grad = True 744 >>> x.requires_grad 745 True 746 """ 747 return self.param_info.requires_grad 748 749 @requires_grad.setter 750 def requires_grad(self, value=True): 751 if not isinstance(value, bool): 752 raise TypeError("The argument `requires_grad` must be bool type") 753 Tensor_.wait_pipeline(self) 754 self.param_info.requires_grad = value 755 756 @property 757 def data(self): 758 """ 759 Return the parameter object. 760 761 Examples: 762 >>> from mindspore import Tensor, Parameter 763 >>> import numpy as np 764 >>> x = Parameter(Tensor(np.array([[1, 2], [3, 4]], dtype=np.float32)), name="param") 765 >>> x.data 766 Parameter (name=param, shape=(2, 2), dtype=Float32, requires_grad=True) 767 """ 768 return self 769 770 def value(self): 771 """ 772 Return the value of parameter object. 773 774 Examples: 775 >>> from mindspore import Tensor, Parameter 776 >>> import numpy as np 777 >>> x = Parameter(Tensor(np.array([1, 2], dtype=np.float32)), name="param") 778 >>> x_value = x.value() 779 >>> print(x_value) 780 [1. 2.] 781 """ 782 return self.load(self, monad.U) 783 784 def _update_tensor_data(self, data): 785 """Update the parameter by a Tensor.""" 786 if isinstance(self, Tensor): 787 self.init_flag = False 788 self.init = None 789 return self.assign_value(data) 790 new_param = Parameter(data, self.name, self.requires_grad) 791 new_param.param_info = self.param_info 792 return new_param 793 794 @_LogActionOnce(logger=logger, key='add_pipeline_stage') 795 def add_pipeline_stage(self, stage): 796 """ 797 Add a pipeline stage to the parameter. 798 799 Args: 800 stage(int): The pipeline stage to be added. 801 802 Raise: 803 TypeError: If `stage` is not a positive number or not int type. 804 """ 805 logger.warning(f"This interface may be deleted in the future.") 806 if not isinstance(stage, int) or stage < 0: 807 raise TypeError("`stage` must be a positive number of int type") 808 self._pipeline_stage_list.append(stage) 809 810 def _raise_type_error(self, incoming): 811 raise TypeError(f"Incoming Parameter dtype can not be converted to current dtype implicitly. " 812 f"Current dtype is {self.dtype}, and incoming is {incoming}. " 813 f"Use .set_dtype(xxx) to change the dtype.") 814 815 @staticmethod 816 def _set_data_check_input_valid(current_shape, data_shape, current_tensor_is_init, incoming_tensor_is_init, 817 from_ckpt, slice_shape=False, slice_num=1): 818 if not from_ckpt and incoming_tensor_is_init and not current_tensor_is_init: 819 raise TypeError("The original tensor data is initialized, but the argument 'data' is not initialized." 820 "Please initialize 'data' before call this method.") 821 if tuple(current_shape) != tuple(data_shape): 822 # If Slice create Parameter shape can be change. 823 if not slice_shape and slice_num == 1: 824 raise ValueError(f"Can not change the shape of Parameter which has been initialized." 825 f" Current shape is {current_shape}, and incoming is {data_shape}.") 826 827 @staticmethod 828 def _from_tensor(tensor, *args, **kwargs): 829 """Create a `Parameter` that data is shared from a `Tensor`.""" 830 if not isinstance(tensor, Tensor_): 831 raise TypeError(f"The type of input must be Tensor, but got {type(tensor)}.") 832 param = Tensor_.__new__(Parameter) 833 Tensor_.__init__(param, tensor) 834 param.init = None 835 param.init_mode = None 836 param.has_init = False 837 param.is_default_input_init = False 838 Parameter.__init__(param, tensor, *args, **kwargs) 839 return param 840 841 @jit_forbidden_register 842 def set_data(self, data, slice_shape=False): 843 """ 844 Set Parameter's data. 845 846 Args: 847 data (Union[Tensor, int, float]): New data. 848 slice_shape (bool): If slice the parameter is set to ``True``, the shape consistency will not be checked. 849 Default: ``False``. When `slice_shape` is ``True``, and the shapes are not consistent, a 850 ValueError will be thrown. 851 852 Returns: 853 Parameter, the parameter after set data. 854 855 Examples: 856 >>> from mindspore import Tensor, Parameter 857 >>> import numpy as np 858 >>> x = Parameter(Tensor(np.array([[1, 2], [3, 4]], dtype=np.float32)), name="param") 859 >>> x.set_data(Tensor(np.array([[6, 6], [6, 6]], dtype=np.float32))) 860 Parameter (name=param, shape=(2, 2), dtype=Float32, requires_grad=True) 861 """ 862 if not isinstance(data, (Tensor, int, float)): 863 raise TypeError(f"Parameter data must be [`Tensor`, `int`, `float`] or a kind of `Tensor` " 864 f"(like `Tensor`). But with type {type(data)}.") 865 if isinstance(data, (int, float)): 866 if self.dtype in mstype.int_type and isinstance(data, float): 867 self._raise_type_error(mstype.float_) 868 data = Tensor(data, self.dtype) 869 # both not init. 870 incoming_tensor_is_init = isinstance(data, Tensor) and not data.has_init 871 current_tensor_is_init = isinstance(self, Tensor) and not self.has_init 872 Parameter._set_data_check_input_valid(self.shape, data.shape, current_tensor_is_init, incoming_tensor_is_init, 873 self.from_ckpt, slice_shape, self.slice_num) 874 if self.dtype != data.dtype: 875 if mstype.implicit_conversion_seq.get(self.dtype) < mstype.implicit_conversion_seq.get(data.dtype): 876 self._raise_type_error(data.dtype) 877 else: 878 from mindspore.ops import functional as F 879 if isinstance(data, Tensor) and data.init is not None: 880 data.init_data() 881 data = F.cast(data, self.dtype) 882 if isinstance(data, Tensor) and data.has_init: 883 # The parameter has been initialized, directly update by the data 884 if current_tensor_is_init: 885 self._update_tensor_data(data.init_data()) 886 else: 887 # also update the related inited parameter data 888 if self.inited_param is not None: 889 self.inited_param.set_data(data) 890 self.init_mode = data 891 elif incoming_tensor_is_init or current_tensor_is_init: 892 self._update_tensor_data(data) 893 self.sliced = slice_shape 894 return self 895 896 @staticmethod 897 def _get_init_data_args(layout=None): 898 """Get the data layout args.""" 899 init_data_args = () 900 if layout: 901 if not isinstance(layout, tuple): 902 raise TypeError("The argument 'layout' should be tuple, but got {}.".format(type(layout))) 903 if len(layout) < 6: 904 raise ValueError("The length of 'layout' must be larger than 5, but got {}.".format(len(layout))) 905 slice_index = int(_get_slice_index(layout[0], layout[1], layout[5])) 906 init_data_args += (slice_index, layout[2], layout[5]) 907 return init_data_args 908 909 910 def init_data(self, layout=None, set_sliced=False): 911 """ 912 Initialize the parameter's data. 913 914 Args: 915 layout (Union[None, tuple]): The parameter's layout info. 916 layout [dev_mat, tensor_map, slice_shape, filed_size, uniform_split, opt_shard_group]. 917 Default: ``None``. 918 It's not None only in 'SEMI_AUTO_PARALLEL' or 'AUTO_PARALLEL' mode. 919 920 - dev_mat (list(int)): The parameter's device matrix. 921 - tensor_map (list(int)): The parameter's tensor map. 922 - slice_shape (list(int)): The parameter's slice shape. 923 - filed_size (int): The parameter's filed size. 924 - uniform_split (bool): Whether the parameter is split evenly. 925 - opt_shard_group (str): The group of the parameter while running optimizer parallel. 926 927 set_sliced (bool): True if the parameter is set sliced after initializing the data. 928 Default: ``False``. 929 930 Returns: 931 Parameter, the `Parameter` after initializing data. If current `Parameter` was already initialized before, 932 returns the same initialized `Parameter`. 933 934 Raises: 935 RuntimeError: If it is from Initializer, and parallel mode has changed after the Initializer created. 936 ValueError: If the length of the layout is less than 6. 937 TypeError: If `layout` is not tuple. 938 939 Examples: 940 >>> from mindspore import Tensor, Parameter 941 >>> import numpy as np 942 >>> x = Parameter(Tensor(np.array([[1, 2], [3, 4]], dtype=np.float32)), name="param") 943 >>> x.init_data() 944 """ 945 if self.is_default_input_init and self.is_in_parallel != _is_in_parallel_mode(): 946 raise RuntimeError("Must set or change parallel mode before any initializer Tensor created.") 947 if self.init_mode is None: 948 return self 949 if self.inited_param is not None: 950 return self.inited_param 951 952 init_data_args = self._get_init_data_args(layout) 953 954 if _is_role_sched(): 955 return self 956 if self.init_in_server and self.is_param_ps and isinstance(self.init_mode, Tensor) and \ 957 self.init_mode.init is not None and _is_role_worker(): 958 if self.cache_enable: 959 data = self.init_mode.init_data(*init_data_args) 960 else: 961 data = self.init_mode.init_data(0, [1]) 962 else: 963 data = self.init_mode.init_data(*init_data_args) 964 965 obj = self._update_tensor_data(data) 966 if id(obj) != id(self): 967 self._inited_param = obj 968 obj.init_mode = None 969 obj.sliced = set_sliced 970 _offload_if_config(obj) 971 return obj 972 973 974class ParameterTuple(tuple): 975 """ 976 Inherited from tuple, ParameterTuple is used to save multiple parameter. 977 978 Note: 979 It is used to store the parameters of the network into the parameter tuple collection. 980 981 Examples: 982 >>> from mindspore import Tensor, Parameter, ParameterTuple 983 >>> import numpy as np 984 >>> x = Parameter(Tensor(np.array([[1, 2], [3, 4]], dtype=np.float32)), name="param") 985 >>> y = Parameter(Tensor(np.array([[5, 6], [7, 8]], dtype=np.float32)), name="param1") 986 >>> pt = ParameterTuple([x, y]) 987 >>> pt1 = pt.clone(prefix="new") 988 """ 989 990 def __new__(cls, iterable): 991 """Create instance object of ParameterTuple.""" 992 data = tuple(iterable) 993 ids = set() 994 names = set() 995 for x in data: 996 if not isinstance(x, Parameter): 997 raise TypeError(f"For ParameterTuple initialization, " 998 f"ParameterTuple input should be 'Parameter' collection, " 999 f"but got a {type(iterable)}. ") 1000 if id(x) not in ids: 1001 if x.name in names: 1002 raise ValueError("The value {} , its name '{}' already exists. " 1003 "Please set a unique name for the parameter.".format(x, x.name)) 1004 names.add(x.name) 1005 ids.add(id(x)) 1006 return tuple.__new__(ParameterTuple, tuple(data)) 1007 1008 def clone(self, prefix, init='same'): 1009 """ 1010 Clone the parameters in ParameterTuple element-wisely to generate a new ParameterTuple. 1011 1012 Args: 1013 prefix (str): Namespace of parameter, the prefix string will be added to the names of parameters 1014 in parametertuple. 1015 1016 init (Union[Tensor, str, numbers.Number]): Clone the shape and dtype of Parameters in ParameterTuple and 1017 set data according to `init`. Default: ``'same'``. 1018 1019 - If `init` is a `Tensor` , set the new Parameter data to the input Tensor. 1020 - If `init` is `numbers.Number` , set the new Parameter data to the input number. 1021 - If `init` is a `str`, data will be set according to the initialization method of the same name in 1022 the `Initializer`. When it is ``'same'``, the new Parameter will have the same value 1023 with the original Parameter. 1024 1025 Returns: 1026 Tuple, the new Parameter tuple. 1027 1028 Tutorial Examples: 1029 - `Cell and Parameter - Parameter Tuple 1030 <https://mindspore.cn/tutorials/en/master/advanced/modules/layer.html#parameter-tuple>`_ 1031 """ 1032 Validator.check_str_by_regular(prefix) 1033 new = [] 1034 for x in self: 1035 x1 = x.clone(init) 1036 x1.name = prefix + "." + x1.name 1037 new.append(x1) 1038 1039 if not x1.cache_enable: 1040 continue 1041 1042 if _is_role_worker(): 1043 _clone_hash_table(x.name, x.key, x1.name, x1.key) 1044 _insert_accumu_init_info(x1.name, init_to_value(init)) 1045 return ParameterTuple(new) 1046 1047 def __parameter_tuple__(self): 1048 """For parse check.""" 1049