/**
 * Copyright 2019-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17 #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_DEVICE_ADDRESS_H_
18 #define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_DEVICE_ADDRESS_H_
19 
20 #include <string>
21 #include <vector>
22 #include <memory>
23 #include "include/backend/device_address.h"
24 #include "runtime/device/loadable_device_address.h"
25 #include "runtime/device/kernel_runtime.h"
26 #include "plugin/device/ascend/hal/device/ascend_memory_pool.h"
27 #include "plugin/device/ascend/hal/device/launch_transdata.h"
28 #include "ir/dtype.h"
29 #include "kernel/kernel.h"
30 #include "utils/shape_utils.h"
31 #include "acl/acl_rt.h"
32 
33 namespace mindspore {
34 #ifdef ENABLE_DEBUGGER
35 class Debugger;
36 #endif
37 namespace device {
38 class LaunchKernel;
39 namespace ascend {
40 class AscendDeviceAddress : public LoadableDeviceAddress {
41  public:
AscendDeviceAddress(const KernelTensorPtr & kernel_tensor)42   explicit AscendDeviceAddress(const KernelTensorPtr &kernel_tensor) : LoadableDeviceAddress(kernel_tensor) {
43     SetDevicePtrDeleter();
44   }
AscendDeviceAddress(void * ptr,size_t size)45   explicit AscendDeviceAddress(void *ptr, size_t size) : LoadableDeviceAddress(ptr, size) { SetDevicePtrDeleter(); }
AscendDeviceAddress(void * ptr,size_t size,const std::string & device_name,uint32_t device_id)46   explicit AscendDeviceAddress(void *ptr, size_t size, const std::string &device_name, uint32_t device_id)
47       : LoadableDeviceAddress(ptr, size, device_name, device_id) {
48     SetDevicePtrDeleter();
49   }
AscendDeviceAddress(void * ptr,size_t size,const std::string & format,TypeId type_id,const std::string & device_name,uint32_t device_id)50   explicit AscendDeviceAddress(void *ptr, size_t size, const std::string &format, TypeId type_id,
51                                const std::string &device_name, uint32_t device_id)
52       : LoadableDeviceAddress(ptr, size, format, type_id, device_name, device_id) {
53     SetDevicePtrDeleter();
54   }
AscendDeviceAddress(void * ptr,size_t size,const ShapeVector & shape_vector,const Format & format,TypeId type_id,const std::string & device_name,uint32_t device_id,uint32_t stream_id)55   AscendDeviceAddress(void *ptr, size_t size, const ShapeVector &shape_vector, const Format &format, TypeId type_id,
56                       const std::string &device_name, uint32_t device_id, uint32_t stream_id)
57       : LoadableDeviceAddress(ptr, size, shape_vector, format, type_id, device_name, device_id, stream_id) {
58     SetDevicePtrDeleter();
59   }
AscendDeviceAddress(void * ptr,size_t size,const std::string & format,TypeId type_id,const KernelWithIndex & node_index,const std::string & device_name,uint32_t device_id)60   explicit AscendDeviceAddress(void *ptr, size_t size, const std::string &format, TypeId type_id,
61                                const KernelWithIndex &node_index, const std::string &device_name, uint32_t device_id)
62       : LoadableDeviceAddress(ptr, size, format, type_id, node_index, device_name, device_id) {
63     SetDevicePtrDeleter();
64   }
AscendDeviceAddress(void * ptr,size_t size,const std::string & format,TypeId type_id)65   explicit AscendDeviceAddress(void *ptr, size_t size, const std::string &format, TypeId type_id)
66       : LoadableDeviceAddress(ptr, size, format, type_id) {
67     SetDevicePtrDeleter();
68   }
69   ~AscendDeviceAddress() override;
70   void DeviceSynchronizerInit() override;
71   bool SyncDeviceToHost(size_t size, void *const host_ptr) const override;
72   bool SyncHostToDevice(size_t size, const void *host_ptr) const override;
73   bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const override;
74   bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr,
75                         const std::string &format) const override;
76   bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const std::string &format,
77                         const tensor::TensorDataPtr &tensor_data) const override;
78 
79   bool AsyncHostToDevice(size_t size, const void *host_ptr) const override;
80 
81   bool AsyncDeviceToHost(size_t size, void *host_ptr) const override;
82 
83   bool AsyncDeviceToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr,
84                            const std::string &format) const override;
85   bool SyncDeviceToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *src_ptr,
86                           const std::string &format) const override;
87   bool AsyncHostToDevice(size_t size, TypeId /* type */, const void *host_ptr) const override;
88   bool SyncDeviceToDevice(const DeviceSync *src_device_addr) const override;
89   bool CopyDeviceToHost(void *dst, const void *src, const size_t &size) const override;
90   bool CopyHostToDevice(void *dst, const void *src, const size_t &size) const override;
91   void ClearDeviceMemory() override;
GetDeviceType()92   DeviceType GetDeviceType() const override { return DeviceType::kAscend; }
93 #ifndef ENABLE_SECURITY
94   bool DumpMemToFile(const std::string &filepath, const std::string &host_fmt, const ShapeVector &host_shape,
95                      TypeId host_type, bool trans_flag) const override;
96 #endif
97 #ifdef ENABLE_DEBUGGER
98   bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
99                      const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev,
100                      uint32_t root_graph_id, bool force_update, bool trans_flag, bool async_copy = true) const override;
101 #endif
102 
103   // Asynchronously copy host memory to device side.
104   bool AsyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr,
105                          size_t stream_id) const;
106 
107   // Asynchronously copy device memory to host side.
108   bool AsyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr, size_t stream_id) const;
109 
set_communication_ptr(uint8_t * communication_ptr)110   void set_communication_ptr(uint8_t *communication_ptr) override {
111     communication_ptr_ = communication_ptr;
112     // The communication_ptr_ should free to memory pool instead of GetDevicePtr(), so must update device pointer
113     // deleter.
114     SetDevicePtrDeleter();
115   }
116 
117  protected:
118   bool CopyDeviceToHost(void *dst, const void *src, size_t size, bool async, size_t stream_id) const override;
119   bool CopyHostToDevice(void *dst, const void *src, size_t size, bool async, size_t stream_id) const override;
120 
121   bool DeviceToFileDirectly(void *ptr, size_t size, const std::string &file_name, size_t stream_id) const override;
122 
123   bool FileToDeviceDirectly(void *ptr, size_t size, const std::string &file_name, size_t stream_id) const override;
124 
125   void DeviceToDevice(void *dst, void *src, size_t size, size_t stream_id) const;
126 
127  private:
128   bool SyncDeviceToHostAndConvertFormat(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const;
129   bool ConvertFormatAndSyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr,
130                                         const tensor::TensorDataPtr &tensor_data) const;
131   bool SyncDeviceToHostAndConvertFormatBasedOnTransData(const ShapeVector &host_shape, size_t size,
132                                                         mindspore::TypeId type, void *host_ptr) const;
133   bool SyncDeviceToDeviceWithDiffFormatType(const DeviceSync *src_device_addr) const;
134 
135   bool SyncHostToDeviceImpl(const ShapeVector &shape, size_t size, mindspore::TypeId type, const void *host_ptr,
136                             const std::string &format, const tensor::TensorDataPtr &tensor_data = nullptr) const;
137   void SyncStream() const;
138   bool SyncStream(size_t stream_id) const;
139   bool Float64ToFloatAndSyncHostToDevice(void *dst, size_t dst_size, const void *src, size_t src_size,
140                                          const tensor::TensorDataPtr &tensor_data) const;
141   bool SyncDeviceToHostAndFloatToFloat64(void *dst, size_t dst_size, const void *src, size_t src_size) const;
142   void SyncMemory(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind,
143                   const tensor::TensorDataPtr &tensor_data = nullptr) const;
144   void SyncHostMemoryToDeviceWithCopySrc(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind,
145                                          KernelRuntime *runtime_instance) const;
146   void SyncHostMemoryToDeviceForTensorFromNumpy(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind,
147                                                 KernelRuntime *runtime_instance) const;
148   void SyncHostMemoryToDeviceWithTensorData(void *dst, const void *src, uint64_t size, aclrtMemcpyKind kind,
149                                             const tensor::TensorDataPtr &tensor_data,
150                                             KernelRuntime *runtime_instance) const;
151   ShapeVector GetDeviceShape(ShapeVector *host_shape) const;
152   std::shared_ptr<LaunchTransData> CreateLaunchTransData(const ShapeVector &host_shape, const std::string &ori_format,
153                                                          const std::string &dst_format) const;
154   mutable std::shared_ptr<LaunchTransData> launch_transdata_{nullptr};
155   void BindDevice() const;
156   void CopyHostToDevice(const void *src, uint64_t size, const tensor::TensorDataPtr &tensor_data) const;
157   void CopyDeviceToHost(void *dst, uint64_t size) const;
158   bool CopyBetweenHostDevice(void *dst, const void *src, size_t size, bool async, size_t stream_id,
159                              bool host_to_device) const;
160   bool CopyBetweenFileDeviceDirectly(void *ptr, const std::string &file_name, size_t size, size_t stream_id,
161                                      bool file_to_device) const;
162 
163   // The 'const' for this class is irrational, but I abide by it
164   int64_t GetGroupsWithCache() const;
165 
166   // Set a device pointer destructor to kernel tensor, used to release resource reclaiming of the device pointer
167   // automatically when DeviceAddress destructed.
168   void SetDevicePtrDeleter();
169 
170   mutable int64_t groups_ = 1;
171 
172   // When the device address is used by communication node, create protect area [kMemAlignSize -- data -- kMemAlignSize]
173   // memory buffer, communication_ptr_(allocated from ascend memory pool) + kMemAlignSize = device pointer (could get by
174   // GetDevicePtr()), device pointer is to really used by communication node, and communication_ptr_ is used to free
175   // memory to Ascend memory pool.
176   uint8_t *communication_ptr_{nullptr};
177 };
178 using AscendDeviceAddressPtr = std::shared_ptr<AscendDeviceAddress>;
179 }  // namespace ascend
180 }  // namespace device
181 }  // namespace mindspore
182 #endif  // MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_ASCEND_DEVICE_ADDRESS_H_
183