1 /*===--------------------------------------------------------------------------
2 * ATMI (Asynchronous Task and Memory Interface)
3 *
4 * This file is distributed under the MIT License. See LICENSE.txt for details.
5 *===------------------------------------------------------------------------*/
6 #include "atmi_runtime.h"
7 #include "internal.h"
8 #include "rt.h"
9 #include <hsa.h>
10 #include <hsa_ext_amd.h>
11 #include <memory>
12
13 /*
14 * Initialize/Finalize
15 */
atmi_init()16 atmi_status_t atmi_init() { return core::Runtime::Initialize(); }
17
atmi_finalize()18 atmi_status_t atmi_finalize() { return core::Runtime::Finalize(); }
19
20 /*
21 * Machine Info
22 */
atmi_machine_get_info()23 atmi_machine_t *atmi_machine_get_info() {
24 return core::Runtime::GetMachineInfo();
25 }
26
27 /*
28 * Modules
29 */
atmi_module_register_from_memory_to_place(void * module_bytes,size_t module_size,atmi_place_t place,atmi_status_t (* on_deserialized_data)(void * data,size_t size,void * cb_state),void * cb_state)30 atmi_status_t atmi_module_register_from_memory_to_place(
31 void *module_bytes, size_t module_size, atmi_place_t place,
32 atmi_status_t (*on_deserialized_data)(void *data, size_t size,
33 void *cb_state),
34 void *cb_state) {
35 return core::Runtime::getInstance().RegisterModuleFromMemory(
36 module_bytes, module_size, place, on_deserialized_data, cb_state);
37 }
38
39 /*
40 * Data
41 */
42
invoke_hsa_copy(hsa_signal_t sig,void * dest,const void * src,size_t size,hsa_agent_t agent)43 static hsa_status_t invoke_hsa_copy(hsa_signal_t sig, void *dest,
44 const void *src, size_t size,
45 hsa_agent_t agent) {
46 const hsa_signal_value_t init = 1;
47 const hsa_signal_value_t success = 0;
48 hsa_signal_store_screlease(sig, init);
49
50 hsa_status_t err =
51 hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0, NULL, sig);
52 if (err != HSA_STATUS_SUCCESS) {
53 return err;
54 }
55
56 // async_copy reports success by decrementing and failure by setting to < 0
57 hsa_signal_value_t got = init;
58 while (got == init) {
59 got = hsa_signal_wait_scacquire(sig, HSA_SIGNAL_CONDITION_NE, init,
60 UINT64_MAX, ATMI_WAIT_STATE);
61 }
62
63 if (got != success) {
64 return HSA_STATUS_ERROR;
65 }
66
67 return err;
68 }
69
70 struct atmiFreePtrDeletor {
operator ()atmiFreePtrDeletor71 void operator()(void *p) {
72 atmi_free(p); // ignore failure to free
73 }
74 };
75
atmi_memcpy_h2d(hsa_signal_t signal,void * deviceDest,const void * hostSrc,size_t size,hsa_agent_t agent)76 atmi_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
77 const void *hostSrc, size_t size,
78 hsa_agent_t agent) {
79 hsa_status_t rc = hsa_memory_copy(deviceDest, hostSrc, size);
80
81 // hsa_memory_copy sometimes fails in situations where
82 // allocate + copy succeeds. Looks like it might be related to
83 // locking part of a read only segment. Fall back for now.
84 if (rc == HSA_STATUS_SUCCESS) {
85 return ATMI_STATUS_SUCCESS;
86 }
87
88 void *tempHostPtr;
89 atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0);
90 atmi_status_t ret = atmi_malloc(&tempHostPtr, size, CPU);
91 if (ret != ATMI_STATUS_SUCCESS) {
92 DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
93 size);
94 return ret;
95 }
96 std::unique_ptr<void, atmiFreePtrDeletor> del(tempHostPtr);
97 memcpy(tempHostPtr, hostSrc, size);
98
99 if (invoke_hsa_copy(signal, deviceDest, tempHostPtr, size, agent) !=
100 HSA_STATUS_SUCCESS) {
101 return ATMI_STATUS_ERROR;
102 }
103 return ATMI_STATUS_SUCCESS;
104 }
105
atmi_memcpy_d2h(hsa_signal_t signal,void * dest,const void * deviceSrc,size_t size,hsa_agent_t agent)106 atmi_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
107 const void *deviceSrc, size_t size,
108 hsa_agent_t agent) {
109 hsa_status_t rc = hsa_memory_copy(dest, deviceSrc, size);
110
111 // hsa_memory_copy sometimes fails in situations where
112 // allocate + copy succeeds. Looks like it might be related to
113 // locking part of a read only segment. Fall back for now.
114 if (rc == HSA_STATUS_SUCCESS) {
115 return ATMI_STATUS_SUCCESS;
116 }
117
118 void *tempHostPtr;
119 atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0);
120 atmi_status_t ret = atmi_malloc(&tempHostPtr, size, CPU);
121 if (ret != ATMI_STATUS_SUCCESS) {
122 DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
123 size);
124 return ret;
125 }
126 std::unique_ptr<void, atmiFreePtrDeletor> del(tempHostPtr);
127
128 if (invoke_hsa_copy(signal, tempHostPtr, deviceSrc, size, agent) !=
129 HSA_STATUS_SUCCESS) {
130 return ATMI_STATUS_ERROR;
131 }
132
133 memcpy(dest, tempHostPtr, size);
134 return ATMI_STATUS_SUCCESS;
135 }
136
atmi_free(void * ptr)137 atmi_status_t atmi_free(void *ptr) { return core::Runtime::Memfree(ptr); }
138
atmi_malloc(void ** ptr,size_t size,atmi_mem_place_t place)139 atmi_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place) {
140 return core::Runtime::Malloc(ptr, size, place);
141 }
142