1 #include <gtest/gtest.h>
2
3 #include <c10/util/irange.h>
4 #include <c10/xpu/XPUCachingAllocator.h>
5
has_xpu()6 bool has_xpu() {
7 return c10::xpu::device_count() > 0;
8 }
9
// Checks that the XPU caching allocator can serve an allocation and that it is
// the same allocator c10::GetAllocator returns for that allocation's device
// type.
TEST(XPUCachingAllocatorTest, GetXPUAllocator) {
  auto* caching_allocator = c10::xpu::XPUCachingAllocator::get();

  // Request 500 MiB; the returned data pointer must be non-null.
  const int request_bytes = 500 * 1024 * 1024;
  auto data = caching_allocator->allocate(request_bytes);
  EXPECT_TRUE(data.get());

  // Looking the allocator up by the buffer's device type must hand back the
  // very same allocator object.
  auto* looked_up_allocator = c10::GetAllocator(data.device().type());
  EXPECT_EQ(caching_allocator, looked_up_allocator);
}
20
// Verifies that memory released to the caching allocator is kept in reserve
// and that two subsequent allocations served from it are placed back to back.
TEST(XPUCachingAllocatorTest, DeviceCachingAllocate) {
  c10::xpu::XPUCachingAllocator::emptyCache();
  auto* caching_allocator = c10::xpu::XPUCachingAllocator::get();

  // Allocate 500 MiB and let the handle go out of scope right away, so the
  // memory stays reserved and can be reused by the allocations below.
  {
    const int reserve_bytes = 500 * 1024 * 1024;
    auto reserved = caching_allocator->allocate(reserve_bytes);
  }

  const int chunk_bytes = 10 * 1024 * 1024;
  auto first = caching_allocator->allocate(chunk_bytes);
  void* first_ptr = first.get();

  // This allocation goes directly through SYCL and does NOT use the device
  // caching allocator.
  void* outside_ptr = sycl::aligned_alloc_device(
      512,
      chunk_bytes,
      c10::xpu::get_raw_device(0),
      c10::xpu::get_device_context());

  void* second_ptr = c10::xpu::XPUCachingAllocator::raw_alloc(chunk_bytes);

  // Both first_ptr and second_ptr come from the device caching allocator, so
  // they are expected to be carved out of the reserved 500 MiB block, with
  // second_ptr directly following first_ptr: [first_ptr, second_ptr].
  // outside_ptr was allocated outside the caching allocator and therefore
  // cannot take space from the reserved block. Hence the distance between the
  // two cached pointers must be exactly the size of the first one (10 MiB).
  const auto offset =
      static_cast<char*>(second_ptr) - static_cast<char*>(first_ptr);
  EXPECT_EQ(offset, chunk_bytes);

  // Release everything that is not owned by a scoped handle, then drop the
  // cache.
  c10::xpu::XPUCachingAllocator::raw_delete(second_ptr);
  sycl::free(outside_ptr, c10::xpu::get_device_context());
  c10::xpu::XPUCachingAllocator::emptyCache();
}
48
// Round-trips 1024 ints through a buffer obtained from the XPU caching
// allocator (host -> device -> host) and checks that the values are intact.
TEST(XPUCachingAllocatorTest, AllocateMemory) {
  c10::xpu::XPUCachingAllocator::emptyCache();
  auto* caching_allocator = c10::xpu::XPUCachingAllocator::get();
  const int alloc_bytes = 10 * 1024 * 1024;
  auto device_buffer = caching_allocator->allocate(alloc_bytes);
  auto* device_ints = static_cast<int*>(device_buffer.get());

  constexpr int numel = 1024;
  constexpr auto copy_bytes = sizeof(int) * numel;

  // Fill the host array with 0, 1, 2, ... so every slot has a unique value.
  int host_ints[numel];
  for (const auto idx : c10::irange(numel)) {
    host_ints[idx] = idx;
  }

  auto stream = c10::xpu::getStreamFromPool();

  // Host -> device copy, followed by a device-wide stream sync.
  stream.queue().memcpy(device_ints, host_ints, copy_bytes);
  c10::xpu::syncStreamsOnDevice();

  // Wipe the host array so the check below can only pass if the copy back
  // from the device really happened.
  for (const auto idx : c10::irange(numel)) {
    host_ints[idx] = 0;
  }

  // Device -> host copy, followed by a device-wide stream sync.
  stream.queue().memcpy(host_ints, device_ints, copy_bytes);
  c10::xpu::syncStreamsOnDevice();

  for (const auto idx : c10::irange(numel)) {
    EXPECT_EQ(host_ints[idx], idx);
  }
}
79
main(int argc,char * argv[])80 int main(int argc, char* argv[]) {
81 ::testing::InitGoogleTest(&argc, argv);
82 auto device = c10::xpu::device_count();
83 if (device <= 0) {
84 return 0;
85 }
86 c10::xpu::XPUCachingAllocator::init(device);
87 return RUN_ALL_TESTS();
88 }
89