• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <gtest/gtest.h>
2 
3 // Test IntegerDivider: this tests *all* 32-bit pairs (a, b) where a % b is 0 or
4 // (b-1), so it takes a few minutes to run.
5 
6 #include <assert.h>
7 #include <stdint.h>
8 #include <memory>
9 #include <vector>
10 
11 #include <ATen/cuda/CUDAContext.h>
12 #include <ATen/cuda/detail/IntegerDivider.cuh>
13 
14 using std::vector;
15 using at::cuda::detail::IntDivider;
16 using at::cuda::detail::DivMod;
17 
18 template<typename Value>
19 struct TestCase {
20   Value dividend;
21   int divisor_idx;
22   int steps;
23 
TestCaseTestCase24   TestCase(Value dividend, int divisor_idx, int steps)
25       : dividend(dividend), divisor_idx(divisor_idx), steps(steps) {}
26 };
27 
28 template <typename Value>
testIntDivider(const IntDivider<Value> * dividers,const TestCase<Value> * testCases,int numCases)29 __global__ void testIntDivider(
30     const IntDivider<Value>* dividers,
31     const TestCase<Value>* testCases,
32     int numCases) {
33   int index = blockIdx.x * blockDim.x + threadIdx.x;
34   int stride = blockDim.x * gridDim.x;
35   for (int i = index; i < numCases; i += stride) {
36     const TestCase<Value>& tc = testCases[i];
37     Value dividend = tc.dividend;
38     const IntDivider<Value>& divider = dividers[tc.divisor_idx];
39     Value divisor = divider.divisor;
40 
41     for (int j = 0; j < tc.steps; j++) {
42       if (sizeof(Value) == 4 && dividend > INT32_MAX)
43         return;
44 
45       DivMod<Value> qr = divider.divmod(dividend);
46       assert(qr.div == dividend / divisor && qr.mod == dividend % divisor);
47       dividend += divisor;
48     }
49   }
50 }
51 
52 enum {
53   // Number of test cases per each kernel invocation.
54   NUM_CASES = 1000000,
55 
56   // Maximum number of steps per each test case.
57   MAX_STEPS = 10000,
58 };
59 
60 // Test the magic division algorithm.
61 template<typename Value>
62 class IntDividerTester {
63  public:
IntDividerTester()64   IntDividerTester() {
65     cudaError_t err;
66 
67     err = cudaMalloc(&dividersBuf_, NUM_CASES * sizeof(IntDivider<Value>));
68     bool isEQ = err == cudaSuccess;
69     EXPECT_TRUE(isEQ);
70     err = cudaMalloc(&testCasesBuf_, NUM_CASES * sizeof(TestCase<Value>));
71     isEQ = err == cudaSuccess;
72     EXPECT_TRUE(isEQ);
73   }
74 
~IntDividerTester()75   ~IntDividerTester() {
76     cudaError_t err;
77 
78     err = cudaFree(dividersBuf_);
79     bool isEQ = err == cudaSuccess;
80     EXPECT_TRUE(isEQ);
81     err = cudaFree(testCasesBuf_);
82     isEQ = err == cudaSuccess;
83     EXPECT_TRUE(isEQ);
84   }
85 
addTestCase(Value dividend,Value divisor,int steps)86   void addTestCase(Value dividend, Value divisor, int steps) {
87     // Append a new IntDivider using 'divisor' if necessary.
88     if (dividers_.empty() || dividers_.back().divisor != divisor)
89       dividers_.emplace_back(divisor);
90 
91     // Append the test case.
92     testCases_.emplace_back(dividend, dividers_.size() - 1, steps);
93 
94     // Launch the test kernel if the buffer is full.
95     if (testCases_.size() == NUM_CASES)
96       flush();
97   }
98 
flush()99   void flush() {
100     cudaError_t err;
101     bool isTrue;
102     if (testCases_.empty())
103       return;
104 
105     ASSERT_FALSE(dividers_.empty());
106 
107     isTrue = dividers_.size() <= NUM_CASES;
108     ASSERT_TRUE(isTrue);
109     isTrue = testCases_.size() <= NUM_CASES;
110     ASSERT_TRUE(isTrue);
111     err = cudaMemcpy(
112         dividersBuf_,
113         dividers_.data(),
114         dividers_.size() * sizeof(IntDivider<Value>),
115         cudaMemcpyHostToDevice);
116     isTrue = err == cudaSuccess;
117     ASSERT_TRUE(isTrue);
118     err = cudaMemcpy(
119         testCasesBuf_,
120         testCases_.data(),
121         testCases_.size() * sizeof(TestCase<Value>),
122         cudaMemcpyHostToDevice);
123     isTrue = err == cudaSuccess;
124     ASSERT_TRUE(isTrue);
125 
126     int numCases = testCases_.size();
127     testIntDivider<Value><<<512, 512>>>(dividersBuf_, testCasesBuf_, numCases);
128     C10_CUDA_KERNEL_LAUNCH_CHECK();
129 
130     dividers_.clear();
131     testCases_.clear();
132   }
133 
134  private:
135   vector<IntDivider<Value>> dividers_;
136   vector<TestCase<Value>> testCases_;
137 
138   IntDivider<Value>* dividersBuf_;
139   TestCase<Value>* testCasesBuf_;
140 };
141 
testUint32Divider()142 static void testUint32Divider()
143 {
144   fprintf(stderr, "Testing 32-bit integer division ...");
145 
146   IntDividerTester<uint32_t> tester;
147 
148   for (uint64_t divisor = 1; divisor <= INT32_MAX; divisor++) {
149     if (divisor < 1000000 && divisor % 10000 == 0)
150       fprintf(stderr, ".");
151     if (divisor % 10000000 == 0)
152       fprintf(stderr, "-");
153 
154     // In order to save time, we only test when the remainder is zero or
155     // (divisor - 1).
156     uint64_t dividend = 0;
157     while (dividend <= INT32_MAX) {
158       uint64_t steps = (INT32_MAX - dividend) / divisor + 1;
159       if (steps > MAX_STEPS)
160         steps = MAX_STEPS;
161 
162       tester.addTestCase(dividend, divisor, steps);
163       tester.addTestCase(dividend + divisor - 1, divisor, steps);
164 
165       dividend += divisor * steps;
166     }
167 
168     // Check the boundary cases.
169     tester.addTestCase(1, divisor, 1);
170     tester.addTestCase(INT32_MAX, divisor, 1);
171   }
172 
173   tester.flush();
174 
175   fprintf(stderr, " Done!\n");
176 }
177 
178 // uint64_t divider uses plain division, so we just check a few random cases.
testUint64Divider()179 static void testUint64Divider()
180 {
181   IntDividerTester<uint64_t> tester;
182 
183   uint64_t dividend = 0x123456789ULL;
184   uint64_t divisor = 0x54321ULL;
185 
186   for (int i = 0; i < 1000; i++) {
187     if (divisor != 0) {
188       tester.addTestCase(dividend, divisor, 100);
189 
190       // Test small divisor.
191       tester.addTestCase(dividend, divisor % 65536, 100);
192 
193       // Create pseudorandom numbers.
194       dividend *= 0x100000001b3ULL;
195       dividend ^= 0x1234567890abcdefULL;
196       divisor *= 0x100000001b3ULL;
197       divisor ^= 0x1234567890abcdefULL;
198     }
199   }
200 
201   tester.flush();
202 }
203 
TEST(TestCUDAIntegerDivider,IntegerDivider)204 TEST(TestCUDAIntegerDivider, IntegerDivider) {
205   if (!at::cuda::is_available()) return;
206   testUint64Divider();
207   testUint32Divider();
208 
209   cudaError_t err = cudaDeviceSynchronize();
210   bool isTrue = err == cudaSuccess;
211   ASSERT_TRUE(isTrue);
212 }
213