• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <unistd.h>
16 #ifdef __APPLE__
17 #include <sys/time.h>
18 #endif
19 
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <map>
#include <vector>
26 
27 #include "../meta/legacy_multi_thread_gemm.h"
28 #include "../public/gemmlowp.h"
29 #include "test.h"
// Let's include these so we make sure they always compile.
31 #include "../meta/multi_thread_gemm.h"
32 #include "../meta/multi_thread_transform.h"
33 #include "../meta/legacy_multi_thread_common.h"
34 
35 #if defined(__arm__) && !defined(GEMMLOWP_NEON)
36 #warning "Building without NEON support on ARM, check your compiler setup!"
37 #endif
38 
// Returns the current wall-clock time in seconds as a double.
// On Apple platforms the value is read with gettimeofday() (seconds plus
// microseconds); elsewhere with clock_gettime(CLOCK_REALTIME) (seconds plus
// nanoseconds).
double time() {
#ifdef __APPLE__
  timeval now;
  gettimeofday(&now, nullptr);
  const double fractional_seconds = 1e-6 * now.tv_usec;
  return now.tv_sec + fractional_seconds;
#else
  timespec now;
  clock_gettime(CLOCK_REALTIME, &now);
  const double fractional_seconds = 1e-9 * now.tv_nsec;
  return now.tv_sec + fractional_seconds;
#endif
}
50 
// Fills a rows x cols row-major byte matrix with a deterministic sequence.
// The first element is `seed` truncated to a byte; every following value is
// derived from the previous one as (previous * seed_2 + seed) % 256.
void prepare_test_data(std::uint8_t* data, std::int32_t rows, std::int32_t cols,
                       std::int32_t seed, std::int32_t seed_2) {
  std::int32_t current = seed;
  // Elements are produced in row-major order, so a running cursor visits the
  // same addresses as indexing with (row * cols + col).
  std::uint8_t* cursor = data;
  for (int row = 0; row < rows; ++row) {
    for (int col = 0; col < cols; ++col) {
      *cursor = static_cast<std::uint8_t>(current);
      ++cursor;
      current = (current * seed_2 + seed) % 256;
    }
  }
}
61 
// Verifies a quantized 8-bit GEMM result against a scalar reference.
//
// For every (i, j) the reference accumulator is
//   sum over k of (left[depth * i + k] + lhs_offset) *
//                 (right[depth * j + k] + rhs_offset)
// which is then transformed by: + sum_offset, * mul_offset, + rounding,
// >> shift, and finally clamped to [0, 255]. The outcome is compared with
// result[i * cols + j].
//
// `shift` must be non-negative. Every mismatch is printed to stdout; if any
// element differs a summary line is printed and the process exits with
// status 1, otherwise a single "." is printed.
void check_result(std::uint8_t* left, std::uint8_t* right, std::uint8_t* result,
                  std::int32_t rows, std::int32_t cols, std::int32_t depth,
                  std::int32_t lhs_offset, std::int32_t rhs_offset,
                  std::int32_t sum_offset, std::int32_t mul_offset,
                  std::int32_t shift) {
  // Round-to-nearest term for the right shift. With shift == 0 nothing is
  // shifted out, so no rounding is needed; computing 1 << (shift - 1) in that
  // case would be a shift by a negative count, which is undefined behaviour.
  const std::int32_t rounding = (shift > 0) ? (1 << (shift - 1)) : 0;
  std::int32_t wrong = 0;
  for (int i = 0; i < rows; ++i) {
    for (int j = 0; j < cols; ++j) {
      std::int32_t expected = 0;
      for (int k = 0; k < depth; ++k) {
        expected +=
            (static_cast<std::int32_t>(left[depth * i + k]) + lhs_offset) *
            (static_cast<std::int32_t>(right[depth * j + k]) + rhs_offset);
      }
      expected += sum_offset;
      expected *= mul_offset;
      expected += rounding;
      expected = (expected >> shift);
      // Saturate to the uint8 range of the output.
      if (expected < 0) {
        expected = 0;
      } else if (expected > 255) {
        expected = 255;
      }
      const std::int32_t actual =
          static_cast<std::int32_t>(result[i * cols + j]);
      if (actual != expected) {
        std::cout << "(" << i << ", " << j << "): " << expected << "!="
                  << actual << std::endl;
        wrong++;
      }
    }
  }
  if (wrong > 0) {
    std::cout << "Wrong: " << rows << "x" << cols << "x" << depth << " : "
              << wrong << "/" << (rows * cols) << std::endl
              << std::flush;
    std::exit(1);
  } else {
    std::cout << "." << std::flush;
  }
}
104 
// Verifies a float GEMM result against a scalar reference.
//
// For every (row, col) the int32 accumulator
//   sum over d of (left[depth * row + d] + lhs_offset) *
//                 (right[depth * col + d] + rhs_offset)
// is converted to float and multiplied by `result_offset`; the product must
// compare exactly equal to result[row * cols + col].
//
// Each mismatch is printed to stdout. On any mismatch a summary is printed and
// the process exits with status 1; otherwise a single "." is printed.
void check_result_f(std::uint8_t* left, std::uint8_t* right, float* result,
                    std::int32_t rows, std::int32_t cols, std::int32_t depth,
                    std::int32_t lhs_offset, std::int32_t rhs_offset,
                    float result_offset) {
  std::int32_t mismatches = 0;
  for (int row = 0; row < rows; ++row) {
    const int lhs_base = depth * row;
    for (int col = 0; col < cols; ++col) {
      const int rhs_base = depth * col;
      std::int32_t accumulator = 0;
      for (int d = 0; d < depth; ++d) {
        const std::int32_t lhs_term =
            static_cast<std::int32_t>(left[lhs_base + d]) + lhs_offset;
        const std::int32_t rhs_term =
            static_cast<std::int32_t>(right[rhs_base + d]) + rhs_offset;
        accumulator += lhs_term * rhs_term;
      }
      const float expected_float =
          static_cast<float>(accumulator) * result_offset;
      const float actual_float = result[row * cols + col];
      if (actual_float != expected_float) {
        std::cout << "(" << row << ", " << col << "): " << expected_float
                  << "!=" << actual_float << std::endl;
        ++mismatches;
      }
    }
  }
  if (mismatches <= 0) {
    std::cout << "." << std::flush;
    return;
  }
  std::cout << "Wrong: " << rows << "x" << cols << "x" << depth << " : "
            << mismatches << "/" << (rows * cols) << std::endl
            << std::flush;
  std::exit(1);
}
136 
137 
// Verifies an int32 GEMM result against a scalar reference.
//
// For every (row, col) the value
//   sum over d of (left[depth * row + d] + lhs_offset) *
//                 (right[depth * col + d] + rhs_offset)
// must equal result[row * cols + col].
//
// Each mismatch is printed to stdout. On any mismatch a summary is printed and
// the process exits with status 1; otherwise a single "." is printed.
void check_result_i32(std::uint8_t* left, std::uint8_t* right,
                      std::int32_t* result, std::int32_t rows,
                      std::int32_t cols, std::int32_t depth,
                      std::int32_t lhs_offset, std::int32_t rhs_offset) {
  std::int32_t mismatches = 0;
  for (int row = 0; row < rows; ++row) {
    const int lhs_base = depth * row;
    for (int col = 0; col < cols; ++col) {
      const int rhs_base = depth * col;
      std::int32_t reference = 0;
      for (int d = 0; d < depth; ++d) {
        const std::int32_t lhs_term =
            static_cast<std::int32_t>(left[lhs_base + d]) + lhs_offset;
        const std::int32_t rhs_term =
            static_cast<std::int32_t>(right[rhs_base + d]) + rhs_offset;
        reference += lhs_term * rhs_term;
      }
      const std::int32_t observed = result[row * cols + col];
      if (observed != reference) {
        std::cout << "(" << row << ", " << col << "): " << reference
                  << "!=" << observed << std::endl;
        ++mismatches;
      }
    }
  }
  if (mismatches <= 0) {
    std::cout << "." << std::flush;
    return;
  }
  std::cout << "Wrong: " << rows << "x" << cols << "x" << depth << " : "
            << mismatches << "/" << (rows * cols) << std::endl
            << std::flush;
  std::exit(1);
}
168 
// Sets the first rows * cols elements of `result` to zero (converted to T).
// Nothing is written when rows * cols is not positive.
template <typename T>
void clear(T* result, std::int32_t rows, std::int32_t cols) {
  const T zero = static_cast<T>(0);
  int index = 0;
  while (index < rows * cols) {
    result[index] = zero;
    ++index;
  }
}
175 
test(std::uint8_t * scratch,std::uint8_t * lhs,std::uint8_t * rhs,std::int32_t m,std::int32_t n,std::int32_t k,std::uint8_t * result,gemmlowp::WorkersPool * pool,std::int32_t pool_size)176 void test(std::uint8_t* scratch, std::uint8_t* lhs, std::uint8_t* rhs,
177           std::int32_t m, std::int32_t n, std::int32_t k, std::uint8_t* result,
178           gemmlowp::WorkersPool* pool, std::int32_t pool_size) {
179   prepare_test_data(lhs, m, k, 11, 13);
180   prepare_test_data(rhs, n, k, 177, 19);
181 
182   clear(result, m, n);
183   gemmlowp::meta::multi_thread_gemm_q8(pool, pool_size, scratch, lhs, rhs, m, n,
184                                        k, -127, -127, 127 * k, 1, 7, result);
185   check_result(lhs, rhs, result, m, n, k, -127, -127, 127 * k, 1, 7);
186 }
187 
test_f(std::uint8_t * scratch,std::uint8_t * lhs,std::uint8_t * rhs,std::int32_t m,std::int32_t n,std::int32_t k,float * result,gemmlowp::WorkersPool * pool,std::int32_t pool_size)188 void test_f(std::uint8_t* scratch, std::uint8_t* lhs, std::uint8_t* rhs,
189             std::int32_t m, std::int32_t n, std::int32_t k, float* result,
190             gemmlowp::WorkersPool* pool, std::int32_t pool_size) {
191   prepare_test_data(lhs, m, k, 11, 13);
192   prepare_test_data(rhs, n, k, 177, 19);
193 
194   clear(result, m, n);
195   float scale = 1.0f / 1234567.8f;
196   gemmlowp::meta::multi_thread_gemm_f(pool, pool_size, scratch, lhs, rhs, m, n,
197                                       k, -127, -127, scale, result);
198   check_result_f(lhs, rhs, result, m, n, k, -127, -127, scale);
199 }
200 
test_i32(std::uint8_t * scratch,std::uint8_t * lhs,std::uint8_t * rhs,std::int32_t m,std::int32_t n,std::int32_t k,std::int32_t * result,gemmlowp::WorkersPool * pool,std::int32_t pool_size)201 void test_i32(std::uint8_t* scratch, std::uint8_t* lhs, std::uint8_t* rhs,
202               std::int32_t m, std::int32_t n, std::int32_t k,
203               std::int32_t* result, gemmlowp::WorkersPool* pool,
204               std::int32_t pool_size) {
205   prepare_test_data(lhs, m, k, 11, 13);
206   prepare_test_data(rhs, n, k, 177, 19);
207 
208   clear(result, m, n);
209   gemmlowp::meta::multi_thread_gemm_i32(pool, pool_size, scratch, lhs, rhs, m,
210                                         n, k, -127, -127, result);
211   check_result_i32(lhs, rhs, result, m, n, k, -127, -127);
212 }
213 
q_suite(int mi,int ni,int ki,int mx,int nx,int kx,int md,int nd,int kd,std::uint8_t * scratch,std::uint8_t * left,std::uint8_t * right,std::uint8_t * result,gemmlowp::WorkersPool * pool,int t)214 void q_suite(int mi, int ni, int ki, int mx, int nx, int kx, int md, int nd,
215              int kd, std::uint8_t* scratch, std::uint8_t* left,
216              std::uint8_t* right, std::uint8_t* result,
217              gemmlowp::WorkersPool* pool, int t) {
218   for (int m = mi; m < mx; m += md) {
219     for (int n = ni; n < nx; n += nd) {
220       for (int k = ki; k < kx; k += kd) {
221         test(scratch, left, right, m, n, k, result, pool, t);
222       }
223     }
224   }
225   std::cout << std::endl;
226 }
227 
f_suite(int mi,int ni,int ki,int mx,int nx,int kx,int md,int nd,int kd,std::uint8_t * scratch,std::uint8_t * left,std::uint8_t * right,float * result,gemmlowp::WorkersPool * pool,int t)228 void f_suite(int mi, int ni, int ki, int mx, int nx, int kx, int md, int nd,
229              int kd, std::uint8_t* scratch, std::uint8_t* left,
230              std::uint8_t* right, float* result, gemmlowp::WorkersPool* pool,
231              int t) {
232   for (int m = mi; m < mx; m += md) {
233     for (int n = ni; n < nx; n += nd) {
234       for (int k = ki; k < kx; k += kd) {
235         test_f(scratch, left, right, m, n, k, result, pool, t);
236       }
237     }
238   }
239   std::cout << std::endl;
240 }
241 
i32_suite(int mi,int ni,int ki,int mx,int nx,int kx,int md,int nd,int kd,std::uint8_t * scratch,std::uint8_t * left,std::uint8_t * right,std::int32_t * result,gemmlowp::WorkersPool * pool,int t)242 void i32_suite(int mi, int ni, int ki, int mx, int nx, int kx, int md, int nd,
243                int kd, std::uint8_t* scratch, std::uint8_t* left,
244                std::uint8_t* right, std::int32_t* result,
245                gemmlowp::WorkersPool* pool, int t) {
246   for (int m = mi; m < mx; m += md) {
247     for (int n = ni; n < nx; n += nd) {
248       for (int k = ki; k < kx; k += kd) {
249         test_i32(scratch, left, right, m, n, k, result, pool, t);
250       }
251     }
252   }
253   std::cout << std::endl;
254 }
255 
main(int argc,char * argv[])256 int main(int argc, char* argv[]) {
257   bool run_long_test = false;
258 
259   if (argc > 1 && strcmp(argv[1], "long")) {
260     run_long_test = true;
261   }
262 
263   const std::int32_t min_n = 1;
264   const std::int32_t min_m = 1;
265   const std::int32_t min_k = 8;
266 
267   const std::int32_t max_n = 1024;
268   const std::int32_t max_m = 1024;
269   const std::int32_t max_k = 2048;
270 
271   std::uint8_t* left = new std::uint8_t[max_m * max_k];
272   std::uint8_t* right = new std::uint8_t[max_n * max_k];
273   std::uint8_t* result = new std::uint8_t[max_m * max_n];
274   float* result_float = new float[max_m * max_n];
275   std::int32_t* result_i32 = new std::int32_t[max_m * max_n];
276   std::uint8_t* scratch = new std::uint8_t[1024 * 1024 * 64];
277 
278   gemmlowp::WorkersPool pool;
279 
280   int max_repetitions = run_long_test ? 10 : 1;
281 
282   for (int repetitions = 0; repetitions < max_repetitions; ++repetitions) {
283     int t = std::min(repetitions + 1, 4);
284     std::cout << "Threads: " << t << std::endl << std::flush;
285 
286     std::cout << "Quantized 8 bit." << std::endl << std::flush;
287 
288     std::cout << "Small." << std::endl << std::flush;
289     q_suite(1, 1, 1, 16, 16, 32, 1, 1, 1, scratch, left, right, result, &pool,
290             t);
291 
292     if (run_long_test) {
293       std::cout << "Big." << std::endl << std::flush;
294       q_suite(1, 1, 1, 512, 512, 2048, 111, 111, 111, scratch, left, right,
295               result, &pool, t);
296     }
297 
298     std::cout << "Gemv." << std::endl << std::flush;
299     q_suite(1, 1, 1, 2, 512, 2048, 1, 111, 111, scratch, left, right, result,
300             &pool, t);
301     q_suite(1, 1, 1, 512, 2, 2048, 111, 1, 111, scratch, left, right, result,
302             &pool, t);
303 
304     std::cout << std::endl << "Floats." << std::endl << std::flush;
305 
306     std::cout << "Small." << std::endl << std::flush;
307     f_suite(1, 1, 1, 16, 16, 32, 1, 1, 1, scratch, left, right, result_float,
308             &pool, t);
309 
310     if (run_long_test) {
311       std::cout << "Big." << std::endl << std::flush;
312       f_suite(1, 1, 1, 512, 512, 2048, 111, 111, 111, scratch, left, right,
313               result_float, &pool, t);
314     }
315 
316     std::cout << "Gemv." << std::endl << std::flush;
317     f_suite(1, 1, 1, 2, 512, 2048, 1, 111, 111, scratch, left, right,
318             result_float, &pool, t);
319     f_suite(1, 1, 1, 512, 2, 2048, 111, 1, 111, scratch, left, right,
320             result_float, &pool, t);
321 
322     std::cout << std::endl << "Int32." << std::endl << std::flush;
323 
324     std::cout << "Small." << std::endl << std::flush;
325     i32_suite(1, 1, 1, 16, 16, 32, 1, 1, 1, scratch, left, right, result_i32,
326               &pool, t);
327 
328     if (run_long_test) {
329       std::cout << "Big." << std::endl << std::flush;
330       i32_suite(1, 1, 1, 512, 512, 2048, 111, 111, 111, scratch, left, right,
331                 result_i32, &pool, t);
332     }
333 
334     std::cout << "Gemv." << std::endl << std::flush;
335     i32_suite(1, 1, 1, 2, 512, 2048, 1, 111, 111, scratch, left, right,
336               result_i32, &pool, t);
337     i32_suite(1, 1, 1, 512, 2, 2048, 111, 1, 111, scratch, left, right,
338               result_i32, &pool, t);
339 
340     std::cout << std::endl << std::flush;
341   }
342 
343   std::cout << "Done." << std::endl << std::flush;
344 }
345