• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __BANDWIDTH_H__
18 #define __BANDWIDTH_H__
19 
20 #include <stdlib.h>
21 #include <string.h>
22 
23 #include "utils/Compat.h"
24 #include "memtest.h"
25 
26 // Bandwidth Class definitions.
27 class BandwidthBenchmark {
28 public:
BandwidthBenchmark()29     BandwidthBenchmark()
30         : _size(0),
31           _num_warm_loops(DEFAULT_NUM_WARM_LOOPS),
32           _num_loops(DEFAULT_NUM_LOOPS) {}
~BandwidthBenchmark()33     virtual ~BandwidthBenchmark() {}
34 
run()35     bool run() {
36         if (_size == 0) {
37             return false;
38         }
39         if (!canRun()) {
40             return false;
41         }
42 
43         bench(_num_warm_loops);
44 
45         nsecs_t t = system_time();
46         bench(_num_loops);
47         t = system_time() - t;
48 
49         _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC);
50 
51         return true;
52     }
53 
canRun()54     bool canRun() { return !usesNeon() || isNeonSupported(); }
55 
56     virtual bool setSize(size_t size) = 0;
57 
58     virtual const char *getName() = 0;
59 
60     virtual bool verify() = 0;
61 
usesNeon()62     virtual bool usesNeon() { return false; }
63 
isNeonSupported()64     bool isNeonSupported() {
65 #if defined(__ARM_NEON__)
66         return true;
67 #else
68         return false;
69 #endif
70     }
71 
72     // Accessors/mutators.
mb_per_sec()73     double mb_per_sec() { return _mb_per_sec; }
num_warm_loops()74     size_t num_warm_loops() { return _num_warm_loops; }
num_loops()75     size_t num_loops() { return _num_loops; }
size()76     size_t size() { return _size; }
77 
set_num_warm_loops(size_t num_warm_loops)78     void set_num_warm_loops(size_t num_warm_loops) {
79         _num_warm_loops = num_warm_loops;
80     }
set_num_loops(size_t num_loops)81     void set_num_loops(size_t num_loops) { _num_loops = num_loops; }
82 
83     // Static constants
84     static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000;
85     static const unsigned int DEFAULT_NUM_LOOPS = 20000000;
86 
87 protected:
88     virtual void bench(size_t num_loops) = 0;
89 
90     double _mb_per_sec;
91     size_t _size;
92     size_t _num_warm_loops;
93     size_t _num_loops;
94 
95 private:
96     // Static constants
97     static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0;
98     static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0;
99 };
100 
101 class CopyBandwidthBenchmark : public BandwidthBenchmark {
102 public:
CopyBandwidthBenchmark()103     CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { }
104 
setSize(size_t size)105     bool setSize(size_t size) {
106         if (_src) {
107            free(_src);
108         }
109         if (_dst) {
110             free(_dst);
111         }
112 
113         if (size == 0) {
114             _size = DEFAULT_COPY_SIZE;
115         } else {
116             _size = size;
117         }
118 
119         _src = reinterpret_cast<char*>(memalign(64, _size));
120         if (!_src) {
121             perror("Failed to allocate memory for test.");
122             return false;
123         }
124         _dst = reinterpret_cast<char*>(memalign(64, _size));
125         if (!_dst) {
126             perror("Failed to allocate memory for test.");
127             return false;
128         }
129 
130         return true;
131     }
~CopyBandwidthBenchmark()132     virtual ~CopyBandwidthBenchmark() {
133         if (_src) {
134             free(_src);
135             _src = NULL;
136         }
137         if (_dst) {
138             free(_dst);
139             _dst = NULL;
140         }
141     }
142 
verify()143     bool verify() {
144         memset(_src, 0x23, _size);
145         memset(_dst, 0, _size);
146         bench(1);
147         if (memcmp(_src, _dst, _size) != 0) {
148             printf("Buffers failed to compare after one loop.\n");
149             return false;
150         }
151 
152         memset(_src, 0x23, _size);
153         memset(_dst, 0, _size);
154         _num_loops = 2;
155         bench(2);
156         if (memcmp(_src, _dst, _size) != 0) {
157             printf("Buffers failed to compare after two loops.\n");
158             return false;
159         }
160 
161         return true;
162     }
163 
164 protected:
165     char *_src;
166     char *_dst;
167 
168     static const unsigned int DEFAULT_COPY_SIZE = 8000;
169 };
170 
171 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark {
172 public:
CopyLdrdStrdBenchmark()173     CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { }
~CopyLdrdStrdBenchmark()174     virtual ~CopyLdrdStrdBenchmark() {}
175 
getName()176     const char *getName() { return "ldrd/strd"; }
177 
178 protected:
179     // Copy using ldrd/strd instructions.
bench(size_t num_loops)180     void bench(size_t num_loops) {
181         asm volatile(
182             "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
183 
184             "mov r0, %0\n"
185             "mov r1, %1\n"
186             "mov r2, %2\n"
187             "mov r3, %3\n"
188 
189             "0:\n"
190             "mov r4, r2, lsr #6\n"
191 
192             "1:\n"
193             "ldrd r6, r7, [r0]\n"
194             "strd r6, r7, [r1]\n"
195             "ldrd r6, r7, [r0, #8]\n"
196             "strd r6, r7, [r1, #8]\n"
197             "ldrd r6, r7, [r0, #16]\n"
198             "strd r6, r7, [r1, #16]\n"
199             "ldrd r6, r7, [r0, #24]\n"
200             "strd r6, r7, [r1, #24]\n"
201             "ldrd r6, r7, [r0, #32]\n"
202             "strd r6, r7, [r1, #32]\n"
203             "ldrd r6, r7, [r0, #40]\n"
204             "strd r6, r7, [r1, #40]\n"
205             "ldrd r6, r7, [r0, #48]\n"
206             "strd r6, r7, [r1, #48]\n"
207             "ldrd r6, r7, [r0, #56]\n"
208             "strd r6, r7, [r1, #56]\n"
209 
210             "add  r0, r0, #64\n"
211             "add  r1, r1, #64\n"
212             "subs r4, r4, #1\n"
213             "bgt 1b\n"
214 
215             "sub r0, r0, r2\n"
216             "sub r1, r1, r2\n"
217             "subs r3, r3, #1\n"
218             "bgt 0b\n"
219 
220             "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n"
221         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
222     }
223 };
224 
225 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark {
226 public:
CopyLdmiaStmiaBenchmark()227     CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { }
~CopyLdmiaStmiaBenchmark()228     virtual ~CopyLdmiaStmiaBenchmark() {}
229 
getName()230     const char *getName() { return "ldmia/stmia"; }
231 
232 protected:
233     // Copy using ldmia/stmia instructions.
bench(size_t num_loops)234     void bench(size_t num_loops) {
235         asm volatile(
236             "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
237 
238             "mov r0, %0\n"
239             "mov r1, %1\n"
240             "mov r2, %2\n"
241             "mov r3, %3\n"
242 
243             "0:\n"
244             "mov r4, r2, lsr #6\n"
245 
246             "1:\n"
247             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
248             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
249             "subs r4, r4, #1\n"
250             "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
251             "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n"
252             "bgt 1b\n"
253 
254             "sub r0, r0, r2\n"
255             "sub r1, r1, r2\n"
256             "subs r3, r3, #1\n"
257             "bgt 0b\n"
258 
259             "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n"
260         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
261     }
262 };
263 
264 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark {
265 public:
CopyVld1Vst1Benchmark()266     CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { }
~CopyVld1Vst1Benchmark()267     virtual ~CopyVld1Vst1Benchmark() {}
268 
getName()269     const char *getName() { return "vld1/vst1"; }
270 
usesNeon()271     bool usesNeon() { return true; }
272 
273 protected:
274     // Copy using vld1/vst1 instructions.
275 #if defined(__ARM_NEON__)
bench(size_t num_loops)276     void bench(size_t num_loops) {
277         asm volatile(
278             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
279 
280             "mov r0, %0\n"
281             "mov r1, %1\n"
282             "mov r2, %2\n"
283             "mov r3, %3\n"
284 
285             "0:\n"
286             "mov r4, r2, lsr #6\n"
287 
288             "1:\n"
289             "vld1.8 {d0-d3}, [r0]!\n"
290             "vld1.8 {d4-d7}, [r0]!\n"
291             "subs r4, r4, #1\n"
292             "vst1.8 {d0-d3}, [r1:128]!\n"
293             "vst1.8 {d4-d7}, [r1:128]!\n"
294             "bgt 1b\n"
295 
296             "sub r0, r0, r2\n"
297             "sub r1, r1, r2\n"
298             "subs r3, r3, #1\n"
299             "bgt 0b\n"
300 
301             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
302         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
303 #else
304     void bench(size_t) {
305 #endif
306     }
307 };
308 
309 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark {
310 public:
CopyVldrVstrBenchmark()311     CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { }
~CopyVldrVstrBenchmark()312     virtual ~CopyVldrVstrBenchmark() {}
313 
getName()314     const char *getName() { return "vldr/vstr"; }
315 
usesNeon()316     bool usesNeon() { return true; }
317 
318 protected:
319     // Copy using vldr/vstr instructions.
320 #if defined(__ARM_NEON__)
bench(size_t num_loops)321     void bench(size_t num_loops) {
322         asm volatile(
323             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
324 
325             "mov r0, %0\n"
326             "mov r1, %1\n"
327             "mov r2, %2\n"
328             "mov r3, %3\n"
329 
330             "0:\n"
331             "mov r4, r2, lsr #6\n"
332 
333             "1:\n"
334             "vldr d0, [r0, #0]\n"
335             "subs r4, r4, #1\n"
336             "vldr d1, [r0, #8]\n"
337             "vstr d0, [r1, #0]\n"
338             "vldr d0, [r0, #16]\n"
339             "vstr d1, [r1, #8]\n"
340             "vldr d1, [r0, #24]\n"
341             "vstr d0, [r1, #16]\n"
342             "vldr d0, [r0, #32]\n"
343             "vstr d1, [r1, #24]\n"
344             "vldr d1, [r0, #40]\n"
345             "vstr d0, [r1, #32]\n"
346             "vldr d0, [r0, #48]\n"
347             "vstr d1, [r1, #40]\n"
348             "vldr d1, [r0, #56]\n"
349             "vstr d0, [r1, #48]\n"
350             "add r0, r0, #64\n"
351             "vstr d1, [r1, #56]\n"
352             "add r1, r1, #64\n"
353             "bgt 1b\n"
354 
355             "sub r0, r0, r2\n"
356             "sub r1, r1, r2\n"
357             "subs r3, r3, #1\n"
358             "bgt 0b\n"
359 
360             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
361         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
362 #else
363     void bench(size_t) {
364 #endif
365     }
366 };
367 
368 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark {
369 public:
CopyVldmiaVstmiaBenchmark()370     CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { }
~CopyVldmiaVstmiaBenchmark()371     virtual ~CopyVldmiaVstmiaBenchmark() {}
372 
getName()373     const char *getName() { return "vldmia/vstmia"; }
374 
usesNeon()375     bool usesNeon() { return true; }
376 
377 protected:
378     // Copy using vldmia/vstmia instructions.
379 #if defined(__ARM_NEON__)
bench(size_t num_loops)380     void bench(size_t num_loops) {
381         asm volatile(
382             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
383 
384             "mov r0, %0\n"
385             "mov r1, %1\n"
386             "mov r2, %2\n"
387             "mov r3, %3\n"
388 
389             "0:\n"
390             "mov r4, r2, lsr #6\n"
391 
392             "1:\n"
393             "vldmia r0!, {d0-d7}\n"
394             "subs r4, r4, #1\n"
395             "vstmia r1!, {d0-d7}\n"
396             "bgt 1b\n"
397 
398             "sub r0, r0, r2\n"
399             "sub r1, r1, r2\n"
400             "subs r3, r3, #1\n"
401             "bgt 0b\n"
402 
403             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
404         :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3");
405 #else
406     void bench(size_t) {
407 #endif
408     }
409 };
410 
411 class MemcpyBenchmark : public CopyBandwidthBenchmark {
412 public:
MemcpyBenchmark()413     MemcpyBenchmark() : CopyBandwidthBenchmark() { }
~MemcpyBenchmark()414     virtual ~MemcpyBenchmark() {}
415 
getName()416     const char *getName() { return "memcpy"; }
417 
418 protected:
bench(size_t num_loops)419     void bench(size_t num_loops) {
420         for (size_t i = 0; i < num_loops; i++) {
421             memcpy(_dst, _src, _size);
422         }
423     }
424 };
425 
426 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark {
427 public:
SingleBufferBandwidthBenchmark()428     SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { }
~SingleBufferBandwidthBenchmark()429     virtual ~SingleBufferBandwidthBenchmark() {
430         if (_buffer) {
431             free(_buffer);
432             _buffer = NULL;
433         }
434     }
435 
setSize(size_t size)436     bool setSize(size_t size) {
437         if (_buffer) {
438             free(_buffer);
439             _buffer = NULL;
440         }
441 
442         if (size == 0) {
443             _size = DEFAULT_SINGLE_BUFFER_SIZE;
444         } else {
445             _size = size;
446         }
447 
448         _buffer = reinterpret_cast<char*>(memalign(64, _size));
449         if (!_buffer) {
450             perror("Failed to allocate memory for test.");
451             return false;
452         }
453         memset(_buffer, 0, _size);
454 
455         return true;
456     }
457 
verify()458     bool verify() { return true; }
459 
460 protected:
461     char *_buffer;
462 
463     static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000;
464 };
465 
466 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark {
467 public:
WriteBandwidthBenchmark()468     WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { }
~WriteBandwidthBenchmark()469     virtual ~WriteBandwidthBenchmark() { }
470 
verify()471     bool verify() {
472         memset(_buffer, 0, _size);
473         bench(1);
474         for (size_t i = 0; i < _size; i++) {
475             if (_buffer[i] != 1) {
476                 printf("Buffer failed to compare after one loop.\n");
477                 return false;
478             }
479         }
480 
481         memset(_buffer, 0, _size);
482         bench(2);
483         for (size_t i = 0; i < _size; i++) {
484             if (_buffer[i] != 2) {
485                 printf("Buffer failed to compare after two loops.\n");
486                 return false;
487             }
488         }
489 
490         return true;
491     }
492 };
493 
494 class WriteStrdBenchmark : public WriteBandwidthBenchmark {
495 public:
WriteStrdBenchmark()496     WriteStrdBenchmark() : WriteBandwidthBenchmark() { }
~WriteStrdBenchmark()497     virtual ~WriteStrdBenchmark() {}
498 
getName()499     const char *getName() { return "strd"; }
500 
501 protected:
502     // Write a given value using strd.
bench(size_t num_loops)503     void bench(size_t num_loops) {
504         asm volatile(
505             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
506 
507             "mov r0, %0\n"
508             "mov r1, %1\n"
509             "mov r2, %2\n"
510 
511             "mov r4, #0\n"
512             "mov r5, #0\n"
513 
514             "0:\n"
515             "mov r3, r1, lsr #5\n"
516 
517             "add r4, r4, #0x01010101\n"
518             "mov r5, r4\n"
519 
520             "1:\n"
521             "subs r3, r3, #1\n"
522             "strd r4, r5, [r0]\n"
523             "strd r4, r5, [r0, #8]\n"
524             "strd r4, r5, [r0, #16]\n"
525             "strd r4, r5, [r0, #24]\n"
526             "add  r0, r0, #32\n"
527             "bgt 1b\n"
528 
529             "sub r0, r0, r1\n"
530             "subs r2, r2, #1\n"
531             "bgt 0b\n"
532 
533             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
534           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
535     }
536 };
537 
538 class WriteStmiaBenchmark : public WriteBandwidthBenchmark {
539 public:
WriteStmiaBenchmark()540     WriteStmiaBenchmark() : WriteBandwidthBenchmark() { }
~WriteStmiaBenchmark()541     virtual ~WriteStmiaBenchmark() {}
542 
getName()543     const char *getName() { return "stmia"; }
544 
545 protected:
546       // Write a given value using stmia.
bench(size_t num_loops)547       void bench(size_t num_loops) {
548           asm volatile(
549               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
550 
551               "mov r0, %0\n"
552               "mov r1, %1\n"
553               "mov r2, %2\n"
554 
555               "mov r4, #0\n"
556 
557               "0:\n"
558               "mov r3, r1, lsr #5\n"
559 
560               "add r4, r4, #0x01010101\n"
561               "mov r5, r4\n"
562               "mov r6, r4\n"
563               "mov r7, r4\n"
564               "mov r8, r4\n"
565               "mov r9, r4\n"
566               "mov r10, r4\n"
567               "mov r11, r4\n"
568 
569               "1:\n"
570               "subs r3, r3, #1\n"
571               "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
572               "bgt 1b\n"
573 
574               "sub r0, r0, r1\n"
575               "subs r2, r2, #1\n"
576               "bgt 0b\n"
577 
578               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
579         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
580     }
581 };
582 
583 class WriteVst1Benchmark : public WriteBandwidthBenchmark {
584 public:
WriteVst1Benchmark()585     WriteVst1Benchmark() : WriteBandwidthBenchmark() { }
~WriteVst1Benchmark()586     virtual ~WriteVst1Benchmark() {}
587 
getName()588     const char *getName() { return "vst1"; }
589 
usesNeon()590     bool usesNeon() { return true; }
591 
592 protected:
593     // Write a given value using vst.
594 #if defined(__ARM_NEON__)
bench(size_t num_loops)595     void bench(size_t num_loops) {
596         asm volatile(
597             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
598 
599             "mov r0, %0\n"
600             "mov r1, %1\n"
601             "mov r2, %2\n"
602             "mov r4, #0\n"
603 
604             "0:\n"
605             "mov r3, r1, lsr #5\n"
606 
607             "add r4, r4, #1\n"
608             "vdup.8 d0, r4\n"
609             "vmov d1, d0\n"
610             "vmov d2, d0\n"
611             "vmov d3, d0\n"
612 
613             "1:\n"
614             "subs r3, r3, #1\n"
615             "vst1.8 {d0-d3}, [r0:128]!\n"
616             "bgt 1b\n"
617 
618             "sub r0, r0, r1\n"
619             "subs r2, r2, #1\n"
620             "bgt 0b\n"
621 
622             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
623         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
624 #else
625     void bench(size_t) {
626 #endif
627     }
628 };
629 
630 class WriteVstrBenchmark : public WriteBandwidthBenchmark {
631 public:
WriteVstrBenchmark()632     WriteVstrBenchmark() : WriteBandwidthBenchmark() { }
~WriteVstrBenchmark()633     virtual ~WriteVstrBenchmark() {}
634 
getName()635     const char *getName() { return "vstr"; }
636 
usesNeon()637     bool usesNeon() { return true; }
638 
639 protected:
640     // Write a given value using vst.
641 #if defined(__ARM_NEON__)
bench(size_t num_loops)642     void bench(size_t num_loops) {
643         asm volatile(
644             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
645 
646             "mov r0, %0\n"
647             "mov r1, %1\n"
648             "mov r2, %2\n"
649             "mov r4, #0\n"
650 
651             "0:\n"
652             "mov r3, r1, lsr #5\n"
653 
654             "add r4, r4, #1\n"
655             "vdup.8 d0, r4\n"
656             "vmov d1, d0\n"
657             "vmov d2, d0\n"
658             "vmov d3, d0\n"
659 
660             "1:\n"
661             "vstr d0, [r0, #0]\n"
662             "subs r3, r3, #1\n"
663             "vstr d1, [r0, #8]\n"
664             "vstr d0, [r0, #16]\n"
665             "vstr d1, [r0, #24]\n"
666             "add r0, r0, #32\n"
667             "bgt 1b\n"
668 
669             "sub r0, r0, r1\n"
670             "subs r2, r2, #1\n"
671             "bgt 0b\n"
672 
673             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
674         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
675 #else
676     void bench(size_t) {
677 #endif
678     }
679 };
680 
681 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark {
682 public:
WriteVstmiaBenchmark()683     WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { }
~WriteVstmiaBenchmark()684     virtual ~WriteVstmiaBenchmark() {}
685 
getName()686     const char *getName() { return "vstmia"; }
687 
usesNeon()688     bool usesNeon() { return true; }
689 
690 protected:
691     // Write a given value using vstmia.
692 #if defined(__ARM_NEON__)
bench(size_t num_loops)693     void bench(size_t num_loops) {
694         asm volatile(
695             "stmfd sp!, {r0,r1,r2,r3,r4}\n"
696 
697             "mov r0, %0\n"
698             "mov r1, %1\n"
699             "mov r2, %2\n"
700             "mov r4, #0\n"
701 
702             "0:\n"
703             "mov r3, r1, lsr #5\n"
704 
705             "add r4, r4, #1\n"
706             "vdup.8 d0, r4\n"
707             "vmov d1, d0\n"
708             "vmov d2, d0\n"
709             "vmov d3, d0\n"
710 
711             "1:\n"
712             "subs r3, r3, #1\n"
713             "vstmia r0!, {d0-d3}\n"
714             "bgt 1b\n"
715 
716             "sub r0, r0, r1\n"
717             "subs r2, r2, #1\n"
718             "bgt 0b\n"
719 
720             "ldmfd sp!, {r0,r1,r2,r3,r4}\n"
721         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
722 #else
723     void bench(size_t) {
724 #endif
725     }
726 };
727 
728 class MemsetBenchmark : public WriteBandwidthBenchmark {
729 public:
MemsetBenchmark()730     MemsetBenchmark() : WriteBandwidthBenchmark() { }
~MemsetBenchmark()731     virtual ~MemsetBenchmark() {}
732 
getName()733     const char *getName() { return "memset"; }
734 
735 protected:
bench(size_t num_loops)736     void bench(size_t num_loops) {
737         for (size_t i = 0; i < num_loops; i++) {
738             memset(_buffer, (i % 255) + 1, _size);
739         }
740     }
741 };
742 
743 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark {
744 public:
ReadLdrdBenchmark()745     ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadLdrdBenchmark()746     virtual ~ReadLdrdBenchmark() {}
747 
getName()748     const char *getName() { return "ldrd"; }
749 
750 protected:
751     // Write a given value using strd.
bench(size_t num_loops)752     void bench(size_t num_loops) {
753         asm volatile(
754             "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
755 
756             "mov r0, %0\n"
757             "mov r1, %1\n"
758             "mov r2, %2\n"
759 
760             "0:\n"
761             "mov r3, r1, lsr #5\n"
762 
763             "1:\n"
764             "subs r3, r3, #1\n"
765             "ldrd r4, r5, [r0]\n"
766             "ldrd r4, r5, [r0, #8]\n"
767             "ldrd r4, r5, [r0, #16]\n"
768             "ldrd r4, r5, [r0, #24]\n"
769             "add  r0, r0, #32\n"
770             "bgt 1b\n"
771 
772             "sub r0, r0, r1\n"
773             "subs r2, r2, #1\n"
774             "bgt 0b\n"
775 
776             "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n"
777           :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
778     }
779 };
780 
781 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark {
782 public:
ReadLdmiaBenchmark()783     ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadLdmiaBenchmark()784     virtual ~ReadLdmiaBenchmark() {}
785 
getName()786     const char *getName() { return "ldmia"; }
787 
788 protected:
789       // Write a given value using stmia.
bench(size_t num_loops)790       void bench(size_t num_loops) {
791           asm volatile(
792               "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
793 
794               "mov r0, %0\n"
795               "mov r1, %1\n"
796               "mov r2, %2\n"
797 
798               "0:\n"
799               "mov r3, r1, lsr #5\n"
800 
801               "1:\n"
802               "subs r3, r3, #1\n"
803               "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n"
804               "bgt 1b\n"
805 
806               "sub r0, r0, r1\n"
807               "subs r2, r2, #1\n"
808               "bgt 0b\n"
809 
810               "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n"
811         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
812     }
813 };
814 
815 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark {
816 public:
ReadVld1Benchmark()817     ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { }
~ReadVld1Benchmark()818     virtual ~ReadVld1Benchmark() {}
819 
getName()820     const char *getName() { return "vld1"; }
821 
usesNeon()822     bool usesNeon() { return true; }
823 
824 protected:
825     // Write a given value using vst.
826 #if defined(__ARM_NEON__)
bench(size_t num_loops)827     void bench(size_t num_loops) {
828         asm volatile(
829             "stmfd sp!, {r0,r1,r2,r3}\n"
830 
831             "mov r0, %0\n"
832             "mov r1, %1\n"
833             "mov r2, %2\n"
834 
835             "0:\n"
836             "mov r3, r1, lsr #5\n"
837 
838             "1:\n"
839             "subs r3, r3, #1\n"
840             "vld1.8 {d0-d3}, [r0:128]!\n"
841             "bgt 1b\n"
842 
843             "sub r0, r0, r1\n"
844             "subs r2, r2, #1\n"
845             "bgt 0b\n"
846 
847             "ldmfd sp!, {r0,r1,r2,r3}\n"
848         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
849 #else
850     void bench(size_t) {
851 #endif
852     }
853 };
854 
855 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark {
856 public:
ReadVldrBenchmark()857     ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadVldrBenchmark()858     virtual ~ReadVldrBenchmark() {}
859 
getName()860     const char *getName() { return "vldr"; }
861 
usesNeon()862     bool usesNeon() { return true; }
863 
864 protected:
865     // Write a given value using vst.
866 #if defined(__ARM_NEON__)
bench(size_t num_loops)867     void bench(size_t num_loops) {
868         asm volatile(
869             "stmfd sp!, {r0,r1,r2,r3}\n"
870 
871             "mov r0, %0\n"
872             "mov r1, %1\n"
873             "mov r2, %2\n"
874 
875             "0:\n"
876             "mov r3, r1, lsr #5\n"
877 
878             "1:\n"
879             "vldr d0, [r0, #0]\n"
880             "subs r3, r3, #1\n"
881             "vldr d1, [r0, #8]\n"
882             "vldr d0, [r0, #16]\n"
883             "vldr d1, [r0, #24]\n"
884             "add r0, r0, #32\n"
885             "bgt 1b\n"
886 
887             "sub r0, r0, r1\n"
888             "subs r2, r2, #1\n"
889             "bgt 0b\n"
890 
891             "ldmfd sp!, {r0,r1,r2,r3}\n"
892         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
893 #else
894     void bench(size_t) {
895 #endif
896     }
897 };
898 
899 
900 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark {
901 public:
ReadVldmiaBenchmark()902     ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { }
~ReadVldmiaBenchmark()903     virtual ~ReadVldmiaBenchmark() {}
904 
getName()905     const char *getName() { return "vldmia"; }
906 
usesNeon()907     bool usesNeon() { return true; }
908 
909 protected:
910     // Write a given value using vstmia.
911 #if defined(__ARM_NEON__)
bench(size_t num_loops)912     void bench(size_t num_loops) {
913         asm volatile(
914             "stmfd sp!, {r0,r1,r2,r3}\n"
915 
916             "mov r0, %0\n"
917             "mov r1, %1\n"
918             "mov r2, %2\n"
919 
920             "0:\n"
921             "mov r3, r1, lsr #5\n"
922 
923             "1:\n"
924             "subs r3, r3, #1\n"
925             "vldmia r0!, {d0-d3}\n"
926             "bgt 1b\n"
927 
928             "sub r0, r0, r1\n"
929             "subs r2, r2, #1\n"
930             "bgt 0b\n"
931 
932             "ldmfd sp!, {r0,r1,r2,r3}\n"
933         :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2");
934 #else
935     void bench(size_t) {
936 #endif
937     }
938 };
939 
940 #endif  // __BANDWIDTH_H__
941