1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef __BANDWIDTH_H__ 18 #define __BANDWIDTH_H__ 19 20 #include <stdlib.h> 21 #include <string.h> 22 23 #include "utils/Compat.h" 24 #include "memtest.h" 25 26 // Bandwidth Class definitions. 27 class BandwidthBenchmark { 28 public: BandwidthBenchmark()29 BandwidthBenchmark() 30 : _size(0), 31 _num_warm_loops(DEFAULT_NUM_WARM_LOOPS), 32 _num_loops(DEFAULT_NUM_LOOPS) {} ~BandwidthBenchmark()33 virtual ~BandwidthBenchmark() {} 34 run()35 bool run() { 36 if (_size == 0) { 37 return false; 38 } 39 if (!canRun()) { 40 return false; 41 } 42 43 bench(_num_warm_loops); 44 45 nsecs_t t = system_time(); 46 bench(_num_loops); 47 t = system_time() - t; 48 49 _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC); 50 51 return true; 52 } 53 canRun()54 bool canRun() { return !usesNeon() || isNeonSupported(); } 55 56 virtual bool setSize(size_t size) = 0; 57 58 virtual const char *getName() = 0; 59 60 virtual bool verify() = 0; 61 usesNeon()62 virtual bool usesNeon() { return false; } 63 isNeonSupported()64 bool isNeonSupported() { 65 #if defined(__ARM_NEON__) 66 return true; 67 #else 68 return false; 69 #endif 70 } 71 72 // Accessors/mutators. mb_per_sec()73 double mb_per_sec() { return _mb_per_sec; } num_warm_loops()74 size_t num_warm_loops() { return _num_warm_loops; } num_loops()75 size_t num_loops() { return _num_loops; } size()76 size_t size() { return _size; } 77 set_num_warm_loops(size_t num_warm_loops)78 void set_num_warm_loops(size_t num_warm_loops) { 79 _num_warm_loops = num_warm_loops; 80 } set_num_loops(size_t num_loops)81 void set_num_loops(size_t num_loops) { _num_loops = num_loops; } 82 83 // Static constants 84 static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000; 85 static const unsigned int DEFAULT_NUM_LOOPS = 20000000; 86 87 protected: 88 virtual void bench(size_t num_loops) = 0; 89 90 double _mb_per_sec; 91 size_t _size; 92 size_t _num_warm_loops; 93 size_t _num_loops; 94 95 private: 96 // Static constants 97 static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0; 98 static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0; 99 }; 100 101 class CopyBandwidthBenchmark : public BandwidthBenchmark { 102 public: CopyBandwidthBenchmark()103 CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { } 104 setSize(size_t size)105 bool setSize(size_t size) { 106 if (_src) { 107 free(_src); 108 } 109 if (_dst) { 110 free(_dst); 111 } 112 113 if (size == 0) { 114 _size = DEFAULT_COPY_SIZE; 115 } else { 116 _size = size; 117 } 118 119 _src = reinterpret_cast<char*>(memalign(64, _size)); 120 if (!_src) { 121 perror("Failed to allocate memory for test."); 122 return false; 123 } 124 _dst = reinterpret_cast<char*>(memalign(64, _size)); 125 if (!_dst) { 126 perror("Failed to allocate memory for test."); 127 return false; 128 } 129 130 return true; 131 } ~CopyBandwidthBenchmark()132 virtual ~CopyBandwidthBenchmark() { 133 if (_src) { 134 free(_src); 135 _src = NULL; 136 } 137 if (_dst) { 138 free(_dst); 139 _dst = NULL; 140 } 141 } 142 verify()143 bool verify() { 144 memset(_src, 0x23, _size); 145 memset(_dst, 0, _size); 146 bench(1); 147 if (memcmp(_src, _dst, _size) != 0) { 148 printf("Buffers failed to compare after one loop.\n"); 149 return false; 150 } 151 152 memset(_src, 0x23, _size); 153 memset(_dst, 0, _size); 154 _num_loops = 2; 155 bench(2); 156 if (memcmp(_src, _dst, _size) != 0) { 157 printf("Buffers failed to compare after two loops.\n"); 158 return false; 159 } 160 161 return true; 162 } 163 164 protected: 165 char *_src; 166 char *_dst; 167 168 static const unsigned int DEFAULT_COPY_SIZE = 8000; 169 }; 170 171 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark { 172 public: CopyLdrdStrdBenchmark()173 CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { } ~CopyLdrdStrdBenchmark()174 virtual ~CopyLdrdStrdBenchmark() {} 175 getName()176 const char *getName() { return "ldrd/strd"; } 177 178 protected: 179 // Copy using ldrd/strd instructions. bench(size_t num_loops)180 void bench(size_t num_loops) { 181 asm volatile( 182 "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n" 183 184 "mov r0, %0\n" 185 "mov r1, %1\n" 186 "mov r2, %2\n" 187 "mov r3, %3\n" 188 189 "0:\n" 190 "mov r4, r2, lsr #6\n" 191 192 "1:\n" 193 "ldrd r6, r7, [r0]\n" 194 "strd r6, r7, [r1]\n" 195 "ldrd r6, r7, [r0, #8]\n" 196 "strd r6, r7, [r1, #8]\n" 197 "ldrd r6, r7, [r0, #16]\n" 198 "strd r6, r7, [r1, #16]\n" 199 "ldrd r6, r7, [r0, #24]\n" 200 "strd r6, r7, [r1, #24]\n" 201 "ldrd r6, r7, [r0, #32]\n" 202 "strd r6, r7, [r1, #32]\n" 203 "ldrd r6, r7, [r0, #40]\n" 204 "strd r6, r7, [r1, #40]\n" 205 "ldrd r6, r7, [r0, #48]\n" 206 "strd r6, r7, [r1, #48]\n" 207 "ldrd r6, r7, [r0, #56]\n" 208 "strd r6, r7, [r1, #56]\n" 209 210 "add r0, r0, #64\n" 211 "add r1, r1, #64\n" 212 "subs r4, r4, #1\n" 213 "bgt 1b\n" 214 215 "sub r0, r0, r2\n" 216 "sub r1, r1, r2\n" 217 "subs r3, r3, #1\n" 218 "bgt 0b\n" 219 220 "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n" 221 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 222 } 223 }; 224 225 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark { 226 public: CopyLdmiaStmiaBenchmark()227 CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { } ~CopyLdmiaStmiaBenchmark()228 virtual ~CopyLdmiaStmiaBenchmark() {} 229 getName()230 const char *getName() { return "ldmia/stmia"; } 231 232 protected: 233 // Copy using ldmia/stmia instructions. bench(size_t num_loops)234 void bench(size_t num_loops) { 235 asm volatile( 236 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n" 237 238 "mov r0, %0\n" 239 "mov r1, %1\n" 240 "mov r2, %2\n" 241 "mov r3, %3\n" 242 243 "0:\n" 244 "mov r4, r2, lsr #6\n" 245 246 "1:\n" 247 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 248 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 249 "subs r4, r4, #1\n" 250 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 251 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 252 "bgt 1b\n" 253 254 "sub r0, r0, r2\n" 255 "sub r1, r1, r2\n" 256 "subs r3, r3, #1\n" 257 "bgt 0b\n" 258 259 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n" 260 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 261 } 262 }; 263 264 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark { 265 public: CopyVld1Vst1Benchmark()266 CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { } ~CopyVld1Vst1Benchmark()267 virtual ~CopyVld1Vst1Benchmark() {} 268 getName()269 const char *getName() { return "vld1/vst1"; } 270 usesNeon()271 bool usesNeon() { return true; } 272 273 protected: 274 // Copy using vld1/vst1 instructions. 275 #if defined(__ARM_NEON__) bench(size_t num_loops)276 void bench(size_t num_loops) { 277 asm volatile( 278 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 279 280 "mov r0, %0\n" 281 "mov r1, %1\n" 282 "mov r2, %2\n" 283 "mov r3, %3\n" 284 285 "0:\n" 286 "mov r4, r2, lsr #6\n" 287 288 "1:\n" 289 "vld1.8 {d0-d3}, [r0]!\n" 290 "vld1.8 {d4-d7}, [r0]!\n" 291 "subs r4, r4, #1\n" 292 "vst1.8 {d0-d3}, [r1:128]!\n" 293 "vst1.8 {d4-d7}, [r1:128]!\n" 294 "bgt 1b\n" 295 296 "sub r0, r0, r2\n" 297 "sub r1, r1, r2\n" 298 "subs r3, r3, #1\n" 299 "bgt 0b\n" 300 301 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 302 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 303 #else 304 void bench(size_t) { 305 #endif 306 } 307 }; 308 309 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark { 310 public: CopyVldrVstrBenchmark()311 CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { } ~CopyVldrVstrBenchmark()312 virtual ~CopyVldrVstrBenchmark() {} 313 getName()314 const char *getName() { return "vldr/vstr"; } 315 usesNeon()316 bool usesNeon() { return true; } 317 318 protected: 319 // Copy using vldr/vstr instructions. 320 #if defined(__ARM_NEON__) bench(size_t num_loops)321 void bench(size_t num_loops) { 322 asm volatile( 323 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 324 325 "mov r0, %0\n" 326 "mov r1, %1\n" 327 "mov r2, %2\n" 328 "mov r3, %3\n" 329 330 "0:\n" 331 "mov r4, r2, lsr #6\n" 332 333 "1:\n" 334 "vldr d0, [r0, #0]\n" 335 "subs r4, r4, #1\n" 336 "vldr d1, [r0, #8]\n" 337 "vstr d0, [r1, #0]\n" 338 "vldr d0, [r0, #16]\n" 339 "vstr d1, [r1, #8]\n" 340 "vldr d1, [r0, #24]\n" 341 "vstr d0, [r1, #16]\n" 342 "vldr d0, [r0, #32]\n" 343 "vstr d1, [r1, #24]\n" 344 "vldr d1, [r0, #40]\n" 345 "vstr d0, [r1, #32]\n" 346 "vldr d0, [r0, #48]\n" 347 "vstr d1, [r1, #40]\n" 348 "vldr d1, [r0, #56]\n" 349 "vstr d0, [r1, #48]\n" 350 "add r0, r0, #64\n" 351 "vstr d1, [r1, #56]\n" 352 "add r1, r1, #64\n" 353 "bgt 1b\n" 354 355 "sub r0, r0, r2\n" 356 "sub r1, r1, r2\n" 357 "subs r3, r3, #1\n" 358 "bgt 0b\n" 359 360 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 361 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 362 #else 363 void bench(size_t) { 364 #endif 365 } 366 }; 367 368 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark { 369 public: CopyVldmiaVstmiaBenchmark()370 CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { } ~CopyVldmiaVstmiaBenchmark()371 virtual ~CopyVldmiaVstmiaBenchmark() {} 372 getName()373 const char *getName() { return "vldmia/vstmia"; } 374 usesNeon()375 bool usesNeon() { return true; } 376 377 protected: 378 // Copy using vldmia/vstmia instructions. 379 #if defined(__ARM_NEON__) bench(size_t num_loops)380 void bench(size_t num_loops) { 381 asm volatile( 382 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 383 384 "mov r0, %0\n" 385 "mov r1, %1\n" 386 "mov r2, %2\n" 387 "mov r3, %3\n" 388 389 "0:\n" 390 "mov r4, r2, lsr #6\n" 391 392 "1:\n" 393 "vldmia r0!, {d0-d7}\n" 394 "subs r4, r4, #1\n" 395 "vstmia r1!, {d0-d7}\n" 396 "bgt 1b\n" 397 398 "sub r0, r0, r2\n" 399 "sub r1, r1, r2\n" 400 "subs r3, r3, #1\n" 401 "bgt 0b\n" 402 403 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 404 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 405 #else 406 void bench(size_t) { 407 #endif 408 } 409 }; 410 411 class MemcpyBenchmark : public CopyBandwidthBenchmark { 412 public: MemcpyBenchmark()413 MemcpyBenchmark() : CopyBandwidthBenchmark() { } ~MemcpyBenchmark()414 virtual ~MemcpyBenchmark() {} 415 getName()416 const char *getName() { return "memcpy"; } 417 418 protected: bench(size_t num_loops)419 void bench(size_t num_loops) { 420 for (size_t i = 0; i < num_loops; i++) { 421 memcpy(_dst, _src, _size); 422 } 423 } 424 }; 425 426 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark { 427 public: SingleBufferBandwidthBenchmark()428 SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { } ~SingleBufferBandwidthBenchmark()429 virtual ~SingleBufferBandwidthBenchmark() { 430 if (_buffer) { 431 free(_buffer); 432 _buffer = NULL; 433 } 434 } 435 setSize(size_t size)436 bool setSize(size_t size) { 437 if (_buffer) { 438 free(_buffer); 439 _buffer = NULL; 440 } 441 442 if (size == 0) { 443 _size = DEFAULT_SINGLE_BUFFER_SIZE; 444 } else { 445 _size = size; 446 } 447 448 _buffer = reinterpret_cast<char*>(memalign(64, _size)); 449 if (!_buffer) { 450 perror("Failed to allocate memory for test."); 451 return false; 452 } 453 memset(_buffer, 0, _size); 454 455 return true; 456 } 457 verify()458 bool verify() { return true; } 459 460 protected: 461 char *_buffer; 462 463 static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000; 464 }; 465 466 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark { 467 public: WriteBandwidthBenchmark()468 WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { } ~WriteBandwidthBenchmark()469 virtual ~WriteBandwidthBenchmark() { } 470 verify()471 bool verify() { 472 memset(_buffer, 0, _size); 473 bench(1); 474 for (size_t i = 0; i < _size; i++) { 475 if (_buffer[i] != 1) { 476 printf("Buffer failed to compare after one loop.\n"); 477 return false; 478 } 479 } 480 481 memset(_buffer, 0, _size); 482 bench(2); 483 for (size_t i = 0; i < _size; i++) { 484 if (_buffer[i] != 2) { 485 printf("Buffer failed to compare after two loops.\n"); 486 return false; 487 } 488 } 489 490 return true; 491 } 492 }; 493 494 class WriteStrdBenchmark : public WriteBandwidthBenchmark { 495 public: WriteStrdBenchmark()496 WriteStrdBenchmark() : WriteBandwidthBenchmark() { } ~WriteStrdBenchmark()497 virtual ~WriteStrdBenchmark() {} 498 getName()499 const char *getName() { return "strd"; } 500 501 protected: 502 // Write a given value using strd. bench(size_t num_loops)503 void bench(size_t num_loops) { 504 asm volatile( 505 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 506 507 "mov r0, %0\n" 508 "mov r1, %1\n" 509 "mov r2, %2\n" 510 511 "mov r4, #0\n" 512 "mov r5, #0\n" 513 514 "0:\n" 515 "mov r3, r1, lsr #5\n" 516 517 "add r4, r4, #0x01010101\n" 518 "mov r5, r4\n" 519 520 "1:\n" 521 "subs r3, r3, #1\n" 522 "strd r4, r5, [r0]\n" 523 "strd r4, r5, [r0, #8]\n" 524 "strd r4, r5, [r0, #16]\n" 525 "strd r4, r5, [r0, #24]\n" 526 "add r0, r0, #32\n" 527 "bgt 1b\n" 528 529 "sub r0, r0, r1\n" 530 "subs r2, r2, #1\n" 531 "bgt 0b\n" 532 533 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 534 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 535 } 536 }; 537 538 class WriteStmiaBenchmark : public WriteBandwidthBenchmark { 539 public: WriteStmiaBenchmark()540 WriteStmiaBenchmark() : WriteBandwidthBenchmark() { } ~WriteStmiaBenchmark()541 virtual ~WriteStmiaBenchmark() {} 542 getName()543 const char *getName() { return "stmia"; } 544 545 protected: 546 // Write a given value using stmia. bench(size_t num_loops)547 void bench(size_t num_loops) { 548 asm volatile( 549 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 550 551 "mov r0, %0\n" 552 "mov r1, %1\n" 553 "mov r2, %2\n" 554 555 "mov r4, #0\n" 556 557 "0:\n" 558 "mov r3, r1, lsr #5\n" 559 560 "add r4, r4, #0x01010101\n" 561 "mov r5, r4\n" 562 "mov r6, r4\n" 563 "mov r7, r4\n" 564 "mov r8, r4\n" 565 "mov r9, r4\n" 566 "mov r10, r4\n" 567 "mov r11, r4\n" 568 569 "1:\n" 570 "subs r3, r3, #1\n" 571 "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n" 572 "bgt 1b\n" 573 574 "sub r0, r0, r1\n" 575 "subs r2, r2, #1\n" 576 "bgt 0b\n" 577 578 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 579 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 580 } 581 }; 582 583 class WriteVst1Benchmark : public WriteBandwidthBenchmark { 584 public: WriteVst1Benchmark()585 WriteVst1Benchmark() : WriteBandwidthBenchmark() { } ~WriteVst1Benchmark()586 virtual ~WriteVst1Benchmark() {} 587 getName()588 const char *getName() { return "vst1"; } 589 usesNeon()590 bool usesNeon() { return true; } 591 592 protected: 593 // Write a given value using vst. 594 #if defined(__ARM_NEON__) bench(size_t num_loops)595 void bench(size_t num_loops) { 596 asm volatile( 597 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 598 599 "mov r0, %0\n" 600 "mov r1, %1\n" 601 "mov r2, %2\n" 602 "mov r4, #0\n" 603 604 "0:\n" 605 "mov r3, r1, lsr #5\n" 606 607 "add r4, r4, #1\n" 608 "vdup.8 d0, r4\n" 609 "vmov d1, d0\n" 610 "vmov d2, d0\n" 611 "vmov d3, d0\n" 612 613 "1:\n" 614 "subs r3, r3, #1\n" 615 "vst1.8 {d0-d3}, [r0:128]!\n" 616 "bgt 1b\n" 617 618 "sub r0, r0, r1\n" 619 "subs r2, r2, #1\n" 620 "bgt 0b\n" 621 622 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 623 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 624 #else 625 void bench(size_t) { 626 #endif 627 } 628 }; 629 630 class WriteVstrBenchmark : public WriteBandwidthBenchmark { 631 public: WriteVstrBenchmark()632 WriteVstrBenchmark() : WriteBandwidthBenchmark() { } ~WriteVstrBenchmark()633 virtual ~WriteVstrBenchmark() {} 634 getName()635 const char *getName() { return "vstr"; } 636 usesNeon()637 bool usesNeon() { return true; } 638 639 protected: 640 // Write a given value using vst. 641 #if defined(__ARM_NEON__) bench(size_t num_loops)642 void bench(size_t num_loops) { 643 asm volatile( 644 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 645 646 "mov r0, %0\n" 647 "mov r1, %1\n" 648 "mov r2, %2\n" 649 "mov r4, #0\n" 650 651 "0:\n" 652 "mov r3, r1, lsr #5\n" 653 654 "add r4, r4, #1\n" 655 "vdup.8 d0, r4\n" 656 "vmov d1, d0\n" 657 "vmov d2, d0\n" 658 "vmov d3, d0\n" 659 660 "1:\n" 661 "vstr d0, [r0, #0]\n" 662 "subs r3, r3, #1\n" 663 "vstr d1, [r0, #8]\n" 664 "vstr d0, [r0, #16]\n" 665 "vstr d1, [r0, #24]\n" 666 "add r0, r0, #32\n" 667 "bgt 1b\n" 668 669 "sub r0, r0, r1\n" 670 "subs r2, r2, #1\n" 671 "bgt 0b\n" 672 673 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 674 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 675 #else 676 void bench(size_t) { 677 #endif 678 } 679 }; 680 681 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark { 682 public: WriteVstmiaBenchmark()683 WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { } ~WriteVstmiaBenchmark()684 virtual ~WriteVstmiaBenchmark() {} 685 getName()686 const char *getName() { return "vstmia"; } 687 usesNeon()688 bool usesNeon() { return true; } 689 690 protected: 691 // Write a given value using vstmia. 692 #if defined(__ARM_NEON__) bench(size_t num_loops)693 void bench(size_t num_loops) { 694 asm volatile( 695 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 696 697 "mov r0, %0\n" 698 "mov r1, %1\n" 699 "mov r2, %2\n" 700 "mov r4, #0\n" 701 702 "0:\n" 703 "mov r3, r1, lsr #5\n" 704 705 "add r4, r4, #1\n" 706 "vdup.8 d0, r4\n" 707 "vmov d1, d0\n" 708 "vmov d2, d0\n" 709 "vmov d3, d0\n" 710 711 "1:\n" 712 "subs r3, r3, #1\n" 713 "vstmia r0!, {d0-d3}\n" 714 "bgt 1b\n" 715 716 "sub r0, r0, r1\n" 717 "subs r2, r2, #1\n" 718 "bgt 0b\n" 719 720 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 721 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 722 #else 723 void bench(size_t) { 724 #endif 725 } 726 }; 727 728 class MemsetBenchmark : public WriteBandwidthBenchmark { 729 public: MemsetBenchmark()730 MemsetBenchmark() : WriteBandwidthBenchmark() { } ~MemsetBenchmark()731 virtual ~MemsetBenchmark() {} 732 getName()733 const char *getName() { return "memset"; } 734 735 protected: bench(size_t num_loops)736 void bench(size_t num_loops) { 737 for (size_t i = 0; i < num_loops; i++) { 738 memset(_buffer, (i % 255) + 1, _size); 739 } 740 } 741 }; 742 743 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark { 744 public: ReadLdrdBenchmark()745 ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { } ~ReadLdrdBenchmark()746 virtual ~ReadLdrdBenchmark() {} 747 getName()748 const char *getName() { return "ldrd"; } 749 750 protected: 751 // Write a given value using strd. bench(size_t num_loops)752 void bench(size_t num_loops) { 753 asm volatile( 754 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 755 756 "mov r0, %0\n" 757 "mov r1, %1\n" 758 "mov r2, %2\n" 759 760 "0:\n" 761 "mov r3, r1, lsr #5\n" 762 763 "1:\n" 764 "subs r3, r3, #1\n" 765 "ldrd r4, r5, [r0]\n" 766 "ldrd r4, r5, [r0, #8]\n" 767 "ldrd r4, r5, [r0, #16]\n" 768 "ldrd r4, r5, [r0, #24]\n" 769 "add r0, r0, #32\n" 770 "bgt 1b\n" 771 772 "sub r0, r0, r1\n" 773 "subs r2, r2, #1\n" 774 "bgt 0b\n" 775 776 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 777 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 778 } 779 }; 780 781 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark { 782 public: ReadLdmiaBenchmark()783 ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { } ~ReadLdmiaBenchmark()784 virtual ~ReadLdmiaBenchmark() {} 785 getName()786 const char *getName() { return "ldmia"; } 787 788 protected: 789 // Write a given value using stmia. bench(size_t num_loops)790 void bench(size_t num_loops) { 791 asm volatile( 792 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 793 794 "mov r0, %0\n" 795 "mov r1, %1\n" 796 "mov r2, %2\n" 797 798 "0:\n" 799 "mov r3, r1, lsr #5\n" 800 801 "1:\n" 802 "subs r3, r3, #1\n" 803 "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n" 804 "bgt 1b\n" 805 806 "sub r0, r0, r1\n" 807 "subs r2, r2, #1\n" 808 "bgt 0b\n" 809 810 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 811 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 812 } 813 }; 814 815 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark { 816 public: ReadVld1Benchmark()817 ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { } ~ReadVld1Benchmark()818 virtual ~ReadVld1Benchmark() {} 819 getName()820 const char *getName() { return "vld1"; } 821 usesNeon()822 bool usesNeon() { return true; } 823 824 protected: 825 // Write a given value using vst. 826 #if defined(__ARM_NEON__) bench(size_t num_loops)827 void bench(size_t num_loops) { 828 asm volatile( 829 "stmfd sp!, {r0,r1,r2,r3}\n" 830 831 "mov r0, %0\n" 832 "mov r1, %1\n" 833 "mov r2, %2\n" 834 835 "0:\n" 836 "mov r3, r1, lsr #5\n" 837 838 "1:\n" 839 "subs r3, r3, #1\n" 840 "vld1.8 {d0-d3}, [r0:128]!\n" 841 "bgt 1b\n" 842 843 "sub r0, r0, r1\n" 844 "subs r2, r2, #1\n" 845 "bgt 0b\n" 846 847 "ldmfd sp!, {r0,r1,r2,r3}\n" 848 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 849 #else 850 void bench(size_t) { 851 #endif 852 } 853 }; 854 855 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark { 856 public: ReadVldrBenchmark()857 ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { } ~ReadVldrBenchmark()858 virtual ~ReadVldrBenchmark() {} 859 getName()860 const char *getName() { return "vldr"; } 861 usesNeon()862 bool usesNeon() { return true; } 863 864 protected: 865 // Write a given value using vst. 866 #if defined(__ARM_NEON__) bench(size_t num_loops)867 void bench(size_t num_loops) { 868 asm volatile( 869 "stmfd sp!, {r0,r1,r2,r3}\n" 870 871 "mov r0, %0\n" 872 "mov r1, %1\n" 873 "mov r2, %2\n" 874 875 "0:\n" 876 "mov r3, r1, lsr #5\n" 877 878 "1:\n" 879 "vldr d0, [r0, #0]\n" 880 "subs r3, r3, #1\n" 881 "vldr d1, [r0, #8]\n" 882 "vldr d0, [r0, #16]\n" 883 "vldr d1, [r0, #24]\n" 884 "add r0, r0, #32\n" 885 "bgt 1b\n" 886 887 "sub r0, r0, r1\n" 888 "subs r2, r2, #1\n" 889 "bgt 0b\n" 890 891 "ldmfd sp!, {r0,r1,r2,r3}\n" 892 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 893 #else 894 void bench(size_t) { 895 #endif 896 } 897 }; 898 899 900 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark { 901 public: ReadVldmiaBenchmark()902 ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { } ~ReadVldmiaBenchmark()903 virtual ~ReadVldmiaBenchmark() {} 904 getName()905 const char *getName() { return "vldmia"; } 906 usesNeon()907 bool usesNeon() { return true; } 908 909 protected: 910 // Write a given value using vstmia. 911 #if defined(__ARM_NEON__) bench(size_t num_loops)912 void bench(size_t num_loops) { 913 asm volatile( 914 "stmfd sp!, {r0,r1,r2,r3}\n" 915 916 "mov r0, %0\n" 917 "mov r1, %1\n" 918 "mov r2, %2\n" 919 920 "0:\n" 921 "mov r3, r1, lsr #5\n" 922 923 "1:\n" 924 "subs r3, r3, #1\n" 925 "vldmia r0!, {d0-d3}\n" 926 "bgt 1b\n" 927 928 "sub r0, r0, r1\n" 929 "subs r2, r2, #1\n" 930 "bgt 0b\n" 931 932 "ldmfd sp!, {r0,r1,r2,r3}\n" 933 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 934 #else 935 void bench(size_t) { 936 #endif 937 } 938 }; 939 940 #endif // __BANDWIDTH_H__ 941