1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef __BANDWIDTH_H__ 18 #define __BANDWIDTH_H__ 19 20 #include <stdlib.h> 21 #include <string.h> 22 23 #include "utils/Compat.h" 24 #include "memtest.h" 25 26 // Bandwidth Class definitions. 27 class BandwidthBenchmark { 28 public: BandwidthBenchmark()29 BandwidthBenchmark() 30 : _size(0), 31 _num_warm_loops(DEFAULT_NUM_WARM_LOOPS), 32 _num_loops(DEFAULT_NUM_LOOPS) {} ~BandwidthBenchmark()33 virtual ~BandwidthBenchmark() {} 34 run()35 bool run() { 36 if (_size == 0) { 37 return false; 38 } 39 if (!canRun()) { 40 return false; 41 } 42 43 bench(_num_warm_loops); 44 45 nsecs_t t = system_time(); 46 bench(_num_loops); 47 t = system_time() - t; 48 49 _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC); 50 51 return true; 52 } 53 canRun()54 bool canRun() { return !usesNeon() || isNeonSupported(); } 55 56 virtual bool setSize(size_t size) = 0; 57 58 virtual const char *getName() = 0; 59 60 virtual bool verify() = 0; 61 usesNeon()62 virtual bool usesNeon() { return false; } 63 isNeonSupported()64 bool isNeonSupported() { 65 #if defined(__ARM_NEON__) 66 return true; 67 #else 68 return false; 69 #endif 70 } 71 72 // Accessors/mutators. mb_per_sec()73 double mb_per_sec() { return _mb_per_sec; } num_warm_loops()74 size_t num_warm_loops() { return _num_warm_loops; } num_loops()75 size_t num_loops() { return _num_loops; } size()76 size_t size() { return _size; } 77 set_num_warm_loops(size_t num_warm_loops)78 void set_num_warm_loops(size_t num_warm_loops) { 79 _num_warm_loops = num_warm_loops; 80 } set_num_loops(size_t num_loops)81 void set_num_loops(size_t num_loops) { _num_loops = num_loops; } 82 83 // Static constants 84 static const unsigned int DEFAULT_NUM_WARM_LOOPS = 1000000; 85 static const unsigned int DEFAULT_NUM_LOOPS = 20000000; 86 87 protected: 88 virtual void bench(size_t num_loops) = 0; 89 90 double _mb_per_sec; 91 size_t _size; 92 size_t _num_warm_loops; 93 size_t _num_loops; 94 95 private: 96 // Static constants 97 static const CONSTEXPR double _NUM_NS_PER_SEC = 1000000000.0; 98 static const CONSTEXPR double _BYTES_PER_MB = 1024.0* 1024.0; 99 }; 100 101 class CopyBandwidthBenchmark : public BandwidthBenchmark { 102 public: CopyBandwidthBenchmark()103 CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { } 104 setSize(size_t size)105 bool setSize(size_t size) { 106 if (_src) { 107 free(_src); 108 _src = NULL; 109 } 110 if (_dst) { 111 free(_dst); 112 _dst = NULL; 113 } 114 115 if (size == 0) { 116 _size = DEFAULT_COPY_SIZE; 117 } else { 118 _size = size; 119 } 120 121 _src = reinterpret_cast<char*>(memalign(64, _size)); 122 if (!_src) { 123 perror("Failed to allocate memory for test."); 124 return false; 125 } 126 _dst = reinterpret_cast<char*>(memalign(64, _size)); 127 if (!_dst) { 128 perror("Failed to allocate memory for test."); 129 return false; 130 } 131 132 return true; 133 } ~CopyBandwidthBenchmark()134 virtual ~CopyBandwidthBenchmark() { 135 if (_src) { 136 free(_src); 137 _src = NULL; 138 } 139 if (_dst) { 140 free(_dst); 141 _dst = NULL; 142 } 143 } 144 verify()145 bool verify() { 146 memset(_src, 0x23, _size); 147 memset(_dst, 0, _size); 148 bench(1); 149 if (memcmp(_src, _dst, _size) != 0) { 150 printf("Buffers failed to compare after one loop.\n"); 151 return false; 152 } 153 154 memset(_src, 0x23, _size); 155 memset(_dst, 0, _size); 156 _num_loops = 2; 157 bench(2); 158 if (memcmp(_src, _dst, _size) != 0) { 159 printf("Buffers failed to compare after two loops.\n"); 160 return false; 161 } 162 163 return true; 164 } 165 166 protected: 167 char *_src; 168 char *_dst; 169 170 static const unsigned int DEFAULT_COPY_SIZE = 8000; 171 }; 172 173 class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark { 174 public: CopyLdrdStrdBenchmark()175 CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { } ~CopyLdrdStrdBenchmark()176 virtual ~CopyLdrdStrdBenchmark() {} 177 getName()178 const char *getName() { return "ldrd/strd"; } 179 180 protected: 181 // Copy using ldrd/strd instructions. bench(size_t num_loops)182 void bench(size_t num_loops) { 183 asm volatile( 184 "stmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n" 185 186 "mov r0, %0\n" 187 "mov r1, %1\n" 188 "mov r2, %2\n" 189 "mov r3, %3\n" 190 191 "0:\n" 192 "mov r4, r2, lsr #6\n" 193 194 "1:\n" 195 "ldrd r6, r7, [r0]\n" 196 "strd r6, r7, [r1]\n" 197 "ldrd r6, r7, [r0, #8]\n" 198 "strd r6, r7, [r1, #8]\n" 199 "ldrd r6, r7, [r0, #16]\n" 200 "strd r6, r7, [r1, #16]\n" 201 "ldrd r6, r7, [r0, #24]\n" 202 "strd r6, r7, [r1, #24]\n" 203 "ldrd r6, r7, [r0, #32]\n" 204 "strd r6, r7, [r1, #32]\n" 205 "ldrd r6, r7, [r0, #40]\n" 206 "strd r6, r7, [r1, #40]\n" 207 "ldrd r6, r7, [r0, #48]\n" 208 "strd r6, r7, [r1, #48]\n" 209 "ldrd r6, r7, [r0, #56]\n" 210 "strd r6, r7, [r1, #56]\n" 211 212 "add r0, r0, #64\n" 213 "add r1, r1, #64\n" 214 "subs r4, r4, #1\n" 215 "bgt 1b\n" 216 217 "sub r0, r0, r2\n" 218 "sub r1, r1, r2\n" 219 "subs r3, r3, #1\n" 220 "bgt 0b\n" 221 222 "ldmfd sp!, {r0,r1,r2,r3,r4,r6,r7}\n" 223 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 224 } 225 }; 226 227 class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark { 228 public: CopyLdmiaStmiaBenchmark()229 CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { } ~CopyLdmiaStmiaBenchmark()230 virtual ~CopyLdmiaStmiaBenchmark() {} 231 getName()232 const char *getName() { return "ldmia/stmia"; } 233 234 protected: 235 // Copy using ldmia/stmia instructions. bench(size_t num_loops)236 void bench(size_t num_loops) { 237 asm volatile( 238 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n" 239 240 "mov r0, %0\n" 241 "mov r1, %1\n" 242 "mov r2, %2\n" 243 "mov r3, %3\n" 244 245 "0:\n" 246 "mov r4, r2, lsr #6\n" 247 248 "1:\n" 249 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 250 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 251 "subs r4, r4, #1\n" 252 "ldmia r0!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 253 "stmia r1!, {r5, r6, r7, r8, r9, r10, r11, r12}\n" 254 "bgt 1b\n" 255 256 "sub r0, r0, r2\n" 257 "sub r1, r1, r2\n" 258 "subs r3, r3, #1\n" 259 "bgt 0b\n" 260 261 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}\n" 262 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 263 } 264 }; 265 266 class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark { 267 public: CopyVld1Vst1Benchmark()268 CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { } ~CopyVld1Vst1Benchmark()269 virtual ~CopyVld1Vst1Benchmark() {} 270 getName()271 const char *getName() { return "vld1/vst1"; } 272 usesNeon()273 bool usesNeon() { return true; } 274 275 protected: 276 // Copy using vld1/vst1 instructions. 277 #if defined(__ARM_NEON__) bench(size_t num_loops)278 void bench(size_t num_loops) { 279 asm volatile( 280 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 281 282 "mov r0, %0\n" 283 "mov r1, %1\n" 284 "mov r2, %2\n" 285 "mov r3, %3\n" 286 287 "0:\n" 288 "mov r4, r2, lsr #6\n" 289 290 "1:\n" 291 "vld1.8 {d0-d3}, [r0]!\n" 292 "vld1.8 {d4-d7}, [r0]!\n" 293 "subs r4, r4, #1\n" 294 "vst1.8 {d0-d3}, [r1:128]!\n" 295 "vst1.8 {d4-d7}, [r1:128]!\n" 296 "bgt 1b\n" 297 298 "sub r0, r0, r2\n" 299 "sub r1, r1, r2\n" 300 "subs r3, r3, #1\n" 301 "bgt 0b\n" 302 303 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 304 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 305 #else 306 void bench(size_t) { 307 #endif 308 } 309 }; 310 311 class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark { 312 public: CopyVldrVstrBenchmark()313 CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { } ~CopyVldrVstrBenchmark()314 virtual ~CopyVldrVstrBenchmark() {} 315 getName()316 const char *getName() { return "vldr/vstr"; } 317 usesNeon()318 bool usesNeon() { return true; } 319 320 protected: 321 // Copy using vldr/vstr instructions. 322 #if defined(__ARM_NEON__) bench(size_t num_loops)323 void bench(size_t num_loops) { 324 asm volatile( 325 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 326 327 "mov r0, %0\n" 328 "mov r1, %1\n" 329 "mov r2, %2\n" 330 "mov r3, %3\n" 331 332 "0:\n" 333 "mov r4, r2, lsr #6\n" 334 335 "1:\n" 336 "vldr d0, [r0, #0]\n" 337 "subs r4, r4, #1\n" 338 "vldr d1, [r0, #8]\n" 339 "vstr d0, [r1, #0]\n" 340 "vldr d0, [r0, #16]\n" 341 "vstr d1, [r1, #8]\n" 342 "vldr d1, [r0, #24]\n" 343 "vstr d0, [r1, #16]\n" 344 "vldr d0, [r0, #32]\n" 345 "vstr d1, [r1, #24]\n" 346 "vldr d1, [r0, #40]\n" 347 "vstr d0, [r1, #32]\n" 348 "vldr d0, [r0, #48]\n" 349 "vstr d1, [r1, #40]\n" 350 "vldr d1, [r0, #56]\n" 351 "vstr d0, [r1, #48]\n" 352 "add r0, r0, #64\n" 353 "vstr d1, [r1, #56]\n" 354 "add r1, r1, #64\n" 355 "bgt 1b\n" 356 357 "sub r0, r0, r2\n" 358 "sub r1, r1, r2\n" 359 "subs r3, r3, #1\n" 360 "bgt 0b\n" 361 362 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 363 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 364 #else 365 void bench(size_t) { 366 #endif 367 } 368 }; 369 370 class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark { 371 public: CopyVldmiaVstmiaBenchmark()372 CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { } ~CopyVldmiaVstmiaBenchmark()373 virtual ~CopyVldmiaVstmiaBenchmark() {} 374 getName()375 const char *getName() { return "vldmia/vstmia"; } 376 usesNeon()377 bool usesNeon() { return true; } 378 379 protected: 380 // Copy using vldmia/vstmia instructions. 381 #if defined(__ARM_NEON__) bench(size_t num_loops)382 void bench(size_t num_loops) { 383 asm volatile( 384 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 385 386 "mov r0, %0\n" 387 "mov r1, %1\n" 388 "mov r2, %2\n" 389 "mov r3, %3\n" 390 391 "0:\n" 392 "mov r4, r2, lsr #6\n" 393 394 "1:\n" 395 "vldmia r0!, {d0-d7}\n" 396 "subs r4, r4, #1\n" 397 "vstmia r1!, {d0-d7}\n" 398 "bgt 1b\n" 399 400 "sub r0, r0, r2\n" 401 "sub r1, r1, r2\n" 402 "subs r3, r3, #1\n" 403 "bgt 0b\n" 404 405 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 406 :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); 407 #else 408 void bench(size_t) { 409 #endif 410 } 411 }; 412 413 class MemcpyBenchmark : public CopyBandwidthBenchmark { 414 public: MemcpyBenchmark()415 MemcpyBenchmark() : CopyBandwidthBenchmark() { } ~MemcpyBenchmark()416 virtual ~MemcpyBenchmark() {} 417 getName()418 const char *getName() { return "memcpy"; } 419 420 protected: bench(size_t num_loops)421 void bench(size_t num_loops) { 422 for (size_t i = 0; i < num_loops; i++) { 423 memcpy(_dst, _src, _size); 424 } 425 } 426 }; 427 428 class SingleBufferBandwidthBenchmark : public BandwidthBenchmark { 429 public: SingleBufferBandwidthBenchmark()430 SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { } ~SingleBufferBandwidthBenchmark()431 virtual ~SingleBufferBandwidthBenchmark() { 432 if (_buffer) { 433 free(_buffer); 434 _buffer = NULL; 435 } 436 } 437 setSize(size_t size)438 bool setSize(size_t size) { 439 if (_buffer) { 440 free(_buffer); 441 _buffer = NULL; 442 } 443 444 if (size == 0) { 445 _size = DEFAULT_SINGLE_BUFFER_SIZE; 446 } else { 447 _size = size; 448 } 449 450 _buffer = reinterpret_cast<char*>(memalign(64, _size)); 451 if (!_buffer) { 452 perror("Failed to allocate memory for test."); 453 return false; 454 } 455 memset(_buffer, 0, _size); 456 457 return true; 458 } 459 verify()460 bool verify() { return true; } 461 462 protected: 463 char *_buffer; 464 465 static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000; 466 }; 467 468 class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark { 469 public: WriteBandwidthBenchmark()470 WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { } ~WriteBandwidthBenchmark()471 virtual ~WriteBandwidthBenchmark() { } 472 verify()473 bool verify() { 474 memset(_buffer, 0, _size); 475 bench(1); 476 for (size_t i = 0; i < _size; i++) { 477 if (_buffer[i] != 1) { 478 printf("Buffer failed to compare after one loop.\n"); 479 return false; 480 } 481 } 482 483 memset(_buffer, 0, _size); 484 bench(2); 485 for (size_t i = 0; i < _size; i++) { 486 if (_buffer[i] != 2) { 487 printf("Buffer failed to compare after two loops.\n"); 488 return false; 489 } 490 } 491 492 return true; 493 } 494 }; 495 496 class WriteStrdBenchmark : public WriteBandwidthBenchmark { 497 public: WriteStrdBenchmark()498 WriteStrdBenchmark() : WriteBandwidthBenchmark() { } ~WriteStrdBenchmark()499 virtual ~WriteStrdBenchmark() {} 500 getName()501 const char *getName() { return "strd"; } 502 503 protected: 504 // Write a given value using strd. bench(size_t num_loops)505 void bench(size_t num_loops) { 506 asm volatile( 507 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 508 509 "mov r0, %0\n" 510 "mov r1, %1\n" 511 "mov r2, %2\n" 512 513 "mov r4, #0\n" 514 "mov r5, #0\n" 515 516 "0:\n" 517 "mov r3, r1, lsr #5\n" 518 519 "add r4, r4, #0x01010101\n" 520 "mov r5, r4\n" 521 522 "1:\n" 523 "subs r3, r3, #1\n" 524 "strd r4, r5, [r0]\n" 525 "strd r4, r5, [r0, #8]\n" 526 "strd r4, r5, [r0, #16]\n" 527 "strd r4, r5, [r0, #24]\n" 528 "add r0, r0, #32\n" 529 "bgt 1b\n" 530 531 "sub r0, r0, r1\n" 532 "subs r2, r2, #1\n" 533 "bgt 0b\n" 534 535 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 536 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 537 } 538 }; 539 540 class WriteStmiaBenchmark : public WriteBandwidthBenchmark { 541 public: WriteStmiaBenchmark()542 WriteStmiaBenchmark() : WriteBandwidthBenchmark() { } ~WriteStmiaBenchmark()543 virtual ~WriteStmiaBenchmark() {} 544 getName()545 const char *getName() { return "stmia"; } 546 547 protected: 548 // Write a given value using stmia. bench(size_t num_loops)549 void bench(size_t num_loops) { 550 asm volatile( 551 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 552 553 "mov r0, %0\n" 554 "mov r1, %1\n" 555 "mov r2, %2\n" 556 557 "mov r4, #0\n" 558 559 "0:\n" 560 "mov r3, r1, lsr #5\n" 561 562 "add r4, r4, #0x01010101\n" 563 "mov r5, r4\n" 564 "mov r6, r4\n" 565 "mov r7, r4\n" 566 "mov r8, r4\n" 567 "mov r9, r4\n" 568 "mov r10, r4\n" 569 "mov r11, r4\n" 570 571 "1:\n" 572 "subs r3, r3, #1\n" 573 "stmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n" 574 "bgt 1b\n" 575 576 "sub r0, r0, r1\n" 577 "subs r2, r2, #1\n" 578 "bgt 0b\n" 579 580 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 581 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 582 } 583 }; 584 585 class WriteVst1Benchmark : public WriteBandwidthBenchmark { 586 public: WriteVst1Benchmark()587 WriteVst1Benchmark() : WriteBandwidthBenchmark() { } ~WriteVst1Benchmark()588 virtual ~WriteVst1Benchmark() {} 589 getName()590 const char *getName() { return "vst1"; } 591 usesNeon()592 bool usesNeon() { return true; } 593 594 protected: 595 // Write a given value using vst. 596 #if defined(__ARM_NEON__) bench(size_t num_loops)597 void bench(size_t num_loops) { 598 asm volatile( 599 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 600 601 "mov r0, %0\n" 602 "mov r1, %1\n" 603 "mov r2, %2\n" 604 "mov r4, #0\n" 605 606 "0:\n" 607 "mov r3, r1, lsr #5\n" 608 609 "add r4, r4, #1\n" 610 "vdup.8 d0, r4\n" 611 "vmov d1, d0\n" 612 "vmov d2, d0\n" 613 "vmov d3, d0\n" 614 615 "1:\n" 616 "subs r3, r3, #1\n" 617 "vst1.8 {d0-d3}, [r0:128]!\n" 618 "bgt 1b\n" 619 620 "sub r0, r0, r1\n" 621 "subs r2, r2, #1\n" 622 "bgt 0b\n" 623 624 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 625 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 626 #else 627 void bench(size_t) { 628 #endif 629 } 630 }; 631 632 class WriteVstrBenchmark : public WriteBandwidthBenchmark { 633 public: WriteVstrBenchmark()634 WriteVstrBenchmark() : WriteBandwidthBenchmark() { } ~WriteVstrBenchmark()635 virtual ~WriteVstrBenchmark() {} 636 getName()637 const char *getName() { return "vstr"; } 638 usesNeon()639 bool usesNeon() { return true; } 640 641 protected: 642 // Write a given value using vst. 643 #if defined(__ARM_NEON__) bench(size_t num_loops)644 void bench(size_t num_loops) { 645 asm volatile( 646 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 647 648 "mov r0, %0\n" 649 "mov r1, %1\n" 650 "mov r2, %2\n" 651 "mov r4, #0\n" 652 653 "0:\n" 654 "mov r3, r1, lsr #5\n" 655 656 "add r4, r4, #1\n" 657 "vdup.8 d0, r4\n" 658 "vmov d1, d0\n" 659 "vmov d2, d0\n" 660 "vmov d3, d0\n" 661 662 "1:\n" 663 "vstr d0, [r0, #0]\n" 664 "subs r3, r3, #1\n" 665 "vstr d1, [r0, #8]\n" 666 "vstr d0, [r0, #16]\n" 667 "vstr d1, [r0, #24]\n" 668 "add r0, r0, #32\n" 669 "bgt 1b\n" 670 671 "sub r0, r0, r1\n" 672 "subs r2, r2, #1\n" 673 "bgt 0b\n" 674 675 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 676 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 677 #else 678 void bench(size_t) { 679 #endif 680 } 681 }; 682 683 class WriteVstmiaBenchmark : public WriteBandwidthBenchmark { 684 public: WriteVstmiaBenchmark()685 WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { } ~WriteVstmiaBenchmark()686 virtual ~WriteVstmiaBenchmark() {} 687 getName()688 const char *getName() { return "vstmia"; } 689 usesNeon()690 bool usesNeon() { return true; } 691 692 protected: 693 // Write a given value using vstmia. 694 #if defined(__ARM_NEON__) bench(size_t num_loops)695 void bench(size_t num_loops) { 696 asm volatile( 697 "stmfd sp!, {r0,r1,r2,r3,r4}\n" 698 699 "mov r0, %0\n" 700 "mov r1, %1\n" 701 "mov r2, %2\n" 702 "mov r4, #0\n" 703 704 "0:\n" 705 "mov r3, r1, lsr #5\n" 706 707 "add r4, r4, #1\n" 708 "vdup.8 d0, r4\n" 709 "vmov d1, d0\n" 710 "vmov d2, d0\n" 711 "vmov d3, d0\n" 712 713 "1:\n" 714 "subs r3, r3, #1\n" 715 "vstmia r0!, {d0-d3}\n" 716 "bgt 1b\n" 717 718 "sub r0, r0, r1\n" 719 "subs r2, r2, #1\n" 720 "bgt 0b\n" 721 722 "ldmfd sp!, {r0,r1,r2,r3,r4}\n" 723 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 724 #else 725 void bench(size_t) { 726 #endif 727 } 728 }; 729 730 class MemsetBenchmark : public WriteBandwidthBenchmark { 731 public: MemsetBenchmark()732 MemsetBenchmark() : WriteBandwidthBenchmark() { } ~MemsetBenchmark()733 virtual ~MemsetBenchmark() {} 734 getName()735 const char *getName() { return "memset"; } 736 737 protected: bench(size_t num_loops)738 void bench(size_t num_loops) { 739 for (size_t i = 0; i < num_loops; i++) { 740 memset(_buffer, (i % 255) + 1, _size); 741 } 742 } 743 }; 744 745 class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark { 746 public: ReadLdrdBenchmark()747 ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { } ~ReadLdrdBenchmark()748 virtual ~ReadLdrdBenchmark() {} 749 getName()750 const char *getName() { return "ldrd"; } 751 752 protected: 753 // Write a given value using strd. bench(size_t num_loops)754 void bench(size_t num_loops) { 755 asm volatile( 756 "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 757 758 "mov r0, %0\n" 759 "mov r1, %1\n" 760 "mov r2, %2\n" 761 762 "0:\n" 763 "mov r3, r1, lsr #5\n" 764 765 "1:\n" 766 "subs r3, r3, #1\n" 767 "ldrd r4, r5, [r0]\n" 768 "ldrd r4, r5, [r0, #8]\n" 769 "ldrd r4, r5, [r0, #16]\n" 770 "ldrd r4, r5, [r0, #24]\n" 771 "add r0, r0, #32\n" 772 "bgt 1b\n" 773 774 "sub r0, r0, r1\n" 775 "subs r2, r2, #1\n" 776 "bgt 0b\n" 777 778 "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n" 779 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 780 } 781 }; 782 783 class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark { 784 public: ReadLdmiaBenchmark()785 ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { } ~ReadLdmiaBenchmark()786 virtual ~ReadLdmiaBenchmark() {} 787 getName()788 const char *getName() { return "ldmia"; } 789 790 protected: 791 // Write a given value using stmia. bench(size_t num_loops)792 void bench(size_t num_loops) { 793 asm volatile( 794 "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 795 796 "mov r0, %0\n" 797 "mov r1, %1\n" 798 "mov r2, %2\n" 799 800 "0:\n" 801 "mov r3, r1, lsr #5\n" 802 803 "1:\n" 804 "subs r3, r3, #1\n" 805 "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n" 806 "bgt 1b\n" 807 808 "sub r0, r0, r1\n" 809 "subs r2, r2, #1\n" 810 "bgt 0b\n" 811 812 "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" 813 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 814 } 815 }; 816 817 class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark { 818 public: ReadVld1Benchmark()819 ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { } ~ReadVld1Benchmark()820 virtual ~ReadVld1Benchmark() {} 821 getName()822 const char *getName() { return "vld1"; } 823 usesNeon()824 bool usesNeon() { return true; } 825 826 protected: 827 // Write a given value using vst. 828 #if defined(__ARM_NEON__) bench(size_t num_loops)829 void bench(size_t num_loops) { 830 asm volatile( 831 "stmfd sp!, {r0,r1,r2,r3}\n" 832 833 "mov r0, %0\n" 834 "mov r1, %1\n" 835 "mov r2, %2\n" 836 837 "0:\n" 838 "mov r3, r1, lsr #5\n" 839 840 "1:\n" 841 "subs r3, r3, #1\n" 842 "vld1.8 {d0-d3}, [r0:128]!\n" 843 "bgt 1b\n" 844 845 "sub r0, r0, r1\n" 846 "subs r2, r2, #1\n" 847 "bgt 0b\n" 848 849 "ldmfd sp!, {r0,r1,r2,r3}\n" 850 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 851 #else 852 void bench(size_t) { 853 #endif 854 } 855 }; 856 857 class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark { 858 public: ReadVldrBenchmark()859 ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { } ~ReadVldrBenchmark()860 virtual ~ReadVldrBenchmark() {} 861 getName()862 const char *getName() { return "vldr"; } 863 usesNeon()864 bool usesNeon() { return true; } 865 866 protected: 867 // Write a given value using vst. 868 #if defined(__ARM_NEON__) bench(size_t num_loops)869 void bench(size_t num_loops) { 870 asm volatile( 871 "stmfd sp!, {r0,r1,r2,r3}\n" 872 873 "mov r0, %0\n" 874 "mov r1, %1\n" 875 "mov r2, %2\n" 876 877 "0:\n" 878 "mov r3, r1, lsr #5\n" 879 880 "1:\n" 881 "vldr d0, [r0, #0]\n" 882 "subs r3, r3, #1\n" 883 "vldr d1, [r0, #8]\n" 884 "vldr d0, [r0, #16]\n" 885 "vldr d1, [r0, #24]\n" 886 "add r0, r0, #32\n" 887 "bgt 1b\n" 888 889 "sub r0, r0, r1\n" 890 "subs r2, r2, #1\n" 891 "bgt 0b\n" 892 893 "ldmfd sp!, {r0,r1,r2,r3}\n" 894 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 895 #else 896 void bench(size_t) { 897 #endif 898 } 899 }; 900 901 902 class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark { 903 public: ReadVldmiaBenchmark()904 ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { } ~ReadVldmiaBenchmark()905 virtual ~ReadVldmiaBenchmark() {} 906 getName()907 const char *getName() { return "vldmia"; } 908 usesNeon()909 bool usesNeon() { return true; } 910 911 protected: 912 // Write a given value using vstmia. 913 #if defined(__ARM_NEON__) bench(size_t num_loops)914 void bench(size_t num_loops) { 915 asm volatile( 916 "stmfd sp!, {r0,r1,r2,r3}\n" 917 918 "mov r0, %0\n" 919 "mov r1, %1\n" 920 "mov r2, %2\n" 921 922 "0:\n" 923 "mov r3, r1, lsr #5\n" 924 925 "1:\n" 926 "subs r3, r3, #1\n" 927 "vldmia r0!, {d0-d3}\n" 928 "bgt 1b\n" 929 930 "sub r0, r0, r1\n" 931 "subs r2, r2, #1\n" 932 "bgt 0b\n" 933 934 "ldmfd sp!, {r0,r1,r2,r3}\n" 935 :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); 936 #else 937 void bench(size_t) { 938 #endif 939 } 940 }; 941 942 #endif // __BANDWIDTH_H__ 943