
# Various microbenchmarks comparing unicode and byte string performance
# Please keep this file both 2.x and 3.x compatible!

import timeit
import itertools
import operator
import re
import sys
import datetime
import optparse

VERSION = '2.0'

def p(*args):
    """Write the arguments to stdout, str()-converted and space-separated,
    followed by a newline."""
    sys.stdout.write(' '.join(str(s) for s in args) + '\n')

# BYTES and UNICODE turn a native, ASCII-only str literal into the byte
# string type and the text string type of the running interpreter.
# Each pair of names (e.g. BYTES / bytes_from_str) is bound to the *same*
# lambda object; helpers further down compare with "STR is BYTES" /
# "STR is UNICODE", so that identity must be preserved.
if sys.version_info >= (3,):
    BYTES = bytes_from_str = lambda x: x.encode('ascii')
    UNICODE = unicode_from_str = lambda x: x
else:
    BYTES = bytes_from_str = lambda x: x
    UNICODE = unicode_from_str = lambda x: x.decode('ascii')

class UnsupportedType(TypeError):
    """Raised by a benchmark that cannot run for the requested string type."""
    pass


p('stringbench v%s' % VERSION)
p(sys.version)
p(datetime.datetime.now())

# REPEAT is consumed outside this section (presumably as the timing repeat
# count -- confirm against the driver).  The alternatives are kept as
# comments for quick switching.
#REPEAT = 1
REPEAT = 3
#REPEAT = 7

if __name__ != "__main__":
    raise SystemExit("Must run as main program")

parser = optparse.OptionParser()
parser.add_option("-R", "--skip-re", dest="skip_re",
                  action="store_true",
                  help="skip regular expression tests")
parser.add_option("-8", "--8-bit", dest="bytes_only",
                  action="store_true",
                  help="only do 8-bit string benchmarks")
parser.add_option("-u", "--unicode", dest="unicode_only",
                  action="store_true",
                  help="only do Unicode string benchmarks")


# Pre-built iteration sequences so the benchmark loops do not pay for
# building a range on every call.
_RANGE_1000 = list(range(1000))
_RANGE_100 = list(range(100))
_RANGE_10 = list(range(10))

# Names of all functions registered through @bench, used to reject duplicates.
dups = {}
def bench(s, group, repeat_count):
    """Return a decorator that registers a function as a benchmark.

    s            -- human-readable expression being measured; stored as
                    f.comment
    group        -- description shared by comparable benchmarks; stored as
                    f.group
    repeat_count -- number of times the function body performs the
                    operation; stored as f.repeat_count

    The decorated function is also tagged with f.is_bench = True and is
    returned unchanged otherwise.  Raises AssertionError when two
    benchmarks share the same function name.
    """
    def blah(f):
        if f.__name__ in dups:
            raise AssertionError("Multiple functions with same name: %r" %
                                 (f.__name__,))
        dups[f.__name__] = 1
        f.comment = s
        f.is_bench = True
        f.group = group
        f.repeat_count = repeat_count
        return f
    return blah

def uses_re(f):
    """Decorator tagging a benchmark as using the re module.

    Sets f.uses_re = True and returns f.  The return is essential: without
    it the decorated name would be rebound to None and the benchmark would
    silently disappear from the module namespace.
    """
    f.uses_re = True
    return f

# Conventions for every benchmark below: the single parameter STR is either
# BYTES or UNICODE and is used to build the operands; operands and bound
# methods are prepared before the loop so that the loop body contains only
# the operation named in the @bench label.

####### 'in' comparisons

@bench('"A" in "A"*1000', "early match, single character", 1000)
def in_test_quick_match_single_character(STR):
    s1 = STR("A" * 1000)
    s2 = STR("A")
    for x in _RANGE_1000:
        s2 in s1

@bench('"B" in "A"*1000', "no match, single character", 1000)
def in_test_no_match_single_character(STR):
    s1 = STR("A" * 1000)
    s2 = STR("B")
    for x in _RANGE_1000:
        s2 in s1


@bench('"AB" in "AB"*1000', "early match, two characters", 1000)
def in_test_quick_match_two_characters(STR):
    s1 = STR("AB" * 1000)
    s2 = STR("AB")
    for x in _RANGE_1000:
        s2 in s1

@bench('"BC" in "AB"*1000', "no match, two characters", 1000)
def in_test_no_match_two_character(STR):
    s1 = STR("AB" * 1000)
    s2 = STR("BC")
    for x in _RANGE_1000:
        s2 in s1

@bench('"BC" in ("AB"*300+"C")', "late match, two characters", 1000)
def in_test_slow_match_two_characters(STR):
    s1 = STR("AB" * 300+"C")
    s2 = STR("BC")
    for x in _RANGE_1000:
        s2 in s1

@bench('s="ABC"*33; (s+"E") in ((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def in_test_slow_match_100_characters(STR):
    m = STR("ABC"*33)
    d = STR("D")
    e = STR("E")
    s1 = (m+d)*300 + m+e
    s2 = m+e
    for x in _RANGE_100:
        s2 in s1

# Try with regex
# The label names the pattern actually compiled below (s+"E"), which is the
# same needle as the 'in' benchmark above.
@uses_re
@bench('s="ABC"*33; re.compile(s+"E").search((s+"D")*300+s+"E")',
       "late match, 100 characters", 100)
def re_test_slow_match_100_characters(STR):
    m = STR("ABC"*33)
    d = STR("D")
    e = STR("E")
    s1 = (m+d)*300 + m+e
    s2 = m+e
    pat = re.compile(s2)
    search = pat.search
    for x in _RANGE_100:
        search(s1)


#### same tests as 'in' but use 'find'

@bench('("A"*1000).find("A")', "early match, single character", 1000)
def find_test_quick_match_single_character(STR):
    s1 = STR("A" * 1000)
    s2 = STR("A")
    s1_find = s1.find
    for x in _RANGE_1000:
        s1_find(s2)

@bench('("A"*1000).find("B")', "no match, single character", 1000)
def find_test_no_match_single_character(STR):
    s1 = STR("A" * 1000)
    s2 = STR("B")
    s1_find = s1.find
    for x in _RANGE_1000:
        s1_find(s2)


@bench('("AB"*1000).find("AB")', "early match, two characters", 1000)
def find_test_quick_match_two_characters(STR):
    s1 = STR("AB" * 1000)
    s2 = STR("AB")
    s1_find = s1.find
    for x in _RANGE_1000:
        s1_find(s2)

@bench('("AB"*1000).find("BC")', "no match, two characters", 1000)
def find_test_no_match_two_character(STR):
    s1 = STR("AB" * 1000)
    s2 = STR("BC")
    s1_find = s1.find
    for x in _RANGE_1000:
        s1_find(s2)

@bench('("AB"*1000).find("CA")', "no match, two characters", 1000)
def find_test_no_match_two_character_bis(STR):
    s1 = STR("AB" * 1000)
    s2 = STR("CA")
    s1_find = s1.find
    for x in _RANGE_1000:
        s1_find(s2)

@bench('("AB"*300+"C").find("BC")', "late match, two characters", 1000)
def find_test_slow_match_two_characters(STR):
    s1 = STR("AB" * 300+"C")
    s2 = STR("BC")
    s1_find = s1.find
    for x in _RANGE_1000:
        s1_find(s2)

@bench('("AB"*300+"CA").find("CA")', "late match, two characters", 1000)
def find_test_slow_match_two_characters_bis(STR):
    s1 = STR("AB" * 300+"CA")
    s2 = STR("CA")
    s1_find = s1.find
    for x in _RANGE_1000:
        s1_find(s2)

@bench('s="ABC"*33; ((s+"D")*500+s+"E").find(s+"E")',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters(STR):
    m = STR("ABC"*33)
    d = STR("D")
    e = STR("E")
    s1 = (m+d)*500 + m+e
    s2 = m+e
    s1_find = s1.find
    for x in _RANGE_100:
        s1_find(s2)

@bench('s="ABC"*33; ((s+"D")*500+"E"+s).find("E"+s)',
       "late match, 100 characters", 100)
def find_test_slow_match_100_characters_bis(STR):
    m = STR("ABC"*33)
    d = STR("D")
    e = STR("E")
    s1 = (m+d)*500 + e+m
    s2 = e+m
    s1_find = s1.find
    for x in _RANGE_100:
        s1_find(s2)


#### Same tests for 'rfind'

@bench('("A"*1000).rfind("A")', "early match, single character", 1000)
def rfind_test_quick_match_single_character(STR):
    s1 = STR("A" * 1000)
    s2 = STR("A")
    s1_rfind = s1.rfind
    for x in _RANGE_1000:
        s1_rfind(s2)

@bench('("A"*1000).rfind("B")', "no match, single character", 1000)
def rfind_test_no_match_single_character(STR):
    """rfind() of a one-character needle that never occurs."""
    haystack = STR("A" * 1000)
    needle = STR("B")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)


@bench('("AB"*1000).rfind("AB")', "early match, two characters", 1000)
def rfind_test_quick_match_two_characters(STR):
    """rfind() of a two-character needle found at the very end."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)

@bench('("AB"*1000).rfind("BC")', "no match, two characters", 1000)
def rfind_test_no_match_two_character(STR):
    """rfind() of a two-character needle whose first character occurs."""
    haystack = STR("AB" * 1000)
    needle = STR("BC")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)

@bench('("AB"*1000).rfind("CA")', "no match, two characters", 1000)
def rfind_test_no_match_two_character_bis(STR):
    """rfind() of a two-character needle whose last character occurs."""
    haystack = STR("AB" * 1000)
    needle = STR("CA")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)

@bench('("C"+"AB"*300).rfind("CA")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters(STR):
    """rfind() of a two-character needle located at the very start."""
    haystack = STR("C" + "AB" * 300)
    needle = STR("CA")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)

@bench('("BC"+"AB"*300).rfind("BC")', "late match, two characters", 1000)
def rfind_test_slow_match_two_characters_bis(STR):
    """Same as above with the needle "BC" placed at the very start."""
    haystack = STR("BC" + "AB" * 300)
    needle = STR("BC")
    rfind = haystack.rfind
    for _ in _RANGE_1000:
        rfind(needle)

@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rfind("E"+s)',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters(STR):
    """rfind() of a 100-character needle located at the very start."""
    core = STR("ABC" * 33)
    dee = STR("D")
    eee = STR("E")
    haystack = eee + core + (dee + core) * 500
    needle = eee + core
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)

@bench('s="ABC"*33; (s+"E"+("D"+s)*500).rfind(s+"E")',
       "late match, 100 characters", 100)
def rfind_test_slow_match_100_characters_bis(STR):
    """Variant of the previous benchmark with the "E" after the ABC run."""
    core = STR("ABC" * 33)
    dee = STR("D")
    eee = STR("E")
    haystack = core + eee + (dee + core) * 500
    needle = core + eee
    rfind = haystack.rfind
    for _ in _RANGE_100:
        rfind(needle)


#### Now with index.
# Skip the ones which fail because that would include exception overhead.

@bench('("A"*1000).index("A")', "early match, single character", 1000)
def index_test_quick_match_single_character(STR):
    """index() of a one-character needle found at offset 0."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)

@bench('("AB"*1000).index("AB")', "early match, two characters", 1000)
def index_test_quick_match_two_characters(STR):
    """index() of a two-character needle found at offset 0."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)

@bench('("AB"*300+"C").index("BC")', "late match, two characters", 1000)
def index_test_slow_match_two_characters(STR):
    """index() of a two-character needle located at the very end."""
    haystack = STR("AB" * 300 + "C")
    needle = STR("BC")
    index = haystack.index
    for _ in _RANGE_1000:
        index(needle)

@bench('s="ABC"*33; ((s+"D")*500+s+"E").index(s+"E")',
       "late match, 100 characters", 100)
def index_test_slow_match_100_characters(STR):
    """index() of a 100-character needle located at the very end."""
    core = STR("ABC" * 33)
    dee = STR("D")
    eee = STR("E")
    haystack = (core + dee) * 500 + core + eee
    needle = core + eee
    index = haystack.index
    for _ in _RANGE_100:
        index(needle)


#### Same for rindex

@bench('("A"*1000).rindex("A")', "early match, single character", 1000)
def rindex_test_quick_match_single_character(STR):
    """rindex() of a one-character needle found at the very end."""
    haystack = STR("A" * 1000)
    needle = STR("A")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)

@bench('("AB"*1000).rindex("AB")', "early match, two characters", 1000)
def rindex_test_quick_match_two_characters(STR):
    """rindex() of a two-character needle found at the very end."""
    haystack = STR("AB" * 1000)
    needle = STR("AB")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)

@bench('("C"+"AB"*300).rindex("CA")', "late match, two characters", 1000)
def rindex_test_slow_match_two_characters(STR):
    """rindex() of a two-character needle located at the very start."""
    haystack = STR("C" + "AB" * 300)
    needle = STR("CA")
    rindex = haystack.rindex
    for _ in _RANGE_1000:
        rindex(needle)

@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rindex("E"+s)',
       "late match, 100 characters", 100)
def rindex_test_slow_match_100_characters(STR):
    """rindex() of a 100-character needle located at the very start."""
    core = STR("ABC" * 33)
    dee = STR("D")
    eee = STR("E")
    haystack = eee + core + (dee + core) * 500
    needle = eee + core
    rindex = haystack.rindex
    for _ in _RANGE_100:
        rindex(needle)


#### Same for partition

@bench('("A"*1000).partition("A")', "early match, single character", 1000)
def partition_test_quick_match_single_character(STR):
    """partition() on a one-character separator found at offset 0."""
    text = STR("A" * 1000)
    sep = STR("A")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)

@bench('("A"*1000).partition("B")', "no match, single character", 1000)
def partition_test_no_match_single_character(STR):
    """partition() on a one-character separator that never occurs."""
    text = STR("A" * 1000)
    sep = STR("B")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)


@bench('("AB"*1000).partition("AB")', "early match, two characters", 1000)
def partition_test_quick_match_two_characters(STR):
    """partition() on a two-character separator found at offset 0."""
    text = STR("AB" * 1000)
    sep = STR("AB")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)

@bench('("AB"*1000).partition("BC")', "no match, two characters", 1000)
def partition_test_no_match_two_character(STR):
    """partition() on a two-character separator that never occurs."""
    text = STR("AB" * 1000)
    sep = STR("BC")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)

@bench('("AB"*300+"C").partition("BC")', "late match, two characters", 1000)
def partition_test_slow_match_two_characters(STR):
    """partition() on a two-character separator located at the very end."""
    text = STR("AB" * 300 + "C")
    sep = STR("BC")
    partition = text.partition
    for _ in _RANGE_1000:
        partition(sep)

@bench('s="ABC"*33; ((s+"D")*500+s+"E").partition(s+"E")',
       "late match, 100 characters", 100)
def partition_test_slow_match_100_characters(STR):
    """partition() on a 100-character separator located at the very end."""
    core = STR("ABC" * 33)
    dee = STR("D")
    eee = STR("E")
    text = (core + dee) * 500 + core + eee
    sep = core + eee
    partition = text.partition
    for _ in _RANGE_100:
        partition(sep)


#### Same for rpartition

@bench('("A"*1000).rpartition("A")', "early match, single character", 1000)
def rpartition_test_quick_match_single_character(STR):
    """rpartition() on a one-character separator found at the very end."""
    text = STR("A" * 1000)
    sep = STR("A")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)

@bench('("A"*1000).rpartition("B")', "no match, single character", 1000)
def rpartition_test_no_match_single_character(STR):
    """rpartition() on a one-character separator that never occurs."""
    text = STR("A" * 1000)
    sep = STR("B")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)


@bench('("AB"*1000).rpartition("AB")', "early match, two characters", 1000)
def rpartition_test_quick_match_two_characters(STR):
    """rpartition() on a two-character separator found at the very end."""
    text = STR("AB" * 1000)
    sep = STR("AB")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)

@bench('("AB"*1000).rpartition("BC")', "no match, two characters", 1000)
def rpartition_test_no_match_two_character(STR):
    """rpartition() on a two-character separator that never occurs."""
    text = STR("AB" * 1000)
    sep = STR("BC")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)

@bench('("C"+"AB"*300).rpartition("CA")', "late match, two characters", 1000)
def rpartition_test_slow_match_two_characters(STR):
    """rpartition() on a two-character separator located at the very start."""
    text = STR("C" + "AB" * 300)
    sep = STR("CA")
    rpartition = text.rpartition
    for _ in _RANGE_1000:
        rpartition(sep)

@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rpartition("E"+s)',
       "late match, 100 characters", 100)
def rpartition_test_slow_match_100_characters(STR):
    """rpartition() on a 100-character separator located at the very start."""
    core = STR("ABC" * 33)
    dee = STR("D")
    eee = STR("E")
    text = eee + core + (dee + core) * 500
    sep = eee + core
    rpartition = text.rpartition
    for _ in _RANGE_100:
        rpartition(sep)


#### Same for split(s, 1)

@bench('("A"*1000).split("A", 1)', "early match, single character", 1000)
def split_test_quick_match_single_character(STR):
    """split(sep, 1) on a one-character separator found at offset 0."""
    text = STR("A" * 1000)
    sep = STR("A")
    split = text.split
    for _ in _RANGE_1000:
        split(sep, 1)

@bench('("A"*1000).split("B", 1)', "no match, single character", 1000)
def split_test_no_match_single_character(STR):
    s1 = STR("A" * 1000)
    s2 = STR("B")
    s1_split = s1.split
    for x in _RANGE_1000:
        s1_split(s2, 1)


@bench('("AB"*1000).split("AB", 1)', "early match, two characters", 1000)
def split_test_quick_match_two_characters(STR):
    s1 = STR("AB" * 1000)
    s2 = STR("AB")
    s1_split = s1.split
    for x in _RANGE_1000:
        s1_split(s2, 1)

@bench('("AB"*1000).split("BC", 1)', "no match, two characters", 1000)
def split_test_no_match_two_character(STR):
    s1 = STR("AB" * 1000)
    s2 = STR("BC")
    s1_split = s1.split
    for x in _RANGE_1000:
        s1_split(s2, 1)

@bench('("AB"*300+"C").split("BC", 1)', "late match, two characters", 1000)
def split_test_slow_match_two_characters(STR):
    s1 = STR("AB" * 300+"C")
    s2 = STR("BC")
    s1_split = s1.split
    for x in _RANGE_1000:
        s1_split(s2, 1)

@bench('s="ABC"*33; ((s+"D")*500+s+"E").split(s+"E", 1)',
       "late match, 100 characters", 100)
def split_test_slow_match_100_characters(STR):
    m = STR("ABC"*33)
    d = STR("D")
    e = STR("E")
    s1 = (m+d)*500 + m+e
    s2 = m+e
    s1_split = s1.split
    for x in _RANGE_100:
        s1_split(s2, 1)


#### Same for rsplit(s, 1)

@bench('("A"*1000).rsplit("A", 1)', "early match, single character", 1000)
def rsplit_test_quick_match_single_character(STR):
    s1 = STR("A" * 1000)
    s2 = STR("A")
    s1_rsplit = s1.rsplit
    for x in _RANGE_1000:
        s1_rsplit(s2, 1)

@bench('("A"*1000).rsplit("B", 1)', "no match, single character", 1000)
def rsplit_test_no_match_single_character(STR):
    s1 = STR("A" * 1000)
    s2 = STR("B")
    s1_rsplit = s1.rsplit
    for x in _RANGE_1000:
        s1_rsplit(s2, 1)


@bench('("AB"*1000).rsplit("AB", 1)', "early match, two characters", 1000)
def rsplit_test_quick_match_two_characters(STR):
    s1 = STR("AB" * 1000)
    s2 = STR("AB")
    s1_rsplit = s1.rsplit
    for x in _RANGE_1000:
        s1_rsplit(s2, 1)

@bench('("AB"*1000).rsplit("BC", 1)', "no match, two characters", 1000)
def rsplit_test_no_match_two_character(STR):
    s1 = STR("AB" * 1000)
    s2 = STR("BC")
    s1_rsplit = s1.rsplit
    for x in _RANGE_1000:
        s1_rsplit(s2, 1)

@bench('("C"+"AB"*300).rsplit("CA", 1)', "late match, two characters", 1000)
def rsplit_test_slow_match_two_characters(STR):
    s1 = STR("C" + "AB" * 300)
    s2 = STR("CA")
    s1_rsplit = s1.rsplit
    for x in _RANGE_1000:
        s1_rsplit(s2, 1)

@bench('s="ABC"*33; ("E"+s+("D"+s)*500).rsplit("E"+s, 1)',
       "late match, 100 characters", 100)
def rsplit_test_slow_match_100_characters(STR):
    m = STR("ABC"*33)
    d = STR("D")
    e = STR("E")
    s1 = e + m + (d+m)*500
    s2 = e + m
    s1_rsplit = s1.rsplit
    for x in _RANGE_100:
        s1_rsplit(s2, 1)


#### Benchmark the operator-based methods

@bench('"A"*10', "repeat 1 character 10 times", 1000)
def repeat_single_10_times(STR):
    s = STR("A")
    for x in _RANGE_1000:
        s * 10

@bench('"A"*1000', "repeat 1 character 1000 times", 1000)
def repeat_single_1000_times(STR):
    s = STR("A")
    for x in _RANGE_1000:
        s * 1000

@bench('"ABCDE"*10', "repeat 5 characters 10 times", 1000)
def repeat_5_10_times(STR):
    s = STR("ABCDE")
    for x in _RANGE_1000:
        s * 10

@bench('"ABCDE"*1000', "repeat 5 characters 1000 times", 1000)
def repeat_5_1000_times(STR):
    s = STR("ABCDE")
    for x in _RANGE_1000:
        s * 1000

# + for concat

@bench('"Andrew"+"Dalke"', "concat two strings", 1000)
def concat_two_strings(STR):
    s1 = STR("Andrew")
    s2 = STR("Dalke")
    for x in _RANGE_1000:
        s1+s2

@bench('s1+s2+s3+s4+...+s20', "concat 20 strings of words length 4 to 15",
       1000)
def concat_many_strings(STR):
    s1=STR('TIXSGYNREDCVBHJ')
    s2=STR('PUMTLXBZVDO')
    s3=STR('FVZNJ')
    s4=STR('OGDXUW')
    s5=STR('WEIMRNCOYVGHKB')
    s6=STR('FCQTNMXPUZH')
    s7=STR('TICZJYRLBNVUEAK')
    s8=STR('REYB')
    s9=STR('PWUOQ')
    s10=STR('EQHCMKBS')
    s11=STR('AEVDFOH')
    s12=STR('IFHVD')
    s13=STR('JGTCNLXWOHQ')
    s14=STR('ITSKEPYLROZAWXF')
    s15=STR('THEK')
    s16=STR('GHPZFBUYCKMNJIT')
    s17=STR('JMUZ')
    s18=STR('WLZQMTB')
    s19=STR('KPADCBW')
    s20=STR('TNJHZQAGBU')
    for x in _RANGE_1000:
        (s1 + s2+ s3+ s4+ s5+ s6+ s7+ s8+ s9+s10+
         s11+s12+s13+s14+s15+s16+s17+s18+s19+s20)


#### Benchmark join

def get_bytes_yielding_seq(STR, arg):
    """Return STR(arg) for use as a join() argument iterated item by item.

    Raises UnsupportedType for BYTES on Python 3: iterating a bytes object
    there yields ints, which bytes.join() does not accept, so the
    "join the characters of a string" benchmarks have no byte equivalent.
    """
    if STR is BYTES and sys.version_info >= (3,):
        raise UnsupportedType
    return STR(arg)

@bench('"A".join("")',
       "join empty string, with 1 character sep", 100)
def join_empty_single(STR):
    sep = STR("A")
    s2 = get_bytes_yielding_seq(STR, "")
    sep_join = sep.join
    for x in _RANGE_100:
        sep_join(s2)

@bench('"ABCDE".join("")',
       "join empty string, with 5 character sep", 100)
def join_empty_5(STR):
    sep = STR("ABCDE")
    s2 = get_bytes_yielding_seq(STR, "")
    sep_join = sep.join
    for x in _RANGE_100:
        sep_join(s2)

@bench('"A".join("ABC..Z")',
       "join string with 26 characters, with 1 character sep", 1000)
def join_alphabet_single(STR):
    sep = STR("A")
    s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)

@bench('"ABCDE".join("ABC..Z")',
       "join string with 26 characters, with 5 character sep", 1000)
def join_alphabet_5(STR):
    sep = STR("ABCDE")
    s2 = get_bytes_yielding_seq(STR, "ABCDEFGHIJKLMnOPQRSTUVWXYZ")
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)

@bench('"A".join(list("ABC..Z"))',
       "join list of 26 characters, with 1 character sep", 1000)
def join_alphabet_list_single(STR):
    sep = STR("A")
    s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)

@bench('"ABCDE".join(list("ABC..Z"))',
       "join list of 26 characters, with 5 character sep", 1000)
def join_alphabet_list_five(STR):
    sep = STR("ABCDE")
    s2 = [STR(x) for x in "ABCDEFGHIJKLMnOPQRSTUVWXYZ"]
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)

@bench('"A".join(["Bob"]*100)',
       "join list of 100 words, with 1 character sep", 1000)
def join_100_words_single(STR):
    sep = STR("A")
    s2 = [STR("Bob")]*100
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)

@bench('"ABCDE".join(["Bob"]*100)',
       "join list of 100 words, with 5 character sep", 1000)
def join_100_words_5(STR):
    sep = STR("ABCDE")
    s2 = [STR("Bob")]*100
    sep_join = sep.join
    for x in _RANGE_1000:
        sep_join(s2)

#### split tests

@bench('("Here are some words. "*2).split()', "split whitespace (small)", 1000)
def whitespace_split(STR):
    s = STR("Here are some words. "*2)
    s_split = s.split
    for x in _RANGE_1000:
        s_split()

@bench('("Here are some words. "*2).rsplit()', "split whitespace (small)", 1000)
def whitespace_rsplit(STR):
    s = STR("Here are some words. "*2)
    s_rsplit = s.rsplit
    for x in _RANGE_1000:
        s_rsplit()

@bench('("Here are some words. "*2).split(None, 1)',
       "split 1 whitespace", 1000)
def whitespace_split_1(STR):
    s = STR("Here are some words. "*2)
    s_split = s.split
    N = None
    for x in _RANGE_1000:
        s_split(N, 1)

@bench('("Here are some words. "*2).rsplit(None, 1)',
       "split 1 whitespace", 1000)
def whitespace_rsplit_1(STR):
    s = STR("Here are some words. "*2)
    s_rsplit = s.rsplit
    N = None
    for x in _RANGE_1000:
        s_rsplit(N, 1)

@bench('("Here are some words. "*2).partition(" ")',
       "split 1 whitespace", 1000)
def whitespace_partition(STR):
    sep = STR(" ")
    s = STR("Here are some words. "*2)
    s_partition = s.partition
    for x in _RANGE_1000:
        s_partition(sep)

@bench('("Here are some words. "*2).rpartition(" ")',
       "split 1 whitespace", 1000)
def whitespace_rpartition(STR):
    sep = STR(" ")
    s = STR("Here are some words. "*2)
    s_rpartition = s.rpartition
    for x in _RANGE_1000:
        s_rpartition(sep)

human_text = """\
Python is a dynamic object-oriented programming language that can be
used for many kinds of software development. It offers strong support
for integration with other languages and tools, comes with extensive
standard libraries, and can be learned in a few days. Many Python
programmers report substantial productivity gains and feel the language
encourages the development of higher quality, more maintainable code.

Python runs on Windows, Linux/Unix, Mac OS X, Amiga, Palm
Handhelds, and Nokia mobile phones. Python has also been ported to the
Java and .NET virtual machines.

Python is distributed under an OSI-approved open source license that
makes it free to use, even for commercial products.
"""*25
# Both representations are built once at import time so the benchmarks do
# not pay for the conversion.
human_text_bytes = bytes_from_str(human_text)
human_text_unicode = unicode_from_str(human_text)
def _get_human_text(STR):
    """Return the pre-converted human_text matching the STR converter."""
    if STR is UNICODE:
        return human_text_unicode
    if STR is BYTES:
        return human_text_bytes
    raise AssertionError

@bench('human_text.split()', "split whitespace (huge)", 10)
def whitespace_split_huge(STR):
    s = _get_human_text(STR)
    s_split = s.split
    for x in _RANGE_10:
        s_split()

@bench('human_text.rsplit()', "split whitespace (huge)", 10)
def whitespace_rsplit_huge(STR):
    s = _get_human_text(STR)
    s_rsplit = s.rsplit
    for x in _RANGE_10:
        s_rsplit()



@bench('"this\\nis\\na\\ntest\\n".split("\\n")', "split newlines", 1000)
def newlines_split(STR):
    s = STR("this\nis\na\ntest\n")
    s_split = s.split
    nl = STR("\n")
    for x in _RANGE_1000:
        s_split(nl)


@bench('"this\\nis\\na\\ntest\\n".rsplit("\\n")', "split newlines", 1000)
def newlines_rsplit(STR):
    s = STR("this\nis\na\ntest\n")
    s_rsplit = s.rsplit
    nl = STR("\n")
    for x in _RANGE_1000:
        s_rsplit(nl)

@bench('"this\\nis\\na\\ntest\\n".splitlines()', "split newlines", 1000)
def newlines_splitlines(STR):
    s = STR("this\nis\na\ntest\n")
    s_splitlines = s.splitlines
    for x in _RANGE_1000:
        s_splitlines()

## split text with 2000 newlines

def _make_2000_lines():
    """Build a reproducible text of 2000 newline-terminated lines.

    Uses a random.Random seeded with 100, so the result is the same on
    every run.  Each line is a slice of 5 to 65 characters taken from the
    ASCII characters 32..127 (with some of them replaced by spaces),
    repeated four times.
    """
    import random
    r = random.Random(100)
    chars = list(map(chr, range(32, 128)))
    # Blank out characters at random strides of 0..8 positions.
    i = 0
    while i < len(chars):
        chars[i] = " "
        i += r.randrange(9)
    s = "".join(chars)
    s = s*4
    words = []
    for i in range(2000):
        start = r.randrange(96)
        n = r.randint(5, 65)
        words.append(s[start:start+n])
    return "\n".join(words)+"\n"

_text_with_2000_lines = _make_2000_lines()
_text_with_2000_lines_bytes = bytes_from_str(_text_with_2000_lines)
_text_with_2000_lines_unicode = unicode_from_str(_text_with_2000_lines)
def _get_2000_lines(STR):
    """Return the pre-converted 2000-line text matching the STR converter."""
    if STR is UNICODE:
        return _text_with_2000_lines_unicode
    if STR is BYTES:
        return _text_with_2000_lines_bytes
    raise AssertionError


@bench('"...text...".split("\\n")', "split 2000 newlines", 10)
def newlines_split_2000(STR):
    s = _get_2000_lines(STR)
    s_split = s.split
    nl = STR("\n")
    for x in _RANGE_10:
        s_split(nl)

@bench('"...text...".rsplit("\\n")', "split 2000 newlines", 10)
def newlines_rsplit_2000(STR):
    s = _get_2000_lines(STR)
    s_rsplit = s.rsplit
    nl = STR("\n")
    for x in _RANGE_10:
        s_rsplit(nl)

@bench('"...text...".splitlines()', "split 2000 newlines", 10)
def newlines_splitlines_2000(STR):
    s = _get_2000_lines(STR)
    s_splitlines = s.splitlines
    for x in _RANGE_10:
        s_splitlines()


## split text on "--" characters
@bench(
    '"this--is--a--test--of--the--emergency--broadcast--system".split("--")',
    "split on multicharacter separator (small)", 1000)
def split_multichar_sep_small(STR):
    s = STR("this--is--a--test--of--the--emergency--broadcast--system")
    s_split = s.split
    pat = STR("--")
    for x in _RANGE_1000:
        s_split(pat)
@bench(
    '"this--is--a--test--of--the--emergency--broadcast--system".rsplit("--")',
    "split on multicharacter separator (small)", 1000)
def rsplit_multichar_sep_small(STR):
    s = STR("this--is--a--test--of--the--emergency--broadcast--system")
    s_rsplit = s.rsplit
    pat = STR("--")
    for x in _RANGE_1000:
        s_rsplit(pat)

## split dna text on "ACTAT" characters
# _get_dna is defined further down; it is only looked up when the
# benchmark is called, so the forward reference is fine.
@bench('dna.split("ACTAT")',
       "split on multicharacter separator (dna)", 10)
def split_multichar_sep_dna(STR):
    s = _get_dna(STR)
    s_split = s.split
    pat = STR("ACTAT")
    for x in _RANGE_10:
        s_split(pat)

@bench('dna.rsplit("ACTAT")',
       "split on multicharacter separator (dna)", 10)
def rsplit_multichar_sep_dna(STR):
    s = _get_dna(STR)
    s_rsplit = s.rsplit
    pat = STR("ACTAT")
    for x in _RANGE_10:
        s_rsplit(pat)



## split with limits

GFF3_example = "\t".join([
    "I", "Genomic_canonical", "region", "357208", "396183", ".", "+", ".",
    "ID=Sequence:R119;note=Clone R119%3B Genbank AF063007;Name=R119"])

@bench('GFF3_example.split("\\t")', "tab split", 1000)
def tab_split_no_limit(STR):
    sep = STR("\t")
    s = STR(GFF3_example)
    s_split = s.split
    for x in _RANGE_1000:
        s_split(sep)

@bench('GFF3_example.split("\\t", 8)', "tab split", 1000)
def tab_split_limit(STR):
    sep = STR("\t")
    s = STR(GFF3_example)
    s_split = s.split
    for x in _RANGE_1000:
        s_split(sep, 8)

@bench('GFF3_example.rsplit("\\t")', "tab split", 1000)
def tab_rsplit_no_limit(STR):
    sep = STR("\t")
    s = STR(GFF3_example)
    s_rsplit = s.rsplit
    for x in _RANGE_1000:
        s_rsplit(sep)

@bench('GFF3_example.rsplit("\\t", 8)', "tab split", 1000)
def tab_rsplit_limit(STR):
    sep = STR("\t")
    s = STR(GFF3_example)
    s_rsplit = s.rsplit
    for x in _RANGE_1000:
        s_rsplit(sep, 8)

#### Count characters

@bench('...text.with.2000.newlines.count("\\n")',
       "count newlines", 10)
def count_newlines(STR):
    s = _get_2000_lines(STR)
    s_count = s.count
    nl = STR("\n")
    for x in _RANGE_10:
        s_count(nl)

# Orchid sequences concatenated, from Biopython
_dna = """
CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGGGTT
AATCTGGAGGATCTGTTTACTTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGAATTGCCATCG
AGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGCAGTTTTGCTCCAAGTCGTT
TGACACATAATTGGTGAAGGGGGTGGCATCCTTCCCTGACCCTCCCCCAACTATTTTTTTAACAACTCTC
AGCAACGGAGACTCAGTCTTCGGCAAATGCGATAAATGGTGTGAATTGCAGAATCCCGTGCACCATCGAG
TCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCATTGCGAGTCATAT
CTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCGGATGTGAGTTTGGCCCCTTGTTCTT
TGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAGGTGGACGAACTAT
GCTACAACAAAATTGTTGTGCAGAGGCCCCGGGTTGTCGTATTAGATGGGCCACCGTAATCTGAAGACCC
TTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGCGACCCCAGGTCAG
GTGAGCAACAGCTGTCGTAACAAGGTTTCCGTAGGGTGAACTGCGGAAGGATCATTGTTGAGATCACATA
ATAATTGATCGAGTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGAC
CTAGATTTGCCATCGAGCCTCCTTGGGAGCATCCTTGTTGGCGATATCTAAACCCTCAATTTTTCCCCCA
ATCAAATTACACAAAATTGGTGGAGGGGGTGGCATTCTTCCCTTACCCTCCCCCAAATATTTTTTTAACA
ACTCTCAGCAACGGATATCTCAGCTCTTGCATCGATGAAGAACCCACCGAAATGCGATAAATGGTGTGAA
TTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACG
CCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCAGCCGGTGCG
GATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGATGCATGGGCTTTTGATGGTCCTAA
ATACGGCAAGAGGTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATAAG
ATGGGCCACCGATATCTGAAGACCCTTTTGGACCCCATTGGAGCCCATCAACCCATGTCAGTTGATGGCC
ATTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGA
GTTAATCTGGAGGATCTGTTTACTTGGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCA
TCGAGCCTCCTTGGGAGCTTTCTTGTTGGCGATATCTAAACCCTTGCCCGGCAGAGTTTTGGGAATCCCG
TGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCTGCCTGGGCAT
TGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACACACCTGTTCAGCCGGTGCGGATGTGAGTTTG
GCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCTTTTGATGGTCCTAAATACGGCAAGAG
GTGGACGAACTATGCTACAACAAAATTGTTGTGCAAAGGCCCCGGGTTGTCGTATTAGATGGGCCACCAT
AATCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGGCCATTTGGTTGC
GACCCAGTCAGGTGAGGGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATCACATAATAATTGATCGAG
TTAATCTGGAGGATCTGTTTACTTTGGTCACCCATGGGCATTTGCTGTTGAAGTGACCTAGATTTGCCAT
CGAGCCTCCTTGGGAGCTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTTGGCGCCAAGTCA
TATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAACAACTC
TCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGAATTGC
AGAATCCCGTGAACCATCGAGTCTTTGGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCACGCCT
GCCTGGGCATTGGGAATCATATCTCTCCCCTAACGAGGCTATCCAAACATACTGTTCATCCGGTGCGGAT
GTGAGTTTGGCCCCTTGTTCTTTGGTACCGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTCAAAA
CGGCAAGAGGTGGACGAACTATGCCACAACAAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTAGATG
GGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATGACCA
TTTGTTGCGACCCCAGTCAGCTGAGCAACCCGCTGAGTGGAAGGTCATTGCCGATATCACATAATAATTG
ATCGAGTTAATCTGGAGGATCTGTTTACTTGGTCACCCATGAGCATTTGCTGTTGAAGTGACCTAGATTT
GCCATCGAGCCTCCTTGGGAGTTTTCTTGTTGGCGAGATCTAAACCCTTGCCCGGCGGAGTTGTGCGCCA
AGTCATATGACACATAATTGGTGAAGGGGGTGGCATCCTGCCCTGACCCTCCCCAAATTATTTTTTTAAC
AACTCTCAGCAACGGATATCTCGGCTCTTGCATCGATGAAGAACGCAGCGAAATGCGATAAATGGTGTGA
ATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAGGCCATCAGGCCAAGGGCAC
GCCTGCCTGGGCATTGCGAGTCATATCTCTCCCTTAACGAGGCTGTCCATACATACTGTTCATCCGGTGC
GGATGTGAGTTTGGCCCCTTGTTCTTTGGTACGGGGGGTCTAAGAGCTGCATGGGCATTTGATGGTCCTC
AAAACGGCAAGAGGTGGACGAACTATGCTACAACCAAATTGTTGTCCCAAGGCCCCGGGTTGTCGTATTA
GATGGGCCACCGTAACCTGAAGACCCTTTTGAACCCCATTGGAGGCCCATCAACCCATGATCAGTTGATG
ACCATGTGTTGCGACCCCAGTCAGCTGAGCAACGCGCTGAGCGTAACAAGGTTTCCGTAGGTGGACCTCC
GGGAGGATCATTGTTGAGATCACATAATAATTGATCGAGGTAATCTGGAGGATCTGCATATTTTGGTCAC
"""
# Drop the line breaks, then repeat to get a large single-line sequence.
_dna = "".join(_dna.splitlines())
_dna = _dna * 25
_dna_bytes = bytes_from_str(_dna)
_dna_unicode = unicode_from_str(_dna)

def _get_dna(STR):
    """Return the pre-converted DNA sequence matching the STR converter."""
    if STR is UNICODE:
        return _dna_unicode
    if STR is BYTES:
        return _dna_bytes
    raise AssertionError

@bench('dna.count("AACT")', "count AACT substrings in DNA example", 10)
def count_aact(STR):
    seq = _get_dna(STR)
    seq_count = seq.count
    needle = STR("AACT")
    for x in _RANGE_10:
        seq_count(needle)

##### startswith and endswith

@bench('"Andrew".startswith("A")', 'startswith single character', 1000)
def startswith_single(STR):
    s1 = STR("Andrew")
    s2 = STR("A")
    s1_startswith = s1.startswith
    for x in _RANGE_1000:
        s1_startswith(s2)

@bench('"Andrew".startswith("Andrew")', 'startswith multiple characters',
       1000)
def startswith_multiple(STR):
    s1 = STR("Andrew")
    s2 = STR("Andrew")
    s1_startswith = s1.startswith
    for x in _RANGE_1000:
        s1_startswith(s2)

@bench('"Andrew".startswith("Anders")',
       'startswith multiple characters - not!', 1000)
def startswith_multiple_not(STR):
    s1 = STR("Andrew")
    s2 = STR("Anders")
    s1_startswith = s1.startswith
    for x in _RANGE_1000:
        s1_startswith(s2)


# endswith

@bench('"Andrew".endswith("w")', 'endswith single character', 1000)
def endswith_single(STR):
    s1 = STR("Andrew")
    s2 = STR("w")
    s1_endswith = s1.endswith
    for x in _RANGE_1000:
        s1_endswith(s2)

@bench('"Andrew".endswith("Andrew")', 'endswith multiple characters', 1000)
def endswith_multiple(STR):
    s1 = STR("Andrew")
    s2 = STR("Andrew")
    s1_endswith = s1.endswith
    for x in _RANGE_1000:
        s1_endswith(s2)

@bench('"Andrew".endswith("Anders")',
       'endswith multiple characters - not!', 1000)
def endswith_multiple_not(STR):
    s1 = STR("Andrew")
    s2 = STR("Anders")
    s1_endswith = s1.endswith
    for x in _RANGE_1000:
        s1_endswith(s2)

#### Strip

@bench('"Hello!\\n".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_right(STR):
    s = STR("Hello!\n")
    s_strip = s.strip
    for x in _RANGE_1000:
        s_strip()

@bench('"Hello!\\n".rstrip()', 'strip terminal newline', 1000)
def terminal_newline_rstrip(STR):
    s = STR("Hello!\n")
    s_rstrip = s.rstrip
    for x in _RANGE_1000:
        s_rstrip()

@bench('"\\nHello!".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_left(STR):
    s = STR("\nHello!")
    s_strip = s.strip
    for x in _RANGE_1000:
        s_strip()

@bench('"\\nHello!\\n".strip()', 'strip terminal newline', 1000)
def terminal_newline_strip_both(STR):
    s = STR("\nHello!\n")
    s_strip = s.strip
    for x in _RANGE_1000:
        s_strip()

# Label corrected: the method measured here is lstrip(), not rstrip().
@bench('"\\nHello!".lstrip()', 'strip terminal newline', 1000)
def terminal_newline_lstrip(STR):
    s = STR("\nHello!")
    s_lstrip = s.lstrip
    for x in _RANGE_1000:
        s_lstrip()

# The test uses the one-element slice s[-1:] rather than the index s[-1]:
# indexing a bytes object on Python 3 yields an int, which never compares
# equal to NL, so with s[-1] the byte-string run would always take the
# "else" branch and never perform the slice being measured.
@bench('s="Hello!\\n"; s[:-1] if s[-1:]=="\\n" else s',
       'strip terminal newline', 1000)
def terminal_newline_if_else(STR):
    s = STR("Hello!\n")
    NL = STR("\n")
    for x in _RANGE_1000:
        s[:-1] if (s[-1:] == NL) else s


# Strip multiple spaces or tabs

# The whitespace must be at the end of the string for strip() to have
# anything to remove; the data mirrors the rstrip() benchmark below.
@bench('"Hello\\t \\t".strip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_strip(STR):
    s = STR("Hello!\t \t")
    s_strip = s.strip
    for x in _RANGE_1000:
        s_strip()

@bench('"Hello\\t \\t".rstrip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_rstrip(STR):
    s = STR("Hello!\t \t")
    s_rstrip = s.rstrip
    for x in _RANGE_1000:
        s_rstrip()

# Label corrected: the method measured here is lstrip(), not rstrip().
@bench('"\\t \\tHello".lstrip()', 'strip terminal spaces and tabs', 1000)
def terminal_space_lstrip(STR):
    s = STR("\t \tHello!")
    s_lstrip = s.lstrip
    for x in _RANGE_1000:
        s_lstrip()


#### replace
@bench('"This is a test".replace(" ", "\\t")', 'replace single character',
       1000)
def replace_single_character(STR):
    s = STR("This is a test!")
    from_str = STR(" ")
    to_str = STR("\t")
    s_replace = s.replace
    for x in _RANGE_1000:
        s_replace(from_str, to_str)

@uses_re
@bench('re.sub(" ", "\\t", "This is a test")', 'replace single character',
       1000)
def replace_single_character_re(STR):
    s = STR("This is a test!")
    pat = re.compile(STR(" "))
    to_str = STR("\t")
    pat_sub = pat.sub
    for x in _RANGE_1000:
        pat_sub(to_str, s)

# Each benchmark prepares its operands and binds the method under test to a
# local name first, so the timed loop contains nothing but the call itself.

@bench('"...text.with.2000.lines...replace("\\n", " ")',
       'replace single character, big string', 10)
def replace_single_character_big(STR):
    text = _get_2000_lines(STR)
    old = STR("\n")
    new = STR(" ")
    replace = text.replace
    for _ in _RANGE_10:
        replace(old, new)

# NOTE(review): uses_re, as defined near the top of this file, does not return
# its argument, so this name appears to end up bound to None and the benchmark
# would never be collected by main() -- confirm.
@uses_re
@bench('re.sub("\\n", " ", "...text.with.2000.lines...")',
       'replace single character, big string', 10)
def replace_single_character_big_re(STR):
    text = _get_2000_lines(STR)
    new = STR(" ")
    sub = re.compile(STR("\n")).sub
    for _ in _RANGE_10:
        sub(new, text)


@bench('dna.replace("ATC", "ATT")',
       'replace multiple characters, dna', 10)
def replace_multiple_characters_dna(STR):
    dna = _get_dna(STR)
    old = STR("ATC")
    new = STR("ATT")
    replace = dna.replace
    for _ in _RANGE_10:
        replace(old, new)

# The replacement is longer than the match, so the result grows.
@bench('"...text.with.2000.newlines...replace("\\n", "\\r\\n")',
       'replace and expand multiple characters, big string', 10)
def replace_multiple_character_big(STR):
    text = _get_2000_lines(STR)
    old = STR("\n")
    new = STR("\r\n")
    replace = text.replace
    for _ in _RANGE_10:
        replace(old, new)


# The replacement is empty, so the result shrinks.
@bench('"When shall we three meet again?".replace("ee", "")',
       'replace/remove multiple characters', 1000)
def replace_multiple_character_remove(STR):
    text = STR("When shall we three meet again?")
    old = STR("ee")
    new = STR("")
    replace = text.replace
    for _ in _RANGE_1000:
        replace(old, new)


# One "A" followed by 128*1024 "Z" characters, converted once up front.
big_s = "A" + ("Z"*128*1024)
big_s_bytes = bytes_from_str(big_s)
big_s_unicode = unicode_from_str(big_s)

def _get_big_s(STR):
    """Return the pre-built big string matching the string type STR.

    STR is compared by identity against UNICODE and BYTES; any other value
    raises AssertionError.
    """
    if STR is UNICODE:
        return big_s_unicode
    if STR is BYTES:
        return big_s_bytes
    raise AssertionError

# The older replace implementation counted all matches in
# the string even when it only needed to make one replacement.
@bench('("A" + ("Z"*128*1024)).replace("A", "BB", 1)',
       'quick replace single character match', 10)
def quick_replace_single_match(STR):
    text = _get_big_s(STR)
    old = STR("A")
    new = STR("BB")
    replace = text.replace
    for _ in _RANGE_10:
        replace(old, new, 1)

@bench('("A" + ("Z"*128*1024)).replace("AZZ", "BBZZ", 1)',
       'quick replace multiple character match', 10)
def quick_replace_multiple_match(STR):
    text = _get_big_s(STR)
    old = STR("AZZ")
    new = STR("BBZZ")
    replace = text.replace
    for _ in _RANGE_10:
        replace(old, new, 1)


####

# CCP does a lot of this, for internationalisation of ingame messages.
_format = "The %(thing)s is %(place)s the %(location)s."
_format_dict = { "thing":"THING", "place":"PLACE", "location":"LOCATION", }
_format_bytes = bytes_from_str(_format)
_format_unicode = unicode_from_str(_format)
# Same mapping with both keys and values converted to each string type.
_format_dict_bytes = dict(
    (bytes_from_str(key), bytes_from_str(value))
    for (key, value) in _format_dict.items())
_format_dict_unicode = dict(
    (unicode_from_str(key), unicode_from_str(value))
    for (key, value) in _format_dict.items())

def _get_format(STR):
    """Return the format string for the string type STR.

    Raises UnsupportedType for BYTES when running on Python 3 or later, and
    AssertionError when STR is neither UNICODE nor BYTES.
    """
    if STR is UNICODE:
        return _format_unicode
    if STR is not BYTES:
        raise AssertionError
    if sys.version_info >= (3,):
        raise UnsupportedType
    return _format_bytes

def _get_format_dict(STR):
    """Return the substitution dict for the string type STR.

    Raises UnsupportedType for BYTES when running on Python 3 or later, and
    AssertionError when STR is neither UNICODE nor BYTES.
    """
    if STR is UNICODE:
        return _format_dict_unicode
    if STR is not BYTES:
        raise AssertionError
    if sys.version_info >= (3,):
        raise UnsupportedType
    return _format_dict_bytes

# Formatting.
@bench('"The %(k1)s is %(k2)s the %(k3)s."%{"k1":"x","k2":"y","k3":"z",}',
       'formatting a string type with a dict', 1000)
def format_with_dict(STR):
    # Either getter may raise UnsupportedType, which main() reports as "N/A".
    s = _get_format(STR)
    d = _get_format_dict(STR)
    for x in _RANGE_1000:
        s % d


#### Upper- and lower- case conversion

@bench('("Where in the world is Carmen San Deigo?"*10).lower()',
       "case conversion -- rare", 1000)
def lower_conversion_rare(STR):
    s = STR("Where in the world is Carmen San Deigo?"*10)
    s_lower = s.lower
    for x in _RANGE_1000:
        s_lower()

@bench('("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10).lower()',
       "case conversion -- dense", 1000)
def lower_conversion_dense(STR):
    s = STR("WHERE IN THE WORLD IS CARMEN SAN DEIGO?"*10)
    s_lower = s.lower
    for x in _RANGE_1000:
        s_lower()


@bench('("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10).upper()',
       "case conversion -- rare", 1000)
def upper_conversion_rare(STR):
    # "Rare" means few characters need converting, so the input must be the
    # mostly-uppercase string named in the label.  The mostly-lowercase
    # string used before made this a second "dense" benchmark.
    s = STR("wHERE IN THE WORLD IS cARMEN sAN dEIGO?"*10)
    s_upper = s.upper
    for x in _RANGE_1000:
        s_upper()

@bench('("where in the world is carmen san deigo?"*10).upper()',
       "case conversion -- dense", 1000)
def upper_conversion_dense(STR):
    s = STR("where in the world is carmen san deigo?"*10)
    s_upper = s.upper
    for x in _RANGE_1000:
        s_upper()


# end of benchmarks

#################

class BenchTimer(timeit.Timer):
    """timeit.Timer that chooses its own loop count."""

    def best(self, repeat=1):
        """Return the smallest measured time per execution, in seconds.

        The loop count is raised through 10, 100, ... 10**9 until a single
        timing run takes more than 0.02 seconds (or the last count is
        reached).  That run counts as the first sample; ``repeat - 1``
        further runs use the same loop count, and the minimum is divided by
        the loop count.
        """
        for i in range(1, 10):
            number = 10**i
            x = self.timeit(number)
            if x > 0.02:
                break
        times = [x]
        for i in range(1, repeat):
            times.append(self.timeit(number))
        return min(times) / number

def _time_variant(func_name, type_name):
    """Time one benchmark for one string type.

    func_name is the benchmark's global name and type_name is "BYTES" or
    "UNICODE"; both are looked up on __main__ by the timed statement.
    Returns (seconds, text), where text is the time in milliseconds, or
    (0.0, "N/A") when the benchmark raises UnsupportedType.
    """
    stmt = "__main__.%s(__main__.%s)" % (func_name, type_name)
    try:
        seconds = BenchTimer(stmt, "import __main__").best(REPEAT)
    except UnsupportedType:
        return 0.0, "N/A"
    return seconds, "%.2f" % (1000 * seconds)

def main():
    """Run the selected benchmarks and print a timing table to stdout.

    Positional command-line arguments are substrings matched against each
    benchmark's group description; with none given, every benchmark runs.
    Raises SystemExit when both --8-bit and --unicode are given.
    """
    (options, test_names) = parser.parse_args()
    if options.bytes_only and options.unicode_only:
        raise SystemExit("Only one of --8-bit and --unicode are allowed")

    # Collect every global that the bench decorator tagged.
    bench_functions = []
    for (name, func) in globals().items():
        if not hasattr(func, "is_bench"):
            continue
        if test_names and not any(t in func.group for t in test_names):
            # Not selected, ignore
            continue
        if options.skip_re and hasattr(func, "uses_re"):
            continue
        bench_functions.append((func.group, name, func))
    # groupby() below only merges adjacent items, so sort by group first.
    bench_functions.sort()

    p("bytes\tunicode")
    p("(in ms)\t(in ms)\t%\tcomment")

    bytes_total = uni_total = 0.0

    for title, group in itertools.groupby(bench_functions,
                                          operator.itemgetter(0)):
        # Flush buffer before each group
        sys.stdout.flush()
        p("="*10, title)
        for (_, name, func) in group:
            # " - " marks a string type that was skipped by an option.
            bytes_time, bytes_time_s = 0.0, " - "
            if not options.unicode_only:
                bytes_time, bytes_time_s = _time_variant(name, "BYTES")
                bytes_total += bytes_time
            uni_time, uni_time_s = 0.0, " - "
            if not options.bytes_only:
                uni_time, uni_time_s = _time_variant(name, "UNICODE")
                uni_total += uni_time
            # The ratio is reported as 0 when there is no unicode time.
            average = bytes_time / uni_time if uni_time else 0.0
            p("%s\t%s\t%.1f\t%s (*%d)" % (
                bytes_time_s, uni_time_s, 100.*average,
                func.comment, func.repeat_count))

    if bytes_total == uni_total == 0.0:
        p("That was zippy!")
    else:
        ratio = bytes_total / uni_total if uni_total else 0.0
        p("%.2f\t%.2f\t%.1f\t%s" % (
            1000*bytes_total, 1000*uni_total, 100.*ratio,
            "TOTAL"))

if __name__ == "__main__":
    main()