from __future__ import print_function
import sys
import os
import timeit
import math
import argparse
import json

parser = argparse.ArgumentParser(description="Python protobuf benchmark")
parser.add_argument("data_files", metavar="dataFile", nargs="+",
                    help="Testing data files.")
parser.add_argument("--json", action="store_true", dest="json",
                    help="Whether to output results as JSON")
parser.add_argument("--behavior_prefix", dest="behavior_prefix",
                    help="Prefix for the behavior names in the JSON output",
                    default="")
# BEGIN CPP GENERATED MESSAGE
parser.add_argument("--cpp_generated", action="store_true",
                    dest="cpp_generated",
                    help="Whether to link the generated code library")
# END CPP GENERATED MESSAGE
args = parser.parse_args()
# BEGIN CPP GENERATED MESSAGE
# The C++ generated code must be loaded before importing the generated Python
# code, so that the descriptors can be found in the descriptor pool.
if args.cpp_generated:
  sys.path.append(
      os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/.libs")
  import libbenchmark_messages
  sys.path.append(
      os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/tmp")
# END CPP GENERATED MESSAGE


import datasets.google_message1.proto2.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2
import datasets.google_message1.proto3.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2
import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2
import datasets.google_message3.benchmark_message3_pb2 as benchmark_message3_pb2
import datasets.google_message4.benchmark_message4_pb2 as benchmark_message4_pb2
import benchmarks_pb2


def run_one_test(filename):
  """Runs the parse and serialize benchmarks for one dataset file."""
  with open(filename, "rb") as f:
    data = f.read()
  benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
  benchmark_dataset.ParseFromString(data)
  total_bytes = 0
  for payload in benchmark_dataset.payload:
    total_bytes += len(payload)
  benchmark_util = Benchmark(full_iteration=len(benchmark_dataset.payload),
                             module="py_benchmark",
                             setup_method="init",
                             total_bytes=total_bytes)
  result = {}
  result["filename"] = filename
  result["message_name"] = benchmark_dataset.message_name
  result["benchmarks"] = {}
  benchmark_util.set_test_method("parse_from_benchmark")
  result["benchmarks"][args.behavior_prefix + "_parse_from_benchmark"] = \
      benchmark_util.run_benchmark(setup_method_args='"%s"' % filename)
  benchmark_util.set_test_method("serialize_to_benchmark")
  result["benchmarks"][args.behavior_prefix + "_serialize_to_benchmark"] = \
      benchmark_util.run_benchmark(setup_method_args='"%s"' % filename)
  return result


def init(filename):
  """Loads the dataset and pre-parses every payload.

  Runs inside timeit's setup code, so its cost is excluded from the
  measured time.
  """
  global benchmark_dataset, message_class, message_list, counter, total_bytes
  message_list = []
  counter = 0
  total_bytes = 0
  with open(filename, "rb") as f:
    data = f.read()
  benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
  benchmark_dataset.ParseFromString(data)

  if benchmark_dataset.message_name == "benchmarks.proto3.GoogleMessage1":
    message_class = benchmark_message1_proto3_pb2.GoogleMessage1
  elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage1":
    message_class = benchmark_message1_proto2_pb2.GoogleMessage1
  elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage2":
    message_class = benchmark_message2_pb2.GoogleMessage2
  elif (benchmark_dataset.message_name ==
        "benchmarks.google_message3.GoogleMessage3"):
    message_class = benchmark_message3_pb2.GoogleMessage3
  elif (benchmark_dataset.message_name ==
        "benchmarks.google_message4.GoogleMessage4"):
    message_class = benchmark_message4_pb2.GoogleMessage4
  else:
    raise IOError("Message %s not found!" % benchmark_dataset.message_name)

  for one_payload in benchmark_dataset.payload:
    temp = message_class()
    temp.ParseFromString(one_payload)
    message_list.append(temp)
    total_bytes += len(one_payload)


def parse_from_benchmark():
  """Parses one payload into a fresh message, cycling through the payloads."""
  global counter
  message_class().ParseFromString(
      benchmark_dataset.payload[counter % len(benchmark_dataset.payload)])
  counter += 1


def serialize_to_benchmark():
  """Serializes one pre-parsed message, cycling through the message list."""
  global counter
  message_list[counter % len(message_list)].SerializeToString()
  counter += 1


class Benchmark:
  def __init__(self, module=None, test_method=None,
               setup_method=None, total_bytes=None, full_iteration=1):
    self.full_iteration = full_iteration
    self.module = module
    self.test_method = test_method
    self.setup_method = setup_method
    self.total_bytes = total_bytes

  def set_test_method(self, test_method):
    self.test_method = test_method

  def full_setup_code(self, setup_method_args=''):
    # timeit runs this setup code once before timing: it imports the test
    # and setup methods from this module and calls the setup method.
    setup_code = ""
    setup_code += "from %s import %s\n" % (self.module, self.test_method)
    setup_code += "from %s import %s\n" % (self.module, self.setup_method)
    setup_code += "%s(%s)\n" % (self.setup_method, setup_method_args)
    return setup_code

  def dry_run(self, test_method_args='', setup_method_args=''):
    return timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
                         setup=self.full_setup_code(setup_method_args),
                         number=self.full_iteration)

  def run_benchmark(self, test_method_args='', setup_method_args=''):
    reps = self.full_iteration
    # Start with one pass over the dataset; if that takes less than 3
    # seconds, scale the repetition count up so that the timed run lasts
    # at least 3 seconds and the measurement is stable.
    t = self.dry_run(test_method_args, setup_method_args)
    if t < 3:
      reps = int(math.ceil(3 / t)) * self.full_iteration
    if reps != self.full_iteration:
      t = timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
                        setup=self.full_setup_code(setup_method_args),
                        number=reps)
    # Throughput in MB/s: the dataset size in MB (2**20 bytes) divided by
    # the time of one full pass over the dataset (t / reps seconds per call,
    # times full_iteration calls per pass).
    return self.total_bytes * 1.0 / 2 ** 20 / (1.0 * t / reps * self.full_iteration)


if __name__ == "__main__":
  results = []
  for data_file in args.data_files:
    results.append(run_one_test(data_file))

  if args.json:
    print(json.dumps(results))
  else:
    for result in results:
      print("Message %s of dataset file %s" %
            (result["message_name"], result["filename"]))
      print("Average throughput for parse_from_benchmark: %.2f MB/s" %
            result["benchmarks"][args.behavior_prefix + "_parse_from_benchmark"])
      print("Average throughput for serialize_to_benchmark: %.2f MB/s" %
            result["benchmarks"][args.behavior_prefix + "_serialize_to_benchmark"])
      print("")