#!/usr/bin/env python
# Copyright 2016 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''
This script "stitches" the NetLog files from a ".inprogress" directory to
create a single NetLog file.
'''

import glob
import os
import re
import sys


USAGE = '''Usage: stitch_net_log_files.py <INPROGRESS_DIR> [<OUTPUT_PATH>]

Will copy all the files in <INPROGRESS_DIR> and write their content into a
NetLog file at path <OUTPUT_PATH>.

If <OUTPUT_PATH> is not specified, <INPROGRESS_DIR> should end with
".inprogress", and the completed NetLog file will be written to the location
with ".inprogress" stripped.
'''

# Raw strings so that the regex escapes (\d, \.) are not interpreted as
# (invalid) Python string escapes.
_EVENT_FILE_RE = re.compile(r'^event_file_(\d+)\.json$')
_INPROGRESS_DIR_RE = re.compile(r'^(.*)\.inprogress/?$')


def get_event_file_sort_key(path):
    '''Returns a tuple (modification timestamp, file number) for a path of
    the form event_file_%d.json.

    |path| is matched as-is, so it must be a bare file name relative to the
    current working directory. Raises ValueError if it does not have the
    expected form.'''

    m = _EVENT_FILE_RE.match(path)
    if not m:
        raise ValueError('Not an event file name: %r' % (path,))
    return (os.path.getmtime(path), int(m.group(1)))


def get_ordered_event_files():
    '''Returns a list of file paths to event files in the current working
    directory. The order of the files is from oldest to newest. If
    modification times are the same, files will be ordered based on the
    numeral in their file name.

    Files that match the glob but do not have a purely numeric index (for
    instance "event_file_backup.json") are skipped.'''

    candidates = glob.glob("event_file_*.json")
    paths = sorted((p for p in candidates if _EVENT_FILE_RE.match(p)),
                   key=get_event_file_sort_key)
    sys.stdout.write("Identified %d event files:\n %s\n" %
                     (len(paths), "\n ".join(paths)))
    return paths


def _copy_file_or_exit(src_path, stitched_file):
    '''Appends the contents of |src_path| to the open file |stitched_file|.

    On IOError, reports the failure on stderr and exits with status 1.'''

    try:
        with open(src_path) as src:
            for chunk in src:
                stitched_file.write(chunk)
    except IOError:
        sys.stderr.write("Failed reading \"%s\".\n" % src_path)
        sys.exit(1)


def _write_events(stitched_file):
    '''Appends the contents of every event file to |stitched_file|.

    Output always lags one non-blank line behind the input so that the very
    last event can have its trailing comma removed before it is written.
    Raises ValueError if the last event does not end with a comma.'''

    # The held-back line. It starts out empty so that a session with no
    # event files at all writes nothing here (rather than re-processing
    # whatever line happened to be read last from another file).
    pending = ""
    first_file = True
    for event_file_path in get_ordered_event_files():
        try:
            with open(event_file_path) as event_file:
                if first_file:
                    pending = event_file.readline()
                    first_file = False
                for next_line in event_file:
                    if next_line.strip() == "":
                        # Blank lines ride along with the held-back line.
                        pending += next_line
                    else:
                        stitched_file.write(pending)
                        pending = next_line
        except IOError:
            sys.stderr.write("Failed reading \"%s\"\n" % event_file_path)
            sys.exit(1)

    # Remove hanging comma from last event
    # TODO(dconnol): Check if the last line is a valid JSON object. If not,
    # do not write the line to file. This handles incomplete logs.
    pending = pending.strip()
    if pending.endswith(","):
        stitched_file.write(pending[:-1])
    elif pending:
        raise ValueError('Last event is not properly formed')


def main():
    '''Stitches the files of the ".inprogress" directory named by
    sys.argv[1] into a single NetLog file.

    The output path is sys.argv[2] if given; otherwise it is the directory
    path with its ".inprogress" suffix stripped. Usage and I/O errors are
    reported on stderr and exit with status 1. Changes the current working
    directory to the ".inprogress" directory.'''

    if len(sys.argv) not in (2, 3):
        sys.stderr.write(USAGE)
        sys.exit(1)

    inprogress_dir = sys.argv[1]

    # Pick an output path based on command line arguments.
    if len(sys.argv) == 3:
        output_path = sys.argv[2]
    else:
        m = _INPROGRESS_DIR_RE.match(inprogress_dir)
        if not m:
            sys.stderr.write("Must specify OUTPUT_PATH\n")
            sys.exit(1)
        output_path = m.group(1)

    # Resolve to an absolute path now: the chdir below would otherwise
    # change what a relative output path refers to.
    output_path = os.path.abspath(output_path)

    sys.stdout.write("Reading data from: %s\n" % inprogress_dir)
    sys.stdout.write("Writing log file to: %s\n" % output_path)

    os.chdir(inprogress_dir)

    with open(output_path, "w") as stitched_file:
        _copy_file_or_exit("constants.json", stitched_file)
        _write_events(stitched_file)

        if os.path.exists("end_netlog.json"):
            _copy_file_or_exit("end_netlog.json", stitched_file)
        else:
            # end_netlog.json won't exist when using this tool to stitch
            # logging sessions that didn't shutdown gracefully.
            #
            # Close the events array and then the log (no polled_data).
            stitched_file.write("]}\n")


if __name__ == "__main__":
    main()