#!/usr/bin/python2
"""Cleans output from other scripts to eliminate duplicates.

When sampling data frequently, records occasionally share the same timestamp
(because perf recorded twice within the same second).

This script keeps only one record per timestamp. Order with respect to
timestamps is not preserved. It assumes the log file is a CSV whose first
value in each row is the time in seconds from a standard reference time.

"""

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('filename')
args = parser.parse_args()

my_file = open(args.filename)
# Results are appended to clean2.csv; any existing contents are kept.
output_file = open('clean2.csv', 'a')
dictionary = dict()

# Key each row by its integer timestamp; a later row with the same timestamp
# overwrites an earlier one, so only one row per second survives.
for line in my_file:
    new_time = int(line.split(',')[0])
    dictionary[new_time] = line

# Write the surviving rows out (dictionary key order is arbitrary).
for key in dictionary.keys():
    output_file.write(dictionary[key])

my_file.close()
output_file.close()
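
# A minimal illustration of the deduplication above, using hypothetical rows
# (these timestamps and values are made up, not taken from any real log):
#
#     1500000000,3.2
#     1500000000,3.4
#     1500000001,3.9
#
# Both rows stamped 1500000000 map to the same dictionary key and the later
# row overwrites the earlier one, so only two rows are appended to clean2.csv
# (in no guaranteed order):
#
#     1500000000,3.4
#     1500000001,3.9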