• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/python
2"""Cleans output from other scripts to eliminate duplicates.
3
4When frequently sampling data, we see that records occasionally will contain
5the same timestamp (due to perf recording twice in the same second).
6
7This removes all of the duplicate timestamps for every record. Order with
8respect to timestamps is not preserved. Also, the assumption is that the log
9file is a csv with the first value in each row being the time in seconds from a
10standard time.
11
12"""
13
14import argparse
15
def dedupe_by_timestamp(lines):
  """Return one line per distinct timestamp, keeping the last one seen.

  Args:
    lines: iterable of csv rows (strings) whose first comma-separated field
      is an integer timestamp.

  Returns:
    A list with exactly one row per distinct timestamp. When several rows
    share a timestamp, the row that appeared last in `lines` is kept. The
    order of the returned rows follows dict iteration order and should not
    be relied upon.

  Raises:
    ValueError: if a non-blank row does not start with an integer.
  """
  latest_by_time = {}
  for line in lines:
    if not line.strip():
      # Blank rows (e.g. a trailing newline at end of file) carry no
      # timestamp; skip them rather than failing inside int().
      continue
    timestamp = int(line.split(',', 1)[0])
    # Later rows overwrite earlier ones with the same timestamp.
    latest_by_time[timestamp] = line
  return list(latest_by_time.values())


def main(argv=None):
  """Read the csv named on the command line and append its unique rows.

  Args:
    argv: optional list of command-line arguments; defaults to sys.argv[1:]
      when None.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('filename')
  args = parser.parse_args(argv)

  # Context managers guarantee both files are flushed and closed, even if a
  # malformed row raises part-way through.
  with open(args.filename) as log_file:
    unique_lines = dedupe_by_timestamp(log_file)

  # Append mode: results accumulate in clean2.csv across runs.
  with open('clean2.csv', 'a') as output_file:
    output_file.writelines(unique_lines)


if __name__ == '__main__':
  main()
30