• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
import itertools
from typing import Dict, List, Optional, Tuple
3
class DataFrame:
  """Table-like class for storing a 2D table of cells with named columns.

  Rows are stored as a list of ``{header: value}`` dicts. A header that is
  missing from a row dict is an empty cell and is reported as ``None`` by the
  row accessors. Headers keep the order in which they were first seen.
  """

  def __init__(self, data: Optional[Dict[str, List[object]]] = None):
    """
    Create a new DataFrame from a dictionary (keys = headers,
    values = columns).

    Columns may have different lengths: the table gets as many rows as the
    longest column, and shorter columns leave empty cells in the trailing
    rows. Omitting data (or passing None) creates an empty DataFrame.
    """
    # None instead of a mutable `{}` default, so no dict is shared across calls.
    if data is None:
      data = {}

    self._headers = list(data)
    self._rows = []

    row_count = max((len(column) for column in data.values()), default=0)
    for idx in range(row_count):
      row = {}
      for header, column in data.items():
        # Columns that already ran out simply leave this cell empty.
        if idx < len(column):
          row[header] = column[idx]
      self._rows.append(row)

  def concat_rows(self, other: 'DataFrame') -> None:
    """
    In-place concatenate rows of other into the rows of the
    current DataFrame.

    Headers that only exist in other are appended to this DataFrame's
    headers; pre-existing rows then read as None in those new columns.
    """
    other_headers = other.headers

    # _data_only() fixes its row count when iteration starts, so this also
    # terminates when other is self.
    for row_values in other._data_only():
      self._append_row(other_headers, row_values)

  def _append_row(self, headers: List[str], data: List[object]):
    """
    Append one row built by pairing headers with data positionally.

    Pairing stops at the shorter of the two sequences. Every header passed in
    is registered in the header list if it is not already known.
    """
    self._rows.append(dict(zip(headers, data)))

    for header in headers:
      if header not in self._headers:
        self._headers.append(header)

  def __repr__(self):
    """
    Render the table as text: one header line followed by one line per row,
    each line terminated by a newline.

    Every column is right-aligned to len(header) + 1 characters; longer
    values overflow their column instead of being truncated.
    """
    headers = self._headers
    row_format = "".join("{:>%d}" % (len(header) + 1) for header in headers)

    lines = [row_format.format(*headers)]
    for row in self._data_only():
      # Cells are stringified first: None (an empty cell) and many other
      # objects reject an alignment format spec and would raise TypeError.
      lines.append(row_format.format(*(str(cell) for cell in row)))

    return "\n".join(lines) + "\n"

  def __eq__(self, other):
    """
    Two DataFrames are equal when their headers (including order) and their
    data tables are equal. Other types are left to Python's default handling,
    which makes the comparison evaluate to False.
    """
    if not isinstance(other, self.__class__):
      return NotImplemented
    return self.headers == other.headers and self.data_table == other.data_table

  # Defining __eq__ already makes instances unhashable; spelled out because
  # the object is mutable and must not be used as a dict key or set member.
  __hash__ = None

  @property
  def headers(self) -> List[str]:
    """A copy of the header names, in column order."""
    return list(self._headers)

  @property
  def data_table(self) -> List[List[object]]:
    """All rows as lists of cell values (None for empty cells), in header order."""
    return list(self._data_only())

  @property
  def data_table_transposed(self) -> List[Tuple[object, ...]]:
    """All columns as tuples of cell values, in header order.

    The result is empty when the DataFrame has no rows.
    """
    return list(self._transposed_data())

  @property
  def data_row_len(self) -> int:
    """Number of data rows (the header line is not counted)."""
    return len(self._rows)

  def data_row_at(self, idx) -> List[object]:
    """
    Return a single data row at the specified index (0th based).

    Accepts negative indices, e.g. -1 is last row.
    Empty cells are returned as None. Raises IndexError if idx is out of
    range.
    """
    row_dict = self._rows[idx]
    return [row_dict.get(header) for header in self._headers]

  def copy(self) -> 'DataFrame':
    """
    Shallow copy of this DataFrame.

    The header list and every row dict are copied; the cell values
    themselves are shared with the original.
    """
    # Exactly one repeat of each row; count=0 would drop all the data.
    return self.repeat(count=1)

  def repeat(self, count: int) -> 'DataFrame':
    """
    Returns a new DataFrame where each row of this dataframe is repeated count times.
    A repeat of a row is adjacent to other repeats of that same row.

    A count of zero or less yields a DataFrame with the same headers and no
    rows.
    """
    df = DataFrame()
    df._headers = self._headers.copy()
    df._rows = [row.copy() for row in self._rows for _ in range(count)]
    return df

  def merge_data_columns(self, other: 'DataFrame'):
    """
    Merge self and another DataFrame by adding the data from other column-wise.
    For any headers that are the same, data from 'other' is preferred.

    Rows are matched by position. If other has more rows than self, the
    extra rows are appended to self.
    """
    for header in other._headers:
      if header not in self._headers:
        self._headers.append(header)

    # New rows are collected and appended afterwards: appending to self._rows
    # while zip_longest is still iterating it would feed them back into the loop.
    extra_rows = []

    for self_row, other_row in itertools.zip_longest(self._rows, other._rows):
      # Only None marks an exhausted side; an existing empty row ({}) is a
      # real row and must be filled in place.
      if self_row is None:
        self_row = {}
        extra_rows.append(self_row)

      if other_row is not None:
        self_row.update(other_row)

    self._rows.extend(extra_rows)

  def data_row_reduce(self, fnc) -> 'DataFrame':
    """
    Reduces the data row-wise by applying the fnc to each row (column-wise).
    Empty cells (None) are skipped; falsy values such as 0 or "" are kept.

    fnc(Iterable[object]) -> object
    fnc is applied over every non-empty cell in that column (descending row-wise).
    For a column without any non-empty cell, fnc receives an empty iterable.

    Example:
      DataFrame({'a':[1,2,3]}).data_row_reduce(sum) == DataFrame({'a':[6]})

    Returns a new single-row DataFrame.
    """
    df = DataFrame()
    df._headers = self._headers.copy()
    df._rows = [{header: fnc(self._column_cells(header)) for header in df._headers}]
    return df

  def _column_cells(self, header):
    """Yield the non-empty (not None) cells of one column, top to bottom."""
    for row_dict in self._rows:
      value = row_dict.get(header)
      if value is not None:
        yield value

  def _headers_only(self):
    """Return the internal header list itself (not a copy)."""
    return self._headers

  def _data_only(self):
    """Yield each row as a list of cell values in header order."""
    # The row count is fixed when iteration starts, so rows appended while
    # iterating (e.g. concat_rows onto self) are not visited.
    for idx in range(len(self._rows)):
      yield self.data_row_at(idx)

  def _transposed_data(self):
    """Return an iterator over the columns, each as a tuple of cell values."""
    return zip(*self._data_only())