import itertools
from typing import Dict, List, Optional, Tuple


class DataFrame:
    """Table-like class for storing a 2D cells table with named columns.

    Internally the table is a list of row dicts (header -> cell value) plus
    an ordered list of headers. A row dict that lacks a header represents a
    blank cell; blank cells are reported as ``None`` by the accessors.
    """

    def __init__(self, data: Optional[Dict[str, List[object]]] = None):
        """
        Create a new DataFrame from a dictionary (keys = headers,
        values = columns).

        Columns may have different lengths; a shorter column simply leaves
        blank cells in the trailing rows. Omitting ``data`` (or passing
        ``None``) creates an empty DataFrame.
        """
        if data is None:
            data = {}

        self._headers = list(data)
        self._rows = []

        # The table has as many rows as the longest column.
        row_count = max((len(column) for column in data.values()), default=0)
        for idx in range(row_count):
            self._rows.append({
                header: column[idx]
                for header, column in data.items()
                if idx < len(column)
            })

    def concat_rows(self, other: 'DataFrame') -> None:
        """
        In-place concatenate rows of other into the rows of the
        current DataFrame.

        None is added in pre-existing cells if new headers
        are introduced.
        """
        # Snapshot headers and rows first so that concatenating a DataFrame
        # with itself does not iterate over rows while they are being added.
        other_headers = other.headers
        other_rows = other.data_table

        for row in other_rows:
            self._append_row(other_headers, row)

    def _append_row(self, headers: List[str], data: List[object]):
        """Append one row built from parallel ``headers`` / ``data`` iterables.

        Headers not yet known to this DataFrame are registered in the order
        they are encountered.
        """
        # Materialise once: ``headers`` is consumed twice below, which would
        # silently skip header registration for a one-shot iterator.
        headers = list(headers)
        self._rows.append(dict(zip(headers, data)))

        for header in headers:
            if header not in self._headers:
                self._headers.append(header)

    def __repr__(self):
        """Return a plain-text table: one header line, then one line per row.

        Every column is right-aligned to ``len(header) + 1`` characters;
        wider cells overflow rather than being truncated. Cells are rendered
        with ``str()``, so blank (``None``) cells no longer raise.
        """
        header_list = [str(h) for h in self._headers_only()]
        widths = [len(h) + 1 for h in header_list]

        lines = ["".join(h.rjust(w) for h, w in zip(header_list, widths))]
        for row in self._data_only():
            lines.append(
                "".join(str(cell).rjust(w) for cell, w in zip(row, widths))
            )

        return "\n".join(lines) + "\n"

    def __eq__(self, other):
        """Two DataFrames are equal when headers and data tables are equal."""
        if not isinstance(other, self.__class__):
            # Let Python try the reflected comparison; ``==`` still ends up
            # False for unrelated types.
            return NotImplemented
        return (
            self.headers == other.headers
            and self.data_table == other.data_table
        )

    # Defining __eq__ already makes instances unhashable; state it explicitly
    # because the object is mutable.
    __hash__ = None

    @property
    def headers(self) -> List[str]:
        """A copy of the header list, in column order."""
        return list(self._headers_only())

    @property
    def data_table(self) -> List[List[object]]:
        """All rows as lists ordered by header; blank cells are ``None``."""
        return list(self._data_only())

    @property
    def data_table_transposed(self) -> List[Tuple[object, ...]]:
        """All columns, one tuple per header, in header order."""
        return list(self._transposed_data())

    @property
    def data_row_len(self) -> int:
        """Number of data rows."""
        return len(self._rows)

    def data_row_at(self, idx) -> List[object]:
        """
        Return a single data row at the specified index (0th based).

        Accepts negative indices, e.g. -1 is last row.
        Raises IndexError if the index is out of range.
        """
        row_dict = self._rows[idx]
        # dict.get yields None for headers this row has no value for.
        return [row_dict.get(h) for h in self._headers_only()]

    def copy(self) -> 'DataFrame':
        """
        Shallow copy of this DataFrame.

        The header list and each row dict are copied; the cell values
        themselves are shared with the original.
        """
        # Repeating every row exactly once yields a copy (a count of 0
        # would produce a DataFrame without any rows).
        return self.repeat(count=1)

    def repeat(self, count: int) -> 'DataFrame':
        """
        Returns a new DataFrame where each row of this dataframe is repeated count times.
        A repeat of a row is adjacent to other repeats of that same row.

        A count of 0 (or less) yields a DataFrame with headers but no rows.
        """
        df = DataFrame()
        df._headers = self._headers.copy()
        df._rows = [row.copy() for row in self._rows for _ in range(count)]
        return df

    def merge_data_columns(self, other: 'DataFrame'):
        """
        Merge self and another DataFrame by adding the data from other column-wise.
        For any headers that are the same, data from 'other' is preferred.

        Rows are matched by position. If 'other' has more rows than self,
        the extra rows are appended to self.
        """
        for h in other._headers:
            if h not in self._headers:
                self._headers.append(h)

        # New rows are collected and added after the loop so the list being
        # zipped over is not modified during iteration.
        extra_rows = []

        for own_row, other_row in itertools.zip_longest(self._rows, other._rows):
            # zip_longest pads with None. An existing but empty row dict must
            # be merged into in place, not mistaken for a missing row.
            if own_row is None:
                own_row = {}
                extra_rows.append(own_row)

            if other_row:
                own_row.update(other_row)

        self._rows.extend(extra_rows)

    def data_row_reduce(self, fnc) -> 'DataFrame':
        """
        Reduces the data row-wise by applying the fnc to each row (column-wise).
        Empty cells are skipped.

        fnc(Iterable[object]) -> object
        fnc is applied over every non-empty cell in that column (descending row-wise).
        A cell is empty when the row has no value for the header or the
        value is None; falsy values such as 0 or "" are passed to fnc.

        Example:
        DataFrame({'a':[1,2,3]}).data_row_reduce(sum) == DataFrame({'a':[6]})

        Returns a new single-row DataFrame.
        """
        df = DataFrame()
        df._headers = self._headers.copy()

        def column_values(header_key):
            for row_dict in self._rows:
                val = row_dict.get(header_key)
                if val is not None:
                    yield val

        df._rows = [{h: fnc(column_values(h)) for h in df._headers}]
        return df

    def _headers_only(self):
        """Return the internal (live, not copied) header list."""
        return self._headers

    def _data_only(self):
        """Lazily yield each row as a list ordered by header."""
        # The row count is captured when iteration starts, so rows appended
        # while the generator is being consumed are not yielded.
        row_len = len(self._rows)

        for i in range(row_len):
            yield self.data_row_at(i)

    def _transposed_data(self):
        """Return an iterator over columns (each a tuple of cell values)."""
        return zip(*self._data_only())