# We did not author this file nor maintain it. Skip linting it.
#pylint: skip-file
# Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Comments and/or additions are welcome (send e-mail to:
# strang@nmr.mgh.harvard.edu).
#
"""pstat.py module

#################################################
#######  Written by:  Gary Strangman  ###########
#######  Last modified:  Dec 18, 2007 ###########
#################################################

This module provides some useful list and array manipulation routines
modeled after those found in the |Stat package by Gary Perlman, plus a
number of other useful list/file manipulation functions.  The list-based
functions include:

      abut (source,*args)
      simpleabut (source, addon)
      colex (listoflists,cnums)
      collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
      dm (listoflists,criterion)
      flat (l)
      linexand (listoflists,columnlist,valuelist)
      linexor (listoflists,columnlist,valuelist)
      linedelimited (inlist,delimiter)
      lineincols (inlist,colsize)
      lineincustcols (inlist,colsizes)
      list2string (inlist)
      makelol(inlist)
      makestr(x)
      printcc (lst,extra=2)
      printincols (listoflists,colsize)
      pl (listoflists)
      printl(listoflists)
      replace (lst,oldval,newval)
      recode (inlist,listmap,cols=None)
      remap (listoflists,criterion)
      roundlist (inlist,num_digits_to_round_floats_to)
      sortby(listoflists,sortcols)
      unique (inlist)
      duplicates(inlist)
      nonrepeats(inlist)
      writedelimited (listoflists, delimiter, file, writetype='w')

Some of these functions have alternate versions which are defined only if
Numeric (NumPy) can be imported.  These functions are generally named as
above, with an 'a' prefix.

      aabut (source, *args)
      acolex (a,indices,axis=1)
      acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
      adm (a,criterion)
      alinexand (a,columnlist,valuelist)
      alinexor (a,columnlist,valuelist)
      areplace (a,oldval,newval)
      arecode (a,listmap,col='all')
      arowcompare (row1, row2)
      arowsame (row1, row2)
      asortrows(a,axis=0)
      aunique(inarray)
      aduplicates(inarray)

Currently, the code is all but completely un-optimized.  In many cases, the
array versions of functions amount simply to aliases to built-in array
functions/methods.  Their inclusion here is for function name consistency.

SECURITY NOTE: colex (string form), dm, adm and remap eval() the expression
string they are given.  Never pass them text from an untrusted source.
"""

## CHANGE LOG:
## ==========
## 07-11-26 ... edited to work with numpy
## 01-11-15 ... changed list2string() to accept a delimiter
## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
## 01-05-31 ... added duplicates() and aduplicates() functions
## 00-12-28 ... license made GPL, docstring and import requirements
## 99-11-01 ... changed version to 0.3
## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
## 03/27/99 ... added areplace function, made replace fcn recursive
## 12/31/98 ... added writefc function for output to fixed column sizes
## 12/07/98 ... fixed import problem (failed on collapse() fcn)
##              added __version__ variable (now 0.2)
## 12/05/98 ... updated doc-strings
##              added features to collapse() function
##              added flat() function for lists
##              fixed a broken asortrows()
## 11/16/98 ... fixed minor bug in aput for 1D arrays
##
## 11/08/98 ... fixed aput to output large arrays correctly

import copy
import string  # no longer used here; kept so existing "pstat.string" users work
import sys
from types import *  # kept for backward compatibility with star-importers

try:
    # Nothing in this module calls into stats directly, but the name is kept
    # in the module globals so criterion strings such as 'stats.mean(x)' keep
    # working when the package is installed.
    import stats
except ImportError:
    stats = None

__version__ = 0.4

###===========================  LIST FUNCTIONS  ==========================
###
### Here are the list functions, DEFINED FOR ALL SYSTEMS.
### Array functions (for NumPy-enabled computers) appear below.
###

_SEQUENCE_TYPES = (list, tuple)


def _is_sequence(obj, sequence_types=_SEQUENCE_TYPES):
    """Return True if obj is one of the container types treated as a list."""
    return isinstance(obj, sequence_types)


def _as_sequence(obj, sequence_types=_SEQUENCE_TYPES):
    """Return obj unchanged if it is a sequence, else wrap it as [obj]."""
    if isinstance(obj, sequence_types):
        return obj
    return [obj]


def _row_function(expression):
    """
    Compile 'lambda x: <expression>' into a callable.

    The expression is evaluated with this module's globals, exactly as the
    eval()-based originals did.  It is arbitrary code: callers must only pass
    trusted text.
    """
    return eval('lambda x: ' + expression)


def _apply_or_na(fcn, values):
    """Return fcn(values), or the placeholder 'N/A' if fcn raises."""
    try:
        return fcn(values)
    except Exception:  # best-effort by design: a failing summary fcn -> 'N/A'
        return 'N/A'


def _cycle_to_length(seq, length):
    """Repeat seq end-to-end and cut the result to exactly `length` items."""
    repeats = -(-length // len(seq))  # ceiling division, valid on Py2 and Py3
    cycled = list(seq)
    for _ in range(repeats - 1):
        # repeated copies are deep copies, as in the original implementation
        cycled.extend(copy.deepcopy(list(seq)))
    return cycled[:length]


def _fit_column(item, width):
    """Right-align str(item) in `width` characters (helper for lineincols)."""
    text = makestr(item)
    if len(text) <= width:
        return text.rjust(width)
    # NOTE(review): over-long items are cut to width + 1 characters, not
    # width.  This looks like an off-by-one but is the historical behavior,
    # so it is preserved here -- confirm before changing.
    return text[0:width + 1]


def abut(source, *args):
    """
    Like the |Stat abut command.  It concatenates two lists side-by-side
    and returns the result.  '2D' lists are also accommodated for either
    argument (source or addon).  CAUTION:  If one list is shorter, it will be
    repeated until it is as long as the longest list.  If this behavior is not
    desired, use pstat.simpleabut().  An empty list is never repeated.

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on the
             'left', lists in args attached consecutively on the 'right'
    """
    source = _as_sequence(source)
    for addon in args:
        addon = _as_sequence(addon)
        if len(source) > 0 and len(addon) > 0:
            if len(addon) < len(source):
                addon = _cycle_to_length(addon, len(source))
            elif len(source) < len(addon):
                source = _cycle_to_length(source, len(addon))
        source = simpleabut(source, addon)
    return source


def simpleabut(source, addon):
    """
    Concatenates two lists as columns and returns the result.  '2D' lists
    are also accommodated for either argument (source or addon).  This DOES
    NOT repeat either list to make the 2 lists of equal length.  Beware of
    list pairs with different lengths ... the resulting list will be the
    length of the FIRST list passed; rows of source that have no partner in
    addon are returned (deep-copied) unchanged.

    Whether an argument is 1D or 2D is decided from its first element.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a list of lists as long as source, with source on the 'left' and
             addon on the 'right'
    """
    source = _as_sequence(source)
    addon = _as_sequence(addon)
    minlen = min(len(source), len(addon))
    if minlen == 0:  # nothing to attach (or nothing to attach to)
        return copy.deepcopy(list(source))
    source_is_2d = _is_sequence(source[0])
    addon_is_2d = _is_sequence(addon[0])
    rows = []
    for i in range(minlen):
        left = source[i] if source_is_2d else [source[i]]
        right = addon[i] if addon_is_2d else [addon[i]]
        if type(left) is not type(right):  # e.g. list + tuple
            left, right = list(left), list(right)
        rows.append(left + right)
    rows.extend(copy.deepcopy(list(source[minlen:])))
    return rows


def colex(listoflists, cnums):
    """
    Extracts from listoflists the columns specified in 'cnums'.  cnums can be
    an integer, a sequence of integers, or a string holding the subscript to
    apply to each row ... e.g., '[3:]' will colex columns 3 onward from the
    listoflists.  (The string is appended to 'x' and eval'd; trusted input
    only.)

    Usage:   colex (listoflists,cnums)
    Returns: a list-of-lists corresponding to the columns from listoflists
             specified by cnums, in the order the column numbers appear in
             cnums (a flat list when a single column is requested)
    """
    if _is_sequence(cnums):  # multiple columns to get
        column = [row[cnums[0]] for row in listoflists]
        for col in cnums[1:]:
            column = abut(column, [row[col] for row in listoflists])
        return column
    if isinstance(cnums, str):  # a '[3:]' type expression
        getter = _row_function('x' + cnums)
        return [getter(row) for row in listoflists]
    return [row[cnums] for row in listoflists]  # just 1 column to get


def collapse(listoflists,
             keepcols,
             collapsecols,
             fcn1=None,
             fcn2=None,
             cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column numbers), retaining the
    unique sets of values in keepcols, the mean for each.  Setting fcn1
    and/or fcn2 to point to a function rather than None (e.g., stats.sterr,
    len) will append those results (e.g., the sterr, N) after each calculated
    mean; a function that raises contributes 'N/A'.  cfcn is the collapse
    function to apply (defaults to mean, defined here in the pstat module to
    avoid circular imports with stats.py, but harmonicmean or others could be
    passed).

    Usage:   collapse
    (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: a list of lists with all unique permutations of entries appearing
         in columns ("conditions") specified by keepcols, abutted with the
         result of cfcn (if cfcn=None, defaults to the mean) of each column
         specified by collapsecols.  If keepcols is empty, one entry per
         collapse column is returned instead (a bare value, or
         [value, fcn1-result, fcn2-result] when extra functions are given).
    """

    def collmean(inlist):
        return sum(inlist) / float(len(inlist))

    keepcols = _as_sequence(keepcols)
    collapsecols = _as_sequence(collapsecols)
    if cfcn is None:
        cfcn = collmean
    extras = [fcn for fcn in (fcn1, fcn2) if fcn is not None]

    if len(keepcols) == 0:
        means = []
        for col in collapsecols:
            avgcol = colex(listoflists, col)
            result = cfcn(avgcol)
            extra_values = [_apply_or_na(fcn, avgcol) for fcn in extras]
            if extra_values:
                result = [result] + extra_values
            means.append(result)
        return means

    values = colex(listoflists, keepcols)
    uniques = unique(values)
    uniques.sort()
    newlist = []
    for item in uniques:
        # copy, so appending results never touches the extracted key rows
        item = list(item) if _is_sequence(item) else [item]
        tmprows = linexand(listoflists, keepcols, item)
        for col in collapsecols:
            avgcol = colex(tmprows, col)
            item.append(cfcn(avgcol))
            for fcn in extras:
                item.append(_apply_or_na(fcn, avgcol))
        newlist.append(item)
    return newlist


def dm(listoflists, criterion):
    """
    Returns rows from the passed list of lists that meet the criteria in
    the passed criterion expression (a string as a function of x; e.g.,
    'x[3]>=9' will return all rows where the 4th column>=9 and "x[2]=='N'"
    will return rows with column 2 equal to the string 'N').  The string is
    eval'd; trusted input only.

    Usage:   dm (listoflists, criterion)
    Returns: rows from listoflists that meet the specified criterion.
    """
    keep = _row_function(criterion)
    return [row for row in listoflists if keep(row)]


def flat(l):
    """
    Returns the flattened version of a '2D' list.  List-correlate to the
    a.ravel() method of NumPy arrays.

    Usage:   flat(l)
    """
    return [item for row in l for item in row]


def linexand(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist).  Values are compared directly
    with ==, so any comparable object (including strings containing quotes)
    may be used.

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    """
    columnlist = _as_sequence(columnlist)
    valuelist = _as_sequence(valuelist)
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if all(row[col] == val for col, val in pairs)]


def linexor(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one value
    exists for columnlist but multiple values appear in valuelist, the
    valuelist values are all assumed to pertain to the same column.

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    """
    columnlist = _as_sequence(columnlist)
    valuelist = _as_sequence(valuelist)
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist) * len(valuelist)
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if any(row[col] == val for col, val in pairs)]


def linedelimited(inlist, delimiter):
    """
    Returns a string composed of elements in inlist, with each element
    separated by 'delimiter' (of any length).  Used by function
    writedelimited.  Use '\\t' for tab-delimiting.

    Usage:   linedelimited (inlist,delimiter)
    """
    return delimiter.join([makestr(item) for item in inlist])


def lineincols(inlist, colsize):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in columns of (fixed) colsize.

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    """
    return ''.join([_fit_column(item, colsize) for item in inlist])


def lineincustcols(inlist, colsizes):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in a column of width specified by a sequence colsizes.  The
    length of colsizes must be greater than or equal to the number of columns
    in inlist.

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    """
    return ''.join([_fit_column(inlist[i], colsizes[i])
                    for i in range(len(inlist))])


def list2string(inlist, delimit=' '):
    """
    Converts a 1D list to a single long string for file output, joining the
    stringified items with 'delimit'.

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    return delimit.join([makestr(item) for item in inlist])


def makelol(inlist):
    """
    Converts a 1D list to a 2D list (i.e., a list-of-lists).  Useful when you
    want to use put() to write a 1D list one item per line in the file.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[item] for item in inlist]


def makestr(x):
    """Returns x unchanged if it is already a string, else str(x)."""
    if isinstance(x, str):
        return x
    return str(x)


def _is_blank_row(row):
    """True for the printcc markers that request an empty output line."""
    return row in (['\n'], '\n', '', [''])


def _is_dash_row(row):
    """True for the printcc markers that request a row of dashes."""
    return row in (['dashes'], 'dashes')


def printcc(lst, extra=2):
    """
    Prints a list of lists in columns, customized by the max size of items
    within the columns (max size of items in col, plus 'extra' number of
    spaces).  Use 'dashes' or '\\n' in the list-of-lists to print dashes or
    blank lines, respectively.  Dash rows are as wide as the widest item of
    each column.

    Usage:   printcc (lst,extra=2)
    Returns: None
    """
    if len(lst) == 0:
        return None
    if not any(_is_sequence(row) for row in lst):  # a single 1D row
        lst = [lst]
    # marker rows carry no data, so they must not influence column widths
    data_rows = [row for row in lst
                 if not (_is_blank_row(row) or _is_dash_row(row))]
    widths = []
    if data_rows:
        for col in range(len(data_rows[0])):
            items = [makestr(item) for item in colex(data_rows, col)]
            widths.append(max([len(item) for item in items]) + extra)
    for row in lst:
        if _is_blank_row(row):
            print('')
        elif _is_dash_row(row):
            dashes = ['-' * (width - extra) for width in widths]
            print(lineincustcols(dashes, widths))
        else:
            print(lineincustcols(row, widths))
    return None


def printincols(listoflists, colsize):
    """
    Prints a list of lists in columns of (fixed) colsize width, where
    colsize is an integer.

    Usage:   printincols (listoflists,colsize)
    Returns: None
    """
    for row in listoflists:
        print(lineincols(row, colsize))
    return None


def pl(listoflists):
    """
    Prints a list of lists, 1 list (row) at a time.  Every row ends up on its
    own line: a row whose text already ends with a newline is written as-is,
    any other row gets a newline appended.

    Usage:   pl(listoflists)
    Returns: None
    """
    for row in listoflists:
        text = str(row)
        if not text.endswith('\n'):
            text = text + '\n'
        sys.stdout.write(text)
    return None


def printl(listoflists):
    """Alias for pl."""
    pl(listoflists)
    return


def replace(inlst, oldval, newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively.  The
    input is not modified; nested lists/tuples are rebuilt with their
    original container type.

    Usage:   replace (inlst,oldval,newval)
    """
    replaced = []
    for item in inlst:
        if _is_sequence(item):
            replaced.append(replace(item, oldval, newval))
        elif item == oldval:
            replaced.append(newval)
        else:
            replaced.append(item)
    if isinstance(inlst, tuple):
        return tuple(replaced)
    return replaced


def recode(inlist, listmap, cols=None):
    """
    Changes the values in a list to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers).  cols defaults
    to None (meaning all columns of every row are recoded).  When a value
    appears more than once in the first column of listmap, the first entry
    wins.  inlist itself is left untouched.

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
    Returns: inlist with the appropriate values replaced with new ones
    """
    lst = copy.deepcopy(inlist)
    oldvals = colex(listmap, 0)  # loop-invariant lookup column
    if cols is not None:
        cols = _as_sequence(cols)
    for row in lst:
        targets = cols if cols is not None else range(len(row))
        for col in targets:
            try:
                idx = oldvals.index(row[col])
            except ValueError:  # value has no mapping: leave it alone
                continue
            row[col] = listmap[idx][1]
    return lst


def remap(listoflists, criterion):
    """
    Remaps values in a given column of a 2D list (listoflists).  This requires
    a criterion as a function of 'x' so that the result of the following is
    returned ... map(lambda x: 'criterion',listoflists).  The string is
    eval'd; trusted input only.

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: remapped version of listoflists
    """
    transform = _row_function(criterion)
    return [transform(row) for row in listoflists]


def roundlist(inlist, digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies the following
    function to all elements of float type ... round(element,digits).  The
    input is not modified.  A 1D list is treated as a single row, so the
    result is always a list of lists.

    Usage:   roundlist(inlist,digits)
    Returns: list (of lists) with rounded floats
    """
    if len(inlist) > 0 and isinstance(inlist[0], (int, float)):
        inlist = [inlist]
    rounded = []
    for row in inlist:
        rounded.append([round(value, digits) if isinstance(value, float)
                        else value for value in row])
    return rounded


def sortby(listoflists, sortcols):
    """
    Sorts a list of lists on the column(s) specified in the sequence
    sortcols.  Ties are broken by comparing the complete rows.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list, unchanged column ordering
    """
    # prepend the sort keys to every row, sort, then strip the keys again
    newlist = abut(colex(listoflists, sortcols), listoflists)
    newlist.sort()
    try:
        numcols = len(sortcols)
    except TypeError:
        numcols = 1
    return [row[numcols:] for row in newlist]


def unique(inlist):
    """
    Returns all unique items in the passed list.  If a list-of-lists
    is passed, unique LISTS are found (i.e., items in the first dimension are
    compared).  Order of first appearance is kept.

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    uniques = []
    for item in inlist:
        if item not in uniques:  # items may be unhashable, so no set() here
            uniques.append(item)
    return uniques


def duplicates(inlist):
    """
    Returns duplicate items in the FIRST dimension of the passed list.  An
    item occurring k times is reported k-1 times.

    Usage:   duplicates (inlist)
    """
    return [item for i, item in enumerate(inlist) if item in inlist[i + 1:]]


def nonrepeats(inlist):
    """
    Returns items that are NOT duplicated in the first dim of the passed list.

    Usage:   nonrepeats (inlist)
    """
    return [item for item in inlist if inlist.count(item) == 1]


#===================   PSTAT ARRAY FUNCTIONS  =====================
#===================   PSTAT ARRAY FUNCTIONS  =====================
#===================   PSTAT ARRAY FUNCTIONS  =====================

try:  # DEFINE THESE *ONLY* IF numpy IS AVAILABLE
    import numpy as N
except ImportError:  # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
    pass
else:
    _ARRAY_SEQUENCE_TYPES = (list, tuple, N.ndarray)

    def _rows_to_array(rows):
        """Build an array from rows, falling back to an object array."""
        try:
            return N.array(rows)
        except (TypeError, ValueError):
            return N.array(rows, dtype='O')

    def aabut(source, *args):
        """
        Like the |Stat abut command.  It concatenates two arrays column-wise
        and returns the result.  CAUTION:  If one array is shorter, it will be
        repeated until it is as long as the other.

        Usage:   aabut (source, args)    where args=any # of arrays
        Returns: an array as long as the LONGEST array passed, source
                 appearing on the 'left', arrays in args attached on the
                 'right'.
        """
        source = N.atleast_1d(N.asarray(source))
        if source.ndim == 1:  # treat a 1D array as a single column
            source = N.resize(source, [source.shape[0], 1])
        for addon in args:
            addon = N.atleast_1d(N.asarray(addon))
            if addon.ndim == 1:
                # keep the addon's OWN length here; lengths are matched below
                addon = N.resize(addon, [addon.shape[0], 1])
            if len(addon) < len(source):
                addon = N.resize(addon, [source.shape[0], addon.shape[1]])
            elif len(source) < len(addon):
                source = N.resize(source, [addon.shape[0], source.shape[1]])
            source = N.concatenate((source, addon), 1)
        return source

    def acolex(a, indices, axis=1):
        """
        Extracts specified indices (a list) from passed array, along passed
        axis (column extraction is default).  BEWARE:  A 1D array is presumed
        to be a column-array (and that the whole array will be returned as a
        column).

        Usage:   acolex (a,indices,axis=1)
        Returns: the columns of a specified by indices
        """
        a = N.asarray(a)
        indices = _as_sequence(indices, _ARRAY_SEQUENCE_TYPES)
        if a.ndim == 1:
            return N.resize(a, [a.shape[0], 1])
        return N.take(a, indices, axis)

    def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
        """
        Averages data in collapsecols, keeping all unique items in keepcols
        (using unique, which keeps unique LISTS of column numbers), retaining
        the unique sets of values in keepcols, the mean for each.  fcn1 and
        fcn2, when given, are applied to each collapsed column and their
        results appended after the mean ('N/A' if they raise).  cfcn replaces
        the mean as the collapse function.

        Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
        Returns: unique 'conditions' specified by the contents of columns
                 specified by keepcols, abutted with the mean(s) of column(s)
                 specified by collapsecols
        """

        def acollmean(inarray):
            values = N.ravel(inarray)
            return N.sum(values) / float(len(values))

        keepcols = _as_sequence(keepcols, _ARRAY_SEQUENCE_TYPES)
        collapsecols = _as_sequence(collapsecols, _ARRAY_SEQUENCE_TYPES)
        if cfcn is None:
            cfcn = acollmean
        extras = [fcn for fcn in (fcn1, fcn2) if fcn is not None]

        if len(keepcols) == 0:
            avgcol = acolex(a, collapsecols)
            means = N.array([cfcn(acolex(a, col)) for col in collapsecols])
            for fcn in extras:
                try:
                    test = fcn(avgcol)
                except Exception:  # best-effort by design
                    test = N.array(['N/A'] * len(means))
                means = aabut(means, test)
            return means

        values = colex(a, keepcols)  # a LIST, so "item" can be appended to
        uniques = unique(values)     # and .sort keeps rows intact
        uniques.sort()
        newlist = []
        for item in uniques:
            if isinstance(item, _ARRAY_SEQUENCE_TYPES):
                item = list(item)
            else:
                item = [item]
            tmprows = alinexand(a, keepcols, item)
            for col in collapsecols:
                avgcol = acolex(tmprows, col)
                item.append(cfcn(avgcol))
                for fcn in extras:
                    item.append(_apply_or_na(fcn, avgcol))
            newlist.append(item)
        return _rows_to_array(newlist)

    def adm(a, criterion):
        """
        Returns rows from the passed array that meet the criteria in
        the passed criterion expression (a string as a function of x).  The
        string is eval'd; trusted input only.

        Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
        """
        keep = _row_function(criterion)
        return _rows_to_array([row for row in a if keep(row)])

    def isstring(x):
        """Returns 1 if x is a string, 0 otherwise."""
        if isinstance(x, str):
            return 1
        return 0

    def alinexand(a, columnlist, valuelist):
        """
        Returns the rows of an array where col (from columnlist) = val
        (from valuelist).  One value is required for each column in
        columnlist.  Values are compared directly with ==.

        Usage:   alinexand (a,columnlist,valuelist)
        Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
        """
        columnlist = _as_sequence(columnlist, _ARRAY_SEQUENCE_TYPES)
        valuelist = _as_sequence(valuelist, _ARRAY_SEQUENCE_TYPES)
        pairs = [(columnlist[i], valuelist[i])
                 for i in range(len(columnlist))]
        return _rows_to_array(
            [row for row in a if all(row[col] == val for col, val in pairs)])

    def alinexor(a, columnlist, valuelist):
        """
        Returns the rows of an array where col (from columnlist) = val (from
        valuelist).  One value is required for each column in columnlist.
        The exception is if either columnlist or valuelist has only 1 value,
        in which case that item will be expanded to match the length of the
        other list.

        Usage:   alinexor (a,columnlist,valuelist)
        Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
        """
        columnlist = _as_sequence(columnlist, _ARRAY_SEQUENCE_TYPES)
        valuelist = _as_sequence(valuelist, _ARRAY_SEQUENCE_TYPES)
        # list(...) first: "ndarray * n" would multiply instead of repeat
        if len(columnlist) == 1 and len(valuelist) > 1:
            columnlist = list(columnlist) * len(valuelist)
        elif len(valuelist) == 1 and len(columnlist) > 1:
            valuelist = list(valuelist) * len(columnlist)
        pairs = [(columnlist[i], valuelist[i])
                 for i in range(len(columnlist))]
        return _rows_to_array(
            [row for row in a if any(row[col] == val for col, val in pairs)])

    def areplace(a, oldval, newval):
        """
        Replaces all occurrences of oldval with newval in array a.

        Usage:   areplace(a,oldval,newval)
        """
        return N.where(a == oldval, newval, a)

    def arecode(a, listmap, col='all'):
        """
        Remaps the values in an array to a new set of values (useful when
        you need to recode data from (e.g.) strings to numbers as most stats
        packages require.  Can work on a SINGLE column (col = its index), or
        'all' columns at once.  Every element is looked up once against the
        ORIGINAL values (first matching listmap entry wins), so mappings do
        not chain.  An object array is returned whenever strings and
        non-strings have to be mixed.

        Usage:   arecode (a,listmap,col='all')
        Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
        """
        a = N.asarray(a)
        recode_all = isinstance(col, str) and col == 'all'
        if recode_all:
            work = a.ravel()
        else:
            work = acolex(a, col).ravel()
        if len(work) == 0:
            return N.array(a)

        recoded = []
        for value in work:
            for pair in listmap:
                if value == pair[0]:
                    value = pair[1]
                    break
            recoded.append(value)

        text_flags = set([isinstance(value, str) for value in recoded])
        needs_objects = (a.dtype.char == 'O' or len(text_flags) > 1 or
                         any(isinstance(pair[1], str) for pair in listmap))
        if not needs_objects:
            result = N.array(recoded)
            # a text column cannot be concatenated with numeric neighbours
            if not recode_all and ((result.dtype.kind in 'SU') !=
                                   (a.dtype.kind in 'SU')):
                needs_objects = True
        if needs_objects:
            result = N.empty(len(recoded), dtype='O')
            for i, value in enumerate(recoded):
                result[i] = value
            a = N.array(a, dtype='O')

        if recode_all:
            return N.reshape(result, a.shape)
        return N.concatenate(
            [a[:, 0:col], result[:, N.newaxis], a[:, col + 1:]], 1)

    def arowcompare(row1, row2):
        """
        Compares two rows from an array, regardless of whether it is an
        array of numbers or of python objects.

        Usage:   arowcompare(row1,row2)
        Returns: an array of equal length containing True where the two rows
                 had identical elements and False otherwise
        """
        row1 = N.asarray(row1)
        row2 = N.asarray(row2)
        if row1.dtype.char == 'O' or row2.dtype.char == 'O':
            return N.array([bool(x == y) for x, y in zip(row1, row2)],
                           dtype=bool)
        return N.equal(row1, row2)

    def arowsame(row1, row2):
        """
        Compares two rows from an array, regardless of whether it is an
        array of numbers or of python objects.

        Usage:   arowsame(row1,row2)
        Returns: true if the two rows are identical, false otherwise.
        """
        if N.shape(row1) != N.shape(row2):
            return False
        return N.all(arowcompare(row1, row2))

    def asortrows(a, axis=0):
        """
        Sorts an array "by rows".  This differs from the numpy sort()
        function, which sorts elements WITHIN the given axis.  Instead, this
        function keeps each slice taken along the given axis intact (each row
        for axis=0, each column for axis=1) and reorders those slices
        lexicographically, first element most significant.  A 1D array is
        simply sorted.

        Usage:   asortrows(a,axis=0)
        Returns: sorted version of a
        """
        a = N.asarray(a)
        if a.ndim < 2:
            return N.sort(a, kind='mergesort')
        slabs = N.swapaxes(a, 0, axis)  # bring the slices to sort up front
        keys = slabs.reshape(len(slabs), -1)
        try:
            # lexsort treats its LAST key as primary, hence the reversal
            order = N.lexsort(keys.T[::-1])
        except TypeError:  # e.g. object arrays
            order = sorted(range(len(keys)), key=lambda i: keys[i].tolist())
        return N.swapaxes(slabs[order], 0, axis)

    def aunique(inarray):
        """
        Returns unique items in the FIRST dimension of the passed array, in
        order of first appearance.  An empty input yields an empty array.

        Usage:   aunique (inarray)
        """
        inarray = N.asarray(inarray)
        if len(inarray) == 0:
            return inarray.copy()
        uniques = []
        for item in inarray:
            if not any(N.array_equal(item, seen) for seen in uniques):
                uniques.append(item)
        return N.array(uniques, dtype=inarray.dtype)

    def aduplicates(inarray):
        """
        Returns duplicate items in the FIRST dimension of the passed array
        (each duplicated item or row is reported once).

        Usage:   aduplicates (inarray)
        """
        inarray = N.array(inarray)
        aslist = inarray.tolist()
        dups = unique([item for i, item in enumerate(aslist)
                       if item in aslist[i + 1:]])
        if not dups:
            return N.empty((0,) + inarray.shape[1:], dtype=inarray.dtype)
        return N.array(dups, dtype=inarray.dtype)