• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# We did not author this file nor maintain it. Skip linting it.
2#pylint: skip-file
3# Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved.
4#
5# Permission is hereby granted, free of charge, to any person obtaining a copy
6# of this software and associated documentation files (the "Software"), to deal
7# in the Software without restriction, including without limitation the rights
8# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9# copies of the Software, and to permit persons to whom the Software is
10# furnished to do so, subject to the following conditions:
11#
12# The above copyright notice and this permission notice shall be included in
13# all copies or substantial portions of the Software.
14#
15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21# THE SOFTWARE.
22#
23# Comments and/or additions are welcome (send e-mail to:
24# strang@nmr.mgh.harvard.edu).
25#
26"""pstat.py module
27
28#################################################
29#######  Written by:  Gary Strangman  ###########
30#######  Last modified:  Dec 18, 2007 ###########
31#################################################
32
33This module provides some useful list and array manipulation routines
34modeled after those found in the |Stat package by Gary Perlman, plus a
35number of other useful list/file manipulation functions.  The list-based
36functions include:
37
38      abut (source,*args)
39      simpleabut (source, addon)
40      colex (listoflists,cnums)
41      collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
42      dm (listoflists,criterion)
43      flat (l)
44      linexand (listoflists,columnlist,valuelist)
45      linexor (listoflists,columnlist,valuelist)
46      linedelimited (inlist,delimiter)
47      lineincols (inlist,colsize)
48      lineincustcols (inlist,colsizes)
49      list2string (inlist)
50      makelol(inlist)
51      makestr(x)
52      printcc (lst,extra=2)
53      printincols (listoflists,colsize)
54      pl (listoflists)
55      printl(listoflists)
56      replace (lst,oldval,newval)
57      recode (inlist,listmap,cols='all')
58      remap (listoflists,criterion)
59      roundlist (inlist,num_digits_to_round_floats_to)
60      sortby(listoflists,sortcols)
61      unique (inlist)
62      duplicates(inlist)
63      writedelimited (listoflists, delimiter, file, writetype='w')
64
65Some of these functions have alternate versions which are defined only if
66Numeric (NumPy) can be imported.  These functions are generally named as
67above, with an 'a' prefix.
68
69      aabut (source, *args)
70      acolex (a,indices,axis=1)
71      acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
72      adm (a,criterion)
73      alinexand (a,columnlist,valuelist)
74      alinexor (a,columnlist,valuelist)
75      areplace (a,oldval,newval)
76      arecode (a,listmap,col='all')
77      arowcompare (row1, row2)
78      arowsame (row1, row2)
79      asortrows(a,axis=0)
80      aunique(inarray)
81      aduplicates(inarray)
82
83Currently, the code is all but completely un-optimized.  In many cases, the
84array versions of functions amount simply to aliases to built-in array
85functions/methods.  Their inclusion here is for function name consistency.
86"""
87
88## CHANGE LOG:
89## ==========
90## 07-11-26 ... edited to work with numpy
91## 01-11-15 ... changed list2string() to accept a delimiter
92## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
93## 01-05-31 ... added duplicates() and aduplicates() functions
94## 00-12-28 ... license made GPL, docstring and import requirements
95## 99-11-01 ... changed version to 0.3
96## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
97## 03/27/99 ... added areplace function, made replace fcn recursive
## 12/31/98 ... added writefc function for output to fixed column sizes
99## 12/07/98 ... fixed import problem (failed on collapse() fcn)
100##              added __version__ variable (now 0.2)
101## 12/05/98 ... updated doc-strings
102##              added features to collapse() function
103##              added flat() function for lists
104##              fixed a broken asortrows()
105## 11/16/98 ... fixed minor bug in aput for 1D arrays
106##
107## 11/08/98 ... fixed aput to output large arrays correctly
108
109import stats  # required 3rd party module
110import string, copy
111from types import *
112
113__version__ = 0.4
114
115###===========================  LIST FUNCTIONS  ==========================
116###
117### Here are the list functions, DEFINED FOR ALL SYSTEMS.
118### Array functions (for NumPy-enabled computers) appear below.
119###
120
121
def abut(source, *args):
  """
Like the |Stat abut command.  It concatenates two or more lists side-by-side
and returns the result.  '2D' lists are also accommodated for any argument
(source or the lists in args).  CAUTION:  If one list is shorter, it will be
repeated (cyclically, truncating the last repeat if needed) until it is as
long as the longest list.  If this behavior is not desired, use
pstat.simpleabut().

Usage:   abut(source, args)   where args=any # of lists
Returns: a list of lists as long as the LONGEST list passed, source on the
         'left', lists in <args> attached consecutively on the 'right'
Raises:  ZeroDivisionError if an empty list has to be repeated
"""

  def cycle_to(seq, length):
    # Ceiling division spelled with // so the repeat count is an int no
    # matter which division semantics are in effect.
    repeats = (length + len(seq) - 1) // len(seq)
    return (seq * repeats)[0:length]

  if not isinstance(source, (list, tuple)):
    source = [source]
  for addon in args:
    if not isinstance(addon, (list, tuple)):
      addon = [addon]
    if len(addon) < len(source):  # source is longer: stretch addon
      addon = cycle_to(addon, len(source))
    elif len(source) < len(addon):  # addon is longer: stretch source
      source = cycle_to(source, len(addon))
    source = simpleabut(source, addon)
  return source
167
168
def simpleabut(source, addon):
  """
Concatenates two lists as columns and returns the result.  '2D' lists
are also accommodated for either argument (source or addon).  This DOES NOT
repeat either list to make the 2 lists of equal length.  Beware of list pairs
with different lengths ... the resulting list will be the length of the
FIRST list passed; rows of source beyond the end of addon are returned
unchanged (as deep copies).

Tuples are accepted for either argument and for their rows; every combined
row is returned as a new list.  Whether an argument is '2D' is decided from
its first element only.  An empty source or addon yields a copy of source.

Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
Returns: a list of lists as long as source, with source on the 'left' and
                 addon on the 'right'
"""
  if not isinstance(source, (list, tuple)):
    source = [source]
  if not isinstance(addon, (list, tuple)):
    addon = [addon]
  # list() so that a tuple source still gives an assignable result.
  result = list(copy.deepcopy(source))
  if len(source) == 0 or len(addon) == 0:
    return result
  source_is_2d = isinstance(source[0], (list, tuple))
  addon_is_2d = isinstance(addon[0], (list, tuple))
  for i in range(min(len(source), len(addon))):
    if source_is_2d:
      left = list(source[i])
    else:
      left = [source[i]]
    if addon_is_2d:
      right = list(addon[i])
    else:
      right = [addon[i]]
    result[i] = left + right
  return result
203
204
def colex(listoflists, cnums):
  """
Extracts from listoflists the columns specified by 'cnums'.  cnums can be
an integer, a sequence of integers, or a string holding the subscript to
apply to each row ... e.g., '[3:]' will colex columns 3 onward from the
listoflists (the string is appended to 'x' and evaluated with eval(), so it
must come from a trusted source).

A single integer, or a sequence holding exactly one integer, gives a flat
list of that column's values.  A sequence of several integers gives one list
per row.  Cells are copied as-is (list-valued cells are not flattened), and
no module-level state is touched.

Usage:   colex (listoflists,cnums)
Returns: a list-of-lists corresponding to the columns from listoflists
         specified by cnums, in the order the column numbers appear in cnums
"""
  if isinstance(cnums, (list, tuple)):  # if multiple columns to get
    if len(cnums) == 1:
      # Callers such as collapse() expect a flat column in this case.
      only = cnums[0]
      return [row[only] for row in listoflists]
    return [[row[col] for col in cnums] for row in listoflists]
  if isinstance(cnums, str):  # if a '[3:]' type subscript expression
    # NOTE: eval of caller-supplied text; never pass untrusted input here.
    selector = eval('lambda x: x' + cnums)
    return [selector(row) for row in listoflists]
  # else it's just 1 col to get
  return [row[cnums] for row in listoflists]
231
232
def collapse(listoflists,
             keepcols,
             collapsecols,
             fcn1=None,
             fcn2=None,
             cfcn=None):
  """
Collapses (by default: averages) the data in collapsecols for every unique
combination of values found in keepcols.  Setting fcn1 and/or fcn2 to a
function rather than None (e.g., stats.sterr, len) appends those results
after each collapsed value; if such a function raises, 'N/A' is stored
instead.  cfcn is the collapse function to apply (defaults to the mean,
defined here in the pstat module to avoid circular imports with stats.py, but
harmonicmean or others could be passed).

Usage:    collapse
(listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
Returns: if keepcols is empty, a list with one entry per column in
     collapsecols: the cfcn result alone, or [cfcn result, fcn1 result,
     fcn2 result] when fcn1/fcn2 are given.  Otherwise a list of lists, one
     row per unique (sorted) combination of keepcols values, each abutted
     with the cfcn result (and fcn1/fcn2 results) of every column in
     collapsecols.
"""

  def collmean(inlist):
    total = 0
    for value in inlist:
      total = total + value
    return total / float(len(inlist))

  def apply_or_na(fcn, column):
    # Best-effort statistic: a failing fcn must not abort the collapse.
    try:
      return fcn(column)
    except Exception:
      return 'N/A'

  if not isinstance(keepcols, (list, tuple)):
    keepcols = [keepcols]
  if not isinstance(collapsecols, (list, tuple)):
    collapsecols = [collapsecols]
  if cfcn is None:
    cfcn = collmean
  extras = [fcn for fcn in (fcn1, fcn2) if fcn is not None]

  if len(keepcols) == 0:
    means = []
    for col in collapsecols:
      avgcol = colex(listoflists, col)
      entry = cfcn(avgcol)
      if extras:
        entry = [entry] + [apply_or_na(fcn, avgcol) for fcn in extras]
      means.append(entry)
    return means

  values = colex(listoflists, keepcols)
  uniques = unique(values)
  uniques.sort()
  newlist = []
  for item in uniques:
    # Work on a fresh list so results can be appended even for tuple keys.
    if isinstance(item, (list, tuple)):
      item = list(item)
    else:
      item = [item]
    tmprows = linexand(listoflists, keepcols, item)
    for col in collapsecols:
      avgcol = colex(tmprows, col)
      item.append(cfcn(avgcol))
      for fcn in extras:
        item.append(apply_or_na(fcn, avgcol))
    # One output row per unique key, regardless of how many columns collapse.
    newlist.append(item)
  return newlist
318
319
def dm(listoflists, criterion):
  """
Selects the rows of a list of lists that satisfy a criterion.  The criterion
is a string holding an expression in terms of x (one row); e.g., 'x[3]>=9'
keeps all rows where the 4th column>=9 and "x[2]=='N'" keeps rows whose
column 2 equals the string 'N'.  The string is evaluated with eval(), so it
must come from a trusted source.

Usage:   dm (listoflists, criterion)
Returns: rows from listoflists that meet the specified criterion.
"""
  keep_row = eval('lambda x: ' + criterion)
  return filter(keep_row, listoflists)
333
334
def flat(l):
  """
Flattens a '2D' list by one level: the elements of every row are
concatenated, in order, into a single new list.  List-correlate to the
a.ravel() method of NumPy arrays.

Usage:    flat(l)
Returns:  a new 1D list holding the elements of each row of l
"""
  return [element for row in l for element in row]
347
348
def linexand(listoflists, columnlist, valuelist):
  """
Returns the rows of a list of lists where col (from columnlist) = val
(from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
len(columnlist) must equal len(valuelist).

Values are compared directly with ==, so strings containing quotes, floats
and arbitrary objects all match reliably.  An empty columnlist matches
every row.

Usage:   linexand (listoflists,columnlist,valuelist)
Returns: a list of the rows of listoflists where columnlist[i]=valuelist[i]
         for ALL i
Raises:  IndexError if valuelist is shorter than columnlist
"""
  if not isinstance(columnlist, (list, tuple)):
    columnlist = [columnlist]
  if not isinstance(valuelist, (list, tuple)):
    valuelist = [valuelist]
  # Indexing (rather than zip) keeps the IndexError for a short valuelist.
  pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
  return [row for row in listoflists
          if all(row[col] == val for col, val in pairs)]
374
375
def linexor(listoflists, columnlist, valuelist):
  """
Returns the rows of a list of lists where col (from columnlist) = val
(from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
One value is required for each column in columnlist.  If only one column
is given but multiple values appear in valuelist, the values are all assumed
to pertain to that column; likewise a single value is tested against every
column when several columns are given.

Values are compared directly with ==, so strings containing quotes, floats
and arbitrary objects all match reliably.  An empty columnlist matches no
row.

Usage:   linexor (listoflists,columnlist,valuelist)
Returns: a list of the rows of listoflists where columnlist[i]=valuelist[i]
         for ANY i
Raises:  IndexError if valuelist is shorter than columnlist (after the
         single-item expansion described above)
"""
  if not isinstance(columnlist, (list, tuple)):
    columnlist = [columnlist]
  if not isinstance(valuelist, (list, tuple)):
    valuelist = [valuelist]
  if len(columnlist) == 1 and len(valuelist) > 1:
    columnlist = columnlist * len(valuelist)
  elif len(valuelist) == 1 and len(columnlist) > 1:
    valuelist = valuelist * len(columnlist)
  pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
  return [row for row in listoflists
          if any(row[col] == val for col, val in pairs)]
404
405
def linedelimited(inlist, delimiter):
  """
Returns a string composed of elements in inlist, with each element
separated by 'delimiter.'  Non-string elements are converted with str().
The delimiter may be of any length (including empty); it only ever appears
BETWEEN elements.  Used by function writedelimited.  Use '\\t' for
tab-delimiting.

Usage:   linedelimited (inlist,delimiter)
Returns: the delimited string ('' for an empty inlist)
"""
  cells = []
  for item in inlist:
    if not isinstance(item, str):
      item = str(item)
    cells.append(item)
  return delimiter.join(cells)
421
422
def lineincols(inlist, colsize):
  """
Returns a string composed of elements in inlist, with each element
right-aligned in columns of (fixed) colsize.  Non-string elements are
converted with str().  Elements longer than colsize are truncated to exactly
colsize characters so that later columns stay aligned.

Usage:   lineincols (inlist,colsize)   where colsize is an integer
Returns: the formatted string ('' for an empty inlist)
"""
  cells = []
  for item in inlist:
    if not isinstance(item, str):
      item = str(item)
    # Slice first (no-op for short items), then pad on the left.
    cells.append(item[0:colsize].rjust(colsize))
  return ''.join(cells)
442
443
def lineincustcols(inlist, colsizes):
  """
Returns a string composed of elements in inlist, with each element
right-aligned in a column of width specified by a sequence colsizes.  The
length of colsizes must be greater than or equal to the number of columns
in inlist.  Non-string elements are converted with str().  Elements longer
than their column are truncated to exactly the column width.

Usage:   lineincustcols (inlist,colsizes)
Returns: formatted string created from inlist
Raises:  IndexError if colsizes is shorter than inlist
"""
  cells = []
  for i in range(len(inlist)):
    item = inlist[i]
    if not isinstance(item, str):
      item = str(item)
    width = colsizes[i]
    cells.append(item[0:width].rjust(width))
  return ''.join(cells)
468
469
def list2string(inlist, delimit=' '):
  """
Converts a 1D list to a single long string for file output.  Non-string
items are converted with str() and the pieces are joined with 'delimit'
(using the str.join method rather than the deprecated string.join function).

Usage:   list2string (inlist,delimit=' ')
Returns: the string created from inlist
"""
  pieces = []
  for item in inlist:
    if not isinstance(item, str):
      item = str(item)
    pieces.append(item)
  return delimit.join(pieces)
480
481
def makelol(inlist):
  """
Turns a 1D list into a 2D list (a list-of-lists) by wrapping every item in
its own single-element list.  Useful when you want to use put() to write a
1D list one item per line in the file.

Usage:   makelol(inlist)
Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
"""
  return [[element] for element in inlist]
494
495
def makestr(x):
  """
Returns x unchanged if it is already a string, otherwise str(x).

Usage:   makestr(x)
Returns: a string
"""
  if isinstance(x, str):
    return x
  return str(x)
500
501
def printcc(lst, extra=2):
  """
Prints a list of lists in columns, customized by the max size of items
within the columns (max size of items in col, plus 'extra' number of spaces).
Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
respectively.  Such marker rows (given either as a bare string or as a
one-element list) are ignored when the column widths are computed, and a
dashes row is drawn as wide as the widest item of each column.

Usage:   printcc (lst,extra=2)
Returns: None
"""
  blank_rows = (['\n'], '\n', '', [''])
  dash_rows = (['dashes'], 'dashes')

  if len(lst) == 0:  # nothing to print
    return None
  if not isinstance(lst[0], (list, tuple)):
    lst = [lst]
  # Only real data rows take part in sizing the columns.
  data_rows = [row for row in lst
               if row not in blank_rows and row not in dash_rows]
  maxsize = []
  if data_rows:
    # The column count is taken from the first data row.
    for col in range(len(data_rows[0])):
      items = [makestr(row[col]) for row in data_rows]
      maxsize.append(max([len(item) for item in items]) + extra)
  for row in lst:
    if row in blank_rows:
      print('')
    elif row in dash_rows:
      # width - extra == length of the longest item in that column
      dashes = ['-' * (width - extra) for width in maxsize]
      print(lineincustcols(dashes, maxsize))
    else:
      print(lineincustcols(row, maxsize))
  return None
540
541
def printincols(listoflists, colsize):
  """
Prints each row of a list of lists on its own line, formatting the row with
lineincols() so that every item occupies a column of (fixed) colsize width,
where colsize is an integer.

Usage:   printincols (listoflists,colsize)
Returns: None
"""
  for cells in listoflists:
    formatted = lineincols(cells, colsize)
    print(formatted)
553
554
def pl(listoflists):
  """
Prints a list of lists, 1 list (row) at a time.  Each row is handed to the
print statement as a whole.  A row whose last element is '\\n' is printed
with a trailing comma on the print statement; every other row is printed
with a plain print statement.

Usage:   pl(listoflists)
Returns: None
"""
  for row in listoflists:
    # row[-1] requires every row to be non-empty.
    if row[-1] == '\n':
      # Trailing comma: Python 2 print-statement form that does not add a
      # newline after the row.
      print row,
    else:
      print row
  return None
568
569
def printl(listoflists):
  """Alias for pl: prints listoflists one row at a time and returns None."""
  pl(listoflists)
  return None
574
575
def replace(inlst, oldval, newval):
  """
Replaces all occurrences of 'oldval' with 'newval', recursively (nested
lists and tuples are searched as well).  The input is never modified: a new
container is built at every level, a list for list input and a tuple for
tuple input.

Usage:   replace (inlst,oldval,newval)
Returns: a copy of inlst with every element equal to oldval replaced
"""
  replaced = []
  for element in inlst:
    if isinstance(element, (list, tuple)):
      replaced.append(replace(element, oldval, newval))
    elif element == oldval:
      replaced.append(newval)
    else:
      replaced.append(element)
  if isinstance(inlst, tuple):
    return tuple(replaced)
  return replaced
590
591
def recode(inlist, listmap, cols=None):
  """
Changes the values in a list to a new set of values (useful when
you need to recode data from (e.g.) strings to numbers).  cols defaults
to None (meaning all columns of every row are recoded).  Values that do not
appear in the first column of listmap are left untouched.  The input list
is not modified (a deep copy is recoded and returned).

Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
         where each row of listmap is [oldvalue, newvalue]
Returns: inlist with the appropriate values replaced with new ones
"""
  lst = copy.deepcopy(inlist)
  # Build the lookup column once instead of once per cell.
  oldvals = [pair[0] for pair in listmap]
  if cols is not None and not isinstance(cols, (list, tuple)):
    cols = [cols]
  for row in lst:
    if cols is None:
      targets = range(len(row))  # every column of THIS row
    else:
      targets = cols
    for col in targets:
      try:
        idx = oldvals.index(row[col])
      except ValueError:  # value has no mapping: keep it
        continue
      row[col] = listmap[idx][1]
  return lst
621
622
def remap(listoflists, criterion):
  """
Remaps every row of a 2D list (listoflists).  criterion is a string holding
an expression in terms of 'x' (one row); the result of the following is
returned ... map(lambda x: criterion,listoflists).  The string is evaluated
with eval(), so it must come from a trusted source.

Usage:   remap(listoflists,criterion)    criterion=string
Returns: remapped version of listoflists
"""
  transform = eval('lambda x: ' + criterion)
  return map(transform, listoflists)
635
636
def roundlist(inlist, digits):
  """
Goes through each element in a 1D or 2D inlist, and applies the following
function to all elements that are floats ... round(element,digits).  The
input is left untouched: new row lists are built for the result.  A 1D
inlist (detected by its first element being an int or float) is treated as
a single row, so the result is always 2D.

Usage:   roundlist(inlist,digits)
Returns: list of lists with rounded floats ([] for an empty inlist)
"""
  if len(inlist) == 0:
    return []
  if isinstance(inlist[0], (int, float)):
    inlist = [inlist]
  rounded = []
  for row in inlist:
    newrow = []
    for value in row:
      if isinstance(value, float):
        value = round(value, digits)
      newrow.append(value)
    rounded.append(newrow)
  return rounded
653
654
def sortby(listoflists, sortcols):
  """
Sorts a list of lists on the column(s) specified in the sequence
sortcols.  The sort-key columns are abutted to the left of every row, the
combined rows are sorted, and the key columns are sliced off again.

Usage:   sortby(listoflists,sortcols)
Returns: sorted list, unchanged column ordering
"""
  keyed = abut(colex(listoflists, sortcols), listoflists)
  keyed.sort()
  try:
    nkeys = len(sortcols)
  except TypeError:  # a single column number was passed
    nkeys = 1
  # Drop the nkeys leading key columns that were added for sorting.
  return colex(keyed, '[%s:]' % str(nkeys))
672
673
def unique(inlist):
  """
Collects the distinct items of the passed list, keeping the first occurrence
of each and preserving their original order.  If a list-of-lists is passed,
unique LISTS are found (i.e., items in the first dimension are compared).

Usage:   unique (inlist)
Returns: the unique elements (or rows) in inlist
"""
  seen = []
  for candidate in inlist:
    if candidate in seen:
      continue
    seen.append(candidate)
  return seen
688
689
def duplicates(inlist):
  """
Returns duplicate items in the FIRST dimension of the passed list: every
item that occurs again later in the list is reported (so an item present k
times appears k-1 times in the result).

Usage:   duplicates (inlist)
Returns: list of the items that have a later duplicate, in input order
"""
  return [item for position, item in enumerate(inlist)
          if item in inlist[position + 1:]]
701
702
def nonrepeats(inlist):
  """
Returns items that are NOT duplicated in the first dim of the passed list,
i.e. those that occur exactly once, in their original order.

Usage:   nonrepeats (inlist)
Returns: list of the items of inlist that occur exactly once
"""
  return [item for item in inlist if inlist.count(item) == 1]
714
715#===================   PSTAT ARRAY FUNCTIONS  =====================
716#===================   PSTAT ARRAY FUNCTIONS  =====================
717#===================   PSTAT ARRAY FUNCTIONS  =====================
718#===================   PSTAT ARRAY FUNCTIONS  =====================
719#===================   PSTAT ARRAY FUNCTIONS  =====================
720#===================   PSTAT ARRAY FUNCTIONS  =====================
721#===================   PSTAT ARRAY FUNCTIONS  =====================
722#===================   PSTAT ARRAY FUNCTIONS  =====================
723#===================   PSTAT ARRAY FUNCTIONS  =====================
724#===================   PSTAT ARRAY FUNCTIONS  =====================
725#===================   PSTAT ARRAY FUNCTIONS  =====================
726#===================   PSTAT ARRAY FUNCTIONS  =====================
727#===================   PSTAT ARRAY FUNCTIONS  =====================
728#===================   PSTAT ARRAY FUNCTIONS  =====================
729#===================   PSTAT ARRAY FUNCTIONS  =====================
730#===================   PSTAT ARRAY FUNCTIONS  =====================
731
732try:  # DEFINE THESE *ONLY* IF numpy IS AVAILABLE
733  import numpy as N
734
735  def aabut(source, *args):
736    """
737Like the |Stat abut command.  It concatenates two arrays column-wise
738and returns the result.  CAUTION:  If one array is shorter, it will be
739repeated until it is as long as the other.
740
741Usage:   aabut (source, args)    where args=any # of arrays
742Returns: an array as long as the LONGEST array past, source appearing on the
743         'left', arrays in <args> attached on the 'right'.
744"""
745    if len(source.shape) == 1:
746      width = 1
747      source = N.resize(source, [source.shape[0], width])
748    else:
749      width = source.shape[1]
750    for addon in args:
751      if len(addon.shape) == 1:
752        width = 1
753        addon = N.resize(addon, [source.shape[0], width])
754      else:
755        width = source.shape[1]
756      if len(addon) < len(source):
757        addon = N.resize(addon, [source.shape[0], addon.shape[1]])
758      elif len(source) < len(addon):
759        source = N.resize(source, [addon.shape[0], source.shape[1]])
760      source = N.concatenate((source, addon), 1)
761    return source
762
763  def acolex(a, indices, axis=1):
764    """
765Extracts specified indices (a list) from passed array, along passed
766axis (column extraction is default).  BEWARE: A 1D array is presumed to be a
767column-array (and that the whole array will be returned as a column).
768
769Usage:   acolex (a,indices,axis=1)
770Returns: the columns of a specified by indices
771"""
772    if type(indices) not in [ListType, TupleType, N.ndarray]:
773      indices = [indices]
774    if len(N.shape(a)) == 1:
775      cols = N.resize(a, [a.shape[0], 1])
776    else:
777      #        print a[:3]
778      cols = N.take(a, indices, axis)
779#        print cols[:3]
780    return cols
781
782  def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
783    """
784Averages data in collapsecol, keeping all unique items in keepcols
785(using unique, which keeps unique LISTS of column numbers), retaining
786the unique sets of values in keepcols, the mean for each.  If stderror or
787N of the mean are desired, set either or both parameters to 1.
788
789Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
790Returns: unique 'conditions' specified by the contents of columns specified
791         by keepcols, abutted with the mean(s) of column(s) specified by
792         collapsecols
793"""
794
795    def acollmean(inarray):
796      return N.sum(N.ravel(inarray))
797
798    if type(keepcols) not in [ListType, TupleType, N.ndarray]:
799      keepcols = [keepcols]
800    if type(collapsecols) not in [ListType, TupleType, N.ndarray]:
801      collapsecols = [collapsecols]
802
803    if cfcn == None:
804      cfcn = acollmean
805    if keepcols == []:
806      avgcol = acolex(a, collapsecols)
807      means = N.sum(avgcol) / float(len(avgcol))
808      if fcn1 <> None:
809        try:
810          test = fcn1(avgcol)
811        except:
812          test = N.array(['N/A'] * len(means))
813        means = aabut(means, test)
814      if fcn2 <> None:
815        try:
816          test = fcn2(avgcol)
817        except:
818          test = N.array(['N/A'] * len(means))
819        means = aabut(means, test)
820      return means
821    else:
822      if type(keepcols) not in [ListType, TupleType, N.ndarray]:
823        keepcols = [keepcols]
824      values = colex(a, keepcols)  # so that "item" can be appended (below)
825      uniques = unique(values)  # get a LIST, so .sort keeps rows intact
826      uniques.sort()
827      newlist = []
828      for item in uniques:
829        if type(item) not in [ListType, TupleType, N.ndarray]:
830          item = [item]
831        tmprows = alinexand(a, keepcols, item)
832        for col in collapsecols:
833          avgcol = acolex(tmprows, col)
834          item.append(acollmean(avgcol))
835          if fcn1 <> None:
836            try:
837              test = fcn1(avgcol)
838            except:
839              test = 'N/A'
840            item.append(test)
841          if fcn2 <> None:
842            try:
843              test = fcn2(avgcol)
844            except:
845              test = 'N/A'
846            item.append(test)
847          newlist.append(item)
848      try:
849        new_a = N.array(newlist)
850      except TypeError:
851        new_a = N.array(newlist, 'O')
852      return new_a
853
854  def adm(a, criterion):
855    """
856Returns rows from the passed list of lists that meet the criteria in
857the passed criterion expression (a string as a function of x).
858
859Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
860"""
861    function = 'filter(lambda x: ' + criterion + ',a)'
862    lines = eval(function)
863    try:
864      lines = N.array(lines)
865    except:
866      lines = N.array(lines, dtype='O')
867    return lines
868
869  def isstring(x):
870    if type(x) == StringType:
871      return 1
872    else:
873      return 0
874
875  def alinexand(a, columnlist, valuelist):
876    """
877Returns the rows of an array where col (from columnlist) = val
878(from valuelist).  One value is required for each column in columnlist.
879
880Usage:   alinexand (a,columnlist,valuelist)
881Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
882"""
883    if type(columnlist) not in [ListType, TupleType, N.ndarray]:
884      columnlist = [columnlist]
885    if type(valuelist) not in [ListType, TupleType, N.ndarray]:
886      valuelist = [valuelist]
887    criterion = ''
888    for i in range(len(columnlist)):
889      if type(valuelist[i]) == StringType:
890        critval = '\'' + valuelist[i] + '\''
891      else:
892        critval = str(valuelist[i])
893      criterion = criterion + ' x[' + str(columnlist[
894          i]) + ']==' + critval + ' and'
895    criterion = criterion[0:-3]  # remove the "and" after the last crit
896    return adm(a, criterion)
897
898  def alinexor(a, columnlist, valuelist):
899    """
900Returns the rows of an array where col (from columnlist) = val (from
901valuelist).  One value is required for each column in columnlist.
902The exception is if either columnlist or valuelist has only 1 value,
903in which case that item will be expanded to match the length of the
904other list.
905
906Usage:   alinexor (a,columnlist,valuelist)
907Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
908"""
909    if type(columnlist) not in [ListType, TupleType, N.ndarray]:
910      columnlist = [columnlist]
911    if type(valuelist) not in [ListType, TupleType, N.ndarray]:
912      valuelist = [valuelist]
913    criterion = ''
914    if len(columnlist) == 1 and len(valuelist) > 1:
915      columnlist = columnlist * len(valuelist)
916    elif len(valuelist) == 1 and len(columnlist) > 1:
917      valuelist = valuelist * len(columnlist)
918    for i in range(len(columnlist)):
919      if type(valuelist[i]) == StringType:
920        critval = '\'' + valuelist[i] + '\''
921      else:
922        critval = str(valuelist[i])
923      criterion = criterion + ' x[' + str(columnlist[
924          i]) + ']==' + critval + ' or'
925    criterion = criterion[0:-2]  # remove the "or" after the last crit
926    return adm(a, criterion)
927
928  def areplace(a, oldval, newval):
929    """
930Replaces all occurrences of oldval with newval in array a.
931
932Usage:   areplace(a,oldval,newval)
933"""
934    return N.where(a == oldval, newval, a)
935
936  def arecode(a, listmap, col='all'):
937    """
938Remaps the values in an array to a new set of values (useful when
939you need to recode data from (e.g.) strings to numbers as most stats
940packages require.  Can work on SINGLE columns, or 'all' columns at once.
941@@@BROKEN 2007-11-26
942
943Usage:   arecode (a,listmap,col='all')
944Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
945"""
946    ashape = a.shape
947    if col == 'all':
948      work = a.ravel()
949    else:
950      work = acolex(a, col)
951      work = work.ravel()
952    for pair in listmap:
953      if type(pair[
954          1]) == StringType or work.dtype.char == 'O' or a.dtype.char == 'O':
955        work = N.array(work, dtype='O')
956        a = N.array(a, dtype='O')
957        for i in range(len(work)):
958          if work[i] == pair[0]:
959            work[i] = pair[1]
960        if col == 'all':
961          return N.reshape(work, ashape)
962        else:
963          return N.concatenate(
964              [a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1)
965      else:  # must be a non-Object type array and replacement
966        work = N.where(work == pair[0], pair[1], work)
967        return N.concatenate(
968            [a[:, 0:col], work[:, N.newaxis], a[:, col + 1:]], 1)
969
970  def arowcompare(row1, row2):
971    """
972Compares two rows from an array, regardless of whether it is an
973array of numbers or of python objects (which requires the cmp function).
974@@@PURPOSE? 2007-11-26
975
976Usage:   arowcompare(row1,row2)
977Returns: an array of equal length containing 1s where the two rows had
978         identical elements and 0 otherwise
979"""
980    return
981    if row1.dtype.char == 'O' or row2.dtype == 'O':
982      cmpvect = N.logical_not(
983          abs(N.array(map(cmp, row1, row2))))  # cmp fcn gives -1,0,1
984    else:
985      cmpvect = N.equal(row1, row2)
986    return cmpvect
987
988  def arowsame(row1, row2):
989    """
990Compares two rows from an array, regardless of whether it is an
991array of numbers or of python objects (which requires the cmp function).
992
993Usage:   arowsame(row1,row2)
994Returns: 1 if the two rows are identical, 0 otherwise.
995"""
996    cmpval = N.alltrue(arowcompare(row1, row2))
997    return cmpval
998
999  def asortrows(a, axis=0):
1000    """
1001Sorts an array "by rows".  This differs from the Numeric.sort() function,
1002which sorts elements WITHIN the given axis.  Instead, this function keeps
1003the elements along the given axis intact, but shifts them 'up or down'
1004relative to one another.
1005
1006Usage:   asortrows(a,axis=0)
1007Returns: sorted version of a
1008"""
1009    return N.sort(a, axis=axis, kind='mergesort')
1010
1011  def aunique(inarray):
1012    """
1013Returns unique items in the FIRST dimension of the passed array. Only
1014works on arrays NOT including string items.
1015
1016Usage:   aunique (inarray)
1017"""
1018    uniques = N.array([inarray[0]])
1019    if len(uniques.shape) == 1:  # IF IT'S A 1D ARRAY
1020      for item in inarray[1:]:
1021        if N.add.reduce(N.equal(uniques, item).ravel()) == 0:
1022          try:
1023            uniques = N.concatenate([uniques, N.array[N.newaxis, :]])
1024          except TypeError:
1025            uniques = N.concatenate([uniques, N.array([item])])
1026    else:  # IT MUST BE A 2+D ARRAY
1027      if inarray.dtype.char != 'O':  # not an Object array
1028        for item in inarray[1:]:
1029          if not N.sum(N.alltrue(N.equal(uniques, item), 1)):
1030            try:
1031              uniques = N.concatenate([uniques, item[N.newaxis, :]])
1032            except TypeError:  # the item to add isn't a list
1033              uniques = N.concatenate([uniques, N.array([item])])
1034          else:
1035            pass  # this item is already in the uniques array
1036      else:  # must be an Object array, alltrue/equal functions don't work
1037        for item in inarray[1:]:
1038          newflag = 1
1039          for unq in uniques:  # NOTE: cmp --> 0=same, -1=<, 1=>
1040            test = N.sum(abs(N.array(map(cmp, item, unq))))
1041            if test == 0:  # if item identical to any 1 row in uniques
1042              newflag = 0  # then not a novel item to add
1043              break
1044          if newflag == 1:
1045            try:
1046              uniques = N.concatenate([uniques, item[N.newaxis, :]])
1047            except TypeError:  # the item to add isn't a list
1048              uniques = N.concatenate([uniques, N.array([item])])
1049    return uniques
1050
1051  def aduplicates(inarray):
1052    """
1053Returns duplicate items in the FIRST dimension of the passed array. Only
1054works on arrays NOT including string items.
1055
1056Usage:   aunique (inarray)
1057"""
1058    inarray = N.array(inarray)
1059    if len(inarray.shape) == 1:  # IF IT'S A 1D ARRAY
1060      dups = []
1061      inarray = inarray.tolist()
1062      for i in range(len(inarray)):
1063        if inarray[i] in inarray[i + 1:]:
1064          dups.append(inarray[i])
1065      dups = aunique(dups)
1066    else:  # IT MUST BE A 2+D ARRAY
1067      dups = []
1068      aslist = inarray.tolist()
1069      for i in range(len(aslist)):
1070        if aslist[i] in aslist[i + 1:]:
1071          dups.append(aslist[i])
1072      dups = unique(dups)
1073      dups = N.array(dups)
1074    return dups
1075
1076except ImportError:  # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
1077  pass
1078