#! /usr/bin/env python
"""Markov chain text generator working on characters or on words.

The module offers the ``Markov`` model class and ``test()``, a small
command-line driver that trains a model from text files and prints
randomly generated paragraphs.

Every ``print`` call passes exactly one parenthesised string, so the file
is valid under both the Python 2 print statement and the Python 3 print
function.
"""


class Markov:
    """Order-``histsize`` Markov model over sliceable, hashable sequences.

    ``trans`` maps a state (the last ``histsize`` items seen, or fewer near
    the start of a sequence) to the list of items observed after it.  Two
    markers are used: the key ``None`` lists the possible starting values
    (the empty prefix of each training sequence), and the value ``None``
    marks the end of a sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty model.

        histsize -- number of trailing items that make up a state.
        choice   -- callable that picks one element from a non-empty list
                    (for example ``random.choice``).
        """
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that ``next`` was observed right after ``state``."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Train the model on ``seq`` (a string or a tuple of words)."""
        n = self.histsize
        add = self.add
        # The empty prefix keeps the type of seq (str vs. tuple), which lets
        # get() grow its result with ``+=``.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index at 0: for a sequence shorter than histsize a
        # negative index would drop leading items, so the end marker would
        # be filed under a state that get() never looks up.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate one random sequence and return it.

        Starts from a value stored under the ``None`` key and keeps
        appending chosen successors of the current state until the end
        marker is chosen.  Raises KeyError when nothing has been put() yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq) - n):]
            successor = choice(trans[state])
            if not successor:
                # None is the end-of-sequence marker.
                break
            seq += successor
        return seq


def _print_usage(prog):
    """Print the command-line help for the script invoked as ``prog``."""
    lines = (
        'Usage: %s [-#] [-cdqw] [file] ...' % prog,
        'Options:',
        '-#: 1-digit history size (default 2)',
        '-c: characters (default)',
        '-w: words',
        '-d: more debugging output',
        '-q: no debugging output',
        'Input files (default stdin) are split in paragraphs',
        'separated by blank lines and each paragraph is split',
        'in words by whitespace, then reconcatenated with',
        'exactly one space separating words.',
        'Output consists of paragraphs separated by blank',
        'lines, where lines are no longer than 72 characters.',
    )
    for line in lines:
        print(line)


def _wrap_words(words, limit):
    """Join ``words`` into output lines and return them as a list of str.

    Each word is counted with one trailing space; a new line is started
    when the running width plus the next word would exceed ``limit``.
    At least one (possibly empty) line is always returned.
    """
    lines = []
    current = []
    width = 0
    for word in words:
        if width + len(word) > limit:
            lines.append(' '.join(current))
            current = []
            width = 0
        current.append(word)
        width += len(word) + 1
    lines.append(' '.join(current))
    return lines


def test():
    """Command-line driver: train on the input files, then print text.

    Options and file arguments are taken from ``sys.argv``; run with an
    invalid option to see the usage text.  After training, generated
    paragraphs are printed in an endless loop until the process is
    interrupted.  Returns early (None) when no input produced any data.
    """
    import getopt
    import random
    import sys

    try:
        opts, args = getopt.getopt(sys.argv[1:], '0123456789cdwq')
    except getopt.error:
        _print_usage(sys.argv[0])
        sys.exit(2)

    histsize = 2
    do_words = False
    debug = 1
    for opt, _ in opts:
        if '-0' <= opt <= '-9':
            histsize = int(opt[1:])
        elif opt == '-c':
            do_words = False
        elif opt == '-d':
            debug += 1
        elif opt == '-q':
            debug = 0
        elif opt == '-w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                if sys.stdin.isatty():
                    print('Sorry, need stdin from file')
                    continue
                if debug:
                    print('processing %s ...' % filename)
                # sys.stdin is not ours to close.
                text = sys.stdin.read()
            else:
                # The with-block closes the file even if read() fails.
                with open(filename, 'r') as f:
                    if debug:
                        print('processing %s ...' % filename)
                    text = f.read()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        for key in m.trans:
            if key is None or len(key) < histsize:
                print('%r %s' % (key, m.trans[key]))
        if histsize == 0:
            # With no history the only state is the empty sequence, whose
            # type follows the input mode: () for words, '' for characters.
            if do_words:
                empty = ()
            else:
                empty = ''
            print('%r %s' % (empty, m.trans[empty]))
        print('')

    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        for line in _wrap_words(words, 72):
            print(line)
        print('')


# test() matches the default name pattern test runners such as pytest use
# to collect tests, and it never returns once it starts generating output.
# Opt out of collection so importing it into a test module cannot hang a run.
test.__test__ = False


if __name__ == "__main__":
    test()