#!/usr/bin/env python3

"""
Markov chain simulation of words or characters.
"""


class Markov:
    """Order-``histsize`` Markov chain over sliceable sequences.

    Sequences may be strings (character mode) or tuples (word mode); the
    only requirements are slicing, ``+=`` concatenation and hashability.

    ``trans`` maps a state (the last ``histsize`` items seen, or fewer near
    the start of a sequence) to the list of one-item slices that followed
    it.  Two special entries exist:

    * key ``None`` holds the empty prefix of every sequence fed in, which
      gives ``get()`` an empty sequence of the right type to start from;
    * value ``None`` in a transition list marks "end of sequence".
    """

    def __init__(self, histsize, choice):
        """Create an empty chain.

        histsize -- number of preceding items that make up a state.
        choice   -- callable that picks one element from a non-empty list
                    (e.g. ``random.choice``).
        """
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that *next* was observed after *state*."""
        # The parameter name ``next`` shadows the builtin but is kept so
        # that existing keyword callers continue to work.
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one training sequence into the transition table."""
        n = self.histsize
        add = self.add
        # Remember an empty sequence of the same type as a starting point.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index: for sequences shorter than the history
        # size, ``len(seq) - n`` is negative and would wrap around, storing
        # the end marker under the wrong state and making get() fail with
        # KeyError once it reaches the real final state.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one random sequence.

        Starts from an empty sequence and keeps appending items chosen
        from the transitions of the current state until the end marker
        (``None``) is chosen.  Raises KeyError if nothing was fed in yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq) - n):]
            following = choice(trans[state])
            if following is None:
                break
            seq += following
        return seq


def _print_usage(prog):
    """Print the command line help for the demo driver."""
    print('Usage: %s [-#] [-cddqw] [file] ...' % prog)
    print('Options:')
    print('-#: 1-digit history size (default 2)')
    print('-c: characters (default)')
    print('-w: words')
    print('-d: more debugging output')
    print('-q: no debugging output')
    print('Input files (default stdin) are split in paragraphs')
    print('separated blank lines and each paragraph is split')
    print('in words by whitespace, then reconcatenated with')
    print('exactly one space separating words.')
    print('Output consists of paragraphs separated by blank')
    print('lines, where lines are no longer than 72 characters.')


def _feed_text(m, text, do_words, debug):
    """Split *text* into paragraphs and feed each non-empty one to *m*."""
    for para in text.split('\n\n'):
        if debug > 1: print('feeding ...')
        words = para.split()
        if words:
            if do_words:
                data = tuple(words)
            else:
                data = ' '.join(words)
            m.put(data)


def _print_wrapped(words, limit=72):
    """Print *words* separated by spaces, wrapping lines at *limit* columns.

    The paragraph is followed by one blank line.
    """
    n = 0
    for w in words:
        if n + len(w) > limit:
            print()
            n = 0
        print(w, end=' ')
        n += len(w) + 1
    print()
    print()


def test():
    """Command line driver: train on the input files, then print random
    paragraphs forever (until interrupted)."""
    import sys, random, getopt
    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        _print_usage(sys.argv[0])
        sys.exit(2)
    histsize = 2
    do_words = False
    debug = 1
    for o, a in opts:
        if o[1:].isdigit(): histsize = int(o[1:])
        if o == '-c': do_words = False
        if o == '-d': debug += 1
        if o == '-q': debug = 0
        if o == '-w': do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                if sys.stdin.isatty():
                    print('Sorry, need stdin from file')
                    continue
                if debug: print('processing', filename, '...')
                # Do not close sys.stdin: '-' may be given more than once.
                text = sys.stdin.read()
            else:
                with open(filename, 'r') as f:
                    if debug: print('processing', filename, '...')
                    text = f.read()
            _feed_text(m, text, do_words, debug)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug: print('done.')

    if debug > 1:
        # Show the start-up states: the None entry and every state shorter
        # than the history size.  With a history size of 0 the single empty
        # state ('' in character mode, () in word mode) is shown as well.
        for key in m.trans:
            if key is None or len(key) < max(1, histsize):
                print(repr(key), m.trans[key])
        print()
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        _print_wrapped(words)


# test() is the command line entry point, not a unit test; keep test
# collectors that match on the name (e.g. pytest) from running it.
test.__test__ = False

if __name__ == "__main__":
    test()