• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2
3"""
4Markov chain simulation of words or characters.
5"""
6
class Markov:
    """Markov chain over sliceable sequences (e.g. ``str`` or ``tuple``).

    Training sequences are fed in with :meth:`put`; random sequences that
    follow the observed transitions are produced by :meth:`get`.

    States are slices of the training sequences and are used as dictionary
    keys, so the sequence type must produce hashable slices.

    Attributes:
        histsize: maximum number of trailing items that make up a state.
        choice: callable that picks one element from a non-empty list
            (for example ``random.choice``).
        trans: dict mapping each state to the list of observed successors.
            The key ``None`` holds the start-of-sequence entries and a
            successor of ``None`` marks end-of-sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty chain with the given history size and chooser."""
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that *next* was observed immediately after *state*."""
        # Duplicates are kept on purpose: picking uniformly from the list
        # then reproduces the observed transition frequencies.
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one training sequence into the transition table."""
        n = self.histsize
        add = self.add
        # The start marker maps to an empty sequence of the same type as
        # seq, which is what get() uses as its initial value.
        add(None, seq[:0])
        for i in range(len(seq)):
            # One-item slices (not items) keep the successor the same type
            # as seq so that get() can concatenate it.
            add(seq[max(0, i-n):i], seq[i:i+1])
        # Clamp the start index exactly as get() does.  Without the clamp a
        # sequence shorter than histsize yields a negative index that wraps
        # around, storing the terminator under a truncated key that get()
        # never looks up (KeyError during generation).
        add(seq[max(0, len(seq)-n):], None)

    def get(self):
        """Return one randomly generated sequence.

        Raises:
            KeyError: if nothing has been fed in with put() yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq)-n):]
            successor = choice(trans[state])
            if not successor:
                # End-of-sequence marker (None) was chosen.
                break
            seq += successor
        return seq
37
38
def test():
    """Command-line driver: train a Markov chain on text and print output.

    Options are read from ``sys.argv`` with getopt; an invalid option
    prints the usage text and exits with status 2.  Each input file
    (default: stdin, also selected by '-') is split into paragraphs on
    blank lines; every paragraph is split on whitespace and fed to the
    chain either as a tuple of words (-w) or as a single space-joined
    string (-c, the default).

    After reading, generated paragraphs are printed in an endless loop,
    wrapped at 72 columns; the loop has no exit condition of its own.
    Returns early if no input produced any data.
    """
    import getopt
    import random
    import sys

    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        print('Usage: %s [-#] [-cdqw] [file] ...' % sys.argv[0])
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated by blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        sys.exit(2)

    histsize = 2
    do_words = False
    debug = 1
    for o, a in opts:
        if '-0' <= o <= '-9': histsize = int(o[1:])
        if o == '-c': do_words = False
        if o == '-d': debug += 1
        if o == '-q': debug = 0
        if o == '-w': do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                if sys.stdin.isatty():
                    print('Sorry, need stdin from file')
                    continue
                if debug: print('processing', filename, '...')
                # stdin is deliberately left open: it is not ours to close,
                # and a repeated '-' argument must not hit a closed file.
                text = sys.stdin.read()
            else:
                # The context manager closes the file even if read() fails.
                with open(filename, 'r') as f:
                    if debug: print('processing', filename, '...')
                    text = f.read()
            for para in text.split('\n\n'):
                if debug > 1: print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug: print('done.')

    if debug > 1:
        # Show the start marker and the states shorter than the history.
        for key, successors in m.trans.items():
            if key is None or len(key) < histsize:
                print(repr(key), successors)
        if histsize == 0:
            # With no history the only state is the empty sequence, whose
            # type follows the input mode: () for words, '' for characters.
            empty = () if do_words else ''
            print(repr(empty), m.trans[empty])
        print()

    limit = 72
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        n = 0
        for w in words:
            # Only break a line that already has content; otherwise an
            # over-long word would emit a spurious blank line, which reads
            # as a paragraph separator.
            if n and n + len(w) > limit:
                print()
                n = 0
            print(w, end=' ')
            n += len(w) + 1
        print()
        print()
123
# Run the command-line driver only when executed as a script, not on import.
if __name__ == "__main__":
    test()
126