• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#! /usr/bin/env python
2
class Markov:
    """Markov chain of order `histsize` over sliceable sequences.

    Works on any sequence type whose slices are hashable and support
    `+=` concatenation (e.g. strings for character chains, tuples of
    words for word chains).

    The transition table `trans` maps a history key to the list of
    continuations seen after it.  Keys are slices of at most `histsize`
    trailing items; the special key None marks the start of a sequence.
    Continuations are one-item slices, or None to mark end of sequence.
    Duplicates are kept in the lists, so a uniform choice among them
    reproduces the observed frequencies.
    """

    def __init__(self, histsize, choice):
        """Create an empty model.

        histsize -- maximum number of trailing items used as history
        choice   -- callable picking one element from a non-empty list
                    (e.g. random.choice)
        """
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that continuation `next` was observed after `state`."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one complete training sequence into the model."""
        n = self.histsize
        add = self.add
        # seq[:0] is the empty sequence of the same type as seq; it is
        # what get() starts building on.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i-n):i], seq[i:i+1])
        # Clamp the start index at 0: a negative index (len(seq) < n)
        # would select a suffix instead of the whole sequence, and the
        # end marker would land under a key that get() never looks up.
        add(seq[max(0, len(seq)-n):], None)

    def get(self):
        """Generate and return one sequence by walking the chain.

        Starts from the None state and appends chosen continuations
        until a false one (the None end marker) is picked.  Raises
        KeyError if nothing has been put() into the model yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            history = seq[max(0, len(seq)-n):]
            step = choice(trans[history])
            if not step:
                break
            seq += step
        return seq
33
34
def test():
    """Command-line driver: train a Markov model and babble forever.

    Parses options from sys.argv (see the usage text below), feeds
    every paragraph of every input file into a Markov instance, then
    prints generated paragraphs in an endless loop, wrapped so that no
    line exceeds 72 characters.  Exits with status 2 on a bad option
    and returns early if no input produced any transitions.

    All output uses single-argument print(...) calls, which behave the
    same as a Python 2 print statement and as the Python 3 function.
    """
    import sys
    import random
    import getopt
    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        print('Usage: %s [-#] [-cdqw] [file] ...' % sys.argv[0])
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated by blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        sys.exit(2)
    histsize = 2
    do_words = False
    debug = 1
    for o, a in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        if o == '-c':
            do_words = False
        if o == '-d':
            debug += 1
        if o == '-q':
            debug = 0
        if o == '-w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            if debug:
                print('processing %s ...' % filename)
            try:
                text = f.read()
            finally:
                # Release the handle even if read() fails, but leave
                # stdin open: it is not ours, and '-' may be repeated.
                if f is not sys.stdin:
                    f.close()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        # Dump the start state and every short-history state.  With
        # histsize == 0 the only non-None key is the empty sequence
        # ('' for characters, () for words), so include it as well.
        for key in m.trans:
            if key is None or len(key) < histsize or histsize == 0:
                print('%r %s' % (key, m.trans[key]))
        print('')

    limit = 72
    # Endless by design: keeps generating until the process is stopped.
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        line = []
        n = 0   # width of the current line, counting one space per word
        for w in words:
            if n + len(w) > limit:
                print(' '.join(line))
                line = []
                n = 0
            line.append(w)
            n += len(w) + 1
        print(' '.join(line))
        print('')
119
# Run the command-line driver only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    test()
122