• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#! /usr/bin/env python
#######################################################################
# Newslist  $Revision$
#
# Syntax:
#    newslist [ -a ]
#
# This is a program to create a directory full of HTML pages
# which between them contain links to all the newsgroups available
# on your server.
#
# The -a option causes a complete list of all groups to be read from
# the server rather than just the ones which have appeared since last
# execution. This recreates the local list from scratch. Use this on
# the first invocation of the program, and from time to time thereafter.
#   When new groups are first created they may appear on your server as
# empty groups. By default, empty groups are ignored by the -a option.
# However, these new groups will not be created again, and so will not
# appear in the server's list of 'new groups' at a later date. Hence
# they won't appear until you do a '-a' after some articles have
# appeared in them.
#
# I should really keep a list of ignored empty groups and re-check them
# for articles on every run, but I haven't got around to it yet.
#
# This assumes an NNTP news feed.
#
# Feel free to copy, distribute and modify this code for
# non-commercial use. If you make any useful modifications, let me
# know!
#
# (c) Quentin Stafford-Fraser 1994
# fraser@europarc.xerox.com                     qs101@cl.cam.ac.uk
#                                                                     #
#######################################################################
35import sys, nntplib, marshal, time, os
36
#######################################################################
# Site settings -- review each of these before the first run.         #

# Base directory.  Any filename below that does not begin with '/' is
# interpreted relative to this directory.
topdir = os.path.expanduser('~/newspage')

# Hostname of the NNTP server to query, for example
#    newshost = 'nntp-serv.cl.cam.ac.uk'
# To take it from the NNTPSERVER environment variable instead, use
#    newshost = os.environ['NNTPSERVER']
newshost = 'news.example.com'

# File holding the locally cached newsgroup tree.
treefile = 'grouptree'

# File holding one-line newsgroup descriptions.  A suitable one was
# found at ftp.uu.net in /uunet-info/newgroups.gz.  Set this to '' to
# do without descriptions.
descfile = 'newsgroups'

# Directory that receives the generated HTML pages, for example
#   pagedir = '/usr/local/lib/html/newspage'
#   pagedir = 'pages'
pagedir = topdir

# URL prefix that maps onto pagedir, for example
#   httppref = '/newspage/'
# Leaving it empty produces relative links between pages (recommended):
#   httppref = ''
httppref = ''

# Base name of the top-level ('root') page inside pagedir; '.html' is
# appended automatically.
rootpage = 'root'

# Set skipempty to 0 to list empty groups too.  Only the -a option
# looks at this.
skipempty = 1

# HTML appended after every link that leads to a further page, so that
# those links stand out.  Use '' for nothing; a plain '...' also works
# well.
pagelinkicon = '... <img src="http://pelican.cl.cam.ac.uk/icons/page.xbm"> '

# ---------------------------------------------------------------------
# Less important personal preferences:

# sublistsize is the largest number of items that will be shown as an
# indented sub-list; anything bigger is moved onto a page of its own.
# Smaller values give more pages, each of them shorter.
sublistsize = 4

# That should be all.                                                 #
#######################################################################
97
# Optional overrides: the first '.newslistrc.py' found in the current
# directory, then in the user's home directory, is executed in this
# module's namespace so that it can rebind any of the settings above.
#
# os.path.expanduser('~') is used instead of os.environ['HOME'] so that a
# missing HOME variable does not abort the program with a KeyError.
#
# NOTE(review): this executes arbitrary Python found in the current
# working directory -- only run newslist from directories you trust.
for rcdir in (os.curdir, os.path.expanduser('~')):
    rcfile = os.path.join(rcdir, '.newslistrc.py')
    if os.path.exists(rcfile):
        print(rcfile)
        with open(rcfile) as rcsource:
            rccode = compile(rcsource.read(), rcfile, 'exec')
        # Same effect as execfile(rcfile), but also valid on Python 3.
        exec(rccode)
        break
104
105from nntplib import NNTP
106from stat import *
107
# Version text shown in each page footer.  Every word that still contains
# a '$' is dropped, so an unexpanded '$Revision$' keyword becomes ''.
rcsrev = '$Revision$'
rcsrev = ' '.join(word for word in rcsrev.split() if '$' not in word)

# Group name -> description; filled in by readdesc().
desc = {}

# Make (possibly) relative filenames into absolute ones.
# os.path.join() leaves an already-absolute second argument untouched.
treefile = os.path.join(topdir, treefile)
if descfile:
    # An empty descfile means "no descriptions"; joining it would turn it
    # into 'topdir/' and defeat the descfile == '' test in readdesc().
    descfile = os.path.join(topdir, descfile)
# The joined value used to be stored only in 'page', which nothing reads,
# so a relative pagedir (e.g. 'pages') was never resolved against topdir.
pagedir = os.path.join(topdir, pagedir)
page = pagedir  # old name kept in case anything still refers to it
116
117# First the bits for creating trees ---------------------------
118
119# Addtotree creates/augments a tree from a list of group names
def addtotree(tree, groups):
    """Add every dotted group name in *groups* to *tree*, in place.

    Each name is split on '.' and handed to makeleaf(), which creates
    whatever branches and leaf marker are still missing.
    """
    print('Updating tree...')
    for groupname in groups:
        makeleaf(tree, groupname.split('.'))
125
126# Makeleaf makes a leaf and the branch leading to it if necessary
def makeleaf(tree, path):
    """Ensure *tree* contains the branch *path* ending in a leaf marker.

    tree -- nested dict of dicts, modified in place.
    path -- list of name components, e.g. ['comp', 'lang', 'python'].

    Missing intermediate dicts are created on the way down, and the final
    component's dict gets the entry '.' -> '.', which marks "a real group
    exists at this level".  An empty *path* is ignored (it used to raise
    IndexError).
    """
    if not path:
        return
    node = tree
    for component in path:
        # setdefault creates the sub-dict only when it is not there yet.
        node = node.setdefault(component, {})
    node['.'] = '.'
137
138# Then the bits for outputting trees as pages ----------------
139
140# Createpage creates an HTML file named <root>.html containing links
141# to those groups beginning with <root>.
142
def createpage(root, tree, p):
    """Write <pagedir>/<root>.html listing the groups held in *tree*.

    root -- page base name; when it equals rootpage the heading carries
            no ' under ...' suffix.
    tree -- nested group dict rendered through printtree().
    p    -- dotted-name prefix, passed to printtree() unchanged.
    """
    suffix = '' if root == rootpage else ' under ' + root
    target = os.path.join(pagedir, root + '.html')

    with open(target, 'w') as out:
        emit = out.write

        # Document head and page heading.
        # emit('Content-Type: text/html\n')
        emit('<html>\n<head>\n')
        emit('<title>Newsgroups available%s</title>\n' % detail_text(suffix)
             if False else '<title>Newsgroups available%s</title>\n' % suffix)
        emit('</head>\n<body>\n')
        emit('<h1>Newsgroups available%s</h1>\n' % suffix)
        emit('<a href="%s%s.html">Back to top level</a><p>\n' %
             (httppref, rootpage))

        # The group listing itself (may create further pages recursively).
        printtree(out, tree, 0, p)

        # Footer with generator version and creation time.
        emit('\n<p>')
        emit("<i>This page automatically created by 'newslist' v. %s." %
             rcsrev)
        emit(time.ctime(time.time()) + '</i>\n')
        emit('</body>\n</html>\n')
163
164# Printtree prints the groups as a bulleted list.  Groups with
165# more than <sublistsize> subgroups will be put on a separate page.
166# Other sets of subgroups are just indented.
167
def printtree(f, tree, indent, p):
    """Write *tree* to the open file *f* as (nested) HTML list items.

    f      -- file object of the page currently being written.
    tree   -- nested dict; the key '.' marks an actual newsgroup, every
              other key is a sub-hierarchy.
    indent -- nesting depth; 0 for the outermost list of a page.
    p      -- dotted name of this node with a leading '.', so p[1:] is
              the group/hierarchy name.

    Below the top level, a node with more than sublistsize entries is
    replaced by a link to a separate page produced by createpage().
    """
    name = p[1:]
    count = len(tree)

    if indent > 0 and count > sublistsize:
        # Too big for an inline sub-list: link to a page of its own.
        f.write('<li><b><a href="%s%s.html">' % (httppref, name))
        f.write(name + '.*')
        f.write('</a></b>%s\n' % pagelinkicon)
        createpage(name, tree, p)
        return

    wrap_in_list = count > 1
    if wrap_in_list:
        if indent > 0:
            # Nested sub-list, introduced by the hierarchy name.
            f.write('<li>%s\n<ul>' % name)
        else:
            # Outermost list of the page.
            f.write('<ul>')
        indent += 1

    # Sorting is a no-op for zero or one key, so it is safe to sort always.
    for key in sorted(tree):
        if key != '.':
            # A sub-hierarchy: recurse with the extended dotted name.
            printtree(f, tree[key], indent, p + '.' + key)
            continue
        # The leaf marker: emit the newsgroup link itself.
        f.write('<li><a href="news:%s">%s</a> ' % (name, name))
        if name in desc:
            f.write('     <i>%s</i>\n' % desc[name])
        else:
            f.write('\n')

    if wrap_in_list:
        f.write('\n</ul>')
205
206# Reading descriptions file ---------------------------------------
207
208# This returns a dict mapping group name to its description
209
def readdesc(descfile):
    """Load newsgroup descriptions from *descfile* into the global desc.

    Every non-blank line is taken as "<group> <description words...>".
    Runs of whitespace in the description collapse to single spaces, and
    descriptions of fewer than two characters are not stored.

    descfile -- path of the descriptions file; '' means "none".

    Returns the resulting dict (also bound to the module-level name
    desc).  It is empty when descfile is '' or cannot be opened; in the
    latter case a message is printed.
    """
    global desc
    desc = {}

    if descfile == '':
        return desc

    try:
        with open(descfile, 'r') as source:
            print('Reading descriptions...')
            for line in source:
                words = line.split()
                if not words:
                    # Blank line: nothing to record.
                    continue
                text = ' '.join(words[1:])
                if len(text) > 1:
                    desc[words[0]] = text
    except IOError:
        print('Failed to open description file ' + descfile)

    return desc
232
# Check that output directory exists, ------------------------------
# and offer to create it if not
235
def checkopdir(pagedir):
    """Make sure the output directory *pagedir* exists.

    If it is missing, the user is asked on stdin whether it should be
    created.  Any answer that does not start with 'y' -- including
    end-of-file on stdin -- exits the program with status 1, as does a
    failed os.mkdir().
    """
    if os.path.isdir(pagedir):
        return

    print('Directory %s does not exist.' % pagedir)
    print('Shall I create it for you? (y/n)')
    # readline() returns '' at end-of-file; startswith() treats that as
    # "no" where indexing [0] used to raise IndexError.
    if not sys.stdin.readline().startswith('y'):
        print('OK. Exiting.')
        sys.exit(1)

    try:
        os.mkdir(pagedir, 0o777)
    except OSError:
        print('Sorry - failed!')
        sys.exit(1)
249
250# Read and write current local tree ----------------------------------
251
def readlocallist(treefile):
    """Load the cached group tree and the date it was last written.

    treefile -- path of the marshal-format cache file.

    Returns (tree, treedate): tree is the unmarshalled object ({} if the
    file exists but cannot be opened) and treedate is the file's
    modification date in local time as a 'YYMMDD' string.

    Exits with status 1 when the file cannot be stat'ed (typically the
    first run without -a) or when its contents cannot be unmarshalled.
    """
    print('Reading current local group list...')
    tree = {}
    try:
        modified = time.localtime(os.stat(treefile).st_mtime)
    except OSError:
        print('\n*** Failed to open local group cache '+treefile)
        print('If this is the first time you have run newslist, then')
        print('use the -a option to create it.')
        sys.exit(1)
    treedate = '%02d%02d%02d' % (modified[0] % 100, modified[1], modified[2])
    try:
        with open(treefile, 'rb') as dump:
            # NOTE: marshal must only be used on trusted data, and its
            # format may differ between Python versions.
            tree = marshal.load(dump)
    except IOError:
        print('Cannot open local group list ' + treefile)
    except (EOFError, ValueError, TypeError):
        # Truncated or otherwise corrupt cache: stop with a hint rather
        # than a traceback.
        print('\n*** Local group cache %s is unreadable.' % treefile)
        print('Use the -a option to rebuild it.')
        sys.exit(1)
    return (tree, treedate)
269
def writelocallist(treefile, tree):
    """Save *tree* to *treefile* in marshal format.

    Prints a confirmation on success.  If the file cannot be opened or
    written, an explanatory message is printed and the program exits with
    status 1.  Only I/O errors are handled this way; other exceptions
    (for instance KeyboardInterrupt) now propagate instead of being
    reported as a permissions problem.
    """
    try:
        with open(treefile, 'wb') as dump:
            marshal.dump(tree, dump)
    except (IOError, OSError):
        print('Sorry - failed to write to local group cache %s' % treefile)
        print('Does it (or its directory) have the correct permissions?')
        sys.exit(1)
    print('Saved list to %s\n' % treefile)
279
280# Return list of all groups on server -----------------------------
281
def getallgroups(server):
    """Return the names of all groups reported by server.list().

    server -- object whose list() method returns (response, entries);
              each entry is indexed as entry[0] (group name), entry[1]
              and entry[2].  Presumably these are the 'last' and 'first'
              article numbers as returned by nntplib's NNTP.list() --
              confirm against the nntplib version in use.

    When the module-level skipempty flag is true, groups for which
    int(entry[1]) < int(entry[2]) are treated as empty: their names are
    printed and they are left out of the result.
    """
    print('Getting list of all groups...')
    info = server.list()[1]
    groups = []
    print('Processing...')
    if skipempty:
        print('\nIgnoring following empty groups:')
    for entry in info:
        grpname = entry[0].split()[0]
        if skipempty and int(entry[1]) < int(entry[2]):
            # Names stay on one line, separated by two spaces.
            sys.stdout.write(grpname + '  ')
        else:
            groups.append(grpname)
    print('\n')
    if skipempty:
        print('(End of empty groups)')
    return groups
300
301# Return list of new groups on server -----------------------------
302
def getnewgroups(server, treedate):
    """Return the names of groups created since the start of *treedate*.

    server   -- object whose newgroups(date, time) method returns
                (response, lines), each line starting with a group name.
    treedate -- date string passed through to server.newgroups()
                together with the fixed time '000001'.

    Blank response lines are skipped (they used to raise IndexError).
    """
    sys.stdout.write('Getting list of new groups since start of %s... '
                     % treedate)
    # Show the progress text before the potentially slow server call.
    sys.stdout.flush()
    info = server.newgroups(treedate, '000001')[1]
    print('got %d.' % len(info))
    sys.stdout.write('Processing... ')
    groups = [line.split()[0] for line in info if line.strip()]
    print('Done')
    return groups
314
315# Now the main program --------------------------------------------
316
def main():
    """Refresh the group tree and regenerate all HTML pages.

    Steps:
      1. Make sure the output directory exists (checkopdir).
      2. Try to connect to newshost.  On failure, carry on with the
         locally cached tree only.
      3. With a connection and '-a' as first argument, fetch the complete
         group list; otherwise load the cached tree and, if connected,
         fetch only the groups created since the cache was last written.
      4. If connected, merge the fetched groups into the tree and save it.
      5. Read the descriptions file and write the pages, starting from
         the root page.
    """
    tree = {}

    # Check that the output directory exists
    checkopdir(pagedir)

    server = None
    try:
        print('Connecting to %s...' % newshost)
        server = nntplib.NNTP(newshost)
    except (nntplib.error_temp, nntplib.error_perm,
            EnvironmentError, EOFError) as x:
        # EnvironmentError covers socket/DNS failures, which previously
        # escaped this handler and prevented the local-only fallback.
        print('Error connecting to host: %s' % x)
        print('I\'ll try to use just the local list.')
    connected = server is not None

    try:
        # If -a is specified, read the full list of groups from server
        if connected and sys.argv[1:2] == ['-a']:
            groups = getallgroups(server)
        # Otherwise just read the local file and then add
        # groups created since local file last modified.
        else:
            (tree, treedate) = readlocallist(treefile)
            if connected:
                groups = getnewgroups(server, treedate)
    finally:
        if connected:
            # Best-effort polite shutdown; a failure here must not mask
            # the real outcome of the run.
            try:
                server.quit()
            except (nntplib.NNTPError, EnvironmentError, EOFError):
                pass

    if connected:
        addtotree(tree, groups)
        writelocallist(treefile, tree)

    # Read group descriptions
    readdesc(descfile)

    print('Creating pages...')
    createpage(rootpage, tree, '')
    print('Done')

if __name__ == "__main__":
    main()
360
361# That's all folks
362######################################################################
363