• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Compare the speed of downloading URLs sequentially vs. using futures."""
2
3import functools
4import time
5import timeit
6import sys
7
8try:
9    from urllib2 import urlopen
10except ImportError:
11    from urllib.request import urlopen
12
13from concurrent.futures import (as_completed, ThreadPoolExecutor,
14                                ProcessPoolExecutor)
15
# Benchmark workload: a mix of large, popular sites plus one host that is
# expected to fail to resolve, so the error-handling ("best effort") path
# of each downloader is exercised too.
URLS = ['http://www.google.com/',
        'http://www.apple.com/',
        'http://www.ibm.com',
        'http://www.thisurlprobablydoesnotexist.com',
        'http://www.slashdot.org/',
        'http://www.python.org/',
        'http://www.bing.com/',
        'http://www.facebook.com/',
        'http://www.yahoo.com/',
        'http://www.youtube.com/',
        'http://www.blogger.com/']
27
def load_url(url, timeout):
    """Fetch *url* and return its body as bytes.

    Parameters:
        url: the URL to download.
        timeout: socket timeout in seconds; silently ignored on
            Python < 2.6, where urlopen has no ``timeout`` argument.

    Raises whatever ``urlopen`` raises (URLError, socket errors, ...).
    """
    # urlopen grew the ``timeout`` keyword in Python 2.6; omit it on older
    # interpreters so the call still works there.
    kwargs = {'timeout': timeout} if sys.version_info >= (2, 6) else {}
    response = urlopen(url, **kwargs)
    try:
        return response.read()
    finally:
        # Fix: close the response explicitly instead of leaking the
        # connection/file handle until garbage collection.
        response.close()
31
def download_urls_sequential(urls, timeout=60):
    """Download *urls* one at a time.

    Parameters:
        urls: iterable of URL strings.
        timeout: per-request timeout in seconds, passed to ``load_url``.

    Returns a dict mapping each successfully fetched URL to its content
    (bytes).  URLs that fail to download are silently omitted — a
    deliberate best-effort policy so one dead host does not abort the
    benchmark run.
    """
    url_to_content = {}
    for url in urls:
        try:
            url_to_content[url] = load_url(url, timeout=timeout)
        # Fix: narrowed from a bare ``except:`` so KeyboardInterrupt and
        # SystemExit still propagate and can actually stop the benchmark.
        except Exception:
            pass
    return url_to_content
40
def download_urls_with_executor(urls, executor, timeout=60):
    """Download *urls* concurrently using *executor*.

    Parameters:
        urls: iterable of URL strings.
        executor: a ``concurrent.futures`` Executor; it is shut down
            before this function returns, even on error.
        timeout: per-request timeout in seconds, passed to ``load_url``.

    Returns a dict mapping each successfully fetched URL to its content
    (bytes); failed downloads are silently omitted (best-effort, to match
    ``download_urls_sequential``).
    """
    try:
        url_to_content = {}
        # Submit every request up front, then harvest results in
        # completion order.  dict((k, v) for ...) rather than a dict
        # comprehension keeps the file's Python 2.5/2.6 compatibility.
        future_to_url = dict((executor.submit(load_url, url, timeout), url)
                             for url in urls)

        for future in as_completed(future_to_url):
            try:
                url_to_content[future_to_url[future]] = future.result()
            # Fix: narrowed from a bare ``except:`` so KeyboardInterrupt
            # and SystemExit still propagate out of the harvest loop.
            except Exception:
                pass
        return url_to_content
    finally:
        # Always release the pool's worker threads/processes.
        executor.shutdown()
55
def main():
    """Run each download strategy over URLS and print its elapsed time."""
    benchmarks = [
        ('sequential',
         functools.partial(download_urls_sequential, URLS)),
        ('processes',
         functools.partial(download_urls_with_executor,
                           URLS,
                           ProcessPoolExecutor(10))),
        ('threads',
         functools.partial(download_urls_with_executor,
                           URLS,
                           ThreadPoolExecutor(10))),
    ]
    for name, fn in benchmarks:
        sys.stdout.write('%s: ' % name.ljust(12))
        start = time.time()
        url_map = fn()
        elapsed = time.time() - start
        sys.stdout.write('%.2f seconds (%d of %d downloaded)\n' %
                         (elapsed, len(url_map), len(URLS)))

if __name__ == '__main__':
    main()
75