• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import posixpath
6import traceback
7import xml.dom.minidom as xml
8from xml.parsers.expat import ExpatError
9
10from appengine_url_fetcher import AppEngineUrlFetcher
11from docs_server_utils import StringIdentity
12from file_system import (
13    FileNotFoundError, FileSystem, FileSystemError, StatInfo)
14from future import Future
15import url_constants
16
17
def _ParseHTML(html):
  '''Parses |html| into an xml.dom.minidom Document, tolerating bad lines.

  Unfortunately, the viewvc page has a stray </div> tag, so whenever expat
  reports an error the line it points at is dropped and parsing is retried.
  This takes care of all mismatched tags that sit on a line of their own.

  Returns the Document parsed from the (possibly trimmed) markup.

  Raises ExpatError when dropping the reported line would leave the text
  unchanged, e.g. when |html| is, or has been trimmed down to, the empty
  string. Retrying could never succeed in that situation.
  '''
  while True:
    try:
      return xml.parseString(html)
    except ExpatError as e:
      # expat line numbers are 1-based; enumerate() is 0-based.
      trimmed = '\n'.join(
          line for (i, line) in enumerate(html.split('\n'))
          if e.lineno != i + 1)
      if trimmed == html:
        # Nothing was removed, so the next attempt would fail identically.
        # Without this check the retry would never terminate.
        raise
      html = trimmed
28
def _InnerText(node):
  '''Returns the text of |node| followed by the text of all its descendants,
  like node.innerText in JS DOM, with surrounding whitespace stripped. Each
  child's text is itself stripped before the pieces are joined.
  '''
  own_value = node.nodeValue
  pieces = [own_value] if own_value else []
  # Not every node type is guaranteed to expose childNodes.
  children = node.childNodes if hasattr(node, 'childNodes') else ()
  pieces.extend(_InnerText(child) for child in children)
  return ''.join(pieces).strip()
39
def _CreateStatInfo(html):
  '''Builds a StatInfo from the HTML of a ViewVC directory listing.

  Every <table> in |html| is scanned for two kinds of rows:
    - a 2-cell row whose first cell reads "Directory revision:"; the text of
      the first link in its second cell is the revision of the directory;
    - 5-cell rows (name, version, age, author, last log entry), one per child.
      Rows whose version cell is not an integer are ignored.

  Returns StatInfo(parent_version, child_versions). parent_version is the
  directory revision as a string, or None if no such row was found.
  child_versions maps each child name to its revision as a string.

  Raises FileSystemError if the directory revision cell does not contain
  exactly 2 links, or if a second directory revision row is encountered.
  Raises ValueError if the directory revision text is not an integer.
  '''
  parent_version = None
  child_versions = {}

  # Try all of the tables until we find the ones that contain the data (the
  # directory and file versions are in different tables).
  for table in _ParseHTML(html).getElementsByTagName('table'):
    # Within the table there is a list of files. However, there may be some
    # things beforehand; a header, "parent directory" list, etc. We will deal
    # with that below by being generous and just ignoring such rows.
    rows = table.getElementsByTagName('tr')

    for row in rows:
      cells = row.getElementsByTagName('td')

      # The version of the directory will eventually appear in the soup of
      # table rows, like this:
      #
      # <tr>
      #   <td>Directory revision:</td>
      #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
      # </tr>
      #
      # So look out for that.
      if len(cells) == 2 and _InnerText(cells[0]) == 'Directory revision:':
        links = cells[1].getElementsByTagName('a')
        if len(links) != 2:
          raise FileSystemError('ViewVC assumption invalid: directory ' +
                                'revision content did not have 2 <a> ' +
                                ' elements, instead %s' % _InnerText(cells[1]))
        this_parent_version = _InnerText(links[0])
        int(this_parent_version)  # sanity check
        if parent_version is not None:
          # The message must be parenthesised: % binds tighter than +, so
          # without the parentheses only the second fragment is formatted and
          # the 2-tuple no longer matches its single %s.
          raise FileSystemError(('There was already a parent version %s, and ' +
                                 ' we just found a second at %s') %
                                (parent_version, this_parent_version))
        parent_version = this_parent_version

      # The version of each file is a list of rows with 5 cells: name, version,
      # age, author, and last log entry. Maybe the columns will change; we're
      # at the mercy of viewvc, but this constant can be easily updated.
      if len(cells) != 5:
        continue
      name_element, version_element = cells[:2]

      name = _InnerText(name_element)  # note: will end in / for directories
      try:
        version = int(_InnerText(version_element))
      except ValueError:
        # Not a version number (a header row, for example); skip the row.
        continue
      child_versions[name] = str(version)

    # Stop as soon as both the directory and the file versions are known.
    if parent_version and child_versions:
      break

  return StatInfo(parent_version, child_versions)
96
class _AsyncFetchFuture(object):
  '''Future delegate that starts one asynchronous fetch per path when it is
  constructed, and gathers all of the results when Get() is called.
  '''
  def __init__(self, paths, fetcher, args=None):
    '''Calls fetcher.FetchAsync once for every entry of |paths|. If |args| is
    not None it is appended to each fetched path as "path?args".
    '''
    pending = []
    for path in paths:
      url = '%s?%s' % (path, args) if args is not None else path
      pending.append((path, fetcher.FetchAsync(url)))
    # A list of tuples of the form (path, Future), in the order of |paths|.
    self._fetches = pending
    self._value = {}
    self._error = None

  def _ListDir(self, directory):
    '''Parses |directory| as XML and returns the data of the first child of
    every <a> element, without the first '..' entry if one is present.
    '''
    anchors = xml.parseString(directory).getElementsByTagName('a')
    names = [anchor.childNodes[0].data for anchor in anchors]
    try:
      names.remove('..')
    except ValueError:
      pass  # This listing has no '..' link.
    return names

  def Get(self):
    '''Resolves every pending fetch and returns a dict keyed by path. Paths
    ending in '/' map to the list produced by _ListDir, all other paths map
    to the fetched content.

    Raises FileNotFoundError for a 404 response, and FileSystemError for any
    other non-200 response or if resolving the fetch raises.
    '''
    for path, fetch in self._fetches:
      try:
        response = fetch.Get()
      except Exception:
        raise FileSystemError('Error fetching %s for Get: %s' %
            (path, traceback.format_exc()))

      status = response.status_code
      if status == 404:
        raise FileNotFoundError('Got 404 when fetching %s for Get, content %s' %
            (path, response.content))
      elif status != 200:
        raise FileSystemError('Got %s when fetching %s for Get, content %s' %
            (status, path, response.content))

      is_directory = path.endswith('/')
      self._value[path] = (self._ListDir(response.content) if is_directory
                           else response.content)

    if self._error is not None:
      raise self._error
    return self._value
136
class SubversionFileSystem(FileSystem):
  '''FileSystem implementation that fetches resources from src.chromium.org:
  file contents through one fetcher and version information (scraped from
  ViewVC pages) through another.
  '''
  @staticmethod
  def Create(branch='trunk', revision=None):
    '''Returns a SubversionFileSystem for |branch|, optionally pinned to
    |revision|, with fetchers rooted at the SVN and ViewVC URLs of the branch.
    '''
    svn_path = 'trunk/src' if branch == 'trunk' else 'branches/%s/src' % branch
    file_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.SVN_URL, svn_path))
    stat_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.VIEWVC_URL, svn_path))
    return SubversionFileSystem(file_fetcher,
                                stat_fetcher,
                                svn_path,
                                revision=revision)

  def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
    '''|file_fetcher| is used by Read and |stat_fetcher| by Stat. |svn_path|
    feeds GetIdentity. |revision|, when not None, is added to every request.
    '''
    self._file_fetcher = file_fetcher
    self._stat_fetcher = stat_fetcher
    self._svn_path = svn_path
    self._revision = revision

  def Read(self, paths):
    '''Returns a Future that resolves to the fetched result for each of
    |paths|, delegating the work to _AsyncFetchFuture.
    '''
    # |_file_fetcher| gets from svn.chromium.org which uses p= for version.
    query = None if self._revision is None else 'p=%s' % self._revision
    delegate = _AsyncFetchFuture(paths, self._file_fetcher, args=query)
    return Future(delegate=delegate)

  def Refresh(self):
    '''Nothing to refresh; returns a Future already holding an empty tuple.
    '''
    return Future(value=())

  def Stat(self, path):
    '''Returns the StatInfo for |path|, read from the ViewVC listing of the
    directory part of |path|. For '' or a path ending in '/' this is the
    StatInfo of the whole directory, otherwise a StatInfo holding only the
    version of the named file.

    Raises FileNotFoundError if the response status is not 200 or the file is
    missing from the listing, and FileSystemError if the fetch raises or no
    directory version can be found.
    '''
    directory, filename = posixpath.split(path)
    if self._revision is not None:
      # |_stat_fetcher| uses viewvc which uses pathrev= for version.
      directory = directory + '?pathrev=%s' % self._revision

    try:
      response = self._stat_fetcher.Fetch(directory)
    except Exception:
      raise FileSystemError('Error fetching %s for Stat: %s' %
          (path, traceback.format_exc()))

    status = response.status_code
    if status == 404:
      raise FileNotFoundError('Got 404 when fetching %s for Stat, content %s' %
          (path, response.content))
    elif status != 200:
      # NOTE(review): every non-200 status is reported as FileNotFoundError
      # here, whereas _AsyncFetchFuture.Get raises FileSystemError for the
      # same situation. Confirm which one callers rely on before changing.
      raise FileNotFoundError('Got %s when fetching %s for Stat, content %s' %
          (status, path, response.content))

    listing = _CreateStatInfo(response.content)
    if listing.version is None:
      raise FileSystemError('Failed to find version of dir %s' % directory)

    wants_directory = path == '' or path.endswith('/')
    if wants_directory:
      return listing
    if filename in listing.child_versions:
      return StatInfo(listing.child_versions[filename])
    raise FileNotFoundError(
        '%s from %s was not in child versions for Stat' % (filename, path))

  def GetIdentity(self):
    '''Returns "<class name>@<StringIdentity of the svn path>".
    '''
    # NOTE: no revision here, since it would mess up the caching of reads. It
    # probably doesn't matter since all the caching classes will use the result
    # of Stat to decide whether to re-read - and Stat has a ceiling of the
    # revision - so when the revision changes, so might Stat. That is enough.
    class_name = self.__class__.__name__
    path_identity = StringIdentity(self._svn_path)
    return '@'.join((class_name, path_identity))
205