• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
5import posixpath
6import sys
7
8from file_system import FileSystem, StatInfo, FileNotFoundError
9from future import Future
10from path_util import IsDirectory
11from third_party.json_schema_compiler.memoize import memoize
12
13
class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories, never files.
  '''
  def __init__(self, file_system, object_store_creator):
    '''Wraps |file_system|, creating one object store for stat results and one
    for read results via |object_store_creator|. Each store's category is
    prefixed with the wrapped file system's identity.
    '''
    self._file_system = file_system
    def create_object_store(category, **optargs):
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)
    self._stat_object_store = create_object_store('stat')
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_object_store = create_object_store('read', start_empty=False)

  def Refresh(self):
    '''Delegates to the wrapped file system's Refresh().
    '''
    return self._file_system.Refresh()

  def Stat(self, path):
    '''Synchronous version of StatAsync(): resolves the Future immediately.
    '''
    return self.StatAsync(path).Get()

  def StatAsync(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Returns a Future to the stat result. For a file path, resolving the Future
    raises FileNotFoundError if the parent directory's stat has no entry for
    the file.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    dir_path, file_path = posixpath.split(path)
    if dir_path and not dir_path.endswith('/'):
      dir_path += '/'

    def make_stat_info(dir_stat):
      '''Converts a dir stat into the correct resulting StatInfo; if the Stat
      was for a file, the StatInfo should just contain that file.
      '''
      if path == dir_path:
        return dir_stat
      # Was a file stat. Extract that file.
      file_version = dir_stat.child_versions.get(file_path)
      if file_version is None:
        raise FileNotFoundError('No stat found for %s in %s (found %s)' %
                                (path, dir_path, dir_stat.child_versions))
      return StatInfo(file_version)

    # Serve from the stat cache when the directory has already been stat'ed.
    dir_stat = self._stat_object_store.Get(dir_path).Get()
    if dir_stat is not None:
      return Future(value=make_stat_info(dir_stat))

    dir_stat_future = self._MemoizedStatAsyncFromFileSystem(dir_path)
    def resolve():
      dir_stat = dir_stat_future.Get()
      assert dir_stat is not None  # should have raised a FileNotFoundError
      # We only ever need to cache the dir stat.
      self._stat_object_store.Set(dir_path, dir_stat)
      return make_stat_info(dir_stat)
    return Future(callback=resolve)

  @memoize
  def _MemoizedStatAsyncFromFileSystem(self, dir_path):
    '''This is a simple wrapper to memoize Futures to directory stats, since
    StatAsync makes heavy use of it. Only cache directories so that the
    memoized cache doesn't blow up.
    '''
    assert IsDirectory(dir_path)
    return self._file_system.StatAsync(dir_path)

  def Read(self, paths, skip_not_found=False):
    '''Reads a list of files. If a file is in memcache and it is not out of
    date, it is returned. Otherwise, the file is retrieved from the file system.

    Returns a Future to a dict mapping each path to its data. If
    |skip_not_found| is True, a FileNotFoundError raised while stat'ing a path
    is swallowed and that path is treated as having no stat; the flag is also
    forwarded to the wrapped file system's Read().
    '''
    cached_read_values = self._read_object_store.GetMulti(paths).Get()
    cached_stat_values = self._stat_object_store.GetMulti(paths).Get()

    # Populate a map of paths to Futures to their stat. They may have already
    # been cached in which case their Future will already have been constructed
    # with a value.
    stat_futures = {}

    def swallow_file_not_found_error(future):
      '''Wraps |future| so that it resolves to None rather than raising
      FileNotFoundError.
      '''
      def resolve():
        try:
          return future.Get()
        except FileNotFoundError:
          return None
      return Future(callback=resolve)

    for path in paths:
      stat_value = cached_stat_values.get(path)
      if stat_value is None:
        stat_future = self.StatAsync(path)
        if skip_not_found:
          stat_future = swallow_file_not_found_error(stat_future)
      else:
        stat_future = Future(value=stat_value)
      stat_futures[path] = stat_future

    def is_fresh(path, version):
      '''Returns True if the cached |version| of |path| matches its latest
      stat. A stat of None (file not found, swallowed because of
      |skip_not_found|) is never fresh.
      '''
      stat = stat_futures[path].Get()
      return stat is not None and stat.version == version

    # Filter only the cached data which is fresh by comparing to the latest
    # stat. The cached read data includes the cached version. Remove it for
    # the result returned to callers.
    fresh_data = dict(
        (path, data) for path, (data, version) in cached_read_values.iteritems()
        if is_fresh(path, version))

    if len(fresh_data) == len(paths):
      # Everything was cached and up-to-date.
      return Future(value=fresh_data)

    # Read in the values that were uncached or old.
    read_futures = self._file_system.Read(
        set(paths) - set(fresh_data.iterkeys()),
        skip_not_found=skip_not_found)
    def resolve():
      new_results = read_futures.Get()
      # Update the cache. This is a path -> (data, version) mapping. Paths
      # whose stat was swallowed (None) have no version to cache against, so
      # they are returned to the caller but not cached.
      cache_updates = {}
      for path, new_result in new_results.iteritems():
        stat = stat_futures[path].Get()
        if stat is not None:
          cache_updates[path] = (new_result, stat.version)
      self._read_object_store.SetMulti(cache_updates)
      new_results.update(fresh_data)
      return new_results
    return Future(callback=resolve)

  def GetIdentity(self):
    '''Returns the identity of the wrapped file system.
    '''
    return self._file_system.GetIdentity()

  def __repr__(self):
    return '%s of <%s>' % (type(self).__name__, repr(self._file_system))
142