# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import posixpath
import sys

from file_system import FileSystem, StatInfo, FileNotFoundError
from future import Future
from path_util import IsDirectory
from third_party.json_schema_compiler.memoize import memoize


class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. It's
  smart, using Stat() to decide whether to skip Read()ing from |file_system|,
  and only Stat()ing directories never files.
  '''
  def __init__(self, file_system, object_store_creator):
    '''Wraps |file_system| and creates the 'stat' and 'read' object stores
    through |object_store_creator|. Store categories are prefixed with the
    identity of |file_system|.
    '''
    self._file_system = file_system
    def create_object_store(category, **optargs):
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          **optargs)
    self._stat_object_store = create_object_store('stat')
    # The read caches can start populated (start_empty=False) because file
    # updates are picked up by the stat, so it doesn't need the force-refresh
    # which starting empty is designed for. Without this optimisation, cron
    # runs are extra slow.
    self._read_object_store = create_object_store('read', start_empty=False)

  def Refresh(self):
    '''Delegates to the wrapped file system's Refresh().
    '''
    return self._file_system.Refresh()

  def Stat(self, path):
    '''Blocking version of StatAsync(): resolves and returns its result.
    '''
    return self.StatAsync(path).Get()

  def StatAsync(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Returns a Future. A FileNotFoundError for a file that is missing from its
    parent directory's stat is raised when the Future is resolved, not when
    this method is called, regardless of whether the directory stat was
    already cached.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    dir_path, file_path = posixpath.split(path)
    if dir_path and not dir_path.endswith('/'):
      dir_path += '/'

    def make_stat_info(dir_stat):
      '''Converts a dir stat into the correct resulting StatInfo; if the Stat
      was for a file, the StatInfo should just contain that file.
      '''
      if path == dir_path:
        return dir_stat
      # Was a file stat. Extract that file.
      file_version = dir_stat.child_versions.get(file_path)
      if file_version is None:
        raise FileNotFoundError('No stat found for %s in %s (found %s)' %
                                (path, dir_path, dir_stat.child_versions))
      return StatInfo(file_version)

    dir_stat = self._stat_object_store.Get(dir_path).Get()
    if dir_stat is not None:
      # Defer make_stat_info so that a missing file surfaces from Get(), the
      # same as on the uncached path below. Raising here would bypass callers
      # which wrap the returned Future to handle FileNotFoundError.
      return Future(callback=lambda: make_stat_info(dir_stat))

    dir_stat_future = self._MemoizedStatAsyncFromFileSystem(dir_path)
    def resolve():
      dir_stat = dir_stat_future.Get()
      assert dir_stat is not None  # should have raised a FileNotFoundError
      # We only ever need to cache the dir stat.
      self._stat_object_store.Set(dir_path, dir_stat)
      return make_stat_info(dir_stat)
    return Future(callback=resolve)

  @memoize
  def _MemoizedStatAsyncFromFileSystem(self, dir_path):
    '''This is a simple wrapper to memoize Futures to directory stats, since
    StatAsync makes heavy use of it. Only cache directories so that the
    memoized cache doesn't blow up.
    '''
    assert IsDirectory(dir_path)
    return self._file_system.StatAsync(dir_path)

  def Read(self, paths, skip_not_found=False):
    '''Reads a list of files. If a file is in memcache and it is not out of
    date, it is returned. Otherwise, the file is retrieved from the file system.

    Returns a Future to a dict mapping each path to its data. If
    |skip_not_found| is True, a FileNotFoundError from stat'ing a path is
    swallowed and that path is treated as having no known version; the flag
    is also forwarded to the wrapped file system's Read().
    '''
    cached_read_values = self._read_object_store.GetMulti(paths).Get()
    cached_stat_values = self._stat_object_store.GetMulti(paths).Get()

    # Populate a map of paths to Futures to their stat. They may have already
    # been cached in which case their Future will already have been constructed
    # with a value.
    stat_futures = {}

    def swallow_file_not_found_error(future):
      '''Wraps |future| so that it resolves to None rather than raising
      FileNotFoundError.
      '''
      def resolve():
        try:
          return future.Get()
        except FileNotFoundError:
          return None
      return Future(callback=resolve)

    for path in paths:
      stat_value = cached_stat_values.get(path)
      if stat_value is None:
        stat_future = self.StatAsync(path)
        if skip_not_found:
          stat_future = swallow_file_not_found_error(stat_future)
      else:
        stat_future = Future(value=stat_value)
      stat_futures[path] = stat_future

    def is_fresh(path, version):
      '''Returns True if the cached |version| of |path| matches its latest
      stat. A stat of None (not found, swallowed above) is never fresh.
      '''
      stat = stat_futures[path].Get()
      return stat is not None and stat.version == version

    # Filter only the cached data which is fresh by comparing to the latest
    # stat. The cached read data includes the cached version. Remove it for
    # the result returned to callers.
    fresh_data = dict(
        (path, data) for path, (data, version) in cached_read_values.iteritems()
        if is_fresh(path, version))

    if len(fresh_data) == len(paths):
      # Everything was cached and up-to-date.
      return Future(value=fresh_data)

    # Read in the values that were uncached or old.
    read_futures = self._file_system.Read(
        set(paths) - set(fresh_data.iterkeys()),
        skip_not_found=skip_not_found)
    def resolve():
      new_results = read_futures.Get()
      # Update the cache. This is a path -> (data, version) mapping. Results
      # whose stat resolved to None have no version to key the cache entry
      # on, so they are returned to the caller but not cached.
      cache_updates = {}
      for path, new_result in new_results.iteritems():
        stat = stat_futures[path].Get()
        if stat is not None:
          cache_updates[path] = (new_result, stat.version)
      self._read_object_store.SetMulti(cache_updates)
      new_results.update(fresh_data)
      return new_results
    return Future(callback=resolve)

  def GetIdentity(self):
    '''Returns the identity of the wrapped file system.
    '''
    return self._file_system.GetIdentity()

  def __repr__(self):
    return '%s of <%s>' % (type(self).__name__, repr(self._file_system))