/* * Copyright (C) 2008 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Mutex-free cache for key1+key2=value. */ #ifndef _DALVIK_ATOMICCACHE #define _DALVIK_ATOMICCACHE /* * If set to "1", gather some stats on our caching success rate. */ #define CALC_CACHE_STATS 0 /* * One entry in the cache. We store two keys (e.g. the classes that are * arguments to "instanceof") and one result (e.g. a boolean value). * * Must be exactly 16 bytes. */ typedef struct AtomicCacheEntry { u4 key1; u4 key2; u4 value; volatile u4 version; /* version and lock flag */ } AtomicCacheEntry; /* * One cache. * * Thought: we might be able to save a few cycles by storing the cache * struct and "entries" separately, avoiding an indirection. (We already * handle "numEntries" separately in ATOMIC_CACHE_LOOKUP.) */ typedef struct AtomicCache { AtomicCacheEntry* entries; /* array of entries */ int numEntries; /* #of entries, must be power of 2 */ void* entryAlloc; /* memory allocated for entries */ /* cache stats; note we don't guarantee atomic increments for these */ int trivial; /* cache access not required */ int fail; /* contention failure */ int hits; /* found entry in cache */ int misses; /* entry was for other keys */ int fills; /* entry was empty */ } AtomicCache; /* * Do a cache lookup. We need to be able to read and write entries * atomically. There are a couple of ways to do this: * (1) Have a global lock. A mutex is too heavy, so instead we would use * an atomic flag. If the flag is set, we could sit and spin, but * if we're a high-priority thread that may cause a lockup. Better * to just ignore the cache and do the full computation. * (2) Have a "version" that gets incremented atomically when a write * begins and again when it completes. Compare the version before * and after doing reads. So long as "version" is volatile the * compiler will do the right thing, allowing us to skip atomic * ops in the common read case. The table updates are expensive, * requiring two volatile writes and (for correctness on * multiprocessor systems) memory barriers. We also need some * sort of lock to ensure that nobody else tries to start an * update while we're in the middle of one. * * We expect a 95+% hit rate for the things we use this for, so #2 is * much better than #1. * * _cache is an AtomicCache* * _cacheSize is _cache->cacheSize (can save a cycle avoiding the lookup) * _key1, _key2 are the keys * * Define a function ATOMIC_CACHE_CALC that returns a 32-bit value. This * will be invoked when we need to compute the value. * * Returns the value. */ #if CALC_CACHE_STATS > 0 # define CACHE_XARG(_value) ,_value #else # define CACHE_XARG(_value) #endif #define ATOMIC_CACHE_LOOKUP(_cache, _cacheSize, _key1, _key2) ({ \ AtomicCacheEntry* pEntry; \ int hash; \ u4 firstVersion; \ u4 value; \ \ /* simple hash function */ \ hash = (((u4)(_key1) >> 2) ^ (u4)(_key2)) & ((_cacheSize)-1); \ pEntry = (_cache)->entries + hash; \ \ /* volatile read */ \ firstVersion = pEntry->version; \ \ if (pEntry->key1 == (u4)(_key1) && pEntry->key2 == (u4)(_key2)) { \ /* \ * The fields match. Get the value, then read the version a \ * second time to verify that we didn't catch a partial update. \ * We're also hosed if "firstVersion" was odd, indicating that \ * an update was in progress before we got here. \ */ \ value = pEntry->value; /* must grab before next check */ \ \ if ((firstVersion & 0x01) != 0 || firstVersion != pEntry->version) \ { \ /* \ * We clashed with another thread. Instead of sitting and \ * spinning, which might not complete if we're a high priority \ * thread, just do the regular computation. \ */ \ if (CALC_CACHE_STATS) \ (_cache)->fail++; \ value = (u4) ATOMIC_CACHE_CALC; \ } else { \ /* all good */ \ if (CALC_CACHE_STATS) \ (_cache)->hits++; \ } \ } else { \ /* \ * Compute the result and update the cache. We really want this \ * to happen in a different method -- it makes the ARM frame \ * setup for this method simpler, which gives us a ~10% speed \ * boost. \ */ \ value = (u4) ATOMIC_CACHE_CALC; \ dvmUpdateAtomicCache((u4) (_key1), (u4) (_key2), value, pEntry, \ firstVersion CACHE_XARG(_cache) ); \ } \ value; \ }) /* * Allocate a cache. */ AtomicCache* dvmAllocAtomicCache(int numEntries); /* * Free a cache. */ void dvmFreeAtomicCache(AtomicCache* cache); /* * Update a cache entry. * * Making the last argument optional, instead of merely unused, saves us * a few percent in the ATOMIC_CACHE_LOOKUP time. */ void dvmUpdateAtomicCache(u4 key1, u4 key2, u4 value, AtomicCacheEntry* pEntry, u4 firstVersion #if CALC_CACHE_STATS > 0 , AtomicCache* pCache #endif ); /* * Debugging. */ void dvmDumpAtomicCacheStats(const AtomicCache* pCache); #endif /*_DALVIK_ATOMICCACHE*/