1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 /* 17 * Mutex-free cache for key1+key2=value. 18 */ 19 #ifndef _DALVIK_ATOMICCACHE 20 #define _DALVIK_ATOMICCACHE 21 22 /* 23 * If set to "1", gather some stats on our caching success rate. 24 */ 25 #define CALC_CACHE_STATS 0 26 27 28 /* 29 * One entry in the cache. We store two keys (e.g. the classes that are 30 * arguments to "instanceof") and one result (e.g. a boolean value). 31 * 32 * Must be exactly 16 bytes. 33 */ 34 typedef struct AtomicCacheEntry { 35 u4 key1; 36 u4 key2; 37 u4 value; 38 volatile u4 version; /* version and lock flag */ 39 } AtomicCacheEntry; 40 41 /* 42 * One cache. 43 * 44 * Thought: we might be able to save a few cycles by storing the cache 45 * struct and "entries" separately, avoiding an indirection. (We already 46 * handle "numEntries" separately in ATOMIC_CACHE_LOOKUP.) 47 */ 48 typedef struct AtomicCache { 49 AtomicCacheEntry* entries; /* array of entries */ 50 int numEntries; /* #of entries, must be power of 2 */ 51 52 void* entryAlloc; /* memory allocated for entries */ 53 54 /* cache stats; note we don't guarantee atomic increments for these */ 55 int trivial; /* cache access not required */ 56 int fail; /* contention failure */ 57 int hits; /* found entry in cache */ 58 int misses; /* entry was for other keys */ 59 int fills; /* entry was empty */ 60 } AtomicCache; 61 62 /* 63 * Do a cache lookup. We need to be able to read and write entries 64 * atomically. There are a couple of ways to do this: 65 * (1) Have a global lock. A mutex is too heavy, so instead we would use 66 * an atomic flag. If the flag is set, we could sit and spin, but 67 * if we're a high-priority thread that may cause a lockup. Better 68 * to just ignore the cache and do the full computation. 69 * (2) Have a "version" that gets incremented atomically when a write 70 * begins and again when it completes. Compare the version before 71 * and after doing reads. So long as "version" is volatile the 72 * compiler will do the right thing, allowing us to skip atomic 73 * ops in the common read case. The table updates are expensive, 74 * requiring two volatile writes and (for correctness on 75 * multiprocessor systems) memory barriers. We also need some 76 * sort of lock to ensure that nobody else tries to start an 77 * update while we're in the middle of one. 78 * 79 * We expect a 95+% hit rate for the things we use this for, so #2 is 80 * much better than #1. 81 * 82 * _cache is an AtomicCache* 83 * _cacheSize is _cache->cacheSize (can save a cycle avoiding the lookup) 84 * _key1, _key2 are the keys 85 * 86 * Define a function ATOMIC_CACHE_CALC that returns a 32-bit value. This 87 * will be invoked when we need to compute the value. 88 * 89 * Returns the value. 90 */ 91 #if CALC_CACHE_STATS > 0 92 # define CACHE_XARG(_value) ,_value 93 #else 94 # define CACHE_XARG(_value) 95 #endif 96 #define ATOMIC_CACHE_LOOKUP(_cache, _cacheSize, _key1, _key2) ({ \ 97 AtomicCacheEntry* pEntry; \ 98 int hash; \ 99 u4 firstVersion; \ 100 u4 value; \ 101 \ 102 /* simple hash function */ \ 103 hash = (((u4)(_key1) >> 2) ^ (u4)(_key2)) & ((_cacheSize)-1); \ 104 pEntry = (_cache)->entries + hash; \ 105 \ 106 /* volatile read */ \ 107 firstVersion = pEntry->version; \ 108 \ 109 if (pEntry->key1 == (u4)(_key1) && pEntry->key2 == (u4)(_key2)) { \ 110 /* \ 111 * The fields match. Get the value, then read the version a \ 112 * second time to verify that we didn't catch a partial update. \ 113 * We're also hosed if "firstVersion" was odd, indicating that \ 114 * an update was in progress before we got here. \ 115 */ \ 116 value = pEntry->value; /* must grab before next check */ \ 117 \ 118 if ((firstVersion & 0x01) != 0 || firstVersion != pEntry->version) \ 119 { \ 120 /* \ 121 * We clashed with another thread. Instead of sitting and \ 122 * spinning, which might not complete if we're a high priority \ 123 * thread, just do the regular computation. \ 124 */ \ 125 if (CALC_CACHE_STATS) \ 126 (_cache)->fail++; \ 127 value = (u4) ATOMIC_CACHE_CALC; \ 128 } else { \ 129 /* all good */ \ 130 if (CALC_CACHE_STATS) \ 131 (_cache)->hits++; \ 132 } \ 133 } else { \ 134 /* \ 135 * Compute the result and update the cache. We really want this \ 136 * to happen in a different method -- it makes the ARM frame \ 137 * setup for this method simpler, which gives us a ~10% speed \ 138 * boost. \ 139 */ \ 140 value = (u4) ATOMIC_CACHE_CALC; \ 141 dvmUpdateAtomicCache((u4) (_key1), (u4) (_key2), value, pEntry, \ 142 firstVersion CACHE_XARG(_cache) ); \ 143 } \ 144 value; \ 145 }) 146 147 /* 148 * Allocate a cache. 149 */ 150 AtomicCache* dvmAllocAtomicCache(int numEntries); 151 152 /* 153 * Free a cache. 154 */ 155 void dvmFreeAtomicCache(AtomicCache* cache); 156 157 /* 158 * Update a cache entry. 159 * 160 * Making the last argument optional, instead of merely unused, saves us 161 * a few percent in the ATOMIC_CACHE_LOOKUP time. 162 */ 163 void dvmUpdateAtomicCache(u4 key1, u4 key2, u4 value, AtomicCacheEntry* pEntry, 164 u4 firstVersion 165 #if CALC_CACHE_STATS > 0 166 , AtomicCache* pCache 167 #endif 168 ); 169 170 /* 171 * Debugging. 172 */ 173 void dvmDumpAtomicCacheStats(const AtomicCache* pCache); 174 175 #endif /*_DALVIK_ATOMICCACHE*/ 176