/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Mutex-free cache for key1+key2=value.
 */
#ifndef DALVIK_ATOMICCACHE_H_
#define DALVIK_ATOMICCACHE_H_

/*
 * If set to "1", gather some stats on our caching success rate.
 */
#define CALC_CACHE_STATS 0


/*
 * One entry in the cache.  We store two keys (e.g. the classes that are
 * arguments to "instanceof") and one result (e.g. a boolean value).
 *
 * Must be exactly 16 bytes.
 */
struct AtomicCacheEntry {
    u4 key1;
    u4 key2;
    u4 value;
    volatile u4 version;    /* version and lock flag; odd while a writer
                               has an update in progress (readers check
                               the low bit in ATOMIC_CACHE_LOOKUP) */
};

/*
 * One cache.
 *
 * Thought: we might be able to save a few cycles by storing the cache
 * struct and "entries" separately, avoiding an indirection.  (We already
 * handle "numEntries" separately in ATOMIC_CACHE_LOOKUP.)
 */
struct AtomicCache {
    AtomicCacheEntry* entries;  /* array of entries; length numEntries */
    int numEntries;             /* #of entries, must be power of 2 */

    void* entryAlloc;           /* memory allocated for entries */

    /* cache stats; note we don't guarantee atomic increments for these */
    int trivial;                /* cache access not required */
    int fail;                   /* contention failure */
    int hits;                   /* found entry in cache */
    int misses;                 /* entry was for other keys */
    int fills;                  /* entry was empty */
};

/*
 * Do a cache lookup. 
We need to be able to read and write entries 64 * atomically. There are a couple of ways to do this: 65 * (1) Have a global lock. A mutex is too heavy, so instead we would use 66 * an atomic flag. If the flag is set, we could sit and spin, 67 * but if we're a high-priority thread that may cause a lockup. 68 * Better to just ignore the cache and do the full computation. 69 * (2) Have a "version" that gets incremented atomically when a write 70 * begins and again when it completes. Compare the version before 71 * and after doing reads. So long as we have memory barriers in the 72 * right place the compiler and CPU will do the right thing, allowing 73 * us to skip atomic ops in the common read case. The table updates 74 * are expensive, requiring two writes with barriers. We also need 75 * some sort of lock to ensure that nobody else tries to start an 76 * update while we're in the middle of one. 77 * 78 * We expect a 95+% hit rate for the things we use this for, so #2 is 79 * much better than #1. 80 * 81 * _cache is an AtomicCache* 82 * _cacheSize is _cache->cacheSize (can save a cycle avoiding the lookup) 83 * _key1, _key2 are the keys 84 * 85 * Define a function ATOMIC_CACHE_CALC that returns a 32-bit value. This 86 * will be invoked when we need to compute the value. 87 * 88 * Returns the value. 89 */ 90 #if CALC_CACHE_STATS > 0 91 # define CACHE_XARG(_value) ,_value 92 #else 93 # define CACHE_XARG(_value) 94 #endif 95 #define ATOMIC_CACHE_LOOKUP(_cache, _cacheSize, _key1, _key2) ({ \ 96 AtomicCacheEntry* pEntry; \ 97 int hash; \ 98 u4 firstVersion, secondVersion; \ 99 u4 value; \ 100 \ 101 /* simple hash function */ \ 102 hash = (((u4)(_key1) >> 2) ^ (u4)(_key2)) & ((_cacheSize)-1); \ 103 pEntry = (_cache)->entries + hash; \ 104 \ 105 firstVersion = android_atomic_acquire_load((int32_t*)&pEntry->version); \ 106 \ 107 if (pEntry->key1 == (u4)(_key1) && pEntry->key2 == (u4)(_key2)) { \ 108 /* \ 109 * The fields match. 
Get the value, then read the version a \ 110 * second time to verify that we didn't catch a partial update. \ 111 * We're also hosed if "firstVersion" was odd, indicating that \ 112 * an update was in progress before we got here (unlikely). \ 113 */ \ 114 value = android_atomic_acquire_load((int32_t*) &pEntry->value); \ 115 secondVersion = pEntry->version; \ 116 \ 117 if ((firstVersion & 0x01) != 0 || firstVersion != secondVersion) { \ 118 /* \ 119 * We clashed with another thread. Instead of sitting and \ 120 * spinning, which might not complete if we're a high priority \ 121 * thread, just do the regular computation. \ 122 */ \ 123 if (CALC_CACHE_STATS) \ 124 (_cache)->fail++; \ 125 value = (u4) ATOMIC_CACHE_CALC; \ 126 } else { \ 127 /* all good */ \ 128 if (CALC_CACHE_STATS) \ 129 (_cache)->hits++; \ 130 } \ 131 } else { \ 132 /* \ 133 * Compute the result and update the cache. We really want this \ 134 * to happen in a different method -- it makes the ARM frame \ 135 * setup for this method simpler, which gives us a ~10% speed \ 136 * boost. \ 137 */ \ 138 value = (u4) ATOMIC_CACHE_CALC; \ 139 if (value == 0 && ATOMIC_CACHE_NULL_ALLOWED) { \ 140 dvmUpdateAtomicCache((u4) (_key1), (u4) (_key2), value, pEntry, \ 141 firstVersion CACHE_XARG(_cache) ); \ 142 } \ 143 } \ 144 value; \ 145 }) 146 147 /* 148 * Allocate a cache. 149 */ 150 AtomicCache* dvmAllocAtomicCache(int numEntries); 151 152 /* 153 * Free a cache. 154 */ 155 void dvmFreeAtomicCache(AtomicCache* cache); 156 157 /* 158 * Update a cache entry. 159 * 160 * Making the last argument optional, instead of merely unused, saves us 161 * a few percent in the ATOMIC_CACHE_LOOKUP time. 162 */ 163 void dvmUpdateAtomicCache(u4 key1, u4 key2, u4 value, AtomicCacheEntry* pEntry, 164 u4 firstVersion 165 #if CALC_CACHE_STATS > 0 166 , AtomicCache* pCache 167 #endif 168 ); 169 170 /* 171 * Debugging. 172 */ 173 void dvmDumpAtomicCacheStats(const AtomicCache* pCache); 174 175 #endif // DALVIK_ATOMICCACHE_H_ 176