• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 /*
17  * Mutex-free cache for key1+key2=value.
18  */
19 #ifndef _DALVIK_ATOMICCACHE
20 #define _DALVIK_ATOMICCACHE
21 
22 /*
23  * If set to "1", gather some stats on our caching success rate.
24  */
25 #define CALC_CACHE_STATS 0
26 
27 
28 /*
29  * One entry in the cache.  We store two keys (e.g. the classes that are
30  * arguments to "instanceof") and one result (e.g. a boolean value).
31  *
32  * Must be exactly 16 bytes.
33  */
34 typedef struct AtomicCacheEntry {
35     u4          key1;
36     u4          key2;
37     u4          value;
38     volatile u4 version;    /* version and lock flag */
39 } AtomicCacheEntry;
40 
41 /*
42  * One cache.
43  *
44  * Thought: we might be able to save a few cycles by storing the cache
45  * struct and "entries" separately, avoiding an indirection.  (We already
46  * handle "numEntries" separately in ATOMIC_CACHE_LOOKUP.)
47  */
48 typedef struct AtomicCache {
49     AtomicCacheEntry*   entries;        /* array of entries */
50     int         numEntries;             /* #of entries, must be power of 2 */
51 
52     void*       entryAlloc;             /* memory allocated for entries */
53 
54     /* cache stats; note we don't guarantee atomic increments for these */
55     int         trivial;                /* cache access not required */
56     int         fail;                   /* contention failure */
57     int         hits;                   /* found entry in cache */
58     int         misses;                 /* entry was for other keys */
59     int         fills;                  /* entry was empty */
60 } AtomicCache;
61 
62 /*
63  * Do a cache lookup.  We need to be able to read and write entries
64  * atomically.  There are a couple of ways to do this:
65  *  (1) Have a global lock.  A mutex is too heavy, so instead we would use
66  *      an atomic flag.  If the flag is set, we could sit and spin, but
67  *      if we're a high-priority thread that may cause a lockup.  Better
68  *      to just ignore the cache and do the full computation.
69  *  (2) Have a "version" that gets incremented atomically when a write
70  *      begins and again when it completes.  Compare the version before
71  *      and after doing reads.  So long as "version" is volatile the
72  *      compiler will do the right thing, allowing us to skip atomic
73  *      ops in the common read case.  The table updates are expensive,
74  *      requiring two volatile writes and (for correctness on
75  *      multiprocessor systems) memory barriers.  We also need some
76  *      sort of lock to ensure that nobody else tries to start an
77  *      update while we're in the middle of one.
78  *
79  * We expect a 95+% hit rate for the things we use this for, so #2 is
80  * much better than #1.
81  *
82  * _cache is an AtomicCache*
83  * _cacheSize is _cache->cacheSize (can save a cycle avoiding the lookup)
84  * _key1, _key2 are the keys
85  *
86  * Define a function ATOMIC_CACHE_CALC that returns a 32-bit value.  This
87  * will be invoked when we need to compute the value.
88  *
89  * Returns the value.
90  */
91 #if CALC_CACHE_STATS > 0
92 # define CACHE_XARG(_value) ,_value
93 #else
94 # define CACHE_XARG(_value)
95 #endif
96 #define ATOMIC_CACHE_LOOKUP(_cache, _cacheSize, _key1, _key2) ({            \
97     AtomicCacheEntry* pEntry;                                               \
98     int hash;                                                               \
99     u4 firstVersion;                                                        \
100     u4 value;                                                               \
101                                                                             \
102     /* simple hash function */                                              \
103     hash = (((u4)(_key1) >> 2) ^ (u4)(_key2)) & ((_cacheSize)-1);           \
104     pEntry = (_cache)->entries + hash;                                      \
105                                                                             \
106     /* volatile read */                                                     \
107     firstVersion = pEntry->version;                                         \
108                                                                             \
109     if (pEntry->key1 == (u4)(_key1) && pEntry->key2 == (u4)(_key2)) {       \
110         /*                                                                  \
111          * The fields match.  Get the value, then read the version a        \
112          * second time to verify that we didn't catch a partial update.     \
113          * We're also hosed if "firstVersion" was odd, indicating that      \
114          * an update was in progress before we got here.                    \
115          */                                                                 \
116         value = pEntry->value;    /* must grab before next check */         \
117                                                                             \
118         if ((firstVersion & 0x01) != 0 || firstVersion != pEntry->version)  \
119         {                                                                   \
120             /*                                                              \
121              * We clashed with another thread.  Instead of sitting and      \
122              * spinning, which might not complete if we're a high priority  \
123              * thread, just do the regular computation.                     \
124              */                                                             \
125             if (CALC_CACHE_STATS)                                           \
126                 (_cache)->fail++;                                           \
127             value = (u4) ATOMIC_CACHE_CALC;                                 \
128         } else {                                                            \
129             /* all good */                                                  \
130             if (CALC_CACHE_STATS)                                           \
131                 (_cache)->hits++;                                           \
132         }                                                                   \
133     } else {                                                                \
134         /*                                                                  \
135          * Compute the result and update the cache.  We really want this    \
136          * to happen in a different method -- it makes the ARM frame        \
137          * setup for this method simpler, which gives us a ~10% speed       \
138          * boost.                                                           \
139          */                                                                 \
140         value = (u4) ATOMIC_CACHE_CALC;                                     \
141         dvmUpdateAtomicCache((u4) (_key1), (u4) (_key2), value, pEntry,     \
142                     firstVersion CACHE_XARG(_cache) );                      \
143     }                                                                       \
144     value;                                                                  \
145 })
146 
147 /*
148  * Allocate a cache.
149  */
150 AtomicCache* dvmAllocAtomicCache(int numEntries);
151 
152 /*
153  * Free a cache.
154  */
155 void dvmFreeAtomicCache(AtomicCache* cache);
156 
157 /*
158  * Update a cache entry.
159  *
160  * Making the last argument optional, instead of merely unused, saves us
161  * a few percent in the ATOMIC_CACHE_LOOKUP time.
162  */
163 void dvmUpdateAtomicCache(u4 key1, u4 key2, u4 value, AtomicCacheEntry* pEntry,
164     u4 firstVersion
165 #if CALC_CACHE_STATS > 0
166     , AtomicCache* pCache
167 #endif
168     );
169 
170 /*
171  * Debugging.
172  */
173 void dvmDumpAtomicCacheStats(const AtomicCache* pCache);
174 
175 #endif /*_DALVIK_ATOMICCACHE*/
176