1 /**
2 * @file odb.h
3 * This file contains various definitions and interface for management
4 * of in-memory, through mmaped file, growable hash table, that stores
5 * sample files.
6 *
7 * @remark Copyright 2002 OProfile authors
8 * @remark Read the file COPYING
9 *
10 * @author Philippe Elie
11 */
12
13 #ifndef ODB_HASH_H
14 #define ODB_HASH_H
15
16 #include <stddef.h>
17 #include <stdint.h>
18
19 #include "op_list.h"
20
21 /** the type of key. 64-bit because CG needs 32-bit pair {from,to} */
22 typedef uint64_t odb_key_t;
23 /** the type of an information in the database */
24 typedef unsigned int odb_value_t;
25 /** the type of index (node number), list are implemented through index */
26 typedef unsigned int odb_index_t;
27 /** the type store node number */
28 typedef odb_index_t odb_node_nr_t;
29 /** store the hash mask, hash table size are always power of two */
30 typedef odb_index_t odb_hash_mask_t;
31
32 /* there is (bucket factor * nr node) entry in hash table, this can seem
33 * excessive but hash coding eip don't give a good distributions and our
34 * goal is to get a O(1) amortized insert time. bucket factor must be a
35 * power of two. FIXME: see big comment in odb_hash_add_node, you must
36 * re-enable zeroing hash table if BUCKET_FACTOR > 2 (roughly exact, you
37 * want to read the comment in odb_hash_add_node() if you tune this define)
38 */
39 #define BUCKET_FACTOR 1
40
41 /** a db hash node */
42 typedef struct {
43 odb_key_t key; /**< eip */
44 odb_value_t value; /**< samples count */
45 odb_index_t next; /**< next entry for this bucket */
46 } odb_node_t;
47
48 /** the minimal information which must be stored in the file to reload
49 * properly the data base, following this header is the node array then
50 * the hash table (when growing we avoid to copy node array)
51 */
52 typedef struct {
53 odb_node_nr_t size; /**< in node nr (power of two) */
54 odb_node_nr_t current_size; /**< nr used node + 1, node 0 unused */
55 int padding[6]; /**< for padding and future use */
56 } odb_descr_t;
57
58 /** a "database". this is an in memory only description.
59 *
60 * We allow to manage a database inside a mapped file with an "header" of
61 * unknown size so odb_open get a parameter to specify the size of this header.
62 * A typical use is:
63 *
64 * struct header { int etc; ... };
65 * odb_open(&hash, filename, ODB_RW, sizeof(header));
66 * so on this library have no dependency on the header type.
67 *
68 * the internal memory layout from base_memory is:
69 * the unknown header (sizeof_header)
70 * odb_descr_t
71 * the node array: (descr->size * sizeof(odb_node_t) entries
72 * the hash table: array of odb_index_t indexing the node array
73 * (descr->size * BUCKET_FACTOR) entries
74 */
75 typedef struct odb_data {
76 odb_node_t * node_base; /**< base memory area of the page */
77 odb_index_t * hash_base; /**< base memory of hash table */
78 odb_descr_t * descr; /**< the current state of database */
79 odb_hash_mask_t hash_mask; /**< == descr->size - 1 */
80 unsigned int sizeof_header; /**< from base_memory to odb header */
81 unsigned int offset_node; /**< from base_memory to node array */
82 void * base_memory; /**< base memory of the maped memory */
83 int fd; /**< mmaped memory file descriptor */
84 char * filename; /**< full path name of sample file */
85 int ref_count; /**< reference count */
86 struct list_head list; /**< hash bucket list */
87 } odb_data_t;
88
89 typedef struct {
90 odb_data_t * data;
91 } odb_t;
92
93 #ifdef __cplusplus
94 extern "C" {
95 #endif
96
97 /* db_manage.c */
98
99 /** how to open the DB file */
100 enum odb_rw {
101 ODB_RDONLY = 0, /**< open for read only */
102 ODB_RDWR = 1 /**< open for read and/or write */
103 };
104
105 /**
106 * odb_init - initialize a DB file
107 * @param odb the DB file to init
108 */
109 void odb_init(odb_t * odb);
110
111 /**
112 * odb_open - open a DB file
113 * @param odb the data base object to setup
114 * @param filename the filename where go the maped memory
115 * @param rw \enum ODB_RW if opening for writing, else \enum ODB_RDONLY
116 * @param sizeof_header size of the file header if any
117 *
118 * The sizeof_header parameter allows the data file to have a header
119 * at the start of the file which is skipped.
120 * odb_open() always preallocate a few number of pages.
121 * returns 0 on success, errno on failure
122 */
123 int odb_open(odb_t * odb, char const * filename,
124 enum odb_rw rw, size_t sizeof_header);
125
126 /** Close the given ODB file */
127 void odb_close(odb_t * odb);
128
129 /** return the number of times this sample file is open */
130 int odb_open_count(odb_t const * odb);
131
132 /** return the start of the mapped data */
133 void * odb_get_data(odb_t * odb);
134
135 /** issue a msync on the used size of the mmaped file */
136 void odb_sync(odb_t const * odb);
137
138 /**
139 * grow the hashtable in such way current_size is the index of the first free
140 * node. Take care all node pointer can be invalidated by this call.
141 *
142 * Node allocation is done in a two step way 1st) ensure a free node exist
143 * eventually, caller can setup it, 2nd) commit the node allocation with
144 * odb_commit_reservation().
145 * This is done in this way to ensure node setup is visible from another
146 * process like pp tools in an atomic way.
147 *
148 * returns 0 on success, non zero on failure in this case this function do
149 * nothing and errno is set by the first libc call failure allowing to retry
150 * after cleanup some program resource.
151 */
152 int odb_grow_hashtable(odb_data_t * data);
153 /**
154 * commit a previously successfull node reservation. This can't fail.
155 */
odb_commit_reservation(odb_data_t * data)156 static __inline void odb_commit_reservation(odb_data_t * data)
157 {
158 ++data->descr->current_size;
159 }
160
161 /** "immpossible" node number to indicate an error from odb_hash_add_node() */
162 #define ODB_NODE_NR_INVALID ((odb_node_nr_t)-1)
163
164 /* db_debug.c */
165 /** check that the hash is well built */
166 int odb_check_hash(odb_t const * odb);
167
168 /* db_stat.c */
169 typedef struct odb_hash_stat_t odb_hash_stat_t;
170 odb_hash_stat_t * odb_hash_stat(odb_t const * odb);
171 void odb_hash_display_stat(odb_hash_stat_t const * stats);
172 void odb_hash_free_stat(odb_hash_stat_t * stats);
173
174 /* db_insert.c */
175 /** update info at key by incrementing its associated value by one,
176 * if the key does not exist a new node is created and the value associated
177 * is set to one.
178 *
179 * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
180 */
181 int odb_update_node(odb_t * odb, odb_key_t key);
182
183 /**
184 * odb_update_node_with_offset
185 * @param odb the data base object to setup
186 * @param key the hash key
187 * @param offset the offset to be added
188 *
189 * update info at key by adding the specified offset to its associated value,
190 * if the key does not exist a new node is created and the value associated
191 * is set to offset.
192 *
193 * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
194 */
195 int odb_update_node_with_offset(odb_t * odb,
196 odb_key_t key,
197 unsigned long int offset);
198
199 /** Add a new node w/o regarding if a node with the same key already exists
200 *
201 * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure
202 */
203 int odb_add_node(odb_t * odb, odb_key_t key, odb_value_t value);
204
205 /* db_travel.c */
206 /**
207 * return a base pointer to the node array and number of node in this array
208 * caller then will iterate through:
209 *
210 * odb_node_nr_t node_nr, pos;
211 * odb_node_t * node = odb_get_iterator(odb, &node_nr);
212 * for ( pos = 0 ; pos < node_nr ; ++pos)
213 * // do something
214 *
215 * note than caller does not need to filter nil key as it's a valid key,
216 * The returned range is all valid (i.e. should never contain zero value).
217 */
218 odb_node_t * odb_get_iterator(odb_t const * odb, odb_node_nr_t * nr);
219
220 static __inline unsigned int
odb_do_hash(odb_data_t const * data,odb_key_t value)221 odb_do_hash(odb_data_t const * data, odb_key_t value)
222 {
223 /* FIXME: better hash for eip value, needs to instrument code
224 * and do a lot of tests ... */
225 /* trying to combine high order bits his a no-op: inside a binary image
226 * high order bits don't vary a lot, hash table start with 7 bits mask
227 * so this hash coding use bits 0-7, 8-15. Hash table is stored in
228 * files avoiding to rebuilding them at profiling re-start so
229 * on changing do_hash() change the file format!
230 */
231 uint32_t temp = (value >> 32) ^ value;
232 return ((temp << 0) ^ (temp >> 8)) & data->hash_mask;
233 }
234
235 #ifdef __cplusplus
236 }
237 #endif
238
239 #endif /* !ODB_H */
240