1 | /* Implement simple hashing table with string based keys. |
2 | Copyright (C) 1994-2021 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, October 1994. |
5 | |
6 | This program is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published |
8 | by the Free Software Foundation; version 2 of the License, or |
9 | (at your option) any later version. |
10 | |
11 | This program is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | GNU General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with this program; if not, see <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #ifdef HAVE_CONFIG_H |
20 | # include <config.h> |
21 | #endif |
22 | |
23 | #include <inttypes.h> |
24 | #include <stdio.h> |
25 | #include <stdlib.h> |
26 | #include <string.h> |
27 | #include <stdint.h> |
28 | #include <sys/types.h> |
29 | |
30 | #include <obstack.h> |
31 | |
32 | #ifdef HAVE_VALUES_H |
33 | # include <values.h> |
34 | #endif |
35 | |
36 | #include "simple-hash.h" |
37 | |
38 | #define obstack_chunk_alloc malloc |
39 | #define obstack_chunk_free free |
40 | |
41 | #ifndef BITSPERBYTE |
42 | # define BITSPERBYTE 8 |
43 | #endif |
44 | |
45 | #define hashval_t uint32_t |
46 | #include "hashval.h" |
47 | |
48 | #include <programs/xmalloc.h> |
49 | |
50 | typedef struct hash_entry |
51 | { |
52 | unsigned long used; |
53 | const void *key; |
54 | size_t keylen; |
55 | void *data; |
56 | struct hash_entry *next; |
57 | } |
58 | hash_entry; |
59 | |
60 | /* Prototypes for local functions. */ |
61 | static void insert_entry_2 (hash_table *htab, const void *key, size_t keylen, |
62 | unsigned long hval, size_t idx, void *data); |
63 | static size_t lookup (const hash_table *htab, const void *key, size_t keylen, |
64 | unsigned long int hval); |
65 | static int is_prime (unsigned long int candidate); |
66 | |
67 | |
68 | int |
69 | init_hash (hash_table *htab, unsigned long int init_size) |
70 | { |
71 | /* We need the size to be a prime. */ |
72 | init_size = next_prime (init_size); |
73 | |
74 | /* Initialize the data structure. */ |
75 | htab->size = init_size; |
76 | htab->filled = 0; |
77 | htab->first = NULL; |
78 | htab->table = (void *) xcalloc (init_size + 1, sizeof (hash_entry)); |
79 | if (htab->table == NULL) |
80 | return -1; |
81 | |
82 | obstack_init (&htab->mem_pool); |
83 | |
84 | return 0; |
85 | } |
86 | |
87 | |
88 | int |
89 | delete_hash (hash_table *htab) |
90 | { |
91 | free (htab->table); |
92 | obstack_free (&htab->mem_pool, NULL); |
93 | return 0; |
94 | } |
95 | |
96 | |
97 | int |
98 | insert_entry (hash_table *htab, const void *key, size_t keylen, void *data) |
99 | { |
100 | unsigned long int hval = compute_hashval (key, keylen); |
101 | hash_entry *table = (hash_entry *) htab->table; |
102 | size_t idx = lookup (htab, key, keylen, hval); |
103 | |
104 | if (table[idx].used) |
105 | /* We don't want to overwrite the old value. */ |
106 | return -1; |
107 | else |
108 | { |
109 | /* An empty bucket has been found. */ |
110 | insert_entry_2 (htab, obstack_copy (&htab->mem_pool, key, keylen), |
111 | keylen, hval, idx, data); |
112 | return 0; |
113 | } |
114 | } |
115 | |
116 | static void |
117 | insert_entry_2 (hash_table *htab, const void *key, size_t keylen, |
118 | unsigned long int hval, size_t idx, void *data) |
119 | { |
120 | hash_entry *table = (hash_entry *) htab->table; |
121 | |
122 | table[idx].used = hval; |
123 | table[idx].key = key; |
124 | table[idx].keylen = keylen; |
125 | table[idx].data = data; |
126 | |
127 | /* List the new value in the list. */ |
128 | if ((hash_entry *) htab->first == NULL) |
129 | { |
130 | table[idx].next = &table[idx]; |
131 | htab->first = &table[idx]; |
132 | } |
133 | else |
134 | { |
135 | table[idx].next = ((hash_entry *) htab->first)->next; |
136 | ((hash_entry *) htab->first)->next = &table[idx]; |
137 | htab->first = &table[idx]; |
138 | } |
139 | |
140 | ++htab->filled; |
141 | if (100 * htab->filled > 75 * htab->size) |
142 | { |
143 | /* Table is filled more than 75%. Resize the table. |
144 | Experiments have shown that for best performance, this threshold |
145 | must lie between 40% and 85%. */ |
146 | unsigned long int old_size = htab->size; |
147 | |
148 | htab->size = next_prime (htab->size * 2); |
149 | htab->filled = 0; |
150 | htab->first = NULL; |
151 | htab->table = (void *) xcalloc (1 + htab->size, sizeof (hash_entry)); |
152 | |
153 | for (idx = 1; idx <= old_size; ++idx) |
154 | if (table[idx].used) |
155 | insert_entry_2 (htab, table[idx].key, table[idx].keylen, |
156 | table[idx].used, |
157 | lookup (htab, table[idx].key, table[idx].keylen, |
158 | table[idx].used), |
159 | table[idx].data); |
160 | |
161 | free (table); |
162 | } |
163 | } |
164 | |
165 | |
166 | int |
167 | find_entry (const hash_table *htab, const void *key, size_t keylen, |
168 | void **result) |
169 | { |
170 | hash_entry *table = (hash_entry *) htab->table; |
171 | size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); |
172 | |
173 | if (table[idx].used == 0) |
174 | return -1; |
175 | |
176 | *result = table[idx].data; |
177 | return 0; |
178 | } |
179 | |
180 | |
181 | int |
182 | set_entry (hash_table *htab, const void *key, size_t keylen, void *newval) |
183 | { |
184 | hash_entry *table = (hash_entry *) htab->table; |
185 | size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); |
186 | |
187 | if (table[idx].used == 0) |
188 | return -1; |
189 | |
190 | table[idx].data = newval; |
191 | return 0; |
192 | } |
193 | |
194 | |
195 | int |
196 | iterate_table (const hash_table *htab, void **ptr, const void **key, |
197 | size_t *keylen, void **data) |
198 | { |
199 | if (*ptr == NULL) |
200 | { |
201 | if (htab->first == NULL) |
202 | return -1; |
203 | *ptr = (void *) ((hash_entry *) htab->first)->next; |
204 | } |
205 | else |
206 | { |
207 | if (*ptr == htab->first) |
208 | return -1; |
209 | *ptr = (void *) (((hash_entry *) *ptr)->next); |
210 | } |
211 | |
212 | *key = ((hash_entry *) *ptr)->key; |
213 | *keylen = ((hash_entry *) *ptr)->keylen; |
214 | *data = ((hash_entry *) *ptr)->data; |
215 | return 0; |
216 | } |
217 | |
218 | |
219 | /* References: |
220 | [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986 |
221 | [Knuth] The Art of Computer Programming, part3 (6.4) */ |
222 | |
223 | static size_t |
224 | lookup (const hash_table *htab, const void *key, size_t keylen, |
225 | unsigned long int hval) |
226 | { |
227 | unsigned long int hash; |
228 | size_t idx; |
229 | hash_entry *table = (hash_entry *) htab->table; |
230 | |
231 | /* First hash function: simply take the modul but prevent zero. */ |
232 | hash = 1 + hval % htab->size; |
233 | |
234 | idx = hash; |
235 | |
236 | if (table[idx].used) |
237 | { |
238 | if (table[idx].used == hval && table[idx].keylen == keylen |
239 | && memcmp (table[idx].key, key, keylen) == 0) |
240 | return idx; |
241 | |
242 | /* Second hash function as suggested in [Knuth]. */ |
243 | hash = 1 + hval % (htab->size - 2); |
244 | |
245 | do |
246 | { |
247 | if (idx <= hash) |
248 | idx = htab->size + idx - hash; |
249 | else |
250 | idx -= hash; |
251 | |
252 | /* If entry is found use it. */ |
253 | if (table[idx].used == hval && table[idx].keylen == keylen |
254 | && memcmp (table[idx].key, key, keylen) == 0) |
255 | return idx; |
256 | } |
257 | while (table[idx].used); |
258 | } |
259 | return idx; |
260 | } |
261 | |
262 | |
263 | unsigned long int |
264 | next_prime (unsigned long int seed) |
265 | { |
266 | /* Make it definitely odd. */ |
267 | seed |= 1; |
268 | |
269 | while (!is_prime (seed)) |
270 | seed += 2; |
271 | |
272 | return seed; |
273 | } |
274 | |
275 | |
276 | static int |
277 | is_prime (unsigned long int candidate) |
278 | { |
279 | /* No even number and none less than 10 will be passed here. */ |
280 | unsigned long int divn = 3; |
281 | unsigned long int sq = divn * divn; |
282 | |
283 | while (sq < candidate && candidate % divn != 0) |
284 | { |
285 | ++divn; |
286 | sq += 4 * divn; |
287 | ++divn; |
288 | } |
289 | |
290 | return candidate % divn != 0; |
291 | } |
292 | |