1 | /* Implement simple hashing table with string based keys. |
2 | Copyright (C) 1994-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published |
7 | by the Free Software Foundation; version 2 of the License, or |
8 | (at your option) any later version. |
9 | |
10 | This program is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | GNU General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU General Public License |
16 | along with this program; if not, see <https://www.gnu.org/licenses/>. */ |
17 | |
18 | #ifdef HAVE_CONFIG_H |
19 | # include <config.h> |
20 | #endif |
21 | |
22 | #include <inttypes.h> |
23 | #include <stdio.h> |
24 | #include <stdlib.h> |
25 | #include <string.h> |
26 | #include <stdint.h> |
27 | #include <sys/types.h> |
28 | |
29 | #include <obstack.h> |
30 | |
31 | #ifdef HAVE_VALUES_H |
32 | # include <values.h> |
33 | #endif |
34 | |
35 | #include "simple-hash.h" |
36 | |
37 | #define obstack_chunk_alloc malloc |
38 | #define obstack_chunk_free free |
39 | |
40 | #ifndef BITSPERBYTE |
41 | # define BITSPERBYTE 8 |
42 | #endif |
43 | |
44 | #define hashval_t uint32_t |
45 | #include "hashval.h" |
46 | |
47 | #include <programs/xmalloc.h> |
48 | |
49 | typedef struct hash_entry |
50 | { |
51 | unsigned long used; |
52 | const void *key; |
53 | size_t keylen; |
54 | void *data; |
55 | struct hash_entry *next; |
56 | } |
57 | hash_entry; |
58 | |
59 | /* Prototypes for local functions. */ |
60 | static void insert_entry_2 (hash_table *htab, const void *key, size_t keylen, |
61 | unsigned long hval, size_t idx, void *data); |
62 | static size_t lookup (const hash_table *htab, const void *key, size_t keylen, |
63 | unsigned long int hval); |
64 | static int is_prime (unsigned long int candidate); |
65 | |
66 | |
67 | int |
68 | init_hash (hash_table *htab, unsigned long int init_size) |
69 | { |
70 | /* We need the size to be a prime. */ |
71 | init_size = next_prime (init_size); |
72 | |
73 | /* Initialize the data structure. */ |
74 | htab->size = init_size; |
75 | htab->filled = 0; |
76 | htab->first = NULL; |
77 | htab->table = (void *) xcalloc (init_size + 1, sizeof (hash_entry)); |
78 | if (htab->table == NULL) |
79 | return -1; |
80 | |
81 | obstack_init (&htab->mem_pool); |
82 | |
83 | return 0; |
84 | } |
85 | |
86 | |
87 | int |
88 | delete_hash (hash_table *htab) |
89 | { |
90 | free (htab->table); |
91 | obstack_free (&htab->mem_pool, NULL); |
92 | return 0; |
93 | } |
94 | |
95 | |
96 | int |
97 | insert_entry (hash_table *htab, const void *key, size_t keylen, void *data) |
98 | { |
99 | unsigned long int hval = compute_hashval (key, keylen); |
100 | hash_entry *table = (hash_entry *) htab->table; |
101 | size_t idx = lookup (htab, key, keylen, hval); |
102 | |
103 | if (table[idx].used) |
104 | /* We don't want to overwrite the old value. */ |
105 | return -1; |
106 | else |
107 | { |
108 | /* An empty bucket has been found. */ |
109 | insert_entry_2 (htab, obstack_copy (&htab->mem_pool, key, keylen), |
110 | keylen, hval, idx, data); |
111 | return 0; |
112 | } |
113 | } |
114 | |
115 | static void |
116 | insert_entry_2 (hash_table *htab, const void *key, size_t keylen, |
117 | unsigned long int hval, size_t idx, void *data) |
118 | { |
119 | hash_entry *table = (hash_entry *) htab->table; |
120 | |
121 | table[idx].used = hval; |
122 | table[idx].key = key; |
123 | table[idx].keylen = keylen; |
124 | table[idx].data = data; |
125 | |
126 | /* List the new value in the list. */ |
127 | if ((hash_entry *) htab->first == NULL) |
128 | { |
129 | table[idx].next = &table[idx]; |
130 | htab->first = &table[idx]; |
131 | } |
132 | else |
133 | { |
134 | table[idx].next = ((hash_entry *) htab->first)->next; |
135 | ((hash_entry *) htab->first)->next = &table[idx]; |
136 | htab->first = &table[idx]; |
137 | } |
138 | |
139 | ++htab->filled; |
140 | if (100 * htab->filled > 75 * htab->size) |
141 | { |
142 | /* Table is filled more than 75%. Resize the table. |
143 | Experiments have shown that for best performance, this threshold |
144 | must lie between 40% and 85%. */ |
145 | unsigned long int old_size = htab->size; |
146 | |
147 | htab->size = next_prime (htab->size * 2); |
148 | htab->filled = 0; |
149 | htab->first = NULL; |
150 | htab->table = (void *) xcalloc (1 + htab->size, sizeof (hash_entry)); |
151 | |
152 | for (idx = 1; idx <= old_size; ++idx) |
153 | if (table[idx].used) |
154 | insert_entry_2 (htab, table[idx].key, table[idx].keylen, |
155 | table[idx].used, |
156 | lookup (htab, table[idx].key, table[idx].keylen, |
157 | table[idx].used), |
158 | table[idx].data); |
159 | |
160 | free (table); |
161 | } |
162 | } |
163 | |
164 | |
165 | int |
166 | find_entry (const hash_table *htab, const void *key, size_t keylen, |
167 | void **result) |
168 | { |
169 | hash_entry *table = (hash_entry *) htab->table; |
170 | size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); |
171 | |
172 | if (table[idx].used == 0) |
173 | return -1; |
174 | |
175 | *result = table[idx].data; |
176 | return 0; |
177 | } |
178 | |
179 | |
180 | int |
181 | set_entry (hash_table *htab, const void *key, size_t keylen, void *newval) |
182 | { |
183 | hash_entry *table = (hash_entry *) htab->table; |
184 | size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); |
185 | |
186 | if (table[idx].used == 0) |
187 | return -1; |
188 | |
189 | table[idx].data = newval; |
190 | return 0; |
191 | } |
192 | |
193 | |
194 | int |
195 | iterate_table (const hash_table *htab, void **ptr, const void **key, |
196 | size_t *keylen, void **data) |
197 | { |
198 | if (*ptr == NULL) |
199 | { |
200 | if (htab->first == NULL) |
201 | return -1; |
202 | *ptr = (void *) ((hash_entry *) htab->first)->next; |
203 | } |
204 | else |
205 | { |
206 | if (*ptr == htab->first) |
207 | return -1; |
208 | *ptr = (void *) (((hash_entry *) *ptr)->next); |
209 | } |
210 | |
211 | *key = ((hash_entry *) *ptr)->key; |
212 | *keylen = ((hash_entry *) *ptr)->keylen; |
213 | *data = ((hash_entry *) *ptr)->data; |
214 | return 0; |
215 | } |
216 | |
217 | |
218 | /* References: |
219 | [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986 |
220 | [Knuth] The Art of Computer Programming, part3 (6.4) */ |
221 | |
222 | static size_t |
223 | lookup (const hash_table *htab, const void *key, size_t keylen, |
224 | unsigned long int hval) |
225 | { |
226 | unsigned long int hash; |
227 | size_t idx; |
228 | hash_entry *table = (hash_entry *) htab->table; |
229 | |
230 | /* First hash function: simply take the modul but prevent zero. */ |
231 | hash = 1 + hval % htab->size; |
232 | |
233 | idx = hash; |
234 | |
235 | if (table[idx].used) |
236 | { |
237 | if (table[idx].used == hval && table[idx].keylen == keylen |
238 | && memcmp (table[idx].key, key, keylen) == 0) |
239 | return idx; |
240 | |
241 | /* Second hash function as suggested in [Knuth]. */ |
242 | hash = 1 + hval % (htab->size - 2); |
243 | |
244 | do |
245 | { |
246 | if (idx <= hash) |
247 | idx = htab->size + idx - hash; |
248 | else |
249 | idx -= hash; |
250 | |
251 | /* If entry is found use it. */ |
252 | if (table[idx].used == hval && table[idx].keylen == keylen |
253 | && memcmp (table[idx].key, key, keylen) == 0) |
254 | return idx; |
255 | } |
256 | while (table[idx].used); |
257 | } |
258 | return idx; |
259 | } |
260 | |
261 | |
262 | unsigned long int |
263 | next_prime (unsigned long int seed) |
264 | { |
265 | /* Make it definitely odd. */ |
266 | seed |= 1; |
267 | |
268 | while (!is_prime (seed)) |
269 | seed += 2; |
270 | |
271 | return seed; |
272 | } |
273 | |
274 | |
275 | static int |
276 | is_prime (unsigned long int candidate) |
277 | { |
278 | /* No even number and none less than 10 will be passed here. */ |
279 | unsigned long int divn = 3; |
280 | unsigned long int sq = divn * divn; |
281 | |
282 | while (sq < candidate && candidate % divn != 0) |
283 | { |
284 | ++divn; |
285 | sq += 4 * divn; |
286 | ++divn; |
287 | } |
288 | |
289 | return candidate % divn != 0; |
290 | } |
291 | |