| 1 | /* |
| 2 | * Copyright (c) 2007-2008 Apple Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * This file contains Original Code and/or Modifications of Original Code |
| 7 | * as defined in and that are subject to the Apple Public Source License |
| 8 | * Version 2.0 (the 'License'). You may not use this file except in |
| 9 | * compliance with the License. The rights granted to you under the License |
| 10 | * may not be used to create, or enable the creation or redistribution of, |
| 11 | * unlawful or unlicensed copies of an Apple operating system, or to |
| 12 | * circumvent, violate, or enable the circumvention or violation of, any |
| 13 | * terms of an Apple operating system software license agreement. |
| 14 | * |
| 15 | * Please obtain a copy of the License at |
| 16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. |
| 17 | * |
| 18 | * The Original Code and all software distributed under the License are |
| 19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
| 23 | * Please see the License for the specific language governing rights and |
| 24 | * limitations under the License. |
| 25 | * |
| 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
| 27 | */ |
| 28 | #include <string.h> |
| 29 | #include <sys/types.h> |
| 30 | |
| 31 | #define DEBUG_ASSERT_COMPONENT_NAME_STRING "kxld" |
| 32 | #include <AssertMacros.h> |
| 33 | |
| 34 | #include "kxld_dict.h" |
| 35 | #include "kxld_util.h" |
| 36 | |
| 37 | /******************************************************************************* |
| 38 | * Types and macros |
| 39 | *******************************************************************************/ |
| 40 | |
| 41 | /* Ratio of num_entries:num_buckets that will cause a resize */ |
| 42 | #define RESIZE_NUMER 7 |
| 43 | #define RESIZE_DENOM 10 |
| 44 | #define RESIZE_THRESHOLD(x) (((x)*RESIZE_NUMER) / RESIZE_DENOM) |
| 45 | #define MIN_BUCKETS(x) (((x)*RESIZE_DENOM) / RESIZE_NUMER) |
| 46 | |
| 47 | /* Selected for good scaling qualities when resizing dictionary |
| 48 | * ... see: http://www.concentric.net/~ttwang/tech/hashsize.htm |
| 49 | */ |
| 50 | #define DEFAULT_DICT_SIZE 89 |
| 51 | |
| 52 | typedef struct dict_entry DictEntry; |
| 53 | |
| 54 | typedef enum { |
| 55 | EMPTY = 0, |
| 56 | USED = 1, |
| 57 | DELETED = 2 |
| 58 | } DictEntryState; |
| 59 | |
| 60 | struct dict_entry { |
| 61 | const void *key; |
| 62 | void *value; |
| 63 | DictEntryState state; |
| 64 | }; |
| 65 | |
| 66 | /******************************************************************************* |
| 67 | * Function prototypes |
| 68 | *******************************************************************************/ |
| 69 | |
| 70 | static kern_return_t get_locate_index(const KXLDDict *dict, const void *key, |
| 71 | u_int *idx); |
| 72 | static kern_return_t get_insert_index(const KXLDDict *dict, const void *key, |
| 73 | u_int *idx); |
| 74 | static kern_return_t resize_dict(KXLDDict *dict); |
| 75 | |
| 76 | /******************************************************************************* |
| 77 | *******************************************************************************/ |
| 78 | kern_return_t |
| 79 | kxld_dict_init(KXLDDict * dict, kxld_dict_hash hash, kxld_dict_cmp cmp, |
| 80 | u_int num_entries) |
| 81 | { |
| 82 | kern_return_t rval = KERN_FAILURE; |
| 83 | u_int min_buckets = MIN_BUCKETS(num_entries); |
| 84 | u_int num_buckets = DEFAULT_DICT_SIZE; |
| 85 | |
| 86 | check(dict); |
| 87 | check(hash); |
| 88 | check(cmp); |
| 89 | |
| 90 | /* We want the number of allocated buckets to be at least twice that of the |
| 91 | * number to be inserted. |
| 92 | */ |
| 93 | while (min_buckets > num_buckets) { |
| 94 | num_buckets *= 2; |
| 95 | num_buckets++; |
| 96 | } |
| 97 | |
| 98 | /* Allocate enough buckets for the anticipated number of entries */ |
| 99 | rval = kxld_array_init(&dict->buckets, sizeof(DictEntry), num_buckets); |
| 100 | require_noerr(rval, finish); |
| 101 | |
| 102 | /* Initialize */ |
| 103 | dict->hash = hash; |
| 104 | dict->cmp = cmp; |
| 105 | dict->num_entries = 0; |
| 106 | dict->resize_threshold = RESIZE_THRESHOLD(num_buckets); |
| 107 | |
| 108 | rval = KERN_SUCCESS; |
| 109 | |
| 110 | finish: |
| 111 | return rval; |
| 112 | } |
| 113 | |
| 114 | /******************************************************************************* |
| 115 | *******************************************************************************/ |
| 116 | void |
| 117 | kxld_dict_clear(KXLDDict *dict) |
| 118 | { |
| 119 | check(dict); |
| 120 | |
| 121 | dict->hash = NULL; |
| 122 | dict->cmp = NULL; |
| 123 | dict->num_entries = 0; |
| 124 | dict->resize_threshold = 0; |
| 125 | kxld_array_clear(&dict->buckets); |
| 126 | kxld_array_clear(&dict->resize_buckets); |
| 127 | } |
| 128 | |
| 129 | /******************************************************************************* |
| 130 | *******************************************************************************/ |
| 131 | void |
| 132 | kxld_dict_iterator_init(KXLDDictIterator *iter, const KXLDDict *dict) |
| 133 | { |
| 134 | check(iter); |
| 135 | check(dict); |
| 136 | |
| 137 | iter->idx = 0; |
| 138 | iter->dict = dict; |
| 139 | } |
| 140 | |
| 141 | /******************************************************************************* |
| 142 | *******************************************************************************/ |
| 143 | void |
| 144 | kxld_dict_deinit(KXLDDict *dict) |
| 145 | { |
| 146 | check(dict); |
| 147 | |
| 148 | kxld_array_deinit(&dict->buckets); |
| 149 | kxld_array_deinit(&dict->resize_buckets); |
| 150 | } |
| 151 | |
| 152 | /******************************************************************************* |
| 153 | *******************************************************************************/ |
| 154 | u_int |
| 155 | kxld_dict_get_num_entries(const KXLDDict *dict) |
| 156 | { |
| 157 | check(dict); |
| 158 | |
| 159 | return dict->num_entries; |
| 160 | } |
| 161 | |
| 162 | /******************************************************************************* |
| 163 | *******************************************************************************/ |
| 164 | void * |
| 165 | kxld_dict_find(const KXLDDict *dict, const void *key) |
| 166 | { |
| 167 | kern_return_t rval = KERN_FAILURE; |
| 168 | DictEntry *entry = NULL; |
| 169 | u_int idx = 0; |
| 170 | |
| 171 | check(dict); |
| 172 | check(key); |
| 173 | |
| 174 | rval = get_locate_index(dict, key, &idx); |
| 175 | if (rval) return NULL; |
| 176 | |
| 177 | entry = kxld_array_get_item(&dict->buckets, idx); |
| 178 | |
| 179 | return entry->value; |
| 180 | } |
| 181 | |
| 182 | /******************************************************************************* |
| 183 | * This dictionary uses linear probing, which means that when there is a |
| 184 | * collision, we just walk along the buckets until a free bucket shows up. |
| 185 | * A consequence of this is that when looking up an item, items that lie between |
| 186 | * its hash value and its actual bucket may have been deleted since it was |
| 187 | * inserted. Thus, we should only stop a lookup when we've wrapped around the |
| 188 | * dictionary or encountered an EMPTY bucket. |
| 189 | ********************************************************************************/ |
| 190 | static kern_return_t |
| 191 | get_locate_index(const KXLDDict *dict, const void *key, u_int *_idx) |
| 192 | { |
| 193 | kern_return_t rval = KERN_FAILURE; |
| 194 | DictEntry *entry = NULL; |
| 195 | u_int base, idx; |
| 196 | |
| 197 | base = idx = dict->hash(dict, key); |
| 198 | |
| 199 | /* Iterate until we match the key, wrap, or hit an empty bucket */ |
| 200 | entry = kxld_array_get_item(&dict->buckets, idx); |
| 201 | while (!dict->cmp(entry->key, key)) { |
| 202 | if (entry->state == EMPTY) goto finish; |
| 203 | |
| 204 | idx = (idx + 1) % dict->buckets.nitems; |
| 205 | if (idx == base) goto finish; |
| 206 | |
| 207 | entry = kxld_array_get_item(&dict->buckets, idx); |
| 208 | } |
| 209 | |
| 210 | check(idx < dict->buckets.nitems); |
| 211 | |
| 212 | *_idx = idx; |
| 213 | rval = KERN_SUCCESS; |
| 214 | |
| 215 | finish: |
| 216 | return rval; |
| 217 | } |
| 218 | |
| 219 | /******************************************************************************* |
| 220 | *******************************************************************************/ |
| 221 | kern_return_t |
| 222 | kxld_dict_insert(KXLDDict *dict, const void *key, void *value) |
| 223 | { |
| 224 | kern_return_t rval = KERN_FAILURE; |
| 225 | DictEntry *entry = NULL; |
| 226 | u_int idx = 0; |
| 227 | |
| 228 | check(dict); |
| 229 | check(key); |
| 230 | check(value); |
| 231 | |
| 232 | /* Resize if we are greater than the capacity threshold. |
| 233 | * Note: this is expensive, but the dictionary can be sized correctly at |
| 234 | * construction to avoid ever having to do this. |
| 235 | */ |
| 236 | while (dict->num_entries > dict->resize_threshold) { |
| 237 | rval = resize_dict(dict); |
| 238 | require_noerr(rval, finish); |
| 239 | } |
| 240 | |
| 241 | /* If this function returns FULL after we've already resized appropriately |
| 242 | * something is very wrong and we should return an error. |
| 243 | */ |
| 244 | rval = get_insert_index(dict, key, &idx); |
| 245 | require_noerr(rval, finish); |
| 246 | |
| 247 | /* Insert the new key-value pair into the bucket, but only count it as a |
| 248 | * new entry if we are not overwriting an existing entry. |
| 249 | */ |
| 250 | entry = kxld_array_get_item(&dict->buckets, idx); |
| 251 | if (entry->state != USED) { |
| 252 | dict->num_entries++; |
| 253 | entry->key = key; |
| 254 | entry->state = USED; |
| 255 | } |
| 256 | entry->value = value; |
| 257 | |
| 258 | rval = KERN_SUCCESS; |
| 259 | |
| 260 | finish: |
| 261 | return rval; |
| 262 | } |
| 263 | |
| 264 | /******************************************************************************* |
| 265 | * Increases the hash table's capacity by 2N+1. Uses dictionary API. Not |
| 266 | * fast; just correct. |
| 267 | *******************************************************************************/ |
| 268 | static kern_return_t |
| 269 | resize_dict(KXLDDict *dict) |
| 270 | { |
| 271 | kern_return_t rval = KERN_FAILURE; |
| 272 | KXLDArray tmparray; |
| 273 | DictEntry *entry = NULL; |
| 274 | u_int nbuckets = (dict->buckets.nitems * 2 + 1); |
| 275 | u_int i = 0; |
| 276 | |
| 277 | check(dict); |
| 278 | |
| 279 | /* Initialize a new set of buckets to hold more entries */ |
| 280 | rval = kxld_array_init(&dict->resize_buckets, sizeof(DictEntry), nbuckets); |
| 281 | require_noerr(rval, finish); |
| 282 | |
| 283 | /* Swap the new buckets with the old buckets */ |
| 284 | tmparray = dict->buckets; |
| 285 | dict->buckets = dict->resize_buckets; |
| 286 | dict->resize_buckets = tmparray; |
| 287 | |
| 288 | /* Reset dictionary parameters */ |
| 289 | dict->num_entries = 0; |
| 290 | dict->resize_threshold = RESIZE_THRESHOLD(dict->buckets.nitems); |
| 291 | |
| 292 | /* Rehash all of the entries */ |
| 293 | for (i = 0; i < dict->resize_buckets.nitems; ++i) { |
| 294 | entry = kxld_array_get_item(&dict->resize_buckets, i); |
| 295 | if (entry->state == USED) { |
| 296 | rval = kxld_dict_insert(dict, entry->key, entry->value); |
| 297 | require_noerr(rval, finish); |
| 298 | } |
| 299 | } |
| 300 | |
| 301 | /* Clear the old buckets */ |
| 302 | kxld_array_clear(&dict->resize_buckets); |
| 303 | |
| 304 | rval = KERN_SUCCESS; |
| 305 | |
| 306 | finish: |
| 307 | return rval; |
| 308 | } |
| 309 | |
| 310 | /******************************************************************************* |
| 311 | * Simple function to find the first empty cell |
| 312 | *******************************************************************************/ |
| 313 | static kern_return_t |
| 314 | get_insert_index(const KXLDDict *dict, const void *key, u_int *r_index) |
| 315 | { |
| 316 | kern_return_t rval = KERN_FAILURE; |
| 317 | DictEntry *entry = NULL; |
| 318 | u_int base, idx; |
| 319 | |
| 320 | base = idx = dict->hash(dict, key); |
| 321 | |
| 322 | /* Iterate through the buckets until we find an EMPTY bucket, a DELETED |
| 323 | * bucket, or a key match. |
| 324 | */ |
| 325 | entry = kxld_array_get_item(&dict->buckets, idx); |
| 326 | while (entry->state == USED && !dict->cmp(entry->key, key)) { |
| 327 | idx = (idx + 1) % dict->buckets.nitems; |
| 328 | require_action(base != idx, finish, rval=KERN_FAILURE); |
| 329 | entry = kxld_array_get_item(&dict->buckets, idx); |
| 330 | } |
| 331 | |
| 332 | *r_index = idx; |
| 333 | rval = KERN_SUCCESS; |
| 334 | |
| 335 | finish: |
| 336 | return rval; |
| 337 | } |
| 338 | |
| 339 | /******************************************************************************* |
| 340 | *******************************************************************************/ |
| 341 | void |
| 342 | kxld_dict_remove(KXLDDict *dict, const void *key, void **value) |
| 343 | { |
| 344 | kern_return_t rval = KERN_FAILURE; |
| 345 | DictEntry *entry = NULL; |
| 346 | u_int idx = 0; |
| 347 | |
| 348 | check(dict); |
| 349 | check(key); |
| 350 | |
| 351 | /* Find the item */ |
| 352 | rval = get_locate_index(dict, key, &idx); |
| 353 | if (rval) { |
| 354 | if (value) *value = NULL; |
| 355 | return; |
| 356 | } |
| 357 | |
| 358 | entry = kxld_array_get_item(&dict->buckets, idx); |
| 359 | |
| 360 | /* Save the value if requested */ |
| 361 | if (value) *value = entry->value; |
| 362 | |
| 363 | /* Delete the item from the dictionary */ |
| 364 | entry->key = NULL; |
| 365 | entry->value = NULL; |
| 366 | entry->state = DELETED; |
| 367 | dict->num_entries--; |
| 368 | } |
| 369 | |
| 370 | /******************************************************************************* |
| 371 | *******************************************************************************/ |
| 372 | void |
| 373 | kxld_dict_iterator_get_next(KXLDDictIterator *iter, const void **key, |
| 374 | void **value) |
| 375 | { |
| 376 | DictEntry *entry = NULL; |
| 377 | |
| 378 | check(iter); |
| 379 | check(key); |
| 380 | check(value); |
| 381 | |
| 382 | *key = NULL; |
| 383 | *value = NULL; |
| 384 | |
| 385 | /* Walk over the dictionary looking for USED buckets */ |
| 386 | for (; iter->idx < iter->dict->buckets.nitems; ++(iter->idx)) { |
| 387 | entry = kxld_array_get_item(&iter->dict->buckets, iter->idx); |
| 388 | if (entry->state == USED) { |
| 389 | *key = entry->key; |
| 390 | *value = entry->value; |
| 391 | ++(iter->idx); |
| 392 | break; |
| 393 | } |
| 394 | } |
| 395 | } |
| 396 | |
| 397 | /******************************************************************************* |
| 398 | *******************************************************************************/ |
| 399 | void |
| 400 | kxld_dict_iterator_reset(KXLDDictIterator *iter) |
| 401 | { |
| 402 | iter->idx = 0; |
| 403 | } |
| 404 | |
| 405 | /******************************************************************************* |
| 406 | * This is Daniel Bernstein's hash algorithm from comp.lang.c |
| 407 | * It's fast and distributes well. Returns an idx into the symbol hash table. |
| 408 | * NOTE: Will not check for a valid pointer - performance |
| 409 | *******************************************************************************/ |
| 410 | u_int |
| 411 | kxld_dict_string_hash(const KXLDDict *dict, const void *_key) |
| 412 | { |
| 413 | const char *key = _key; |
| 414 | u_int c = 0; |
| 415 | u_int hash_val = 5381; |
| 416 | |
| 417 | check(dict); |
| 418 | check(_key); |
| 419 | |
| 420 | while ((c = *key++)) { |
| 421 | /* hash(i) = hash(i-1) *33 ^ name[i] */ |
| 422 | hash_val = ((hash_val << 5) + hash_val) ^ c; |
| 423 | } |
| 424 | |
| 425 | return (hash_val % dict->buckets.nitems); |
| 426 | } |
| 427 | |
| 428 | u_int |
| 429 | kxld_dict_uint32_hash(const KXLDDict *dict, const void *_key) |
| 430 | { |
| 431 | uint32_t key = *(const uint32_t *) _key; |
| 432 | |
| 433 | check(_key); |
| 434 | |
| 435 | return (u_int) (key % dict->buckets.nitems); |
| 436 | } |
| 437 | |
| 438 | u_int |
| 439 | kxld_dict_kxldaddr_hash(const KXLDDict *dict, const void *_key) |
| 440 | { |
| 441 | kxld_addr_t key = *(const kxld_addr_t *) _key; |
| 442 | |
| 443 | check(_key); |
| 444 | |
| 445 | return (u_int) (key % dict->buckets.nitems); |
| 446 | } |
| 447 | |
| 448 | u_int |
| 449 | kxld_dict_string_cmp(const void *key1, const void *key2) |
| 450 | { |
| 451 | return streq(key1, key2); |
| 452 | } |
| 453 | |
| 454 | u_int |
| 455 | kxld_dict_uint32_cmp(const void *key1, const void *key2) |
| 456 | { |
| 457 | const uint32_t *a = key1; |
| 458 | const uint32_t *b = key2; |
| 459 | |
| 460 | return (a && b && (*a == *b)); |
| 461 | } |
| 462 | |
| 463 | u_int |
| 464 | kxld_dict_kxldaddr_cmp(const void *key1, const void *key2) |
| 465 | { |
| 466 | const kxld_addr_t *a = key1; |
| 467 | const kxld_addr_t *b = key2; |
| 468 | |
| 469 | return (a && b && (*a == *b)); |
| 470 | } |
| 471 | |
| 472 | |