| 1 | /* String tables for ld.so.cache construction. Implementation. |
| 2 | Copyright (C) 2020-2023 Free Software Foundation, Inc. |
| 3 | This file is part of the GNU C Library. |
| 4 | |
| 5 | This program is free software; you can redistribute it and/or modify |
| 6 | it under the terms of the GNU General Public License as published |
| 7 | by the Free Software Foundation; version 2 of the License, or |
| 8 | (at your option) any later version. |
| 9 | |
| 10 | This program is distributed in the hope that it will be useful, |
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | GNU General Public License for more details. |
| 14 | |
| 15 | You should have received a copy of the GNU General Public License |
| 16 | along with this program; if not, see <https://www.gnu.org/licenses/>. */ |
| 17 | |
| 18 | #include <assert.h> |
| 19 | #include <error.h> |
| 20 | #include <ldconfig.h> |
| 21 | #include <libintl.h> |
| 22 | #include <stdlib.h> |
| 23 | #include <string.h> |
| 24 | #include <stringtable.h> |
| 25 | |
| 26 | static void |
| 27 | stringtable_init (struct stringtable *table) |
| 28 | { |
| 29 | table->count = 0; |
| 30 | |
| 31 | /* This needs to be a power of two. 128 is sufficient to keep track |
| 32 | of 42 DSOs without resizing (assuming two strings per DSOs). |
| 33 | glibc itself comes with more than 20 DSOs, so 64 would likely to |
| 34 | be too small. */ |
| 35 | table->allocated = 128; |
| 36 | |
| 37 | table->entries = xcalloc (table->allocated, sizeof (table->entries[0])); |
| 38 | } |
| 39 | |
| 40 | /* 32-bit FNV-1a hash function. */ |
| 41 | static uint32_t |
| 42 | fnv1a (const char *string, size_t length) |
| 43 | { |
| 44 | const unsigned char *p = (const unsigned char *) string; |
| 45 | uint32_t hash = 2166136261U; |
| 46 | for (size_t i = 0; i < length; ++i) |
| 47 | { |
| 48 | hash ^= p[i]; |
| 49 | hash *= 16777619U; |
| 50 | } |
| 51 | return hash; |
| 52 | } |
| 53 | |
| 54 | /* Double the capacity of the hash table. */ |
| 55 | static void |
| 56 | stringtable_rehash (struct stringtable *table) |
| 57 | { |
| 58 | /* This computation cannot overflow because the old total in-memory |
| 59 | size of the hash table is larger than the computed value. */ |
| 60 | uint32_t new_allocated = table->allocated * 2; |
| 61 | struct stringtable_entry **new_entries |
| 62 | = xcalloc (new_allocated, sizeof (table->entries[0])); |
| 63 | |
| 64 | uint32_t mask = new_allocated - 1; |
| 65 | for (uint32_t i = 0; i < table->allocated; ++i) |
| 66 | for (struct stringtable_entry *e = table->entries[i]; e != NULL; ) |
| 67 | { |
| 68 | struct stringtable_entry *next = e->next; |
| 69 | uint32_t hash = fnv1a (e->string, e->length); |
| 70 | uint32_t new_index = hash & mask; |
| 71 | e->next = new_entries[new_index]; |
| 72 | new_entries[new_index] = e; |
| 73 | e = next; |
| 74 | } |
| 75 | |
| 76 | free (table->entries); |
| 77 | table->entries = new_entries; |
| 78 | table->allocated = new_allocated; |
| 79 | } |
| 80 | |
| 81 | struct stringtable_entry * |
| 82 | stringtable_add (struct stringtable *table, const char *string) |
| 83 | { |
| 84 | /* Check for a zero-initialized table. */ |
| 85 | if (table->allocated == 0) |
| 86 | stringtable_init (table); |
| 87 | |
| 88 | size_t length = strlen (string); |
| 89 | if (length > (1U << 30)) |
| 90 | error (EXIT_FAILURE, 0, _("String table string is too long" )); |
| 91 | uint32_t hash = fnv1a (string, length); |
| 92 | |
| 93 | /* Return a previously-existing entry. */ |
| 94 | for (struct stringtable_entry *e |
| 95 | = table->entries[hash & (table->allocated - 1)]; |
| 96 | e != NULL; e = e->next) |
| 97 | if (e->length == length && memcmp (e->string, string, length) == 0) |
| 98 | return e; |
| 99 | |
| 100 | /* Increase the size of the table if necessary. Keep utilization |
| 101 | below two thirds. */ |
| 102 | if (table->count >= (1U << 30)) |
| 103 | error (EXIT_FAILURE, 0, _("String table has too many entries" )); |
| 104 | if (table->count * 3 > table->allocated * 2) |
| 105 | stringtable_rehash (table); |
| 106 | |
| 107 | /* Add the new table entry. */ |
| 108 | ++table->count; |
| 109 | struct stringtable_entry *e |
| 110 | = xmalloc (offsetof (struct stringtable_entry, string) + length + 1); |
| 111 | uint32_t index = hash & (table->allocated - 1); |
| 112 | e->next = table->entries[index]; |
| 113 | table->entries[index] = e; |
| 114 | e->length = length; |
| 115 | e->offset = 0; |
| 116 | memcpy (e->string, string, length + 1); |
| 117 | return e; |
| 118 | } |
| 119 | |
| 120 | /* Sort reversed strings in reverse lexicographic order. This is used |
| 121 | for tail merging. */ |
| 122 | static int |
| 123 | finalize_compare (const void *l, const void *r) |
| 124 | { |
| 125 | struct stringtable_entry *left = *(struct stringtable_entry **) l; |
| 126 | struct stringtable_entry *right = *(struct stringtable_entry **) r; |
| 127 | size_t to_compare; |
| 128 | if (left->length < right->length) |
| 129 | to_compare = left->length; |
| 130 | else |
| 131 | to_compare = right->length; |
| 132 | for (size_t i = 1; i <= to_compare; ++i) |
| 133 | { |
| 134 | unsigned char lch = left->string[left->length - i]; |
| 135 | unsigned char rch = right->string[right->length - i]; |
| 136 | if (lch != rch) |
| 137 | return rch - lch; |
| 138 | } |
| 139 | if (left->length == right->length) |
| 140 | return 0; |
| 141 | else if (left->length < right->length) |
| 142 | /* Longer strings should come first. */ |
| 143 | return 1; |
| 144 | else |
| 145 | return -1; |
| 146 | } |
| 147 | |
| 148 | void |
| 149 | stringtable_finalize (struct stringtable *table, |
| 150 | struct stringtable_finalized *result) |
| 151 | { |
| 152 | if (table->count == 0) |
| 153 | { |
| 154 | result->strings = xstrdup ("" ); |
| 155 | result->size = 0; |
| 156 | return; |
| 157 | } |
| 158 | |
| 159 | /* Optimize the order of the strings. */ |
| 160 | struct stringtable_entry **array = xcalloc (table->count, sizeof (*array)); |
| 161 | { |
| 162 | size_t j = 0; |
| 163 | for (uint32_t i = 0; i < table->allocated; ++i) |
| 164 | for (struct stringtable_entry *e = table->entries[i]; e != NULL; |
| 165 | e = e->next) |
| 166 | { |
| 167 | array[j] = e; |
| 168 | ++j; |
| 169 | } |
| 170 | assert (j == table->count); |
| 171 | } |
| 172 | qsort (array, table->count, sizeof (*array), finalize_compare); |
| 173 | |
| 174 | /* Assign offsets, using tail merging (sharing suffixes) if possible. */ |
| 175 | array[0]->offset = 0; |
| 176 | for (uint32_t j = 1; j < table->count; ++j) |
| 177 | { |
| 178 | struct stringtable_entry *previous = array[j - 1]; |
| 179 | struct stringtable_entry *current = array[j]; |
| 180 | if (previous->length >= current->length |
| 181 | && memcmp (&previous->string[previous->length - current->length], |
| 182 | current->string, current->length) == 0) |
| 183 | current->offset = (previous->offset + previous->length |
| 184 | - current->length); |
| 185 | else if (__builtin_add_overflow (previous->offset, |
| 186 | previous->length + 1, |
| 187 | ¤t->offset)) |
| 188 | error (EXIT_FAILURE, 0, _("String table is too large" )); |
| 189 | } |
| 190 | |
| 191 | /* Allocate the result string. */ |
| 192 | { |
| 193 | struct stringtable_entry *last = array[table->count - 1]; |
| 194 | if (__builtin_add_overflow (last->offset, last->length + 1, |
| 195 | &result->size)) |
| 196 | error (EXIT_FAILURE, 0, _("String table is too large" )); |
| 197 | } |
| 198 | /* The strings are copied from the hash table, so the array is no |
| 199 | longer needed. */ |
| 200 | free (array); |
| 201 | result->strings = xcalloc (result->size, 1); |
| 202 | |
| 203 | /* Copy the strings. */ |
| 204 | for (uint32_t i = 0; i < table->allocated; ++i) |
| 205 | for (struct stringtable_entry *e = table->entries[i]; e != NULL; |
| 206 | e = e->next) |
| 207 | if (result->strings[e->offset] == '\0') |
| 208 | memcpy (&result->strings[e->offset], e->string, e->length + 1); |
| 209 | } |
| 210 | |