1 | /* _dl_new_hash for elf symbol lookup |
2 | Copyright (C) 2022-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #ifndef _DL_NEW_HASH_H |
20 | #define _DL_NEW_HASH_H 1 |
21 | |
22 | #include <stdint.h> |
23 | /* For __always_inline and __glibc_unlikely. */ |
24 | #include <sys/cdefs.h> |
25 | |
26 | /* The simplest implementation of _dl_new_hash is: |
27 | |
28 | _dl_new_hash (const char *s) |
29 | { |
30 | uint32_t h = 5381; |
31 | for (unsigned char c = *s; c != '\0'; c = *++s) |
32 | h = h * 33 + c; |
33 | return h; |
34 | } |
35 | |
36 | We can get better performance by slightly unrolling the loop to |
37 | pipeline the multiples, which gcc cannot easily do due to |
38 | dependencies across iterations. |
39 | |
40 | As well, as an architecture specific option we add asm statements |
41 | to explicitly specify order of operations and prevent reassociation |
42 | of instructions that lengthens the loop carried dependency. This |
43 | may have no affect as the compiler may have ordered instructions |
44 | the same way without it but in testing this has not been the case |
45 | for GCC. Improving GCC to reliably schedule instructions ideally |
46 | cannot be easily done. |
47 | |
48 | Architecture(s) that use the reassociation barriers are: |
49 | x86 |
50 | |
51 | Note it is very unlikely the reassociation barriers would |
52 | de-optimize performance on any architecture and with an imperfect |
53 | compiler it may help performance, especially on out-of-order cpus, |
54 | so it is suggested that the respective maintainers add them. |
55 | |
56 | Architecture maintainers are encouraged to benchmark this with |
57 | __asm_reassociation_barrier defined to __asm__ like it is in x86. |
58 | */ |
59 | |
60 | |
61 | #ifndef __asm_reassociation_barrier |
62 | # define __asm_reassociation_barrier(...) |
63 | #endif |
64 | |
65 | static __always_inline uint32_t |
66 | __attribute__ ((unused)) |
67 | _dl_new_hash (const char *str) |
68 | { |
69 | const unsigned char *s = (const unsigned char *) str; |
70 | unsigned int h = 5381; |
71 | unsigned int c0, c1; |
72 | for (;;) |
73 | { |
74 | c0 = s[0]; |
75 | /* Since hashed string is normally not empty, this is unlikely on the |
76 | first iteration of the loop. */ |
77 | if (__glibc_unlikely (c0 == 0)) |
78 | return h; |
79 | |
80 | c1 = s[1]; |
81 | if (c1 == 0) |
82 | { |
83 | /* Ideal computational order is: |
84 | c0 += h; |
85 | h *= 32; |
86 | h += c0; */ |
87 | c0 += h; |
88 | __asm_reassociation_barrier("" : "+r" (h) : "r" (c0)); |
89 | h = h * 32 + c0; |
90 | return h; |
91 | } |
92 | |
93 | /* Ideal computational order is: |
94 | c1 += c0; |
95 | h *= 33 * 33; |
96 | c0 *= 32; |
97 | c1 += c0; |
98 | h += c1; */ |
99 | c1 += c0; |
100 | __asm_reassociation_barrier("" : "+r" (c1), "+r" (c0)); |
101 | h *= 33 * 33; |
102 | c1 += c0 * 32; |
103 | __asm_reassociation_barrier("" : "+r" (c1)); |
104 | h += c1; |
105 | s += 2; |
106 | } |
107 | } |
108 | |
109 | #endif /* dl-new-hash.h */ |
110 | |