/* Placeholder function, not used by any processor at the moment.
   Copyright (C) 2022-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* UNUSED.  Exists purely as a reference implementation.  */
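
/* A minimal C sketch of the strchr case implemented below, for reference
   only (the actual entry point is selected via STRCHR and the USE_AS_*
   macros; strchrnul returns a pointer to the terminator instead of NULL,
   and wcschr operates on 4-byte wide characters):

	char *strchr (const char *s, int c)
	{
	  while (*s != (char) c)
	    if (*s++ == '\0')
	      return NULL;
	  return (char *) s;
	}
 */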

#include <isa-level.h>

#if ISA_SHOULD_BUILD (4)

# include <sysdep.h>

# ifdef USE_AS_WCSCHR
#  define CHAR_REG esi
#  define CHAR_SIZE 4
#  define VPBROADCAST vpbroadcastd
#  define VPCMP vpcmpd
#  define VPCMPNE vpcmpneqd
#  define VPMINU vpminud
#  define VPTEST vptestmd
#  define VPTESTN vptestnmd
# else
#  define CHAR_REG sil
#  define CHAR_SIZE 1
#  define VPBROADCAST vpbroadcastb
#  define VPCMP vpcmpb
#  define VPCMPNE vpcmpneqb
#  define VPMINU vpminub
#  define VPTEST vptestmb
#  define VPTESTN vptestnmb
# endif

# define PAGE_SIZE 4096
# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
# define VEC_MATCH_MASK ((1 << CHAR_PER_VEC) - 1)
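
/* VEC_MATCH_MASK has one bit per [w]char in a vector.  Subtracting it
   from a wide-char compare mask plays the same role as the `inc' used in
   the byte case: the result is zero only when every mask bit was set,
   i.e. when no [w]char compared as CHAR or null.  */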

	.section SECTION(.text), "ax", @progbits
/* Aligning the entry point to a 64-byte boundary gives better
   performance for strings that fit in a single vector.  */
ENTRY_P2ALIGN (STRCHR, 6)

	/* Broadcast CHAR to VMM(0).  */
	VPBROADCAST %esi, %VMM(0)
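	/* Check whether an unaligned VEC_SIZE load from %rdi would cross
	   a page boundary: the shift moves the page-offset bits of the
	   address into the top of %eax, so the unsigned compare is
	   equivalent to (%rdi & (PAGE_SIZE - 1)) > PAGE_SIZE - VEC_SIZE.  */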
	movl %edi, %eax
	sall $20, %eax
	cmpl $((PAGE_SIZE - VEC_SIZE) << 20), %eax
	ja L(page_cross)

	VMOVU (%rdi), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRAX
	/* A clear mask bit marks a [w]char that is either CHAR or null.
	   The inc/sub below turns the all-ones (no hit) mask into zero;
	   otherwise its lowest set bit gives the first hit.  */

# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jz L(align_more)

	bsf %VRAX, %VRAX

# ifdef USE_AS_WCSCHR
	leaq (%rdi, %rax, CHAR_SIZE), %rax
# else
	add %rdi, %rax
# endif
# ifndef USE_AS_STRCHRNUL
	cmp (%rax), %CHAR_REG
	jne L(zero)
	ret
L(zero):
	xorl %eax, %eax
# endif
	ret

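/* Return paths for a hit in the 1st, 2nd or 3rd vector relative to the
   current %rdi (used both by the unrolled checks below and by the main
   loop): the fall-through chain adds one VEC_SIZE to %rdi per label so
   that a single bsf/lea computes the final address.  */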
L(ret_vec_x3):
	subq $-VEC_SIZE, %rdi
L(ret_vec_x2):
	subq $-VEC_SIZE, %rdi
L(ret_vec_x1):
	bsf %VRAX, %VRAX
# ifdef USE_AS_WCSCHR
	leaq (%rdi, %rax, CHAR_SIZE), %rax
# else
	add %rdi, %rax
# endif

# ifndef USE_AS_STRCHRNUL
	cmp (%rax), %CHAR_REG
	jne L(zero)
# endif
	ret

L(page_cross):
	mov %rdi, %rax
	movl %edi, %ecx
# ifdef USE_AS_WCSCHR
	/* Compute the number of compare-result bits to skip for the
	   wide-string alignment adjustment.  */
	andl $(VEC_SIZE - 1), %ecx
	sarl $2, %ecx
# endif
	/* %ecx now holds the number of [w]chars to skip due to the
	   address alignment.  */
	andq $-VEC_SIZE, %rax

	VMOVA (%rax), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	/* Discard the compare bits for the [w]chars skipped by the
	   alignment adjustment.  */
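	/* For the byte case %ecx still holds the full low bits of the
	   address, but the hardware truncates the shift count to the
	   operand width, which leaves exactly the byte offset within the
	   vector, so no extra masking is needed.  After the shift the
	   mask bits are relative to the original %rdi rather than the
	   aligned %rax, hence the address computation below uses %rdi.  */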
	shr %cl, %VRAX
	jz L(align_more)

	bsf %VRAX, %VRAX
# ifdef USE_AS_WCSCHR
	leaq (%rdi, %rax, CHAR_SIZE), %rax
# else
	addq %rdi, %rax
# endif

# ifndef USE_AS_STRCHRNUL
	cmp (%rax), %CHAR_REG
	jne L(zero)
# endif
	ret

L(align_more):
	/* Align %rdi down to VEC_SIZE.  */
	andq $-VEC_SIZE, %rdi

	/* Check the next four vectors individually (unrolled) before
	   entering the aligned four-vector loop.  */
	VMOVA VEC_SIZE(%rdi), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}

	/* Advance %rdi by one vector so that the later comparisons and
	   the return-address computation share the same base.  */
	subq $-VEC_SIZE, %rdi
	KMOV %k0, %VRAX

# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x1)

	VMOVA VEC_SIZE(%rdi), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x2)

	VMOVA (VEC_SIZE * 2)(%rdi), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x3)

	VMOVA (VEC_SIZE * 3)(%rdi), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRDX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRDX
# else
	inc %VRDX
# endif
	jnz L(ret_vec_x4)

	/* Align the address to VEC_SIZE * 4 for the loop.  */
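	/* This realignment may move %rdi back by up to 3 * VEC_SIZE,
	   re-checking vectors already scanned above; that is harmless and
	   keeps the loop loads 4 * VEC_SIZE aligned.  */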
	andq $-(VEC_SIZE * 4), %rdi
L(loop):
	/* The VPMINU and VPCMP combination performs better than the
	   alternative instruction combinations.  */
	VMOVA (VEC_SIZE * 4)(%rdi), %VMM(1)
	VMOVA (VEC_SIZE * 5)(%rdi), %VMM(2)
	VMOVA (VEC_SIZE * 6)(%rdi), %VMM(3)
	VMOVA (VEC_SIZE * 7)(%rdi), %VMM(4)

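	/* Fold the CHAR and null tests of all four vectors: %k3 and %k4
	   carry the "not CHAR" results of vectors 1/3 and 2/4, while the
	   VPMINU chain folds the null test into %VMM(4).  The final
	   masked VPTEST leaves a zero bit in %k5 wherever one of the four
	   vectors holds CHAR or a null, so %k5 is all ones only when
	   nothing was found.  */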
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPCMPNE %VMM(2), %VMM(0), %k2

	VPMINU %VMM(2), %VMM(1), %VMM(2)

	VPCMPNE %VMM(3), %VMM(0), %k3{%k1}
	VPCMPNE %VMM(4), %VMM(0), %k4{%k2}

	VPMINU %VMM(4), %VMM(3), %VMM(4)
	VPMINU %VMM(2), %VMM(4), %VMM(4){%k3}{z}

	VPTEST %VMM(4), %VMM(4), %k5{%k4}

	KMOV %k5, %VRDX
	subq $-(VEC_SIZE * 4), %rdi
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRDX
# else
	inc %VRDX
# endif
	jz L(loop)

	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x1)

	VPTEST %VMM(2), %VMM(2), %k0{%k2}
	KMOV %k0, %VRAX
	/* The first vector had no CHAR or null (so %k1 is all ones);
	   check the second vector the same way.  */
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x2)

	VPTEST %VMM(3), %VMM(3), %k0{%k3}
	KMOV %k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x3)
	/* At this point CHAR or the null [w]char must be in the fourth
	   vector, so there is no need to check it.  */

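/* %VRDX indexes the fourth vector on both paths: when jumped to from the
   unrolled checks above and when falling through from the loop (%rdi has
   already been advanced by 4 * VEC_SIZE there).  */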
L(ret_vec_x4):
	bsf %VRDX, %VRDX
	leaq (VEC_SIZE * 3)(%rdi, %rdx, CHAR_SIZE), %rax
# ifndef USE_AS_STRCHRNUL
	cmp (%rax), %CHAR_REG
	jne L(zero_2)
# endif
	ret

# ifndef USE_AS_STRCHRNUL
L(zero_2):
	xor %eax, %eax
	ret
# endif
END (STRCHR)
#endif