1 | /* Placeholder function, not used by any processor at the moment. |
2 | Copyright (C) 2022-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
/* UNUSED.  Exists purely as a reference implementation.  */
20 | |
21 | #include <isa-level.h> |
22 | |
23 | #if ISA_SHOULD_BUILD (4) |
24 | |
25 | # include <sysdep.h> |
26 | |
27 | # ifdef USE_AS_WMEMCHR |
28 | # define CHAR_SIZE 4 |
29 | # define VPBROADCAST vpbroadcastd |
30 | # define VPCMPEQ vpcmpeqd |
31 | # define VPCMPNE vpcmpneqd |
32 | # define VPMINU vpminud |
33 | # define VPTESTNM vptestnmd |
34 | # else |
35 | # define CHAR_SIZE 1 |
36 | # define VPBROADCAST vpbroadcastb |
37 | # define VPCMPEQ vpcmpeqb |
38 | # define VPCMPNE vpcmpneqb |
39 | # define VPMINU vpminub |
40 | # define VPTESTNM vptestnmb |
41 | # endif |
42 | |
43 | # define PAGE_SIZE 4096 |
44 | # define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE) |
45 | |
	.section SECTION(.text), "ax", @progbits
	/* Aligning the entry point to 64 bytes gives better performance
	   for strings of one vector length.  */
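	/* Input: pointer in %rdi, [w]char to search for in %esi and,
	   except for rawmemchr, the length in [w]chars in %rdx.  */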
49 | ENTRY_P2ALIGN (MEMCHR, 6) |
50 | # ifndef USE_AS_RAWMEMCHR |
51 | /* Check for zero length. */ |
52 | test %RDX_LP, %RDX_LP |
53 | jz L(zero) |
54 | |
55 | # ifdef __ILP32__ |
56 | /* Clear the upper 32 bits. */ |
57 | movl %edx, %edx |
58 | # endif |
59 | # endif |
60 | |
61 | /* Broadcast CHAR to VMM(1). */ |
62 | VPBROADCAST %esi, %VMM(1) |
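	/* If a VEC_SIZE load from rdi would cross a page boundary, take
	   the page-cross path so nothing past the end of the page is
	   read.  */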
63 | movl %edi, %eax |
64 | andl $(PAGE_SIZE - 1), %eax |
65 | cmpl $(PAGE_SIZE - VEC_SIZE), %eax |
66 | ja L(page_cross) |
67 | |
	/* Compare each [w]char with CHAR; the corresponding mask bit is
	   set on a match.  */
69 | VPCMPEQ (%rdi), %VMM(1), %k0 |
70 | |
71 | KMOV %k0, %VRCX |
72 | # ifndef USE_AS_RAWMEMCHR |
73 | mov %rdx, %rsi |
	/* Need to use bsfq here: rsi was preloaded with the length, and
	   the 32-bit 'bsf %ecx, %esi' may zero the upper 32 bits of rsi
	   when %ecx is 0, discarding that value.  */
76 | bsfq %rcx, %rsi |
77 | cmp $CHAR_PER_VEC, %rsi |
78 | ja L(align_more) |
79 | # ifdef USE_AS_WMEMCHR |
80 | leaq (%rdi, %rsi, CHAR_SIZE), %rdi |
81 | # else |
82 | addq %rsi, %rdi |
83 | # endif |
84 | xor %eax, %eax |
85 | cmp %rsi, %rdx |
86 | cmova %rdi, %rax |
87 | # else |
88 | bsf %VRCX, %VRAX |
89 | jz L(align_more) |
90 | add %rdi, %rax |
91 | # endif |
92 | ret |
93 | |
94 | .p2align 5,,5 |
95 | L(page_cross): |
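	/* The first load would cross into the next page.  Compare the
	   last VEC_SIZE bytes of the current page with an aligned load
	   instead.  rax still holds rdi & (PAGE_SIZE - 1), so xoring it
	   with rdi yields rdi rounded down to the page boundary.  */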
96 | movl %eax, %ecx |
97 | andl $(VEC_SIZE - 1), %ecx |
98 | # ifdef USE_AS_WMEMCHR |
99 | shrl $2, %ecx |
100 | # endif |
101 | xorq %rdi, %rax |
102 | VPCMPEQ (PAGE_SIZE - VEC_SIZE)(%rax), %VMM(1), %k0 |
103 | KMOV %k0, %VRSI |
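	/* Drop mask bits for [w]chars that lie before rdi.  */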
104 | shr %cl, %VRSI |
105 | # ifndef USE_AS_RAWMEMCHR |
106 | jnz L(page_cross_end) |
107 | movl $CHAR_PER_VEC, %eax |
108 | sub %ecx, %eax |
109 | cmp %rax, %rdx |
110 | ja L(align_more) |
111 | # else |
112 | jz L(align_more) |
113 | # endif |
114 | |
115 | L(page_cross_end): |
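	/* Either a match bit is set in the shifted mask, or (memchr
	   only) the whole remaining length lies within the [w]chars
	   already checked and no match was found.  */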
116 | # ifndef USE_AS_RAWMEMCHR |
117 | bsf %VRSI, %VRCX |
118 | jz L(zero) |
119 | leaq (%rdi, %rcx, CHAR_SIZE), %rdi |
120 | xor %eax, %eax |
121 | cmp %rcx, %rdx |
122 | cmova %rdi, %rax |
123 | # else |
124 | bsf %VRSI, %VRAX |
125 | add %rdi, %rax |
126 | # endif |
127 | ret |
128 | |
129 | # ifndef USE_AS_RAWMEMCHR |
130 | L(zero): |
131 | xorl %eax, %eax |
132 | ret |
133 | # endif |
134 | |
135 | L(ret_vec_x2): |
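	/* Match in the vector at VEC_SIZE(%rdi): advance rdi so the
	   common return path below treats it as the first vector.  */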
136 | subq $-VEC_SIZE, %rdi |
137 | L(ret_vec_x1): |
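	/* %VRAX holds the compare mask for the vector at (%rdi).  */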
138 | bsf %VRAX, %VRAX |
139 | # ifndef USE_AS_RAWMEMCHR |
140 | cmp %rax, %rdx |
141 | jbe L(zero) |
142 | # endif |
143 | # ifdef USE_AS_WMEMCHR |
144 | leaq (%rdi, %rax, CHAR_SIZE), %rax |
145 | # else |
146 | add %rdi, %rax |
147 | # endif |
148 | ret |
149 | |
150 | .p2align 5,,5 |
151 | L(align_more): |
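	/* No match was found and (for memchr) the length extends beyond
	   the [w]chars already checked.  Round rdi up to the next
	   VEC_SIZE-aligned address and, for memchr, adjust rdx so it
	   counts the [w]chars remaining from that address.  */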
152 | # ifndef USE_AS_RAWMEMCHR |
153 | mov %rdi, %rax |
154 | # endif |
155 | subq $-VEC_SIZE, %rdi |
156 | /* Align rdi to VEC_SIZE. */ |
157 | andq $-VEC_SIZE, %rdi |
158 | |
159 | # ifndef USE_AS_RAWMEMCHR |
160 | subq %rdi, %rax |
161 | # ifdef USE_AS_WMEMCHR |
162 | sar $2, %rax |
163 | # endif |
164 | addq %rax, %rdx |
165 | # endif |
166 | |
	/* Check the next four vectors one at a time before entering the
	   4-vector loop.  */
168 | VPCMPEQ (%rdi), %VMM(1), %k0 |
169 | |
170 | KMOV %k0, %VRAX |
171 | test %VRAX, %VRAX |
172 | jnz L(ret_vec_x1) |
173 | |
174 | # ifndef USE_AS_RAWMEMCHR |
175 | subq $CHAR_PER_VEC, %rdx |
176 | jbe L(zero) |
177 | # endif |
178 | |
179 | VPCMPEQ VEC_SIZE(%rdi), %VMM(1), %k0 |
180 | |
181 | KMOV %k0, %VRAX |
182 | test %VRAX, %VRAX |
183 | jnz L(ret_vec_x2) |
184 | |
185 | # ifndef USE_AS_RAWMEMCHR |
186 | subq $CHAR_PER_VEC, %rdx |
187 | jbe L(zero) |
188 | # endif |
189 | |
190 | VPCMPEQ (VEC_SIZE * 2)(%rdi), %VMM(1), %k0 |
191 | |
192 | KMOV %k0, %VRAX |
193 | test %VRAX, %VRAX |
194 | jnz L(ret_vec_x3) |
195 | |
196 | # ifndef USE_AS_RAWMEMCHR |
197 | subq $CHAR_PER_VEC, %rdx |
198 | jbe L(zero) |
199 | # endif |
200 | |
201 | VPCMPEQ (VEC_SIZE * 3)(%rdi), %VMM(1), %k0 |
202 | |
203 | KMOV %k0, %VRAX |
204 | test %VRAX, %VRAX |
205 | jnz L(ret_vec_x4) |
206 | |
207 | # ifndef USE_AS_RAWMEMCHR |
208 | subq $CHAR_PER_VEC, %rdx |
209 | jbe L(zero) |
	/* Save the pointer so the alignment adjustment can be computed
	   below.  */
211 | movq %rdi, %rax |
212 | # endif |
213 | /* Align address to VEC_SIZE * 4 for loop. */ |
214 | andq $-(VEC_SIZE * 4), %rdi |
215 | |
216 | /* Add alignment difference to rdx. */ |
217 | # ifndef USE_AS_RAWMEMCHR |
218 | subq %rdi, %rax |
219 | # ifdef USE_AS_WMEMCHR |
220 | shr $2, %VRAX |
221 | # endif |
222 | addq %rax, %rdx |
223 | # endif |
224 | |
225 | /* 4 vector loop. */ |
226 | .p2align 5,,11 |
227 | L(loop): |
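	/* Process four vectors per iteration.  k1 gets a bit set for
	   every [w]char of the first vector that does NOT match.  The
	   second and third vectors are xored with the broadcast CHAR,
	   so matching elements become zero.  VPMINU merges those two
	   results with zero-masking by k1, hence an element of VMM(3)
	   is zero iff the first, second or third vector matches at that
	   position.  VPTESTNM sets a bit in k2 for every zero element,
	   and k3 holds the plain compare mask for the fourth vector, so
	   KORTEST %k2, %k3 clears ZF iff any of the four vectors has a
	   match.  */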
228 | |
229 | VPCMPNE (VEC_SIZE * 4)(%rdi), %VMM(1), %k1 |
230 | vpxorq (VEC_SIZE * 5)(%rdi), %VMM(1), %VMM(2) |
231 | vpxorq (VEC_SIZE * 6)(%rdi), %VMM(1), %VMM(3) |
232 | VPCMPEQ (VEC_SIZE * 7)(%rdi), %VMM(1), %k3 |
233 | VPMINU %VMM(2), %VMM(3), %VMM(3){%k1}{z} |
234 | VPTESTNM %VMM(3), %VMM(3), %k2 |
235 | |
236 | subq $-(VEC_SIZE * 4), %rdi |
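	/* ZF from KORTEST is set only when both k2 and k3 are all zero,
	   i.e. none of the four vectors contains a match.  */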
237 | KORTEST %k2, %k3 |
238 | # ifdef USE_AS_RAWMEMCHR |
239 | jz L(loop) |
240 | # else |
241 | jnz L(loopend) |
242 | subq $(CHAR_PER_VEC * 4), %rdx |
243 | ja L(loop) |
244 | L(zero_2): |
245 | xor %eax, %eax |
246 | ret |
247 | # endif |
248 | |
249 | L(loopend): |
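	/* At least one of the four vectors contains a match.  rdi has
	   already been advanced, so (%rdi) is the first of them; test
	   each vector again to locate the match.  */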
250 | VPCMPEQ (%rdi), %VMM(1), %k1 |
251 | KMOV %k1, %VRAX |
252 | test %VRAX, %VRAX |
253 | jnz L(ret_vec_x1) |
254 | |
255 | # ifndef USE_AS_RAWMEMCHR |
256 | subq $CHAR_PER_VEC, %rdx |
257 | jbe L(zero_2) |
258 | # endif |
259 | |
260 | VPCMPEQ VEC_SIZE(%rdi), %VMM(1), %k1 |
261 | KMOV %k1, %VRAX |
262 | test %VRAX, %VRAX |
263 | jnz L(ret_vec_x2) |
264 | |
265 | # ifndef USE_AS_RAWMEMCHR |
266 | subq $CHAR_PER_VEC, %rdx |
267 | jbe L(zero_2) |
268 | # endif |
269 | |
270 | VPCMPEQ (VEC_SIZE * 2)(%rdi), %VMM(1), %k1 |
271 | KMOV %k1, %VRAX |
272 | test %VRAX, %VRAX |
273 | jnz L(ret_vec_x3) |
274 | |
275 | # ifndef USE_AS_RAWMEMCHR |
276 | subq $CHAR_PER_VEC, %rdx |
277 | jbe L(zero_2) |
278 | # endif |
279 | |
	/* At this point the matching [w]char must be in the fourth
	   vector, and k3 from the loop already holds its compare mask,
	   so there is no need to redo the compare.  */
282 | KMOV %k3, %VRAX |
283 | |
284 | L(ret_vec_x4): |
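	/* %VRAX holds the compare mask for the vector at
	   (VEC_SIZE * 3)(%rdi).  */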
285 | bsf %VRAX, %VRAX |
286 | # ifndef USE_AS_RAWMEMCHR |
287 | cmp %rax, %rdx |
288 | jbe L(zero) |
289 | # endif |
290 | leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax |
291 | ret |
292 | |
293 | .p2align 5,,5 |
294 | L(ret_vec_x3): |
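	/* %VRAX holds the compare mask for the vector at
	   (VEC_SIZE * 2)(%rdi).  */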
295 | bsf %VRAX, %VRAX |
296 | # ifndef USE_AS_RAWMEMCHR |
297 | cmp %rax, %rdx |
298 | jbe L(zero) |
299 | # endif |
300 | leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax |
301 | ret |
302 | |
303 | END (MEMCHR) |
304 | #endif |
305 | |