/* Placeholder function, not used by any processor at the moment.
   Copyright (C) 2022-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* UNUSED.  Exists purely as reference implementation.  */

#include <isa-level.h>

#if ISA_SHOULD_BUILD (4)
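/* ISA level 4 corresponds to the x86-64-v4 feature level (AVX-512),
   which provides the EVEX encodings and mask registers used below.  */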

# include <sysdep.h>

# ifdef USE_AS_WMEMCHR
#  define CHAR_SIZE	4
#  define VPBROADCAST	vpbroadcastd
#  define VPCMPEQ	vpcmpeqd
#  define VPCMPNE	vpcmpneqd
#  define VPMINU	vpminud
#  define VPTESTNM	vptestnmd
# else
#  define CHAR_SIZE	1
#  define VPBROADCAST	vpbroadcastb
#  define VPCMPEQ	vpcmpeqb
#  define VPCMPNE	vpcmpneqb
#  define VPMINU	vpminub
#  define VPTESTNM	vptestnmb
# endif

# define PAGE_SIZE	4096
# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)

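/* VEC_SIZE, the VMM(N) vector register macros, the VRAX/VRCX/VRSI
   mask-width GPR aliases, SECTION, KMOV and KORTEST are not defined
   here; they are expected to come from the file that includes this one
   (which selects the vector length).

   Overall flow: check the first, possibly page-crossing, vector inline;
   if more needs to be scanned, align %rdi to VEC_SIZE and check four
   further vectors; then align to VEC_SIZE * 4 and loop over four
   vectors per iteration.  For memchr/wmemchr the remaining length is
   kept in %rdx and re-adjusted whenever %rdi is realigned.  */
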
	.section SECTION(.text), "ax", @progbits
/* Aligning the entry point to a 64-byte boundary provides better
   performance for strings that fit in one vector.  */
ENTRY_P2ALIGN (MEMCHR, 6)
# ifndef USE_AS_RAWMEMCHR
	/* Check for zero length.  */
	test	%RDX_LP, %RDX_LP
	jz	L(zero)

#  ifdef __ILP32__
	/* Clear the upper 32 bits.  */
	movl	%edx, %edx
#  endif
# endif

	/* Broadcast CHAR to VMM(1).  */
	VPBROADCAST %esi, %VMM(1)
	movl	%edi, %eax
	andl	$(PAGE_SIZE - 1), %eax
	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
	ja	L(page_cross)
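
	/* We are at least VEC_SIZE bytes away from the end of the page,
	   so a full vector load from (%rdi) cannot fault.  */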

	/* Compare each [w]char with CHAR; the corresponding mask bit is
	   set on a match.  */
	VPCMPEQ	(%rdi), %VMM(1), %k0

	KMOV	%k0, %VRCX
# ifndef USE_AS_RAWMEMCHR
	mov	%rdx, %rsi
	/* Need to use bsfq here as the upper 32 bits of rsi may be
	   zeroed out by 'bsf %ecx, %esi' if %ecx is 0.  */
	bsfq	%rcx, %rsi
	cmp	$CHAR_PER_VEC, %rsi
	ja	L(align_more)
#  ifdef USE_AS_WMEMCHR
	leaq	(%rdi, %rsi, CHAR_SIZE), %rdi
#  else
	addq	%rsi, %rdi
#  endif
	xor	%eax, %eax
	cmp	%rsi, %rdx
	cmova	%rdi, %rax
# else
	bsf	%VRCX, %VRAX
	jz	L(align_more)
	add	%rdi, %rax
# endif
	ret

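/* Entry point for inputs that lie within VEC_SIZE bytes of the end of a
   page.  Instead of reading past the page boundary, load the last
   VEC_SIZE bytes of the page (a backward-aligned load), then shift the
   match mask right to drop the characters that precede the start of the
   input.  */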
	.p2align 5,,5
L(page_cross):
	movl	%eax, %ecx
	andl	$(VEC_SIZE - 1), %ecx
# ifdef USE_AS_WMEMCHR
	shrl	$2, %ecx
# endif
	xorq	%rdi, %rax
	VPCMPEQ	(PAGE_SIZE - VEC_SIZE)(%rax), %VMM(1), %k0
	KMOV	%k0, %VRSI
	shr	%cl, %VRSI
# ifndef USE_AS_RAWMEMCHR
	jnz	L(page_cross_end)
	movl	$CHAR_PER_VEC, %eax
	sub	%ecx, %eax
	cmp	%rax, %rdx
	ja	L(align_more)
# else
	jz	L(align_more)
# endif

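/* %VRSI holds the match mask for the characters at and after the start
   of the input.  For memchr the match position must additionally be
   checked against the length in %rdx.  */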
L(page_cross_end):
# ifndef USE_AS_RAWMEMCHR
	bsf	%VRSI, %VRCX
	jz	L(zero)
	leaq	(%rdi, %rcx, CHAR_SIZE), %rdi
	xor	%eax, %eax
	cmp	%rcx, %rdx
	cmova	%rdi, %rax
# else
	bsf	%VRSI, %VRAX
	add	%rdi, %rax
# endif
	ret

# ifndef USE_AS_RAWMEMCHR
L(zero):
	xorl	%eax, %eax
	ret
# endif

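/* Return a match found in the first or second vector of the current
   group: %VRAX holds the match mask for the vector at (%rdi)
   (ret_vec_x1) or at VEC_SIZE(%rdi) (ret_vec_x2, which advances %rdi
   first so both paths share the same tail).  For memchr, return NULL
   instead if the match index is not below the remaining length in
   %rdx.  */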
L(ret_vec_x2):
	subq	$-VEC_SIZE, %rdi
L(ret_vec_x1):
	bsf	%VRAX, %VRAX
# ifndef USE_AS_RAWMEMCHR
	cmp	%rax, %rdx
	jbe	L(zero)
# endif
# ifdef USE_AS_WMEMCHR
	leaq	(%rdi, %rax, CHAR_SIZE), %rax
# else
	add	%rdi, %rax
# endif
	ret

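/* No match in the first (possibly partial) vector and the search
   continues: align %rdi to VEC_SIZE and check up to four more vectors
   before entering the main loop.  */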
	.p2align 5,,5
L(align_more):
# ifndef USE_AS_RAWMEMCHR
	mov	%rdi, %rax
# endif
	subq	$-VEC_SIZE, %rdi
	/* Align rdi to VEC_SIZE.  */
	andq	$-VEC_SIZE, %rdi

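	/* Adjust the length for memchr: subtracting the new %rdi from the
	   saved pointer in %rax gives minus the number of bytes skipped by
	   the alignment; after converting to characters for wmemchr it is
	   added to %rdx so that %rdx again counts from %rdi.  */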
# ifndef USE_AS_RAWMEMCHR
	subq	%rdi, %rax
#  ifdef USE_AS_WMEMCHR
	sar	$2, %rax
#  endif
	addq	%rax, %rdx
# endif

	/* Check the next four vectors one at a time before entering the
	   unrolled four-vector loop.  */
	VPCMPEQ	(%rdi), %VMM(1), %k0

	KMOV	%k0, %VRAX
	test	%VRAX, %VRAX
	jnz	L(ret_vec_x1)

# ifndef USE_AS_RAWMEMCHR
	subq	$CHAR_PER_VEC, %rdx
	jbe	L(zero)
# endif

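	/* Likewise for each of the following vectors: for memchr, %rdx is
	   decremented by CHAR_PER_VEC after every vector checked, so it
	   always counts the characters remaining past the last vector.  */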
	VPCMPEQ	VEC_SIZE(%rdi), %VMM(1), %k0

	KMOV	%k0, %VRAX
	test	%VRAX, %VRAX
	jnz	L(ret_vec_x2)

# ifndef USE_AS_RAWMEMCHR
	subq	$CHAR_PER_VEC, %rdx
	jbe	L(zero)
# endif

	VPCMPEQ	(VEC_SIZE * 2)(%rdi), %VMM(1), %k0

	KMOV	%k0, %VRAX
	test	%VRAX, %VRAX
	jnz	L(ret_vec_x3)

# ifndef USE_AS_RAWMEMCHR
	subq	$CHAR_PER_VEC, %rdx
	jbe	L(zero)
# endif

	VPCMPEQ	(VEC_SIZE * 3)(%rdi), %VMM(1), %k0

	KMOV	%k0, %VRAX
	test	%VRAX, %VRAX
	jnz	L(ret_vec_x4)

# ifndef USE_AS_RAWMEMCHR
	subq	$CHAR_PER_VEC, %rdx
	jbe	L(zero)
	/* Save pointer to find alignment adjustment.  */
	movq	%rdi, %rax
# endif
	/* Align address to VEC_SIZE * 4 for the loop.  */
	andq	$-(VEC_SIZE * 4), %rdi

	/* Add alignment difference to rdx.  */
# ifndef USE_AS_RAWMEMCHR
	subq	%rdi, %rax
#  ifdef USE_AS_WMEMCHR
	shr	$2, %VRAX
#  endif
	addq	%rax, %rdx
# endif

	/* 4 vector loop.  */
	.p2align 5,,11
L(loop):

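	/* Process the four vectors at (VEC_SIZE * 4..7)(%rdi) per
	   iteration.  Matches in the first three are folded into one test:
	   vpxorq produces zero lanes where the 2nd and 3rd vectors equal
	   CHAR, VPMINU merges those under zero-masking with %k1 (which is
	   clear exactly where the 1st vector matches), and VPTESTNM sets
	   %k2 for every zero lane.  Matches in the 4th vector are kept in
	   %k3, so KORTEST %k2, %k3 leaves ZF set only when no vector
	   contains CHAR.  */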
	VPCMPNE	(VEC_SIZE * 4)(%rdi), %VMM(1), %k1
	vpxorq	(VEC_SIZE * 5)(%rdi), %VMM(1), %VMM(2)
	vpxorq	(VEC_SIZE * 6)(%rdi), %VMM(1), %VMM(3)
	VPCMPEQ	(VEC_SIZE * 7)(%rdi), %VMM(1), %k3
	VPMINU	%VMM(2), %VMM(3), %VMM(3){%k1}{z}
	VPTESTNM %VMM(3), %VMM(3), %k2

	subq	$-(VEC_SIZE * 4), %rdi
	KORTEST	%k2, %k3
# ifdef USE_AS_RAWMEMCHR
	jz	L(loop)
# else
	jnz	L(loopend)
	subq	$(CHAR_PER_VEC * 4), %rdx
	ja	L(loop)
L(zero_2):
	xor	%eax, %eax
	ret
# endif

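/* A match was found somewhere in the last four vectors: re-check them
   one at a time to locate the first matching vector.  %k3 from the loop
   already holds the match mask for the fourth vector, so it does not
   need to be recomputed.  */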
L(loopend):
	VPCMPEQ	(%rdi), %VMM(1), %k1
	KMOV	%k1, %VRAX
	test	%VRAX, %VRAX
	jnz	L(ret_vec_x1)

# ifndef USE_AS_RAWMEMCHR
	subq	$CHAR_PER_VEC, %rdx
	jbe	L(zero_2)
# endif

	VPCMPEQ	VEC_SIZE(%rdi), %VMM(1), %k1
	KMOV	%k1, %VRAX
	test	%VRAX, %VRAX
	jnz	L(ret_vec_x2)

# ifndef USE_AS_RAWMEMCHR
	subq	$CHAR_PER_VEC, %rdx
	jbe	L(zero_2)
# endif

	VPCMPEQ	(VEC_SIZE * 2)(%rdi), %VMM(1), %k1
	KMOV	%k1, %VRAX
	test	%VRAX, %VRAX
	jnz	L(ret_vec_x3)

# ifndef USE_AS_RAWMEMCHR
	subq	$CHAR_PER_VEC, %rdx
	jbe	L(zero_2)
# endif

	/* At this point the matching [w]char must be in the fourth vector,
	   so there is no need to check again.  */
	KMOV	%k3, %VRAX

L(ret_vec_x4):
	bsf	%VRAX, %VRAX
# ifndef USE_AS_RAWMEMCHR
	cmp	%rax, %rdx
	jbe	L(zero)
# endif
	leaq	(VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
	ret

	.p2align 5,,5
L(ret_vec_x3):
	bsf	%VRAX, %VRAX
# ifndef USE_AS_RAWMEMCHR
	cmp	%rax, %rdx
	jbe	L(zero)
# endif
	leaq	(VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
	ret

END (MEMCHR)
#endif