/* Placeholder function, not used by any processor at the moment.
   Copyright (C) 2022-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* UNUSED.  Exists purely as a reference implementation.  */

#include <isa-level.h>

#if ISA_SHOULD_BUILD (4)
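/* ISA_SHOULD_BUILD (4) limits this code to builds targeting the
   x86-64-v4 ISA level (EVEX/AVX-512 capable CPUs), which is why the
   EVEX mask registers and VPTESTN can be used unconditionally below.  */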

# include <sysdep.h>

# ifdef USE_AS_WCSLEN
#  define VPCMPEQ vpcmpeqd
#  define VPTESTN vptestnmd
#  define VPMINU vpminud
#  define CHAR_SIZE 4
# else
#  define VPCMPEQ vpcmpeqb
#  define VPTESTN vptestnmb
#  define VPMINU vpminub
#  define CHAR_SIZE 1
# endif

# define PAGE_SIZE 4096
# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
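/* Note: VEC_SIZE, SECTION and the VMM / VRAX / VRCX / VRDX / KMOV /
   KORTEST / VMOVA macros are not defined here; they are expected to be
   provided by the file that includes this one (together with the
   STRLEN entry name), presumably via the x86 EVEX vector register
   helper headers.  CHAR_PER_VEC is then the number of [w]chars handled
   by one vector compare.  */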

        .section SECTION(.text),"ax",@progbits
/* Aligning the entry point to 64 bytes gives better performance for
   strings that fit within one vector length.  */
ENTRY_P2ALIGN (STRLEN, 6)
# ifdef USE_AS_STRNLEN
        /* Check for zero length.  */
        test %RSI_LP, %RSI_LP
        jz L(ret_max)
#  ifdef __ILP32__
        /* Clear the upper 32 bits.  */
        movl %esi, %esi
#  endif
# endif

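        /* VMM(0) is zeroed and serves as the null-character pattern
           for every compare below.  The shift/compare pair is a page
           cross check: after shifting left by 20, bits 20-31 of eax
           hold the page offset of rdi (its low 12 bits), so the
           unsigned compare against (PAGE_SIZE - VEC_SIZE) << 20 is
           taken exactly when an unaligned VEC_SIZE-byte load from rdi
           would cross into the next page.  */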
        movl %edi, %eax
        vpxorq %VMM_128(0), %VMM_128(0), %VMM_128(0)
        sall $20, %eax
        cmpl $((PAGE_SIZE - VEC_SIZE) << 20), %eax
        ja L(page_cross)

        /* Compare each [w]char against null; the corresponding mask
           bit is set on a match.  */
        VPCMPEQ (%rdi), %VMM(0), %k0
# ifdef USE_AS_STRNLEN
        KMOV %k0, %VRCX
        /* Store the maximum length in rax.  */
        mov %rsi, %rax
        /* If rcx is 0, rax keeps the maximum length (this relies on
           bsf leaving its destination unchanged when the source is
           zero).  We cannot use VRCX and VRAX here for evex256 because
           the upper 32 bits of ecx and eax may be undefined.  */
        bsfq %rcx, %rax
        cmp $CHAR_PER_VEC, %rax
        ja L(align_more)
        cmpq %rax, %rsi
        cmovb %esi, %eax
# else
        KMOV %k0, %VRAX
        test %VRAX, %VRAX
        jz L(align_more)
        bsf %VRAX, %VRAX
# endif
        ret

        /* Reached when the maximum length is hit before a null
           [w]char is found.  */
# ifdef USE_AS_STRNLEN
        .p2align 4,,3
L(ret_max):
        movq %rsi, %rax
        ret
# endif

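/* No null [w]char was found in the [w]chars checked so far.  Round the
   pointer down to a VEC_SIZE aligned address and check the next four
   aligned vectors one at a time (these may overlap [w]chars already
   examined) before entering the 4 x VEC_SIZE aligned loop.  */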
L(align_more):
        mov %rdi, %rax
        /* Align rax to VEC_SIZE.  */
        andq $-VEC_SIZE, %rax
# ifdef USE_AS_STRNLEN
        movq %rdi, %rdx
        subq %rax, %rdx
#  ifdef USE_AS_WCSLEN
        shr $2, %VRDX
#  endif
        /* At this point rdx contains the number of [w]chars already
           compared.  */
        leaq -CHAR_PER_VEC(%rsi, %rdx), %rdx
        /* At this point rdx contains the number of [w]chars still to
           be checked.  From now on rdx is decremented after each
           compare.  */
# endif

        /* Unrolled 4 times: check 4 vectors one at a time before
           entering the 4 x VEC_SIZE aligned loop.  */
        VPCMPEQ VEC_SIZE(%rax), %VMM(0), %k0
        subq $-VEC_SIZE, %rax
        KMOV %k0, %VRCX
        test %VRCX, %VRCX
        jnz L(ret_vec_x1)

# ifdef USE_AS_STRNLEN
        subq $CHAR_PER_VEC, %rdx
        jbe L(ret_max)
# endif

        VPCMPEQ VEC_SIZE(%rax), %VMM(0), %k0
        KMOV %k0, %VRCX
        test %VRCX, %VRCX
        jnz L(ret_vec_x2)

# ifdef USE_AS_STRNLEN
        subq $CHAR_PER_VEC, %rdx
        jbe L(ret_max)
# endif

        VPCMPEQ (VEC_SIZE * 2)(%rax), %VMM(0), %k0
        KMOV %k0, %VRCX
        test %VRCX, %VRCX
        jnz L(ret_vec_x3)

# ifdef USE_AS_STRNLEN
        subq $CHAR_PER_VEC, %rdx
        jbe L(ret_max)
# endif

        VPCMPEQ (VEC_SIZE * 3)(%rax), %VMM(0), %k0
        KMOV %k0, %VRCX
        test %VRCX, %VRCX
        jnz L(ret_vec_x4)

# ifdef USE_AS_STRNLEN
        subq $CHAR_PER_VEC, %rdx
        jbe L(ret_max)
        /* Save the pointer before the 4 x VEC_SIZE alignment.  */
        movq %rax, %rcx
# endif

        /* Align the address to VEC_SIZE * 4 for the loop.  */
        andq $-(VEC_SIZE * 4), %rax

# ifdef USE_AS_STRNLEN
        subq %rax, %rcx
#  ifdef USE_AS_WCSLEN
        shr $2, %VRCX
#  endif
        /* rcx contains the number of [w]chars that will be recompared
           due to the alignment fix.  rdx must be incremented by rcx to
           offset the alignment adjustment.  */
        addq %rcx, %rdx
        /* rdx now also accounts for the [w]chars that the first
           iteration of the 4 x VEC_SIZE aligned loop will recompare,
           so the loop can subtract a full CHAR_PER_VEC * 4 on every
           iteration.  */
# endif

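        /* Main loop: process 4 vectors (VEC_SIZE * 4 bytes) per
           iteration.  VPMINU folds two vectors into one: a lane of the
           result is zero iff the corresponding lane of either input is
           zero, so one VPTESTN per pair detects a null [w]char in
           either vector, and KORTEST of the two masks decides whether
           to leave the loop.  */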
        .p2align 4,,11
L(loop):
        /* The VPMINU and VPCMP combination provides better performance
           than the alternative combinations.  */
        VMOVA (VEC_SIZE * 4)(%rax), %VMM(1)
        VPMINU (VEC_SIZE * 5)(%rax), %VMM(1), %VMM(2)
        VMOVA (VEC_SIZE * 6)(%rax), %VMM(3)
        VPMINU (VEC_SIZE * 7)(%rax), %VMM(3), %VMM(4)

        VPTESTN %VMM(2), %VMM(2), %k0
        VPTESTN %VMM(4), %VMM(4), %k1

        subq $-(VEC_SIZE * 4), %rax
        KORTEST %k0, %k1

# ifndef USE_AS_STRNLEN
        jz L(loop)
# else
        jnz L(loopend)
        subq $(CHAR_PER_VEC * 4), %rdx
        ja L(loop)
        mov %rsi, %rax
        ret
# endif

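/* A null [w]char was found somewhere in the last group of 4 vectors.
   k0 tests VPMINU(VMM(1), VMM(2)) and k1 tests VPMINU(VMM(3), VMM(4)),
   so VMM(1) and VMM(3) are re-tested individually below to determine
   which of the four vectors holds the first null.  */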
L(loopend):

        VPTESTN %VMM(1), %VMM(1), %k2
        KMOV %k2, %VRCX
        test %VRCX, %VRCX
        jnz L(ret_vec_x1)

        KMOV %k0, %VRCX
        /* At this point, if k0 is non-zero, the null char must be in
           the second vector.  */
        test %VRCX, %VRCX
        jnz L(ret_vec_x2)

        VPTESTN %VMM(3), %VMM(3), %k3
        KMOV %k3, %VRCX
        test %VRCX, %VRCX
        jnz L(ret_vec_x3)
        /* At this point the null [w]char must be in the fourth vector,
           so there is no need to check.  */
        KMOV %k1, %VRCX

        /* The fourth, third and second vector terminating cases are
           essentially the same; they are implemented this way to avoid
           branching and to reuse code from the pre-loop exit
           conditions.  */
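        /* Each L(ret_vec_x*) computes the length as the distance from
           rdi to the vector base (scaled to [w]chars), plus the
           vector's offset within the current group, plus the bit index
           of the first set mask bit (already a [w]char index).  For
           wcslen only the byte distance is divided by 4; the mask index
           needs no scaling.  For strnlen the result is capped at the
           maximum length in rsi.  */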
L(ret_vec_x4):
        bsf %VRCX, %VRCX
        subq %rdi, %rax
# ifdef USE_AS_WCSLEN
        subq $-(VEC_SIZE * 3), %rax
        shrq $2, %rax
        addq %rcx, %rax
# else
        leaq (VEC_SIZE * 3)(%rcx, %rax), %rax
# endif
# ifdef USE_AS_STRNLEN
        cmpq %rsi, %rax
        cmovnb %rsi, %rax
# endif
        ret

L(ret_vec_x3):
        bsf %VRCX, %VRCX
        subq %rdi, %rax
# ifdef USE_AS_WCSLEN
        subq $-(VEC_SIZE * 2), %rax
        shrq $2, %rax
        addq %rcx, %rax
# else
        leaq (VEC_SIZE * 2)(%rcx, %rax), %rax
# endif
# ifdef USE_AS_STRNLEN
        cmpq %rsi, %rax
        cmovnb %rsi, %rax
# endif
        ret

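        /* L(ret_vec_x2) adds one VEC_SIZE to the base and falls
           through to L(ret_vec_x1), which performs the common length
           computation for the first two vector positions.  */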
L(ret_vec_x2):
        subq $-VEC_SIZE, %rax
L(ret_vec_x1):
        bsf %VRCX, %VRCX
        subq %rdi, %rax
# ifdef USE_AS_WCSLEN
        shrq $2, %rax
# endif
        addq %rcx, %rax
# ifdef USE_AS_STRNLEN
        cmpq %rsi, %rax
        cmovnb %rsi, %rax
# endif
        ret

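/* The first unaligned VEC_SIZE load would cross a page boundary.  Load
   from the preceding VEC_SIZE aligned address instead, which stays
   within the same page as the first [w]char and so cannot fault for a
   valid string, then shift away the mask bits that belong to the
   [w]chars before the start of the string.  */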
L(page_cross):
        mov %rdi, %rax
        movl %edi, %ecx
        andl $(VEC_SIZE - 1), %ecx
# ifdef USE_AS_WCSLEN
        sarl $2, %ecx
# endif
        /* ecx contains the number of [w]chars to be skipped as a
           result of the address alignment.  */
        andq $-VEC_SIZE, %rax
        VPCMPEQ (%rax), %VMM(0), %k0
        KMOV %k0, %VRDX
        /* Discard the mask bits that correspond to the [w]chars before
           the start of the string.  */
        shr %cl, %VRDX
# ifdef USE_AS_STRNLEN
        jnz L(page_cross_end)
        movl $CHAR_PER_VEC, %eax
        sub %ecx, %eax
        cmp %rax, %rsi
        ja L(align_more)
# else
        jz L(align_more)
# endif

L(page_cross_end):
        bsf %VRDX, %VRAX
# ifdef USE_AS_STRNLEN
        cmpq %rsi, %rax
        cmovnb %esi, %eax
# endif
        ret

END (STRLEN)
#endif