/* Placeholder function, not used by any processor at the moment.
   Copyright (C) 2022-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* UNUSED.  Exists purely as reference implementation.  */

#include <isa-level.h>

#if ISA_SHOULD_BUILD (4)

# include <sysdep.h>

# ifdef USE_AS_WCSCHR
#  define CHAR_REG	esi
#  define CHAR_SIZE	4
#  define VPBROADCAST	vpbroadcastd
#  define VPCMP		vpcmpd
#  define VPCMPNE	vpcmpneqd
#  define VPMINU	vpminud
#  define VPTEST	vptestmd
#  define VPTESTN	vptestnmd
# else
#  define CHAR_REG	sil
#  define CHAR_SIZE	1
#  define VPBROADCAST	vpbroadcastb
#  define VPCMP		vpcmpb
#  define VPCMPNE	vpcmpneqb
#  define VPMINU	vpminub
#  define VPTEST	vptestmb
#  define VPTESTN	vptestnmb
# endif

# define PAGE_SIZE	4096
# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
# define VEC_MATCH_MASK	((1 << CHAR_PER_VEC) - 1)
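/* VEC_MATCH_MASK has one bit per character position in a vector.  It is
   only needed for the wide-char case, where just CHAR_PER_VEC bits of the
   k-mask are meaningful; the byte variant uses a plain inc instead, since
   there the mask register width already equals CHAR_PER_VEC.  */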

	.section SECTION(.text), "ax", @progbits
/* Aligning the entry point to 64 bytes gives better performance for
   strings that fit in a single vector.  */
ENTRY_P2ALIGN (STRCHR, 6)

	/* Broadcast CHAR to VMM(0).  */
	VPBROADCAST	%esi, %VMM(0)
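	/* Check whether an unaligned VEC_SIZE load from rdi would cross a
	   page boundary: shifting the address left by 20 keeps only the
	   page offset (low 12 bits, PAGE_SIZE == 4096) in the high bits of
	   eax, so the unsigned compare below is effectively
	   (rdi % PAGE_SIZE) > (PAGE_SIZE - VEC_SIZE).  */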
	movl	%edi, %eax
	sall	$20, %eax
	cmpl	$((PAGE_SIZE - VEC_SIZE) << 20), %eax
	ja	L(page_cross)

	VMOVU	(%rdi), %VMM(1)
	VPCMPNE	%VMM(1), %VMM(0), %k1
	VPTEST	%VMM(1), %VMM(1), %k0{%k1}
	KMOV	%k0, %VRAX
	/* Compare [w]char for null, mask bit will be set for match.  */

# ifdef USE_AS_WCSCHR
	sub	$VEC_MATCH_MASK, %VRAX
# else
	inc	%VRAX
# endif
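	/* Each set bit in the mask marks an element that is neither CHAR
	   nor null.  Adding 1 (or subtracting VEC_MATCH_MASK in the
	   wide-char case, where only CHAR_PER_VEC bits are valid) gives
	   zero when the whole vector is clean; otherwise the lowest set
	   bit of the result is the index of the first CHAR or null, which
	   bsf extracts below.  */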
	jz	L(align_more)

	bsf	%VRAX, %VRAX

# ifdef USE_AS_WCSCHR
	leaq	(%rdi, %rax, CHAR_SIZE), %rax
# else
	add	%rdi, %rax
# endif
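	/* If the [w]char at the match position is not CHAR it must be the
	   null terminator, so plain strchr returns NULL; strchrnul
	   returns the pointer to the terminator instead.  */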
# ifndef USE_AS_STRCHRNUL
	cmp	(%rax), %CHAR_REG
	jne	L(zero)
	ret
L(zero):
	xorl	%eax, %eax
# endif
	ret

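/* ret_vec_x3 and ret_vec_x2 fall through into ret_vec_x1; each step first
   advances rdi by one vector so the bit index taken from the k-mask is
   relative to the vector in which the match was found.  */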
L(ret_vec_x3):
	subq	$-VEC_SIZE, %rdi
L(ret_vec_x2):
	subq	$-VEC_SIZE, %rdi
L(ret_vec_x1):
	bsf	%VRAX, %VRAX
# ifdef USE_AS_WCSCHR
	leaq	(%rdi, %rax, CHAR_SIZE), %rax
# else
	add	%rdi, %rax
# endif

# ifndef USE_AS_STRCHRNUL
	cmp	(%rax), %CHAR_REG
	jne	L(zero)
# endif
	ret

L(page_cross):
	mov	%rdi, %rax
	movl	%edi, %ecx
# ifdef USE_AS_WCSCHR
	/* Calculate number of compare result bits to be skipped for
	   wide string alignment adjustment.  */
	andl	$(VEC_SIZE - 1), %ecx
	sarl	$2, %ecx
# endif
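	/* For the byte variant %ecx is left as-is: the shift below only
	   uses the low bits of %cl, which already equal the byte offset
	   within the VEC_SIZE-aligned block.  */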
	/* ecx contains the number of [w]chars to be skipped as a result
	   of address alignment.  */
	andq	$-VEC_SIZE, %rax

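	/* The aligned load below stays within the current page, so it
	   cannot fault even though it may read bytes before the start of
	   the string.  */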
	VMOVA	(%rax), %VMM(1)
	VPCMPNE	%VMM(1), %VMM(0), %k1
	VPTEST	%VMM(1), %VMM(1), %k0{%k1}
	KMOV	%k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub	$VEC_MATCH_MASK, %VRAX
# else
	inc	%VRAX
# endif
	/* Discard the result bits that correspond to [w]chars before the
	   start of the string (the alignment adjustment).  */
	shr	%cl, %VRAX
	jz	L(align_more)

	bsf	%VRAX, %VRAX
# ifdef USE_AS_WCSCHR
	leaq	(%rdi, %rax, CHAR_SIZE), %rax
# else
	addq	%rdi, %rax
# endif

# ifndef USE_AS_STRCHRNUL
	cmp	(%rax), %CHAR_REG
	jne	L(zero)
# endif
	ret

L(align_more):
	/* Align rdi to VEC_SIZE.  */
	andq	$-VEC_SIZE, %rdi

	/* Check the next four vectors individually before entering the
	   4-vector loop.  */
	VMOVA	VEC_SIZE(%rdi), %VMM(1)
	VPCMPNE	%VMM(1), %VMM(0), %k1
	VPTEST	%VMM(1), %VMM(1), %k0{%k1}

	/* Increment rdi by vector size for further comparison and
	   return.  */
	subq	$-VEC_SIZE, %rdi
	KMOV	%k0, %VRAX

# ifdef USE_AS_WCSCHR
	sub	$VEC_MATCH_MASK, %VRAX
# else
	inc	%VRAX
# endif
	jnz	L(ret_vec_x1)

	VMOVA	VEC_SIZE(%rdi), %VMM(1)
	VPCMPNE	%VMM(1), %VMM(0), %k1
	VPTEST	%VMM(1), %VMM(1), %k0{%k1}
	KMOV	%k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub	$VEC_MATCH_MASK, %VRAX
# else
	inc	%VRAX
# endif
	jnz	L(ret_vec_x2)

	VMOVA	(VEC_SIZE * 2)(%rdi), %VMM(1)
	VPCMPNE	%VMM(1), %VMM(0), %k1
	VPTEST	%VMM(1), %VMM(1), %k0{%k1}
	KMOV	%k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub	$VEC_MATCH_MASK, %VRAX
# else
	inc	%VRAX
# endif
	jnz	L(ret_vec_x3)

	VMOVA	(VEC_SIZE * 3)(%rdi), %VMM(1)
	VPCMPNE	%VMM(1), %VMM(0), %k1
	VPTEST	%VMM(1), %VMM(1), %k0{%k1}
	KMOV	%k0, %VRDX
# ifdef USE_AS_WCSCHR
	sub	$VEC_MATCH_MASK, %VRDX
# else
	inc	%VRDX
# endif
	jnz	L(ret_vec_x4)

	/* Align address to VEC_SIZE * 4 for the loop.  */
	andq	$-(VEC_SIZE * 4), %rdi
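	/* The alignment above may move rdi backwards, so the first loop
	   iteration can re-scan up to three vectors that were already
	   checked; this is harmless since they contain neither CHAR nor
	   null.  */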
L(loop):
	/* The VPMINU and VPCMP combination provides better performance
	   than alternative combinations.  */
	VMOVA	(VEC_SIZE * 4)(%rdi), %VMM(1)
	VMOVA	(VEC_SIZE * 5)(%rdi), %VMM(2)
	VMOVA	(VEC_SIZE * 6)(%rdi), %VMM(3)
	VMOVA	(VEC_SIZE * 7)(%rdi), %VMM(4)

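	/* Fold the four vectors into a single test: k1-k4 accumulate the
	   "not CHAR" results of all four compares, and the zero-masked
	   VPMINU chain produces a zero element wherever one of the
	   vectors has a null (or where VMM(1)/VMM(3) match CHAR).  The
	   VPTEST under k4 therefore yields an all-ones mask only when
	   none of the four vectors contains CHAR or null.  */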
	VPCMPNE	%VMM(1), %VMM(0), %k1
	VPCMPNE	%VMM(2), %VMM(0), %k2

	VPMINU	%VMM(2), %VMM(1), %VMM(2)

	VPCMPNE	%VMM(3), %VMM(0), %k3{%k1}
	VPCMPNE	%VMM(4), %VMM(0), %k4{%k2}

	VPMINU	%VMM(4), %VMM(3), %VMM(4)
	VPMINU	%VMM(2), %VMM(4), %VMM(4){%k3}{z}

	VPTEST	%VMM(4), %VMM(4), %k5{%k4}

	KMOV	%k5, %VRDX
	subq	$-(VEC_SIZE * 4), %rdi
# ifdef USE_AS_WCSCHR
	sub	$VEC_MATCH_MASK, %VRDX
# else
	inc	%VRDX
# endif
	jz	L(loop)

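	/* Something was found in one of the four vectors; retest them one
	   at a time, reusing the k1-k3 compare results, to locate the
	   first CHAR or null.  */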
	VPTEST	%VMM(1), %VMM(1), %k0{%k1}
	KMOV	%k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub	$VEC_MATCH_MASK, %VRAX
# else
	inc	%VRAX
# endif
	jnz	L(ret_vec_x1)

	VPTEST	%VMM(2), %VMM(2), %k0{%k2}
	KMOV	%k0, %VRAX
	/* At this point the first CHAR or null must be in the second,
	   third or fourth vector.  */
# ifdef USE_AS_WCSCHR
	sub	$VEC_MATCH_MASK, %VRAX
# else
	inc	%VRAX
# endif
	jnz	L(ret_vec_x2)

	VPTEST	%VMM(3), %VMM(3), %k0{%k3}
	KMOV	%k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub	$VEC_MATCH_MASK, %VRAX
# else
	inc	%VRAX
# endif
	jnz	L(ret_vec_x3)
	/* At this point the first CHAR or null must be in the fourth
	   vector, so there is no need to check.  */

L(ret_vec_x4):
	bsf	%VRDX, %VRDX
	leaq	(VEC_SIZE * 3)(%rdi, %rdx, CHAR_SIZE), %rax
# ifndef USE_AS_STRCHRNUL
	cmp	(%rax), %CHAR_REG
	jne	L(zero_2)
# endif
	ret

# ifndef USE_AS_STRCHRNUL
L(zero_2):
	xor	%eax, %eax
	ret
# endif
END (STRCHR)
#endif