/* Placeholder function, not used by any processor at the moment.
   Copyright (C) 2022-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* UNUSED.  Exists purely as a reference implementation.  */

#include <isa-level.h>

#if ISA_SHOULD_BUILD (4)

# include <sysdep.h>

# ifdef USE_AS_WCSRCHR
#  define CHAR_SIZE	4
#  define VPBROADCAST	vpbroadcastd
#  define VPCMPEQ	vpcmpeqd
#  define VPMINU	vpminud
#  define VPTESTN	vptestnmd
# else
#  define CHAR_SIZE	1
#  define VPBROADCAST	vpbroadcastb
#  define VPCMPEQ	vpcmpeqb
#  define VPMINU	vpminub
#  define VPTESTN	vptestnmb
# endif

# define PAGE_SIZE	4096
# define CHAR_PER_VEC	(VEC_SIZE / CHAR_SIZE)
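
/* Scheme: since strrchr must return the *last* occurrence of CHAR,
   the string is scanned forward one or two vectors at a time.  In
   each vector, CHAR matches past the null terminator are masked off
   using BLSMSK on the null mask, and BSR picks the highest surviving
   match.  Across loop iterations the latest match is kept in r8.  */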

	.section SECTION(.text), "ax", @progbits
/* Aligning the entry point to a 64-byte boundary provides better
   performance for strings that fit in one vector.  */
ENTRY_P2ALIGN (STRRCHR, 6)

	/* Broadcast CHAR to VMM(0).  */
	VPBROADCAST %esi, %VMM(0)
	movl	%edi, %eax
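	/* The shift moves the page offset (address bits 11:0) into
	   the top bits of eax, making the unsigned compare effectively
	   (rdi % PAGE_SIZE) > (PAGE_SIZE - VEC_SIZE), i.e. an
	   unaligned VEC_SIZE load from rdi would cross a page.  */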
	sall	$20, %eax
	cmpl	$((PAGE_SIZE - VEC_SIZE) << 20), %eax
	ja	L(page_cross)

L(page_cross_continue):
	/* Compare [w]chars for null; a mask bit is set for each null
	   found.  */
	VMOVU	(%rdi), %VMM(1)

	VPTESTN	%VMM(1), %VMM(1), %k1
	KMOV	%k1, %VRCX
	test	%VRCX, %VRCX
	jz	L(align_more)

	VPCMPEQ	%VMM(1), %VMM(0), %k0
	KMOV	%k0, %VRAX
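	/* BLSMSK sets all bits up to and including the lowest set bit
	   of VRCX (the first null), so the AND below keeps only CHAR
	   matches at or before the null terminator.  */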
	BLSMSK	%VRCX, %VRCX
	and	%VRCX, %VRAX
	jz	L(ret)

	BSR	%VRAX, %VRAX
# ifdef USE_AS_WCSRCHR
	leaq	(%rdi, %rax, CHAR_SIZE), %rax
# else
	add	%rdi, %rax
# endif
L(ret):
	ret

L(vector_x2_end):
	VPCMPEQ	%VMM(2), %VMM(0), %k2
	KMOV	%k2, %VRAX
	BLSMSK	%VRCX, %VRCX
	and	%VRCX, %VRAX
	jz	L(vector_x1_ret)

	BSR	%VRAX, %VRAX
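	/* The match is in the vector loaded from rdi + VEC_SIZE,
	   hence the extra VEC_SIZE displacement.  */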
	leaq	(VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax
	ret

	/* The first vector is checked last when looking for a
	   match.  */
L(vector_x1_ret):
	VPCMPEQ	%VMM(1), %VMM(0), %k2
	KMOV	%k2, %VRAX
	test	%VRAX, %VRAX
	jz	L(ret)

	BSR	%VRAX, %VRAX
# ifdef USE_AS_WCSRCHR
	leaq	(%rsi, %rax, CHAR_SIZE), %rax
# else
	add	%rsi, %rax
# endif
	ret

L(align_more):
	/* Zero r8, which accumulates the latest match; r8 == 0 also
	   serves as the "no match yet" flag tested at
	   L(check_last_match).  */
	xorl	%r8d, %r8d
	/* Save pointer to the first vector, in case no match is
	   found.  */
	movq	%rdi, %rsi
	/* Align the pointer down to the vector size.  */
	andq	$-VEC_SIZE, %rdi
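	/* After aligning down, the load at VEC_SIZE(%rdi) may overlap
	   [w]chars already checked in the first vector; this is safe
	   because that vector contained no null terminator.  */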
	/* Unrolled check of the second vector before entering the
	   2-vector loop.  */
	VMOVA	(VEC_SIZE)(%rdi), %VMM(2)
	VPTESTN	%VMM(2), %VMM(2), %k0
	KMOV	%k0, %VRCX
	test	%VRCX, %VRCX
	jnz	L(vector_x2_end)

	/* Save pointer to the second vector, in case no match is
	   found.  */
	movq	%rdi, %r9
	/* Align the address to VEC_SIZE * 2 for the loop.  */
	andq	$-(VEC_SIZE * 2), %rdi
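	/* The loop's first loads at (VEC_SIZE * 2)(%rdi) may again
	   overlap the second vector; this is harmless for the same
	   reason as above.  */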

	.p2align 4,,11
L(loop):
	/* 2-vector loop, as it provides better performance than a
	   4-vector loop.  */
	VMOVA	(VEC_SIZE * 2)(%rdi), %VMM(3)
	VMOVA	(VEC_SIZE * 3)(%rdi), %VMM(4)
	VPCMPEQ	%VMM(3), %VMM(0), %k1
	VPCMPEQ	%VMM(4), %VMM(0), %k2
	VPMINU	%VMM(3), %VMM(4), %VMM(5)
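	/* The unsigned minimum is zero in a lane iff at least one of
	   the two vectors is zero there, so a single VPTESTN on the
	   minimum checks both loop vectors for the end of the
	   string.  */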
	VPTESTN	%VMM(5), %VMM(5), %k0
	KOR	%k1, %k2, %k3
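	/* subq of a negative immediate instead of addq: when
	   VEC_SIZE * 2 == 128 the immediate -128 still fits in a
	   sign-extended byte, giving a shorter encoding.  */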
	subq	$-(VEC_SIZE * 2), %rdi
	/* If k0 and k3 are both zero, neither a match nor the end of
	   the string was found.  */
	KORTEST	%k0, %k3
	jz	L(loop)

	/* If k0 is non-zero, the end of the string was found.  */
	KORTEST	%k0, %k0
	jnz	L(endloop)

	/* A match was found; it needs to be stored in r8 before the
	   loop continues.  */
	lea	VEC_SIZE(%rdi), %r8
	/* Check the second vector first.  */
	KMOV	%k2, %VRDX
	test	%VRDX, %VRDX
	jnz	L(loop_vec_x2_match)

	KMOV	%k1, %VRDX
	/* The match is in the first vector; subtract VEC_SIZE back
	   off of r8.  */
	sub	$VEC_SIZE, %r8

	/* If the second vector doesn't have a match, the first vector
	   must have one.  */
L(loop_vec_x2_match):
	BSR	%VRDX, %VRDX
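	/* rdx now holds the [w]char index of the last match in the
	   vector; for wide characters scale it to a byte offset.  */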
# ifdef USE_AS_WCSRCHR
	sal	$2, %rdx
# endif
	add	%rdx, %r8
	jmp	L(loop)

L(endloop):
	/* Check if the string ends in the first loop vector.  */
	VPTESTN	%VMM(3), %VMM(3), %k0
	KMOV	%k0, %VRCX
	test	%VRCX, %VRCX
	jnz	L(loop_vector_x1_end)

	/* Check for a match in the first loop vector.  */
	KMOV	%k1, %VRAX
	test	%VRAX, %VRAX
	jz	L(loop_vector_x2_end)

	BSR	%VRAX, %VRAX
	leaq	(%rdi, %rax, CHAR_SIZE), %r8

	/* The string must end in the second loop vector.  */
L(loop_vector_x2_end):
	VPTESTN	%VMM(4), %VMM(4), %k0
	KMOV	%k0, %VRCX
	KMOV	%k2, %VRAX
	BLSMSK	%VRCX, %VRCX
	/* Check for a match in the second loop vector.  */
	and	%VRCX, %VRAX
	jz	L(check_last_match)

	BSR	%VRAX, %VRAX
	leaq	(VEC_SIZE)(%rdi, %rax, CHAR_SIZE), %rax
	ret

	/* The string ends in the first loop vector.  */
L(loop_vector_x1_end):
	KMOV	%k1, %VRAX
	BLSMSK	%VRCX, %VRCX
	/* Check for a match in the first loop vector.  */
	and	%VRCX, %VRAX
	jz	L(check_last_match)

	BSR	%VRAX, %VRAX
	leaq	(%rdi, %rax, CHAR_SIZE), %rax
	ret

	/* No match in either loop vector.  */
L(check_last_match):
	/* Check if any match was recorded in r8.  */
	test	%r8, %r8
	jz	L(vector_x2_ret)
	movq	%r8, %rax
	ret

	/* No match recorded in r8.  Check the second vector saved at
	   the beginning.  */
L(vector_x2_ret):
	VPCMPEQ	%VMM(2), %VMM(0), %k2
	KMOV	%k2, %VRAX
	test	%VRAX, %VRAX
	jz	L(vector_x1_ret)

	/* Match found in the second saved vector.  */
	BSR	%VRAX, %VRAX
	leaq	(VEC_SIZE)(%r9, %rax, CHAR_SIZE), %rax
	ret

L(page_cross):
	mov	%rdi, %rax
	movl	%edi, %ecx

# ifdef USE_AS_WCSRCHR
	/* Calculate the number of compare-result bits to be skipped
	   for wide-string alignment adjustment.  */
	andl	$(VEC_SIZE - 1), %ecx
	sarl	$2, %ecx
# endif
	/* ecx contains the number of [w]chars to be skipped as a
	   result of address alignment.  */
	andq	$-VEC_SIZE, %rax
	VMOVA	(%rax), %VMM(1)
	VPTESTN	%VMM(1), %VMM(1), %k1
	KMOV	%k1, %VRAX
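	/* The aligned load started before rdi; shift the masks right
	   so bits for [w]chars preceding the string start are
	   discarded.  */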
	SHR	%cl, %VRAX
	jz	L(page_cross_continue)
	VPCMPEQ	%VMM(1), %VMM(0), %k0
	KMOV	%k0, %VRDX
	SHR	%cl, %VRDX
	BLSMSK	%VRAX, %VRAX
	and	%VRDX, %VRAX
	jz	L(ret)
	BSR	%VRAX, %VRAX
# ifdef USE_AS_WCSRCHR
	leaq	(%rdi, %rax, CHAR_SIZE), %rax
# else
	add	%rdi, %rax
# endif

	ret
END (STRRCHR)
#endif