1/* strchr with SSE2 without bsf
2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#if IS_IN (libc)
20
21# include <sysdep.h>
22# include "asm-syntax.h"
23
24 atom_text_section
/* strchr for x86-64 using SSE2 byte compares.  The compare masks are
   decoded with a ladder of bit tests instead of bsf.

   As read by the code below:
     %rdi  = address of the string
     %esi  = byte to search for (only the low 8 bits are used)
   Result:
     %rax  = address of the first byte equal to the search byte that
	     is not preceded by a NUL byte, or 0 if a NUL byte comes
	     first.  A search for 0 returns the address of the first
	     NUL byte.
   Clobbers: %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3, flags.

   The string is read in 16-byte aligned blocks with movdqa.  The
   first block is the aligned block that contains the start of the
   string; the mask bits belonging to bytes in front of the string
   are cleared before the masks are examined.  */
ENTRY (__strchr_sse2_no_bsf)
	/* Broadcast the search byte to all 16 lanes of %xmm1 and zero
	   %xmm2 for the NUL compare.  This is interleaved with the
	   setup and handling of the first aligned block.  */
	movd	%esi, %xmm1
	movq	%rdi, %rcx		/* Keep the unaligned start.  */
	punpcklbw %xmm1, %xmm1
	andq	$~15, %rdi		/* Round down to a 16-byte block.  */
	pxor	%xmm2, %xmm2
	punpcklbw %xmm1, %xmm1
	orl	$0xffffffff, %esi	/* %esi = all ones.  */
	movdqa	(%rdi), %xmm0
	pshufd	$0, %xmm1, %xmm1
	subq	%rdi, %rcx		/* %rcx = start offset, 0..15.  */
	movdqa	%xmm0, %xmm3
	leaq	16(%rdi), %rdi		/* Point past the first block.  */
	pcmpeqb	%xmm1, %xmm0		/* Lanes equal to the search byte.  */
	pcmpeqb	%xmm2, %xmm3		/* Lanes equal to NUL.  */
	shl	%cl, %esi		/* Clear bits below the start.  */
	pmovmskb %xmm0, %eax		/* %eax = match mask.  */
	pmovmskb %xmm3, %edx		/* %edx = NUL mask.  */
	andl	%esi, %eax
	andl	%esi, %edx
	test	%eax, %eax
	jnz	L(matches)
	test	%edx, %edx
	jnz	L(return_null)

	/* Main loop: one aligned 16-byte block per iteration.  %rdi is
	   advanced before the masks are tested, so on exit it points
	   one block past the data that was examined.  */
L(loop):
	movdqa	(%rdi), %xmm0
	leaq	16(%rdi), %rdi
	movdqa	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	or	%eax, %edx		/* Any match or NUL in this block?  */
	jz	L(loop)

	/* The or above overwrote %edx; recompute the NUL mask.  */
	pmovmskb %xmm3, %edx
	test	%eax, %eax
	jnz	L(matches)

/* Return NULL.  Reached by falling through the alignment padding when
   the block holds a NUL byte but no match, and by jumps from the
   ladders below when a NUL byte precedes the first match.  */
	.p2align 4
L(return_null):
	/* The 32-bit form zeroes all of %rax, since a write to %eax
	   clears the upper 32 bits, and needs no REX prefix.  */
	xor	%eax, %eax
	ret

L(matches):
	/* There is a match.  First find where NULL is.
	   %eax is the match mask and %edx the NUL mask of the block
	   that starts at %rdi - 16; bit N stands for byte N.  */
	leaq	-16(%rdi), %rdi
	test	%edx, %edx
	jz	L(match_case1)

	/* Case 2: the block holds both a match and a NUL byte.  Walk
	   the bits from low to high, testing the match bit before the
	   NUL bit at each position, so that a NUL byte below the first
	   match yields NULL.  Entered by fall-through.  */
	.p2align 4
L(match_case2):
	test	%al, %al
	jz	L(match_high_case2)

	/* A match lies in bytes 0..7.  */
	mov	%al, %cl
	and	$15, %cl
	jnz	L(match_case2_4)

	/* The first match is in bytes 4..7; any NUL byte in bytes 0..3
	   precedes it.  */
	mov	%dl, %ch
	and	$15, %ch
	jnz	L(return_null)

	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x10, %dl
	jnz	L(return_null)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x20, %dl
	jnz	L(return_null)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x40, %dl
	jnz	L(return_null)
	/* Only bit 7 of the match mask is left.  */
	lea	7(%rdi), %rax
	ret

	/* Case 2, a match lies in bytes 0..3.  */
	.p2align 4
L(match_case2_4):
	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x01, %dl
	jnz	L(return_null)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x02, %dl
	jnz	L(return_null)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x04, %dl
	jnz	L(return_null)
	/* Only bit 3 of the match mask is left.  */
	lea	3(%rdi), %rax
	ret

	/* Case 2, no match in bytes 0..7, so the first match is in
	   bytes 8..15.  Any NUL byte in bytes 0..7 precedes it.  */
	.p2align 4
L(match_high_case2):
	test	%dl, %dl
	jnz	L(return_null)

	mov	%ah, %cl
	and	$15, %cl
	jnz	L(match_case2_12)

	/* The first match is in bytes 12..15; any NUL byte in bytes
	   8..11 precedes it.  */
	mov	%dh, %ch
	and	$15, %ch
	jnz	L(return_null)

	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x10, %dh
	jnz	L(return_null)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x20, %dh
	jnz	L(return_null)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x40, %dh
	jnz	L(return_null)
	/* Only bit 15 of the match mask is left.  */
	lea	15(%rdi), %rax
	ret

	/* Case 2, a match lies in bytes 8..11.  */
	.p2align 4
L(match_case2_12):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x01, %dh
	jnz	L(return_null)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x02, %dh
	jnz	L(return_null)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x04, %dh
	jnz	L(return_null)
	/* Only bit 11 of the match mask is left.  */
	lea	11(%rdi), %rax
	ret

	/* Case 1: the block holds a match and no NUL byte, so the
	   lowest set bit of the match mask is the answer.  */
	.p2align 4
L(match_case1):
	test	%al, %al
	jz	L(match_high_case1)

	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
	/* Only bit 7 of the match mask is left.  */
	lea	7(%rdi), %rax
	ret

	/* Case 1, the first match is in bytes 8..15.  */
	.p2align 4
L(match_high_case1):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	/* Only bit 15 of the match mask is left.  */
	lea	15(%rdi), %rax
	ret

	/* L(ExitN) returns the address of byte N - 1 of the block at
	   %rdi.  There are no Exit8 and Exit16 labels: bytes 7 and 15
	   are returned inline by the ladders above.  */
	.p2align 4
L(Exit1):
	lea	(%rdi), %rax
	ret

	.p2align 4
L(Exit2):
	lea	1(%rdi), %rax
	ret

	.p2align 4
L(Exit3):
	lea	2(%rdi), %rax
	ret

	.p2align 4
L(Exit4):
	lea	3(%rdi), %rax
	ret

	.p2align 4
L(Exit5):
	lea	4(%rdi), %rax
	ret

	.p2align 4
L(Exit6):
	lea	5(%rdi), %rax
	ret

	.p2align 4
L(Exit7):
	lea	6(%rdi), %rax
	ret

	.p2align 4
L(Exit9):
	lea	8(%rdi), %rax
	ret

	.p2align 4
L(Exit10):
	lea	9(%rdi), %rax
	ret

	.p2align 4
L(Exit11):
	lea	10(%rdi), %rax
	ret

	.p2align 4
L(Exit12):
	lea	11(%rdi), %rax
	ret

	.p2align 4
L(Exit13):
	lea	12(%rdi), %rax
	ret

	.p2align 4
L(Exit14):
	lea	13(%rdi), %rax
	ret

	.p2align 4
L(Exit15):
	lea	14(%rdi), %rax
	ret

END (__strchr_sse2_no_bsf)
279#endif
280