1 | /* strchr with SSE2 without bsf |
2 | Copyright (C) 2011-2021 Free Software Foundation, Inc. |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #if IS_IN (libc) |
21 | |
22 | # include <sysdep.h> |
23 | # include "asm-syntax.h" |
24 | |
25 | atom_text_section |
/* __strchr_sse2_no_bsf: find the first byte equal to the low byte of
   %esi in the zero-terminated string at %rdi.

   In:   %rdi = start of the string
         %esi = byte to look for (only the low 8 bits are used)
   Out:  %rax = address of the first matching byte, or 0 if a zero
         byte is reached first.  When the byte searched for is itself
         zero, the address of the terminating zero byte is returned,
         because the match bit is always tested before the terminator
         bit for the same position.
   Clobbers: %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3, flags.

   The string is read in 16-byte aligned chunks with movdqa, so bytes
   before the start and after the terminator that share a chunk with
   the string are loaded (and then ignored).  The position of the
   first set mask bit is found with test/branch chains rather than
   with the bsf instruction.

   NOTE(review): atom_text_section, ENTRY, END and L() are not defined
   in this file; presumably they come from the included headers --
   confirm.  */
26 | ENTRY (__strchr_sse2_no_bsf) |
/* First chunk.  The instructions below interleave several independent
   computations:
     - %xmm1 = the search byte replicated into all 16 lanes (movd, two
       punpcklbw, pshufd $0); %xmm2 = all zero.
     - %rcx = original %rdi, later reduced to the offset of the string
       start within its 16-byte chunk; %rdi = start rounded down to a
       16-byte boundary, then advanced to the following chunk.
     - %esi = 0xffffffff << %cl, a mask whose low bits (one per byte
       that precedes the string start) are clear.
     - %eax / %edx = per-byte masks of the lanes equal to the search
       byte / equal to zero, with the bits for bytes before the string
       start removed by the `and' with %esi.  */
27 | movd %esi, %xmm1 |
28 | movq %rdi, %rcx |
29 | punpcklbw %xmm1, %xmm1 |
30 | andq $~15, %rdi |
31 | pxor %xmm2, %xmm2 |
32 | punpcklbw %xmm1, %xmm1 |
33 | orl $0xffffffff, %esi |
34 | movdqa (%rdi), %xmm0 |
35 | pshufd $0, %xmm1, %xmm1 |
36 | subq %rdi, %rcx |
37 | movdqa %xmm0, %xmm3 |
38 | leaq 16(%rdi), %rdi |
39 | pcmpeqb %xmm1, %xmm0 |
40 | pcmpeqb %xmm2, %xmm3 |
41 | shl %cl, %esi |
42 | pmovmskb %xmm0, %eax |
43 | pmovmskb %xmm3, %edx |
44 | andl %esi, %eax |
45 | andl %esi, %edx |
/* A match in the first chunk is resolved in L(matches), which also
   takes a terminator in the same chunk into account.  A terminator
   without any match means the byte does not occur in the string.  */
46 | test %eax, %eax |
47 | jnz L(matches) |
48 | test %edx, %edx |
49 | jnz L(return_null) |
50 | |
/* Main loop.  %rdi points at the next unread aligned chunk at the top
   of each iteration and is advanced right after the load.  Keep
   looping while the chunk holds neither the search byte nor a zero
   byte.  */
51 | L(loop): |
52 | movdqa (%rdi), %xmm0 |
53 | leaq 16(%rdi), %rdi |
54 | movdqa %xmm0, %xmm3 |
55 | pcmpeqb %xmm1, %xmm0 |
56 | pcmpeqb %xmm2, %xmm3 |
57 | pmovmskb %xmm0, %eax |
58 | pmovmskb %xmm3, %edx |
59 | or %eax, %edx |
60 | jz L(loop) |
61 | |
/* The `or' above overwrote %edx, so rebuild the zero-byte mask from
   %xmm3; %eax still holds the match mask.  If there is no match the
   loop ended on a zero byte only, and execution falls through to
   L(return_null).  */
62 | pmovmskb %xmm3, %edx |
63 | test %eax, %eax |
64 | jnz L(matches) |
65 | |
66 | /* Return NULL. */ |
67 | .p2align 4 |
68 | L(return_null): |
69 | xor %rax, %rax |
70 | ret |
71 | |
72 | L(matches): |
73 | /* There is a match. First find where NULL is. */ |
/* "NULL" in the comment above refers to the terminating zero byte.
   On entry %eax is the (non-zero) match mask and %edx the zero-byte
   mask of the chunk just examined.  %rdi has already been advanced
   past that chunk, so step it back to the chunk start.  If %edx is
   zero the chunk holds no terminator and the lowest match is returned
   unconditionally by L(match_case1).  */
74 | leaq -16(%rdi), %rdi |
75 | test %edx, %edx |
76 | jz L(match_case1) |
77 | |
78 | .p2align 4 |
/* Case 2: the chunk contains both a match and a zero byte.  Walk the
   byte positions from low to high; at each position the match bit is
   tested first, then the zero-byte bit.  The first match wins unless
   a zero byte occurs at a lower position, in which case 0 is
   returned.  The walk is split into groups of four bytes so that a
   whole group can be skipped with one nibble test.  */
79 | L(match_case2): |
/* %al == 0: no match in bytes 0-7, continue with the high byte.  */
80 | test %al, %al |
81 | jz L(match_high_case2) |
82 | |
/* A match in bytes 0-3 is handled by L(match_case2_4).  */
83 | mov %al, %cl |
84 | and $15, %cl |
85 | jnz L(match_case2_4) |
86 | |
/* No match in bytes 0-3, so a zero byte there ends the search.  */
87 | mov %dl, %ch |
88 | and $15, %ch |
89 | jnz L(return_null) |
90 | |
/* Bytes 4-6, match bit before zero-byte bit.  */
91 | test $0x10, %al |
92 | jnz L(Exit5) |
93 | test $0x10, %dl |
94 | jnz L(return_null) |
95 | test $0x20, %al |
96 | jnz L(Exit6) |
97 | test $0x20, %dl |
98 | jnz L(return_null) |
99 | test $0x40, %al |
100 | jnz L(Exit7) |
101 | test $0x40, %dl |
102 | jnz L(return_null) |
/* %al is non-zero with bits 0-6 clear, so bit 7 is set and no zero
   byte precedes it: the match is byte 7.  */
103 | lea 7(%rdi), %rax |
104 | ret |
105 | |
106 | .p2align 4 |
/* Case 2, match somewhere in bytes 0-3: test bytes 0-2 in order.  */
107 | L(match_case2_4): |
108 | test $0x01, %al |
109 | jnz L(Exit1) |
110 | test $0x01, %dl |
111 | jnz L(return_null) |
112 | test $0x02, %al |
113 | jnz L(Exit2) |
114 | test $0x02, %dl |
115 | jnz L(return_null) |
116 | test $0x04, %al |
117 | jnz L(Exit3) |
118 | test $0x04, %dl |
119 | jnz L(return_null) |
/* The low nibble of %al is non-zero with bits 0-2 clear, so the match
   is byte 3 and no zero byte precedes it.  */
120 | lea 3(%rdi), %rax |
121 | ret |
122 | |
123 | .p2align 4 |
/* Case 2 with no match in bytes 0-7 (%al == 0).  %eax is non-zero, so
   the match is in bytes 8-15 and its bit is in %ah.  Any zero byte in
   bytes 0-7 (%dl != 0) therefore comes first.  Otherwise repeat the
   same walk on %ah / %dh for bytes 8-15.  */
124 | L(match_high_case2): |
125 | test %dl, %dl |
126 | jnz L(return_null) |
127 | |
/* A match in bytes 8-11 is handled by L(match_case2_12).  */
128 | mov %ah, %cl |
129 | and $15, %cl |
130 | jnz L(match_case2_12) |
131 | |
/* No match in bytes 8-11, so a zero byte there ends the search.  */
132 | mov %dh, %ch |
133 | and $15, %ch |
134 | jnz L(return_null) |
135 | |
/* Bytes 12-14, match bit before zero-byte bit.  */
136 | test $0x10, %ah |
137 | jnz L(Exit13) |
138 | test $0x10, %dh |
139 | jnz L(return_null) |
140 | test $0x20, %ah |
141 | jnz L(Exit14) |
142 | test $0x20, %dh |
143 | jnz L(return_null) |
144 | test $0x40, %ah |
145 | jnz L(Exit15) |
146 | test $0x40, %dh |
147 | jnz L(return_null) |
/* Only bit 7 of %ah can be set here: the match is byte 15.  */
148 | lea 15(%rdi), %rax |
149 | ret |
150 | |
151 | .p2align 4 |
/* Case 2, match somewhere in bytes 8-11: test bytes 8-10 in order.  */
152 | L(match_case2_12): |
153 | test $0x01, %ah |
154 | jnz L(Exit9) |
155 | test $0x01, %dh |
156 | jnz L(return_null) |
157 | test $0x02, %ah |
158 | jnz L(Exit10) |
159 | test $0x02, %dh |
160 | jnz L(return_null) |
161 | test $0x04, %ah |
162 | jnz L(Exit11) |
163 | test $0x04, %dh |
164 | jnz L(return_null) |
/* The low nibble of %ah is non-zero with bits 0-2 clear, so the match
   is byte 11 and no zero byte precedes it.  */
165 | lea 11(%rdi), %rax |
166 | ret |
167 | |
168 | .p2align 4 |
/* Case 1: the chunk contains a match and no zero byte.  Return the
   lowest set bit of the match mask, found by testing the bits of %al
   one at a time.  */
169 | L(match_case1): |
170 | test %al, %al |
171 | jz L(match_high_case1) |
172 | |
173 | test $0x01, %al |
174 | jnz L(Exit1) |
175 | test $0x02, %al |
176 | jnz L(Exit2) |
177 | test $0x04, %al |
178 | jnz L(Exit3) |
179 | test $0x08, %al |
180 | jnz L(Exit4) |
181 | test $0x10, %al |
182 | jnz L(Exit5) |
183 | test $0x20, %al |
184 | jnz L(Exit6) |
185 | test $0x40, %al |
186 | jnz L(Exit7) |
/* %al is non-zero with bits 0-6 clear: the match is byte 7.  */
187 | lea 7(%rdi), %rax |
188 | ret |
189 | |
190 | .p2align 4 |
/* Case 1 with %al == 0: the lowest match is in bytes 8-15, so test
   the bits of %ah one at a time.  */
191 | L(match_high_case1): |
192 | test $0x01, %ah |
193 | jnz L(Exit9) |
194 | test $0x02, %ah |
195 | jnz L(Exit10) |
196 | test $0x04, %ah |
197 | jnz L(Exit11) |
198 | test $0x08, %ah |
199 | jnz L(Exit12) |
200 | test $0x10, %ah |
201 | jnz L(Exit13) |
202 | test $0x20, %ah |
203 | jnz L(Exit14) |
204 | test $0x40, %ah |
205 | jnz L(Exit15) |
/* %ah is non-zero with bits 0-6 clear: the match is byte 15.  */
206 | lea 15(%rdi), %rax |
207 | ret |
208 | |
/* L(ExitN): return the address of the Nth byte (counting from 1) of
   the current chunk, i.e. %rdi + N - 1.  There are no Exit8 or Exit16
   labels; bytes 7 and 15 are returned by the fall-through
   `lea 7(%rdi)' and `lea 15(%rdi)' at the end of the chains above.
   L(Exit4) and L(Exit12) are reached only from the case 1 chains; the
   case 2 chains return bytes 3 and 11 by fall-through instead.  */
209 | .p2align 4 |
210 | L(Exit1): |
211 | lea (%rdi), %rax |
212 | ret |
213 | |
214 | .p2align 4 |
215 | L(Exit2): |
216 | lea 1(%rdi), %rax |
217 | ret |
218 | |
219 | .p2align 4 |
220 | L(Exit3): |
221 | lea 2(%rdi), %rax |
222 | ret |
223 | |
224 | .p2align 4 |
225 | L(Exit4): |
226 | lea 3(%rdi), %rax |
227 | ret |
228 | |
229 | .p2align 4 |
230 | L(Exit5): |
231 | lea 4(%rdi), %rax |
232 | ret |
233 | |
234 | .p2align 4 |
235 | L(Exit6): |
236 | lea 5(%rdi), %rax |
237 | ret |
238 | |
239 | .p2align 4 |
240 | L(Exit7): |
241 | lea 6(%rdi), %rax |
242 | ret |
243 | |
244 | .p2align 4 |
245 | L(Exit9): |
246 | lea 8(%rdi), %rax |
247 | ret |
248 | |
249 | .p2align 4 |
250 | L(Exit10): |
251 | lea 9(%rdi), %rax |
252 | ret |
253 | |
254 | .p2align 4 |
255 | L(Exit11): |
256 | lea 10(%rdi), %rax |
257 | ret |
258 | |
259 | .p2align 4 |
260 | L(Exit12): |
261 | lea 11(%rdi), %rax |
262 | ret |
263 | |
264 | .p2align 4 |
265 | L(Exit13): |
266 | lea 12(%rdi), %rax |
267 | ret |
268 | |
269 | .p2align 4 |
270 | L(Exit14): |
271 | lea 13(%rdi), %rax |
272 | ret |
273 | |
274 | .p2align 4 |
275 | L(Exit15): |
276 | lea 14(%rdi), %rax |
277 | ret |
278 | |
279 | END (__strchr_sse2_no_bsf) |
280 | #endif |
281 | |