1 | /* strchr with SSE2 without bsf |
2 | Copyright (C) 2011-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #if IS_IN (libc) |
20 | |
21 | # include <sysdep.h> |
22 | # include "asm-syntax.h" |
23 | |
24 | atom_text_section |
/* __strchr_sse2_no_bsf: locate the first byte equal to the low byte of %esi
   in the NUL-terminated string at %rdi.

   In:    %rdi = string pointer s
          %esi = character c (only the low byte ends up in the compare vector)
   Out:   %rax = address of the first byte equal to c, or 0 when a NUL byte
          is reached before any match.  A byte that is both NUL and a match
          (c == 0) is returned as a match, because every chain below tests
          the match bit of a byte before the NUL bit of the same byte.
   Uses:  %rax, %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3 and the flags.
          The visible instructions only load from memory; nothing is stored.

   Method: the string is read in 16-byte aligned chunks with movdqa.  For
   each chunk two 16-bit masks are built with pcmpeqb/pmovmskb: %eax has bit i
   set when byte i == c, %edx has bit i set when byte i == 0.  The first
   chunk may begin before s, so both masks are trimmed with (~0 << offset).
   Bit positions are then located with chains of test/jnz rather than bsf.
   ENTRY, END and L() are macros defined outside this file.  */
25 | ENTRY (__strchr_sse2_no_bsf) |
26 | movd %esi, %xmm1 /* xmm1 low dword = %esi; its low byte is the search character c */ |
27 | movq %rdi, %rcx /* rcx = original string pointer s */ |
28 | punpcklbw %xmm1, %xmm1 /* duplicate bytes: low word of xmm1 = c,c */ |
29 | andq $~15, %rdi /* rdi = s rounded down to a 16-byte boundary */ |
30 | pxor %xmm2, %xmm2 /* xmm2 = all zero bytes, used to detect NUL */ |
31 | punpcklbw %xmm1, %xmm1 /* duplicate again: low dword of xmm1 = c,c,c,c */ |
32 | orl $0xffffffff, %esi /* esi = all ones (c is already in xmm1); becomes the first-chunk mask */ |
33 | movdqa (%rdi), %xmm0 /* aligned load of the chunk containing s (may include bytes before s) */ |
34 | pshufd $0, %xmm1, %xmm1 /* broadcast low dword: all 16 bytes of xmm1 = c */ |
35 | subq %rdi, %rcx /* rcx = s - aligned base = offset of s inside the chunk (0..15) */ |
36 | movdqa %xmm0, %xmm3 /* xmm3 = second copy of the chunk for the NUL compare */ |
37 | leaq 16(%rdi), %rdi /* rdi = address of the next chunk */ |
38 | pcmpeqb %xmm1, %xmm0 /* xmm0 byte = 0xff where chunk byte == c */ |
39 | pcmpeqb %xmm2, %xmm3 /* xmm3 byte = 0xff where chunk byte == 0 */ |
40 | shl %cl, %esi /* esi = ~0 << offset: zero bits for the bytes that precede s */ |
41 | pmovmskb %xmm0, %eax /* eax = 16-bit match mask, bit i set when byte i == c */ |
42 | pmovmskb %xmm3, %edx /* edx = 16-bit NUL mask, bit i set when byte i == 0 */ |
43 | andl %esi, %eax /* discard matches located before s */ |
44 | andl %esi, %edx /* discard NULs located before s */ |
45 | test %eax, %eax |
46 | jnz L(matches) /* c occurs in the first chunk; it still has to be ordered against any NUL */ |
47 | test %edx, %edx |
48 | jnz L(return_null) /* a NUL but no match: c is not in the string */ |
49 | |
50 | L(loop): /* Scan one aligned 16-byte chunk per iteration until a match or a NUL shows up. */ |
51 | movdqa (%rdi), %xmm0 |
52 | leaq 16(%rdi), %rdi /* rdi is now 16 past the chunk just loaded */ |
53 | movdqa %xmm0, %xmm3 |
54 | pcmpeqb %xmm1, %xmm0 /* bytes equal to c */ |
55 | pcmpeqb %xmm2, %xmm3 /* bytes equal to 0 */ |
56 | pmovmskb %xmm0, %eax /* eax = match mask */ |
57 | pmovmskb %xmm3, %edx /* edx = NUL mask */ |
58 | or %eax, %edx /* ZF is set only when both masks are zero; this overwrites edx */ |
59 | jz L(loop) |
60 | |
61 | pmovmskb %xmm3, %edx /* rebuild the NUL mask that the or overwrote */ |
62 | test %eax, %eax |
63 | jnz L(matches) /* otherwise only a NUL was seen: fall through to return_null */ |
64 | |
65 | /* Return NULL. */ |
66 | .p2align 4 |
67 | L(return_null): |
68 | xor %rax, %rax /* rax = 0 */ |
69 | ret |
70 | |
71 | L(matches): |
72 | /* There is a match. First find where NULL is. */ |
73 | leaq -16(%rdi), %rdi /* rdi = base of the chunk that produced eax/edx */ |
74 | test %edx, %edx |
75 | jz L(match_case1) /* no NUL in this chunk: the lowest match bit wins */ |
76 | |
77 | .p2align 4 |
78 | L(match_case2): /* The chunk holds both a match and a NUL: the match counts only if no NUL comes before it. */ |
79 | test %al, %al /* al = match bits for bytes 0..7 */ |
80 | jz L(match_high_case2) |
81 | |
82 | mov %al, %cl |
83 | and $15, %cl /* cl = match bits for bytes 0..3 */ |
84 | jnz L(match_case2_4) |
85 | |
86 | mov %dl, %ch |
87 | and $15, %ch /* ch = NUL bits for bytes 0..3 */ |
88 | jnz L(return_null) /* NUL in bytes 0..3 while the first match is at byte 4 or later */ |
89 | |
90 | test $0x10, %al /* bytes 4..6: for each byte test the match bit, then the NUL bit */ |
91 | jnz L(Exit5) |
92 | test $0x10, %dl |
93 | jnz L(return_null) |
94 | test $0x20, %al |
95 | jnz L(Exit6) |
96 | test $0x20, %dl |
97 | jnz L(return_null) |
98 | test $0x40, %al |
99 | jnz L(Exit7) |
100 | test $0x40, %dl |
101 | jnz L(return_null) |
102 | lea 7(%rdi), %rax /* al != 0 with bits 0..6 clear, so the match is byte 7 */ |
103 | ret |
104 | |
105 | .p2align 4 |
106 | L(match_case2_4): /* A match exists in bytes 0..3: walk bytes 0..2, match bit before NUL bit. */ |
107 | test $0x01, %al |
108 | jnz L(Exit1) |
109 | test $0x01, %dl |
110 | jnz L(return_null) |
111 | test $0x02, %al |
112 | jnz L(Exit2) |
113 | test $0x02, %dl |
114 | jnz L(return_null) |
115 | test $0x04, %al |
116 | jnz L(Exit3) |
117 | test $0x04, %dl |
118 | jnz L(return_null) |
119 | lea 3(%rdi), %rax /* low nibble of al != 0 with bits 0..2 clear, so the match is byte 3 */ |
120 | ret |
121 | |
122 | .p2align 4 |
123 | L(match_high_case2): /* al == 0: no match in bytes 0..7, so the first match is in bytes 8..15. */ |
124 | test %dl, %dl |
125 | jnz L(return_null) /* a NUL in bytes 0..7 precedes every match */ |
126 | |
127 | mov %ah, %cl |
128 | and $15, %cl /* cl = match bits for bytes 8..11 */ |
129 | jnz L(match_case2_12) |
130 | |
131 | mov %dh, %ch |
132 | and $15, %ch /* ch = NUL bits for bytes 8..11 */ |
133 | jnz L(return_null) /* NUL in bytes 8..11 while the first match is at byte 12 or later */ |
134 | |
135 | test $0x10, %ah /* bytes 12..14: for each byte test the match bit, then the NUL bit */ |
136 | jnz L(Exit13) |
137 | test $0x10, %dh |
138 | jnz L(return_null) |
139 | test $0x20, %ah |
140 | jnz L(Exit14) |
141 | test $0x20, %dh |
142 | jnz L(return_null) |
143 | test $0x40, %ah |
144 | jnz L(Exit15) |
145 | test $0x40, %dh |
146 | jnz L(return_null) |
147 | lea 15(%rdi), %rax /* only bit 7 of ah can remain set, so the match is byte 15 */ |
148 | ret |
149 | |
150 | .p2align 4 |
151 | L(match_case2_12): /* A match exists in bytes 8..11: walk bytes 8..10, match bit before NUL bit. */ |
152 | test $0x01, %ah |
153 | jnz L(Exit9) |
154 | test $0x01, %dh |
155 | jnz L(return_null) |
156 | test $0x02, %ah |
157 | jnz L(Exit10) |
158 | test $0x02, %dh |
159 | jnz L(return_null) |
160 | test $0x04, %ah |
161 | jnz L(Exit11) |
162 | test $0x04, %dh |
163 | jnz L(return_null) |
164 | lea 11(%rdi), %rax /* low nibble of ah != 0 with bits 0..2 clear, so the match is byte 11 */ |
165 | ret |
166 | |
167 | .p2align 4 |
168 | L(match_case1): /* No NUL in the chunk: return the byte for the lowest set bit of the match mask. */ |
169 | test %al, %al |
170 | jz L(match_high_case1) /* nothing in bytes 0..7, look at bytes 8..15 */ |
171 | |
172 | test $0x01, %al |
173 | jnz L(Exit1) |
174 | test $0x02, %al |
175 | jnz L(Exit2) |
176 | test $0x04, %al |
177 | jnz L(Exit3) |
178 | test $0x08, %al |
179 | jnz L(Exit4) |
180 | test $0x10, %al |
181 | jnz L(Exit5) |
182 | test $0x20, %al |
183 | jnz L(Exit6) |
184 | test $0x40, %al |
185 | jnz L(Exit7) |
186 | lea 7(%rdi), %rax /* al != 0 with bits 0..6 clear, so the match is byte 7 */ |
187 | ret |
188 | |
189 | .p2align 4 |
190 | L(match_high_case1): /* al == 0 and no NUL: the first match is the lowest set bit of ah (bytes 8..15). */ |
191 | test $0x01, %ah |
192 | jnz L(Exit9) |
193 | test $0x02, %ah |
194 | jnz L(Exit10) |
195 | test $0x04, %ah |
196 | jnz L(Exit11) |
197 | test $0x08, %ah |
198 | jnz L(Exit12) |
199 | test $0x10, %ah |
200 | jnz L(Exit13) |
201 | test $0x20, %ah |
202 | jnz L(Exit14) |
203 | test $0x40, %ah |
204 | jnz L(Exit15) |
205 | lea 15(%rdi), %rax /* only bit 7 of ah can remain set, so the match is byte 15 */ |
206 | ret |
207 | |
208 | .p2align 4 |
209 | L(Exit1): /* Each Exit<N> label returns the address of byte N-1 of the current chunk (rdi = chunk base). */ |
210 | lea (%rdi), %rax /* byte 0 */ |
211 | ret |
212 | |
213 | .p2align 4 |
214 | L(Exit2): |
215 | lea 1(%rdi), %rax /* byte 1 */ |
216 | ret |
217 | |
218 | .p2align 4 |
219 | L(Exit3): |
220 | lea 2(%rdi), %rax /* byte 2 */ |
221 | ret |
222 | |
223 | .p2align 4 |
224 | L(Exit4): |
225 | lea 3(%rdi), %rax /* byte 3 */ |
226 | ret |
227 | |
228 | .p2align 4 |
229 | L(Exit5): |
230 | lea 4(%rdi), %rax /* byte 4 */ |
231 | ret |
232 | |
233 | .p2align 4 |
234 | L(Exit6): |
235 | lea 5(%rdi), %rax /* byte 5 */ |
236 | ret |
237 | |
238 | .p2align 4 |
239 | L(Exit7): |
240 | lea 6(%rdi), %rax /* byte 6 */ |
241 | ret |
242 | |
243 | .p2align 4 |
244 | L(Exit9): |
245 | lea 8(%rdi), %rax /* byte 8 */ |
246 | ret |
247 | |
248 | .p2align 4 |
249 | L(Exit10): |
250 | lea 9(%rdi), %rax /* byte 9 */ |
251 | ret |
252 | |
253 | .p2align 4 |
254 | L(Exit11): |
255 | lea 10(%rdi), %rax /* byte 10 */ |
256 | ret |
257 | |
258 | .p2align 4 |
259 | L(Exit12): |
260 | lea 11(%rdi), %rax /* byte 11 */ |
261 | ret |
262 | |
263 | .p2align 4 |
264 | L(Exit13): |
265 | lea 12(%rdi), %rax /* byte 12 */ |
266 | ret |
267 | |
268 | .p2align 4 |
269 | L(Exit14): |
270 | lea 13(%rdi), %rax /* byte 13 */ |
271 | ret |
272 | |
273 | .p2align 4 |
274 | L(Exit15): |
275 | lea 14(%rdi), %rax /* byte 14 */ |
276 | ret |
277 | |
278 | END (__strchr_sse2_no_bsf) |
279 | #endif |
280 | |