1 | /* strchr with SSE2 without bsf |
2 | Copyright (C) 2011-2023 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <isa-level.h> |
20 | |
21 | /* NB: atom builds with ISA level == 1 so no reason to hold onto this |
22 | at ISA level >= 2. */ |
23 | #if ISA_SHOULD_BUILD (1) |
24 | |
25 | # include <sysdep.h> |
26 | # include "asm-syntax.h" |
27 | |
28 | atom_text_section |
/* __strchr_sse2_no_bsf: SSE2 strchr that finds the position of the
   result with explicit bit tests on the pmovmskb masks instead of
   using a bsf instruction.

   In:       %rdi = string pointer (called s below)
             %esi = character to find.  Only its low byte is used, it
                    is broadcast to all 16 bytes of %xmm1.
   Out:      %rax = address of the first byte equal to the character,
                    or 0 when a NUL byte comes first.  A byte that is
                    both a match and NUL (only possible when the
                    searched byte is 0) is returned as a match.
   Clobbers: %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3, flags.

   The string is read in aligned 16-byte chunks.  For every chunk
   %eax is the bitmask of bytes equal to the character and %edx is
   the bitmask of NUL bytes, bit i describing byte i of the chunk.  */
29 | ENTRY (__strchr_sse2_no_bsf) |
/* First chunk.  The instructions below are interleaved, together
   they compute:
     %xmm1 = low byte of %esi replicated 16 times (movd, punpcklbw
             twice, pshufd)
     %xmm2 = all zero bytes (pxor), used to detect NUL
     %rdi  = s rounded down to a multiple of 16, then advanced by 16
             right after the load
     %rcx  = s minus the rounded-down address, i.e. 0..15
     %esi  = 0xffffffff shifted left by %cl, a mask whose low bits
             are clear for the chunk bytes that lie before s
   The aligned load can include bytes located before s, the mask in
   %esi removes them from both %eax and %edx.  */
30 | movd %esi, %xmm1 |
31 | movq %rdi, %rcx |
32 | punpcklbw %xmm1, %xmm1 |
33 | andq $~15, %rdi |
34 | pxor %xmm2, %xmm2 |
35 | punpcklbw %xmm1, %xmm1 |
/* %esi was already copied to %xmm1, reuse it for the start mask.  */
36 | orl $0xffffffff, %esi |
37 | movdqa (%rdi), %xmm0 |
38 | pshufd $0, %xmm1, %xmm1 |
39 | subq %rdi, %rcx |
40 | movdqa %xmm0, %xmm3 |
41 | leaq 16(%rdi), %rdi |
/* %xmm0 = bytes equal to the character, %xmm3 = bytes equal to 0.  */
42 | pcmpeqb %xmm1, %xmm0 |
43 | pcmpeqb %xmm2, %xmm3 |
44 | shl %cl, %esi |
45 | pmovmskb %xmm0, %eax |
46 | pmovmskb %xmm3, %edx |
/* Ignore matches and NUL bytes that precede the start of the string.  */
47 | andl %esi, %eax |
48 | andl %esi, %edx |
49 | test %eax, %eax |
50 | jnz L(matches) |
/* No match in the first chunk: a NUL here means the character is
   absent, otherwise continue with the following chunks.  */
51 | test %edx, %edx |
52 | jnz L(return_null) |
53 | |
/* Main loop: one aligned 16-byte chunk per iteration.  %rdi is moved
   past the chunk before the masks are tested.  The loop repeats
   while the chunk holds neither a match nor a NUL byte.  */
54 | L(loop): |
55 | movdqa (%rdi), %xmm0 |
56 | leaq 16(%rdi), %rdi |
57 | movdqa %xmm0, %xmm3 |
58 | pcmpeqb %xmm1, %xmm0 |
59 | pcmpeqb %xmm2, %xmm3 |
60 | pmovmskb %xmm0, %eax |
61 | pmovmskb %xmm3, %edx |
/* The or overwrites the NUL mask in %edx, it only serves the combined
   zero test.  */
62 | or %eax, %edx |
63 | jz L(loop) |
64 | |
/* Rebuild the NUL mask destroyed by the or above.  */
65 | pmovmskb %xmm3, %edx |
66 | test %eax, %eax |
67 | jnz L(matches) |
68 | |
/* No match in the chunk that ended the loop, so it ended on a NUL
   byte.  Execution falls through, across any alignment padding, into
   L(return_null).  */
69 | /* Return NULL. */ |
70 | .p2align 4 |
71 | L(return_null): |
72 | xor %rax, %rax |
73 | ret |
74 | |
/* Entered with %eax nonzero (at least one matching byte) and %edx
   holding the NUL mask of the same chunk.  %rdi already points past
   the chunk and is moved back to its first byte.  */
75 | L(matches): |
76 | /* There is a match. First find out whether the chunk also holds a NUL byte. */ |
77 | leaq -16(%rdi), %rdi |
78 | test %edx, %edx |
/* No NUL in the chunk: the lowest set bit of %eax is the answer.  */
79 | jz L(match_case1) |
80 | |
/* Case 2: the chunk holds both a match and a NUL byte, so the two
   masks are walked from bit 0 upwards.  At each position the match
   bit is tested before the NUL bit.  A set NUL bit reached first
   means the string ends before the match and 0 is returned.  */
81 | .p2align 4 |
82 | L(match_case2): |
/* No match in bytes 0-7: continue with the high byte of the masks.  */
83 | test %al, %al |
84 | jz L(match_high_case2) |
85 | |
/* %cl and %ch are scratch, the and instructions are executed only for
   their flags.  A nonzero low nibble of %al means a match in bytes
   0-3.  */
86 | mov %al, %cl |
87 | and $15, %cl |
88 | jnz L(match_case2_4) |
89 | |
/* The match is in bytes 4-7.  A NUL in bytes 0-3 comes before it.  */
90 | mov %dl, %ch |
91 | and $15, %ch |
92 | jnz L(return_null) |
93 | |
/* Bytes 4, 5 and 6 in order.  */
94 | test $0x10, %al |
95 | jnz L(Exit5) |
96 | test $0x10, %dl |
97 | jnz L(return_null) |
98 | test $0x20, %al |
99 | jnz L(Exit6) |
100 | test $0x20, %dl |
101 | jnz L(return_null) |
102 | test $0x40, %al |
103 | jnz L(Exit7) |
104 | test $0x40, %dl |
105 | jnz L(return_null) |
/* %al is nonzero with bits 0-6 clear, so the match is byte 7, and no
   NUL was found in bytes 0-6.  */
106 | lea 7(%rdi), %rax |
107 | ret |
108 | |
/* Case 2, match somewhere in bytes 0-3.  Bytes 0, 1 and 2 are checked
   in order.  */
109 | .p2align 4 |
110 | L(match_case2_4): |
111 | test $0x01, %al |
112 | jnz L(Exit1) |
113 | test $0x01, %dl |
114 | jnz L(return_null) |
115 | test $0x02, %al |
116 | jnz L(Exit2) |
117 | test $0x02, %dl |
118 | jnz L(return_null) |
119 | test $0x04, %al |
120 | jnz L(Exit3) |
121 | test $0x04, %dl |
122 | jnz L(return_null) |
/* The low nibble of %al is nonzero with bits 0-2 clear, so the match
   is byte 3, and no NUL was found in bytes 0-2.  */
123 | lea 3(%rdi), %rax |
124 | ret |
125 | |
/* Case 2 with no match in bytes 0-7.  Any NUL in bytes 0-7 therefore
   precedes the match.  Otherwise bytes 8-15 are examined through %ah
   and %dh in the same way as bytes 0-7 above.  */
126 | .p2align 4 |
127 | L(match_high_case2): |
128 | test %dl, %dl |
129 | jnz L(return_null) |
130 | |
/* A nonzero low nibble of %ah means a match in bytes 8-11.  */
131 | mov %ah, %cl |
132 | and $15, %cl |
133 | jnz L(match_case2_12) |
134 | |
/* The match is in bytes 12-15.  A NUL in bytes 8-11 comes before it.  */
135 | mov %dh, %ch |
136 | and $15, %ch |
137 | jnz L(return_null) |
138 | |
/* Bytes 12, 13 and 14 in order.  */
139 | test $0x10, %ah |
140 | jnz L(Exit13) |
141 | test $0x10, %dh |
142 | jnz L(return_null) |
143 | test $0x20, %ah |
144 | jnz L(Exit14) |
145 | test $0x20, %dh |
146 | jnz L(return_null) |
147 | test $0x40, %ah |
148 | jnz L(Exit15) |
149 | test $0x40, %dh |
150 | jnz L(return_null) |
/* Only bit 7 of %ah is left, so the match is byte 15.  This relies on
   %eax having no bits above bit 15, which holds because it comes from
   pmovmskb on an xmm register.  */
151 | lea 15(%rdi), %rax |
152 | ret |
153 | |
/* Case 2, match somewhere in bytes 8-11.  Bytes 8, 9 and 10 are
   checked in order.  */
154 | .p2align 4 |
155 | L(match_case2_12): |
156 | test $0x01, %ah |
157 | jnz L(Exit9) |
158 | test $0x01, %dh |
159 | jnz L(return_null) |
160 | test $0x02, %ah |
161 | jnz L(Exit10) |
162 | test $0x02, %dh |
163 | jnz L(return_null) |
164 | test $0x04, %ah |
165 | jnz L(Exit11) |
166 | test $0x04, %dh |
167 | jnz L(return_null) |
/* The low nibble of %ah is nonzero with bits 0-2 clear, so the match
   is byte 11, and no NUL was found in bytes 8-10.  */
168 | lea 11(%rdi), %rax |
169 | ret |
170 | |
/* Case 1: the chunk holds a match and no NUL byte, so only %eax is
   scanned, from bit 0 upwards.  */
171 | .p2align 4 |
172 | L(match_case1): |
173 | test %al, %al |
174 | jz L(match_high_case1) |
175 | |
176 | test $0x01, %al |
177 | jnz L(Exit1) |
178 | test $0x02, %al |
179 | jnz L(Exit2) |
180 | test $0x04, %al |
181 | jnz L(Exit3) |
182 | test $0x08, %al |
183 | jnz L(Exit4) |
184 | test $0x10, %al |
185 | jnz L(Exit5) |
186 | test $0x20, %al |
187 | jnz L(Exit6) |
188 | test $0x40, %al |
189 | jnz L(Exit7) |
/* %al is nonzero with bits 0-6 clear: the match is byte 7.  */
190 | lea 7(%rdi), %rax |
191 | ret |
192 | |
/* Case 1 with no match in bytes 0-7: scan %ah for bytes 8-15.  */
193 | .p2align 4 |
194 | L(match_high_case1): |
195 | test $0x01, %ah |
196 | jnz L(Exit9) |
197 | test $0x02, %ah |
198 | jnz L(Exit10) |
199 | test $0x04, %ah |
200 | jnz L(Exit11) |
201 | test $0x08, %ah |
202 | jnz L(Exit12) |
203 | test $0x10, %ah |
204 | jnz L(Exit13) |
205 | test $0x20, %ah |
206 | jnz L(Exit14) |
207 | test $0x40, %ah |
208 | jnz L(Exit15) |
/* Bits 0-6 of %ah are clear: the match is byte 15.  */
209 | lea 15(%rdi), %rax |
210 | ret |
211 | |
/* Return stubs.  L(ExitN) returns the address of byte N-1 of the
   chunk at %rdi.  There is no Exit8 or Exit16, bytes 7 and 15 are
   returned inline by the fall-through paths above.  */
212 | .p2align 4 |
213 | L(Exit1): |
214 | lea (%rdi), %rax |
215 | ret |
216 | |
217 | .p2align 4 |
218 | L(Exit2): |
219 | lea 1(%rdi), %rax |
220 | ret |
221 | |
222 | .p2align 4 |
223 | L(Exit3): |
224 | lea 2(%rdi), %rax |
225 | ret |
226 | |
227 | .p2align 4 |
228 | L(Exit4): |
229 | lea 3(%rdi), %rax |
230 | ret |
231 | |
232 | .p2align 4 |
233 | L(Exit5): |
234 | lea 4(%rdi), %rax |
235 | ret |
236 | |
237 | .p2align 4 |
238 | L(Exit6): |
239 | lea 5(%rdi), %rax |
240 | ret |
241 | |
242 | .p2align 4 |
243 | L(Exit7): |
244 | lea 6(%rdi), %rax |
245 | ret |
246 | |
247 | .p2align 4 |
248 | L(Exit9): |
249 | lea 8(%rdi), %rax |
250 | ret |
251 | |
252 | .p2align 4 |
253 | L(Exit10): |
254 | lea 9(%rdi), %rax |
255 | ret |
256 | |
257 | .p2align 4 |
258 | L(Exit11): |
259 | lea 10(%rdi), %rax |
260 | ret |
261 | |
262 | .p2align 4 |
263 | L(Exit12): |
264 | lea 11(%rdi), %rax |
265 | ret |
266 | |
267 | .p2align 4 |
268 | L(Exit13): |
269 | lea 12(%rdi), %rax |
270 | ret |
271 | |
272 | .p2align 4 |
273 | L(Exit14): |
274 | lea 13(%rdi), %rax |
275 | ret |
276 | |
277 | .p2align 4 |
278 | L(Exit15): |
279 | lea 14(%rdi), %rax |
280 | ret |
281 | |
282 | END (__strchr_sse2_no_bsf) |
283 | #endif |
284 | |