/* Placeholder function, not used by any processor at the moment.
   Copyright (C) 2022-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* UNUSED.  Exists purely as a reference implementation.  */
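
/* A minimal C sketch of the strchr case implemented below, for reference
   only (the actual entry point is selected via STRCHR and the USE_AS_*
   macros; strchrnul returns a pointer to the terminator instead of NULL,
   and wcschr operates on 4-byte wide characters):

	char *strchr (const char *s, int c)
	{
	  while (*s != (char) c)
	    if (*s++ == '\0')
	      return NULL;
	  return (char *) s;
	}
 */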

#include <isa-level.h>

#if ISA_SHOULD_BUILD (4)

# include <sysdep.h>

# ifdef USE_AS_WCSCHR
#  define CHAR_REG esi
#  define CHAR_SIZE 4
#  define VPBROADCAST vpbroadcastd
#  define VPCMP vpcmpd
#  define VPCMPNE vpcmpneqd
#  define VPMINU vpminud
#  define VPTEST vptestmd
#  define VPTESTN vptestnmd
# else
#  define CHAR_REG sil
#  define CHAR_SIZE 1
#  define VPBROADCAST vpbroadcastb
#  define VPCMP vpcmpb
#  define VPCMPNE vpcmpneqb
#  define VPMINU vpminub
#  define VPTEST vptestmb
#  define VPTESTN vptestnmb
# endif

# define PAGE_SIZE 4096
# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
# define VEC_MATCH_MASK ((1 << CHAR_PER_VEC) - 1)
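
/* VEC_MATCH_MASK has one bit per [w]char in a vector.  Subtracting it
   from a wide-char compare mask plays the same role as the `inc' used in
   the byte case: the result is zero only when every mask bit was set,
   i.e. when no [w]char compared as CHAR or null.  */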

	.section SECTION(.text), "ax", @progbits
/* Aligning the entry point to a 64-byte boundary gives better
   performance for strings that fit in a single vector.  */
ENTRY_P2ALIGN (STRCHR, 6)

	/* Broadcast CHAR to VMM(0).  */
	VPBROADCAST %esi, %VMM(0)
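	/* Check whether an unaligned VEC_SIZE load from %rdi would cross
	   a page boundary: the shift moves the page-offset bits of the
	   address into the top of %eax, so the unsigned compare is
	   equivalent to (%rdi & (PAGE_SIZE - 1)) > PAGE_SIZE - VEC_SIZE.  */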
	movl %edi, %eax
	sall $20, %eax
	cmpl $((PAGE_SIZE - VEC_SIZE) << 20), %eax
	ja L(page_cross)

	VMOVU (%rdi), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRAX
	/* A clear mask bit marks a [w]char that is either CHAR or null.
	   The inc/sub below turns the all-ones (no hit) mask into zero;
	   otherwise its lowest set bit gives the first hit.  */

# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jz L(align_more)

	bsf %VRAX, %VRAX

# ifdef USE_AS_WCSCHR
	leaq (%rdi, %rax, CHAR_SIZE), %rax
# else
	add %rdi, %rax
# endif
# ifndef USE_AS_STRCHRNUL
	cmp (%rax), %CHAR_REG
	jne L(zero)
	ret
L(zero):
	xorl %eax, %eax
# endif
	ret

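/* Return paths for a hit in the 1st, 2nd or 3rd vector relative to the
   current %rdi (used both by the unrolled checks below and by the main
   loop): the fall-through chain adds one VEC_SIZE to %rdi per label so
   that a single bsf/lea computes the final address.  */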
L(ret_vec_x3):
	subq $-VEC_SIZE, %rdi
L(ret_vec_x2):
	subq $-VEC_SIZE, %rdi
L(ret_vec_x1):
	bsf %VRAX, %VRAX
# ifdef USE_AS_WCSCHR
	leaq (%rdi, %rax, CHAR_SIZE), %rax
# else
	add %rdi, %rax
# endif

# ifndef USE_AS_STRCHRNUL
	cmp (%rax), %CHAR_REG
	jne L(zero)
# endif
	ret

L(page_cross):
	mov %rdi, %rax
	movl %edi, %ecx
# ifdef USE_AS_WCSCHR
	/* Compute the number of compare-result bits to skip for the
	   wide-string alignment adjustment.  */
	andl $(VEC_SIZE - 1), %ecx
	sarl $2, %ecx
# endif
	/* %ecx now holds the number of [w]chars to skip due to the
	   address alignment.  */
	andq $-VEC_SIZE, %rax

	VMOVA (%rax), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	/* Discard the compare bits for the [w]chars skipped by the
	   alignment adjustment.  */
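	/* For the byte case %ecx still holds the full low bits of the
	   address, but the hardware truncates the shift count to the
	   operand width, which leaves exactly the byte offset within the
	   vector, so no extra masking is needed.  After the shift the
	   mask bits are relative to the original %rdi rather than the
	   aligned %rax, hence the address computation below uses %rdi.  */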
	shr %cl, %VRAX
	jz L(align_more)

	bsf %VRAX, %VRAX
# ifdef USE_AS_WCSCHR
	leaq (%rdi, %rax, CHAR_SIZE), %rax
# else
	addq %rdi, %rax
# endif

# ifndef USE_AS_STRCHRNUL
	cmp (%rax), %CHAR_REG
	jne L(zero)
# endif
	ret

L(align_more):
	/* Align %rdi down to VEC_SIZE.  */
	andq $-VEC_SIZE, %rdi

	/* Check the next four vectors individually (unrolled) before
	   entering the aligned four-vector loop.  */
	VMOVA VEC_SIZE(%rdi), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}

	/* Advance %rdi by one vector so that the later comparisons and
	   the return-address computation share the same base.  */
	subq $-VEC_SIZE, %rdi
	KMOV %k0, %VRAX

# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x1)

	VMOVA VEC_SIZE(%rdi), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x2)

	VMOVA (VEC_SIZE * 2)(%rdi), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x3)

	VMOVA (VEC_SIZE * 3)(%rdi), %VMM(1)
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRDX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRDX
# else
	inc %VRDX
# endif
	jnz L(ret_vec_x4)

	/* Align the address to VEC_SIZE * 4 for the loop.  */
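	/* This realignment may move %rdi back by up to 3 * VEC_SIZE,
	   re-checking vectors already scanned above; that is harmless and
	   keeps the loop loads 4 * VEC_SIZE aligned.  */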
	andq $-(VEC_SIZE * 4), %rdi
L(loop):
	/* The VPMINU and VPCMP combination performs better than the
	   alternative instruction combinations.  */
	VMOVA (VEC_SIZE * 4)(%rdi), %VMM(1)
	VMOVA (VEC_SIZE * 5)(%rdi), %VMM(2)
	VMOVA (VEC_SIZE * 6)(%rdi), %VMM(3)
	VMOVA (VEC_SIZE * 7)(%rdi), %VMM(4)

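	/* Fold the CHAR and null tests of all four vectors: %k3 and %k4
	   carry the "not CHAR" results of vectors 1/3 and 2/4, while the
	   VPMINU chain folds the null test into %VMM(4).  The final
	   masked VPTEST leaves a zero bit in %k5 wherever one of the four
	   vectors holds CHAR or a null, so %k5 is all ones only when
	   nothing was found.  */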
	VPCMPNE %VMM(1), %VMM(0), %k1
	VPCMPNE %VMM(2), %VMM(0), %k2

	VPMINU %VMM(2), %VMM(1), %VMM(2)

	VPCMPNE %VMM(3), %VMM(0), %k3{%k1}
	VPCMPNE %VMM(4), %VMM(0), %k4{%k2}

	VPMINU %VMM(4), %VMM(3), %VMM(4)
	VPMINU %VMM(2), %VMM(4), %VMM(4){%k3}{z}

	VPTEST %VMM(4), %VMM(4), %k5{%k4}

	KMOV %k5, %VRDX
	subq $-(VEC_SIZE * 4), %rdi
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRDX
# else
	inc %VRDX
# endif
	jz L(loop)

	VPTEST %VMM(1), %VMM(1), %k0{%k1}
	KMOV %k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x1)

	VPTEST %VMM(2), %VMM(2), %k0{%k2}
	KMOV %k0, %VRAX
	/* The first vector had no CHAR or null (so %k1 is all ones);
	   check the second vector the same way.  */
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x2)

	VPTEST %VMM(3), %VMM(3), %k0{%k3}
	KMOV %k0, %VRAX
# ifdef USE_AS_WCSCHR
	sub $VEC_MATCH_MASK, %VRAX
# else
	inc %VRAX
# endif
	jnz L(ret_vec_x3)
	/* At this point CHAR or the null [w]char must be in the fourth
	   vector, so there is no need to check it.  */

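/* %VRDX indexes the fourth vector on both paths: when jumped to from the
   unrolled checks above and when falling through from the loop (%rdi has
   already been advanced by 4 * VEC_SIZE there).  */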
L(ret_vec_x4):
	bsf %VRDX, %VRDX
	leaq (VEC_SIZE * 3)(%rdi, %rdx, CHAR_SIZE), %rax
# ifndef USE_AS_STRCHRNUL
	cmp (%rax), %CHAR_REG
	jne L(zero_2)
# endif
	ret

# ifndef USE_AS_STRCHRNUL
L(zero_2):
	xor %eax, %eax
	ret
# endif
END (STRCHR)
#endif