1 | /* strcat(dest, src) -- Append SRC on the end of DEST. |
2 | Optimized for x86-64. |
3 | Copyright (C) 2002-2022 Free Software Foundation, Inc. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <sysdep.h> |
21 | #include "asm-syntax.h" |
22 | |
23 | /* Will be removed when new strcpy implementation gets merged. */ |
24 | |
25 | .text |
26 | ENTRY (strcat) /* In: %rdi = DEST, %rsi = SRC. Out: %rax = DEST. Clobbers %rcx, %rdx, %rsi, %r8, %r9 and the flags. */ |
27 | movq %rdi, %rcx /* Copy of DEST, used only for its alignment bits. */ |
28 | andl $7, %ecx /* %ecx = DEST & 7; ZF is set iff DEST is 8-byte aligned */ |
29 | movq %rdi, %rax /* %rax = scan pointer into DEST (mov leaves the flags alone). */ |
30 | movq $0xfefefefefefefeff,%r8 /* NUL-detection magic, equal to -0x0101010101010101; kept in %r8 for both steps */ |
31 | |
32 | /* First step: Find end of destination. */ |
33 | jz 4f /* aligned => start loop (ZF still comes from the andl above) */ |
34 | |
35 | neg %ecx /* We need to align to 8 bytes. */ |
36 | addl $8,%ecx /* %ecx = 8 - (DEST & 7) = bytes left before the next 8-byte boundary */ |
37 | /* Search the first bytes directly. */ |
38 | 0: cmpb $0x0,(%rax) /* is byte NUL? */ |
39 | je 2f /* yes => start copy */ |
40 | incq %rax /* increment pointer */ |
41 | decl %ecx /* one byte fewer before the boundary */ |
42 | jnz 0b /* boundary not reached => test the next byte */ |
43 | |
44 | |
45 | |
46 | /* Now the destination scan pointer is aligned. Scan for the NUL byte, eight bytes per step. */ |
47 | .p2align 4 |
48 | 4: |
49 | /* First unroll. */ |
50 | movq (%rax), %rcx /* load the next 8 bytes of DEST */ |
51 | addq $8,%rax /* adjust pointer for next word */ |
52 | movq %r8, %rdx /* magic value */ |
53 | addq %rcx, %rdx /* add the magic value to the word. We get |
54 | carry bits reported for each byte which |
55 | is *not* 0 */ |
56 | jnc 3f /* no carry out of the add => some byte is NUL; locate it */ |
57 | xorq %rcx, %rdx /* (word+magic)^word */ |
58 | orq %r8, %rdx /* set all non-carry bits */ |
59 | incq %rdx /* add 1: if one carry bit was *not* set |
60 | the addition will not result in 0. */ |
61 | jnz 3f /* found NUL => locate it bytewise */ |
62 | |
63 | /* Second unroll. */ |
64 | movq (%rax), %rcx /* load the next 8 bytes of DEST */ |
65 | addq $8,%rax /* adjust pointer for next word */ |
66 | movq %r8, %rdx /* magic value */ |
67 | addq %rcx, %rdx /* add the magic value to the word. We get |
68 | carry bits reported for each byte which |
69 | is *not* 0 */ |
70 | jnc 3f /* no carry out of the add => some byte is NUL; locate it */ |
71 | xorq %rcx, %rdx /* (word+magic)^word */ |
72 | orq %r8, %rdx /* set all non-carry bits */ |
73 | incq %rdx /* add 1: if one carry bit was *not* set |
74 | the addition will not result in 0. */ |
75 | jnz 3f /* found NUL => locate it bytewise */ |
76 | |
77 | /* Third unroll. */ |
78 | movq (%rax), %rcx /* load the next 8 bytes of DEST */ |
79 | addq $8,%rax /* adjust pointer for next word */ |
80 | movq %r8, %rdx /* magic value */ |
81 | addq %rcx, %rdx /* add the magic value to the word. We get |
82 | carry bits reported for each byte which |
83 | is *not* 0 */ |
84 | jnc 3f /* no carry out of the add => some byte is NUL; locate it */ |
85 | xorq %rcx, %rdx /* (word+magic)^word */ |
86 | orq %r8, %rdx /* set all non-carry bits */ |
87 | incq %rdx /* add 1: if one carry bit was *not* set |
88 | the addition will not result in 0. */ |
89 | jnz 3f /* found NUL => locate it bytewise */ |
90 | |
91 | /* Fourth unroll. */ |
92 | movq (%rax), %rcx /* load the next 8 bytes of DEST */ |
93 | addq $8,%rax /* adjust pointer for next word */ |
94 | movq %r8, %rdx /* magic value */ |
95 | addq %rcx, %rdx /* add the magic value to the word. We get |
96 | carry bits reported for each byte which |
97 | is *not* 0 */ |
98 | jnc 3f /* no carry out of the add => some byte is NUL; locate it */ |
99 | xorq %rcx, %rdx /* (word+magic)^word */ |
100 | orq %r8, %rdx /* set all non-carry bits */ |
101 | incq %rdx /* add 1: if one carry bit was *not* set |
102 | the addition will not result in 0. */ |
103 | jz 4b /* no NUL found => continue loop */ |
104 | |
105 | .p2align 4 /* Align, it's a jump target. */ |
106 | 3: subq $8,%rax /* undo the increment: %rax = address of the word (still in %rcx) that holds the NUL */ |
107 | |
108 | testb %cl, %cl /* is first byte NUL? */ |
109 | jz 2f /* yes => start copy */ |
110 | incq %rax /* increment pointer */ |
111 | |
112 | testb %ch, %ch /* is second byte NUL? */ |
113 | jz 2f /* yes => start copy */ |
114 | incq %rax /* increment pointer */ |
115 | |
116 | testl $0x00ff0000, %ecx /* is third byte NUL? */ |
117 | jz 2f /* yes => start copy */ |
118 | incq %rax /* increment pointer */ |
119 | |
120 | testl $0xff000000, %ecx /* is fourth byte NUL? */ |
121 | jz 2f /* yes => start copy */ |
122 | incq %rax /* increment pointer */ |
123 | |
124 | shrq $32, %rcx /* look at other half: bytes five to eight move into %ecx */ |
125 | |
126 | testb %cl, %cl /* is fifth byte NUL? */ |
127 | jz 2f /* yes => start copy */ |
128 | incq %rax /* increment pointer */ |
129 | |
130 | testb %ch, %ch /* is sixth byte NUL? */ |
131 | jz 2f /* yes => start copy */ |
132 | incq %rax /* increment pointer */ |
133 | |
134 | testl $0xff0000, %ecx /* is seventh byte NUL? */ |
135 | jz 2f /* yes => start copy */ |
136 | incq %rax /* none of the first seven => the NUL is the eighth byte; fall through */ |
137 | |
138 | 2: /* Here %rax = address of the NUL that terminates DEST. */ |
139 | /* Second step: Copy source to destination. */ |
140 | |
141 | movq %rsi, %rcx /* copy of SRC, used only for its alignment bits */ |
142 | andl $7,%ecx /* %ecx = SRC & 7; ZF is set iff SRC is 8-byte aligned */ |
143 | movq %rax, %rdx /* %rdx = write pointer (end of DEST); %rax becomes the data register */ |
144 | jz 22f /* aligned => start loop (ZF still comes from the andl above) */ |
145 | |
146 | neg %ecx /* align to 8 bytes. */ |
147 | addl $8, %ecx /* %ecx = 8 - (SRC & 7) = bytes left before the next 8-byte boundary */ |
148 | /* Align the source pointer. */ |
149 | 21: |
150 | movb (%rsi), %al /* Fetch a byte */ |
151 | testb %al, %al /* Is it NUL? */ |
152 | movb %al, (%rdx) /* Store it (the NUL is stored too; mov keeps the flags from testb) */ |
153 | jz 24f /* If it was NUL, done! */ |
154 | incq %rsi /* advance source */ |
155 | incq %rdx /* advance destination */ |
156 | decl %ecx /* one byte fewer before the boundary */ |
157 | jnz 21b /* boundary not reached => copy the next byte */ |
158 | |
159 | /* Now the source is aligned. Unfortunately we cannot force |
160 | to have both source and destination aligned, so ignore the |
161 | alignment of the destination. */ |
162 | .p2align 4 |
163 | 22: |
164 | /* 1st unroll. */ |
165 | movq (%rsi), %rax /* Read the next 8 bytes of SRC. */ |
166 | addq $8, %rsi /* Adjust pointer for next word. */ |
167 | movq %rax, %r9 /* Save a copy for NUL finding. */ |
168 | addq %r8, %r9 /* add the magic value to the word. We get |
169 | carry bits reported for each byte which |
170 | is *not* 0 */ |
171 | jnc 23f /* no carry out of the add => some byte is NUL; copy the tail bytewise */ |
172 | xorq %rax, %r9 /* (word+magic)^word */ |
173 | orq %r8, %r9 /* set all non-carry bits */ |
174 | incq %r9 /* add 1: if one carry bit was *not* set |
175 | the addition will not result in 0. */ |
176 | |
177 | jnz 23f /* found NUL => copy the tail bytewise */ |
178 | |
179 | movq %rax, (%rdx) /* No NUL in this word: write all 8 bytes to the destination. */ |
180 | addq $8, %rdx /* Adjust pointer. */ |
181 | |
182 | /* 2nd unroll. */ |
183 | movq (%rsi), %rax /* Read the next 8 bytes of SRC. */ |
184 | addq $8, %rsi /* Adjust pointer for next word. */ |
185 | movq %rax, %r9 /* Save a copy for NUL finding. */ |
186 | addq %r8, %r9 /* add the magic value to the word. We get |
187 | carry bits reported for each byte which |
188 | is *not* 0 */ |
189 | jnc 23f /* no carry out of the add => some byte is NUL; copy the tail bytewise */ |
190 | xorq %rax, %r9 /* (word+magic)^word */ |
191 | orq %r8, %r9 /* set all non-carry bits */ |
192 | incq %r9 /* add 1: if one carry bit was *not* set |
193 | the addition will not result in 0. */ |
194 | |
195 | jnz 23f /* found NUL => copy the tail bytewise */ |
196 | |
197 | movq %rax, (%rdx) /* No NUL in this word: write all 8 bytes to the destination. */ |
198 | addq $8, %rdx /* Adjust pointer. */ |
199 | |
200 | /* 3rd unroll. */ |
201 | movq (%rsi), %rax /* Read the next 8 bytes of SRC. */ |
202 | addq $8, %rsi /* Adjust pointer for next word. */ |
203 | movq %rax, %r9 /* Save a copy for NUL finding. */ |
204 | addq %r8, %r9 /* add the magic value to the word. We get |
205 | carry bits reported for each byte which |
206 | is *not* 0 */ |
207 | jnc 23f /* no carry out of the add => some byte is NUL; copy the tail bytewise */ |
208 | xorq %rax, %r9 /* (word+magic)^word */ |
209 | orq %r8, %r9 /* set all non-carry bits */ |
210 | incq %r9 /* add 1: if one carry bit was *not* set |
211 | the addition will not result in 0. */ |
212 | |
213 | jnz 23f /* found NUL => copy the tail bytewise */ |
214 | |
215 | movq %rax, (%rdx) /* No NUL in this word: write all 8 bytes to the destination. */ |
216 | addq $8, %rdx /* Adjust pointer. */ |
217 | |
218 | /* 4th unroll. */ |
219 | movq (%rsi), %rax /* Read the next 8 bytes of SRC. */ |
220 | addq $8, %rsi /* Adjust pointer for next word. */ |
221 | movq %rax, %r9 /* Save a copy for NUL finding. */ |
222 | addq %r8, %r9 /* add the magic value to the word. We get |
223 | carry bits reported for each byte which |
224 | is *not* 0 */ |
225 | jnc 23f /* no carry out of the add => some byte is NUL; copy the tail bytewise */ |
226 | xorq %rax, %r9 /* (word+magic)^word */ |
227 | orq %r8, %r9 /* set all non-carry bits */ |
228 | incq %r9 /* add 1: if one carry bit was *not* set |
229 | the addition will not result in 0. */ |
230 | |
231 | jnz 23f /* found NUL => copy the tail bytewise */ |
232 | |
233 | movq %rax, (%rdx) /* No NUL in this word: write all 8 bytes to the destination. */ |
234 | addq $8, %rdx /* Adjust pointer. */ |
235 | jmp 22b /* Next iteration. */ |
236 | |
237 | /* Do the last few bytes. %rax contains the value to write. |
238 | The loop is unrolled twice. */ |
239 | .p2align 4 |
240 | 23: |
241 | movb %al, (%rdx) /* 1st byte (stored before the test, so the NUL is written too). */ |
242 | testb %al, %al /* Is it NUL? */ |
243 | jz 24f /* yes, finish. */ |
244 | incq %rdx /* Increment destination. */ |
245 | movb %ah, (%rdx) /* 2nd byte. */ |
246 | testb %ah, %ah /* Is it NUL? */ |
247 | jz 24f /* yes, finish. */ |
248 | incq %rdx /* Increment destination. */ |
249 | shrq $16, %rax /* Shift the next two bytes down into %al/%ah. */ |
250 | jmp 23b /* and look at next two bytes in %rax. */ |
251 | |
252 | |
253 | 24: |
254 | movq %rdi, %rax /* Return value is the original DEST pointer (%rdi is never modified). */ |
255 | retq |
256 | END (strcat) |
257 | libc_hidden_builtin_def (strcat) |
258 | |