| 1 | /* strcat(dest, src) -- Append SRC on the end of DEST. | 
| 2 |    Optimized for x86-64. | 
| 3 |    Copyright (C) 2002-2022 Free Software Foundation, Inc. | 
| 4 |    This file is part of the GNU C Library. | 
| 5 |  | 
| 6 |    The GNU C Library is free software; you can redistribute it and/or | 
| 7 |    modify it under the terms of the GNU Lesser General Public | 
| 8 |    License as published by the Free Software Foundation; either | 
| 9 |    version 2.1 of the License, or (at your option) any later version. | 
| 10 |  | 
| 11 |    The GNU C Library is distributed in the hope that it will be useful, | 
| 12 |    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 13 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
| 14 |    Lesser General Public License for more details. | 
| 15 |  | 
| 16 |    You should have received a copy of the GNU Lesser General Public | 
| 17 |    License along with the GNU C Library; if not, see | 
| 18 |    <https://www.gnu.org/licenses/>.  */ | 
| 19 |  | 
| 20 | #include <sysdep.h> | 
| 21 | #include "asm-syntax.h" | 
| 22 |  | 
| 23 | /* Will be removed when new strcpy implementation gets merged.  */ | 
| 24 | /* In: %rdi = dest, %rsi = src.  Out: %rax = dest.  Modifies %rsi, %rcx, %rdx, %r8, %r9 and the flags.  */ | 
| 25 | 	.text | 
| 26 | ENTRY (strcat) | 
| 27 | 	movq %rdi, %rcx		/* Copy of dest, used for the alignment test.  */ | 
| 28 | 	andl $7, %ecx		/* mask alignment bits; ZF set if dest is 8-byte aligned */ | 
| 29 | 	movq %rdi, %rax		/* %rax scans dest; %rdi is kept for the return value.  */ | 
| 30 | 	movq $0xfefefefefefefeff,%r8	/* magic value for the NUL tests in both loops */ | 
| 31 | 	/* The two movq above leave the flags alone, so the jz below still tests the andl.  */ | 
| 32 | 	/* First step: Find end of destination.  */ | 
| 33 | 	jz 4f			/* aligned => start loop */ | 
| 34 |  | 
| 35 | 	neg %ecx		/* We need to align to 8 bytes.  */ | 
| 36 | 	addl $8,%ecx		/* %ecx = 8 - (dest & 7) = bytes up to the next boundary */ | 
| 37 | 	/* Search the first bytes directly.  */ | 
| 38 | 0:	cmpb $0x0,(%rax)	/* is byte NUL? */ | 
| 39 | 	je 2f			/* yes => start copy */ | 
| 40 | 	incq %rax		/* increment pointer */ | 
| 41 | 	decl %ecx		/* one byte fewer before the boundary */ | 
| 42 | 	jnz 0b			/* boundary not reached => test next byte */ | 
| 43 |  | 
| 44 |  | 
| 45 | 	/* Falls through here once %rax is 8-byte aligned.  */ | 
| 46 | 	/* Now the destination pointer (%rax) is aligned.  Scan for NUL byte.  */ | 
| 47 | 	.p2align 4 | 
| 48 | 4: | 
| 49 | 	/* First unroll.  */ | 
| 50 | 	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */ | 
| 51 | 	addq $8,%rax		/* adjust pointer for next word */ | 
| 52 | 	movq %r8, %rdx		/* magic value */ | 
| 53 | 	addq %rcx, %rdx		/* add the magic value to the word.  We get | 
| 54 | 				   carry bits reported for each byte which | 
| 55 | 				   is *not* 0 */ | 
| 56 | 	jnc 3f			/* no carry out => word has a NUL, locate it */ | 
| 57 | 	xorq %rcx, %rdx		/* (word+magic)^word */ | 
| 58 | 	orq %r8, %rdx		/* set all non-carry bits */ | 
| 59 | 	incq %rdx		/* add 1: if one carry bit was *not* set | 
| 60 | 				   the addition will not result in 0.  */ | 
| 61 | 	jnz 3f			/* found NUL => locate it */ | 
| 62 |  | 
| 63 | 	/* Second unroll.  */ | 
| 64 | 	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */ | 
| 65 | 	addq $8,%rax		/* adjust pointer for next word */ | 
| 66 | 	movq %r8, %rdx		/* magic value */ | 
| 67 | 	addq %rcx, %rdx		/* add the magic value to the word.  We get | 
| 68 | 				   carry bits reported for each byte which | 
| 69 | 				   is *not* 0 */ | 
| 70 | 	jnc 3f			/* no carry out => word has a NUL, locate it */ | 
| 71 | 	xorq %rcx, %rdx		/* (word+magic)^word */ | 
| 72 | 	orq %r8, %rdx		/* set all non-carry bits */ | 
| 73 | 	incq %rdx		/* add 1: if one carry bit was *not* set | 
| 74 | 				   the addition will not result in 0.  */ | 
| 75 | 	jnz 3f			/* found NUL => locate it */ | 
| 76 |  | 
| 77 | 	/* Third unroll.  */ | 
| 78 | 	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */ | 
| 79 | 	addq $8,%rax		/* adjust pointer for next word */ | 
| 80 | 	movq %r8, %rdx		/* magic value */ | 
| 81 | 	addq %rcx, %rdx		/* add the magic value to the word.  We get | 
| 82 | 				   carry bits reported for each byte which | 
| 83 | 				   is *not* 0 */ | 
| 84 | 	jnc 3f			/* no carry out => word has a NUL, locate it */ | 
| 85 | 	xorq %rcx, %rdx		/* (word+magic)^word */ | 
| 86 | 	orq %r8, %rdx		/* set all non-carry bits */ | 
| 87 | 	incq %rdx		/* add 1: if one carry bit was *not* set | 
| 88 | 				   the addition will not result in 0.  */ | 
| 89 | 	jnz 3f			/* found NUL => locate it */ | 
| 90 |  | 
| 91 | 	/* Fourth unroll.  */ | 
| 92 | 	movq (%rax), %rcx	/* get quadword (= 8 bytes) in question */ | 
| 93 | 	addq $8,%rax		/* adjust pointer for next word */ | 
| 94 | 	movq %r8, %rdx		/* magic value */ | 
| 95 | 	addq %rcx, %rdx		/* add the magic value to the word.  We get | 
| 96 | 				   carry bits reported for each byte which | 
| 97 | 				   is *not* 0 */ | 
| 98 | 	jnc 3f			/* no carry out => word has a NUL, locate it */ | 
| 99 | 	xorq %rcx, %rdx		/* (word+magic)^word */ | 
| 100 | 	orq %r8, %rdx		/* set all non-carry bits */ | 
| 101 | 	incq %rdx		/* add 1: if one carry bit was *not* set | 
| 102 | 				   the addition will not result in 0.  */ | 
| 103 | 	jz 4b			/* no NUL found => continue loop */ | 
| 104 |  | 
| 105 | 	.p2align 4		/* Align, it's a jump target.  */ | 
| 106 | 3:	subq $8,%rax		/* undo the increment: %rax = start of the word with the NUL */ | 
| 107 | 	/* %rcx still holds that word; test it byte by byte, lowest address first.  */ | 
| 108 | 	testb %cl, %cl		/* is first byte NUL? */ | 
| 109 | 	jz 2f			/* yes => start copy */ | 
| 110 | 	incq %rax		/* increment pointer */ | 
| 111 |  | 
| 112 | 	testb %ch, %ch		/* is second byte NUL? */ | 
| 113 | 	jz 2f			/* yes => start copy */ | 
| 114 | 	incq %rax		/* increment pointer */ | 
| 115 |  | 
| 116 | 	testl $0x00ff0000, %ecx /* is third byte NUL? */ | 
| 117 | 	jz 2f			/* yes => start copy */ | 
| 118 | 	incq %rax		/* increment pointer */ | 
| 119 |  | 
| 120 | 	testl $0xff000000, %ecx /* is fourth byte NUL? */ | 
| 121 | 	jz 2f			/* yes => start copy */ | 
| 122 | 	incq %rax		/* increment pointer */ | 
| 123 |  | 
| 124 | 	shrq $32, %rcx		/* look at other half.  */ | 
| 125 |  | 
| 126 | 	testb %cl, %cl		/* is fifth byte NUL? */ | 
| 127 | 	jz 2f			/* yes => start copy */ | 
| 128 | 	incq %rax		/* increment pointer */ | 
| 129 |  | 
| 130 | 	testb %ch, %ch		/* is sixth byte NUL? */ | 
| 131 | 	jz 2f			/* yes => start copy */ | 
| 132 | 	incq %rax		/* increment pointer */ | 
| 133 |  | 
| 134 | 	testl $0xff0000, %ecx	/* is seventh byte NUL? */ | 
| 135 | 	jz 2f			/* yes => start copy */ | 
| 136 | 	incq %rax		/* increment pointer */ | 
| 137 | 	/* Seven bytes were not NUL, so the eighth is; %rax now points at it.  */ | 
| 138 | 2: | 
| 139 | 	/* Second step: Copy source to destination.  */ | 
| 140 | 	/* Here %rax points at the terminating NUL of the destination string.  */ | 
| 141 | 	movq	%rsi, %rcx	/* copy of src, used for the alignment test */ | 
| 142 | 	andl	$7,%ecx		/* mask alignment bits; ZF set if src is 8-byte aligned */ | 
| 143 | 	movq	%rax, %rdx	/* %rdx = write pointer (%rax is reused for data); flags unchanged */ | 
| 144 | 	jz	22f		/* aligned => start loop */ | 
| 145 |  | 
| 146 | 	neg	%ecx		/* align to 8 bytes.  */ | 
| 147 | 	addl	$8, %ecx	/* %ecx = 8 - (src & 7) = bytes up to the next boundary */ | 
| 148 | 	/* Copy byte by byte until the source pointer is aligned.  */ | 
| 149 | 21: | 
| 150 | 	movb	(%rsi), %al	/* Fetch a byte */ | 
| 151 | 	testb	%al, %al	/* Is it NUL? */ | 
| 152 | 	movb	%al, (%rdx)	/* Store it (the NUL too); movb keeps the flags of testb */ | 
| 153 | 	jz	24f		/* If it was NUL, done! */ | 
| 154 | 	incq	%rsi | 
| 155 | 	incq	%rdx | 
| 156 | 	decl	%ecx | 
| 157 | 	jnz	21b | 
| 158 |  | 
| 159 | 	/* Now the source is aligned.  Unfortunately we cannot force | 
| 160 | 	   both source and destination to be aligned, so ignore the | 
| 161 | 	   alignment of the destination.  */ | 
| 162 | 	.p2align 4 | 
| 163 | 22: | 
| 164 | 	/* 1st unroll.  */ | 
| 165 | 	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */ | 
| 166 | 	addq	$8, %rsi	/* Adjust pointer for next word.  */ | 
| 167 | 	movq	%rax, %r9	/* Save a copy for NUL finding.  */ | 
| 168 | 	addq	%r8, %r9	/* add the magic value to the word.  We get | 
| 169 | 				   carry bits reported for each byte which | 
| 170 | 				   is *not* 0 */ | 
| 171 | 	jnc	23f		/* no carry out => word has a NUL, copy the tail */ | 
| 172 | 	xorq	%rax, %r9	/* (word+magic)^word */ | 
| 173 | 	orq	%r8, %r9	/* set all non-carry bits */ | 
| 174 | 	incq	%r9		/* add 1: if one carry bit was *not* set | 
| 175 | 				   the addition will not result in 0.  */ | 
| 176 |  | 
| 177 | 	jnz	23f		/* found NUL => copy the tail bytewise */ | 
| 178 |  | 
| 179 | 	movq	%rax, (%rdx)	/* Write value to destination.  */ | 
| 180 | 	addq	$8, %rdx	/* Adjust pointer.  */ | 
| 181 |  | 
| 182 | 	/* 2nd unroll.  */ | 
| 183 | 	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */ | 
| 184 | 	addq	$8, %rsi	/* Adjust pointer for next word.  */ | 
| 185 | 	movq	%rax, %r9	/* Save a copy for NUL finding.  */ | 
| 186 | 	addq	%r8, %r9	/* add the magic value to the word.  We get | 
| 187 | 				   carry bits reported for each byte which | 
| 188 | 				   is *not* 0 */ | 
| 189 | 	jnc	23f		/* no carry out => word has a NUL, copy the tail */ | 
| 190 | 	xorq	%rax, %r9	/* (word+magic)^word */ | 
| 191 | 	orq	%r8, %r9	/* set all non-carry bits */ | 
| 192 | 	incq	%r9		/* add 1: if one carry bit was *not* set | 
| 193 | 				   the addition will not result in 0.  */ | 
| 194 |  | 
| 195 | 	jnz	23f		/* found NUL => copy the tail bytewise */ | 
| 196 |  | 
| 197 | 	movq	%rax, (%rdx)	/* Write value to destination.  */ | 
| 198 | 	addq	$8, %rdx	/* Adjust pointer.  */ | 
| 199 |  | 
| 200 | 	/* 3rd unroll.  */ | 
| 201 | 	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */ | 
| 202 | 	addq	$8, %rsi	/* Adjust pointer for next word.  */ | 
| 203 | 	movq	%rax, %r9	/* Save a copy for NUL finding.  */ | 
| 204 | 	addq	%r8, %r9	/* add the magic value to the word.  We get | 
| 205 | 				   carry bits reported for each byte which | 
| 206 | 				   is *not* 0 */ | 
| 207 | 	jnc	23f		/* no carry out => word has a NUL, copy the tail */ | 
| 208 | 	xorq	%rax, %r9	/* (word+magic)^word */ | 
| 209 | 	orq	%r8, %r9	/* set all non-carry bits */ | 
| 210 | 	incq	%r9		/* add 1: if one carry bit was *not* set | 
| 211 | 				   the addition will not result in 0.  */ | 
| 212 |  | 
| 213 | 	jnz	23f		/* found NUL => copy the tail bytewise */ | 
| 214 |  | 
| 215 | 	movq	%rax, (%rdx)	/* Write value to destination.  */ | 
| 216 | 	addq	$8, %rdx	/* Adjust pointer.  */ | 
| 217 |  | 
| 218 | 	/* 4th unroll.  */ | 
| 219 | 	movq	(%rsi), %rax	/* Read quadword (8 bytes).  */ | 
| 220 | 	addq	$8, %rsi	/* Adjust pointer for next word.  */ | 
| 221 | 	movq	%rax, %r9	/* Save a copy for NUL finding.  */ | 
| 222 | 	addq	%r8, %r9	/* add the magic value to the word.  We get | 
| 223 | 				   carry bits reported for each byte which | 
| 224 | 				   is *not* 0 */ | 
| 225 | 	jnc	23f		/* no carry out => word has a NUL, copy the tail */ | 
| 226 | 	xorq	%rax, %r9	/* (word+magic)^word */ | 
| 227 | 	orq	%r8, %r9	/* set all non-carry bits */ | 
| 228 | 	incq	%r9		/* add 1: if one carry bit was *not* set | 
| 229 | 				   the addition will not result in 0.  */ | 
| 230 |  | 
| 231 | 	jnz	23f		/* found NUL => copy the tail bytewise */ | 
| 232 |  | 
| 233 | 	movq	%rax, (%rdx)	/* Write value to destination.  */ | 
| 234 | 	addq	$8, %rdx	/* Adjust pointer.  */ | 
| 235 | 	jmp	22b		/* Next iteration.  */ | 
| 236 |  | 
| 237 | 	/* Copy the last few bytes, up to and including the NUL.  %rax holds | 
| 238 | 	   the word containing it.  The loop is unrolled twice.  */ | 
| 239 | 	.p2align 4 | 
| 240 | 23: | 
| 241 | 	movb	%al, (%rdx)	/* 1st byte.  */ | 
| 242 | 	testb	%al, %al	/* Is it NUL.  */ | 
| 243 | 	jz	24f		/* yes, finish.  */ | 
| 244 | 	incq	%rdx		/* Increment destination.  */ | 
| 245 | 	movb	%ah, (%rdx)	/* 2nd byte.  */ | 
| 246 | 	testb	%ah, %ah	/* Is it NUL?.  */ | 
| 247 | 	jz	24f		/* yes, finish.  */ | 
| 248 | 	incq	%rdx		/* Increment destination.  */ | 
| 249 | 	shrq	$16, %rax	/* Shift...  */ | 
| 250 | 	jmp	23b		/* and look at next two bytes in %rax.  */ | 
| 251 |  | 
| 252 |  | 
| 253 | 24: | 
| 254 | 	movq	%rdi, %rax	/* Destination is return value.  */ | 
| 255 | 	retq | 
| 256 | END (strcat) | 
| 257 | libc_hidden_builtin_def (strcat) | 
| 258 |  |