1 | /* strcpy/stpcpy implementation for x86-64. |
2 | Copyright (C) 2002-2021 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | Contributed by Andreas Jaeger <aj@suse.de>, 2002. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <sysdep.h> |
21 | #include "asm-syntax.h" |
22 | |
#ifndef USE_AS_STPCPY
# define STRCPY strcpy
#endif

/* NUL-detection constant for the quadword loop.  Adding it to an
   8-byte word adds 0xff to the lowest byte and 0xfe plus the incoming
   carry to each higher byte, so as long as no byte is zero every byte
   carries into its successor and the sum carries out of bit 63.  The
   first zero byte in the word breaks that chain.  */
#define STRCPY_NUL_MAGIC	0xfefefefefefefeff

	.text

/* One step of the unrolled copy loop.

   In:   %rsi = 8-byte aligned read cursor
	 %rdx = write cursor
	 %r8  = STRCPY_NUL_MAGIC
   Out:  %rsi advanced by 8 (always)
	 %rdx advanced by 8 (only when the word was stored)
   Uses: %rax (the word just read), %r9 (scratch), flags.

   When the word contains a NUL byte nothing is stored and control
   leaves for .Lcopy_tail with the word still in %rax.  */
	.macro	copy_quad_or_finish
	movq	(%rsi), %rax		/* Fetch the next eight bytes.  */
	addq	$8, %rsi
	movq	%rax, %r9
	addq	%r8, %r9		/* %r9 = word + magic.  */
	jnc	.Lcopy_tail		/* No carry out of bit 63: some byte
					   of the word is zero.  */
	xorq	%rax, %r9		/* Bit 0 of bytes 1..7 now tells
					   whether a carry came in from the
					   byte below.  */
	orq	%r8, %r9		/* Force every other bit to one.  */
	incq	%r9			/* All ones wraps to zero; anything
					   else means a carry was missing.  */
	jnz	.Lcopy_tail		/* Missing carry: the word holds a
					   NUL byte.  */
	movq	%rax, (%rdx)		/* No NUL: store all eight bytes.  */
	addq	$8, %rdx
	.endm

/* Copy the NUL-terminated string at %rsi to the buffer at %rdi,
   terminator included.

   In:   %rdi = destination, %rsi = source
   Out:  %rax = destination (strcpy), or the address of the NUL byte
	 written to the destination (when USE_AS_STPCPY is defined)
   Uses: %rcx, %rdx, %rsi, %r8, %r9, flags.  %rdi is left unchanged.  */
ENTRY (STRCPY)
	movq	%rdi, %rdx		/* %rdx is the write cursor.  */
	movq	%rsi, %rcx
	andl	$7, %ecx		/* %ecx = source address modulo 8.  */
	jz	.Lsrc_aligned		/* Already aligned: skip the byte
					   loop.  */

	/* %ecx = 8 - (source & 7), the number of bytes in front of the
	   next 8-byte boundary of the source.  */
	negl	%ecx
	addl	$8, %ecx

	/* Copy single bytes until the source is aligned or the string
	   ends, whichever happens first.  */
.Lhead_byte:
	movb	(%rsi), %al
	movb	%al, (%rdx)		/* Store first, so that the NUL
					   itself gets copied as well.  */
	testb	%al, %al
	jz	.Ldone			/* That was the terminator.  */
	incq	%rsi
	incq	%rdx
	decl	%ecx
	jnz	.Lhead_byte

.Lsrc_aligned:
	movq	$STRCPY_NUL_MAGIC, %r8

	/* From here on the source is 8-byte aligned.  Nothing is done
	   about the alignment of the destination, so the stores below
	   may be unaligned.  The loop is unrolled four times.  */
	.p2align 4
.Lquad_loop:
	copy_quad_or_finish
	copy_quad_or_finish
	copy_quad_or_finish
	copy_quad_or_finish
	jmp	.Lquad_loop

	/* %rax holds a word that contains a NUL byte.  Write it out one
	   byte at a time, two bytes per pass, stopping right after the
	   NUL has been stored.  %rdx is not advanced past the NUL, which
	   is what the stpcpy return value relies on.  */
	.p2align 4
.Lcopy_tail:
	movb	%al, (%rdx)		/* Even-numbered byte.  */
	testb	%al, %al
	jz	.Ldone
	incq	%rdx
	movb	%ah, (%rdx)		/* Odd-numbered byte.  */
	testb	%ah, %ah
	jz	.Ldone
	incq	%rdx
	shrq	$16, %rax		/* Bring the next byte pair into
					   %al and %ah.  */
	jmp	.Lcopy_tail

.Ldone:
#ifdef USE_AS_STPCPY
	movq	%rdx, %rax		/* Return the address of the NUL byte
					   in the destination.  */
#else
	movq	%rdi, %rax		/* Return the destination pointer.  */
#endif
	retq
END (STRCPY)

	/* Drop the helper macro again; it is private to this function.  */
	.purgem	copy_quad_or_finish

#ifndef USE_AS_STPCPY
libc_hidden_builtin_def (strcpy)
#endif
157 | |