1 | /* strcat with SSSE3 |
2 | Copyright (C) 2011-2021 Free Software Foundation, Inc. |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #if IS_IN (libc) |
21 | |
22 | # include <sysdep.h> |
23 | |
24 | # ifndef STRCAT |
25 | # define STRCAT __strcat_ssse3 |
26 | # endif |
27 | |
28 | # define USE_AS_STRCAT |
29 | |
30 | .text |
31 | ENTRY (STRCAT) |
32 | # ifdef USE_AS_STRNCAT |
33 | mov %rdx, %r8 /* Keep the third argument in r8; edx is used as scratch below. */ |
34 | # endif |
35 | /* rdi = string being appended to, rsi = string to append. */ |
36 | /* Part 1: locate the NUL ending the string at rdi; rax becomes its offset from rdi. */ |
37 | /* Inline corresponding strlen file, temporary until new strcpy |
38 | implementation gets merged. */ |
39 | |
40 | xor %eax, %eax /* rax = 0; each L(exit_tailN) target adds N to it. */ |
41 | cmpb $0, (%rdi) /* Test bytes 0-15 one at a time. */ |
42 | jz L(exit_tail0) |
43 | cmpb $0, 1(%rdi) |
44 | jz L(exit_tail1) |
45 | cmpb $0, 2(%rdi) |
46 | jz L(exit_tail2) |
47 | cmpb $0, 3(%rdi) |
48 | jz L(exit_tail3) |
49 | |
50 | cmpb $0, 4(%rdi) |
51 | jz L(exit_tail4) |
52 | cmpb $0, 5(%rdi) |
53 | jz L(exit_tail5) |
54 | cmpb $0, 6(%rdi) |
55 | jz L(exit_tail6) |
56 | cmpb $0, 7(%rdi) |
57 | jz L(exit_tail7) |
58 | |
59 | cmpb $0, 8(%rdi) |
60 | jz L(exit_tail8) |
61 | cmpb $0, 9(%rdi) |
62 | jz L(exit_tail9) |
63 | cmpb $0, 10(%rdi) |
64 | jz L(exit_tail10) |
65 | cmpb $0, 11(%rdi) |
66 | jz L(exit_tail11) |
67 | |
68 | cmpb $0, 12(%rdi) |
69 | jz L(exit_tail12) |
70 | cmpb $0, 13(%rdi) |
71 | jz L(exit_tail13) |
72 | cmpb $0, 14(%rdi) |
73 | jz L(exit_tail14) |
74 | cmpb $0, 15(%rdi) |
75 | jz L(exit_tail15) |
76 | pxor %xmm0, %xmm0 /* xmm0 = 0, so pcmpeqb against memory marks the NUL bytes. */ |
77 | lea 16(%rdi), %rcx /* rcx = rdi + 16; L(exit) subtracts it from rax. */ |
78 | lea 16(%rdi), %rax |
79 | and $-16, %rax /* rax = first 16-byte-aligned address above rdi. */ |
80 | /* Unrolled: test sixteen aligned 16-byte blocks, one block per group below. */ |
81 | pcmpeqb (%rax), %xmm0 /* Each byte of xmm0 becomes 0xff where the block byte is NUL. */ |
82 | pmovmskb %xmm0, %edx /* edx = mask with bit i set when byte i of the block is NUL. */ |
83 | pxor %xmm1, %xmm1 /* Zero the register used by the next group. */ |
84 | test %edx, %edx |
85 | lea 16(%rax), %rax /* Step to the next block; lea keeps the flags from test. */ |
86 | jnz L(exit) /* NUL found; rax now points just past the block that holds it. */ |
87 | |
88 | pcmpeqb (%rax), %xmm1 |
89 | pmovmskb %xmm1, %edx |
90 | pxor %xmm2, %xmm2 |
91 | test %edx, %edx |
92 | lea 16(%rax), %rax |
93 | jnz L(exit) |
94 | |
95 | pcmpeqb (%rax), %xmm2 |
96 | pmovmskb %xmm2, %edx |
97 | pxor %xmm3, %xmm3 |
98 | test %edx, %edx |
99 | lea 16(%rax), %rax |
100 | jnz L(exit) |
101 | |
102 | pcmpeqb (%rax), %xmm3 |
103 | pmovmskb %xmm3, %edx |
104 | test %edx, %edx |
105 | lea 16(%rax), %rax |
106 | jnz L(exit) |
107 | /* From here xmm0-xmm3 are reused without pxor: each is still zero because its last compare matched nothing. */ |
108 | pcmpeqb (%rax), %xmm0 |
109 | pmovmskb %xmm0, %edx |
110 | test %edx, %edx |
111 | lea 16(%rax), %rax |
112 | jnz L(exit) |
113 | |
114 | pcmpeqb (%rax), %xmm1 |
115 | pmovmskb %xmm1, %edx |
116 | test %edx, %edx |
117 | lea 16(%rax), %rax |
118 | jnz L(exit) |
119 | |
120 | pcmpeqb (%rax), %xmm2 |
121 | pmovmskb %xmm2, %edx |
122 | test %edx, %edx |
123 | lea 16(%rax), %rax |
124 | jnz L(exit) |
125 | |
126 | pcmpeqb (%rax), %xmm3 |
127 | pmovmskb %xmm3, %edx |
128 | test %edx, %edx |
129 | lea 16(%rax), %rax |
130 | jnz L(exit) |
131 | |
132 | pcmpeqb (%rax), %xmm0 |
133 | pmovmskb %xmm0, %edx |
134 | test %edx, %edx |
135 | lea 16(%rax), %rax |
136 | jnz L(exit) |
137 | |
138 | pcmpeqb (%rax), %xmm1 |
139 | pmovmskb %xmm1, %edx |
140 | test %edx, %edx |
141 | lea 16(%rax), %rax |
142 | jnz L(exit) |
143 | |
144 | pcmpeqb (%rax), %xmm2 |
145 | pmovmskb %xmm2, %edx |
146 | test %edx, %edx |
147 | lea 16(%rax), %rax |
148 | jnz L(exit) |
149 | |
150 | pcmpeqb (%rax), %xmm3 |
151 | pmovmskb %xmm3, %edx |
152 | test %edx, %edx |
153 | lea 16(%rax), %rax |
154 | jnz L(exit) |
155 | |
156 | pcmpeqb (%rax), %xmm0 |
157 | pmovmskb %xmm0, %edx |
158 | test %edx, %edx |
159 | lea 16(%rax), %rax |
160 | jnz L(exit) |
161 | |
162 | pcmpeqb (%rax), %xmm1 |
163 | pmovmskb %xmm1, %edx |
164 | test %edx, %edx |
165 | lea 16(%rax), %rax |
166 | jnz L(exit) |
167 | |
168 | pcmpeqb (%rax), %xmm2 |
169 | pmovmskb %xmm2, %edx |
170 | test %edx, %edx |
171 | lea 16(%rax), %rax |
172 | jnz L(exit) |
173 | |
174 | pcmpeqb (%rax), %xmm3 |
175 | pmovmskb %xmm3, %edx |
176 | test %edx, %edx |
177 | lea 16(%rax), %rax |
178 | jnz L(exit) |
179 | /* No NUL so far: continue with the 64-byte loop that follows. */ |
180 | and $-0x40, %rax /* Round rax down to a 64-byte boundary; blocks already tested may be tested again. */ |
181 | |
182 | .p2align 4 |
183 | L(aligned_64): /* Loop: test 64 bytes (four 16-byte blocks) at rax per pass; xmm0-xmm3 are all-zero on every entry. */ |
184 | pcmpeqb (%rax), %xmm0 |
185 | pcmpeqb 16(%rax), %xmm1 |
186 | pcmpeqb 32(%rax), %xmm2 |
187 | pcmpeqb 48(%rax), %xmm3 |
188 | pmovmskb %xmm0, %edx /* edx, r11d, r10d, r9d = NUL masks of blocks 1-4. */ |
189 | pmovmskb %xmm1, %r11d |
190 | pmovmskb %xmm2, %r10d |
191 | pmovmskb %xmm3, %r9d |
192 | or %edx, %r9d /* Combine all four masks in r9d; this destroys the block-4 mask. */ |
193 | or %r11d, %r9d |
194 | or %r10d, %r9d /* ZF is set here only when none of the 64 bytes is NUL. */ |
195 | lea 64(%rax), %rax /* Advance by 64; lea leaves ZF untouched for the jz. */ |
196 | jz L(aligned_64) |
197 | /* A NUL was seen. Pick the first block that has one and leave rax just past that block, edx = its mask. */ |
198 | test %edx, %edx |
199 | jnz L(aligned_64_exit_16) |
200 | test %r11d, %r11d |
201 | jnz L(aligned_64_exit_32) |
202 | test %r10d, %r10d |
203 | jnz L(aligned_64_exit_48) |
204 | |
205 | L(aligned_64_exit_64): /* Blocks 1-3 are clean, so the NUL is in block 4; rax is already just past it. */ |
206 | pmovmskb %xmm3, %edx /* Rebuild the block-4 mask from xmm3. */ |
207 | jmp L(exit) |
208 | |
209 | L(aligned_64_exit_48): /* NUL first appears in block 3. */ |
210 | lea -16(%rax), %rax |
211 | mov %r10d, %edx |
212 | jmp L(exit) |
213 | |
214 | L(aligned_64_exit_32): /* NUL first appears in block 2. */ |
215 | lea -32(%rax), %rax |
216 | mov %r11d, %edx |
217 | jmp L(exit) |
218 | |
219 | L(aligned_64_exit_16): /* NUL is in block 1; edx already holds its mask. */ |
220 | lea -48(%rax), %rax /* Execution continues into L(exit). */ |
221 | |
222 | L(exit): /* In: rax = address just past the 16-byte block holding the NUL, rcx = rdi + 16, edx = NUL mask of that block. */ |
223 | sub %rcx, %rax /* rax = 64-bit offset of that block from rdi. */ |
224 | test %dl, %dl |
225 | jz L(exit_high) /* No NUL in bytes 0-7 of the block: examine bytes 8-15. */ |
226 | test $0x01, %dl /* Lowest set bit i of dl selects L(exit_tail<i>), which adds i to rax. */ |
227 | jnz L(exit_tail0) |
228 | |
229 | test $0x02, %dl |
230 | jnz L(exit_tail1) |
231 | |
232 | test $0x04, %dl |
233 | jnz L(exit_tail2) |
234 | |
235 | test $0x08, %dl |
236 | jnz L(exit_tail3) |
237 | |
238 | test $0x10, %dl |
239 | jnz L(exit_tail4) |
240 | |
241 | test $0x20, %dl |
242 | jnz L(exit_tail5) |
243 | |
244 | test $0x40, %dl |
245 | jnz L(exit_tail6) |
246 | add $7, %rax /* dl is non-zero with bits 0-6 clear, so the NUL is byte 7. */ |
247 | L(exit_tail0): |
248 | jmp L(StartStrcpyPart) |
249 | /* Every add below targets rax, not eax: a 32-bit write would clear bits 63:32 of the offset computed at L(exit). */ |
250 | .p2align 4 |
251 | L(exit_high): |
252 | add $8, %rax /* Account for bytes 0-7, then decode dh the same way as dl. */ |
253 | test $0x01, %dh |
254 | jnz L(exit_tail0) |
255 | |
256 | test $0x02, %dh |
257 | jnz L(exit_tail1) |
258 | |
259 | test $0x04, %dh |
260 | jnz L(exit_tail2) |
261 | |
262 | test $0x08, %dh |
263 | jnz L(exit_tail3) |
264 | |
265 | test $0x10, %dh |
266 | jnz L(exit_tail4) |
267 | |
268 | test $0x20, %dh |
269 | jnz L(exit_tail5) |
270 | |
271 | test $0x40, %dh |
272 | jnz L(exit_tail6) |
273 | add $7, %rax /* Bits 0-6 of dh are clear: this path treats the NUL as byte 15 of the block. */ |
274 | jmp L(StartStrcpyPart) |
275 | /* L(exit_tailN): rax += N, then start copying. Targets 7-15 are used only by the byte checks at function entry, where rax is 0. */ |
276 | .p2align 4 |
277 | L(exit_tail1): |
278 | add $1, %rax |
279 | jmp L(StartStrcpyPart) |
280 | |
281 | .p2align 4 |
282 | L(exit_tail2): |
283 | add $2, %rax |
284 | jmp L(StartStrcpyPart) |
285 | |
286 | .p2align 4 |
287 | L(exit_tail3): |
288 | add $3, %rax |
289 | jmp L(StartStrcpyPart) |
290 | |
291 | .p2align 4 |
292 | L(exit_tail4): |
293 | add $4, %rax |
294 | jmp L(StartStrcpyPart) |
295 | |
296 | .p2align 4 |
297 | L(exit_tail5): |
298 | add $5, %rax |
299 | jmp L(StartStrcpyPart) |
300 | |
301 | .p2align 4 |
302 | L(exit_tail6): |
303 | add $6, %rax |
304 | jmp L(StartStrcpyPart) |
305 | |
306 | .p2align 4 |
307 | L(exit_tail7): |
308 | add $7, %rax |
309 | jmp L(StartStrcpyPart) |
310 | |
311 | .p2align 4 |
312 | L(exit_tail8): |
313 | add $8, %rax |
314 | jmp L(StartStrcpyPart) |
315 | |
316 | .p2align 4 |
317 | L(exit_tail9): |
318 | add $9, %rax |
319 | jmp L(StartStrcpyPart) |
320 | |
321 | .p2align 4 |
322 | L(exit_tail10): |
323 | add $10, %rax |
324 | jmp L(StartStrcpyPart) |
325 | |
326 | .p2align 4 |
327 | L(exit_tail11): |
328 | add $11, %rax |
329 | jmp L(StartStrcpyPart) |
330 | |
331 | .p2align 4 |
332 | L(exit_tail12): |
333 | add $12, %rax |
334 | jmp L(StartStrcpyPart) |
335 | |
336 | .p2align 4 |
337 | L(exit_tail13): |
338 | add $13, %rax |
339 | jmp L(StartStrcpyPart) |
340 | |
341 | .p2align 4 |
342 | L(exit_tail14): |
343 | add $14, %rax |
344 | jmp L(StartStrcpyPart) |
345 | |
346 | .p2align 4 |
347 | L(exit_tail15): |
348 | add $15, %rax /* No jmp needed: execution continues into L(StartStrcpyPart). */ |
349 | |
350 | .p2align 4 |
351 | L(StartStrcpyPart): /* Part 2: copy. In: rax = offset from rdi of the NUL found above, rsi = string to append. */ |
352 | mov %rsi, %rcx /* rcx = read pointer. */ |
353 | lea (%rdi, %rax), %rdx /* rdx = write pointer, starting on the NUL at rdi + rax. */ |
354 | # ifdef USE_AS_STRNCAT |
355 | test %r8, %r8 |
356 | jz L(StrncatExit0) /* Count in r8 is 0: return without writing. */ |
357 | cmp $8, %r8 |
358 | jbe L(StrncatExit8Bytes) /* Count is 1-8: use the byte checks that also watch the count. */ |
359 | # endif |
360 | cmpb $0, (%rcx) /* NUL at byte i of the source: L(Exit<i+1>) copies i + 1 bytes, NUL included, and returns. */ |
361 | jz L(Exit1) |
362 | cmpb $0, 1(%rcx) |
363 | jz L(Exit2) |
364 | cmpb $0, 2(%rcx) |
365 | jz L(Exit3) |
366 | cmpb $0, 3(%rcx) |
367 | jz L(Exit4) |
368 | cmpb $0, 4(%rcx) |
369 | jz L(Exit5) |
370 | cmpb $0, 5(%rcx) |
371 | jz L(Exit6) |
372 | cmpb $0, 6(%rcx) |
373 | jz L(Exit7) |
374 | cmpb $0, 7(%rcx) |
375 | jz L(Exit8) |
376 | cmpb $0, 8(%rcx) |
377 | jz L(Exit9) |
378 | # ifdef USE_AS_STRNCAT |
379 | cmp $16, %r8 |
380 | jb L(StrncatExit15Bytes) /* Count is 9-15 and bytes 0-8 are not NUL. */ |
381 | # endif |
382 | cmpb $0, 9(%rcx) |
383 | jz L(Exit10) |
384 | cmpb $0, 10(%rcx) |
385 | jz L(Exit11) |
386 | cmpb $0, 11(%rcx) |
387 | jz L(Exit12) |
388 | cmpb $0, 12(%rcx) |
389 | jz L(Exit13) |
390 | cmpb $0, 13(%rcx) |
391 | jz L(Exit14) |
392 | cmpb $0, 14(%rcx) |
393 | jz L(Exit15) |
394 | cmpb $0, 15(%rcx) |
395 | jz L(Exit16) |
396 | # ifdef USE_AS_STRNCAT |
397 | cmp $16, %r8 |
398 | je L(StrncatExit16) /* Count is exactly 16 and the first 16 bytes hold no NUL. */ |
399 | # define USE_AS_STRNCPY |
400 | # endif |
401 | /* The first 16 source bytes are not NUL; execution continues in the included file, whose code is not visible here. */ |
402 | # include "strcpy-ssse3.S" |
403 | |
404 | .p2align 4 |
405 | L(CopyFrom1To16Bytes): /* NOTE(review): no jump to this label is visible in this file; presumably entered from strcpy-ssse3.S with a NUL mask in ax and an offset in rsi -- confirm. */ |
406 | add %rsi, %rdx /* Advance the write pointer by rsi. */ |
407 | add %rsi, %rcx /* Advance the read pointer by rsi. */ |
408 | |
409 | test %al, %al |
410 | jz L(ExitHigh) /* al is 0: decide from the bits of ah instead. */ |
411 | test $0x01, %al /* Lowest set bit i of al selects L(Exit<i+1>), which copies i + 1 bytes. */ |
412 | jnz L(Exit1) |
413 | test $0x02, %al |
414 | jnz L(Exit2) |
415 | test $0x04, %al |
416 | jnz L(Exit3) |
417 | test $0x08, %al |
418 | jnz L(Exit4) |
419 | test $0x10, %al |
420 | jnz L(Exit5) |
421 | test $0x20, %al |
422 | jnz L(Exit6) |
423 | test $0x40, %al |
424 | jnz L(Exit7) |
425 | movlpd (%rcx), %xmm0 /* al is non-zero with bits 0-6 clear, i.e. bit 7: copy 8 bytes. */ |
426 | movlpd %xmm0, (%rdx) |
427 | mov %rdi, %rax /* Return rdi. */ |
428 | ret |
429 | |
430 | .p2align 4 |
431 | L(ExitHigh): /* Bit i of ah set selects L(Exit<i+9>), which copies i + 9 bytes. */ |
432 | test $0x01, %ah |
433 | jnz L(Exit9) |
434 | test $0x02, %ah |
435 | jnz L(Exit10) |
436 | test $0x04, %ah |
437 | jnz L(Exit11) |
438 | test $0x08, %ah |
439 | jnz L(Exit12) |
440 | test $0x10, %ah |
441 | jnz L(Exit13) |
442 | test $0x20, %ah |
443 | jnz L(Exit14) |
444 | test $0x40, %ah |
445 | jnz L(Exit15) |
446 | movlpd (%rcx), %xmm0 /* Bits 0-6 of ah are clear: copy all 16 bytes as two 8-byte moves. */ |
447 | movlpd 8(%rcx), %xmm1 |
448 | movlpd %xmm0, (%rdx) |
449 | movlpd %xmm1, 8(%rdx) |
450 | mov %rdi, %rax /* Return rdi. */ |
451 | ret |
452 | |
453 | .p2align 4 |
454 | L(StrncatExit1): /* L(StrncatExitN): store a NUL at rdx + N, then run L(ExitN). */ |
455 | xor %ah, %ah |
456 | movb %ah, 1(%rdx) |
457 | L(Exit1): /* L(ExitN): copy N bytes from rcx to rdx and return rdi in rax. */ |
458 | movb (%rcx), %al |
459 | movb %al, (%rdx) |
460 | mov %rdi, %rax |
461 | ret |
462 | |
463 | .p2align 4 |
464 | L(StrncatExit2): |
465 | xor %ah, %ah |
466 | movb %ah, 2(%rdx) |
467 | L(Exit2): /* One 2-byte move. */ |
468 | movw (%rcx), %ax |
469 | movw %ax, (%rdx) |
470 | mov %rdi, %rax |
471 | ret |
472 | |
473 | .p2align 4 |
474 | L(StrncatExit3): |
475 | xor %ah, %ah |
476 | movb %ah, 3(%rdx) |
477 | L(Exit3): /* 2 bytes, then byte 2. */ |
478 | movw (%rcx), %ax |
479 | movw %ax, (%rdx) |
480 | movb 2(%rcx), %al |
481 | movb %al, 2(%rdx) |
482 | mov %rdi, %rax |
483 | ret |
484 | |
485 | .p2align 4 |
486 | L(StrncatExit4): |
487 | xor %ah, %ah |
488 | movb %ah, 4(%rdx) |
489 | L(Exit4): /* One 4-byte move. */ |
490 | mov (%rcx), %eax |
491 | mov %eax, (%rdx) |
492 | mov %rdi, %rax |
493 | ret |
494 | |
495 | .p2align 4 |
496 | L(StrncatExit5): |
497 | xor %ah, %ah |
498 | movb %ah, 5(%rdx) |
499 | L(Exit5): /* 4 bytes, then byte 4. */ |
500 | mov (%rcx), %eax |
501 | mov %eax, (%rdx) |
502 | movb 4(%rcx), %al |
503 | movb %al, 4(%rdx) |
504 | mov %rdi, %rax |
505 | ret |
506 | |
507 | .p2align 4 |
508 | L(StrncatExit6): |
509 | xor %ah, %ah |
510 | movb %ah, 6(%rdx) |
511 | L(Exit6): /* 4 bytes, then bytes 4-5. */ |
512 | mov (%rcx), %eax |
513 | mov %eax, (%rdx) |
514 | movw 4(%rcx), %ax |
515 | movw %ax, 4(%rdx) |
516 | mov %rdi, %rax |
517 | ret |
518 | |
519 | .p2align 4 |
520 | L(StrncatExit7): |
521 | xor %ah, %ah |
522 | movb %ah, 7(%rdx) |
523 | L(Exit7): /* Two 4-byte moves at offsets 0 and 3; they overlap on byte 3. */ |
524 | mov (%rcx), %eax |
525 | mov %eax, (%rdx) |
526 | mov 3(%rcx), %eax |
527 | mov %eax, 3(%rdx) |
528 | mov %rdi, %rax |
529 | ret |
530 | |
531 | .p2align 4 |
532 | L(StrncatExit8): |
533 | xor %ah, %ah |
534 | movb %ah, 8(%rdx) |
535 | L(Exit8): /* One 8-byte move through xmm0. */ |
536 | movlpd (%rcx), %xmm0 |
537 | movlpd %xmm0, (%rdx) |
538 | mov %rdi, %rax |
539 | ret |
540 | |
541 | .p2align 4 |
542 | L(StrncatExit9): |
543 | xor %ah, %ah |
544 | movb %ah, 9(%rdx) |
545 | L(Exit9): /* 8 bytes, then byte 8. */ |
546 | movlpd (%rcx), %xmm0 |
547 | movlpd %xmm0, (%rdx) |
548 | movb 8(%rcx), %al |
549 | movb %al, 8(%rdx) |
550 | mov %rdi, %rax |
551 | ret |
552 | |
553 | .p2align 4 |
554 | L(StrncatExit10): |
555 | xor %ah, %ah |
556 | movb %ah, 10(%rdx) |
557 | L(Exit10): /* 8 bytes, then bytes 8-9. */ |
558 | movlpd (%rcx), %xmm0 |
559 | movlpd %xmm0, (%rdx) |
560 | movw 8(%rcx), %ax |
561 | movw %ax, 8(%rdx) |
562 | mov %rdi, %rax |
563 | ret |
564 | |
565 | .p2align 4 |
566 | L(StrncatExit11): |
567 | xor %ah, %ah |
568 | movb %ah, 11(%rdx) |
569 | L(Exit11): /* 8 bytes, then 4 bytes at offset 7; they overlap on byte 7. */ |
570 | movlpd (%rcx), %xmm0 |
571 | movlpd %xmm0, (%rdx) |
572 | mov 7(%rcx), %eax |
573 | mov %eax, 7(%rdx) |
574 | mov %rdi, %rax |
575 | ret |
576 | |
577 | .p2align 4 |
578 | L(StrncatExit12): |
579 | xor %ah, %ah |
580 | movb %ah, 12(%rdx) |
581 | L(Exit12): /* 8 bytes, then bytes 8-11. */ |
582 | movlpd (%rcx), %xmm0 |
583 | movlpd %xmm0, (%rdx) |
584 | mov 8(%rcx), %eax |
585 | mov %eax, 8(%rdx) |
586 | mov %rdi, %rax |
587 | ret |
588 | |
589 | .p2align 4 |
590 | L(StrncatExit13): |
591 | xor %ah, %ah |
592 | movb %ah, 13(%rdx) |
593 | L(Exit13): /* Two 8-byte moves at offsets 0 and 5; they overlap on bytes 5-7. */ |
594 | movlpd (%rcx), %xmm0 |
595 | movlpd %xmm0, (%rdx) |
596 | movlpd 5(%rcx), %xmm1 |
597 | movlpd %xmm1, 5(%rdx) |
598 | mov %rdi, %rax |
599 | ret |
600 | |
601 | .p2align 4 |
602 | L(StrncatExit14): |
603 | xor %ah, %ah |
604 | movb %ah, 14(%rdx) |
605 | L(Exit14): /* Two 8-byte moves at offsets 0 and 6; they overlap on bytes 6-7. */ |
606 | movlpd (%rcx), %xmm0 |
607 | movlpd %xmm0, (%rdx) |
608 | movlpd 6(%rcx), %xmm1 |
609 | movlpd %xmm1, 6(%rdx) |
610 | mov %rdi, %rax |
611 | ret |
612 | |
613 | .p2align 4 |
614 | L(StrncatExit15): |
615 | xor %ah, %ah |
616 | movb %ah, 15(%rdx) |
617 | L(Exit15): /* Two 8-byte moves at offsets 0 and 7; they overlap on byte 7. */ |
618 | movlpd (%rcx), %xmm0 |
619 | movlpd %xmm0, (%rdx) |
620 | movlpd 7(%rcx), %xmm1 |
621 | movlpd %xmm1, 7(%rdx) |
622 | mov %rdi, %rax |
623 | ret |
624 | |
625 | .p2align 4 |
626 | L(StrncatExit16): |
627 | xor %ah, %ah |
628 | movb %ah, 16(%rdx) |
629 | L(Exit16): /* Two 8-byte moves covering bytes 0-15. */ |
630 | movlpd (%rcx), %xmm0 |
631 | movlpd 8(%rcx), %xmm1 |
632 | movlpd %xmm0, (%rdx) |
633 | movlpd %xmm1, 8(%rdx) |
634 | mov %rdi, %rax |
635 | ret |
636 | |
637 | # ifdef USE_AS_STRNCPY |
638 | /* Count-limited tails. NOTE(review): the jumps into these labels are in strcpy-ssse3.S, not visible here; register roles below are read off this code only. */ |
639 | .p2align 4 |
640 | L(CopyFrom1To16BytesCase2): /* Reached from L(CopyFrom1To16BytesCase2OrCase3) when rax, used as NUL mask, is non-zero. */ |
641 | add $16, %r8 /* presumably undoes a -16 bias so r8 is the count left for this block -- TODO confirm against strcpy-ssse3.S */ |
642 | add %rsi, %rcx /* Advance the read pointer by rsi. */ |
643 | lea (%rsi, %rdx), %rsi /* rsi = rdx + rsi (advanced write pointer), held in rsi while rdx is scratch. */ |
644 | lea -9(%r8), %rdx /* Bit 15 of r8 - 9 is set when r8 is 0-8. */ |
645 | and $1<<7, %dh /* Keep only that bit (bit 7 of dh). */ |
646 | or %al, %dh /* dh is non-zero if that bit is set or a NUL lies in bytes 0-7. */ |
647 | test %dh, %dh |
648 | lea (%rsi), %rdx /* rdx = write pointer again; lea keeps the flags from test. */ |
649 | jz L(ExitHighCase2) /* al is 0 and the bit is clear: handle bytes 8-15. */ |
650 | /* For i = 1..7 in turn: NUL at byte i-1 -> L(Exit<i>); else count equal to i -> L(StrncatExit<i>). */ |
651 | test $0x01, %al |
652 | jnz L(Exit1) |
653 | cmp $1, %r8 |
654 | je L(StrncatExit1) |
655 | test $0x02, %al |
656 | jnz L(Exit2) |
657 | cmp $2, %r8 |
658 | je L(StrncatExit2) |
659 | test $0x04, %al |
660 | jnz L(Exit3) |
661 | cmp $3, %r8 |
662 | je L(StrncatExit3) |
663 | test $0x08, %al |
664 | jnz L(Exit4) |
665 | cmp $4, %r8 |
666 | je L(StrncatExit4) |
667 | test $0x10, %al |
668 | jnz L(Exit5) |
669 | cmp $5, %r8 |
670 | je L(StrncatExit5) |
671 | test $0x20, %al |
672 | jnz L(Exit6) |
673 | cmp $6, %r8 |
674 | je L(StrncatExit6) |
675 | test $0x40, %al |
676 | jnz L(Exit7) |
677 | cmp $7, %r8 |
678 | je L(StrncatExit7) |
679 | movlpd (%rcx), %xmm0 /* Otherwise copy 8 bytes. */ |
680 | movlpd %xmm0, (%rdx) |
681 | lea 7(%rdx), %rax /* rax = address of the 8th byte just written. */ |
682 | cmpb $1, (%rax) /* CF is set only when that byte is 0. */ |
683 | sbb $-1, %rax /* rax is unchanged when CF is set, else rax + 1. */ |
684 | xor %cl, %cl |
685 | movb %cl, (%rax) /* NUL goes at offset 7 if byte 7 was already NUL, otherwise at offset 8. */ |
686 | mov %rdi, %rax /* Return rdi. */ |
687 | ret |
688 | |
689 | .p2align 4 |
690 | L(ExitHighCase2): /* Same alternation for i = 9..15, using the bits of ah. */ |
691 | test $0x01, %ah |
692 | jnz L(Exit9) |
693 | cmp $9, %r8 |
694 | je L(StrncatExit9) |
695 | test $0x02, %ah |
696 | jnz L(Exit10) |
697 | cmp $10, %r8 |
698 | je L(StrncatExit10) |
699 | test $0x04, %ah |
700 | jnz L(Exit11) |
701 | cmp $11, %r8 |
702 | je L(StrncatExit11) |
703 | test $0x8, %ah |
704 | jnz L(Exit12) |
705 | cmp $12, %r8 |
706 | je L(StrncatExit12) |
707 | test $0x10, %ah |
708 | jnz L(Exit13) |
709 | cmp $13, %r8 |
710 | je L(StrncatExit13) |
711 | test $0x20, %ah |
712 | jnz L(Exit14) |
713 | cmp $14, %r8 |
714 | je L(StrncatExit14) |
715 | test $0x40, %ah |
716 | jnz L(Exit15) |
717 | cmp $15, %r8 |
718 | je L(StrncatExit15) |
719 | movlpd (%rcx), %xmm0 /* Copy 16 bytes with no extra NUL store; presumably byte 15 is the NUL here (bits 0-14 clear in a non-zero mask) -- confirm. */ |
720 | movlpd %xmm0, (%rdx) |
721 | movlpd 8(%rcx), %xmm1 |
722 | movlpd %xmm1, 8(%rdx) |
723 | mov %rdi, %rax /* Return rdi. */ |
724 | ret |
725 | |
726 | L(CopyFrom1To16BytesCase2OrCase3): /* Dispatch: rax non-zero -> Case2, rax zero -> Case3 just below. */ |
727 | test %rax, %rax |
728 | jnz L(CopyFrom1To16BytesCase2) |
729 | |
730 | .p2align 4 |
731 | L(CopyFrom1To16BytesCase3): /* Only the count in r8 decides how many bytes are copied on this path. */ |
732 | add $16, %r8 /* presumably undoes a -16 bias, as in Case2 -- TODO confirm */ |
733 | add %rsi, %rdx /* Advance the write pointer by rsi. */ |
734 | add %rsi, %rcx /* Advance the read pointer by rsi. */ |
735 | |
736 | cmp $8, %r8 |
737 | ja L(ExitHighCase3) /* Unsigned compare: r8 above 8. */ |
738 | cmp $1, %r8 |
739 | je L(StrncatExit1) |
740 | cmp $2, %r8 |
741 | je L(StrncatExit2) |
742 | cmp $3, %r8 |
743 | je L(StrncatExit3) |
744 | cmp $4, %r8 |
745 | je L(StrncatExit4) |
746 | cmp $5, %r8 |
747 | je L(StrncatExit5) |
748 | cmp $6, %r8 |
749 | je L(StrncatExit6) |
750 | cmp $7, %r8 |
751 | je L(StrncatExit7) |
752 | movlpd (%rcx), %xmm0 /* r8 matched none of 1-7: copy 8 bytes and store the NUL at offset 8. */ |
753 | movlpd %xmm0, (%rdx) |
754 | xor %ah, %ah |
755 | movb %ah, 8(%rdx) |
756 | mov %rdi, %rax /* Return rdi. */ |
757 | ret |
758 | |
759 | .p2align 4 |
760 | L(ExitHighCase3): /* r8 is above 8 here. */ |
761 | cmp $9, %r8 |
762 | je L(StrncatExit9) |
763 | cmp $10, %r8 |
764 | je L(StrncatExit10) |
765 | cmp $11, %r8 |
766 | je L(StrncatExit11) |
767 | cmp $12, %r8 |
768 | je L(StrncatExit12) |
769 | cmp $13, %r8 |
770 | je L(StrncatExit13) |
771 | cmp $14, %r8 |
772 | je L(StrncatExit14) |
773 | cmp $15, %r8 |
774 | je L(StrncatExit15) |
775 | movlpd (%rcx), %xmm0 /* r8 matched none of 9-15: copy 16 bytes and store the NUL at offset 16. */ |
776 | movlpd %xmm0, (%rdx) |
777 | movlpd 8(%rcx), %xmm1 |
778 | movlpd %xmm1, 8(%rdx) |
779 | xor %ah, %ah |
780 | movb %ah, 16(%rdx) |
781 | mov %rdi, %rax /* Return rdi. */ |
782 | ret |
783 | |
784 | .p2align 4 |
785 | L(StrncatExit0): /* Reached from L(StartStrcpyPart) when r8 is 0: return rdi without writing anything. */ |
786 | mov %rdi, %rax |
787 | ret |
788 | |
789 | .p2align 4 |
790 | L(StrncatExit15Bytes): /* Reached from L(StartStrcpyPart) with r8 below 16 after source bytes 0-8 tested non-NUL (r8 is above 8 on that path). */ |
791 | cmp $9, %r8 /* Alternate: count reached -> L(StrncatExit<i>); NUL at byte i -> L(Exit<i+1>). */ |
792 | je L(StrncatExit9) |
793 | cmpb $0, 9(%rcx) |
794 | jz L(Exit10) |
795 | cmp $10, %r8 |
796 | je L(StrncatExit10) |
797 | cmpb $0, 10(%rcx) |
798 | jz L(Exit11) |
799 | cmp $11, %r8 |
800 | je L(StrncatExit11) |
801 | cmpb $0, 11(%rcx) |
802 | jz L(Exit12) |
803 | cmp $12, %r8 |
804 | je L(StrncatExit12) |
805 | cmpb $0, 12(%rcx) |
806 | jz L(Exit13) |
807 | cmp $13, %r8 |
808 | je L(StrncatExit13) |
809 | cmpb $0, 13(%rcx) |
810 | jz L(Exit14) |
811 | cmp $14, %r8 |
812 | je L(StrncatExit14) |
813 | movlpd (%rcx), %xmm0 /* Bytes 0-13 are not NUL and the count is 15: copy 15 bytes as two overlapping 8-byte moves. */ |
814 | movlpd %xmm0, (%rdx) |
815 | movlpd 7(%rcx), %xmm1 |
816 | movlpd %xmm1, 7(%rdx) |
817 | lea 14(%rdx), %rax /* rax = address of the 15th byte just written. */ |
818 | cmpb $1, (%rax) /* CF is set only when that byte is 0. */ |
819 | sbb $-1, %rax /* rax is unchanged when CF is set, else rax + 1. */ |
820 | xor %cl, %cl |
821 | movb %cl, (%rax) /* NUL goes at offset 14 if byte 14 was already NUL, otherwise at offset 15. */ |
822 | mov %rdi, %rax /* Return rdi. */ |
823 | ret |
824 | |
825 | .p2align 4 |
826 | L(StrncatExit8Bytes): /* Reached from L(StartStrcpyPart) when r8 is 1-8. */ |
827 | cmpb $0, (%rcx) /* Alternate: NUL at byte i -> L(Exit<i+1>); count equal to i+1 -> L(StrncatExit<i+1>). */ |
828 | jz L(Exit1) |
829 | cmp $1, %r8 |
830 | je L(StrncatExit1) |
831 | cmpb $0, 1(%rcx) |
832 | jz L(Exit2) |
833 | cmp $2, %r8 |
834 | je L(StrncatExit2) |
835 | cmpb $0, 2(%rcx) |
836 | jz L(Exit3) |
837 | cmp $3, %r8 |
838 | je L(StrncatExit3) |
839 | cmpb $0, 3(%rcx) |
840 | jz L(Exit4) |
841 | cmp $4, %r8 |
842 | je L(StrncatExit4) |
843 | cmpb $0, 4(%rcx) |
844 | jz L(Exit5) |
845 | cmp $5, %r8 |
846 | je L(StrncatExit5) |
847 | cmpb $0, 5(%rcx) |
848 | jz L(Exit6) |
849 | cmp $6, %r8 |
850 | je L(StrncatExit6) |
851 | cmpb $0, 6(%rcx) |
852 | jz L(Exit7) |
853 | cmp $7, %r8 |
854 | je L(StrncatExit7) |
855 | movlpd (%rcx), %xmm0 /* Bytes 0-6 are not NUL and the count is 8: copy 8 bytes. */ |
856 | movlpd %xmm0, (%rdx) |
857 | lea 7(%rdx), %rax /* rax = address of the 8th byte just written. */ |
858 | cmpb $1, (%rax) /* CF is set only when that byte is 0. */ |
859 | sbb $-1, %rax /* rax is unchanged when CF is set, else rax + 1. */ |
860 | xor %cl, %cl |
861 | movb %cl, (%rax) /* NUL goes at offset 7 if byte 7 was already NUL, otherwise at offset 8. */ |
862 | mov %rdi, %rax /* Return rdi. */ |
863 | ret |
864 | |
865 | # endif |
866 | END (STRCAT) |
867 | #endif |
868 | |