| 1 | /* strlen used for beginning of str{n}cat using EVEX 256/512. | 
|---|
| 2 | Copyright (C) 2011-2023 Free Software Foundation, Inc. | 
|---|
| 3 | This file is part of the GNU C Library. | 
|---|
| 4 |  | 
|---|
| 5 | The GNU C Library is free software; you can redistribute it and/or | 
|---|
| 6 | modify it under the terms of the GNU Lesser General Public | 
|---|
| 7 | License as published by the Free Software Foundation; either | 
|---|
| 8 | version 2.1 of the License, or (at your option) any later version. | 
|---|
| 9 |  | 
|---|
| 10 | The GNU C Library is distributed in the hope that it will be useful, | 
|---|
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|---|
| 13 | Lesser General Public License for more details. | 
|---|
| 14 |  | 
|---|
| 15 | You should have received a copy of the GNU Lesser General Public | 
|---|
| 16 | License along with the GNU C Library; if not, see | 
|---|
| 17 | <https://www.gnu.org/licenses/>.  */ | 
|---|
| 18 |  | 
|---|
| 19 |  | 
|---|
| 20 | /* NOTE: This file is meant to be included by strcat-evex or | 
|---|
| 21 | strncat-evex and does not stand alone.  Before including it, %rdi | 
|---|
| 22 | must be saved in %rax.  */ | 
|---|
| 23 |  | 
|---|
| 24 |  | 
|---|
| 25 | /* Simple strlen implementation that ends at | 
|---|
| 26 | L(strcat_strlen_done).  On entry %rdi points at the string; on reaching L(strcat_strlen_done) %rdi points at the first null CHAR at or after that address.  Writes %r8, %rcx (through VRCX), %k0, %k1, %k3, VMM(0)-VMM(3), VZERO_128 and the flags.  */ | 
|---|
| 27 | vpxorq	%VZERO_128, %VZERO_128, %VZERO_128	/* Zero the compare operand (VZERO_128 is presumably the 128-bit view of VZERO -- confirm in the includer).  */ | 
|---|
| 28 | movq	%rdi, %r8 | 
|---|
| 29 | andq	$(VEC_SIZE * -1), %r8	/* %r8 = %rdi rounded down to a VEC_SIZE boundary (VEC_SIZE assumed to be a power of two).  */ | 
|---|
| 30 | VPCMPEQ	(%r8), %VZERO, %k0	/* Compare the aligned vector containing the start against zero; presumably one mask bit per CHAR.  */ | 
|---|
| 31 | KMOV	%k0, %VRCX | 
|---|
| 32 | #ifdef USE_AS_WCSCPY | 
|---|
| 33 | subl	%r8d, %edi	/* %edi = byte offset of the start inside the aligned vector.  */ | 
|---|
| 34 | shrl	$2, %edi	/* Byte offset / 4 = index in 4-byte CHARs.  */ | 
|---|
| 35 | #endif | 
|---|
| 36 | shrx	%VRDI, %VRCX, %VRCX	/* Drop mask bits for positions before the start.  In the non-wide build the count is %rdi itself, so this relies on shrx using only the low bits of the count -- confirm these match the offset within VEC_SIZE.  */ | 
|---|
| 37 | #ifdef USE_AS_WCSCPY | 
|---|
| 38 | movq	%rax, %rdi	/* Restore the original pointer, which the includer saved in %rax.  */ | 
|---|
| 39 | #endif | 
|---|
| 40 | test	%VRCX, %VRCX | 
|---|
| 41 | jnz	L(bsf_and_done_v0)	/* Null in the first vector: after the shift the bit index is relative to %rdi.  */ | 
|---|
| 42 |  | 
|---|
| 43 |  | 
|---|
| 44 | VPCMPEQ	VEC_SIZE(%r8), %VZERO, %k0	/* Aligned vector 1.  */ | 
|---|
| 45 | KMOV	%k0, %VRCX | 
|---|
| 46 | leaq	(VEC_SIZE)(%r8), %rdi	/* %rdi = %r8 + VEC_SIZE: base address that the bsf_and_done_vN exits below add to.  */ | 
|---|
| 47 | test	%VRCX, %VRCX | 
|---|
| 48 | jnz	L(bsf_and_done_v0) | 
|---|
| 49 |  | 
|---|
| 50 | VPCMPEQ	(VEC_SIZE * 2)(%r8), %VZERO, %k0	/* Aligned vector 2 = %rdi + 1 * VEC_SIZE, hence exit v1.  */ | 
|---|
| 51 | KMOV	%k0, %VRCX | 
|---|
| 52 | test	%VRCX, %VRCX | 
|---|
| 53 | jnz	L(bsf_and_done_v1) | 
|---|
| 54 |  | 
|---|
| 55 | VPCMPEQ	(VEC_SIZE * 3)(%r8), %VZERO, %k0	/* Aligned vector 3 = %rdi + 2 * VEC_SIZE, hence exit v2.  */ | 
|---|
| 56 | KMOV	%k0, %VRCX | 
|---|
| 57 | test	%VRCX, %VRCX | 
|---|
| 58 | jnz	L(bsf_and_done_v2) | 
|---|
| 59 |  | 
|---|
| 60 | VPCMPEQ	(VEC_SIZE * 4)(%r8), %VZERO, %k0	/* Aligned vector 4 = %rdi + 3 * VEC_SIZE, hence exit v3.  */ | 
|---|
| 61 | KMOV	%k0, %VRCX | 
|---|
| 62 | test	%VRCX, %VRCX | 
|---|
| 63 | jnz	L(bsf_and_done_v3) | 
|---|
| 64 |  | 
|---|
| 65 | andq	$-(VEC_SIZE * 4), %rdi	/* Round %rdi down to a 4 * VEC_SIZE boundary for the loop; the first iteration may re-scan vectors already checked above.  */ | 
|---|
| 66 | .p2align 4,, 8	/* Align the loop head to 16 bytes, but only if that needs at most 8 bytes of padding.  */ | 
|---|
| 67 | L(loop_2x_vec):	/* Despite the label name, each iteration examines four vectors.  */ | 
|---|
| 68 | VMOVA	(VEC_SIZE * 4)(%rdi), %VMM(0) | 
|---|
| 69 | VPMIN	(VEC_SIZE * 5)(%rdi), %VMM(0), %VMM(1)	/* VMM(1) combines vectors 0 and 1; VPMIN is presumably an element-wise unsigned minimum, so a zero in either input survives.  */ | 
|---|
| 70 | VMOVA	(VEC_SIZE * 6)(%rdi), %VMM(2) | 
|---|
| 71 | VPMIN	(VEC_SIZE * 7)(%rdi), %VMM(2), %VMM(3)	/* VMM(3) combines vectors 2 and 3 the same way.  */ | 
|---|
| 72 | VPTESTN	%VMM(1), %VMM(1), %k1	/* Presumably sets one %k1 bit per zero element of VMM(1).  */ | 
|---|
| 73 | VPTESTN	%VMM(3), %VMM(3), %k3 | 
|---|
| 74 | subq	$(VEC_SIZE * -4), %rdi	/* %rdi += 4 * VEC_SIZE, so it now points at vector 0 of the group just loaded.  Written as subtracting a negative, presumably for a shorter immediate encoding -- TODO confirm.  */ | 
|---|
| 75 | KORTEST	%k1, %k3 | 
|---|
| 76 | jz	L(loop_2x_vec)	/* Both masks empty: no null in these four vectors, keep scanning.  */ | 
|---|
| 77 |  | 
|---|
| 78 | VPTESTN	%VMM(0), %VMM(0), %k0	/* A null was seen; test vector 0 on its own first.  */ | 
|---|
| 79 | KMOV	%k0, %VRCX | 
|---|
| 80 | test	%VRCX, %VRCX | 
|---|
| 81 | jnz	L(bsf_and_done_v0) | 
|---|
| 82 |  | 
|---|
| 83 | KMOV	%k1, %VRCX	/* %k1 covers vectors 0 and 1; vector 0 was just ruled out, so any bit here comes from vector 1.  */ | 
|---|
| 84 | test	%VRCX, %VRCX | 
|---|
| 85 | jnz	L(bsf_and_done_v1) | 
|---|
| 86 |  | 
|---|
| 87 | VPTESTN	%VMM(2), %VMM(2), %k0	/* Test vector 2 on its own.  */ | 
|---|
| 88 | KMOV	%k0, %VRCX | 
|---|
| 89 | test	%VRCX, %VRCX | 
|---|
| 90 | jnz	L(bsf_and_done_v2) | 
|---|
| 91 |  | 
|---|
| 92 | KMOV	%k3, %VRCX	/* Vectors 0-2 are ruled out, so the null is in vector 3; fall through to the v3 exit.  */ | 
|---|
| 93 | L(bsf_and_done_v3): | 
|---|
| 94 | addq	$VEC_SIZE, %rdi	/* v3 = one extra VEC_SIZE, then fall through to v2.  */ | 
|---|
| 95 | L(bsf_and_done_v2): | 
|---|
| 96 | bsf	%VRCX, %VRCX	/* Index of the lowest set mask bit; the mask is non-zero on every path that reaches here.  */ | 
|---|
| 97 | leaq	(VEC_SIZE * 2)(%rdi, %rcx, CHAR_SIZE), %rdi	/* %rdi += 2 * VEC_SIZE + index * CHAR_SIZE.  */ | 
|---|
| 98 | jmp	L(strcat_strlen_done) | 
|---|
| 99 |  | 
|---|
| 100 | .p2align 4,, 4	/* Align to 16 bytes only if that needs at most 4 bytes of padding.  */ | 
|---|
| 101 | L(bsf_and_done_v1): | 
|---|
| 102 | addq	$VEC_SIZE, %rdi	/* v1 = one extra VEC_SIZE, then fall through to v0.  */ | 
|---|
| 103 | L(bsf_and_done_v0): | 
|---|
| 104 | bsf	%VRCX, %VRCX	/* Index of the first null CHAR relative to %rdi.  */ | 
|---|
| 105 | #ifdef USE_AS_WCSCPY | 
|---|
| 106 | leaq	(%rdi, %rcx, CHAR_SIZE), %rdi	/* Scale the CHAR index to bytes.  */ | 
|---|
| 107 | #else | 
|---|
| 108 | addq	%rcx, %rdi	/* Index added unscaled; assumes CHAR_SIZE is 1 in this build -- TODO confirm.  */ | 
|---|
| 109 | #endif | 
|---|
| 110 | L(strcat_strlen_done):	/* %rdi = address of the null terminator; the including file continues from here.  */ | 
|---|
| 111 |  | 
|---|