1 | /* memcmp with SSE4.1, wmemcmp with SSE4.1 |
2 | Copyright (C) 2010-2021 Free Software Foundation, Inc. |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <https://www.gnu.org/licenses/>. */ |
19 | |
20 | #if IS_IN (libc) |
21 | |
22 | # include <sysdep.h> |
23 | |
24 | # ifndef MEMCMP |
25 | # define MEMCMP __memcmp_sse4_1 |
26 | # endif |
27 | |
28 | # define JMPTBL(I, B) (I - B) |
29 | |
30 | # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ |
31 | lea TABLE(%rip), %r11; \ |
32 | movslq (%r11, INDEX, SCALE), %rcx; \ |
33 | add %r11, %rcx; \ |
34 | _CET_NOTRACK jmp *%rcx; \ |
35 | ud2 |
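
/* The dispatch tables (L(table_64bytes) below) hold 32-bit offsets of each
   tail block relative to the table itself, so they need no relocations for
   PIC.  BRANCH_TO_JMPTBL_ENTRY loads the entry for the residual length and
   jumps to table base + offset; the ud2 is never reached and simply
   terminates the code path after the indirect jump.  A rough C sketch of
   the dispatch, for illustration only ("len" being the residual byte
   count):

       const int32_t *table = ...table base...;
       goto *((char *) table + table[len]);
 */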
36 | |
37 | /* Warning! |
38 | wmemcmp has to use SIGNED comparison for elements. |
memcmp has to use UNSIGNED comparison for elements.
40 | */ |
41 | |
.section .text.sse4.1,"ax",@progbits
43 | ENTRY (MEMCMP) |
44 | # ifdef USE_AS_WMEMCMP |
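/* The length is a count of wide characters; convert it to bytes.  */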
45 | shl $2, %RDX_LP |
46 | # elif defined __ILP32__ |
47 | /* Clear the upper 32 bits. */ |
48 | mov %edx, %edx |
49 | # endif |
50 | pxor %xmm0, %xmm0 |
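/* %xmm0 stays zero for the whole function.  "ptest %xmmN, %xmm0" sets CF
   only when %xmmN is all zero, so every "jnc" after a ptest branches when
   the XOR of the two buffers is nonzero, i.e. when a difference exists.  */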
51 | cmp $79, %RDX_LP |
52 | ja L(79bytesormore) |
53 | # ifndef USE_AS_WMEMCMP |
54 | cmp $1, %RDX_LP |
55 | je L(firstbyte) |
56 | # endif |
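/* Advance both pointers past the end of the compared region; the jump
   table entry for the residual length finishes the comparison using
   negative displacements from the end.  */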
57 | add %rdx, %rsi |
58 | add %rdx, %rdi |
59 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
60 | |
61 | # ifndef USE_AS_WMEMCMP |
62 | .p2align 4 |
63 | L(firstbyte): |
64 | movzbl (%rdi), %eax |
65 | movzbl (%rsi), %ecx |
66 | sub %ecx, %eax |
67 | ret |
68 | # endif |
69 | |
70 | .p2align 4 |
71 | L(79bytesormore): |
72 | movdqu (%rsi), %xmm1 |
73 | movdqu (%rdi), %xmm2 |
74 | pxor %xmm1, %xmm2 |
75 | ptest %xmm2, %xmm0 |
76 | jnc L(16bytesin256) |
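/* The first 16 bytes matched.  Round %rsi up to the next 16-byte boundary
   and adjust %rdi and %rdx by the same amount; if %rdi is then also
   16-byte aligned, take the aligned-load path below.  */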
77 | mov %rsi, %rcx |
78 | and $-16, %rsi |
79 | add $16, %rsi |
80 | sub %rsi, %rcx |
81 | |
82 | sub %rcx, %rdi |
83 | add %rcx, %rdx |
84 | test $0xf, %rdi |
85 | jz L(2aligned) |
86 | |
87 | cmp $128, %rdx |
88 | ja L(128bytesormore) |
89 | L(less128bytes): |
90 | sub $64, %rdx |
91 | |
92 | movdqu (%rdi), %xmm2 |
93 | pxor (%rsi), %xmm2 |
94 | ptest %xmm2, %xmm0 |
95 | jnc L(16bytesin256) |
96 | |
97 | movdqu 16(%rdi), %xmm2 |
98 | pxor 16(%rsi), %xmm2 |
99 | ptest %xmm2, %xmm0 |
100 | jnc L(32bytesin256) |
101 | |
102 | movdqu 32(%rdi), %xmm2 |
103 | pxor 32(%rsi), %xmm2 |
104 | ptest %xmm2, %xmm0 |
105 | jnc L(48bytesin256) |
106 | |
107 | movdqu 48(%rdi), %xmm2 |
108 | pxor 48(%rsi), %xmm2 |
109 | ptest %xmm2, %xmm0 |
110 | jnc L(64bytesin256) |
111 | cmp $32, %rdx |
112 | jb L(less32bytesin64) |
113 | |
114 | movdqu 64(%rdi), %xmm2 |
115 | pxor 64(%rsi), %xmm2 |
116 | ptest %xmm2, %xmm0 |
117 | jnc L(80bytesin256) |
118 | |
119 | movdqu 80(%rdi), %xmm2 |
120 | pxor 80(%rsi), %xmm2 |
121 | ptest %xmm2, %xmm0 |
122 | jnc L(96bytesin256) |
123 | sub $32, %rdx |
124 | add $32, %rdi |
125 | add $32, %rsi |
126 | L(less32bytesin64): |
127 | add $64, %rdi |
128 | add $64, %rsi |
129 | add %rdx, %rsi |
130 | add %rdx, %rdi |
131 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
132 | |
133 | L(128bytesormore): |
134 | cmp $512, %rdx |
135 | ja L(512bytesormore) |
136 | cmp $256, %rdx |
137 | ja L(less512bytes) |
138 | L(less256bytes): |
139 | sub $128, %rdx |
140 | |
141 | movdqu (%rdi), %xmm2 |
142 | pxor (%rsi), %xmm2 |
143 | ptest %xmm2, %xmm0 |
144 | jnc L(16bytesin256) |
145 | |
146 | movdqu 16(%rdi), %xmm2 |
147 | pxor 16(%rsi), %xmm2 |
148 | ptest %xmm2, %xmm0 |
149 | jnc L(32bytesin256) |
150 | |
151 | movdqu 32(%rdi), %xmm2 |
152 | pxor 32(%rsi), %xmm2 |
153 | ptest %xmm2, %xmm0 |
154 | jnc L(48bytesin256) |
155 | |
156 | movdqu 48(%rdi), %xmm2 |
157 | pxor 48(%rsi), %xmm2 |
158 | ptest %xmm2, %xmm0 |
159 | jnc L(64bytesin256) |
160 | |
161 | movdqu 64(%rdi), %xmm2 |
162 | pxor 64(%rsi), %xmm2 |
163 | ptest %xmm2, %xmm0 |
164 | jnc L(80bytesin256) |
165 | |
166 | movdqu 80(%rdi), %xmm2 |
167 | pxor 80(%rsi), %xmm2 |
168 | ptest %xmm2, %xmm0 |
169 | jnc L(96bytesin256) |
170 | |
171 | movdqu 96(%rdi), %xmm2 |
172 | pxor 96(%rsi), %xmm2 |
173 | ptest %xmm2, %xmm0 |
174 | jnc L(112bytesin256) |
175 | |
176 | movdqu 112(%rdi), %xmm2 |
177 | pxor 112(%rsi), %xmm2 |
178 | ptest %xmm2, %xmm0 |
179 | jnc L(128bytesin256) |
180 | |
181 | add $128, %rsi |
182 | add $128, %rdi |
183 | |
184 | cmp $64, %rdx |
185 | jae L(less128bytes) |
186 | |
187 | cmp $32, %rdx |
188 | jb L(less32bytesin128) |
189 | |
190 | movdqu (%rdi), %xmm2 |
191 | pxor (%rsi), %xmm2 |
192 | ptest %xmm2, %xmm0 |
193 | jnc L(16bytesin256) |
194 | |
195 | movdqu 16(%rdi), %xmm2 |
196 | pxor 16(%rsi), %xmm2 |
197 | ptest %xmm2, %xmm0 |
198 | jnc L(32bytesin256) |
199 | sub $32, %rdx |
200 | add $32, %rdi |
201 | add $32, %rsi |
202 | L(less32bytesin128): |
203 | add %rdx, %rsi |
204 | add %rdx, %rdi |
205 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
206 | |
207 | L(less512bytes): |
208 | sub $256, %rdx |
209 | movdqu (%rdi), %xmm2 |
210 | pxor (%rsi), %xmm2 |
211 | ptest %xmm2, %xmm0 |
212 | jnc L(16bytesin256) |
213 | |
214 | movdqu 16(%rdi), %xmm2 |
215 | pxor 16(%rsi), %xmm2 |
216 | ptest %xmm2, %xmm0 |
217 | jnc L(32bytesin256) |
218 | |
219 | movdqu 32(%rdi), %xmm2 |
220 | pxor 32(%rsi), %xmm2 |
221 | ptest %xmm2, %xmm0 |
222 | jnc L(48bytesin256) |
223 | |
224 | movdqu 48(%rdi), %xmm2 |
225 | pxor 48(%rsi), %xmm2 |
226 | ptest %xmm2, %xmm0 |
227 | jnc L(64bytesin256) |
228 | |
229 | movdqu 64(%rdi), %xmm2 |
230 | pxor 64(%rsi), %xmm2 |
231 | ptest %xmm2, %xmm0 |
232 | jnc L(80bytesin256) |
233 | |
234 | movdqu 80(%rdi), %xmm2 |
235 | pxor 80(%rsi), %xmm2 |
236 | ptest %xmm2, %xmm0 |
237 | jnc L(96bytesin256) |
238 | |
239 | movdqu 96(%rdi), %xmm2 |
240 | pxor 96(%rsi), %xmm2 |
241 | ptest %xmm2, %xmm0 |
242 | jnc L(112bytesin256) |
243 | |
244 | movdqu 112(%rdi), %xmm2 |
245 | pxor 112(%rsi), %xmm2 |
246 | ptest %xmm2, %xmm0 |
247 | jnc L(128bytesin256) |
248 | |
249 | movdqu 128(%rdi), %xmm2 |
250 | pxor 128(%rsi), %xmm2 |
251 | ptest %xmm2, %xmm0 |
252 | jnc L(144bytesin256) |
253 | |
254 | movdqu 144(%rdi), %xmm2 |
255 | pxor 144(%rsi), %xmm2 |
256 | ptest %xmm2, %xmm0 |
257 | jnc L(160bytesin256) |
258 | |
259 | movdqu 160(%rdi), %xmm2 |
260 | pxor 160(%rsi), %xmm2 |
261 | ptest %xmm2, %xmm0 |
262 | jnc L(176bytesin256) |
263 | |
264 | movdqu 176(%rdi), %xmm2 |
265 | pxor 176(%rsi), %xmm2 |
266 | ptest %xmm2, %xmm0 |
267 | jnc L(192bytesin256) |
268 | |
269 | movdqu 192(%rdi), %xmm2 |
270 | pxor 192(%rsi), %xmm2 |
271 | ptest %xmm2, %xmm0 |
272 | jnc L(208bytesin256) |
273 | |
274 | movdqu 208(%rdi), %xmm2 |
275 | pxor 208(%rsi), %xmm2 |
276 | ptest %xmm2, %xmm0 |
277 | jnc L(224bytesin256) |
278 | |
279 | movdqu 224(%rdi), %xmm2 |
280 | pxor 224(%rsi), %xmm2 |
281 | ptest %xmm2, %xmm0 |
282 | jnc L(240bytesin256) |
283 | |
284 | movdqu 240(%rdi), %xmm2 |
285 | pxor 240(%rsi), %xmm2 |
286 | ptest %xmm2, %xmm0 |
287 | jnc L(256bytesin256) |
288 | |
289 | add $256, %rsi |
290 | add $256, %rdi |
291 | |
292 | cmp $128, %rdx |
293 | jae L(less256bytes) |
294 | |
295 | cmp $64, %rdx |
296 | jae L(less128bytes) |
297 | |
298 | cmp $32, %rdx |
299 | jb L(less32bytesin256) |
300 | |
301 | movdqu (%rdi), %xmm2 |
302 | pxor (%rsi), %xmm2 |
303 | ptest %xmm2, %xmm0 |
304 | jnc L(16bytesin256) |
305 | |
306 | movdqu 16(%rdi), %xmm2 |
307 | pxor 16(%rsi), %xmm2 |
308 | ptest %xmm2, %xmm0 |
309 | jnc L(32bytesin256) |
310 | sub $32, %rdx |
311 | add $32, %rdi |
312 | add $32, %rsi |
313 | L(less32bytesin256): |
314 | add %rdx, %rsi |
315 | add %rdx, %rdi |
316 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
317 | |
318 | .p2align 4 |
319 | L(512bytesormore): |
320 | # ifdef DATA_CACHE_SIZE_HALF |
321 | mov $DATA_CACHE_SIZE_HALF, %R8_LP |
322 | # else |
323 | mov __x86_data_cache_size_half(%rip), %R8_LP |
324 | # endif |
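/* %r8 = 3/4 of the data cache size (half plus a quarter of it).  Above
   that threshold, use the loop with non-temporal prefetches so the
   comparison does not evict the whole cache.  */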
325 | mov %r8, %r9 |
326 | shr $1, %r8 |
327 | add %r9, %r8 |
328 | cmp %r8, %rdx |
ja L(L2_L3_cache_unaligned)
330 | sub $64, %rdx |
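/* Main loop: XOR four 16-byte chunks, OR the results together and do a
   single ptest per 64 bytes.  %rdx is biased by -64 so the sub/jae pair
   keeps looping while at least 64 bytes remain; the bias is undone after
   the loop before dispatching on the tail length.  */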
331 | .p2align 4 |
332 | L(64bytesormore_loop): |
333 | movdqu (%rdi), %xmm2 |
334 | pxor (%rsi), %xmm2 |
335 | movdqa %xmm2, %xmm1 |
336 | |
337 | movdqu 16(%rdi), %xmm3 |
338 | pxor 16(%rsi), %xmm3 |
339 | por %xmm3, %xmm1 |
340 | |
341 | movdqu 32(%rdi), %xmm4 |
342 | pxor 32(%rsi), %xmm4 |
343 | por %xmm4, %xmm1 |
344 | |
345 | movdqu 48(%rdi), %xmm5 |
346 | pxor 48(%rsi), %xmm5 |
347 | por %xmm5, %xmm1 |
348 | |
349 | ptest %xmm1, %xmm0 |
350 | jnc L(64bytesormore_loop_end) |
351 | add $64, %rsi |
352 | add $64, %rdi |
353 | sub $64, %rdx |
354 | jae L(64bytesormore_loop) |
355 | |
356 | add $64, %rdx |
357 | add %rdx, %rsi |
358 | add %rdx, %rdi |
359 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
360 | |
L(L2_L3_cache_unaligned):
362 | sub $64, %rdx |
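/* Same 64-byte loop as above, but with non-temporal prefetches 0x1c0
   (448) bytes ahead of both buffers to limit cache pollution on inputs
   larger than the data cache threshold.  */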
363 | .p2align 4 |
364 | L(L2_L3_unaligned_128bytes_loop): |
365 | prefetchnta 0x1c0(%rdi) |
366 | prefetchnta 0x1c0(%rsi) |
367 | movdqu (%rdi), %xmm2 |
368 | pxor (%rsi), %xmm2 |
369 | movdqa %xmm2, %xmm1 |
370 | |
371 | movdqu 16(%rdi), %xmm3 |
372 | pxor 16(%rsi), %xmm3 |
373 | por %xmm3, %xmm1 |
374 | |
375 | movdqu 32(%rdi), %xmm4 |
376 | pxor 32(%rsi), %xmm4 |
377 | por %xmm4, %xmm1 |
378 | |
379 | movdqu 48(%rdi), %xmm5 |
380 | pxor 48(%rsi), %xmm5 |
381 | por %xmm5, %xmm1 |
382 | |
383 | ptest %xmm1, %xmm0 |
384 | jnc L(64bytesormore_loop_end) |
385 | add $64, %rsi |
386 | add $64, %rdi |
387 | sub $64, %rdx |
388 | jae L(L2_L3_unaligned_128bytes_loop) |
389 | |
390 | add $64, %rdx |
391 | add %rdx, %rsi |
392 | add %rdx, %rdi |
393 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
394 | |
395 | /* |
396 | * This case is for machines which are sensitive for unaligned instructions. |
397 | */ |
398 | .p2align 4 |
399 | L(2aligned): |
400 | cmp $128, %rdx |
401 | ja L(128bytesormorein2aligned) |
402 | L(less128bytesin2aligned): |
403 | sub $64, %rdx |
404 | |
405 | movdqa (%rdi), %xmm2 |
406 | pxor (%rsi), %xmm2 |
407 | ptest %xmm2, %xmm0 |
408 | jnc L(16bytesin256) |
409 | |
410 | movdqa 16(%rdi), %xmm2 |
411 | pxor 16(%rsi), %xmm2 |
412 | ptest %xmm2, %xmm0 |
413 | jnc L(32bytesin256) |
414 | |
415 | movdqa 32(%rdi), %xmm2 |
416 | pxor 32(%rsi), %xmm2 |
417 | ptest %xmm2, %xmm0 |
418 | jnc L(48bytesin256) |
419 | |
420 | movdqa 48(%rdi), %xmm2 |
421 | pxor 48(%rsi), %xmm2 |
422 | ptest %xmm2, %xmm0 |
423 | jnc L(64bytesin256) |
424 | cmp $32, %rdx |
jb L(less32bytesin64in2aligned)
426 | |
427 | movdqa 64(%rdi), %xmm2 |
428 | pxor 64(%rsi), %xmm2 |
429 | ptest %xmm2, %xmm0 |
430 | jnc L(80bytesin256) |
431 | |
432 | movdqa 80(%rdi), %xmm2 |
433 | pxor 80(%rsi), %xmm2 |
434 | ptest %xmm2, %xmm0 |
435 | jnc L(96bytesin256) |
436 | sub $32, %rdx |
437 | add $32, %rdi |
438 | add $32, %rsi |
L(less32bytesin64in2aligned):
440 | add $64, %rdi |
441 | add $64, %rsi |
442 | add %rdx, %rsi |
443 | add %rdx, %rdi |
444 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
445 | |
446 | .p2align 4 |
447 | L(128bytesormorein2aligned): |
448 | cmp $512, %rdx |
449 | ja L(512bytesormorein2aligned) |
450 | cmp $256, %rdx |
451 | ja L(256bytesormorein2aligned) |
L(less256bytesin2aligned):
453 | sub $128, %rdx |
454 | |
455 | movdqa (%rdi), %xmm2 |
456 | pxor (%rsi), %xmm2 |
457 | ptest %xmm2, %xmm0 |
458 | jnc L(16bytesin256) |
459 | |
460 | movdqa 16(%rdi), %xmm2 |
461 | pxor 16(%rsi), %xmm2 |
462 | ptest %xmm2, %xmm0 |
463 | jnc L(32bytesin256) |
464 | |
465 | movdqa 32(%rdi), %xmm2 |
466 | pxor 32(%rsi), %xmm2 |
467 | ptest %xmm2, %xmm0 |
468 | jnc L(48bytesin256) |
469 | |
470 | movdqa 48(%rdi), %xmm2 |
471 | pxor 48(%rsi), %xmm2 |
472 | ptest %xmm2, %xmm0 |
473 | jnc L(64bytesin256) |
474 | |
475 | movdqa 64(%rdi), %xmm2 |
476 | pxor 64(%rsi), %xmm2 |
477 | ptest %xmm2, %xmm0 |
478 | jnc L(80bytesin256) |
479 | |
480 | movdqa 80(%rdi), %xmm2 |
481 | pxor 80(%rsi), %xmm2 |
482 | ptest %xmm2, %xmm0 |
483 | jnc L(96bytesin256) |
484 | |
485 | movdqa 96(%rdi), %xmm2 |
486 | pxor 96(%rsi), %xmm2 |
487 | ptest %xmm2, %xmm0 |
488 | jnc L(112bytesin256) |
489 | |
490 | movdqa 112(%rdi), %xmm2 |
491 | pxor 112(%rsi), %xmm2 |
492 | ptest %xmm2, %xmm0 |
493 | jnc L(128bytesin256) |
494 | |
495 | add $128, %rsi |
496 | add $128, %rdi |
497 | |
498 | cmp $64, %rdx |
499 | jae L(less128bytesin2aligned) |
500 | |
501 | cmp $32, %rdx |
502 | jb L(less32bytesin128in2aligned) |
503 | |
504 | movdqu (%rdi), %xmm2 |
505 | pxor (%rsi), %xmm2 |
506 | ptest %xmm2, %xmm0 |
507 | jnc L(16bytesin256) |
508 | |
509 | movdqu 16(%rdi), %xmm2 |
510 | pxor 16(%rsi), %xmm2 |
511 | ptest %xmm2, %xmm0 |
512 | jnc L(32bytesin256) |
513 | sub $32, %rdx |
514 | add $32, %rdi |
515 | add $32, %rsi |
516 | L(less32bytesin128in2aligned): |
517 | add %rdx, %rsi |
518 | add %rdx, %rdi |
519 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
520 | |
521 | .p2align 4 |
522 | L(256bytesormorein2aligned): |
523 | |
524 | sub $256, %rdx |
525 | movdqa (%rdi), %xmm2 |
526 | pxor (%rsi), %xmm2 |
527 | ptest %xmm2, %xmm0 |
528 | jnc L(16bytesin256) |
529 | |
530 | movdqa 16(%rdi), %xmm2 |
531 | pxor 16(%rsi), %xmm2 |
532 | ptest %xmm2, %xmm0 |
533 | jnc L(32bytesin256) |
534 | |
535 | movdqa 32(%rdi), %xmm2 |
536 | pxor 32(%rsi), %xmm2 |
537 | ptest %xmm2, %xmm0 |
538 | jnc L(48bytesin256) |
539 | |
540 | movdqa 48(%rdi), %xmm2 |
541 | pxor 48(%rsi), %xmm2 |
542 | ptest %xmm2, %xmm0 |
543 | jnc L(64bytesin256) |
544 | |
545 | movdqa 64(%rdi), %xmm2 |
546 | pxor 64(%rsi), %xmm2 |
547 | ptest %xmm2, %xmm0 |
548 | jnc L(80bytesin256) |
549 | |
550 | movdqa 80(%rdi), %xmm2 |
551 | pxor 80(%rsi), %xmm2 |
552 | ptest %xmm2, %xmm0 |
553 | jnc L(96bytesin256) |
554 | |
555 | movdqa 96(%rdi), %xmm2 |
556 | pxor 96(%rsi), %xmm2 |
557 | ptest %xmm2, %xmm0 |
558 | jnc L(112bytesin256) |
559 | |
560 | movdqa 112(%rdi), %xmm2 |
561 | pxor 112(%rsi), %xmm2 |
562 | ptest %xmm2, %xmm0 |
563 | jnc L(128bytesin256) |
564 | |
565 | movdqa 128(%rdi), %xmm2 |
566 | pxor 128(%rsi), %xmm2 |
567 | ptest %xmm2, %xmm0 |
568 | jnc L(144bytesin256) |
569 | |
570 | movdqa 144(%rdi), %xmm2 |
571 | pxor 144(%rsi), %xmm2 |
572 | ptest %xmm2, %xmm0 |
573 | jnc L(160bytesin256) |
574 | |
575 | movdqa 160(%rdi), %xmm2 |
576 | pxor 160(%rsi), %xmm2 |
577 | ptest %xmm2, %xmm0 |
578 | jnc L(176bytesin256) |
579 | |
580 | movdqa 176(%rdi), %xmm2 |
581 | pxor 176(%rsi), %xmm2 |
582 | ptest %xmm2, %xmm0 |
583 | jnc L(192bytesin256) |
584 | |
585 | movdqa 192(%rdi), %xmm2 |
586 | pxor 192(%rsi), %xmm2 |
587 | ptest %xmm2, %xmm0 |
588 | jnc L(208bytesin256) |
589 | |
590 | movdqa 208(%rdi), %xmm2 |
591 | pxor 208(%rsi), %xmm2 |
592 | ptest %xmm2, %xmm0 |
593 | jnc L(224bytesin256) |
594 | |
595 | movdqa 224(%rdi), %xmm2 |
596 | pxor 224(%rsi), %xmm2 |
597 | ptest %xmm2, %xmm0 |
598 | jnc L(240bytesin256) |
599 | |
600 | movdqa 240(%rdi), %xmm2 |
601 | pxor 240(%rsi), %xmm2 |
602 | ptest %xmm2, %xmm0 |
603 | jnc L(256bytesin256) |
604 | |
605 | add $256, %rsi |
606 | add $256, %rdi |
607 | |
608 | cmp $128, %rdx |
jae L(less256bytesin2aligned)
610 | |
611 | cmp $64, %rdx |
612 | jae L(less128bytesin2aligned) |
613 | |
614 | cmp $32, %rdx |
jb L(less32bytesin256in2aligned)
616 | |
617 | movdqa (%rdi), %xmm2 |
618 | pxor (%rsi), %xmm2 |
619 | ptest %xmm2, %xmm0 |
620 | jnc L(16bytesin256) |
621 | |
622 | movdqa 16(%rdi), %xmm2 |
623 | pxor 16(%rsi), %xmm2 |
624 | ptest %xmm2, %xmm0 |
625 | jnc L(32bytesin256) |
626 | sub $32, %rdx |
627 | add $32, %rdi |
628 | add $32, %rsi |
L(less32bytesin256in2aligned):
630 | add %rdx, %rsi |
631 | add %rdx, %rdi |
632 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
633 | |
634 | .p2align 4 |
635 | L(512bytesormorein2aligned): |
636 | # ifdef DATA_CACHE_SIZE_HALF |
637 | mov $DATA_CACHE_SIZE_HALF, %R8_LP |
638 | # else |
639 | mov __x86_data_cache_size_half(%rip), %R8_LP |
640 | # endif |
641 | mov %r8, %r9 |
642 | shr $1, %r8 |
643 | add %r9, %r8 |
644 | cmp %r8, %rdx |
ja L(L2_L3_cache_aligned)
646 | |
647 | sub $64, %rdx |
648 | .p2align 4 |
649 | L(64bytesormore_loopin2aligned): |
650 | movdqa (%rdi), %xmm2 |
651 | pxor (%rsi), %xmm2 |
652 | movdqa %xmm2, %xmm1 |
653 | |
654 | movdqa 16(%rdi), %xmm3 |
655 | pxor 16(%rsi), %xmm3 |
656 | por %xmm3, %xmm1 |
657 | |
658 | movdqa 32(%rdi), %xmm4 |
659 | pxor 32(%rsi), %xmm4 |
660 | por %xmm4, %xmm1 |
661 | |
662 | movdqa 48(%rdi), %xmm5 |
663 | pxor 48(%rsi), %xmm5 |
664 | por %xmm5, %xmm1 |
665 | |
666 | ptest %xmm1, %xmm0 |
667 | jnc L(64bytesormore_loop_end) |
668 | add $64, %rsi |
669 | add $64, %rdi |
670 | sub $64, %rdx |
671 | jae L(64bytesormore_loopin2aligned) |
672 | |
673 | add $64, %rdx |
674 | add %rdx, %rsi |
675 | add %rdx, %rdi |
676 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
L(L2_L3_cache_aligned):
678 | sub $64, %rdx |
679 | |
680 | .p2align 4 |
681 | L(L2_L3_aligned_128bytes_loop): |
682 | prefetchnta 0x1c0(%rdi) |
683 | prefetchnta 0x1c0(%rsi) |
684 | movdqa (%rdi), %xmm2 |
685 | pxor (%rsi), %xmm2 |
686 | movdqa %xmm2, %xmm1 |
687 | |
688 | movdqa 16(%rdi), %xmm3 |
689 | pxor 16(%rsi), %xmm3 |
690 | por %xmm3, %xmm1 |
691 | |
692 | movdqa 32(%rdi), %xmm4 |
693 | pxor 32(%rsi), %xmm4 |
694 | por %xmm4, %xmm1 |
695 | |
696 | movdqa 48(%rdi), %xmm5 |
697 | pxor 48(%rsi), %xmm5 |
698 | por %xmm5, %xmm1 |
699 | |
700 | ptest %xmm1, %xmm0 |
701 | jnc L(64bytesormore_loop_end) |
702 | add $64, %rsi |
703 | add $64, %rdi |
704 | sub $64, %rdx |
705 | jae L(L2_L3_aligned_128bytes_loop) |
706 | |
707 | add $64, %rdx |
708 | add %rdx, %rsi |
709 | add %rdx, %rdi |
710 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
711 | |
712 | |
713 | .p2align 4 |
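/* One of %xmm2..%xmm5 holds a nonzero XOR result.  Step 16 bytes at a
   time until the first nonzero register is found, then fall into
   L(16bytes), which recompares that chunk as two 8-byte words.  */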
714 | L(64bytesormore_loop_end): |
715 | add $16, %rdi |
716 | add $16, %rsi |
717 | ptest %xmm2, %xmm0 |
718 | jnc L(16bytes) |
719 | |
720 | add $16, %rdi |
721 | add $16, %rsi |
722 | ptest %xmm3, %xmm0 |
723 | jnc L(16bytes) |
724 | |
725 | add $16, %rdi |
726 | add $16, %rsi |
727 | ptest %xmm4, %xmm0 |
728 | jnc L(16bytes) |
729 | |
730 | add $16, %rdi |
731 | add $16, %rsi |
732 | jmp L(16bytes) |
733 | |
734 | L(256bytesin256): |
735 | add $256, %rdi |
736 | add $256, %rsi |
737 | jmp L(16bytes) |
738 | L(240bytesin256): |
739 | add $240, %rdi |
740 | add $240, %rsi |
741 | jmp L(16bytes) |
742 | L(224bytesin256): |
743 | add $224, %rdi |
744 | add $224, %rsi |
745 | jmp L(16bytes) |
746 | L(208bytesin256): |
747 | add $208, %rdi |
748 | add $208, %rsi |
749 | jmp L(16bytes) |
750 | L(192bytesin256): |
751 | add $192, %rdi |
752 | add $192, %rsi |
753 | jmp L(16bytes) |
754 | L(176bytesin256): |
755 | add $176, %rdi |
756 | add $176, %rsi |
757 | jmp L(16bytes) |
758 | L(160bytesin256): |
759 | add $160, %rdi |
760 | add $160, %rsi |
761 | jmp L(16bytes) |
762 | L(144bytesin256): |
763 | add $144, %rdi |
764 | add $144, %rsi |
765 | jmp L(16bytes) |
766 | L(128bytesin256): |
767 | add $128, %rdi |
768 | add $128, %rsi |
769 | jmp L(16bytes) |
770 | L(112bytesin256): |
771 | add $112, %rdi |
772 | add $112, %rsi |
773 | jmp L(16bytes) |
774 | L(96bytesin256): |
775 | add $96, %rdi |
776 | add $96, %rsi |
777 | jmp L(16bytes) |
778 | L(80bytesin256): |
779 | add $80, %rdi |
780 | add $80, %rsi |
781 | jmp L(16bytes) |
782 | L(64bytesin256): |
783 | add $64, %rdi |
784 | add $64, %rsi |
785 | jmp L(16bytes) |
786 | L(48bytesin256): |
787 | add $16, %rdi |
788 | add $16, %rsi |
789 | L(32bytesin256): |
790 | add $16, %rdi |
791 | add $16, %rsi |
792 | L(16bytesin256): |
793 | add $16, %rdi |
794 | add $16, %rsi |
795 | L(16bytes): |
796 | mov -16(%rdi), %rax |
797 | mov -16(%rsi), %rcx |
798 | cmp %rax, %rcx |
799 | jne L(diffin8bytes) |
800 | L(8bytes): |
801 | mov -8(%rdi), %rax |
802 | mov -8(%rsi), %rcx |
803 | cmp %rax, %rcx |
804 | jne L(diffin8bytes) |
805 | xor %eax, %eax |
806 | ret |
807 | |
808 | .p2align 4 |
809 | L(12bytes): |
810 | mov -12(%rdi), %rax |
811 | mov -12(%rsi), %rcx |
812 | cmp %rax, %rcx |
813 | jne L(diffin8bytes) |
814 | L(4bytes): |
815 | mov -4(%rsi), %ecx |
816 | # ifndef USE_AS_WMEMCMP |
817 | mov -4(%rdi), %eax |
818 | cmp %eax, %ecx |
819 | # else |
820 | cmp -4(%rdi), %ecx |
821 | # endif |
822 | jne L(diffin4bytes) |
823 | L(0bytes): |
824 | xor %eax, %eax |
825 | ret |
826 | |
827 | # ifndef USE_AS_WMEMCMP |
/* These lengths are not multiples of 4, so they cannot occur for
   wmemcmp.  */
829 | .p2align 4 |
830 | L(65bytes): |
831 | movdqu -65(%rdi), %xmm1 |
832 | movdqu -65(%rsi), %xmm2 |
833 | mov $-65, %dl |
834 | pxor %xmm1, %xmm2 |
835 | ptest %xmm2, %xmm0 |
836 | jnc L(less16bytes) |
837 | L(49bytes): |
838 | movdqu -49(%rdi), %xmm1 |
839 | movdqu -49(%rsi), %xmm2 |
840 | mov $-49, %dl |
841 | pxor %xmm1, %xmm2 |
842 | ptest %xmm2, %xmm0 |
843 | jnc L(less16bytes) |
844 | L(33bytes): |
845 | movdqu -33(%rdi), %xmm1 |
846 | movdqu -33(%rsi), %xmm2 |
847 | mov $-33, %dl |
848 | pxor %xmm1, %xmm2 |
849 | ptest %xmm2, %xmm0 |
850 | jnc L(less16bytes) |
851 | L(17bytes): |
852 | mov -17(%rdi), %rax |
853 | mov -17(%rsi), %rcx |
854 | cmp %rax, %rcx |
855 | jne L(diffin8bytes) |
856 | L(9bytes): |
857 | mov -9(%rdi), %rax |
858 | mov -9(%rsi), %rcx |
859 | cmp %rax, %rcx |
860 | jne L(diffin8bytes) |
861 | movzbl -1(%rdi), %eax |
862 | movzbl -1(%rsi), %edx |
863 | sub %edx, %eax |
864 | ret |
865 | |
866 | .p2align 4 |
867 | L(13bytes): |
868 | mov -13(%rdi), %rax |
869 | mov -13(%rsi), %rcx |
870 | cmp %rax, %rcx |
871 | jne L(diffin8bytes) |
872 | mov -8(%rdi), %rax |
873 | mov -8(%rsi), %rcx |
874 | cmp %rax, %rcx |
875 | jne L(diffin8bytes) |
876 | xor %eax, %eax |
877 | ret |
878 | |
879 | .p2align 4 |
880 | L(5bytes): |
881 | mov -5(%rdi), %eax |
882 | mov -5(%rsi), %ecx |
883 | cmp %eax, %ecx |
884 | jne L(diffin4bytes) |
885 | movzbl -1(%rdi), %eax |
886 | movzbl -1(%rsi), %edx |
887 | sub %edx, %eax |
888 | ret |
889 | |
890 | .p2align 4 |
891 | L(66bytes): |
892 | movdqu -66(%rdi), %xmm1 |
893 | movdqu -66(%rsi), %xmm2 |
894 | mov $-66, %dl |
895 | pxor %xmm1, %xmm2 |
896 | ptest %xmm2, %xmm0 |
897 | jnc L(less16bytes) |
898 | L(50bytes): |
899 | movdqu -50(%rdi), %xmm1 |
900 | movdqu -50(%rsi), %xmm2 |
901 | mov $-50, %dl |
902 | pxor %xmm1, %xmm2 |
903 | ptest %xmm2, %xmm0 |
904 | jnc L(less16bytes) |
905 | L(34bytes): |
906 | movdqu -34(%rdi), %xmm1 |
907 | movdqu -34(%rsi), %xmm2 |
908 | mov $-34, %dl |
909 | pxor %xmm1, %xmm2 |
910 | ptest %xmm2, %xmm0 |
911 | jnc L(less16bytes) |
912 | L(18bytes): |
913 | mov -18(%rdi), %rax |
914 | mov -18(%rsi), %rcx |
915 | cmp %rax, %rcx |
916 | jne L(diffin8bytes) |
917 | L(10bytes): |
918 | mov -10(%rdi), %rax |
919 | mov -10(%rsi), %rcx |
920 | cmp %rax, %rcx |
921 | jne L(diffin8bytes) |
922 | movzwl -2(%rdi), %eax |
923 | movzwl -2(%rsi), %ecx |
924 | cmp %cl, %al |
925 | jne L(end) |
926 | and $0xffff, %eax |
927 | and $0xffff, %ecx |
928 | sub %ecx, %eax |
929 | ret |
930 | |
931 | .p2align 4 |
932 | L(14bytes): |
933 | mov -14(%rdi), %rax |
934 | mov -14(%rsi), %rcx |
935 | cmp %rax, %rcx |
936 | jne L(diffin8bytes) |
937 | mov -8(%rdi), %rax |
938 | mov -8(%rsi), %rcx |
939 | cmp %rax, %rcx |
940 | jne L(diffin8bytes) |
941 | xor %eax, %eax |
942 | ret |
943 | |
944 | .p2align 4 |
945 | L(6bytes): |
946 | mov -6(%rdi), %eax |
947 | mov -6(%rsi), %ecx |
948 | cmp %eax, %ecx |
949 | jne L(diffin4bytes) |
950 | L(2bytes): |
951 | movzwl -2(%rsi), %ecx |
952 | movzwl -2(%rdi), %eax |
953 | cmp %cl, %al |
954 | jne L(end) |
955 | and $0xffff, %eax |
956 | and $0xffff, %ecx |
957 | sub %ecx, %eax |
958 | ret |
959 | |
960 | .p2align 4 |
961 | L(67bytes): |
962 | movdqu -67(%rdi), %xmm2 |
963 | movdqu -67(%rsi), %xmm1 |
964 | mov $-67, %dl |
965 | pxor %xmm1, %xmm2 |
966 | ptest %xmm2, %xmm0 |
967 | jnc L(less16bytes) |
968 | L(51bytes): |
969 | movdqu -51(%rdi), %xmm2 |
970 | movdqu -51(%rsi), %xmm1 |
971 | mov $-51, %dl |
972 | pxor %xmm1, %xmm2 |
973 | ptest %xmm2, %xmm0 |
974 | jnc L(less16bytes) |
975 | L(35bytes): |
976 | movdqu -35(%rsi), %xmm1 |
977 | movdqu -35(%rdi), %xmm2 |
978 | mov $-35, %dl |
979 | pxor %xmm1, %xmm2 |
980 | ptest %xmm2, %xmm0 |
981 | jnc L(less16bytes) |
982 | L(19bytes): |
983 | mov -19(%rdi), %rax |
984 | mov -19(%rsi), %rcx |
985 | cmp %rax, %rcx |
986 | jne L(diffin8bytes) |
987 | L(11bytes): |
988 | mov -11(%rdi), %rax |
989 | mov -11(%rsi), %rcx |
990 | cmp %rax, %rcx |
991 | jne L(diffin8bytes) |
992 | mov -4(%rdi), %eax |
993 | mov -4(%rsi), %ecx |
994 | cmp %eax, %ecx |
995 | jne L(diffin4bytes) |
996 | xor %eax, %eax |
997 | ret |
998 | |
999 | .p2align 4 |
1000 | L(15bytes): |
1001 | mov -15(%rdi), %rax |
1002 | mov -15(%rsi), %rcx |
1003 | cmp %rax, %rcx |
1004 | jne L(diffin8bytes) |
1005 | mov -8(%rdi), %rax |
1006 | mov -8(%rsi), %rcx |
1007 | cmp %rax, %rcx |
1008 | jne L(diffin8bytes) |
1009 | xor %eax, %eax |
1010 | ret |
1011 | |
1012 | .p2align 4 |
1013 | L(7bytes): |
1014 | mov -7(%rdi), %eax |
1015 | mov -7(%rsi), %ecx |
1016 | cmp %eax, %ecx |
1017 | jne L(diffin4bytes) |
1018 | mov -4(%rdi), %eax |
1019 | mov -4(%rsi), %ecx |
1020 | cmp %eax, %ecx |
1021 | jne L(diffin4bytes) |
1022 | xor %eax, %eax |
1023 | ret |
1024 | |
1025 | .p2align 4 |
1026 | L(3bytes): |
1027 | movzwl -3(%rdi), %eax |
1028 | movzwl -3(%rsi), %ecx |
1029 | cmp %eax, %ecx |
1030 | jne L(diffin2bytes) |
1031 | L(1bytes): |
1032 | movzbl -1(%rdi), %eax |
1033 | movzbl -1(%rsi), %ecx |
1034 | sub %ecx, %eax |
1035 | ret |
1036 | # endif |
1037 | |
1038 | .p2align 4 |
1039 | L(68bytes): |
1040 | movdqu -68(%rdi), %xmm2 |
1041 | movdqu -68(%rsi), %xmm1 |
1042 | mov $-68, %dl |
1043 | pxor %xmm1, %xmm2 |
1044 | ptest %xmm2, %xmm0 |
1045 | jnc L(less16bytes) |
1046 | L(52bytes): |
1047 | movdqu -52(%rdi), %xmm2 |
1048 | movdqu -52(%rsi), %xmm1 |
1049 | mov $-52, %dl |
1050 | pxor %xmm1, %xmm2 |
1051 | ptest %xmm2, %xmm0 |
1052 | jnc L(less16bytes) |
1053 | L(36bytes): |
1054 | movdqu -36(%rdi), %xmm2 |
1055 | movdqu -36(%rsi), %xmm1 |
1056 | mov $-36, %dl |
1057 | pxor %xmm1, %xmm2 |
1058 | ptest %xmm2, %xmm0 |
1059 | jnc L(less16bytes) |
1060 | L(20bytes): |
1061 | movdqu -20(%rdi), %xmm2 |
1062 | movdqu -20(%rsi), %xmm1 |
1063 | mov $-20, %dl |
1064 | pxor %xmm1, %xmm2 |
1065 | ptest %xmm2, %xmm0 |
1066 | jnc L(less16bytes) |
1067 | mov -4(%rsi), %ecx |
1068 | |
1069 | # ifndef USE_AS_WMEMCMP |
1070 | mov -4(%rdi), %eax |
1071 | cmp %eax, %ecx |
1072 | # else |
1073 | cmp -4(%rdi), %ecx |
1074 | # endif |
1075 | jne L(diffin4bytes) |
1076 | xor %eax, %eax |
1077 | ret |
1078 | |
1079 | # ifndef USE_AS_WMEMCMP |
1080 | /* unreal cases for wmemcmp */ |
1081 | .p2align 4 |
1082 | L(69bytes): |
1083 | movdqu -69(%rsi), %xmm1 |
1084 | movdqu -69(%rdi), %xmm2 |
1085 | mov $-69, %dl |
1086 | pxor %xmm1, %xmm2 |
1087 | ptest %xmm2, %xmm0 |
1088 | jnc L(less16bytes) |
1089 | L(53bytes): |
1090 | movdqu -53(%rsi), %xmm1 |
1091 | movdqu -53(%rdi), %xmm2 |
1092 | mov $-53, %dl |
1093 | pxor %xmm1, %xmm2 |
1094 | ptest %xmm2, %xmm0 |
1095 | jnc L(less16bytes) |
1096 | L(37bytes): |
1097 | movdqu -37(%rsi), %xmm1 |
1098 | movdqu -37(%rdi), %xmm2 |
1099 | mov $-37, %dl |
1100 | pxor %xmm1, %xmm2 |
1101 | ptest %xmm2, %xmm0 |
1102 | jnc L(less16bytes) |
1103 | L(21bytes): |
1104 | movdqu -21(%rsi), %xmm1 |
1105 | movdqu -21(%rdi), %xmm2 |
1106 | mov $-21, %dl |
1107 | pxor %xmm1, %xmm2 |
1108 | ptest %xmm2, %xmm0 |
1109 | jnc L(less16bytes) |
1110 | mov -8(%rdi), %rax |
1111 | mov -8(%rsi), %rcx |
1112 | cmp %rax, %rcx |
1113 | jne L(diffin8bytes) |
1114 | xor %eax, %eax |
1115 | ret |
1116 | |
1117 | .p2align 4 |
1118 | L(70bytes): |
1119 | movdqu -70(%rsi), %xmm1 |
1120 | movdqu -70(%rdi), %xmm2 |
1121 | mov $-70, %dl |
1122 | pxor %xmm1, %xmm2 |
1123 | ptest %xmm2, %xmm0 |
1124 | jnc L(less16bytes) |
1125 | L(54bytes): |
1126 | movdqu -54(%rsi), %xmm1 |
1127 | movdqu -54(%rdi), %xmm2 |
1128 | mov $-54, %dl |
1129 | pxor %xmm1, %xmm2 |
1130 | ptest %xmm2, %xmm0 |
1131 | jnc L(less16bytes) |
1132 | L(38bytes): |
1133 | movdqu -38(%rsi), %xmm1 |
1134 | movdqu -38(%rdi), %xmm2 |
1135 | mov $-38, %dl |
1136 | pxor %xmm1, %xmm2 |
1137 | ptest %xmm2, %xmm0 |
1138 | jnc L(less16bytes) |
1139 | L(22bytes): |
1140 | movdqu -22(%rsi), %xmm1 |
1141 | movdqu -22(%rdi), %xmm2 |
1142 | mov $-22, %dl |
1143 | pxor %xmm1, %xmm2 |
1144 | ptest %xmm2, %xmm0 |
1145 | jnc L(less16bytes) |
1146 | mov -8(%rdi), %rax |
1147 | mov -8(%rsi), %rcx |
1148 | cmp %rax, %rcx |
1149 | jne L(diffin8bytes) |
1150 | xor %eax, %eax |
1151 | ret |
1152 | |
1153 | .p2align 4 |
1154 | L(71bytes): |
1155 | movdqu -71(%rsi), %xmm1 |
1156 | movdqu -71(%rdi), %xmm2 |
1157 | mov $-71, %dl |
1158 | pxor %xmm1, %xmm2 |
1159 | ptest %xmm2, %xmm0 |
1160 | jnc L(less16bytes) |
1161 | L(55bytes): |
1162 | movdqu -55(%rdi), %xmm2 |
1163 | movdqu -55(%rsi), %xmm1 |
1164 | mov $-55, %dl |
1165 | pxor %xmm1, %xmm2 |
1166 | ptest %xmm2, %xmm0 |
1167 | jnc L(less16bytes) |
1168 | L(39bytes): |
1169 | movdqu -39(%rdi), %xmm2 |
1170 | movdqu -39(%rsi), %xmm1 |
1171 | mov $-39, %dl |
1172 | pxor %xmm1, %xmm2 |
1173 | ptest %xmm2, %xmm0 |
1174 | jnc L(less16bytes) |
1175 | L(23bytes): |
1176 | movdqu -23(%rdi), %xmm2 |
1177 | movdqu -23(%rsi), %xmm1 |
1178 | mov $-23, %dl |
1179 | pxor %xmm1, %xmm2 |
1180 | ptest %xmm2, %xmm0 |
1181 | jnc L(less16bytes) |
1182 | mov -8(%rdi), %rax |
1183 | mov -8(%rsi), %rcx |
1184 | cmp %rax, %rcx |
1185 | jne L(diffin8bytes) |
1186 | xor %eax, %eax |
1187 | ret |
1188 | # endif |
1189 | |
1190 | .p2align 4 |
1191 | L(72bytes): |
1192 | movdqu -72(%rsi), %xmm1 |
1193 | movdqu -72(%rdi), %xmm2 |
1194 | mov $-72, %dl |
1195 | pxor %xmm1, %xmm2 |
1196 | ptest %xmm2, %xmm0 |
1197 | jnc L(less16bytes) |
1198 | L(56bytes): |
1199 | movdqu -56(%rdi), %xmm2 |
1200 | movdqu -56(%rsi), %xmm1 |
1201 | mov $-56, %dl |
1202 | pxor %xmm1, %xmm2 |
1203 | ptest %xmm2, %xmm0 |
1204 | jnc L(less16bytes) |
1205 | L(40bytes): |
1206 | movdqu -40(%rdi), %xmm2 |
1207 | movdqu -40(%rsi), %xmm1 |
1208 | mov $-40, %dl |
1209 | pxor %xmm1, %xmm2 |
1210 | ptest %xmm2, %xmm0 |
1211 | jnc L(less16bytes) |
1212 | L(24bytes): |
1213 | movdqu -24(%rdi), %xmm2 |
1214 | movdqu -24(%rsi), %xmm1 |
1215 | mov $-24, %dl |
1216 | pxor %xmm1, %xmm2 |
1217 | ptest %xmm2, %xmm0 |
1218 | jnc L(less16bytes) |
1219 | |
1220 | mov -8(%rsi), %rcx |
1221 | mov -8(%rdi), %rax |
1222 | cmp %rax, %rcx |
1223 | jne L(diffin8bytes) |
1224 | xor %eax, %eax |
1225 | ret |
1226 | |
1227 | # ifndef USE_AS_WMEMCMP |
1228 | /* unreal cases for wmemcmp */ |
1229 | .p2align 4 |
1230 | L(73bytes): |
1231 | movdqu -73(%rsi), %xmm1 |
1232 | movdqu -73(%rdi), %xmm2 |
1233 | mov $-73, %dl |
1234 | pxor %xmm1, %xmm2 |
1235 | ptest %xmm2, %xmm0 |
1236 | jnc L(less16bytes) |
1237 | L(57bytes): |
1238 | movdqu -57(%rdi), %xmm2 |
1239 | movdqu -57(%rsi), %xmm1 |
1240 | mov $-57, %dl |
1241 | pxor %xmm1, %xmm2 |
1242 | ptest %xmm2, %xmm0 |
1243 | jnc L(less16bytes) |
1244 | L(41bytes): |
1245 | movdqu -41(%rdi), %xmm2 |
1246 | movdqu -41(%rsi), %xmm1 |
1247 | mov $-41, %dl |
1248 | pxor %xmm1, %xmm2 |
1249 | ptest %xmm2, %xmm0 |
1250 | jnc L(less16bytes) |
1251 | L(25bytes): |
1252 | movdqu -25(%rdi), %xmm2 |
1253 | movdqu -25(%rsi), %xmm1 |
1254 | mov $-25, %dl |
1255 | pxor %xmm1, %xmm2 |
1256 | ptest %xmm2, %xmm0 |
1257 | jnc L(less16bytes) |
1258 | mov -9(%rdi), %rax |
1259 | mov -9(%rsi), %rcx |
1260 | cmp %rax, %rcx |
1261 | jne L(diffin8bytes) |
1262 | movzbl -1(%rdi), %eax |
1263 | movzbl -1(%rsi), %ecx |
1264 | sub %ecx, %eax |
1265 | ret |
1266 | |
1267 | .p2align 4 |
1268 | L(74bytes): |
1269 | movdqu -74(%rsi), %xmm1 |
1270 | movdqu -74(%rdi), %xmm2 |
1271 | mov $-74, %dl |
1272 | pxor %xmm1, %xmm2 |
1273 | ptest %xmm2, %xmm0 |
1274 | jnc L(less16bytes) |
1275 | L(58bytes): |
1276 | movdqu -58(%rdi), %xmm2 |
1277 | movdqu -58(%rsi), %xmm1 |
1278 | mov $-58, %dl |
1279 | pxor %xmm1, %xmm2 |
1280 | ptest %xmm2, %xmm0 |
1281 | jnc L(less16bytes) |
1282 | L(42bytes): |
1283 | movdqu -42(%rdi), %xmm2 |
1284 | movdqu -42(%rsi), %xmm1 |
1285 | mov $-42, %dl |
1286 | pxor %xmm1, %xmm2 |
1287 | ptest %xmm2, %xmm0 |
1288 | jnc L(less16bytes) |
1289 | L(26bytes): |
1290 | movdqu -26(%rdi), %xmm2 |
1291 | movdqu -26(%rsi), %xmm1 |
1292 | mov $-26, %dl |
1293 | pxor %xmm1, %xmm2 |
1294 | ptest %xmm2, %xmm0 |
1295 | jnc L(less16bytes) |
1296 | mov -10(%rdi), %rax |
1297 | mov -10(%rsi), %rcx |
1298 | cmp %rax, %rcx |
1299 | jne L(diffin8bytes) |
1300 | movzwl -2(%rdi), %eax |
1301 | movzwl -2(%rsi), %ecx |
1302 | jmp L(diffin2bytes) |
1303 | |
1304 | .p2align 4 |
1305 | L(75bytes): |
1306 | movdqu -75(%rsi), %xmm1 |
1307 | movdqu -75(%rdi), %xmm2 |
1308 | mov $-75, %dl |
1309 | pxor %xmm1, %xmm2 |
1310 | ptest %xmm2, %xmm0 |
1311 | jnc L(less16bytes) |
1312 | L(59bytes): |
1313 | movdqu -59(%rdi), %xmm2 |
1314 | movdqu -59(%rsi), %xmm1 |
1315 | mov $-59, %dl |
1316 | pxor %xmm1, %xmm2 |
1317 | ptest %xmm2, %xmm0 |
1318 | jnc L(less16bytes) |
1319 | L(43bytes): |
1320 | movdqu -43(%rdi), %xmm2 |
1321 | movdqu -43(%rsi), %xmm1 |
1322 | mov $-43, %dl |
1323 | pxor %xmm1, %xmm2 |
1324 | ptest %xmm2, %xmm0 |
1325 | jnc L(less16bytes) |
1326 | L(27bytes): |
1327 | movdqu -27(%rdi), %xmm2 |
1328 | movdqu -27(%rsi), %xmm1 |
1329 | mov $-27, %dl |
1330 | pxor %xmm1, %xmm2 |
1331 | ptest %xmm2, %xmm0 |
1332 | jnc L(less16bytes) |
1333 | mov -11(%rdi), %rax |
1334 | mov -11(%rsi), %rcx |
1335 | cmp %rax, %rcx |
1336 | jne L(diffin8bytes) |
1337 | mov -4(%rdi), %eax |
1338 | mov -4(%rsi), %ecx |
1339 | cmp %eax, %ecx |
1340 | jne L(diffin4bytes) |
1341 | xor %eax, %eax |
1342 | ret |
1343 | # endif |
1344 | .p2align 4 |
1345 | L(76bytes): |
1346 | movdqu -76(%rsi), %xmm1 |
1347 | movdqu -76(%rdi), %xmm2 |
1348 | mov $-76, %dl |
1349 | pxor %xmm1, %xmm2 |
1350 | ptest %xmm2, %xmm0 |
1351 | jnc L(less16bytes) |
1352 | L(60bytes): |
1353 | movdqu -60(%rdi), %xmm2 |
1354 | movdqu -60(%rsi), %xmm1 |
1355 | mov $-60, %dl |
1356 | pxor %xmm1, %xmm2 |
1357 | ptest %xmm2, %xmm0 |
1358 | jnc L(less16bytes) |
1359 | L(44bytes): |
1360 | movdqu -44(%rdi), %xmm2 |
1361 | movdqu -44(%rsi), %xmm1 |
1362 | mov $-44, %dl |
1363 | pxor %xmm1, %xmm2 |
1364 | ptest %xmm2, %xmm0 |
1365 | jnc L(less16bytes) |
1366 | L(28bytes): |
1367 | movdqu -28(%rdi), %xmm2 |
1368 | movdqu -28(%rsi), %xmm1 |
1369 | mov $-28, %dl |
1370 | pxor %xmm1, %xmm2 |
1371 | ptest %xmm2, %xmm0 |
1372 | jnc L(less16bytes) |
1373 | mov -12(%rdi), %rax |
1374 | mov -12(%rsi), %rcx |
1375 | cmp %rax, %rcx |
1376 | jne L(diffin8bytes) |
1377 | mov -4(%rsi), %ecx |
1378 | # ifndef USE_AS_WMEMCMP |
1379 | mov -4(%rdi), %eax |
1380 | cmp %eax, %ecx |
1381 | # else |
1382 | cmp -4(%rdi), %ecx |
1383 | # endif |
1384 | jne L(diffin4bytes) |
1385 | xor %eax, %eax |
1386 | ret |
1387 | |
1388 | # ifndef USE_AS_WMEMCMP |
1389 | /* unreal cases for wmemcmp */ |
1390 | .p2align 4 |
1391 | L(77bytes): |
1392 | movdqu -77(%rsi), %xmm1 |
1393 | movdqu -77(%rdi), %xmm2 |
1394 | mov $-77, %dl |
1395 | pxor %xmm1, %xmm2 |
1396 | ptest %xmm2, %xmm0 |
1397 | jnc L(less16bytes) |
1398 | L(61bytes): |
1399 | movdqu -61(%rdi), %xmm2 |
1400 | movdqu -61(%rsi), %xmm1 |
1401 | mov $-61, %dl |
1402 | pxor %xmm1, %xmm2 |
1403 | ptest %xmm2, %xmm0 |
1404 | jnc L(less16bytes) |
1405 | L(45bytes): |
1406 | movdqu -45(%rdi), %xmm2 |
1407 | movdqu -45(%rsi), %xmm1 |
1408 | mov $-45, %dl |
1409 | pxor %xmm1, %xmm2 |
1410 | ptest %xmm2, %xmm0 |
1411 | jnc L(less16bytes) |
1412 | L(29bytes): |
1413 | movdqu -29(%rdi), %xmm2 |
1414 | movdqu -29(%rsi), %xmm1 |
1415 | mov $-29, %dl |
1416 | pxor %xmm1, %xmm2 |
1417 | ptest %xmm2, %xmm0 |
1418 | jnc L(less16bytes) |
1419 | |
1420 | mov -13(%rdi), %rax |
1421 | mov -13(%rsi), %rcx |
1422 | cmp %rax, %rcx |
1423 | jne L(diffin8bytes) |
1424 | |
1425 | mov -8(%rdi), %rax |
1426 | mov -8(%rsi), %rcx |
1427 | cmp %rax, %rcx |
1428 | jne L(diffin8bytes) |
1429 | xor %eax, %eax |
1430 | ret |
1431 | |
1432 | .p2align 4 |
1433 | L(78bytes): |
1434 | movdqu -78(%rsi), %xmm1 |
1435 | movdqu -78(%rdi), %xmm2 |
1436 | mov $-78, %dl |
1437 | pxor %xmm1, %xmm2 |
1438 | ptest %xmm2, %xmm0 |
1439 | jnc L(less16bytes) |
1440 | L(62bytes): |
1441 | movdqu -62(%rdi), %xmm2 |
1442 | movdqu -62(%rsi), %xmm1 |
1443 | mov $-62, %dl |
1444 | pxor %xmm1, %xmm2 |
1445 | ptest %xmm2, %xmm0 |
1446 | jnc L(less16bytes) |
1447 | L(46bytes): |
1448 | movdqu -46(%rdi), %xmm2 |
1449 | movdqu -46(%rsi), %xmm1 |
1450 | mov $-46, %dl |
1451 | pxor %xmm1, %xmm2 |
1452 | ptest %xmm2, %xmm0 |
1453 | jnc L(less16bytes) |
1454 | L(30bytes): |
1455 | movdqu -30(%rdi), %xmm2 |
1456 | movdqu -30(%rsi), %xmm1 |
1457 | mov $-30, %dl |
1458 | pxor %xmm1, %xmm2 |
1459 | ptest %xmm2, %xmm0 |
1460 | jnc L(less16bytes) |
1461 | mov -14(%rdi), %rax |
1462 | mov -14(%rsi), %rcx |
1463 | cmp %rax, %rcx |
1464 | jne L(diffin8bytes) |
1465 | mov -8(%rdi), %rax |
1466 | mov -8(%rsi), %rcx |
1467 | cmp %rax, %rcx |
1468 | jne L(diffin8bytes) |
1469 | xor %eax, %eax |
1470 | ret |
1471 | |
1472 | .p2align 4 |
1473 | L(79bytes): |
1474 | movdqu -79(%rsi), %xmm1 |
1475 | movdqu -79(%rdi), %xmm2 |
1476 | mov $-79, %dl |
1477 | pxor %xmm1, %xmm2 |
1478 | ptest %xmm2, %xmm0 |
1479 | jnc L(less16bytes) |
1480 | L(63bytes): |
1481 | movdqu -63(%rdi), %xmm2 |
1482 | movdqu -63(%rsi), %xmm1 |
1483 | mov $-63, %dl |
1484 | pxor %xmm1, %xmm2 |
1485 | ptest %xmm2, %xmm0 |
1486 | jnc L(less16bytes) |
1487 | L(47bytes): |
1488 | movdqu -47(%rdi), %xmm2 |
1489 | movdqu -47(%rsi), %xmm1 |
1490 | mov $-47, %dl |
1491 | pxor %xmm1, %xmm2 |
1492 | ptest %xmm2, %xmm0 |
1493 | jnc L(less16bytes) |
1494 | L(31bytes): |
1495 | movdqu -31(%rdi), %xmm2 |
1496 | movdqu -31(%rsi), %xmm1 |
1497 | mov $-31, %dl |
1498 | pxor %xmm1, %xmm2 |
1499 | ptest %xmm2, %xmm0 |
1500 | jnc L(less16bytes) |
1501 | mov -15(%rdi), %rax |
1502 | mov -15(%rsi), %rcx |
1503 | cmp %rax, %rcx |
1504 | jne L(diffin8bytes) |
1505 | mov -8(%rdi), %rax |
1506 | mov -8(%rsi), %rcx |
1507 | cmp %rax, %rcx |
1508 | jne L(diffin8bytes) |
1509 | xor %eax, %eax |
1510 | ret |
1511 | # endif |
1512 | .p2align 4 |
1513 | L(64bytes): |
1514 | movdqu -64(%rdi), %xmm2 |
1515 | movdqu -64(%rsi), %xmm1 |
1516 | mov $-64, %dl |
1517 | pxor %xmm1, %xmm2 |
1518 | ptest %xmm2, %xmm0 |
1519 | jnc L(less16bytes) |
1520 | L(48bytes): |
1521 | movdqu -48(%rdi), %xmm2 |
1522 | movdqu -48(%rsi), %xmm1 |
1523 | mov $-48, %dl |
1524 | pxor %xmm1, %xmm2 |
1525 | ptest %xmm2, %xmm0 |
1526 | jnc L(less16bytes) |
1527 | L(32bytes): |
1528 | movdqu -32(%rdi), %xmm2 |
1529 | movdqu -32(%rsi), %xmm1 |
1530 | mov $-32, %dl |
1531 | pxor %xmm1, %xmm2 |
1532 | ptest %xmm2, %xmm0 |
1533 | jnc L(less16bytes) |
1534 | |
1535 | mov -16(%rdi), %rax |
1536 | mov -16(%rsi), %rcx |
1537 | cmp %rax, %rcx |
1538 | jne L(diffin8bytes) |
1539 | |
1540 | mov -8(%rdi), %rax |
1541 | mov -8(%rsi), %rcx |
1542 | cmp %rax, %rcx |
1543 | jne L(diffin8bytes) |
1544 | xor %eax, %eax |
1545 | ret |
1546 | |
1547 | /* |
1548 | * Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block. |
1549 | */ |
1550 | .p2align 3 |
1551 | L(less16bytes): |
1552 | movsbq %dl, %rdx |
1553 | mov (%rsi, %rdx), %rcx |
1554 | mov (%rdi, %rdx), %rax |
1555 | cmp %rax, %rcx |
1556 | jne L(diffin8bytes) |
1557 | mov 8(%rsi, %rdx), %rcx |
1558 | mov 8(%rdi, %rdx), %rax |
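/* Narrow down the differing 8 bytes: check the low dword first, then the
   high dword.  memcmp keeps narrowing through 16-bit and 8-bit compares
   so the result's sign reflects the first differing byte, compared as
   unsigned; wmemcmp compares the whole dword as a signed element.  */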
1559 | L(diffin8bytes): |
1560 | cmp %eax, %ecx |
1561 | jne L(diffin4bytes) |
1562 | shr $32, %rcx |
1563 | shr $32, %rax |
1564 | |
1565 | # ifdef USE_AS_WMEMCMP |
/* wmemcmp: the low 4-byte elements are equal; check the high elements.  */
1567 | cmp %eax, %ecx |
1568 | jne L(diffin4bytes) |
1569 | xor %eax, %eax |
1570 | ret |
1571 | # endif |
1572 | |
1573 | L(diffin4bytes): |
1574 | # ifndef USE_AS_WMEMCMP |
1575 | cmp %cx, %ax |
1576 | jne L(diffin2bytes) |
1577 | shr $16, %ecx |
1578 | shr $16, %eax |
1579 | L(diffin2bytes): |
1580 | cmp %cl, %al |
1581 | jne L(end) |
1582 | and $0xffff, %eax |
1583 | and $0xffff, %ecx |
1584 | sub %ecx, %eax |
1585 | ret |
1586 | |
1587 | .p2align 4 |
1588 | L(end): |
1589 | and $0xff, %eax |
1590 | and $0xff, %ecx |
1591 | sub %ecx, %eax |
1592 | ret |
1593 | # else |
1594 | |
/* wmemcmp: the flags come from a signed compare of the differing 4-byte
   elements, second buffer minus first; return 1 when the element from the
   first buffer is greater, otherwise -1.  */
1596 | mov $1, %eax |
1597 | jl L(nequal_bigger) |
1598 | neg %eax |
1599 | ret |
1600 | |
1601 | .p2align 4 |
1602 | L(nequal_bigger): |
1603 | ret |
1604 | |
1605 | L(unreal_case): |
1606 | xor %eax, %eax |
1607 | ret |
1608 | # endif |
1609 | |
1610 | END (MEMCMP) |
1611 | |
.section .rodata.sse4.1,"a",@progbits
1613 | .p2align 3 |
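/* Table of 32-bit offsets, relative to the table itself, of the tail
   blocks for residual lengths 0..79.  For wmemcmp only lengths that are a
   multiple of 4 can occur, so the other slots point to L(unreal_case).  */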
1614 | # ifndef USE_AS_WMEMCMP |
1615 | L(table_64bytes): |
1616 | .int JMPTBL (L(0bytes), L(table_64bytes)) |
1617 | .int JMPTBL (L(1bytes), L(table_64bytes)) |
1618 | .int JMPTBL (L(2bytes), L(table_64bytes)) |
1619 | .int JMPTBL (L(3bytes), L(table_64bytes)) |
1620 | .int JMPTBL (L(4bytes), L(table_64bytes)) |
1621 | .int JMPTBL (L(5bytes), L(table_64bytes)) |
1622 | .int JMPTBL (L(6bytes), L(table_64bytes)) |
1623 | .int JMPTBL (L(7bytes), L(table_64bytes)) |
1624 | .int JMPTBL (L(8bytes), L(table_64bytes)) |
1625 | .int JMPTBL (L(9bytes), L(table_64bytes)) |
1626 | .int JMPTBL (L(10bytes), L(table_64bytes)) |
1627 | .int JMPTBL (L(11bytes), L(table_64bytes)) |
1628 | .int JMPTBL (L(12bytes), L(table_64bytes)) |
1629 | .int JMPTBL (L(13bytes), L(table_64bytes)) |
1630 | .int JMPTBL (L(14bytes), L(table_64bytes)) |
1631 | .int JMPTBL (L(15bytes), L(table_64bytes)) |
1632 | .int JMPTBL (L(16bytes), L(table_64bytes)) |
1633 | .int JMPTBL (L(17bytes), L(table_64bytes)) |
1634 | .int JMPTBL (L(18bytes), L(table_64bytes)) |
1635 | .int JMPTBL (L(19bytes), L(table_64bytes)) |
1636 | .int JMPTBL (L(20bytes), L(table_64bytes)) |
1637 | .int JMPTBL (L(21bytes), L(table_64bytes)) |
1638 | .int JMPTBL (L(22bytes), L(table_64bytes)) |
1639 | .int JMPTBL (L(23bytes), L(table_64bytes)) |
1640 | .int JMPTBL (L(24bytes), L(table_64bytes)) |
1641 | .int JMPTBL (L(25bytes), L(table_64bytes)) |
1642 | .int JMPTBL (L(26bytes), L(table_64bytes)) |
1643 | .int JMPTBL (L(27bytes), L(table_64bytes)) |
1644 | .int JMPTBL (L(28bytes), L(table_64bytes)) |
1645 | .int JMPTBL (L(29bytes), L(table_64bytes)) |
1646 | .int JMPTBL (L(30bytes), L(table_64bytes)) |
1647 | .int JMPTBL (L(31bytes), L(table_64bytes)) |
1648 | .int JMPTBL (L(32bytes), L(table_64bytes)) |
1649 | .int JMPTBL (L(33bytes), L(table_64bytes)) |
1650 | .int JMPTBL (L(34bytes), L(table_64bytes)) |
1651 | .int JMPTBL (L(35bytes), L(table_64bytes)) |
1652 | .int JMPTBL (L(36bytes), L(table_64bytes)) |
1653 | .int JMPTBL (L(37bytes), L(table_64bytes)) |
1654 | .int JMPTBL (L(38bytes), L(table_64bytes)) |
1655 | .int JMPTBL (L(39bytes), L(table_64bytes)) |
1656 | .int JMPTBL (L(40bytes), L(table_64bytes)) |
1657 | .int JMPTBL (L(41bytes), L(table_64bytes)) |
1658 | .int JMPTBL (L(42bytes), L(table_64bytes)) |
1659 | .int JMPTBL (L(43bytes), L(table_64bytes)) |
1660 | .int JMPTBL (L(44bytes), L(table_64bytes)) |
1661 | .int JMPTBL (L(45bytes), L(table_64bytes)) |
1662 | .int JMPTBL (L(46bytes), L(table_64bytes)) |
1663 | .int JMPTBL (L(47bytes), L(table_64bytes)) |
1664 | .int JMPTBL (L(48bytes), L(table_64bytes)) |
1665 | .int JMPTBL (L(49bytes), L(table_64bytes)) |
1666 | .int JMPTBL (L(50bytes), L(table_64bytes)) |
1667 | .int JMPTBL (L(51bytes), L(table_64bytes)) |
1668 | .int JMPTBL (L(52bytes), L(table_64bytes)) |
1669 | .int JMPTBL (L(53bytes), L(table_64bytes)) |
1670 | .int JMPTBL (L(54bytes), L(table_64bytes)) |
1671 | .int JMPTBL (L(55bytes), L(table_64bytes)) |
1672 | .int JMPTBL (L(56bytes), L(table_64bytes)) |
1673 | .int JMPTBL (L(57bytes), L(table_64bytes)) |
1674 | .int JMPTBL (L(58bytes), L(table_64bytes)) |
1675 | .int JMPTBL (L(59bytes), L(table_64bytes)) |
1676 | .int JMPTBL (L(60bytes), L(table_64bytes)) |
1677 | .int JMPTBL (L(61bytes), L(table_64bytes)) |
1678 | .int JMPTBL (L(62bytes), L(table_64bytes)) |
1679 | .int JMPTBL (L(63bytes), L(table_64bytes)) |
1680 | .int JMPTBL (L(64bytes), L(table_64bytes)) |
1681 | .int JMPTBL (L(65bytes), L(table_64bytes)) |
1682 | .int JMPTBL (L(66bytes), L(table_64bytes)) |
1683 | .int JMPTBL (L(67bytes), L(table_64bytes)) |
1684 | .int JMPTBL (L(68bytes), L(table_64bytes)) |
1685 | .int JMPTBL (L(69bytes), L(table_64bytes)) |
1686 | .int JMPTBL (L(70bytes), L(table_64bytes)) |
1687 | .int JMPTBL (L(71bytes), L(table_64bytes)) |
1688 | .int JMPTBL (L(72bytes), L(table_64bytes)) |
1689 | .int JMPTBL (L(73bytes), L(table_64bytes)) |
1690 | .int JMPTBL (L(74bytes), L(table_64bytes)) |
1691 | .int JMPTBL (L(75bytes), L(table_64bytes)) |
1692 | .int JMPTBL (L(76bytes), L(table_64bytes)) |
1693 | .int JMPTBL (L(77bytes), L(table_64bytes)) |
1694 | .int JMPTBL (L(78bytes), L(table_64bytes)) |
1695 | .int JMPTBL (L(79bytes), L(table_64bytes)) |
1696 | # else |
1697 | L(table_64bytes): |
1698 | .int JMPTBL (L(0bytes), L(table_64bytes)) |
1699 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1700 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1701 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1702 | .int JMPTBL (L(4bytes), L(table_64bytes)) |
1703 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1704 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1705 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1706 | .int JMPTBL (L(8bytes), L(table_64bytes)) |
1707 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1708 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1709 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1710 | .int JMPTBL (L(12bytes), L(table_64bytes)) |
1711 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1712 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1713 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1714 | .int JMPTBL (L(16bytes), L(table_64bytes)) |
1715 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1716 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1717 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1718 | .int JMPTBL (L(20bytes), L(table_64bytes)) |
1719 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1720 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1721 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1722 | .int JMPTBL (L(24bytes), L(table_64bytes)) |
1723 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1724 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1725 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1726 | .int JMPTBL (L(28bytes), L(table_64bytes)) |
1727 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1728 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1729 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1730 | .int JMPTBL (L(32bytes), L(table_64bytes)) |
1731 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1732 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1733 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1734 | .int JMPTBL (L(36bytes), L(table_64bytes)) |
1735 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1736 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1737 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1738 | .int JMPTBL (L(40bytes), L(table_64bytes)) |
1739 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1740 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1741 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1742 | .int JMPTBL (L(44bytes), L(table_64bytes)) |
1743 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1744 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1745 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1746 | .int JMPTBL (L(48bytes), L(table_64bytes)) |
1747 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1748 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1749 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1750 | .int JMPTBL (L(52bytes), L(table_64bytes)) |
1751 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1752 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1753 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1754 | .int JMPTBL (L(56bytes), L(table_64bytes)) |
1755 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1756 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1757 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1758 | .int JMPTBL (L(60bytes), L(table_64bytes)) |
1759 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1760 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1761 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1762 | .int JMPTBL (L(64bytes), L(table_64bytes)) |
1763 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1764 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1765 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1766 | .int JMPTBL (L(68bytes), L(table_64bytes)) |
1767 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1768 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1769 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1770 | .int JMPTBL (L(72bytes), L(table_64bytes)) |
1771 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1772 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1773 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1774 | .int JMPTBL (L(76bytes), L(table_64bytes)) |
1775 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1776 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1777 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1778 | # endif |
1779 | #endif |
1780 | |