1 | /* memcmp with SSE4.1, wmemcmp with SSE4.1 |
2 | Copyright (C) 2010-2016 Free Software Foundation, Inc. |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #if IS_IN (libc) |
21 | |
22 | # include <sysdep.h> |
23 | |
24 | # ifndef MEMCMP |
25 | # define MEMCMP __memcmp_sse4_1 |
26 | # endif |
27 | |
28 | # define JMPTBL(I, B) (I - B) |
29 | |
30 | # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ |
31 | lea TABLE(%rip), %r11; \ |
32 | movslq (%r11, INDEX, SCALE), %rcx; \ |
33 | add %r11, %rcx; \ |
34 | jmp *%rcx; \ |
35 | ud2 |
36 | |
37 | /* Warning! |
38 | wmemcmp has to use SIGNED comparison for elements. |
39 | memcmp has to use UNSIGNED comparison for elemnts. |
40 | */ |
41 | |
42 | .section .text.sse4.1,"ax" ,@progbits |
43 | ENTRY (MEMCMP) |
44 | # ifdef USE_AS_WMEMCMP |
45 | shl $2, %rdx |
46 | # endif |
47 | pxor %xmm0, %xmm0 |
48 | cmp $79, %rdx |
49 | ja L(79bytesormore) |
50 | # ifndef USE_AS_WMEMCMP |
51 | cmp $1, %rdx |
52 | je L(firstbyte) |
53 | # endif |
54 | add %rdx, %rsi |
55 | add %rdx, %rdi |
56 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
57 | |
58 | # ifndef USE_AS_WMEMCMP |
59 | .p2align 4 |
60 | L(firstbyte): |
61 | movzbl (%rdi), %eax |
62 | movzbl (%rsi), %ecx |
63 | sub %ecx, %eax |
64 | ret |
65 | # endif |
66 | |
67 | .p2align 4 |
68 | L(79bytesormore): |
69 | movdqu (%rsi), %xmm1 |
70 | movdqu (%rdi), %xmm2 |
71 | pxor %xmm1, %xmm2 |
72 | ptest %xmm2, %xmm0 |
73 | jnc L(16bytesin256) |
74 | mov %rsi, %rcx |
75 | and $-16, %rsi |
76 | add $16, %rsi |
77 | sub %rsi, %rcx |
78 | |
79 | sub %rcx, %rdi |
80 | add %rcx, %rdx |
81 | test $0xf, %rdi |
82 | jz L(2aligned) |
83 | |
84 | cmp $128, %rdx |
85 | ja L(128bytesormore) |
86 | L(less128bytes): |
87 | sub $64, %rdx |
88 | |
89 | movdqu (%rdi), %xmm2 |
90 | pxor (%rsi), %xmm2 |
91 | ptest %xmm2, %xmm0 |
92 | jnc L(16bytesin256) |
93 | |
94 | movdqu 16(%rdi), %xmm2 |
95 | pxor 16(%rsi), %xmm2 |
96 | ptest %xmm2, %xmm0 |
97 | jnc L(32bytesin256) |
98 | |
99 | movdqu 32(%rdi), %xmm2 |
100 | pxor 32(%rsi), %xmm2 |
101 | ptest %xmm2, %xmm0 |
102 | jnc L(48bytesin256) |
103 | |
104 | movdqu 48(%rdi), %xmm2 |
105 | pxor 48(%rsi), %xmm2 |
106 | ptest %xmm2, %xmm0 |
107 | jnc L(64bytesin256) |
108 | cmp $32, %rdx |
109 | jb L(less32bytesin64) |
110 | |
111 | movdqu 64(%rdi), %xmm2 |
112 | pxor 64(%rsi), %xmm2 |
113 | ptest %xmm2, %xmm0 |
114 | jnc L(80bytesin256) |
115 | |
116 | movdqu 80(%rdi), %xmm2 |
117 | pxor 80(%rsi), %xmm2 |
118 | ptest %xmm2, %xmm0 |
119 | jnc L(96bytesin256) |
120 | sub $32, %rdx |
121 | add $32, %rdi |
122 | add $32, %rsi |
123 | L(less32bytesin64): |
124 | add $64, %rdi |
125 | add $64, %rsi |
126 | add %rdx, %rsi |
127 | add %rdx, %rdi |
128 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
129 | |
130 | L(128bytesormore): |
131 | cmp $512, %rdx |
132 | ja L(512bytesormore) |
133 | cmp $256, %rdx |
134 | ja L(less512bytes) |
135 | L(less256bytes): |
136 | sub $128, %rdx |
137 | |
138 | movdqu (%rdi), %xmm2 |
139 | pxor (%rsi), %xmm2 |
140 | ptest %xmm2, %xmm0 |
141 | jnc L(16bytesin256) |
142 | |
143 | movdqu 16(%rdi), %xmm2 |
144 | pxor 16(%rsi), %xmm2 |
145 | ptest %xmm2, %xmm0 |
146 | jnc L(32bytesin256) |
147 | |
148 | movdqu 32(%rdi), %xmm2 |
149 | pxor 32(%rsi), %xmm2 |
150 | ptest %xmm2, %xmm0 |
151 | jnc L(48bytesin256) |
152 | |
153 | movdqu 48(%rdi), %xmm2 |
154 | pxor 48(%rsi), %xmm2 |
155 | ptest %xmm2, %xmm0 |
156 | jnc L(64bytesin256) |
157 | |
158 | movdqu 64(%rdi), %xmm2 |
159 | pxor 64(%rsi), %xmm2 |
160 | ptest %xmm2, %xmm0 |
161 | jnc L(80bytesin256) |
162 | |
163 | movdqu 80(%rdi), %xmm2 |
164 | pxor 80(%rsi), %xmm2 |
165 | ptest %xmm2, %xmm0 |
166 | jnc L(96bytesin256) |
167 | |
168 | movdqu 96(%rdi), %xmm2 |
169 | pxor 96(%rsi), %xmm2 |
170 | ptest %xmm2, %xmm0 |
171 | jnc L(112bytesin256) |
172 | |
173 | movdqu 112(%rdi), %xmm2 |
174 | pxor 112(%rsi), %xmm2 |
175 | ptest %xmm2, %xmm0 |
176 | jnc L(128bytesin256) |
177 | |
178 | add $128, %rsi |
179 | add $128, %rdi |
180 | |
181 | cmp $64, %rdx |
182 | jae L(less128bytes) |
183 | |
184 | cmp $32, %rdx |
185 | jb L(less32bytesin128) |
186 | |
187 | movdqu (%rdi), %xmm2 |
188 | pxor (%rsi), %xmm2 |
189 | ptest %xmm2, %xmm0 |
190 | jnc L(16bytesin256) |
191 | |
192 | movdqu 16(%rdi), %xmm2 |
193 | pxor 16(%rsi), %xmm2 |
194 | ptest %xmm2, %xmm0 |
195 | jnc L(32bytesin256) |
196 | sub $32, %rdx |
197 | add $32, %rdi |
198 | add $32, %rsi |
199 | L(less32bytesin128): |
200 | add %rdx, %rsi |
201 | add %rdx, %rdi |
202 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
203 | |
204 | L(less512bytes): |
205 | sub $256, %rdx |
206 | movdqu (%rdi), %xmm2 |
207 | pxor (%rsi), %xmm2 |
208 | ptest %xmm2, %xmm0 |
209 | jnc L(16bytesin256) |
210 | |
211 | movdqu 16(%rdi), %xmm2 |
212 | pxor 16(%rsi), %xmm2 |
213 | ptest %xmm2, %xmm0 |
214 | jnc L(32bytesin256) |
215 | |
216 | movdqu 32(%rdi), %xmm2 |
217 | pxor 32(%rsi), %xmm2 |
218 | ptest %xmm2, %xmm0 |
219 | jnc L(48bytesin256) |
220 | |
221 | movdqu 48(%rdi), %xmm2 |
222 | pxor 48(%rsi), %xmm2 |
223 | ptest %xmm2, %xmm0 |
224 | jnc L(64bytesin256) |
225 | |
226 | movdqu 64(%rdi), %xmm2 |
227 | pxor 64(%rsi), %xmm2 |
228 | ptest %xmm2, %xmm0 |
229 | jnc L(80bytesin256) |
230 | |
231 | movdqu 80(%rdi), %xmm2 |
232 | pxor 80(%rsi), %xmm2 |
233 | ptest %xmm2, %xmm0 |
234 | jnc L(96bytesin256) |
235 | |
236 | movdqu 96(%rdi), %xmm2 |
237 | pxor 96(%rsi), %xmm2 |
238 | ptest %xmm2, %xmm0 |
239 | jnc L(112bytesin256) |
240 | |
241 | movdqu 112(%rdi), %xmm2 |
242 | pxor 112(%rsi), %xmm2 |
243 | ptest %xmm2, %xmm0 |
244 | jnc L(128bytesin256) |
245 | |
246 | movdqu 128(%rdi), %xmm2 |
247 | pxor 128(%rsi), %xmm2 |
248 | ptest %xmm2, %xmm0 |
249 | jnc L(144bytesin256) |
250 | |
251 | movdqu 144(%rdi), %xmm2 |
252 | pxor 144(%rsi), %xmm2 |
253 | ptest %xmm2, %xmm0 |
254 | jnc L(160bytesin256) |
255 | |
256 | movdqu 160(%rdi), %xmm2 |
257 | pxor 160(%rsi), %xmm2 |
258 | ptest %xmm2, %xmm0 |
259 | jnc L(176bytesin256) |
260 | |
261 | movdqu 176(%rdi), %xmm2 |
262 | pxor 176(%rsi), %xmm2 |
263 | ptest %xmm2, %xmm0 |
264 | jnc L(192bytesin256) |
265 | |
266 | movdqu 192(%rdi), %xmm2 |
267 | pxor 192(%rsi), %xmm2 |
268 | ptest %xmm2, %xmm0 |
269 | jnc L(208bytesin256) |
270 | |
271 | movdqu 208(%rdi), %xmm2 |
272 | pxor 208(%rsi), %xmm2 |
273 | ptest %xmm2, %xmm0 |
274 | jnc L(224bytesin256) |
275 | |
276 | movdqu 224(%rdi), %xmm2 |
277 | pxor 224(%rsi), %xmm2 |
278 | ptest %xmm2, %xmm0 |
279 | jnc L(240bytesin256) |
280 | |
281 | movdqu 240(%rdi), %xmm2 |
282 | pxor 240(%rsi), %xmm2 |
283 | ptest %xmm2, %xmm0 |
284 | jnc L(256bytesin256) |
285 | |
286 | add $256, %rsi |
287 | add $256, %rdi |
288 | |
289 | cmp $128, %rdx |
290 | jae L(less256bytes) |
291 | |
292 | cmp $64, %rdx |
293 | jae L(less128bytes) |
294 | |
295 | cmp $32, %rdx |
296 | jb L(less32bytesin256) |
297 | |
298 | movdqu (%rdi), %xmm2 |
299 | pxor (%rsi), %xmm2 |
300 | ptest %xmm2, %xmm0 |
301 | jnc L(16bytesin256) |
302 | |
303 | movdqu 16(%rdi), %xmm2 |
304 | pxor 16(%rsi), %xmm2 |
305 | ptest %xmm2, %xmm0 |
306 | jnc L(32bytesin256) |
307 | sub $32, %rdx |
308 | add $32, %rdi |
309 | add $32, %rsi |
310 | L(less32bytesin256): |
311 | add %rdx, %rsi |
312 | add %rdx, %rdi |
313 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
314 | |
315 | .p2align 4 |
316 | L(512bytesormore): |
317 | # ifdef DATA_CACHE_SIZE_HALF |
318 | mov $DATA_CACHE_SIZE_HALF, %R8_LP |
319 | # else |
320 | mov __x86_data_cache_size_half(%rip), %R8_LP |
321 | # endif |
322 | mov %r8, %r9 |
323 | shr $1, %r8 |
324 | add %r9, %r8 |
325 | cmp %r8, %rdx |
326 | ja L(L2_L3_cache_unaglined) |
327 | sub $64, %rdx |
328 | .p2align 4 |
329 | L(64bytesormore_loop): |
330 | movdqu (%rdi), %xmm2 |
331 | pxor (%rsi), %xmm2 |
332 | movdqa %xmm2, %xmm1 |
333 | |
334 | movdqu 16(%rdi), %xmm3 |
335 | pxor 16(%rsi), %xmm3 |
336 | por %xmm3, %xmm1 |
337 | |
338 | movdqu 32(%rdi), %xmm4 |
339 | pxor 32(%rsi), %xmm4 |
340 | por %xmm4, %xmm1 |
341 | |
342 | movdqu 48(%rdi), %xmm5 |
343 | pxor 48(%rsi), %xmm5 |
344 | por %xmm5, %xmm1 |
345 | |
346 | ptest %xmm1, %xmm0 |
347 | jnc L(64bytesormore_loop_end) |
348 | add $64, %rsi |
349 | add $64, %rdi |
350 | sub $64, %rdx |
351 | jae L(64bytesormore_loop) |
352 | |
353 | add $64, %rdx |
354 | add %rdx, %rsi |
355 | add %rdx, %rdi |
356 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
357 | |
358 | L(L2_L3_cache_unaglined): |
359 | sub $64, %rdx |
360 | .p2align 4 |
361 | L(L2_L3_unaligned_128bytes_loop): |
362 | prefetchnta 0x1c0(%rdi) |
363 | prefetchnta 0x1c0(%rsi) |
364 | movdqu (%rdi), %xmm2 |
365 | pxor (%rsi), %xmm2 |
366 | movdqa %xmm2, %xmm1 |
367 | |
368 | movdqu 16(%rdi), %xmm3 |
369 | pxor 16(%rsi), %xmm3 |
370 | por %xmm3, %xmm1 |
371 | |
372 | movdqu 32(%rdi), %xmm4 |
373 | pxor 32(%rsi), %xmm4 |
374 | por %xmm4, %xmm1 |
375 | |
376 | movdqu 48(%rdi), %xmm5 |
377 | pxor 48(%rsi), %xmm5 |
378 | por %xmm5, %xmm1 |
379 | |
380 | ptest %xmm1, %xmm0 |
381 | jnc L(64bytesormore_loop_end) |
382 | add $64, %rsi |
383 | add $64, %rdi |
384 | sub $64, %rdx |
385 | jae L(L2_L3_unaligned_128bytes_loop) |
386 | |
387 | add $64, %rdx |
388 | add %rdx, %rsi |
389 | add %rdx, %rdi |
390 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
391 | |
392 | /* |
393 | * This case is for machines which are sensitive for unaligned instructions. |
394 | */ |
395 | .p2align 4 |
396 | L(2aligned): |
397 | cmp $128, %rdx |
398 | ja L(128bytesormorein2aligned) |
399 | L(less128bytesin2aligned): |
400 | sub $64, %rdx |
401 | |
402 | movdqa (%rdi), %xmm2 |
403 | pxor (%rsi), %xmm2 |
404 | ptest %xmm2, %xmm0 |
405 | jnc L(16bytesin256) |
406 | |
407 | movdqa 16(%rdi), %xmm2 |
408 | pxor 16(%rsi), %xmm2 |
409 | ptest %xmm2, %xmm0 |
410 | jnc L(32bytesin256) |
411 | |
412 | movdqa 32(%rdi), %xmm2 |
413 | pxor 32(%rsi), %xmm2 |
414 | ptest %xmm2, %xmm0 |
415 | jnc L(48bytesin256) |
416 | |
417 | movdqa 48(%rdi), %xmm2 |
418 | pxor 48(%rsi), %xmm2 |
419 | ptest %xmm2, %xmm0 |
420 | jnc L(64bytesin256) |
421 | cmp $32, %rdx |
422 | jb L(less32bytesin64in2alinged) |
423 | |
424 | movdqa 64(%rdi), %xmm2 |
425 | pxor 64(%rsi), %xmm2 |
426 | ptest %xmm2, %xmm0 |
427 | jnc L(80bytesin256) |
428 | |
429 | movdqa 80(%rdi), %xmm2 |
430 | pxor 80(%rsi), %xmm2 |
431 | ptest %xmm2, %xmm0 |
432 | jnc L(96bytesin256) |
433 | sub $32, %rdx |
434 | add $32, %rdi |
435 | add $32, %rsi |
436 | L(less32bytesin64in2alinged): |
437 | add $64, %rdi |
438 | add $64, %rsi |
439 | add %rdx, %rsi |
440 | add %rdx, %rdi |
441 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
442 | |
443 | .p2align 4 |
444 | L(128bytesormorein2aligned): |
445 | cmp $512, %rdx |
446 | ja L(512bytesormorein2aligned) |
447 | cmp $256, %rdx |
448 | ja L(256bytesormorein2aligned) |
449 | L(less256bytesin2alinged): |
450 | sub $128, %rdx |
451 | |
452 | movdqa (%rdi), %xmm2 |
453 | pxor (%rsi), %xmm2 |
454 | ptest %xmm2, %xmm0 |
455 | jnc L(16bytesin256) |
456 | |
457 | movdqa 16(%rdi), %xmm2 |
458 | pxor 16(%rsi), %xmm2 |
459 | ptest %xmm2, %xmm0 |
460 | jnc L(32bytesin256) |
461 | |
462 | movdqa 32(%rdi), %xmm2 |
463 | pxor 32(%rsi), %xmm2 |
464 | ptest %xmm2, %xmm0 |
465 | jnc L(48bytesin256) |
466 | |
467 | movdqa 48(%rdi), %xmm2 |
468 | pxor 48(%rsi), %xmm2 |
469 | ptest %xmm2, %xmm0 |
470 | jnc L(64bytesin256) |
471 | |
472 | movdqa 64(%rdi), %xmm2 |
473 | pxor 64(%rsi), %xmm2 |
474 | ptest %xmm2, %xmm0 |
475 | jnc L(80bytesin256) |
476 | |
477 | movdqa 80(%rdi), %xmm2 |
478 | pxor 80(%rsi), %xmm2 |
479 | ptest %xmm2, %xmm0 |
480 | jnc L(96bytesin256) |
481 | |
482 | movdqa 96(%rdi), %xmm2 |
483 | pxor 96(%rsi), %xmm2 |
484 | ptest %xmm2, %xmm0 |
485 | jnc L(112bytesin256) |
486 | |
487 | movdqa 112(%rdi), %xmm2 |
488 | pxor 112(%rsi), %xmm2 |
489 | ptest %xmm2, %xmm0 |
490 | jnc L(128bytesin256) |
491 | |
492 | add $128, %rsi |
493 | add $128, %rdi |
494 | |
495 | cmp $64, %rdx |
496 | jae L(less128bytesin2aligned) |
497 | |
498 | cmp $32, %rdx |
499 | jb L(less32bytesin128in2aligned) |
500 | |
501 | movdqu (%rdi), %xmm2 |
502 | pxor (%rsi), %xmm2 |
503 | ptest %xmm2, %xmm0 |
504 | jnc L(16bytesin256) |
505 | |
506 | movdqu 16(%rdi), %xmm2 |
507 | pxor 16(%rsi), %xmm2 |
508 | ptest %xmm2, %xmm0 |
509 | jnc L(32bytesin256) |
510 | sub $32, %rdx |
511 | add $32, %rdi |
512 | add $32, %rsi |
513 | L(less32bytesin128in2aligned): |
514 | add %rdx, %rsi |
515 | add %rdx, %rdi |
516 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
517 | |
518 | .p2align 4 |
519 | L(256bytesormorein2aligned): |
520 | |
521 | sub $256, %rdx |
522 | movdqa (%rdi), %xmm2 |
523 | pxor (%rsi), %xmm2 |
524 | ptest %xmm2, %xmm0 |
525 | jnc L(16bytesin256) |
526 | |
527 | movdqa 16(%rdi), %xmm2 |
528 | pxor 16(%rsi), %xmm2 |
529 | ptest %xmm2, %xmm0 |
530 | jnc L(32bytesin256) |
531 | |
532 | movdqa 32(%rdi), %xmm2 |
533 | pxor 32(%rsi), %xmm2 |
534 | ptest %xmm2, %xmm0 |
535 | jnc L(48bytesin256) |
536 | |
537 | movdqa 48(%rdi), %xmm2 |
538 | pxor 48(%rsi), %xmm2 |
539 | ptest %xmm2, %xmm0 |
540 | jnc L(64bytesin256) |
541 | |
542 | movdqa 64(%rdi), %xmm2 |
543 | pxor 64(%rsi), %xmm2 |
544 | ptest %xmm2, %xmm0 |
545 | jnc L(80bytesin256) |
546 | |
547 | movdqa 80(%rdi), %xmm2 |
548 | pxor 80(%rsi), %xmm2 |
549 | ptest %xmm2, %xmm0 |
550 | jnc L(96bytesin256) |
551 | |
552 | movdqa 96(%rdi), %xmm2 |
553 | pxor 96(%rsi), %xmm2 |
554 | ptest %xmm2, %xmm0 |
555 | jnc L(112bytesin256) |
556 | |
557 | movdqa 112(%rdi), %xmm2 |
558 | pxor 112(%rsi), %xmm2 |
559 | ptest %xmm2, %xmm0 |
560 | jnc L(128bytesin256) |
561 | |
562 | movdqa 128(%rdi), %xmm2 |
563 | pxor 128(%rsi), %xmm2 |
564 | ptest %xmm2, %xmm0 |
565 | jnc L(144bytesin256) |
566 | |
567 | movdqa 144(%rdi), %xmm2 |
568 | pxor 144(%rsi), %xmm2 |
569 | ptest %xmm2, %xmm0 |
570 | jnc L(160bytesin256) |
571 | |
572 | movdqa 160(%rdi), %xmm2 |
573 | pxor 160(%rsi), %xmm2 |
574 | ptest %xmm2, %xmm0 |
575 | jnc L(176bytesin256) |
576 | |
577 | movdqa 176(%rdi), %xmm2 |
578 | pxor 176(%rsi), %xmm2 |
579 | ptest %xmm2, %xmm0 |
580 | jnc L(192bytesin256) |
581 | |
582 | movdqa 192(%rdi), %xmm2 |
583 | pxor 192(%rsi), %xmm2 |
584 | ptest %xmm2, %xmm0 |
585 | jnc L(208bytesin256) |
586 | |
587 | movdqa 208(%rdi), %xmm2 |
588 | pxor 208(%rsi), %xmm2 |
589 | ptest %xmm2, %xmm0 |
590 | jnc L(224bytesin256) |
591 | |
592 | movdqa 224(%rdi), %xmm2 |
593 | pxor 224(%rsi), %xmm2 |
594 | ptest %xmm2, %xmm0 |
595 | jnc L(240bytesin256) |
596 | |
597 | movdqa 240(%rdi), %xmm2 |
598 | pxor 240(%rsi), %xmm2 |
599 | ptest %xmm2, %xmm0 |
600 | jnc L(256bytesin256) |
601 | |
602 | add $256, %rsi |
603 | add $256, %rdi |
604 | |
605 | cmp $128, %rdx |
606 | jae L(less256bytesin2alinged) |
607 | |
608 | cmp $64, %rdx |
609 | jae L(less128bytesin2aligned) |
610 | |
611 | cmp $32, %rdx |
612 | jb L(less32bytesin256in2alinged) |
613 | |
614 | movdqa (%rdi), %xmm2 |
615 | pxor (%rsi), %xmm2 |
616 | ptest %xmm2, %xmm0 |
617 | jnc L(16bytesin256) |
618 | |
619 | movdqa 16(%rdi), %xmm2 |
620 | pxor 16(%rsi), %xmm2 |
621 | ptest %xmm2, %xmm0 |
622 | jnc L(32bytesin256) |
623 | sub $32, %rdx |
624 | add $32, %rdi |
625 | add $32, %rsi |
626 | L(less32bytesin256in2alinged): |
627 | add %rdx, %rsi |
628 | add %rdx, %rdi |
629 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
630 | |
631 | .p2align 4 |
632 | L(512bytesormorein2aligned): |
633 | # ifdef DATA_CACHE_SIZE_HALF |
634 | mov $DATA_CACHE_SIZE_HALF, %R8_LP |
635 | # else |
636 | mov __x86_data_cache_size_half(%rip), %R8_LP |
637 | # endif |
638 | mov %r8, %r9 |
639 | shr $1, %r8 |
640 | add %r9, %r8 |
641 | cmp %r8, %rdx |
642 | ja L(L2_L3_cache_aglined) |
643 | |
644 | sub $64, %rdx |
645 | .p2align 4 |
646 | L(64bytesormore_loopin2aligned): |
647 | movdqa (%rdi), %xmm2 |
648 | pxor (%rsi), %xmm2 |
649 | movdqa %xmm2, %xmm1 |
650 | |
651 | movdqa 16(%rdi), %xmm3 |
652 | pxor 16(%rsi), %xmm3 |
653 | por %xmm3, %xmm1 |
654 | |
655 | movdqa 32(%rdi), %xmm4 |
656 | pxor 32(%rsi), %xmm4 |
657 | por %xmm4, %xmm1 |
658 | |
659 | movdqa 48(%rdi), %xmm5 |
660 | pxor 48(%rsi), %xmm5 |
661 | por %xmm5, %xmm1 |
662 | |
663 | ptest %xmm1, %xmm0 |
664 | jnc L(64bytesormore_loop_end) |
665 | add $64, %rsi |
666 | add $64, %rdi |
667 | sub $64, %rdx |
668 | jae L(64bytesormore_loopin2aligned) |
669 | |
670 | add $64, %rdx |
671 | add %rdx, %rsi |
672 | add %rdx, %rdi |
673 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
674 | L(L2_L3_cache_aglined): |
675 | sub $64, %rdx |
676 | |
677 | .p2align 4 |
678 | L(L2_L3_aligned_128bytes_loop): |
679 | prefetchnta 0x1c0(%rdi) |
680 | prefetchnta 0x1c0(%rsi) |
681 | movdqa (%rdi), %xmm2 |
682 | pxor (%rsi), %xmm2 |
683 | movdqa %xmm2, %xmm1 |
684 | |
685 | movdqa 16(%rdi), %xmm3 |
686 | pxor 16(%rsi), %xmm3 |
687 | por %xmm3, %xmm1 |
688 | |
689 | movdqa 32(%rdi), %xmm4 |
690 | pxor 32(%rsi), %xmm4 |
691 | por %xmm4, %xmm1 |
692 | |
693 | movdqa 48(%rdi), %xmm5 |
694 | pxor 48(%rsi), %xmm5 |
695 | por %xmm5, %xmm1 |
696 | |
697 | ptest %xmm1, %xmm0 |
698 | jnc L(64bytesormore_loop_end) |
699 | add $64, %rsi |
700 | add $64, %rdi |
701 | sub $64, %rdx |
702 | jae L(L2_L3_aligned_128bytes_loop) |
703 | |
704 | add $64, %rdx |
705 | add %rdx, %rsi |
706 | add %rdx, %rdi |
707 | BRANCH_TO_JMPTBL_ENTRY(L(table_64bytes), %rdx, 4) |
708 | |
709 | |
710 | .p2align 4 |
711 | L(64bytesormore_loop_end): |
712 | add $16, %rdi |
713 | add $16, %rsi |
714 | ptest %xmm2, %xmm0 |
715 | jnc L(16bytes) |
716 | |
717 | add $16, %rdi |
718 | add $16, %rsi |
719 | ptest %xmm3, %xmm0 |
720 | jnc L(16bytes) |
721 | |
722 | add $16, %rdi |
723 | add $16, %rsi |
724 | ptest %xmm4, %xmm0 |
725 | jnc L(16bytes) |
726 | |
727 | add $16, %rdi |
728 | add $16, %rsi |
729 | jmp L(16bytes) |
730 | |
731 | L(256bytesin256): |
732 | add $256, %rdi |
733 | add $256, %rsi |
734 | jmp L(16bytes) |
735 | L(240bytesin256): |
736 | add $240, %rdi |
737 | add $240, %rsi |
738 | jmp L(16bytes) |
739 | L(224bytesin256): |
740 | add $224, %rdi |
741 | add $224, %rsi |
742 | jmp L(16bytes) |
743 | L(208bytesin256): |
744 | add $208, %rdi |
745 | add $208, %rsi |
746 | jmp L(16bytes) |
747 | L(192bytesin256): |
748 | add $192, %rdi |
749 | add $192, %rsi |
750 | jmp L(16bytes) |
751 | L(176bytesin256): |
752 | add $176, %rdi |
753 | add $176, %rsi |
754 | jmp L(16bytes) |
755 | L(160bytesin256): |
756 | add $160, %rdi |
757 | add $160, %rsi |
758 | jmp L(16bytes) |
759 | L(144bytesin256): |
760 | add $144, %rdi |
761 | add $144, %rsi |
762 | jmp L(16bytes) |
763 | L(128bytesin256): |
764 | add $128, %rdi |
765 | add $128, %rsi |
766 | jmp L(16bytes) |
767 | L(112bytesin256): |
768 | add $112, %rdi |
769 | add $112, %rsi |
770 | jmp L(16bytes) |
771 | L(96bytesin256): |
772 | add $96, %rdi |
773 | add $96, %rsi |
774 | jmp L(16bytes) |
775 | L(80bytesin256): |
776 | add $80, %rdi |
777 | add $80, %rsi |
778 | jmp L(16bytes) |
779 | L(64bytesin256): |
780 | add $64, %rdi |
781 | add $64, %rsi |
782 | jmp L(16bytes) |
783 | L(48bytesin256): |
784 | add $16, %rdi |
785 | add $16, %rsi |
786 | L(32bytesin256): |
787 | add $16, %rdi |
788 | add $16, %rsi |
789 | L(16bytesin256): |
790 | add $16, %rdi |
791 | add $16, %rsi |
792 | L(16bytes): |
793 | mov -16(%rdi), %rax |
794 | mov -16(%rsi), %rcx |
795 | cmp %rax, %rcx |
796 | jne L(diffin8bytes) |
797 | L(8bytes): |
798 | mov -8(%rdi), %rax |
799 | mov -8(%rsi), %rcx |
800 | cmp %rax, %rcx |
801 | jne L(diffin8bytes) |
802 | xor %eax, %eax |
803 | ret |
804 | |
805 | .p2align 4 |
806 | L(12bytes): |
807 | mov -12(%rdi), %rax |
808 | mov -12(%rsi), %rcx |
809 | cmp %rax, %rcx |
810 | jne L(diffin8bytes) |
811 | L(4bytes): |
812 | mov -4(%rsi), %ecx |
813 | # ifndef USE_AS_WMEMCMP |
814 | mov -4(%rdi), %eax |
815 | cmp %eax, %ecx |
816 | # else |
817 | cmp -4(%rdi), %ecx |
818 | # endif |
819 | jne L(diffin4bytes) |
820 | L(0bytes): |
821 | xor %eax, %eax |
822 | ret |
823 | |
824 | # ifndef USE_AS_WMEMCMP |
825 | /* unreal case for wmemcmp */ |
826 | .p2align 4 |
827 | L(65bytes): |
828 | movdqu -65(%rdi), %xmm1 |
829 | movdqu -65(%rsi), %xmm2 |
830 | mov $-65, %dl |
831 | pxor %xmm1, %xmm2 |
832 | ptest %xmm2, %xmm0 |
833 | jnc L(less16bytes) |
834 | L(49bytes): |
835 | movdqu -49(%rdi), %xmm1 |
836 | movdqu -49(%rsi), %xmm2 |
837 | mov $-49, %dl |
838 | pxor %xmm1, %xmm2 |
839 | ptest %xmm2, %xmm0 |
840 | jnc L(less16bytes) |
841 | L(33bytes): |
842 | movdqu -33(%rdi), %xmm1 |
843 | movdqu -33(%rsi), %xmm2 |
844 | mov $-33, %dl |
845 | pxor %xmm1, %xmm2 |
846 | ptest %xmm2, %xmm0 |
847 | jnc L(less16bytes) |
848 | L(17bytes): |
849 | mov -17(%rdi), %rax |
850 | mov -17(%rsi), %rcx |
851 | cmp %rax, %rcx |
852 | jne L(diffin8bytes) |
853 | L(9bytes): |
854 | mov -9(%rdi), %rax |
855 | mov -9(%rsi), %rcx |
856 | cmp %rax, %rcx |
857 | jne L(diffin8bytes) |
858 | movzbl -1(%rdi), %eax |
859 | movzbl -1(%rsi), %edx |
860 | sub %edx, %eax |
861 | ret |
862 | |
863 | .p2align 4 |
864 | L(13bytes): |
865 | mov -13(%rdi), %rax |
866 | mov -13(%rsi), %rcx |
867 | cmp %rax, %rcx |
868 | jne L(diffin8bytes) |
869 | mov -8(%rdi), %rax |
870 | mov -8(%rsi), %rcx |
871 | cmp %rax, %rcx |
872 | jne L(diffin8bytes) |
873 | xor %eax, %eax |
874 | ret |
875 | |
876 | .p2align 4 |
877 | L(5bytes): |
878 | mov -5(%rdi), %eax |
879 | mov -5(%rsi), %ecx |
880 | cmp %eax, %ecx |
881 | jne L(diffin4bytes) |
882 | movzbl -1(%rdi), %eax |
883 | movzbl -1(%rsi), %edx |
884 | sub %edx, %eax |
885 | ret |
886 | |
887 | .p2align 4 |
888 | L(66bytes): |
889 | movdqu -66(%rdi), %xmm1 |
890 | movdqu -66(%rsi), %xmm2 |
891 | mov $-66, %dl |
892 | pxor %xmm1, %xmm2 |
893 | ptest %xmm2, %xmm0 |
894 | jnc L(less16bytes) |
895 | L(50bytes): |
896 | movdqu -50(%rdi), %xmm1 |
897 | movdqu -50(%rsi), %xmm2 |
898 | mov $-50, %dl |
899 | pxor %xmm1, %xmm2 |
900 | ptest %xmm2, %xmm0 |
901 | jnc L(less16bytes) |
902 | L(34bytes): |
903 | movdqu -34(%rdi), %xmm1 |
904 | movdqu -34(%rsi), %xmm2 |
905 | mov $-34, %dl |
906 | pxor %xmm1, %xmm2 |
907 | ptest %xmm2, %xmm0 |
908 | jnc L(less16bytes) |
909 | L(18bytes): |
910 | mov -18(%rdi), %rax |
911 | mov -18(%rsi), %rcx |
912 | cmp %rax, %rcx |
913 | jne L(diffin8bytes) |
914 | L(10bytes): |
915 | mov -10(%rdi), %rax |
916 | mov -10(%rsi), %rcx |
917 | cmp %rax, %rcx |
918 | jne L(diffin8bytes) |
919 | movzwl -2(%rdi), %eax |
920 | movzwl -2(%rsi), %ecx |
921 | cmp %cl, %al |
922 | jne L(end) |
923 | and $0xffff, %eax |
924 | and $0xffff, %ecx |
925 | sub %ecx, %eax |
926 | ret |
927 | |
928 | .p2align 4 |
929 | L(14bytes): |
930 | mov -14(%rdi), %rax |
931 | mov -14(%rsi), %rcx |
932 | cmp %rax, %rcx |
933 | jne L(diffin8bytes) |
934 | mov -8(%rdi), %rax |
935 | mov -8(%rsi), %rcx |
936 | cmp %rax, %rcx |
937 | jne L(diffin8bytes) |
938 | xor %eax, %eax |
939 | ret |
940 | |
941 | .p2align 4 |
942 | L(6bytes): |
943 | mov -6(%rdi), %eax |
944 | mov -6(%rsi), %ecx |
945 | cmp %eax, %ecx |
946 | jne L(diffin4bytes) |
947 | L(2bytes): |
948 | movzwl -2(%rsi), %ecx |
949 | movzwl -2(%rdi), %eax |
950 | cmp %cl, %al |
951 | jne L(end) |
952 | and $0xffff, %eax |
953 | and $0xffff, %ecx |
954 | sub %ecx, %eax |
955 | ret |
956 | |
957 | .p2align 4 |
958 | L(67bytes): |
959 | movdqu -67(%rdi), %xmm2 |
960 | movdqu -67(%rsi), %xmm1 |
961 | mov $-67, %dl |
962 | pxor %xmm1, %xmm2 |
963 | ptest %xmm2, %xmm0 |
964 | jnc L(less16bytes) |
965 | L(51bytes): |
966 | movdqu -51(%rdi), %xmm2 |
967 | movdqu -51(%rsi), %xmm1 |
968 | mov $-51, %dl |
969 | pxor %xmm1, %xmm2 |
970 | ptest %xmm2, %xmm0 |
971 | jnc L(less16bytes) |
972 | L(35bytes): |
973 | movdqu -35(%rsi), %xmm1 |
974 | movdqu -35(%rdi), %xmm2 |
975 | mov $-35, %dl |
976 | pxor %xmm1, %xmm2 |
977 | ptest %xmm2, %xmm0 |
978 | jnc L(less16bytes) |
979 | L(19bytes): |
980 | mov -19(%rdi), %rax |
981 | mov -19(%rsi), %rcx |
982 | cmp %rax, %rcx |
983 | jne L(diffin8bytes) |
984 | L(11bytes): |
985 | mov -11(%rdi), %rax |
986 | mov -11(%rsi), %rcx |
987 | cmp %rax, %rcx |
988 | jne L(diffin8bytes) |
989 | mov -4(%rdi), %eax |
990 | mov -4(%rsi), %ecx |
991 | cmp %eax, %ecx |
992 | jne L(diffin4bytes) |
993 | xor %eax, %eax |
994 | ret |
995 | |
996 | .p2align 4 |
997 | L(15bytes): |
998 | mov -15(%rdi), %rax |
999 | mov -15(%rsi), %rcx |
1000 | cmp %rax, %rcx |
1001 | jne L(diffin8bytes) |
1002 | mov -8(%rdi), %rax |
1003 | mov -8(%rsi), %rcx |
1004 | cmp %rax, %rcx |
1005 | jne L(diffin8bytes) |
1006 | xor %eax, %eax |
1007 | ret |
1008 | |
1009 | .p2align 4 |
1010 | L(7bytes): |
1011 | mov -7(%rdi), %eax |
1012 | mov -7(%rsi), %ecx |
1013 | cmp %eax, %ecx |
1014 | jne L(diffin4bytes) |
1015 | mov -4(%rdi), %eax |
1016 | mov -4(%rsi), %ecx |
1017 | cmp %eax, %ecx |
1018 | jne L(diffin4bytes) |
1019 | xor %eax, %eax |
1020 | ret |
1021 | |
1022 | .p2align 4 |
1023 | L(3bytes): |
1024 | movzwl -3(%rdi), %eax |
1025 | movzwl -3(%rsi), %ecx |
1026 | cmp %eax, %ecx |
1027 | jne L(diffin2bytes) |
1028 | L(1bytes): |
1029 | movzbl -1(%rdi), %eax |
1030 | movzbl -1(%rsi), %ecx |
1031 | sub %ecx, %eax |
1032 | ret |
1033 | # endif |
1034 | |
1035 | .p2align 4 |
1036 | L(68bytes): |
1037 | movdqu -68(%rdi), %xmm2 |
1038 | movdqu -68(%rsi), %xmm1 |
1039 | mov $-68, %dl |
1040 | pxor %xmm1, %xmm2 |
1041 | ptest %xmm2, %xmm0 |
1042 | jnc L(less16bytes) |
1043 | L(52bytes): |
1044 | movdqu -52(%rdi), %xmm2 |
1045 | movdqu -52(%rsi), %xmm1 |
1046 | mov $-52, %dl |
1047 | pxor %xmm1, %xmm2 |
1048 | ptest %xmm2, %xmm0 |
1049 | jnc L(less16bytes) |
1050 | L(36bytes): |
1051 | movdqu -36(%rdi), %xmm2 |
1052 | movdqu -36(%rsi), %xmm1 |
1053 | mov $-36, %dl |
1054 | pxor %xmm1, %xmm2 |
1055 | ptest %xmm2, %xmm0 |
1056 | jnc L(less16bytes) |
1057 | L(20bytes): |
1058 | movdqu -20(%rdi), %xmm2 |
1059 | movdqu -20(%rsi), %xmm1 |
1060 | mov $-20, %dl |
1061 | pxor %xmm1, %xmm2 |
1062 | ptest %xmm2, %xmm0 |
1063 | jnc L(less16bytes) |
1064 | mov -4(%rsi), %ecx |
1065 | |
1066 | # ifndef USE_AS_WMEMCMP |
1067 | mov -4(%rdi), %eax |
1068 | cmp %eax, %ecx |
1069 | # else |
1070 | cmp -4(%rdi), %ecx |
1071 | # endif |
1072 | jne L(diffin4bytes) |
1073 | xor %eax, %eax |
1074 | ret |
1075 | |
1076 | # ifndef USE_AS_WMEMCMP |
1077 | /* unreal cases for wmemcmp */ |
1078 | .p2align 4 |
1079 | L(69bytes): |
1080 | movdqu -69(%rsi), %xmm1 |
1081 | movdqu -69(%rdi), %xmm2 |
1082 | mov $-69, %dl |
1083 | pxor %xmm1, %xmm2 |
1084 | ptest %xmm2, %xmm0 |
1085 | jnc L(less16bytes) |
1086 | L(53bytes): |
1087 | movdqu -53(%rsi), %xmm1 |
1088 | movdqu -53(%rdi), %xmm2 |
1089 | mov $-53, %dl |
1090 | pxor %xmm1, %xmm2 |
1091 | ptest %xmm2, %xmm0 |
1092 | jnc L(less16bytes) |
1093 | L(37bytes): |
1094 | movdqu -37(%rsi), %xmm1 |
1095 | movdqu -37(%rdi), %xmm2 |
1096 | mov $-37, %dl |
1097 | pxor %xmm1, %xmm2 |
1098 | ptest %xmm2, %xmm0 |
1099 | jnc L(less16bytes) |
1100 | L(21bytes): |
1101 | movdqu -21(%rsi), %xmm1 |
1102 | movdqu -21(%rdi), %xmm2 |
1103 | mov $-21, %dl |
1104 | pxor %xmm1, %xmm2 |
1105 | ptest %xmm2, %xmm0 |
1106 | jnc L(less16bytes) |
1107 | mov -8(%rdi), %rax |
1108 | mov -8(%rsi), %rcx |
1109 | cmp %rax, %rcx |
1110 | jne L(diffin8bytes) |
1111 | xor %eax, %eax |
1112 | ret |
1113 | |
1114 | .p2align 4 |
1115 | L(70bytes): |
1116 | movdqu -70(%rsi), %xmm1 |
1117 | movdqu -70(%rdi), %xmm2 |
1118 | mov $-70, %dl |
1119 | pxor %xmm1, %xmm2 |
1120 | ptest %xmm2, %xmm0 |
1121 | jnc L(less16bytes) |
1122 | L(54bytes): |
1123 | movdqu -54(%rsi), %xmm1 |
1124 | movdqu -54(%rdi), %xmm2 |
1125 | mov $-54, %dl |
1126 | pxor %xmm1, %xmm2 |
1127 | ptest %xmm2, %xmm0 |
1128 | jnc L(less16bytes) |
1129 | L(38bytes): |
1130 | movdqu -38(%rsi), %xmm1 |
1131 | movdqu -38(%rdi), %xmm2 |
1132 | mov $-38, %dl |
1133 | pxor %xmm1, %xmm2 |
1134 | ptest %xmm2, %xmm0 |
1135 | jnc L(less16bytes) |
1136 | L(22bytes): |
1137 | movdqu -22(%rsi), %xmm1 |
1138 | movdqu -22(%rdi), %xmm2 |
1139 | mov $-22, %dl |
1140 | pxor %xmm1, %xmm2 |
1141 | ptest %xmm2, %xmm0 |
1142 | jnc L(less16bytes) |
1143 | mov -8(%rdi), %rax |
1144 | mov -8(%rsi), %rcx |
1145 | cmp %rax, %rcx |
1146 | jne L(diffin8bytes) |
1147 | xor %eax, %eax |
1148 | ret |
1149 | |
1150 | .p2align 4 |
1151 | L(71bytes): |
1152 | movdqu -71(%rsi), %xmm1 |
1153 | movdqu -71(%rdi), %xmm2 |
1154 | mov $-71, %dl |
1155 | pxor %xmm1, %xmm2 |
1156 | ptest %xmm2, %xmm0 |
1157 | jnc L(less16bytes) |
1158 | L(55bytes): |
1159 | movdqu -55(%rdi), %xmm2 |
1160 | movdqu -55(%rsi), %xmm1 |
1161 | mov $-55, %dl |
1162 | pxor %xmm1, %xmm2 |
1163 | ptest %xmm2, %xmm0 |
1164 | jnc L(less16bytes) |
1165 | L(39bytes): |
1166 | movdqu -39(%rdi), %xmm2 |
1167 | movdqu -39(%rsi), %xmm1 |
1168 | mov $-39, %dl |
1169 | pxor %xmm1, %xmm2 |
1170 | ptest %xmm2, %xmm0 |
1171 | jnc L(less16bytes) |
1172 | L(23bytes): |
1173 | movdqu -23(%rdi), %xmm2 |
1174 | movdqu -23(%rsi), %xmm1 |
1175 | mov $-23, %dl |
1176 | pxor %xmm1, %xmm2 |
1177 | ptest %xmm2, %xmm0 |
1178 | jnc L(less16bytes) |
1179 | mov -8(%rdi), %rax |
1180 | mov -8(%rsi), %rcx |
1181 | cmp %rax, %rcx |
1182 | jne L(diffin8bytes) |
1183 | xor %eax, %eax |
1184 | ret |
1185 | # endif |
1186 | |
1187 | .p2align 4 |
1188 | L(72bytes): |
1189 | movdqu -72(%rsi), %xmm1 |
1190 | movdqu -72(%rdi), %xmm2 |
1191 | mov $-72, %dl |
1192 | pxor %xmm1, %xmm2 |
1193 | ptest %xmm2, %xmm0 |
1194 | jnc L(less16bytes) |
1195 | L(56bytes): |
1196 | movdqu -56(%rdi), %xmm2 |
1197 | movdqu -56(%rsi), %xmm1 |
1198 | mov $-56, %dl |
1199 | pxor %xmm1, %xmm2 |
1200 | ptest %xmm2, %xmm0 |
1201 | jnc L(less16bytes) |
1202 | L(40bytes): |
1203 | movdqu -40(%rdi), %xmm2 |
1204 | movdqu -40(%rsi), %xmm1 |
1205 | mov $-40, %dl |
1206 | pxor %xmm1, %xmm2 |
1207 | ptest %xmm2, %xmm0 |
1208 | jnc L(less16bytes) |
1209 | L(24bytes): |
1210 | movdqu -24(%rdi), %xmm2 |
1211 | movdqu -24(%rsi), %xmm1 |
1212 | mov $-24, %dl |
1213 | pxor %xmm1, %xmm2 |
1214 | ptest %xmm2, %xmm0 |
1215 | jnc L(less16bytes) |
1216 | |
1217 | mov -8(%rsi), %rcx |
1218 | mov -8(%rdi), %rax |
1219 | cmp %rax, %rcx |
1220 | jne L(diffin8bytes) |
1221 | xor %eax, %eax |
1222 | ret |
1223 | |
1224 | # ifndef USE_AS_WMEMCMP |
1225 | /* unreal cases for wmemcmp */ |
1226 | .p2align 4 |
1227 | L(73bytes): |
1228 | movdqu -73(%rsi), %xmm1 |
1229 | movdqu -73(%rdi), %xmm2 |
1230 | mov $-73, %dl |
1231 | pxor %xmm1, %xmm2 |
1232 | ptest %xmm2, %xmm0 |
1233 | jnc L(less16bytes) |
1234 | L(57bytes): |
1235 | movdqu -57(%rdi), %xmm2 |
1236 | movdqu -57(%rsi), %xmm1 |
1237 | mov $-57, %dl |
1238 | pxor %xmm1, %xmm2 |
1239 | ptest %xmm2, %xmm0 |
1240 | jnc L(less16bytes) |
1241 | L(41bytes): |
1242 | movdqu -41(%rdi), %xmm2 |
1243 | movdqu -41(%rsi), %xmm1 |
1244 | mov $-41, %dl |
1245 | pxor %xmm1, %xmm2 |
1246 | ptest %xmm2, %xmm0 |
1247 | jnc L(less16bytes) |
1248 | L(25bytes): |
1249 | movdqu -25(%rdi), %xmm2 |
1250 | movdqu -25(%rsi), %xmm1 |
1251 | mov $-25, %dl |
1252 | pxor %xmm1, %xmm2 |
1253 | ptest %xmm2, %xmm0 |
1254 | jnc L(less16bytes) |
1255 | mov -9(%rdi), %rax |
1256 | mov -9(%rsi), %rcx |
1257 | cmp %rax, %rcx |
1258 | jne L(diffin8bytes) |
1259 | movzbl -1(%rdi), %eax |
1260 | movzbl -1(%rsi), %ecx |
1261 | sub %ecx, %eax |
1262 | ret |
1263 | |
1264 | .p2align 4 |
1265 | L(74bytes): |
1266 | movdqu -74(%rsi), %xmm1 |
1267 | movdqu -74(%rdi), %xmm2 |
1268 | mov $-74, %dl |
1269 | pxor %xmm1, %xmm2 |
1270 | ptest %xmm2, %xmm0 |
1271 | jnc L(less16bytes) |
1272 | L(58bytes): |
1273 | movdqu -58(%rdi), %xmm2 |
1274 | movdqu -58(%rsi), %xmm1 |
1275 | mov $-58, %dl |
1276 | pxor %xmm1, %xmm2 |
1277 | ptest %xmm2, %xmm0 |
1278 | jnc L(less16bytes) |
1279 | L(42bytes): |
1280 | movdqu -42(%rdi), %xmm2 |
1281 | movdqu -42(%rsi), %xmm1 |
1282 | mov $-42, %dl |
1283 | pxor %xmm1, %xmm2 |
1284 | ptest %xmm2, %xmm0 |
1285 | jnc L(less16bytes) |
1286 | L(26bytes): |
1287 | movdqu -26(%rdi), %xmm2 |
1288 | movdqu -26(%rsi), %xmm1 |
1289 | mov $-26, %dl |
1290 | pxor %xmm1, %xmm2 |
1291 | ptest %xmm2, %xmm0 |
1292 | jnc L(less16bytes) |
1293 | mov -10(%rdi), %rax |
1294 | mov -10(%rsi), %rcx |
1295 | cmp %rax, %rcx |
1296 | jne L(diffin8bytes) |
1297 | movzwl -2(%rdi), %eax |
1298 | movzwl -2(%rsi), %ecx |
1299 | jmp L(diffin2bytes) |
1300 | |
1301 | .p2align 4 |
1302 | L(75bytes): |
1303 | movdqu -75(%rsi), %xmm1 |
1304 | movdqu -75(%rdi), %xmm2 |
1305 | mov $-75, %dl |
1306 | pxor %xmm1, %xmm2 |
1307 | ptest %xmm2, %xmm0 |
1308 | jnc L(less16bytes) |
1309 | L(59bytes): |
1310 | movdqu -59(%rdi), %xmm2 |
1311 | movdqu -59(%rsi), %xmm1 |
1312 | mov $-59, %dl |
1313 | pxor %xmm1, %xmm2 |
1314 | ptest %xmm2, %xmm0 |
1315 | jnc L(less16bytes) |
1316 | L(43bytes): |
1317 | movdqu -43(%rdi), %xmm2 |
1318 | movdqu -43(%rsi), %xmm1 |
1319 | mov $-43, %dl |
1320 | pxor %xmm1, %xmm2 |
1321 | ptest %xmm2, %xmm0 |
1322 | jnc L(less16bytes) |
1323 | L(27bytes): |
1324 | movdqu -27(%rdi), %xmm2 |
1325 | movdqu -27(%rsi), %xmm1 |
1326 | mov $-27, %dl |
1327 | pxor %xmm1, %xmm2 |
1328 | ptest %xmm2, %xmm0 |
1329 | jnc L(less16bytes) |
1330 | mov -11(%rdi), %rax |
1331 | mov -11(%rsi), %rcx |
1332 | cmp %rax, %rcx |
1333 | jne L(diffin8bytes) |
1334 | mov -4(%rdi), %eax |
1335 | mov -4(%rsi), %ecx |
1336 | cmp %eax, %ecx |
1337 | jne L(diffin4bytes) |
1338 | xor %eax, %eax |
1339 | ret |
1340 | # endif |
1341 | .p2align 4 |
1342 | L(76bytes): |
1343 | movdqu -76(%rsi), %xmm1 |
1344 | movdqu -76(%rdi), %xmm2 |
1345 | mov $-76, %dl |
1346 | pxor %xmm1, %xmm2 |
1347 | ptest %xmm2, %xmm0 |
1348 | jnc L(less16bytes) |
1349 | L(60bytes): |
1350 | movdqu -60(%rdi), %xmm2 |
1351 | movdqu -60(%rsi), %xmm1 |
1352 | mov $-60, %dl |
1353 | pxor %xmm1, %xmm2 |
1354 | ptest %xmm2, %xmm0 |
1355 | jnc L(less16bytes) |
1356 | L(44bytes): |
1357 | movdqu -44(%rdi), %xmm2 |
1358 | movdqu -44(%rsi), %xmm1 |
1359 | mov $-44, %dl |
1360 | pxor %xmm1, %xmm2 |
1361 | ptest %xmm2, %xmm0 |
1362 | jnc L(less16bytes) |
1363 | L(28bytes): |
1364 | movdqu -28(%rdi), %xmm2 |
1365 | movdqu -28(%rsi), %xmm1 |
1366 | mov $-28, %dl |
1367 | pxor %xmm1, %xmm2 |
1368 | ptest %xmm2, %xmm0 |
1369 | jnc L(less16bytes) |
1370 | mov -12(%rdi), %rax |
1371 | mov -12(%rsi), %rcx |
1372 | cmp %rax, %rcx |
1373 | jne L(diffin8bytes) |
1374 | mov -4(%rsi), %ecx |
1375 | # ifndef USE_AS_WMEMCMP |
1376 | mov -4(%rdi), %eax |
1377 | cmp %eax, %ecx |
1378 | # else |
1379 | cmp -4(%rdi), %ecx |
1380 | # endif |
1381 | jne L(diffin4bytes) |
1382 | xor %eax, %eax |
1383 | ret |
1384 | |
1385 | # ifndef USE_AS_WMEMCMP |
1386 | /* unreal cases for wmemcmp */ |
1387 | .p2align 4 |
1388 | L(77bytes): |
1389 | movdqu -77(%rsi), %xmm1 |
1390 | movdqu -77(%rdi), %xmm2 |
1391 | mov $-77, %dl |
1392 | pxor %xmm1, %xmm2 |
1393 | ptest %xmm2, %xmm0 |
1394 | jnc L(less16bytes) |
1395 | L(61bytes): |
1396 | movdqu -61(%rdi), %xmm2 |
1397 | movdqu -61(%rsi), %xmm1 |
1398 | mov $-61, %dl |
1399 | pxor %xmm1, %xmm2 |
1400 | ptest %xmm2, %xmm0 |
1401 | jnc L(less16bytes) |
1402 | L(45bytes): |
1403 | movdqu -45(%rdi), %xmm2 |
1404 | movdqu -45(%rsi), %xmm1 |
1405 | mov $-45, %dl |
1406 | pxor %xmm1, %xmm2 |
1407 | ptest %xmm2, %xmm0 |
1408 | jnc L(less16bytes) |
1409 | L(29bytes): |
1410 | movdqu -29(%rdi), %xmm2 |
1411 | movdqu -29(%rsi), %xmm1 |
1412 | mov $-29, %dl |
1413 | pxor %xmm1, %xmm2 |
1414 | ptest %xmm2, %xmm0 |
1415 | jnc L(less16bytes) |
1416 | |
1417 | mov -13(%rdi), %rax |
1418 | mov -13(%rsi), %rcx |
1419 | cmp %rax, %rcx |
1420 | jne L(diffin8bytes) |
1421 | |
1422 | mov -8(%rdi), %rax |
1423 | mov -8(%rsi), %rcx |
1424 | cmp %rax, %rcx |
1425 | jne L(diffin8bytes) |
1426 | xor %eax, %eax |
1427 | ret |
1428 | |
1429 | .p2align 4 |
1430 | L(78bytes): |
1431 | movdqu -78(%rsi), %xmm1 |
1432 | movdqu -78(%rdi), %xmm2 |
1433 | mov $-78, %dl |
1434 | pxor %xmm1, %xmm2 |
1435 | ptest %xmm2, %xmm0 |
1436 | jnc L(less16bytes) |
1437 | L(62bytes): |
1438 | movdqu -62(%rdi), %xmm2 |
1439 | movdqu -62(%rsi), %xmm1 |
1440 | mov $-62, %dl |
1441 | pxor %xmm1, %xmm2 |
1442 | ptest %xmm2, %xmm0 |
1443 | jnc L(less16bytes) |
1444 | L(46bytes): |
1445 | movdqu -46(%rdi), %xmm2 |
1446 | movdqu -46(%rsi), %xmm1 |
1447 | mov $-46, %dl |
1448 | pxor %xmm1, %xmm2 |
1449 | ptest %xmm2, %xmm0 |
1450 | jnc L(less16bytes) |
1451 | L(30bytes): |
1452 | movdqu -30(%rdi), %xmm2 |
1453 | movdqu -30(%rsi), %xmm1 |
1454 | mov $-30, %dl |
1455 | pxor %xmm1, %xmm2 |
1456 | ptest %xmm2, %xmm0 |
1457 | jnc L(less16bytes) |
1458 | mov -14(%rdi), %rax |
1459 | mov -14(%rsi), %rcx |
1460 | cmp %rax, %rcx |
1461 | jne L(diffin8bytes) |
1462 | mov -8(%rdi), %rax |
1463 | mov -8(%rsi), %rcx |
1464 | cmp %rax, %rcx |
1465 | jne L(diffin8bytes) |
1466 | xor %eax, %eax |
1467 | ret |
1468 | |
1469 | .p2align 4 |
1470 | L(79bytes): |
1471 | movdqu -79(%rsi), %xmm1 |
1472 | movdqu -79(%rdi), %xmm2 |
1473 | mov $-79, %dl |
1474 | pxor %xmm1, %xmm2 |
1475 | ptest %xmm2, %xmm0 |
1476 | jnc L(less16bytes) |
1477 | L(63bytes): |
1478 | movdqu -63(%rdi), %xmm2 |
1479 | movdqu -63(%rsi), %xmm1 |
1480 | mov $-63, %dl |
1481 | pxor %xmm1, %xmm2 |
1482 | ptest %xmm2, %xmm0 |
1483 | jnc L(less16bytes) |
1484 | L(47bytes): |
1485 | movdqu -47(%rdi), %xmm2 |
1486 | movdqu -47(%rsi), %xmm1 |
1487 | mov $-47, %dl |
1488 | pxor %xmm1, %xmm2 |
1489 | ptest %xmm2, %xmm0 |
1490 | jnc L(less16bytes) |
1491 | L(31bytes): |
1492 | movdqu -31(%rdi), %xmm2 |
1493 | movdqu -31(%rsi), %xmm1 |
1494 | mov $-31, %dl |
1495 | pxor %xmm1, %xmm2 |
1496 | ptest %xmm2, %xmm0 |
1497 | jnc L(less16bytes) |
1498 | mov -15(%rdi), %rax |
1499 | mov -15(%rsi), %rcx |
1500 | cmp %rax, %rcx |
1501 | jne L(diffin8bytes) |
1502 | mov -8(%rdi), %rax |
1503 | mov -8(%rsi), %rcx |
1504 | cmp %rax, %rcx |
1505 | jne L(diffin8bytes) |
1506 | xor %eax, %eax |
1507 | ret |
1508 | # endif |
1509 | .p2align 4 |
1510 | L(64bytes): |
1511 | movdqu -64(%rdi), %xmm2 |
1512 | movdqu -64(%rsi), %xmm1 |
1513 | mov $-64, %dl |
1514 | pxor %xmm1, %xmm2 |
1515 | ptest %xmm2, %xmm0 |
1516 | jnc L(less16bytes) |
1517 | L(48bytes): |
1518 | movdqu -48(%rdi), %xmm2 |
1519 | movdqu -48(%rsi), %xmm1 |
1520 | mov $-48, %dl |
1521 | pxor %xmm1, %xmm2 |
1522 | ptest %xmm2, %xmm0 |
1523 | jnc L(less16bytes) |
1524 | L(32bytes): |
1525 | movdqu -32(%rdi), %xmm2 |
1526 | movdqu -32(%rsi), %xmm1 |
1527 | mov $-32, %dl |
1528 | pxor %xmm1, %xmm2 |
1529 | ptest %xmm2, %xmm0 |
1530 | jnc L(less16bytes) |
1531 | |
1532 | mov -16(%rdi), %rax |
1533 | mov -16(%rsi), %rcx |
1534 | cmp %rax, %rcx |
1535 | jne L(diffin8bytes) |
1536 | |
1537 | mov -8(%rdi), %rax |
1538 | mov -8(%rsi), %rcx |
1539 | cmp %rax, %rcx |
1540 | jne L(diffin8bytes) |
1541 | xor %eax, %eax |
1542 | ret |
1543 | |
1544 | /* |
1545 | * Aligned 8 bytes to avoid 2 branch "taken" in one 16 alinged code block. |
1546 | */ |
1547 | .p2align 3 |
1548 | L(less16bytes): |
1549 | movsbq %dl, %rdx |
1550 | mov (%rsi, %rdx), %rcx |
1551 | mov (%rdi, %rdx), %rax |
1552 | cmp %rax, %rcx |
1553 | jne L(diffin8bytes) |
1554 | mov 8(%rsi, %rdx), %rcx |
1555 | mov 8(%rdi, %rdx), %rax |
1556 | L(diffin8bytes): |
1557 | cmp %eax, %ecx |
1558 | jne L(diffin4bytes) |
1559 | shr $32, %rcx |
1560 | shr $32, %rax |
1561 | |
1562 | # ifdef USE_AS_WMEMCMP |
1563 | /* for wmemcmp */ |
1564 | cmp %eax, %ecx |
1565 | jne L(diffin4bytes) |
1566 | xor %eax, %eax |
1567 | ret |
1568 | # endif |
1569 | |
1570 | L(diffin4bytes): |
1571 | # ifndef USE_AS_WMEMCMP |
1572 | cmp %cx, %ax |
1573 | jne L(diffin2bytes) |
1574 | shr $16, %ecx |
1575 | shr $16, %eax |
1576 | L(diffin2bytes): |
1577 | cmp %cl, %al |
1578 | jne L(end) |
1579 | and $0xffff, %eax |
1580 | and $0xffff, %ecx |
1581 | sub %ecx, %eax |
1582 | ret |
1583 | |
1584 | .p2align 4 |
1585 | L(end): |
1586 | and $0xff, %eax |
1587 | and $0xff, %ecx |
1588 | sub %ecx, %eax |
1589 | ret |
1590 | # else |
1591 | |
1592 | /* for wmemcmp */ |
1593 | mov $1, %eax |
1594 | jl L(nequal_bigger) |
1595 | neg %eax |
1596 | ret |
1597 | |
1598 | .p2align 4 |
1599 | L(nequal_bigger): |
1600 | ret |
1601 | |
1602 | L(unreal_case): |
1603 | xor %eax, %eax |
1604 | ret |
1605 | # endif |
1606 | |
1607 | END (MEMCMP) |
1608 | |
1609 | .section .rodata.sse4.1,"a" ,@progbits |
1610 | .p2align 3 |
1611 | # ifndef USE_AS_WMEMCMP |
1612 | L(table_64bytes): |
1613 | .int JMPTBL (L(0bytes), L(table_64bytes)) |
1614 | .int JMPTBL (L(1bytes), L(table_64bytes)) |
1615 | .int JMPTBL (L(2bytes), L(table_64bytes)) |
1616 | .int JMPTBL (L(3bytes), L(table_64bytes)) |
1617 | .int JMPTBL (L(4bytes), L(table_64bytes)) |
1618 | .int JMPTBL (L(5bytes), L(table_64bytes)) |
1619 | .int JMPTBL (L(6bytes), L(table_64bytes)) |
1620 | .int JMPTBL (L(7bytes), L(table_64bytes)) |
1621 | .int JMPTBL (L(8bytes), L(table_64bytes)) |
1622 | .int JMPTBL (L(9bytes), L(table_64bytes)) |
1623 | .int JMPTBL (L(10bytes), L(table_64bytes)) |
1624 | .int JMPTBL (L(11bytes), L(table_64bytes)) |
1625 | .int JMPTBL (L(12bytes), L(table_64bytes)) |
1626 | .int JMPTBL (L(13bytes), L(table_64bytes)) |
1627 | .int JMPTBL (L(14bytes), L(table_64bytes)) |
1628 | .int JMPTBL (L(15bytes), L(table_64bytes)) |
1629 | .int JMPTBL (L(16bytes), L(table_64bytes)) |
1630 | .int JMPTBL (L(17bytes), L(table_64bytes)) |
1631 | .int JMPTBL (L(18bytes), L(table_64bytes)) |
1632 | .int JMPTBL (L(19bytes), L(table_64bytes)) |
1633 | .int JMPTBL (L(20bytes), L(table_64bytes)) |
1634 | .int JMPTBL (L(21bytes), L(table_64bytes)) |
1635 | .int JMPTBL (L(22bytes), L(table_64bytes)) |
1636 | .int JMPTBL (L(23bytes), L(table_64bytes)) |
1637 | .int JMPTBL (L(24bytes), L(table_64bytes)) |
1638 | .int JMPTBL (L(25bytes), L(table_64bytes)) |
1639 | .int JMPTBL (L(26bytes), L(table_64bytes)) |
1640 | .int JMPTBL (L(27bytes), L(table_64bytes)) |
1641 | .int JMPTBL (L(28bytes), L(table_64bytes)) |
1642 | .int JMPTBL (L(29bytes), L(table_64bytes)) |
1643 | .int JMPTBL (L(30bytes), L(table_64bytes)) |
1644 | .int JMPTBL (L(31bytes), L(table_64bytes)) |
1645 | .int JMPTBL (L(32bytes), L(table_64bytes)) |
1646 | .int JMPTBL (L(33bytes), L(table_64bytes)) |
1647 | .int JMPTBL (L(34bytes), L(table_64bytes)) |
1648 | .int JMPTBL (L(35bytes), L(table_64bytes)) |
1649 | .int JMPTBL (L(36bytes), L(table_64bytes)) |
1650 | .int JMPTBL (L(37bytes), L(table_64bytes)) |
1651 | .int JMPTBL (L(38bytes), L(table_64bytes)) |
1652 | .int JMPTBL (L(39bytes), L(table_64bytes)) |
1653 | .int JMPTBL (L(40bytes), L(table_64bytes)) |
1654 | .int JMPTBL (L(41bytes), L(table_64bytes)) |
1655 | .int JMPTBL (L(42bytes), L(table_64bytes)) |
1656 | .int JMPTBL (L(43bytes), L(table_64bytes)) |
1657 | .int JMPTBL (L(44bytes), L(table_64bytes)) |
1658 | .int JMPTBL (L(45bytes), L(table_64bytes)) |
1659 | .int JMPTBL (L(46bytes), L(table_64bytes)) |
1660 | .int JMPTBL (L(47bytes), L(table_64bytes)) |
1661 | .int JMPTBL (L(48bytes), L(table_64bytes)) |
1662 | .int JMPTBL (L(49bytes), L(table_64bytes)) |
1663 | .int JMPTBL (L(50bytes), L(table_64bytes)) |
1664 | .int JMPTBL (L(51bytes), L(table_64bytes)) |
1665 | .int JMPTBL (L(52bytes), L(table_64bytes)) |
1666 | .int JMPTBL (L(53bytes), L(table_64bytes)) |
1667 | .int JMPTBL (L(54bytes), L(table_64bytes)) |
1668 | .int JMPTBL (L(55bytes), L(table_64bytes)) |
1669 | .int JMPTBL (L(56bytes), L(table_64bytes)) |
1670 | .int JMPTBL (L(57bytes), L(table_64bytes)) |
1671 | .int JMPTBL (L(58bytes), L(table_64bytes)) |
1672 | .int JMPTBL (L(59bytes), L(table_64bytes)) |
1673 | .int JMPTBL (L(60bytes), L(table_64bytes)) |
1674 | .int JMPTBL (L(61bytes), L(table_64bytes)) |
1675 | .int JMPTBL (L(62bytes), L(table_64bytes)) |
1676 | .int JMPTBL (L(63bytes), L(table_64bytes)) |
1677 | .int JMPTBL (L(64bytes), L(table_64bytes)) |
1678 | .int JMPTBL (L(65bytes), L(table_64bytes)) |
1679 | .int JMPTBL (L(66bytes), L(table_64bytes)) |
1680 | .int JMPTBL (L(67bytes), L(table_64bytes)) |
1681 | .int JMPTBL (L(68bytes), L(table_64bytes)) |
1682 | .int JMPTBL (L(69bytes), L(table_64bytes)) |
1683 | .int JMPTBL (L(70bytes), L(table_64bytes)) |
1684 | .int JMPTBL (L(71bytes), L(table_64bytes)) |
1685 | .int JMPTBL (L(72bytes), L(table_64bytes)) |
1686 | .int JMPTBL (L(73bytes), L(table_64bytes)) |
1687 | .int JMPTBL (L(74bytes), L(table_64bytes)) |
1688 | .int JMPTBL (L(75bytes), L(table_64bytes)) |
1689 | .int JMPTBL (L(76bytes), L(table_64bytes)) |
1690 | .int JMPTBL (L(77bytes), L(table_64bytes)) |
1691 | .int JMPTBL (L(78bytes), L(table_64bytes)) |
1692 | .int JMPTBL (L(79bytes), L(table_64bytes)) |
1693 | # else |
1694 | L(table_64bytes): |
1695 | .int JMPTBL (L(0bytes), L(table_64bytes)) |
1696 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1697 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1698 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1699 | .int JMPTBL (L(4bytes), L(table_64bytes)) |
1700 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1701 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1702 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1703 | .int JMPTBL (L(8bytes), L(table_64bytes)) |
1704 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1705 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1706 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1707 | .int JMPTBL (L(12bytes), L(table_64bytes)) |
1708 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1709 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1710 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1711 | .int JMPTBL (L(16bytes), L(table_64bytes)) |
1712 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1713 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1714 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1715 | .int JMPTBL (L(20bytes), L(table_64bytes)) |
1716 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1717 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1718 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1719 | .int JMPTBL (L(24bytes), L(table_64bytes)) |
1720 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1721 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1722 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1723 | .int JMPTBL (L(28bytes), L(table_64bytes)) |
1724 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1725 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1726 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1727 | .int JMPTBL (L(32bytes), L(table_64bytes)) |
1728 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1729 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1730 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1731 | .int JMPTBL (L(36bytes), L(table_64bytes)) |
1732 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1733 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1734 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1735 | .int JMPTBL (L(40bytes), L(table_64bytes)) |
1736 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1737 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1738 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1739 | .int JMPTBL (L(44bytes), L(table_64bytes)) |
1740 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1741 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1742 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1743 | .int JMPTBL (L(48bytes), L(table_64bytes)) |
1744 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1745 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1746 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1747 | .int JMPTBL (L(52bytes), L(table_64bytes)) |
1748 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1749 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1750 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1751 | .int JMPTBL (L(56bytes), L(table_64bytes)) |
1752 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1753 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1754 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1755 | .int JMPTBL (L(60bytes), L(table_64bytes)) |
1756 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1757 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1758 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1759 | .int JMPTBL (L(64bytes), L(table_64bytes)) |
1760 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1761 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1762 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1763 | .int JMPTBL (L(68bytes), L(table_64bytes)) |
1764 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1765 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1766 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1767 | .int JMPTBL (L(72bytes), L(table_64bytes)) |
1768 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1769 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1770 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1771 | .int JMPTBL (L(76bytes), L(table_64bytes)) |
1772 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1773 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1774 | .int JMPTBL (L(unreal_case), L(table_64bytes)) |
1775 | # endif |
1776 | #endif |
1777 | |