1 /*
2 Copyright (c) 2010, 2011, 2012, 2013 Intel Corporation
3 All rights reserved.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
/* L(label): spell LABEL with a ".L" prefix by token pasting.  The ".L"
   prefix is the GAS convention for assembler-local, non-exported labels,
   so every branch target written as L(foo) in this file stays private.
   The #ifndef guard keeps any definition of L made before this point.  */
31 #ifndef L
32 # define L(label) .L##label
33 #endif
/* Call-frame-information (CFI) wrappers.  Each cfi_* name used below is
   mapped onto the assembler directive of the same name (.cfi_*), passing
   its arguments through unchanged.  Every wrapper is guarded by #ifndef,
   so a definition made before this point takes precedence.  */

/* Open the CFI region of a function (used by ENTRY).  */
35 #ifndef cfi_startproc
36 # define cfi_startproc .cfi_startproc
37 #endif
/* Close the CFI region of a function (used by END).  */
39 #ifndef cfi_endproc
40 # define cfi_endproc .cfi_endproc
41 #endif
/* Record that REG is saved at offset OFF (used by CFI_PUSH).  */
43 #ifndef cfi_rel_offset
44 # define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
45 #endif
/* Record that REG holds its original value again (used by CFI_POP).  */
47 #ifndef cfi_restore
48 # define cfi_restore(reg) .cfi_restore reg
49 #endif
/* Adjust the recorded CFA offset by OFF bytes after a stack change.  */
51 #ifndef cfi_adjust_cfa_offset
52 # define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
53 #endif
/* Save the current CFI state so it can be reinstated later.  */
55 #ifndef cfi_remember_state
56 # define cfi_remember_state .cfi_remember_state
57 #endif
/* Reinstate the CFI state saved by the matching cfi_remember_state.  */
59 #ifndef cfi_restore_state
60 # define cfi_restore_state .cfi_restore_state
61 #endif
/* ENTRY(name): function prologue boilerplate.  In order, it
     - types NAME as a function symbol (.type name, @function),
     - exports it (.globl),
     - aligns the entry point to 2^4 = 16 bytes (.p2align 4),
     - emits the NAME: label, and
     - opens the CFI region with cfi_startproc.
   Must be paired with END(name).  The #ifndef guard keeps any definition
   of ENTRY made before this point.  */
63 #ifndef ENTRY
64 # define ENTRY(name) \
65 .type name, @function; \
66 .globl name; \
67 .p2align 4; \
68 name: \
69 cfi_startproc
70 #endif
/* END(name): function epilogue boilerplate.  Closes the CFI region opened
   by ENTRY (cfi_endproc) and sets the symbol size of NAME to the number of
   bytes between the NAME label and the current location (.-name).  The
   #ifndef guard keeps any definition of END made before this point.  */
72 #ifndef END
73 # define END(name) \
74 cfi_endproc; \
75 .size name, .-name
76 #endif
/* MEMCMP: symbol name under which the routine below is assembled.  It
   defaults to memcmp; a definition made before this point overrides it.
   NOTE(review): presumably the wide-character builds (USE_WCHAR /
   USE_UTF16, tested further down) set MEMCMP to their own name before
   including this file -- confirm against the including sources.  */
78 #ifndef MEMCMP
79 # define MEMCMP memcmp
80 #endif
/* CFI_PUSH(REG): unwind bookkeeping for a 4-byte push of REG.  The CFA
   offset grows by 4 and REG is recorded as saved at offset 0.  Emits no
   machine instruction.  */
82 #define CFI_PUSH(REG) \
83 cfi_adjust_cfa_offset (4); \
84 cfi_rel_offset (REG, 0)
/* CFI_POP(REG): unwind bookkeeping for a 4-byte pop of REG.  The CFA
   offset shrinks by 4 and REG is marked as restored.  Emits no machine
   instruction, so it can also be used on its own to correct the CFI state
   for code that follows an unconditional jump.  */
86 #define CFI_POP(REG) \
87 cfi_adjust_cfa_offset (-4); \
88 cfi_restore (REG)
/* PUSH(REG) / POP(REG): the actual pushl / popl instruction followed by
   the matching CFI bookkeeping, so the unwind info always tracks the
   stack pointer.  */
90 #define PUSH(REG) pushl REG; CFI_PUSH (REG)
91 #define POP(REG) popl REG; CFI_POP (REG)
/* Stack-argument offsets, used as DISP(%esp) at function entry before any
   register has been pushed.  PARMS is 4 because the return address pushed
   by the caller's call instruction occupies 0(%esp).  The three arguments
   then sit in consecutive 4-byte slots:
     BLK1 -- pointer to the first block  (loaded into %eax),
     BLK2 -- pointer to the second block (loaded into %edx),
     LEN  -- element count               (loaded into %ecx; scaled to bytes
             when USE_WCHAR or USE_UTF16 is defined).
   BLK2 and LEN expand to unparenthesized sums (for example 4+4+4), which
   is fine for their only use as a displacement.  */
93 #define PARMS 4
94 #define BLK1 PARMS
95 #define BLK2 BLK1+4
96 #define LEN BLK2+4
/* RETURN_END: restore %edi, %esi and %ebx -- the reverse of the
   PUSH (%ebx); PUSH (%esi); PUSH (%edi) sequence -- and return.  Only
   valid on paths where all three registers are currently pushed.  */
97 #define RETURN_END POP (%edi); POP (%esi); POP (%ebx); ret
/* RETURN: RETURN_END for a return that is followed by more code.  After
   the ret, the CFI state saved by the last cfi_remember_state (all three
   registers still pushed) is reinstated for the code that follows, and
   immediately saved again for the next RETURN.  */
98 #define RETURN RETURN_END; cfi_restore_state; cfi_remember_state
100 /* Warning!
101 wmemcmp has to use SIGNED comparison for elements.
102 memcmp has to use UNSIGNED comparison for elements.
103 */
105 .text
106 ENTRY (MEMCMP)
107 movl LEN(%esp), %ecx
109 #ifdef USE_WCHAR
110 shl $2, %ecx
111 jz L(zero)
112 #elif defined USE_UTF16
113 shl $1, %ecx
114 jz L(zero)
115 #endif
117 movl BLK1(%esp), %eax
118 cmp $48, %ecx
119 movl BLK2(%esp), %edx
120 jae L(48bytesormore)
122 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
123 cmp $1, %ecx
124 jbe L(less1bytes)
125 #endif
127 PUSH (%ebx)
128 add %ecx, %edx
129 add %ecx, %eax
130 jmp L(less48bytes)
132 CFI_POP (%ebx)
134 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
135 .p2align 4
136 L(less1bytes):
137 jb L(zero)
138 movb (%eax), %cl
139 cmp (%edx), %cl
140 je L(zero)
141 mov $1, %eax
142 ja L(1bytesend)
143 neg %eax
144 L(1bytesend):
145 ret
146 #endif
148 .p2align 4
149 L(zero):
150 xor %eax, %eax
151 ret
153 .p2align 4
154 L(48bytesormore):
155 PUSH (%ebx)
156 PUSH (%esi)
157 PUSH (%edi)
158 cfi_remember_state
159 movdqu (%eax), %xmm3
160 movdqu (%edx), %xmm0
161 movl %eax, %edi
162 movl %edx, %esi
163 pcmpeqb %xmm0, %xmm3
164 pmovmskb %xmm3, %edx
165 lea 16(%edi), %edi
167 sub $0xffff, %edx
168 lea 16(%esi), %esi
169 jnz L(less16bytes)
170 mov %edi, %edx
171 and $0xf, %edx
172 xor %edx, %edi
173 sub %edx, %esi
174 add %edx, %ecx
175 mov %esi, %edx
176 and $0xf, %edx
177 jz L(shr_0)
178 xor %edx, %esi
180 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
181 cmp $8, %edx
182 jae L(next_unaligned_table)
183 cmp $0, %edx
184 je L(shr_0)
185 cmp $1, %edx
186 je L(shr_1)
187 cmp $2, %edx
188 je L(shr_2)
189 cmp $3, %edx
190 je L(shr_3)
191 cmp $4, %edx
192 je L(shr_4)
193 cmp $5, %edx
194 je L(shr_5)
195 cmp $6, %edx
196 je L(shr_6)
197 jmp L(shr_7)
199 .p2align 2
200 L(next_unaligned_table):
201 cmp $8, %edx
202 je L(shr_8)
203 cmp $9, %edx
204 je L(shr_9)
205 cmp $10, %edx
206 je L(shr_10)
207 cmp $11, %edx
208 je L(shr_11)
209 cmp $12, %edx
210 je L(shr_12)
211 cmp $13, %edx
212 je L(shr_13)
213 cmp $14, %edx
214 je L(shr_14)
215 jmp L(shr_15)
216 #elif defined(USE_WCHAR)
217 cmp $0, %edx
218 je L(shr_0)
219 cmp $4, %edx
220 je L(shr_4)
221 cmp $8, %edx
222 je L(shr_8)
223 jmp L(shr_12)
224 #elif defined(USE_UTF16)
225 cmp $0, %edx
226 je L(shr_0)
227 cmp $2, %edx
228 je L(shr_2)
229 cmp $4, %edx
230 je L(shr_4)
231 cmp $6, %edx
232 je L(shr_6)
233 cmp $8, %edx
234 je L(shr_8)
235 cmp $10, %edx
236 je L(shr_10)
237 cmp $12, %edx
238 je L(shr_12)
239 jmp L(shr_14)
240 #endif
242 .p2align 4
243 L(shr_0):
244 cmp $80, %ecx
245 jae L(shr_0_gobble)
246 lea -48(%ecx), %ecx
247 xor %eax, %eax
248 movaps (%esi), %xmm1
249 pcmpeqb (%edi), %xmm1
250 movaps 16(%esi), %xmm2
251 pcmpeqb 16(%edi), %xmm2
252 pand %xmm1, %xmm2
253 pmovmskb %xmm2, %edx
254 add $32, %edi
255 add $32, %esi
256 sub $0xffff, %edx
257 jnz L(exit)
259 lea (%ecx, %edi,1), %eax
260 lea (%ecx, %esi,1), %edx
261 POP (%edi)
262 POP (%esi)
263 jmp L(less48bytes)
265 cfi_restore_state
266 cfi_remember_state
267 .p2align 4
268 L(shr_0_gobble):
269 lea -48(%ecx), %ecx
270 movdqa (%esi), %xmm0
271 xor %eax, %eax
272 pcmpeqb (%edi), %xmm0
273 sub $32, %ecx
274 movdqa 16(%esi), %xmm2
275 pcmpeqb 16(%edi), %xmm2
276 L(shr_0_gobble_loop):
277 pand %xmm0, %xmm2
278 sub $32, %ecx
279 pmovmskb %xmm2, %edx
280 movdqa %xmm0, %xmm1
281 movdqa 32(%esi), %xmm0
282 movdqa 48(%esi), %xmm2
283 sbb $0xffff, %edx
284 pcmpeqb 32(%edi), %xmm0
285 pcmpeqb 48(%edi), %xmm2
286 lea 32(%edi), %edi
287 lea 32(%esi), %esi
288 jz L(shr_0_gobble_loop)
290 pand %xmm0, %xmm2
291 cmp $0, %ecx
292 jge L(shr_0_gobble_loop_next)
293 inc %edx
294 add $32, %ecx
295 L(shr_0_gobble_loop_next):
296 test %edx, %edx
297 jnz L(exit)
299 pmovmskb %xmm2, %edx
300 movdqa %xmm0, %xmm1
301 lea 32(%edi), %edi
302 lea 32(%esi), %esi
303 sub $0xffff, %edx
304 jnz L(exit)
305 lea (%ecx, %edi,1), %eax
306 lea (%ecx, %esi,1), %edx
307 POP (%edi)
308 POP (%esi)
309 jmp L(less48bytes)
311 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
312 cfi_restore_state
313 cfi_remember_state
314 .p2align 4
315 L(shr_1):
316 cmp $80, %ecx
317 lea -48(%ecx), %ecx
318 mov %edx, %eax
319 jae L(shr_1_gobble)
321 movdqa 16(%esi), %xmm1
322 movdqa %xmm1, %xmm2
323 palignr $1,(%esi), %xmm1
324 pcmpeqb (%edi), %xmm1
326 movdqa 32(%esi), %xmm3
327 palignr $1,%xmm2, %xmm3
328 pcmpeqb 16(%edi), %xmm3
330 pand %xmm1, %xmm3
331 pmovmskb %xmm3, %edx
332 lea 32(%edi), %edi
333 lea 32(%esi), %esi
334 sub $0xffff, %edx
335 jnz L(exit)
336 lea (%ecx, %edi,1), %eax
337 lea 1(%ecx, %esi,1), %edx
338 POP (%edi)
339 POP (%esi)
340 jmp L(less48bytes)
342 cfi_restore_state
343 cfi_remember_state
344 .p2align 4
345 L(shr_1_gobble):
346 sub $32, %ecx
347 movdqa 16(%esi), %xmm0
348 palignr $1,(%esi), %xmm0
349 pcmpeqb (%edi), %xmm0
351 movdqa 32(%esi), %xmm3
352 palignr $1,16(%esi), %xmm3
353 pcmpeqb 16(%edi), %xmm3
355 L(shr_1_gobble_loop):
356 pand %xmm0, %xmm3
357 sub $32, %ecx
358 pmovmskb %xmm3, %edx
359 movdqa %xmm0, %xmm1
361 movdqa 64(%esi), %xmm3
362 palignr $1,48(%esi), %xmm3
363 sbb $0xffff, %edx
364 movdqa 48(%esi), %xmm0
365 palignr $1,32(%esi), %xmm0
366 pcmpeqb 32(%edi), %xmm0
367 lea 32(%esi), %esi
368 pcmpeqb 48(%edi), %xmm3
370 lea 32(%edi), %edi
371 jz L(shr_1_gobble_loop)
372 pand %xmm0, %xmm3
374 cmp $0, %ecx
375 jge L(shr_1_gobble_next)
376 inc %edx
377 add $32, %ecx
378 L(shr_1_gobble_next):
379 test %edx, %edx
380 jnz L(exit)
382 pmovmskb %xmm3, %edx
383 movdqa %xmm0, %xmm1
384 lea 32(%edi), %edi
385 lea 32(%esi), %esi
386 sub $0xffff, %edx
387 jnz L(exit)
389 lea (%ecx, %edi,1), %eax
390 lea 1(%ecx, %esi,1), %edx
391 POP (%edi)
392 POP (%esi)
393 jmp L(less48bytes)
394 #endif
397 #if !defined(USE_WCHAR)
398 cfi_restore_state
399 cfi_remember_state
400 .p2align 4
401 L(shr_2):
402 cmp $80, %ecx
403 lea -48(%ecx), %ecx
404 mov %edx, %eax
405 jae L(shr_2_gobble)
407 movdqa 16(%esi), %xmm1
408 movdqa %xmm1, %xmm2
409 palignr $2,(%esi), %xmm1
410 pcmpeqb (%edi), %xmm1
412 movdqa 32(%esi), %xmm3
413 palignr $2,%xmm2, %xmm3
414 pcmpeqb 16(%edi), %xmm3
416 pand %xmm1, %xmm3
417 pmovmskb %xmm3, %edx
418 lea 32(%edi), %edi
419 lea 32(%esi), %esi
420 sub $0xffff, %edx
421 jnz L(exit)
422 lea (%ecx, %edi,1), %eax
423 lea 2(%ecx, %esi,1), %edx
424 POP (%edi)
425 POP (%esi)
426 jmp L(less48bytes)
428 cfi_restore_state
429 cfi_remember_state
430 .p2align 4
431 L(shr_2_gobble):
432 sub $32, %ecx
433 movdqa 16(%esi), %xmm0
434 palignr $2,(%esi), %xmm0
435 pcmpeqb (%edi), %xmm0
437 movdqa 32(%esi), %xmm3
438 palignr $2,16(%esi), %xmm3
439 pcmpeqb 16(%edi), %xmm3
441 L(shr_2_gobble_loop):
442 pand %xmm0, %xmm3
443 sub $32, %ecx
444 pmovmskb %xmm3, %edx
445 movdqa %xmm0, %xmm1
447 movdqa 64(%esi), %xmm3
448 palignr $2,48(%esi), %xmm3
449 sbb $0xffff, %edx
450 movdqa 48(%esi), %xmm0
451 palignr $2,32(%esi), %xmm0
452 pcmpeqb 32(%edi), %xmm0
453 lea 32(%esi), %esi
454 pcmpeqb 48(%edi), %xmm3
456 lea 32(%edi), %edi
457 jz L(shr_2_gobble_loop)
458 pand %xmm0, %xmm3
460 cmp $0, %ecx
461 jge L(shr_2_gobble_next)
462 inc %edx
463 add $32, %ecx
464 L(shr_2_gobble_next):
465 test %edx, %edx
466 jnz L(exit)
468 pmovmskb %xmm3, %edx
469 movdqa %xmm0, %xmm1
470 lea 32(%edi), %edi
471 lea 32(%esi), %esi
472 sub $0xffff, %edx
473 jnz L(exit)
475 lea (%ecx, %edi,1), %eax
476 lea 2(%ecx, %esi,1), %edx
477 POP (%edi)
478 POP (%esi)
479 jmp L(less48bytes)
480 #endif
482 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
483 cfi_restore_state
484 cfi_remember_state
485 .p2align 4
486 L(shr_3):
487 cmp $80, %ecx
488 lea -48(%ecx), %ecx
489 mov %edx, %eax
490 jae L(shr_3_gobble)
492 movdqa 16(%esi), %xmm1
493 movdqa %xmm1, %xmm2
494 palignr $3,(%esi), %xmm1
495 pcmpeqb (%edi), %xmm1
497 movdqa 32(%esi), %xmm3
498 palignr $3,%xmm2, %xmm3
499 pcmpeqb 16(%edi), %xmm3
501 pand %xmm1, %xmm3
502 pmovmskb %xmm3, %edx
503 lea 32(%edi), %edi
504 lea 32(%esi), %esi
505 sub $0xffff, %edx
506 jnz L(exit)
507 lea (%ecx, %edi,1), %eax
508 lea 3(%ecx, %esi,1), %edx
509 POP (%edi)
510 POP (%esi)
511 jmp L(less48bytes)
513 cfi_restore_state
514 cfi_remember_state
515 .p2align 4
516 L(shr_3_gobble):
517 sub $32, %ecx
518 movdqa 16(%esi), %xmm0
519 palignr $3,(%esi), %xmm0
520 pcmpeqb (%edi), %xmm0
522 movdqa 32(%esi), %xmm3
523 palignr $3,16(%esi), %xmm3
524 pcmpeqb 16(%edi), %xmm3
526 L(shr_3_gobble_loop):
527 pand %xmm0, %xmm3
528 sub $32, %ecx
529 pmovmskb %xmm3, %edx
530 movdqa %xmm0, %xmm1
532 movdqa 64(%esi), %xmm3
533 palignr $3,48(%esi), %xmm3
534 sbb $0xffff, %edx
535 movdqa 48(%esi), %xmm0
536 palignr $3,32(%esi), %xmm0
537 pcmpeqb 32(%edi), %xmm0
538 lea 32(%esi), %esi
539 pcmpeqb 48(%edi), %xmm3
541 lea 32(%edi), %edi
542 jz L(shr_3_gobble_loop)
543 pand %xmm0, %xmm3
545 cmp $0, %ecx
546 jge L(shr_3_gobble_next)
547 inc %edx
548 add $32, %ecx
549 L(shr_3_gobble_next):
550 test %edx, %edx
551 jnz L(exit)
553 pmovmskb %xmm3, %edx
554 movdqa %xmm0, %xmm1
555 lea 32(%edi), %edi
556 lea 32(%esi), %esi
557 sub $0xffff, %edx
558 jnz L(exit)
560 lea (%ecx, %edi,1), %eax
561 lea 3(%ecx, %esi,1), %edx
562 POP (%edi)
563 POP (%esi)
564 jmp L(less48bytes)
565 #endif
567 cfi_restore_state
568 cfi_remember_state
569 .p2align 4
570 L(shr_4):
571 cmp $80, %ecx
572 lea -48(%ecx), %ecx
573 mov %edx, %eax
574 jae L(shr_4_gobble)
576 movdqa 16(%esi), %xmm1
577 movdqa %xmm1, %xmm2
578 palignr $4,(%esi), %xmm1
579 pcmpeqb (%edi), %xmm1
581 movdqa 32(%esi), %xmm3
582 palignr $4,%xmm2, %xmm3
583 pcmpeqb 16(%edi), %xmm3
585 pand %xmm1, %xmm3
586 pmovmskb %xmm3, %edx
587 lea 32(%edi), %edi
588 lea 32(%esi), %esi
589 sub $0xffff, %edx
590 jnz L(exit)
591 lea (%ecx, %edi,1), %eax
592 lea 4(%ecx, %esi,1), %edx
593 POP (%edi)
594 POP (%esi)
595 jmp L(less48bytes)
597 cfi_restore_state
598 cfi_remember_state
599 .p2align 4
600 L(shr_4_gobble):
601 sub $32, %ecx
602 movdqa 16(%esi), %xmm0
603 palignr $4,(%esi), %xmm0
604 pcmpeqb (%edi), %xmm0
606 movdqa 32(%esi), %xmm3
607 palignr $4,16(%esi), %xmm3
608 pcmpeqb 16(%edi), %xmm3
610 L(shr_4_gobble_loop):
611 pand %xmm0, %xmm3
612 sub $32, %ecx
613 pmovmskb %xmm3, %edx
614 movdqa %xmm0, %xmm1
616 movdqa 64(%esi), %xmm3
617 palignr $4,48(%esi), %xmm3
618 sbb $0xffff, %edx
619 movdqa 48(%esi), %xmm0
620 palignr $4,32(%esi), %xmm0
621 pcmpeqb 32(%edi), %xmm0
622 lea 32(%esi), %esi
623 pcmpeqb 48(%edi), %xmm3
625 lea 32(%edi), %edi
626 jz L(shr_4_gobble_loop)
627 pand %xmm0, %xmm3
629 cmp $0, %ecx
630 jge L(shr_4_gobble_next)
631 inc %edx
632 add $32, %ecx
633 L(shr_4_gobble_next):
634 test %edx, %edx
635 jnz L(exit)
637 pmovmskb %xmm3, %edx
638 movdqa %xmm0, %xmm1
639 lea 32(%edi), %edi
640 lea 32(%esi), %esi
641 sub $0xffff, %edx
642 jnz L(exit)
644 lea (%ecx, %edi,1), %eax
645 lea 4(%ecx, %esi,1), %edx
646 POP (%edi)
647 POP (%esi)
648 jmp L(less48bytes)
650 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
651 cfi_restore_state
652 cfi_remember_state
653 .p2align 4
654 L(shr_5):
655 cmp $80, %ecx
656 lea -48(%ecx), %ecx
657 mov %edx, %eax
658 jae L(shr_5_gobble)
660 movdqa 16(%esi), %xmm1
661 movdqa %xmm1, %xmm2
662 palignr $5,(%esi), %xmm1
663 pcmpeqb (%edi), %xmm1
665 movdqa 32(%esi), %xmm3
666 palignr $5,%xmm2, %xmm3
667 pcmpeqb 16(%edi), %xmm3
669 pand %xmm1, %xmm3
670 pmovmskb %xmm3, %edx
671 lea 32(%edi), %edi
672 lea 32(%esi), %esi
673 sub $0xffff, %edx
674 jnz L(exit)
675 lea (%ecx, %edi,1), %eax
676 lea 5(%ecx, %esi,1), %edx
677 POP (%edi)
678 POP (%esi)
679 jmp L(less48bytes)
681 cfi_restore_state
682 cfi_remember_state
683 .p2align 4
684 L(shr_5_gobble):
685 sub $32, %ecx
686 movdqa 16(%esi), %xmm0
687 palignr $5,(%esi), %xmm0
688 pcmpeqb (%edi), %xmm0
690 movdqa 32(%esi), %xmm3
691 palignr $5,16(%esi), %xmm3
692 pcmpeqb 16(%edi), %xmm3
694 L(shr_5_gobble_loop):
695 pand %xmm0, %xmm3
696 sub $32, %ecx
697 pmovmskb %xmm3, %edx
698 movdqa %xmm0, %xmm1
700 movdqa 64(%esi), %xmm3
701 palignr $5,48(%esi), %xmm3
702 sbb $0xffff, %edx
703 movdqa 48(%esi), %xmm0
704 palignr $5,32(%esi), %xmm0
705 pcmpeqb 32(%edi), %xmm0
706 lea 32(%esi), %esi
707 pcmpeqb 48(%edi), %xmm3
709 lea 32(%edi), %edi
710 jz L(shr_5_gobble_loop)
711 pand %xmm0, %xmm3
713 cmp $0, %ecx
714 jge L(shr_5_gobble_next)
715 inc %edx
716 add $32, %ecx
717 L(shr_5_gobble_next):
718 test %edx, %edx
719 jnz L(exit)
721 pmovmskb %xmm3, %edx
722 movdqa %xmm0, %xmm1
723 lea 32(%edi), %edi
724 lea 32(%esi), %esi
725 sub $0xffff, %edx
726 jnz L(exit)
728 lea (%ecx, %edi,1), %eax
729 lea 5(%ecx, %esi,1), %edx
730 POP (%edi)
731 POP (%esi)
732 jmp L(less48bytes)
733 #endif
735 #if !defined(USE_WCHAR)
736 cfi_restore_state
737 cfi_remember_state
738 .p2align 4
739 L(shr_6):
740 cmp $80, %ecx
741 lea -48(%ecx), %ecx
742 mov %edx, %eax
743 jae L(shr_6_gobble)
745 movdqa 16(%esi), %xmm1
746 movdqa %xmm1, %xmm2
747 palignr $6,(%esi), %xmm1
748 pcmpeqb (%edi), %xmm1
750 movdqa 32(%esi), %xmm3
751 palignr $6,%xmm2, %xmm3
752 pcmpeqb 16(%edi), %xmm3
754 pand %xmm1, %xmm3
755 pmovmskb %xmm3, %edx
756 lea 32(%edi), %edi
757 lea 32(%esi), %esi
758 sub $0xffff, %edx
759 jnz L(exit)
760 lea (%ecx, %edi,1), %eax
761 lea 6(%ecx, %esi,1), %edx
762 POP (%edi)
763 POP (%esi)
764 jmp L(less48bytes)
766 cfi_restore_state
767 cfi_remember_state
768 .p2align 4
769 L(shr_6_gobble):
770 sub $32, %ecx
771 movdqa 16(%esi), %xmm0
772 palignr $6,(%esi), %xmm0
773 pcmpeqb (%edi), %xmm0
775 movdqa 32(%esi), %xmm3
776 palignr $6,16(%esi), %xmm3
777 pcmpeqb 16(%edi), %xmm3
779 L(shr_6_gobble_loop):
780 pand %xmm0, %xmm3
781 sub $32, %ecx
782 pmovmskb %xmm3, %edx
783 movdqa %xmm0, %xmm1
785 movdqa 64(%esi), %xmm3
786 palignr $6,48(%esi), %xmm3
787 sbb $0xffff, %edx
788 movdqa 48(%esi), %xmm0
789 palignr $6,32(%esi), %xmm0
790 pcmpeqb 32(%edi), %xmm0
791 lea 32(%esi), %esi
792 pcmpeqb 48(%edi), %xmm3
794 lea 32(%edi), %edi
795 jz L(shr_6_gobble_loop)
796 pand %xmm0, %xmm3
798 cmp $0, %ecx
799 jge L(shr_6_gobble_next)
800 inc %edx
801 add $32, %ecx
802 L(shr_6_gobble_next):
803 test %edx, %edx
804 jnz L(exit)
806 pmovmskb %xmm3, %edx
807 movdqa %xmm0, %xmm1
808 lea 32(%edi), %edi
809 lea 32(%esi), %esi
810 sub $0xffff, %edx
811 jnz L(exit)
813 lea (%ecx, %edi,1), %eax
814 lea 6(%ecx, %esi,1), %edx
815 POP (%edi)
816 POP (%esi)
817 jmp L(less48bytes)
818 #endif
820 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
821 cfi_restore_state
822 cfi_remember_state
823 .p2align 4
824 L(shr_7):
825 cmp $80, %ecx
826 lea -48(%ecx), %ecx
827 mov %edx, %eax
828 jae L(shr_7_gobble)
830 movdqa 16(%esi), %xmm1
831 movdqa %xmm1, %xmm2
832 palignr $7,(%esi), %xmm1
833 pcmpeqb (%edi), %xmm1
835 movdqa 32(%esi), %xmm3
836 palignr $7,%xmm2, %xmm3
837 pcmpeqb 16(%edi), %xmm3
839 pand %xmm1, %xmm3
840 pmovmskb %xmm3, %edx
841 lea 32(%edi), %edi
842 lea 32(%esi), %esi
843 sub $0xffff, %edx
844 jnz L(exit)
845 lea (%ecx, %edi,1), %eax
846 lea 7(%ecx, %esi,1), %edx
847 POP (%edi)
848 POP (%esi)
849 jmp L(less48bytes)
851 cfi_restore_state
852 cfi_remember_state
853 .p2align 4
854 L(shr_7_gobble):
855 sub $32, %ecx
856 movdqa 16(%esi), %xmm0
857 palignr $7,(%esi), %xmm0
858 pcmpeqb (%edi), %xmm0
860 movdqa 32(%esi), %xmm3
861 palignr $7,16(%esi), %xmm3
862 pcmpeqb 16(%edi), %xmm3
864 L(shr_7_gobble_loop):
865 pand %xmm0, %xmm3
866 sub $32, %ecx
867 pmovmskb %xmm3, %edx
868 movdqa %xmm0, %xmm1
870 movdqa 64(%esi), %xmm3
871 palignr $7,48(%esi), %xmm3
872 sbb $0xffff, %edx
873 movdqa 48(%esi), %xmm0
874 palignr $7,32(%esi), %xmm0
875 pcmpeqb 32(%edi), %xmm0
876 lea 32(%esi), %esi
877 pcmpeqb 48(%edi), %xmm3
879 lea 32(%edi), %edi
880 jz L(shr_7_gobble_loop)
881 pand %xmm0, %xmm3
883 cmp $0, %ecx
884 jge L(shr_7_gobble_next)
885 inc %edx
886 add $32, %ecx
887 L(shr_7_gobble_next):
888 test %edx, %edx
889 jnz L(exit)
891 pmovmskb %xmm3, %edx
892 movdqa %xmm0, %xmm1
893 lea 32(%edi), %edi
894 lea 32(%esi), %esi
895 sub $0xffff, %edx
896 jnz L(exit)
898 lea (%ecx, %edi,1), %eax
899 lea 7(%ecx, %esi,1), %edx
900 POP (%edi)
901 POP (%esi)
902 jmp L(less48bytes)
903 #endif
905 cfi_restore_state
906 cfi_remember_state
907 .p2align 4
908 L(shr_8):
909 cmp $80, %ecx
910 lea -48(%ecx), %ecx
911 mov %edx, %eax
912 jae L(shr_8_gobble)
914 movdqa 16(%esi), %xmm1
915 movdqa %xmm1, %xmm2
916 palignr $8,(%esi), %xmm1
917 pcmpeqb (%edi), %xmm1
919 movdqa 32(%esi), %xmm3
920 palignr $8,%xmm2, %xmm3
921 pcmpeqb 16(%edi), %xmm3
923 pand %xmm1, %xmm3
924 pmovmskb %xmm3, %edx
925 lea 32(%edi), %edi
926 lea 32(%esi), %esi
927 sub $0xffff, %edx
928 jnz L(exit)
929 lea (%ecx, %edi,1), %eax
930 lea 8(%ecx, %esi,1), %edx
931 POP (%edi)
932 POP (%esi)
933 jmp L(less48bytes)
935 cfi_restore_state
936 cfi_remember_state
937 .p2align 4
938 L(shr_8_gobble):
939 sub $32, %ecx
940 movdqa 16(%esi), %xmm0
941 palignr $8,(%esi), %xmm0
942 pcmpeqb (%edi), %xmm0
944 movdqa 32(%esi), %xmm3
945 palignr $8,16(%esi), %xmm3
946 pcmpeqb 16(%edi), %xmm3
948 L(shr_8_gobble_loop):
949 pand %xmm0, %xmm3
950 sub $32, %ecx
951 pmovmskb %xmm3, %edx
952 movdqa %xmm0, %xmm1
954 movdqa 64(%esi), %xmm3
955 palignr $8,48(%esi), %xmm3
956 sbb $0xffff, %edx
957 movdqa 48(%esi), %xmm0
958 palignr $8,32(%esi), %xmm0
959 pcmpeqb 32(%edi), %xmm0
960 lea 32(%esi), %esi
961 pcmpeqb 48(%edi), %xmm3
963 lea 32(%edi), %edi
964 jz L(shr_8_gobble_loop)
965 pand %xmm0, %xmm3
967 cmp $0, %ecx
968 jge L(shr_8_gobble_next)
969 inc %edx
970 add $32, %ecx
971 L(shr_8_gobble_next):
972 test %edx, %edx
973 jnz L(exit)
975 pmovmskb %xmm3, %edx
976 movdqa %xmm0, %xmm1
977 lea 32(%edi), %edi
978 lea 32(%esi), %esi
979 sub $0xffff, %edx
980 jnz L(exit)
982 lea (%ecx, %edi,1), %eax
983 lea 8(%ecx, %esi,1), %edx
984 POP (%edi)
985 POP (%esi)
986 jmp L(less48bytes)
988 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
989 cfi_restore_state
990 cfi_remember_state
991 .p2align 4
992 L(shr_9):
993 cmp $80, %ecx
994 lea -48(%ecx), %ecx
995 mov %edx, %eax
996 jae L(shr_9_gobble)
998 movdqa 16(%esi), %xmm1
999 movdqa %xmm1, %xmm2
1000 palignr $9,(%esi), %xmm1
1001 pcmpeqb (%edi), %xmm1
1003 movdqa 32(%esi), %xmm3
1004 palignr $9,%xmm2, %xmm3
1005 pcmpeqb 16(%edi), %xmm3
1007 pand %xmm1, %xmm3
1008 pmovmskb %xmm3, %edx
1009 lea 32(%edi), %edi
1010 lea 32(%esi), %esi
1011 sub $0xffff, %edx
1012 jnz L(exit)
1013 lea (%ecx, %edi,1), %eax
1014 lea 9(%ecx, %esi,1), %edx
1015 POP (%edi)
1016 POP (%esi)
1017 jmp L(less48bytes)
1019 cfi_restore_state
1020 cfi_remember_state
1021 .p2align 4
1022 L(shr_9_gobble):
1023 sub $32, %ecx
1024 movdqa 16(%esi), %xmm0
1025 palignr $9,(%esi), %xmm0
1026 pcmpeqb (%edi), %xmm0
1028 movdqa 32(%esi), %xmm3
1029 palignr $9,16(%esi), %xmm3
1030 pcmpeqb 16(%edi), %xmm3
1032 L(shr_9_gobble_loop):
1033 pand %xmm0, %xmm3
1034 sub $32, %ecx
1035 pmovmskb %xmm3, %edx
1036 movdqa %xmm0, %xmm1
1038 movdqa 64(%esi), %xmm3
1039 palignr $9,48(%esi), %xmm3
1040 sbb $0xffff, %edx
1041 movdqa 48(%esi), %xmm0
1042 palignr $9,32(%esi), %xmm0
1043 pcmpeqb 32(%edi), %xmm0
1044 lea 32(%esi), %esi
1045 pcmpeqb 48(%edi), %xmm3
1047 lea 32(%edi), %edi
1048 jz L(shr_9_gobble_loop)
1049 pand %xmm0, %xmm3
1051 cmp $0, %ecx
1052 jge L(shr_9_gobble_next)
1053 inc %edx
1054 add $32, %ecx
1055 L(shr_9_gobble_next):
1056 test %edx, %edx
1057 jnz L(exit)
1059 pmovmskb %xmm3, %edx
1060 movdqa %xmm0, %xmm1
1061 lea 32(%edi), %edi
1062 lea 32(%esi), %esi
1063 sub $0xffff, %edx
1064 jnz L(exit)
1066 lea (%ecx, %edi,1), %eax
1067 lea 9(%ecx, %esi,1), %edx
1068 POP (%edi)
1069 POP (%esi)
1070 jmp L(less48bytes)
1071 #endif
1073 #if !defined(USE_WCHAR)
1074 cfi_restore_state
1075 cfi_remember_state
1076 .p2align 4
1077 L(shr_10):
1078 cmp $80, %ecx
1079 lea -48(%ecx), %ecx
1080 mov %edx, %eax
1081 jae L(shr_10_gobble)
1083 movdqa 16(%esi), %xmm1
1084 movdqa %xmm1, %xmm2
1085 palignr $10, (%esi), %xmm1
1086 pcmpeqb (%edi), %xmm1
1088 movdqa 32(%esi), %xmm3
1089 palignr $10,%xmm2, %xmm3
1090 pcmpeqb 16(%edi), %xmm3
1092 pand %xmm1, %xmm3
1093 pmovmskb %xmm3, %edx
1094 lea 32(%edi), %edi
1095 lea 32(%esi), %esi
1096 sub $0xffff, %edx
1097 jnz L(exit)
1098 lea (%ecx, %edi,1), %eax
1099 lea 10(%ecx, %esi,1), %edx
1100 POP (%edi)
1101 POP (%esi)
1102 jmp L(less48bytes)
1104 cfi_restore_state
1105 cfi_remember_state
1106 .p2align 4
1107 L(shr_10_gobble):
1108 sub $32, %ecx
1109 movdqa 16(%esi), %xmm0
1110 palignr $10, (%esi), %xmm0
1111 pcmpeqb (%edi), %xmm0
1113 movdqa 32(%esi), %xmm3
1114 palignr $10, 16(%esi), %xmm3
1115 pcmpeqb 16(%edi), %xmm3
1117 L(shr_10_gobble_loop):
1118 pand %xmm0, %xmm3
1119 sub $32, %ecx
1120 pmovmskb %xmm3, %edx
1121 movdqa %xmm0, %xmm1
1123 movdqa 64(%esi), %xmm3
1124 palignr $10,48(%esi), %xmm3
1125 sbb $0xffff, %edx
1126 movdqa 48(%esi), %xmm0
1127 palignr $10,32(%esi), %xmm0
1128 pcmpeqb 32(%edi), %xmm0
1129 lea 32(%esi), %esi
1130 pcmpeqb 48(%edi), %xmm3
1132 lea 32(%edi), %edi
1133 jz L(shr_10_gobble_loop)
1134 pand %xmm0, %xmm3
1136 cmp $0, %ecx
1137 jge L(shr_10_gobble_next)
1138 inc %edx
1139 add $32, %ecx
1140 L(shr_10_gobble_next):
1141 test %edx, %edx
1142 jnz L(exit)
1144 pmovmskb %xmm3, %edx
1145 movdqa %xmm0, %xmm1
1146 lea 32(%edi), %edi
1147 lea 32(%esi), %esi
1148 sub $0xffff, %edx
1149 jnz L(exit)
1151 lea (%ecx, %edi,1), %eax
1152 lea 10(%ecx, %esi,1), %edx
1153 POP (%edi)
1154 POP (%esi)
1155 jmp L(less48bytes)
1156 #endif
1158 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
1159 cfi_restore_state
1160 cfi_remember_state
1161 .p2align 4
1162 L(shr_11):
1163 cmp $80, %ecx
1164 lea -48(%ecx), %ecx
1165 mov %edx, %eax
1166 jae L(shr_11_gobble)
1168 movdqa 16(%esi), %xmm1
1169 movdqa %xmm1, %xmm2
1170 palignr $11, (%esi), %xmm1
1171 pcmpeqb (%edi), %xmm1
1173 movdqa 32(%esi), %xmm3
1174 palignr $11, %xmm2, %xmm3
1175 pcmpeqb 16(%edi), %xmm3
1177 pand %xmm1, %xmm3
1178 pmovmskb %xmm3, %edx
1179 lea 32(%edi), %edi
1180 lea 32(%esi), %esi
1181 sub $0xffff, %edx
1182 jnz L(exit)
1183 lea (%ecx, %edi,1), %eax
1184 lea 11(%ecx, %esi,1), %edx
1185 POP (%edi)
1186 POP (%esi)
1187 jmp L(less48bytes)
1189 cfi_restore_state
1190 cfi_remember_state
1191 .p2align 4
1192 L(shr_11_gobble):
1193 sub $32, %ecx
1194 movdqa 16(%esi), %xmm0
1195 palignr $11, (%esi), %xmm0
1196 pcmpeqb (%edi), %xmm0
1198 movdqa 32(%esi), %xmm3
1199 palignr $11, 16(%esi), %xmm3
1200 pcmpeqb 16(%edi), %xmm3
1202 L(shr_11_gobble_loop):
1203 pand %xmm0, %xmm3
1204 sub $32, %ecx
1205 pmovmskb %xmm3, %edx
1206 movdqa %xmm0, %xmm1
1208 movdqa 64(%esi), %xmm3
1209 palignr $11,48(%esi), %xmm3
1210 sbb $0xffff, %edx
1211 movdqa 48(%esi), %xmm0
1212 palignr $11,32(%esi), %xmm0
1213 pcmpeqb 32(%edi), %xmm0
1214 lea 32(%esi), %esi
1215 pcmpeqb 48(%edi), %xmm3
1217 lea 32(%edi), %edi
1218 jz L(shr_11_gobble_loop)
1219 pand %xmm0, %xmm3
1221 cmp $0, %ecx
1222 jge L(shr_11_gobble_next)
1223 inc %edx
1224 add $32, %ecx
1225 L(shr_11_gobble_next):
1226 test %edx, %edx
1227 jnz L(exit)
1229 pmovmskb %xmm3, %edx
1230 movdqa %xmm0, %xmm1
1231 lea 32(%edi), %edi
1232 lea 32(%esi), %esi
1233 sub $0xffff, %edx
1234 jnz L(exit)
1236 lea (%ecx, %edi,1), %eax
1237 lea 11(%ecx, %esi,1), %edx
1238 POP (%edi)
1239 POP (%esi)
1240 jmp L(less48bytes)
1241 #endif
1243 cfi_restore_state
1244 cfi_remember_state
1245 .p2align 4
1246 L(shr_12):
1247 cmp $80, %ecx
1248 lea -48(%ecx), %ecx
1249 mov %edx, %eax
1250 jae L(shr_12_gobble)
1252 movdqa 16(%esi), %xmm1
1253 movdqa %xmm1, %xmm2
1254 palignr $12, (%esi), %xmm1
1255 pcmpeqb (%edi), %xmm1
1257 movdqa 32(%esi), %xmm3
1258 palignr $12, %xmm2, %xmm3
1259 pcmpeqb 16(%edi), %xmm3
1261 pand %xmm1, %xmm3
1262 pmovmskb %xmm3, %edx
1263 lea 32(%edi), %edi
1264 lea 32(%esi), %esi
1265 sub $0xffff, %edx
1266 jnz L(exit)
1267 lea (%ecx, %edi,1), %eax
1268 lea 12(%ecx, %esi,1), %edx
1269 POP (%edi)
1270 POP (%esi)
1271 jmp L(less48bytes)
1273 cfi_restore_state
1274 cfi_remember_state
1275 .p2align 4
1276 L(shr_12_gobble):
1277 sub $32, %ecx
1278 movdqa 16(%esi), %xmm0
1279 palignr $12, (%esi), %xmm0
1280 pcmpeqb (%edi), %xmm0
1282 movdqa 32(%esi), %xmm3
1283 palignr $12, 16(%esi), %xmm3
1284 pcmpeqb 16(%edi), %xmm3
1286 L(shr_12_gobble_loop):
1287 pand %xmm0, %xmm3
1288 sub $32, %ecx
1289 pmovmskb %xmm3, %edx
1290 movdqa %xmm0, %xmm1
1292 movdqa 64(%esi), %xmm3
1293 palignr $12,48(%esi), %xmm3
1294 sbb $0xffff, %edx
1295 movdqa 48(%esi), %xmm0
1296 palignr $12,32(%esi), %xmm0
1297 pcmpeqb 32(%edi), %xmm0
1298 lea 32(%esi), %esi
1299 pcmpeqb 48(%edi), %xmm3
1301 lea 32(%edi), %edi
1302 jz L(shr_12_gobble_loop)
1303 pand %xmm0, %xmm3
1305 cmp $0, %ecx
1306 jge L(shr_12_gobble_next)
1307 inc %edx
1308 add $32, %ecx
1309 L(shr_12_gobble_next):
1310 test %edx, %edx
1311 jnz L(exit)
1313 pmovmskb %xmm3, %edx
1314 movdqa %xmm0, %xmm1
1315 lea 32(%edi), %edi
1316 lea 32(%esi), %esi
1317 sub $0xffff, %edx
1318 jnz L(exit)
1320 lea (%ecx, %edi,1), %eax
1321 lea 12(%ecx, %esi,1), %edx
1322 POP (%edi)
1323 POP (%esi)
1324 jmp L(less48bytes)
1326 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
1327 cfi_restore_state
1328 cfi_remember_state
1329 .p2align 4
1330 L(shr_13):
1331 cmp $80, %ecx
1332 lea -48(%ecx), %ecx
1333 mov %edx, %eax
1334 jae L(shr_13_gobble)
1336 movdqa 16(%esi), %xmm1
1337 movdqa %xmm1, %xmm2
1338 palignr $13, (%esi), %xmm1
1339 pcmpeqb (%edi), %xmm1
1341 movdqa 32(%esi), %xmm3
1342 palignr $13, %xmm2, %xmm3
1343 pcmpeqb 16(%edi), %xmm3
1345 pand %xmm1, %xmm3
1346 pmovmskb %xmm3, %edx
1347 lea 32(%edi), %edi
1348 lea 32(%esi), %esi
1349 sub $0xffff, %edx
1350 jnz L(exit)
1351 lea (%ecx, %edi,1), %eax
1352 lea 13(%ecx, %esi,1), %edx
1353 POP (%edi)
1354 POP (%esi)
1355 jmp L(less48bytes)
1357 cfi_restore_state
1358 cfi_remember_state
1359 .p2align 4
1360 L(shr_13_gobble):
1361 sub $32, %ecx
1362 movdqa 16(%esi), %xmm0
1363 palignr $13, (%esi), %xmm0
1364 pcmpeqb (%edi), %xmm0
1366 movdqa 32(%esi), %xmm3
1367 palignr $13, 16(%esi), %xmm3
1368 pcmpeqb 16(%edi), %xmm3
1370 L(shr_13_gobble_loop):
1371 pand %xmm0, %xmm3
1372 sub $32, %ecx
1373 pmovmskb %xmm3, %edx
1374 movdqa %xmm0, %xmm1
1376 movdqa 64(%esi), %xmm3
1377 palignr $13,48(%esi), %xmm3
1378 sbb $0xffff, %edx
1379 movdqa 48(%esi), %xmm0
1380 palignr $13,32(%esi), %xmm0
1381 pcmpeqb 32(%edi), %xmm0
1382 lea 32(%esi), %esi
1383 pcmpeqb 48(%edi), %xmm3
1385 lea 32(%edi), %edi
1386 jz L(shr_13_gobble_loop)
1387 pand %xmm0, %xmm3
1389 cmp $0, %ecx
1390 jge L(shr_13_gobble_next)
1391 inc %edx
1392 add $32, %ecx
1393 L(shr_13_gobble_next):
1394 test %edx, %edx
1395 jnz L(exit)
1397 pmovmskb %xmm3, %edx
1398 movdqa %xmm0, %xmm1
1399 lea 32(%edi), %edi
1400 lea 32(%esi), %esi
1401 sub $0xffff, %edx
1402 jnz L(exit)
1404 lea (%ecx, %edi,1), %eax
1405 lea 13(%ecx, %esi,1), %edx
1406 POP (%edi)
1407 POP (%esi)
1408 jmp L(less48bytes)
1409 #endif
1411 #if !defined(USE_WCHAR)
1412 cfi_restore_state
1413 cfi_remember_state
1414 .p2align 4
1415 L(shr_14):
1416 cmp $80, %ecx
1417 lea -48(%ecx), %ecx
1418 mov %edx, %eax
1419 jae L(shr_14_gobble)
1421 movdqa 16(%esi), %xmm1
1422 movdqa %xmm1, %xmm2
1423 palignr $14, (%esi), %xmm1
1424 pcmpeqb (%edi), %xmm1
1426 movdqa 32(%esi), %xmm3
1427 palignr $14, %xmm2, %xmm3
1428 pcmpeqb 16(%edi), %xmm3
1430 pand %xmm1, %xmm3
1431 pmovmskb %xmm3, %edx
1432 lea 32(%edi), %edi
1433 lea 32(%esi), %esi
1434 sub $0xffff, %edx
1435 jnz L(exit)
1436 lea (%ecx, %edi,1), %eax
1437 lea 14(%ecx, %esi,1), %edx
1438 POP (%edi)
1439 POP (%esi)
1440 jmp L(less48bytes)
1442 cfi_restore_state
1443 cfi_remember_state
1444 .p2align 4
1445 L(shr_14_gobble):
1446 sub $32, %ecx
1447 movdqa 16(%esi), %xmm0
1448 palignr $14, (%esi), %xmm0
1449 pcmpeqb (%edi), %xmm0
1451 movdqa 32(%esi), %xmm3
1452 palignr $14, 16(%esi), %xmm3
1453 pcmpeqb 16(%edi), %xmm3
1455 L(shr_14_gobble_loop):
1456 pand %xmm0, %xmm3
1457 sub $32, %ecx
1458 pmovmskb %xmm3, %edx
1459 movdqa %xmm0, %xmm1
1461 movdqa 64(%esi), %xmm3
1462 palignr $14,48(%esi), %xmm3
1463 sbb $0xffff, %edx
1464 movdqa 48(%esi), %xmm0
1465 palignr $14,32(%esi), %xmm0
1466 pcmpeqb 32(%edi), %xmm0
1467 lea 32(%esi), %esi
1468 pcmpeqb 48(%edi), %xmm3
1470 lea 32(%edi), %edi
1471 jz L(shr_14_gobble_loop)
1472 pand %xmm0, %xmm3
1474 cmp $0, %ecx
1475 jge L(shr_14_gobble_next)
1476 inc %edx
1477 add $32, %ecx
1478 L(shr_14_gobble_next):
1479 test %edx, %edx
1480 jnz L(exit)
1482 pmovmskb %xmm3, %edx
1483 movdqa %xmm0, %xmm1
1484 lea 32(%edi), %edi
1485 lea 32(%esi), %esi
1486 sub $0xffff, %edx
1487 jnz L(exit)
1489 lea (%ecx, %edi,1), %eax
1490 lea 14(%ecx, %esi,1), %edx
1491 POP (%edi)
1492 POP (%esi)
1493 jmp L(less48bytes)
1494 #endif
1496 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
1497 cfi_restore_state
1498 cfi_remember_state
1499 .p2align 4
1500 L(shr_15):
1501 cmp $80, %ecx
1502 lea -48(%ecx), %ecx
1503 mov %edx, %eax
1504 jae L(shr_15_gobble)
1506 movdqa 16(%esi), %xmm1
1507 movdqa %xmm1, %xmm2
1508 palignr $15, (%esi), %xmm1
1509 pcmpeqb (%edi), %xmm1
1511 movdqa 32(%esi), %xmm3
1512 palignr $15, %xmm2, %xmm3
1513 pcmpeqb 16(%edi), %xmm3
1515 pand %xmm1, %xmm3
1516 pmovmskb %xmm3, %edx
1517 lea 32(%edi), %edi
1518 lea 32(%esi), %esi
1519 sub $0xffff, %edx
1520 jnz L(exit)
1521 lea (%ecx, %edi,1), %eax
1522 lea 15(%ecx, %esi,1), %edx
1523 POP (%edi)
1524 POP (%esi)
1525 jmp L(less48bytes)
/* L(shr_15_gobble): 32-bytes-per-iteration compare loop for the
   "palignr $15" path, entered from L(shr_15) when the counter was >= 80.
   %esi/%edi must be 16-byte aligned (movdqa / SSE memory operands),
   %ecx is the byte counter, %eax was set by L(shr_15) and is left alone.
   pcmpeqb yields 0xff per equal byte, so a pmovmskb value of 0xffff means
   "all 16 bytes equal".  */
1527 cfi_restore_state
1528 cfi_remember_state
1529 .p2align 4
1530 L(shr_15_gobble):
1531 sub $32, %ecx
/* Compare the first 32 bytes ahead of the loop: %xmm0 = result for bytes
   0..15, %xmm3 = result for bytes 16..31.  */
1532 movdqa 16(%esi), %xmm0
1533 palignr $15, (%esi), %xmm0
1534 pcmpeqb (%edi), %xmm0
1536 movdqa 32(%esi), %xmm3
1537 palignr $15, 16(%esi), %xmm3
1538 pcmpeqb 16(%edi), %xmm3
1540 L(shr_15_gobble_loop):
/* Fold both halves; the mask is 0xffff only if all 32 bytes matched.  */
1541 pand %xmm0, %xmm3
/* CF = borrow out of the counter.  It is consumed by the sbb below; the
   SSE instructions in between do not modify EFLAGS.  */
1542 sub $32, %ecx
1543 pmovmskb %xmm3, %edx
/* Keep the first-half result: L(exit) reads it from %xmm1.  */
1544 movdqa %xmm0, %xmm1
/* Start comparing the next 32 bytes (offsets 32..63).  */
1546 movdqa 64(%esi), %xmm3
1547 palignr $15,48(%esi), %xmm3
/* %edx = mask - 0xffff - CF: zero only for a full match with no borrow.  */
1548 sbb $0xffff, %edx
1549 movdqa 48(%esi), %xmm0
1550 palignr $15,32(%esi), %xmm0
1551 pcmpeqb 32(%edi), %xmm0
1552 lea 32(%esi), %esi
1553 pcmpeqb 48(%edi), %xmm3
1555 lea 32(%edi), %edi
/* ZF is still the one produced by sbb (lea and SSE ops leave flags alone).  */
1556 jz L(shr_15_gobble_loop)
1557 pand %xmm0, %xmm3
/* If the counter went negative, the sbb above subtracted one extra for the
   borrow: add it back to %edx and give the 32 bytes back to %ecx.  */
1559 cmp $0, %ecx
1560 jge L(shr_15_gobble_next)
1561 inc %edx
1562 add $32, %ecx
1563 L(shr_15_gobble_next):
/* Non-zero means a byte differed in the 32 bytes just checked.  */
1564 test %edx, %edx
1565 jnz L(exit)
/* Check the following 32 bytes, whose results are already in %xmm0/%xmm3.  */
1567 pmovmskb %xmm3, %edx
1568 movdqa %xmm0, %xmm1
1569 lea 32(%edi), %edi
1570 lea 32(%esi), %esi
1571 sub $0xffff, %edx
1572 jnz L(exit)
/* Everything so far matched: %eax/%edx become pointers just past the end
   of the two buffers for the tail code; the +15 puts the palignr shift
   back onto the %esi side.  POP is a macro defined outside this chunk.  */
1574 lea (%ecx, %edi,1), %eax
1575 lea 15(%ecx, %esi,1), %edx
1576 POP (%edi)
1577 POP (%esi)
1578 jmp L(less48bytes)
1579 #endif
/* L(exit): common landing point once a 32-byte block is known to contain
   a difference.  As set up by the callers visible in this chunk:
     %xmm1     = pcmpeqb result for the first 16 bytes of the block,
     %edx      = (folded 32-byte equality mask) - 0xffff, non-zero,
     %edi/%esi = already advanced 32 bytes past the start of the block,
     %eax      = value to add to %esi before bytes are read from it
                 (NOTE(review): presumably the palignr shift of the
                 calling path -- confirm).
   Falls through into L(less16bytes) with %edi/%esi positioned 16 bytes
   past the start of the 16-byte half that holds the difference and %edx
   describing that half.  */
1581 cfi_restore_state
1582 cfi_remember_state
1583 .p2align 4
1584 L(exit):
/* %ebx == 0 after the subtraction means the first half matched entirely,
   so the difference is in the second half and %edx already describes it.  */
1585 pmovmskb %xmm1, %ebx
1586 sub $0xffff, %ebx
1587 jz L(first16bytes)
/* Difference is in the first half: step both pointers back 16 bytes and
   use the first-half value as the mask.  */
1588 lea -16(%esi), %esi
1589 lea -16(%edi), %edi
1590 mov %ebx, %edx
1592 L(first16bytes):
1593 add %eax, %esi
/* L(less16bytes), byte-wise variant: locate the first differing byte of a
   16-byte half and return the byte difference.
   %edx is (equality mask - 0xffff), i.e. the two's-complement negation of
   the 16-bit mismatch bitmap.  Negation keeps the lowest set bit in place
   and leaves the bits below it clear, so the lowest set bit of %edx marks
   the first differing byte.  Bits are therefore tested in ascending
   order.  %edi/%esi point 16 bytes past the start of the half, hence the
   -16..-9 offsets used by L(Byte16)..L(Byte23).  */
1594 L(less16bytes):
1596 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
/* No bit set in the low byte: the difference is in bytes 8..15.  */
1597 test %dl, %dl
1598 jz L(next_24_bytes)
1600 test $0x01, %dl
1601 jnz L(Byte16)
1603 test $0x02, %dl
1604 jnz L(Byte17)
1606 test $0x04, %dl
1607 jnz L(Byte18)
1609 test $0x08, %dl
1610 jnz L(Byte19)
1612 test $0x10, %dl
1613 jnz L(Byte20)
1615 test $0x20, %dl
1616 jnz L(Byte21)
1618 test $0x40, %dl
1619 jnz L(Byte22)
/* %dl is non-zero and bits 0..6 are clear, so bit 7 is the one set.  */
1620 L(Byte23):
1621 movzbl -9(%edi), %eax
1622 movzbl -9(%esi), %edx
1623 sub %edx, %eax
/* RETURN is a macro defined outside this chunk.  */
1624 RETURN
/* L(Byte16)..L(Byte22): return handlers for the byte-wise variant.  Each
   loads one zero-extended byte at a fixed negative offset from %edi and
   from %esi and returns (%edi byte) - (%esi byte) in %eax.  They are
   reached both from L(less16bytes) and, with the pointers advanced by 8,
   from L(next_24_bytes).  RETURN is a macro defined outside this chunk.  */
1626 .p2align 4
1627 L(Byte16):
1628 movzbl -16(%edi), %eax
1629 movzbl -16(%esi), %edx
1630 sub %edx, %eax
1631 RETURN
1633 .p2align 4
1634 L(Byte17):
1635 movzbl -15(%edi), %eax
1636 movzbl -15(%esi), %edx
1637 sub %edx, %eax
1638 RETURN
1640 .p2align 4
1641 L(Byte18):
1642 movzbl -14(%edi), %eax
1643 movzbl -14(%esi), %edx
1644 sub %edx, %eax
1645 RETURN
1647 .p2align 4
1648 L(Byte19):
1649 movzbl -13(%edi), %eax
1650 movzbl -13(%esi), %edx
1651 sub %edx, %eax
1652 RETURN
1654 .p2align 4
1655 L(Byte20):
1656 movzbl -12(%edi), %eax
1657 movzbl -12(%esi), %edx
1658 sub %edx, %eax
1659 RETURN
1661 .p2align 4
1662 L(Byte21):
1663 movzbl -11(%edi), %eax
1664 movzbl -11(%esi), %edx
1665 sub %edx, %eax
1666 RETURN
1668 .p2align 4
1669 L(Byte22):
1670 movzbl -10(%edi), %eax
1671 movzbl -10(%esi), %edx
1672 sub %edx, %eax
1673 RETURN
/* L(next_24_bytes): reached from L(less16bytes) when the low byte of the
   mask is zero, i.e. the first difference is in bytes 8..15 of the half.
   Both pointers are advanced by 8 so that the L(Byte16)..L(Byte22)
   handlers (offsets -16..-10) can be reused for bits 0..6 of %dh.  If
   none of those bits is set, control falls through to L(Byte31), which
   reads offset -9 from the advanced pointers (the last byte of the
   half).  RETURN_END is a macro defined outside this chunk.  */
1675 .p2align 4
1676 L(next_24_bytes):
1677 lea 8(%edi), %edi
1678 lea 8(%esi), %esi
1679 test $0x01, %dh
1680 jnz L(Byte16)
1682 test $0x02, %dh
1683 jnz L(Byte17)
1685 test $0x04, %dh
1686 jnz L(Byte18)
1688 test $0x08, %dh
1689 jnz L(Byte19)
1691 test $0x10, %dh
1692 jnz L(Byte20)
1694 test $0x20, %dh
1695 jnz L(Byte21)
1697 test $0x40, %dh
1698 jnz L(Byte22)
1700 .p2align 4
1701 L(Byte31):
1702 movzbl -9(%edi), %eax
1703 movzbl -9(%esi), %edx
1704 sub %edx, %eax
1705 RETURN_END
1706 #elif defined(USE_AS_WMEMCMP)
1708 /* special for wmemcmp */
/* Body of L(less16bytes) for the wmemcmp build: the 16-byte half is
   treated as four 4-byte elements at offsets -16, -12, -8 and -4 from
   %edi/%esi.  Each element owns one nibble of the mask in %dl/%dh, and
   the lowest non-zero nibble selects the first differing element.  The
   elements are compared as signed 32-bit values (jg); the result is +1
   when the %edi-side element is greater and -1 otherwise.
   "mov $1, %eax" does not modify EFLAGS, so jg still sees the cmp.
   RETURN / RETURN_END are macros defined outside this chunk.  */
1709 test %dl, %dl
1710 jz L(next_two_double_words)
/* Low nibble clear (with %dl non-zero) means the second element differs.  */
1711 and $15, %dl
1712 jz L(second_double_word)
1713 mov -16(%edi), %ecx
1714 cmp -16(%esi), %ecx
1715 mov $1, %eax
1716 jg L(nequal_bigger)
1717 neg %eax
1718 RETURN
1720 .p2align 4
1721 L(second_double_word):
1722 mov -12(%edi), %ecx
1723 cmp -12(%esi), %ecx
1724 mov $1, %eax
1725 jg L(nequal_bigger)
1726 neg %eax
1727 RETURN
1729 .p2align 4
1730 L(next_two_double_words):
/* Same nibble test on the high byte of the mask: third or fourth element.  */
1731 and $15, %dh
1732 jz L(fourth_double_word)
1733 mov -8(%edi), %ecx
1734 cmp -8(%esi), %ecx
1735 mov $1, %eax
1736 jg L(nequal_bigger)
1737 neg %eax
1738 RETURN
1740 .p2align 4
1741 L(fourth_double_word):
1742 mov -4(%edi), %ecx
1743 cmp -4(%esi), %ecx
1744 mov $1, %eax
1745 jg L(nequal_bigger)
1746 neg %eax
1747 RETURN
1749 .p2align 4
/* Shared "greater" exit: %eax already holds +1.  */
1750 L(nequal_bigger):
1751 RETURN_END
1753 #elif defined(USE_AS_MEMCMP16)
1755 /* special for __memcmp16 */
/* Body of L(less16bytes) for the memcmp16 build: the 16-byte half is
   treated as eight 2-byte elements at offsets -16..-2 from %edi/%esi.
   Each element owns two adjacent bits of the mask in %dl/%dh.  The mask
   is narrowed by testing 4 bits, then 2 bits (masks $15, $3, $63), so
   the lowest set bit selects the first differing element.  The result
   is the difference of the two zero-extended 16-bit values,
   (%edi side) - (%esi side).  RETURN is a macro defined outside this
   chunk.  */
1756 test %dl, %dl
1757 jz L(next_four_words)
1758 test $15, %dl
1759 jz L(second_two_words)
1760 test $3, %dl
1761 jz L(second_word)
1762 movzwl -16(%edi), %eax
1763 movzwl -16(%esi), %ebx
1764 subl %ebx, %eax
1765 RETURN
1767 .p2align 4
1768 L(second_word):
1769 movzwl -14(%edi), %eax
1770 movzwl -14(%esi), %ebx
1771 subl %ebx, %eax
1772 RETURN
1774 .p2align 4
1775 L(second_two_words):
/* Bits 0..3 are known to be clear here, so $63 effectively tests bits 4..5.  */
1776 test $63, %dl
1777 jz L(fourth_word)
1778 movzwl -12(%edi), %eax
1779 movzwl -12(%esi), %ebx
1780 subl %ebx, %eax
1781 RETURN
1783 .p2align 4
1784 L(fourth_word):
1785 movzwl -10(%edi), %eax
1786 movzwl -10(%esi), %ebx
1787 subl %ebx, %eax
1788 RETURN
1790 .p2align 4
1791 L(next_four_words):
/* Low byte of the mask was zero: repeat the same narrowing on %dh for the
   elements at -8, -6, -4 and -2.  */
1792 test $15, %dh
1793 jz L(fourth_two_words)
1794 test $3, %dh
1795 jz L(sixth_word)
1796 movzwl -8(%edi), %eax
1797 movzwl -8(%esi), %ebx
1798 subl %ebx, %eax
1799 RETURN
1801 .p2align 4
1802 L(sixth_word):
1803 movzwl -6(%edi), %eax
1804 movzwl -6(%esi), %ebx
1805 subl %ebx, %eax
1806 RETURN
1808 .p2align 4
1809 L(fourth_two_words):
1810 test $63, %dh
1811 jz L(eighth_word)
1812 movzwl -4(%edi), %eax
1813 movzwl -4(%esi), %ebx
1814 subl %ebx, %eax
1815 RETURN
1817 .p2align 4
1818 L(eighth_word):
1819 movzwl -2(%edi), %eax
1820 movzwl -2(%esi), %ebx
1821 subl %ebx, %eax
1822 RETURN
1823 #else
1824 # error Unreachable preprocessor case
1825 #endif
/* L(more8bytes): tail dispatch, reached from L(less48bytes) when
   %ecx >= 8.  Counts of 16 or more are passed on to L(more16bytes);
   otherwise control jumps to the L(Nbytes) entry matching %ecx.
   The last candidate of each variant is taken without a compare, so the
   code relies on %ecx being one of the listed values: 8..15 for the byte
   variant, 8 or 12 for the wide-char variant, 8/10/12/14 for UTF-16.
   CFI_PUSH is a macro defined outside this chunk (NOTE(review):
   presumably unwind bookkeeping only, since the preceding code returns
   before reaching it -- confirm).  */
1827 CFI_PUSH (%ebx)
1829 .p2align 4
1830 L(more8bytes):
1831 cmp $16, %ecx
1832 jae L(more16bytes)
1833 cmp $8, %ecx
1834 je L(8bytes)
1835 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
1836 cmp $9, %ecx
1837 je L(9bytes)
1838 cmp $10, %ecx
1839 je L(10bytes)
1840 cmp $11, %ecx
1841 je L(11bytes)
1842 cmp $12, %ecx
1843 je L(12bytes)
1844 cmp $13, %ecx
1845 je L(13bytes)
1846 cmp $14, %ecx
1847 je L(14bytes)
1848 jmp L(15bytes)
1849 #elif defined(USE_WCHAR) && !defined(USE_UTF16)
1850 jmp L(12bytes)
1851 #elif defined(USE_UTF16) && !defined(USE_WCHAR)
1852 cmp $10, %ecx
1853 je L(10bytes)
1854 cmp $12, %ecx
1855 je L(12bytes)
1856 jmp L(14bytes)
1857 #else
1858 # error Unreachable preprocessor case
1859 #endif
/* L(more16bytes): tail dispatch, reached from L(more8bytes) when
   %ecx >= 16.  Counts of 24 or more are passed on to L(more24bytes);
   otherwise control jumps to the L(Nbytes) entry matching %ecx.
   The last candidate of each variant is taken without a compare, so the
   code relies on %ecx being one of the listed values: 16..23 for the byte
   variant, 16 or 20 for the wide-char variant, 16/18/20/22 for UTF-16.  */
1861 .p2align 4
1862 L(more16bytes):
1863 cmp $24, %ecx
1864 jae L(more24bytes)
1865 cmp $16, %ecx
1866 je L(16bytes)
1867 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
1868 cmp $17, %ecx
1869 je L(17bytes)
1870 cmp $18, %ecx
1871 je L(18bytes)
1872 cmp $19, %ecx
1873 je L(19bytes)
1874 cmp $20, %ecx
1875 je L(20bytes)
1876 cmp $21, %ecx
1877 je L(21bytes)
1878 cmp $22, %ecx
1879 je L(22bytes)
1880 jmp L(23bytes)
1881 #elif defined(USE_WCHAR) && !defined(USE_UTF16)
1882 jmp L(20bytes)
1883 #elif defined(USE_UTF16) && !defined(USE_WCHAR)
1884 cmp $18, %ecx
1885 je L(18bytes)
1886 cmp $20, %ecx
1887 je L(20bytes)
1888 jmp L(22bytes)
1889 #else
1890 # error Unreachable preprocessor case
1891 #endif
/* L(more24bytes): tail dispatch, reached from L(more16bytes) when
   %ecx >= 24.  Counts of 32 or more are passed on to L(more32bytes);
   otherwise control jumps to the L(Nbytes) entry matching %ecx.
   The last candidate of each variant is taken without a compare, so the
   code relies on %ecx being one of the listed values: 24..31 for the byte
   variant, 24 or 28 for the wide-char variant, 24/26/28/30 for UTF-16.  */
1893 .p2align 4
1894 L(more24bytes):
1895 cmp $32, %ecx
1896 jae L(more32bytes)
1897 cmp $24, %ecx
1898 je L(24bytes)
1899 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
1900 cmp $25, %ecx
1901 je L(25bytes)
1902 cmp $26, %ecx
1903 je L(26bytes)
1904 cmp $27, %ecx
1905 je L(27bytes)
1906 cmp $28, %ecx
1907 je L(28bytes)
1908 cmp $29, %ecx
1909 je L(29bytes)
1910 cmp $30, %ecx
1911 je L(30bytes)
1912 jmp L(31bytes)
1913 #elif defined(USE_WCHAR) && !defined(USE_UTF16)
1914 jmp L(28bytes)
1915 #elif defined(USE_UTF16) && !defined(USE_WCHAR)
1916 cmp $26, %ecx
1917 je L(26bytes)
1918 cmp $28, %ecx
1919 je L(28bytes)
1920 jmp L(30bytes)
1921 #else
1922 # error Unreachable preprocessor case
1923 #endif
/* L(more32bytes): tail dispatch, reached from L(more24bytes) when
   %ecx >= 32.  Counts of 40 or more are passed on to L(more40bytes);
   otherwise control jumps to the L(Nbytes) entry matching %ecx.
   The last candidate of each variant is taken without a compare, so the
   code relies on %ecx being one of the listed values: 32..39 for the byte
   variant, 32 or 36 for the wide-char variant, 32/34/36/38 for UTF-16.  */
1925 .p2align 4
1926 L(more32bytes):
1927 cmp $40, %ecx
1928 jae L(more40bytes)
1929 cmp $32, %ecx
1930 je L(32bytes)
1931 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
1932 cmp $33, %ecx
1933 je L(33bytes)
1934 cmp $34, %ecx
1935 je L(34bytes)
1936 cmp $35, %ecx
1937 je L(35bytes)
1938 cmp $36, %ecx
1939 je L(36bytes)
1940 cmp $37, %ecx
1941 je L(37bytes)
1942 cmp $38, %ecx
1943 je L(38bytes)
1944 jmp L(39bytes)
1945 #elif defined(USE_WCHAR) && !defined(USE_UTF16)
1946 jmp L(36bytes)
1947 #elif defined(USE_UTF16) && !defined(USE_WCHAR)
1948 cmp $34, %ecx
1949 je L(34bytes)
1950 cmp $36, %ecx
1951 je L(36bytes)
1952 jmp L(38bytes)
1953 #else
1954 # error Unreachable preprocessor case
1955 #endif
/* L(less48bytes): entry of the tail compare.  On entry %ecx is the number
   of bytes left and %eax/%edx point just past the end of the two buffers
   (the L(Nbytes) handlers read at negative offsets from them).  Counts of
   8 or more go to L(more8bytes); smaller counts are dispatched here.
   The last candidate of each variant is taken without a compare: any
   count below 8 that is not listed lands on L(7bytes) (byte variant),
   L(4bytes) (wide-char) or L(6bytes) (UTF-16).
   NOTE(review): in the byte variant a count of 0 or 1 would therefore
   reach L(7bytes); confirm that callers either exclude those counts or
   rely on the preceding bytes having already compared equal.  */
1957 .p2align 4
1958 L(less48bytes):
1959 cmp $8, %ecx
1960 jae L(more8bytes)
1961 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
1962 cmp $2, %ecx
1963 je L(2bytes)
1964 cmp $3, %ecx
1965 je L(3bytes)
1966 cmp $4, %ecx
1967 je L(4bytes)
1968 cmp $5, %ecx
1969 je L(5bytes)
1970 cmp $6, %ecx
1971 je L(6bytes)
1972 jmp L(7bytes)
1973 #elif defined(USE_WCHAR) && !defined(USE_UTF16)
1974 jmp L(4bytes)
1975 #elif defined(USE_UTF16) && !defined(USE_WCHAR)
1976 cmp $2, %ecx
1977 je L(2bytes)
1978 cmp $4, %ecx
1979 je L(4bytes)
1980 jmp L(6bytes)
1981 #else
1982 # error Unreachable preprocessor case
1983 #endif
/* L(more40bytes): tail dispatch, reached from L(more32bytes) when
   %ecx >= 40.  Control jumps to the L(Nbytes) entry matching %ecx.
   The last candidate of each variant is taken without a compare, so the
   code relies on %ecx being one of the listed values: 40..47 for the byte
   variant, 40 or 44 for the wide-char variant, 40/42/44/46 for UTF-16.
   Every variant ends in an explicit jump, and an unsupported macro
   combination is rejected at preprocessing time, matching
   L(more8bytes)..L(more32bytes) and L(less48bytes).  */
1985 .p2align 4
1986 L(more40bytes):
1987 cmp $40, %ecx
1988 je L(40bytes)
1989 #if !defined(USE_WCHAR) && !defined(USE_UTF16)
1990 cmp $41, %ecx
1991 je L(41bytes)
1992 cmp $42, %ecx
1993 je L(42bytes)
1994 cmp $43, %ecx
1995 je L(43bytes)
1996 cmp $44, %ecx
1997 je L(44bytes)
1998 cmp $45, %ecx
1999 je L(45bytes)
2000 cmp $46, %ecx
2001 je L(46bytes)
2002 jmp L(47bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
/* Explicit jump rather than falling off the end of this block, through
   the alignment padding, into whichever L(44bytes) happens to be
   assembled next.  The target is the same, but no longer depends on the
   order of the code that follows.  */
jmp L(44bytes)
2003 #elif defined(USE_UTF16) && !defined(USE_WCHAR)
2004 cmp $42, %ecx
2005 je L(42bytes)
2006 cmp $44, %ecx
2007 je L(44bytes)
2008 jmp L(46bytes)
#else
# error Unreachable preprocessor case
2009 #endif
2011 #if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
/* L(44bytes)..L(4bytes), byte-wise build: fall-through chain for tails
   whose length is a multiple of 4.  %eax/%edx point just past the end of
   the two buffers; entering at L(Nbytes) compares the last N bytes, one
   32-bit load per step.  On a mismatch control goes to L(find_diff) with
   the %eax-side dword in %ecx and the %edx-side dword in %ebx.  If
   everything matches the result is 0.  POP / CFI_PUSH are macros defined
   outside this chunk.  */
2012 .p2align 4
2013 L(44bytes):
2014 mov -44(%eax), %ecx
2015 mov -44(%edx), %ebx
2016 cmp %ebx, %ecx
2017 jne L(find_diff)
2018 L(40bytes):
2019 mov -40(%eax), %ecx
2020 mov -40(%edx), %ebx
2021 cmp %ebx, %ecx
2022 jne L(find_diff)
2023 L(36bytes):
2024 mov -36(%eax), %ecx
2025 mov -36(%edx), %ebx
2026 cmp %ebx, %ecx
2027 jne L(find_diff)
2028 L(32bytes):
2029 mov -32(%eax), %ecx
2030 mov -32(%edx), %ebx
2031 cmp %ebx, %ecx
2032 jne L(find_diff)
2033 L(28bytes):
2034 mov -28(%eax), %ecx
2035 mov -28(%edx), %ebx
2036 cmp %ebx, %ecx
2037 jne L(find_diff)
2038 L(24bytes):
2039 mov -24(%eax), %ecx
2040 mov -24(%edx), %ebx
2041 cmp %ebx, %ecx
2042 jne L(find_diff)
2043 L(20bytes):
2044 mov -20(%eax), %ecx
2045 mov -20(%edx), %ebx
2046 cmp %ebx, %ecx
2047 jne L(find_diff)
2048 L(16bytes):
2049 mov -16(%eax), %ecx
2050 mov -16(%edx), %ebx
2051 cmp %ebx, %ecx
2052 jne L(find_diff)
2053 L(12bytes):
2054 mov -12(%eax), %ecx
2055 mov -12(%edx), %ebx
2056 cmp %ebx, %ecx
2057 jne L(find_diff)
2058 L(8bytes):
2059 mov -8(%eax), %ecx
2060 mov -8(%edx), %ebx
2061 cmp %ebx, %ecx
2062 jne L(find_diff)
2063 L(4bytes):
2064 mov -4(%eax), %ecx
2065 mov -4(%edx), %ebx
2066 cmp %ebx, %ecx
/* "mov $0" is used for the zero result because, unlike xor, it leaves the
   flags of the cmp above intact for the jne.  */
2067 mov $0, %eax
2068 jne L(find_diff)
2069 POP (%ebx)
2070 ret
2071 CFI_PUSH (%ebx)
2072 #elif defined(USE_AS_WMEMCMP)
/* L(44bytes)..L(4bytes), wmemcmp build: fall-through chain comparing the
   tail one 4-byte element per step.  %eax/%edx point just past the end of
   the two buffers; entering at L(Nbytes) compares the last N bytes.  On a
   mismatch control goes to L(find_diff), which decides the sign from the
   flags left by the cmp (%eax-side element minus %edx-side element).  If
   everything matches the result is 0.  POP / CFI_PUSH are macros defined
   outside this chunk.  */
2074 .p2align 4
2075 L(44bytes):
2076 mov -44(%eax), %ecx
2077 cmp -44(%edx), %ecx
2078 jne L(find_diff)
2079 L(40bytes):
2080 mov -40(%eax), %ecx
2081 cmp -40(%edx), %ecx
2082 jne L(find_diff)
2083 L(36bytes):
2084 mov -36(%eax), %ecx
2085 cmp -36(%edx), %ecx
2086 jne L(find_diff)
2087 L(32bytes):
2088 mov -32(%eax), %ecx
2089 cmp -32(%edx), %ecx
2090 jne L(find_diff)
2091 L(28bytes):
2092 mov -28(%eax), %ecx
2093 cmp -28(%edx), %ecx
2094 jne L(find_diff)
2095 L(24bytes):
2096 mov -24(%eax), %ecx
2097 cmp -24(%edx), %ecx
2098 jne L(find_diff)
2099 L(20bytes):
2100 mov -20(%eax), %ecx
2101 cmp -20(%edx), %ecx
2102 jne L(find_diff)
2103 L(16bytes):
2104 mov -16(%eax), %ecx
2105 cmp -16(%edx), %ecx
2106 jne L(find_diff)
2107 L(12bytes):
2108 mov -12(%eax), %ecx
2109 cmp -12(%edx), %ecx
2110 jne L(find_diff)
2111 L(8bytes):
2112 mov -8(%eax), %ecx
2113 cmp -8(%edx), %ecx
2114 jne L(find_diff)
2115 L(4bytes):
2116 mov -4(%eax), %ecx
/* The zero result is set up before the cmp because xor modifies the
   flags; %eax is no longer needed as a pointer once the load is done.  */
2117 xor %eax, %eax
2118 cmp -4(%edx), %ecx
2119 jne L(find_diff)
2120 POP (%ebx)
2121 ret
2122 CFI_PUSH (%ebx)
2123 #elif defined USE_AS_MEMCMP16
/* L(46bytes)..L(2bytes), memcmp16 build: fall-through chain comparing the
   tail one 2-byte element per step.  %eax/%edx point just past the end of
   the two buffers; entering at L(Nbytes) compares the last N bytes.  Each
   step zero-extends both 16-bit values and subtracts them; a non-zero
   difference (%eax side minus %edx side, in %ecx) is returned through
   L(memcmp16_exit).  The final step computes the difference directly in
   %eax, which is 0 when the last elements are equal.  POP / CFI_PUSH are
   macros defined outside this chunk.  */
2125 .p2align 4
2126 L(46bytes):
2127 movzwl -46(%eax), %ecx
2128 movzwl -46(%edx), %ebx
2129 subl %ebx, %ecx
2130 jne L(memcmp16_exit)
2131 L(44bytes):
2132 movzwl -44(%eax), %ecx
2133 movzwl -44(%edx), %ebx
2134 subl %ebx, %ecx
2135 jne L(memcmp16_exit)
2136 L(42bytes):
2137 movzwl -42(%eax), %ecx
2138 movzwl -42(%edx), %ebx
2139 subl %ebx, %ecx
2140 jne L(memcmp16_exit)
2141 L(40bytes):
2142 movzwl -40(%eax), %ecx
2143 movzwl -40(%edx), %ebx
2144 subl %ebx, %ecx
2145 jne L(memcmp16_exit)
2146 L(38bytes):
2147 movzwl -38(%eax), %ecx
2148 movzwl -38(%edx), %ebx
2149 subl %ebx, %ecx
2150 jne L(memcmp16_exit)
2151 L(36bytes):
2152 movzwl -36(%eax), %ecx
2153 movzwl -36(%edx), %ebx
2154 subl %ebx, %ecx
2155 jne L(memcmp16_exit)
2156 L(34bytes):
2157 movzwl -34(%eax), %ecx
2158 movzwl -34(%edx), %ebx
2159 subl %ebx, %ecx
2160 jne L(memcmp16_exit)
2161 L(32bytes):
2162 movzwl -32(%eax), %ecx
2163 movzwl -32(%edx), %ebx
2164 subl %ebx, %ecx
2165 jne L(memcmp16_exit)
2166 L(30bytes):
2167 movzwl -30(%eax), %ecx
2168 movzwl -30(%edx), %ebx
2169 subl %ebx, %ecx
2170 jne L(memcmp16_exit)
2171 L(28bytes):
2172 movzwl -28(%eax), %ecx
2173 movzwl -28(%edx), %ebx
2174 subl %ebx, %ecx
2175 jne L(memcmp16_exit)
2176 L(26bytes):
2177 movzwl -26(%eax), %ecx
2178 movzwl -26(%edx), %ebx
2179 subl %ebx, %ecx
2180 jne L(memcmp16_exit)
2181 L(24bytes):
2182 movzwl -24(%eax), %ecx
2183 movzwl -24(%edx), %ebx
2184 subl %ebx, %ecx
2185 jne L(memcmp16_exit)
2186 L(22bytes):
2187 movzwl -22(%eax), %ecx
2188 movzwl -22(%edx), %ebx
2189 subl %ebx, %ecx
2190 jne L(memcmp16_exit)
2191 L(20bytes):
2192 movzwl -20(%eax), %ecx
2193 movzwl -20(%edx), %ebx
2194 subl %ebx, %ecx
2195 jne L(memcmp16_exit)
2196 L(18bytes):
2197 movzwl -18(%eax), %ecx
2198 movzwl -18(%edx), %ebx
2199 subl %ebx, %ecx
2200 jne L(memcmp16_exit)
2201 L(16bytes):
2202 movzwl -16(%eax), %ecx
2203 movzwl -16(%edx), %ebx
2204 subl %ebx, %ecx
2205 jne L(memcmp16_exit)
2206 L(14bytes):
2207 movzwl -14(%eax), %ecx
2208 movzwl -14(%edx), %ebx
2209 subl %ebx, %ecx
2210 jne L(memcmp16_exit)
2211 L(12bytes):
2212 movzwl -12(%eax), %ecx
2213 movzwl -12(%edx), %ebx
2214 subl %ebx, %ecx
2215 jne L(memcmp16_exit)
2216 L(10bytes):
2217 movzwl -10(%eax), %ecx
2218 movzwl -10(%edx), %ebx
2219 subl %ebx, %ecx
2220 jne L(memcmp16_exit)
2221 L(8bytes):
2222 movzwl -8(%eax), %ecx
2223 movzwl -8(%edx), %ebx
2224 subl %ebx, %ecx
2225 jne L(memcmp16_exit)
2226 L(6bytes):
2227 movzwl -6(%eax), %ecx
2228 movzwl -6(%edx), %ebx
2229 subl %ebx, %ecx
2230 jne L(memcmp16_exit)
2231 L(4bytes):
2232 movzwl -4(%eax), %ecx
2233 movzwl -4(%edx), %ebx
2234 subl %ebx, %ecx
2235 jne L(memcmp16_exit)
2236 L(2bytes):
/* Last element: the difference itself is the return value.  */
2237 movzwl -2(%eax), %eax
2238 movzwl -2(%edx), %ebx
2239 subl %ebx, %eax
2240 POP (%ebx)
2241 ret
2242 CFI_PUSH (%ebx)
2243 #else
2244 # error Unreachable preprocessor case
2245 #endif
2247 #if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
/* L(45bytes)..L(5bytes), byte-wise build: fall-through chain for tails
   whose length is 1 modulo 4.  %eax/%edx point just past the end of the
   two buffers; entering at L(Nbytes) compares the last N bytes with one
   (unaligned) 32-bit load per step, then the final single byte at -1.
   A dword mismatch goes to L(find_diff) with the %eax-side value in %ecx
   and the %edx-side value in %ebx; a mismatch in the final byte goes
   straight to L(end), which derives the sign from the flags of the last
   cmp.  If everything matches the result is 0.  POP / CFI_PUSH are
   macros defined outside this chunk.  */
2249 .p2align 4
2250 L(45bytes):
2251 mov -45(%eax), %ecx
2252 mov -45(%edx), %ebx
2253 cmp %ebx, %ecx
2254 jne L(find_diff)
2255 L(41bytes):
2256 mov -41(%eax), %ecx
2257 mov -41(%edx), %ebx
2258 cmp %ebx, %ecx
2259 jne L(find_diff)
2260 L(37bytes):
2261 mov -37(%eax), %ecx
2262 mov -37(%edx), %ebx
2263 cmp %ebx, %ecx
2264 jne L(find_diff)
2265 L(33bytes):
2266 mov -33(%eax), %ecx
2267 mov -33(%edx), %ebx
2268 cmp %ebx, %ecx
2269 jne L(find_diff)
2270 L(29bytes):
2271 mov -29(%eax), %ecx
2272 mov -29(%edx), %ebx
2273 cmp %ebx, %ecx
2274 jne L(find_diff)
2275 L(25bytes):
2276 mov -25(%eax), %ecx
2277 mov -25(%edx), %ebx
2278 cmp %ebx, %ecx
2279 jne L(find_diff)
2280 L(21bytes):
2281 mov -21(%eax), %ecx
2282 mov -21(%edx), %ebx
2283 cmp %ebx, %ecx
2284 jne L(find_diff)
2285 L(17bytes):
2286 mov -17(%eax), %ecx
2287 mov -17(%edx), %ebx
2288 cmp %ebx, %ecx
2289 jne L(find_diff)
2290 L(13bytes):
2291 mov -13(%eax), %ecx
2292 mov -13(%edx), %ebx
2293 cmp %ebx, %ecx
2294 jne L(find_diff)
2295 L(9bytes):
2296 mov -9(%eax), %ecx
2297 mov -9(%edx), %ebx
2298 cmp %ebx, %ecx
2299 jne L(find_diff)
2300 L(5bytes):
2301 mov -5(%eax), %ecx
2302 mov -5(%edx), %ebx
2303 cmp %ebx, %ecx
2304 jne L(find_diff)
/* Final byte.  "mov $0" sets the equal result without disturbing the
   flags of the cmp that the jne (and L(end)) depend on.  */
2305 movzbl -1(%eax), %ecx
2306 cmp -1(%edx), %cl
2307 mov $0, %eax
2308 jne L(end)
2309 POP (%ebx)
2310 ret
2311 CFI_PUSH (%ebx)
/* L(46bytes)..L(2bytes), byte-wise build: fall-through chain for tails
   whose length is 2 modulo 4.  %eax/%edx point just past the end of the
   two buffers; entering at L(Nbytes) compares the last N bytes with one
   (unaligned) 32-bit load per step, then the final two bytes.
   A dword mismatch goes to L(find_diff) with the %eax-side value in %ecx
   and the %edx-side value in %ebx; a mismatch in the final bytes goes
   straight to L(end), which derives the sign from the flags of the last
   cmp.  If everything matches the result is 0.  POP / CFI_PUSH are
   macros defined outside this chunk.  */
2313 .p2align 4
2314 L(46bytes):
2315 mov -46(%eax), %ecx
2316 mov -46(%edx), %ebx
2317 cmp %ebx, %ecx
2318 jne L(find_diff)
2319 L(42bytes):
2320 mov -42(%eax), %ecx
2321 mov -42(%edx), %ebx
2322 cmp %ebx, %ecx
2323 jne L(find_diff)
2324 L(38bytes):
2325 mov -38(%eax), %ecx
2326 mov -38(%edx), %ebx
2327 cmp %ebx, %ecx
2328 jne L(find_diff)
2329 L(34bytes):
2330 mov -34(%eax), %ecx
2331 mov -34(%edx), %ebx
2332 cmp %ebx, %ecx
2333 jne L(find_diff)
2334 L(30bytes):
2335 mov -30(%eax), %ecx
2336 mov -30(%edx), %ebx
2337 cmp %ebx, %ecx
2338 jne L(find_diff)
2339 L(26bytes):
2340 mov -26(%eax), %ecx
2341 mov -26(%edx), %ebx
2342 cmp %ebx, %ecx
2343 jne L(find_diff)
2344 L(22bytes):
2345 mov -22(%eax), %ecx
2346 mov -22(%edx), %ebx
2347 cmp %ebx, %ecx
2348 jne L(find_diff)
2349 L(18bytes):
2350 mov -18(%eax), %ecx
2351 mov -18(%edx), %ebx
2352 cmp %ebx, %ecx
2353 jne L(find_diff)
2354 L(14bytes):
2355 mov -14(%eax), %ecx
2356 mov -14(%edx), %ebx
2357 cmp %ebx, %ecx
2358 jne L(find_diff)
2359 L(10bytes):
2360 mov -10(%eax), %ecx
2361 mov -10(%edx), %ebx
2362 cmp %ebx, %ecx
2363 jne L(find_diff)
2364 L(6bytes):
2365 mov -6(%eax), %ecx
2366 mov -6(%edx), %ebx
2367 cmp %ebx, %ecx
2368 jne L(find_diff)
2369 L(2bytes):
/* Final two bytes, loaded as one 16-bit value per side and compared in
   memory order: low byte (lower address) first, then the high byte.  */
2370 movzwl -2(%eax), %ecx
2371 movzwl -2(%edx), %ebx
2372 cmp %bl, %cl
2373 jne L(end)
2374 cmp %bh, %ch
/* "mov $0" sets the equal result without disturbing the cmp flags.  */
2375 mov $0, %eax
2376 jne L(end)
2377 POP (%ebx)
2378 ret
2379 CFI_PUSH (%ebx)
/* L(47bytes)..L(3bytes), byte-wise build: fall-through chain for tails
   whose length is 3 modulo 4.  %eax/%edx point just past the end of the
   two buffers; entering at L(Nbytes) compares the last N bytes with one
   (unaligned) 32-bit load per step, then the final three bytes.
   A dword mismatch goes to L(find_diff) with the %eax-side value in %ecx
   and the %edx-side value in %ebx; a mismatch in the final bytes goes
   straight to L(end), which derives the sign from the flags of the last
   cmp.  If everything matches the result is 0.  POP / CFI_PUSH are
   macros defined outside this chunk.  */
2381 .p2align 4
2382 L(47bytes):
2383 movl -47(%eax), %ecx
2384 movl -47(%edx), %ebx
2385 cmp %ebx, %ecx
2386 jne L(find_diff)
2387 L(43bytes):
2388 movl -43(%eax), %ecx
2389 movl -43(%edx), %ebx
2390 cmp %ebx, %ecx
2391 jne L(find_diff)
2392 L(39bytes):
2393 movl -39(%eax), %ecx
2394 movl -39(%edx), %ebx
2395 cmp %ebx, %ecx
2396 jne L(find_diff)
2397 L(35bytes):
2398 movl -35(%eax), %ecx
2399 movl -35(%edx), %ebx
2400 cmp %ebx, %ecx
2401 jne L(find_diff)
2402 L(31bytes):
2403 movl -31(%eax), %ecx
2404 movl -31(%edx), %ebx
2405 cmp %ebx, %ecx
2406 jne L(find_diff)
2407 L(27bytes):
2408 movl -27(%eax), %ecx
2409 movl -27(%edx), %ebx
2410 cmp %ebx, %ecx
2411 jne L(find_diff)
2412 L(23bytes):
2413 movl -23(%eax), %ecx
2414 movl -23(%edx), %ebx
2415 cmp %ebx, %ecx
2416 jne L(find_diff)
2417 L(19bytes):
2418 movl -19(%eax), %ecx
2419 movl -19(%edx), %ebx
2420 cmp %ebx, %ecx
2421 jne L(find_diff)
2422 L(15bytes):
2423 movl -15(%eax), %ecx
2424 movl -15(%edx), %ebx
2425 cmp %ebx, %ecx
2426 jne L(find_diff)
2427 L(11bytes):
2428 movl -11(%eax), %ecx
2429 movl -11(%edx), %ebx
2430 cmp %ebx, %ecx
2431 jne L(find_diff)
2432 L(7bytes):
2433 movl -7(%eax), %ecx
2434 movl -7(%edx), %ebx
2435 cmp %ebx, %ecx
2436 jne L(find_diff)
2437 L(3bytes):
/* Final three bytes: a 16-bit value at -3 (low byte first; once the low
   bytes are equal the 16-bit compare is decided by the second byte),
   followed by the single byte at -1.  */
2438 movzwl -3(%eax), %ecx
2439 movzwl -3(%edx), %ebx
2440 cmpb %bl, %cl
2441 jne L(end)
2442 cmp %bx, %cx
2443 jne L(end)
2444 movzbl -1(%eax), %eax
2445 cmpb -1(%edx), %al
/* "mov $0" sets the equal result without disturbing the cmp flags.  */
2446 mov $0, %eax
2447 jne L(end)
2448 POP (%ebx)
2449 ret
2450 CFI_PUSH (%ebx)
/* L(find_diff), byte-wise build: %ecx (%eax-side) and %ebx (%edx-side)
   hold two dwords known to differ.  x86 is little-endian, so the lowest
   register byte is the byte at the lowest address; the bytes are
   therefore examined from the low end upward so that the first differing
   byte in memory order decides the result.  After a low-byte compare
   comes out equal, the following 16-bit compare is decided by the second
   byte.  The final cmp falls through into L(end) with its flags.

   L(end): turns the flags of the most recent cmp into the return value:
   +1 if the %eax-side byte is above (unsigned) the %edx-side byte, -1
   otherwise.  The POP macro (defined outside this chunk) and mov are
   expected to leave EFLAGS untouched between the cmp and the ja.  */
2452 .p2align 4
2453 L(find_diff):
2454 cmpb %bl, %cl
2455 jne L(end)
2456 cmp %bx, %cx
2457 jne L(end)
/* Bring bytes 2 and 3 down into the low halves.  */
2458 shr $16,%ecx
2459 shr $16,%ebx
2460 cmp %bl, %cl
2461 jne L(end)
2462 cmp %bx, %cx
2464 .p2align 4
2465 L(end):
2466 POP (%ebx)
2467 mov $1, %eax
2468 ja L(bigger)
2469 neg %eax
2470 L(bigger):
2471 ret
2472 #elif defined(USE_AS_WMEMCMP)
/* L(find_diff), wmemcmp build: entered with the flags of a cmp between
   two differing 4-byte elements (%eax-side value compared against the
   %edx-side value).  Returns +1 if the %eax-side element is greater as a
   signed 32-bit value, -1 otherwise.  The POP macro (defined outside
   this chunk) and mov are expected to leave EFLAGS untouched between
   that cmp and the jg.  */
2474 .p2align 4
2475 L(find_diff):
2476 POP (%ebx)
2477 mov $1, %eax
2478 jg L(find_diff_bigger)
2479 neg %eax
2480 ret
2482 .p2align 4
2483 L(find_diff_bigger):
2484 ret
2486 #elif defined(USE_AS_MEMCMP16)
/* L(memcmp16_exit): return path of the memcmp16 tail chain.  %ecx holds
   the non-zero difference of the first differing pair of 16-bit
   elements; it becomes the return value.  POP is a macro defined outside
   this chunk.  */
2488 .p2align 4
2489 L(memcmp16_exit):
2490 POP (%ebx)
2491 mov %ecx, %eax
2492 ret
2493 #else
2494 # error Unreachable preprocessor case
2495 #endif
2496 END (MEMCMP)