1 /*
2 Copyright (c) 2011, Intel Corporation
3 All rights reserved.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
/* Local-label helper plus thin wrappers around the GNU as CFI directives.
   Every macro is defined only when the including environment has not
   already supplied its own version.  */

#if !defined (L)
# define L(label)			.L##label
#endif

#if !defined (cfi_startproc)
# define cfi_startproc			.cfi_startproc
#endif

#if !defined (cfi_endproc)
# define cfi_endproc			.cfi_endproc
#endif

#if !defined (cfi_rel_offset)
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#if !defined (cfi_restore)
# define cfi_restore(reg)		.cfi_restore reg
#endif

#if !defined (cfi_adjust_cfa_offset)
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif
/* ENTRY (name): export NAME as a function symbol, align it to 16 bytes,
   emit its label and open a CFI region.  */
#if !defined (ENTRY)
# define ENTRY(name)			\
	.globl name;			\
	.type name, @function;		\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* END (name): close the CFI region opened by ENTRY and record the size of
   NAME as the distance from its label to this point.  */
#if !defined (END)
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif
/* Unwind-info bookkeeping for one 4-byte push of REG: the CFA moves 4
   bytes further from %esp and REG is saved at the new top of stack.  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind-info bookkeeping for one 4-byte pop of REG: the CFA moves 4 bytes
   closer to %esp and REG is back in its own register.  */
#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Real push/pop instructions paired with the matching CFI annotation.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset from %esp to the first argument once ENTRANCE has run:
   4 bytes of return address + 4 bytes for the %edi pushed by ENTRANCE.  */
#define PARMS		8
#define STR1		PARMS
#define STR2		STR1+4

/* Function prologue: save %edi.  */
#define ENTRANCE	PUSH(%edi);

/* Function epilogue: restore %edi and return.  The trailing CFI_PUSH puts
   the unwind state back to "%edi is on the stack" for whatever code
   follows the ret in the file.  */
#define RETURN		POP (%edi); ret; CFI_PUSH (%edi);
/* char *strrchr (const char *s, int c) -- 32-bit x86, SSE2, AT&T syntax.

   In:    STR1(%esp) = s, STR2(%esp) = c (offsets valid after ENTRANCE has
	  pushed %edi).
   Out:   %eax = address of the last byte equal to (char) c located at or
	  before the first NUL byte of s, or 0 when there is no such byte.
   Saved: %edi always; %esi and %ebx (pushed in that order) once the
	  aligned loop is entered.  All three are restored before returning.
   Uses:  %ecx, %edx, %xmm0-%xmm3 and the flags as scratch.

   Register roles:
     %edi   address just past the 16-byte block being examined
     %xmm1  (char) c replicated into all 16 bytes
     %xmm2  zero comparand for the NUL test; pcmpeqb leaves it all-zero
	    whenever the block contains no NUL, so it is reused as is
     %eax   bit mask of bytes equal to c in the current block
     %ecx   bit mask of NUL bytes (NUL | match right after each loop step)
     %ebx   match mask of the latest earlier block that had a match, 0 if
	    there has been none
     %esi   the "just past the block" address that belongs to %ebx

   Note on the immediates written as `$1 << N - 1': GNU as gives `<<' a
   higher precedence than `-', so the value is (1 << N) - 1, i.e. a mask of
   the N low bits.

   Note on unwind info: after every unconditional jmp/ret the CFI state for
   the code that follows is re-established by hand.  The real push order is
   %esi then %ebx, so the re-established state must be CFI_PUSH (%esi)
   followed by CFI_PUSH (%ebx); the reverse order would record the two save
   slots swapped.  */

	.text
ENTRY (strrchr)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	pxor	%xmm2, %xmm2
	mov	%ecx, %edi
	/* Together with the pshufd below: replicate the low byte of c into
	   every byte of %xmm1.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	/* ECX has OFFSET.  */
	and	$63, %ecx
	pshufd	$0, %xmm1, %xmm1
	/* Offset within the 64-byte line > 48: an unaligned 16-byte load
	   would run past the end of the line, so take the aligned path.  */
	cmp	$48, %ecx
	ja	L(crosscache)

/* unaligned string.  */
	movdqu	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm2, %ecx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match1)

	test	%ecx, %ecx
	jnz	L(return_null)

	/* No match and no NUL in the first 16 bytes: continue from the
	   16-byte aligned block that contains s + 16.  */
	and	$-16, %edi

	PUSH	(%esi)
	PUSH	(%ebx)

	xor	%ebx, %ebx
	jmp	L(loop)

	/* Code below is entered with only %edi pushed.  */
	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(unaligned_match1):
	/* Match in the first 16 bytes.  If there is a NUL as well, finish
	   in the prolog code; otherwise remember the match and loop.  */
	test	%ecx, %ecx
	jnz	L(prolog_find_zero_1)

	PUSH	(%esi)
	PUSH	(%ebx)

	mov	%eax, %ebx
	mov	%edi, %esi
	and	$-16, %edi
	jmp	L(loop)

	/* Code below is entered with only %edi pushed.  */
	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(crosscache):
/* Handle unaligned string.  */
	/* %ecx = s & 15, %edi = s rounded down to 16.  %xmm3 is used for the
	   NUL test here so that a NUL in the bytes before s cannot leave
	   %xmm2 non-zero.  */
	and	$15, %ecx
	and	$-16, %edi
	pxor	%xmm3, %xmm3
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	/* Find where NULL is.  */
	pmovmskb %xmm3, %edx
	/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	/* Remove the leading bytes.  */
	shr	%cl, %edx
	shr	%cl, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match)

	test	%edx, %edx
	jnz	L(return_null)

	PUSH	(%esi)
	PUSH	(%ebx)

	xor	%ebx, %ebx
	jmp	L(loop)

	/* Code below is entered with only %edi pushed.  */
	CFI_POP	(%esi)
	CFI_POP	(%ebx)

	.p2align 4
L(unaligned_match):
	test	%edx, %edx
	jnz	L(prolog_find_zero)

	PUSH	(%esi)
	PUSH	(%ebx)

	/* The masks were shifted right by %ecx, so pair them with
	   %edi + %ecx (= s + 16) to keep "address - 16 + bit index"
	   pointing at the right byte.  */
	mov	%eax, %ebx
	lea	(%edi, %ecx), %esi

/* Loop start on aligned string.  */
	/* Four 16-byte blocks per iteration; leave as soon as a block
	   contains a match or a NUL.  */
	.p2align 4
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jz	L(loop)

L(matches):
	/* The block has a match and/or a NUL.  */
	test	%eax, %eax
	jnz	L(match)
L(return_value):
	/* No usable match in the final block: fall back to the match
	   remembered in %ebx/%esi, if any.  */
	test	%ebx, %ebx
	jz	L(return_null_1)
	mov	%ebx, %eax
	mov	%esi, %edi

	POP	(%ebx)
	POP	(%esi)

	jmp	L(match_case1)

	/* Code below runs with %edi, %esi, %ebx pushed (in that order).  */
	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(return_null_1):
	POP	(%ebx)
	POP	(%esi)

	xor	%eax, %eax
	RETURN

	/* Code below runs with %edi, %esi, %ebx pushed (in that order).  */
	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(match):
	/* %ecx currently holds NUL | match; reload the pure NUL mask.  */
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(find_zero)
	/* Match but no NUL: remember this block and keep scanning.  */
	mov	%eax, %ebx
	mov	%edi, %esi
	jmp	L(loop)

	/* Final block, loop variant: find the lowest set bit of the NUL mask
	   in %ecx and keep only the match bits in %eax up to and including
	   that position.  If none remain, use the remembered match.  */
	.p2align 4
L(find_zero):
	test	%cl, %cl
	jz	L(find_zero_high)
	mov	%cl, %dl
	and	$15, %dl
	jz	L(find_zero_8)
	test	$0x01, %cl
	jnz	L(FindZeroExit1)
	test	$0x02, %cl
	jnz	L(FindZeroExit2)
	test	$0x04, %cl
	jnz	L(FindZeroExit3)
	and	$1 << 4 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(find_zero_8):
	test	$0x10, %cl
	jnz	L(FindZeroExit5)
	test	$0x20, %cl
	jnz	L(FindZeroExit6)
	test	$0x40, %cl
	jnz	L(FindZeroExit7)
	and	$1 << 8 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(find_zero_high):
	mov	%ch, %dh
	and	$15, %dh
	jz	L(find_zero_high_8)
	test	$0x01, %ch
	jnz	L(FindZeroExit9)
	test	$0x02, %ch
	jnz	L(FindZeroExit10)
	test	$0x04, %ch
	jnz	L(FindZeroExit11)
	and	$1 << 12 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(find_zero_high_8):
	test	$0x10, %ch
	jnz	L(FindZeroExit13)
	test	$0x20, %ch
	jnz	L(FindZeroExit14)
	test	$0x40, %ch
	jnz	L(FindZeroExit15)
	and	$1 << 16 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	/* FindZeroExitN: the first NUL is byte N-1 of the block; keep the N
	   low match bits.  */
	.p2align 4
L(FindZeroExit1):
	and	$1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit2):
	and	$1 << 2 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit3):
	and	$1 << 3 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit5):
	and	$1 << 5 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit6):
	and	$1 << 6 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit7):
	and	$1 << 7 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit9):
	and	$1 << 9 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit10):
	and	$1 << 10 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit11):
	and	$1 << 11 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit13):
	and	$1 << 13 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit14):
	and	$1 << 14 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)
	jmp	L(match_case1)

	CFI_PUSH	(%esi)
	CFI_PUSH	(%ebx)

	.p2align 4
L(FindZeroExit15):
	and	$1 << 15 - 1, %eax
	jz	L(return_value)

	POP	(%ebx)
	POP	(%esi)

	/* Entered with only %edi pushed, %eax = non-zero match mask limited
	   to the valid bytes, %edi = address just past the block the mask
	   belongs to.  Returns %edi - 16 + index of the highest set bit of
	   %ax, i.e. the last matching byte.  */
	.p2align 4
L(match_case1):
	test	%ah, %ah
	jnz	L(match_case1_high)
	mov	%al, %dl
	and	$15 << 4, %dl
	jnz	L(match_case1_8)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x02, %al
	jnz	L(Exit2)
	lea	-16(%edi), %eax
	RETURN

	.p2align 4
L(match_case1_8):
	test	$0x80, %al
	jnz	L(Exit8)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x20, %al
	jnz	L(Exit6)
	lea	-12(%edi), %eax
	RETURN

	.p2align 4
L(match_case1_high):
	mov	%ah, %dh
	and	$15 << 4, %dh
	jnz	L(match_case1_high_8)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x02, %ah
	jnz	L(Exit10)
	lea	-8(%edi), %eax
	RETURN

	.p2align 4
L(match_case1_high_8):
	test	$0x80, %ah
	jnz	L(Exit16)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x20, %ah
	jnz	L(Exit14)
	lea	-4(%edi), %eax
	RETURN

	/* ExitN: the last match is byte N-1 of the block ending at %edi.  */
	.p2align 4
L(Exit2):
	lea	-15(%edi), %eax
	RETURN

	.p2align 4
L(Exit3):
	lea	-14(%edi), %eax
	RETURN

	.p2align 4
L(Exit4):
	lea	-13(%edi), %eax
	RETURN

	.p2align 4
L(Exit6):
	lea	-11(%edi), %eax
	RETURN

	.p2align 4
L(Exit7):
	lea	-10(%edi), %eax
	RETURN

	.p2align 4
L(Exit8):
	lea	-9(%edi), %eax
	RETURN

	.p2align 4
L(Exit10):
	lea	-7(%edi), %eax
	RETURN

	.p2align 4
L(Exit11):
	lea	-6(%edi), %eax
	RETURN

	.p2align 4
L(Exit12):
	lea	-5(%edi), %eax
	RETURN

	.p2align 4
L(Exit14):
	lea	-3(%edi), %eax
	RETURN

	.p2align 4
L(Exit15):
	lea	-2(%edi), %eax
	RETURN

	.p2align 4
L(Exit16):
	lea	-1(%edi), %eax
	RETURN

/* Return NULL.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	RETURN

	/* Final block, prolog variant: the first block examined already
	   contains both a match and a NUL.  Only %edi is pushed and there is
	   no earlier match to fall back on, so an empty masked %eax means
	   "return 0".  */
	.p2align 4
L(prolog_find_zero):
	/* Masks were shifted right by %ecx; compensate in %edi and move the
	   NUL mask from %edx to %ecx.  */
	add	%ecx, %edi
	mov	%edx, %ecx
L(prolog_find_zero_1):
	test	%cl, %cl
	jz	L(prolog_find_zero_high)
	mov	%cl, %dl
	and	$15, %dl
	jz	L(prolog_find_zero_8)
	test	$0x01, %cl
	jnz	L(PrologFindZeroExit1)
	test	$0x02, %cl
	jnz	L(PrologFindZeroExit2)
	test	$0x04, %cl
	jnz	L(PrologFindZeroExit3)
	and	$1 << 4 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_8):
	test	$0x10, %cl
	jnz	L(PrologFindZeroExit5)
	test	$0x20, %cl
	jnz	L(PrologFindZeroExit6)
	test	$0x40, %cl
	jnz	L(PrologFindZeroExit7)
	and	$1 << 8 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_high):
	mov	%ch, %dh
	and	$15, %dh
	jz	L(prolog_find_zero_high_8)
	test	$0x01, %ch
	jnz	L(PrologFindZeroExit9)
	test	$0x02, %ch
	jnz	L(PrologFindZeroExit10)
	test	$0x04, %ch
	jnz	L(PrologFindZeroExit11)
	and	$1 << 12 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(prolog_find_zero_high_8):
	test	$0x10, %ch
	jnz	L(PrologFindZeroExit13)
	test	$0x20, %ch
	jnz	L(PrologFindZeroExit14)
	test	$0x40, %ch
	jnz	L(PrologFindZeroExit15)
	and	$1 << 16 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	/* PrologFindZeroExitN: the first NUL is byte N-1; keep the N low
	   match bits, return 0 if none remain.  */
	.p2align 4
L(PrologFindZeroExit1):
	and	$1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit2):
	and	$1 << 2 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit3):
	and	$1 << 3 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit5):
	and	$1 << 5 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit6):
	and	$1 << 6 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit7):
	and	$1 << 7 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit9):
	and	$1 << 9 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit10):
	and	$1 << 10 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit11):
	and	$1 << 11 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit13):
	and	$1 << 13 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit14):
	and	$1 << 14 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

	.p2align 4
L(PrologFindZeroExit15):
	and	$1 << 15 - 1, %eax
	jnz	L(match_case1)
	xor	%eax, %eax
	RETURN

END (strrchr)