1 /*
2 Copyright (c) 2011 Intel Corporation
3 All rights reserved.
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
8 * Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
15 * Neither the name of Intel Corporation nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
/*
 * wcslen -- count the 4-byte characters that precede the first zero
 * character of a string (32-bit x86, SSE2, AT&T syntax).
 *
 * Stand-alone build (USE_AS_WCSCAT undefined):
 *   In:    4(%esp) = address of the string
 *   Out:   %eax    = number of characters before the terminator
 *   Clobb: %eax, %ecx, %edx, %xmm0-%xmm3, %xmm6, flags
 *
 * Fragment build (USE_AS_WCSCAT defined):
 *   The helper macros, .text, ENTRY, the argument load, the last RETURN
 *   and END are all left out.  The surrounding code has to supply L()
 *   and RETURN and must arrive here with the string address in %edx.
 *   Control falls out of the bottom of L(exit_tail7) with 7 in %eax.
 *
 * Method:
 *   1. Test the first eight characters one at a time.
 *   2. Test sixteen 16-byte aligned chunks with pcmpeqd/pmovmskb.
 *   3. Loop over 64-byte aligned groups, folding four chunks with
 *      pminub so that one compare covers the whole group.
 *
 * Register roles from step 2 onwards:
 *   %eax  scan pointer; at L(exit) it is 16 bytes past the chunk in
 *         which the terminator was found
 *   %ecx  string address + 16, so that %eax - %ecx is the byte offset
 *         of that chunk from the start of the string
 *   %edx  pmovmskb result: four mask bits per character lane
 *
 * The chunk compares are done on 16-byte aligned addresses in dword
 * lanes, so lanes coincide with characters only when the string address
 * is a multiple of 4.  Aligned chunk loads may also read bytes beyond
 * the terminator, up to the end of the aligned chunk/group.
 */
#ifndef USE_AS_WCSCAT

# ifndef L
#  define L(label)	.L##label
# endif

# ifndef cfi_startproc
#  define cfi_startproc	.cfi_startproc
# endif

# ifndef cfi_endproc
#  define cfi_endproc	.cfi_endproc
# endif

# ifndef ENTRY
#  define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
# endif

# ifndef END
#  define END(name)	\
	cfi_endproc;	\
	.size name, .-name
# endif

# define PARMS	4	/* %esp offset of the string argument on entry */
# define STR	PARMS
# define RETURN	ret

	.text
ENTRY (wcslen)
	mov	STR(%esp), %edx		/* edx = string address */
#endif
	/*
	 * Step 1: characters 0..7, one dword at a time.  The explicit `l'
	 * suffix is required: with an immediate and a memory operand there
	 * is no register from which the assembler could infer the size.
	 */
	cmpl	$0, (%edx)
	jz	L(exit_tail0)
	cmpl	$0, 4(%edx)
	jz	L(exit_tail1)
	cmpl	$0, 8(%edx)
	jz	L(exit_tail2)
	cmpl	$0, 12(%edx)
	jz	L(exit_tail3)
	cmpl	$0, 16(%edx)
	jz	L(exit_tail4)
	cmpl	$0, 20(%edx)
	jz	L(exit_tail5)
	cmpl	$0, 24(%edx)
	jz	L(exit_tail6)
	cmpl	$0, 28(%edx)
	jz	L(exit_tail7)

	/*
	 * Step 2: sixteen aligned 16-byte chunks.
	 *
	 * eax = (string + 32) rounded down to 16, which lies above
	 * string + 16 and at most at string + 32, so the first chunk starts
	 * inside or right after the 32 bytes already tested.
	 * ecx = string + 16 (see "Register roles" above).
	 *
	 * Each test compares the chunk against an all-zero register and
	 * advances eax by 16 before branching.  Falling through a test
	 * means the mask was 0, i.e. the compare result is all zero again,
	 * which is why xmm0-xmm3 are reused later without being re-cleared.
	 */
	pxor	%xmm0, %xmm0

	lea	32(%edx), %eax
	lea	-16(%eax), %ecx
	and	$-16, %eax

	/* chunk 1 */
	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 2 */
	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 3 */
	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 4 */
	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 5 */
	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 6 */
	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 7 */
	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 8 */
	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 9 */
	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 10 */
	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 11 */
	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 12 */
	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 13 */
	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 14 */
	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 15 */
	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/* chunk 16 */
	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/*
	 * Step 3: round eax down to a 64-byte boundary.  This can step back
	 * over chunks tested above; they hold no terminator, so scanning
	 * them a second time does not change the result.
	 */
	and	$-0x40, %eax

	.p2align 4
L(aligned_64_loop):
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6

	/*
	 * Byte-wise minimum of the four chunks, then one dword compare
	 * against xmm3 (all zero here).  A zero character in any chunk
	 * always produces a zero dword in the minimum.
	 */
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2
	pmovmskb %xmm2, %edx
	lea	64(%eax), %eax
	test	%edx, %edx
	jz	L(aligned_64_loop)

	/*
	 * The minimum can also contain a zero dword built from bytes of
	 * different chunks, so each chunk is tested on its own.  eax is
	 * already 64 past the group; ecx is biased before every test so
	 * that L(exit) still computes the offset of the chunk under test.
	 * xmm3 stays all zero as long as the tests find nothing.
	 */

	/* group chunk 0: ecx = string + 64 */
	pcmpeqd	-64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	48(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	/* group chunk 1 (still in xmm1): ecx = string + 48 */
	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	/* group chunk 2: ecx = string + 32 */
	pcmpeqd	-32(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	/* group chunk 3 (still in xmm6): ecx = string + 16 again */
	pcmpeqd	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	/* False positive from pminub: ecx is restored, keep scanning.  */
	jmp	L(aligned_64_loop)

	/*
	 * A chunk matched.  eax - ecx is the byte offset of that chunk, so
	 * after the shift eax is the index of its first character.  edx
	 * holds four mask bits per lane: dl covers lanes 0-1, dh lanes 2-3.
	 */
	.p2align 4
L(exit):
	sub	%ecx, %eax
	shr	$2, %eax
	test	%dl, %dl
	jz	L(exit_high)		/* nothing in lanes 0-1 */

	test	$15, %dl		/* low nibble clear: lane 1 matched */
	jz	L(exit_1)
	RETURN				/* lane 0 */

	.p2align 4
L(exit_high):
	test	$15, %dh		/* low nibble clear: lane 3 matched */
	jz	L(exit_3)
	add	$2, %eax		/* lane 2 */
	RETURN

	.p2align 4
L(exit_1):
	add	$1, %eax
	RETURN

	.p2align 4
L(exit_3):
	add	$3, %eax
	RETURN

	/* Results for a terminator among the first eight characters.  */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	RETURN

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	RETURN

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	RETURN

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	RETURN

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	RETURN

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	RETURN

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	RETURN

	.p2align 4
L(exit_tail7):
	mov	$7, %eax
	/* In the fragment build execution continues past this point.  */
#ifndef USE_AS_WCSCAT
	RETURN

END (wcslen)
#endif