1 /*
2 * Copyright (C) 2013 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in
12 * the documentation and/or other materials provided with the
13 * distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
29 #include <machine/cpu-features.h>
30 #include <private/bionic_asm.h>
31 #include <private/libc_events.h>
33 /*
34 * Optimized memset() for ARM.
35 *
36 * memset() returns its first argument.
37 */
39 .fpu neon
40 .syntax unified
// __memset_chk: bounds-checked entry point in front of memset.
//
// In:   r0, r1, r2 = the arguments that are handed on to memset
//       r3         = limit that the byte count in r2 is checked against
//                    (presumably the known size of the destination
//                    buffer; confirm against the callers)
//
// When r2 <= r3 (unsigned) control continues at .L_done, which falls
// through into memset with r0-r2 untouched. Otherwise the failure
// handler is called with a message pointer and an event code.
ENTRY(__memset_chk)
        cmp         r2, r3
        bls         .L_done                 // count fits: go do the memset

        // Failure path. Keep lr on the stack so that a backtrace taken
        // from inside the failure handler can still find the caller.
        // NOTE(review): only one word is pushed, so sp is 4-byte rather
        // than 8-byte aligned at the call below; confirm that this is
        // acceptable for __fortify_chk_fail.
        .save       {lr}
        push        {lr}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset lr, 0

        // r1 = event code, r0 = offset of the message relative to the
        // value pc will have when read by the add below.
        ldr         r1, error_code
        ldr         r0, error_message
.L_memset_chk_pc_anchor:
        add         r0, r0, pc              // r0 = address of error_string
        // Nothing but literal data follows this call, so the handler
        // must not return.
        bl          __fortify_chk_fail

error_code:
        .word       BIONIC_EVENT_MEMSET_BUFFER_OVERFLOW
error_message:
        // The +8 accounts for pc reading two instructions ahead of the
        // add at the anchor label (ARM state).
        .word       error_string-(.L_memset_chk_pc_anchor+8)
END(__memset_chk)
// bzero: zero-fill r1 bytes starting at r0.
//
// Rearranges the arguments into the register layout memset expects
// (r0 = destination, r1 = fill value, r2 = byte count) and then runs
// memset itself.
ENTRY(bzero)
        mov         r2, r1                  // byte count moves to r2
        mov         r1, #0                  // fill value is zero
.L_done:
        // There is deliberately no return here: execution continues
        // into memset, which follows directly. __memset_chk also
        // branches to this label once its size check has passed.
END(bzero)
// memset: fill r2 bytes starting at r0 with the low byte of r1.
//
// In:       r0 = destination pointer
//           r1 = fill value (only the low 8 bits are used)
//           r2 = byte count (unsigned)
// Out:      r0 = the original destination pointer
// Scratch:  r1, r2, r3, ip, d0 and the condition flags
// Stack:    one word (the saved r0) until return
//
// Counts below 16 take a NEON/byte-store path that works for any
// alignment. Larger counts first align the destination to 8 bytes and
// then store with strd, 64 bytes per loop iteration, followed by a
// 32/16/8/4/2/1 byte tail selected by the low bits of the count.
ENTRY(memset)
        // Save the destination: it is the return value, and r0 is
        // reused below as one half of the strd register pair.
        .save       {r0}
        stmfd       sp!, {r0}
        .cfi_def_cfa_offset 4
        .cfi_rel_offset r0, 0

        // The strd algorithm is slower for fills < 16 bytes, so use the
        // older neon code in that case.
        cmp         r2, #16
        blo         .L_set_less_than_16_unknown_align

        // Use strd which requires an even and odd register so move the
        // values so that:
        //   r0 and r1 contain the memset value
        //   r2 is the number of bytes to set
        //   r3 is the destination pointer
        mov         r3, r0

        // Replicate the fill byte into all four bytes of r1.
        mov         r1, r1, lsl #24
        orr         r1, r1, r1, lsr #8
        orr         r1, r1, r1, lsr #16

.L_check_alignment:
        // Align destination to a double word to avoid the strd crossing
        // a cache line boundary.
        ands        ip, r3, #7
        bne         .L_do_double_word_align

.L_double_word_aligned:
        mov         r0, r1

        // Bias the count by -64 so that the loop can test the borrow of
        // its own subtraction directly.
        subs        r2, #64
        blo         .L_set_less_than_64

1:      // Main loop sets 64 bytes at a time.
        .irp        offset, #0, #8, #16, #24, #32, #40, #48, #56
        strd        r0, r1, [r3, \offset]
        .endr

        add         r3, #64
        subs        r2, #64
        // The count is unsigned, so keep looping while the subtraction
        // did not borrow. A signed test (bge) would leave the loop after
        // a single pass whenever 2 GiB or more were still to be set.
        bhs         1b

.L_set_less_than_64:
        // Restore r2 to the count of bytes left to set (0-63).
        add         r2, #64
        // C = bit 5 of the count (32 bytes), N = bit 4 (16 bytes). None
        // of the stores or plain adds below modify the flags.
        lsls        ip, r2, #27
        bcc         .L_set_less_than_32
        // Set 32 bytes.
        .irp        offset, #0, #8, #16, #24
        strd        r0, r1, [r3, \offset]
        .endr
        add         r3, #32

.L_set_less_than_32:
        bpl         .L_set_less_than_16
        // Set 16 bytes.
        .irp        offset, #0, #8
        strd        r0, r1, [r3, \offset]
        .endr
        add         r3, #16

.L_set_less_than_16:
        // Less than 16 bytes to set.
        // C = bit 3 of the count (8 bytes), N = bit 2 (4 bytes).
        lsls        ip, r2, #29
        bcc         .L_set_less_than_8

        // Set 8 bytes.
        strd        r0, r1, [r3], #8

.L_set_less_than_8:
        bpl         .L_set_less_than_4
        // Set 4 bytes
        str         r1, [r3], #4

.L_set_less_than_4:
        // C = bit 1 of the count (2 bytes). The shifted result is
        // non-zero exactly when bit 0 is set (1 byte), hence the ne test.
        lsls        ip, r2, #31
        it          ne
        strbne      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3]

        // Return the original destination pointer.
        ldmfd       sp!, {r0}
        bx          lr

.L_do_double_word_align:
        // ip = number of bytes (1-7) needed to reach 8-byte alignment.
        // The count is at least 16 here, so subtracting ip cannot wrap.
        rsb         ip, ip, #8
        sub         r2, r2, ip
        // N = bit 0 of ip (1 byte), C = bit 1 of ip (2 bytes). r0 is
        // only a throwaway target for the shift; it is reloaded from r1
        // at .L_double_word_aligned.
        movs        r0, ip, lsl #31
        it          mi
        strbmi      r1, [r3], #1
        itt         cs
        strbcs      r1, [r3], #1
        strbcs      r1, [r3], #1

        // Dst is at least word aligned by this point.
        cmp         ip, #4
        blo         .L_double_word_aligned
        str         r1, [r3], #4
        b           .L_double_word_aligned

.L_set_less_than_16_unknown_align:
        // Set up to 15 bytes.
        vdup.8      d0, r1
        // C = bit 3 of the count (8 bytes), N = bit 2 (4 bytes).
        movs        ip, r2, lsl #29
        bcc         1f
        vst1.8      {d0}, [r0]!
        // Test N directly (bpl) instead of comparing it with whatever
        // the V flag was left at by an earlier instruction.
1:      bpl         2f
        vst1.32     {d0[0]}, [r0]!
        // N = bit 0 of the count (1 byte), C = bit 1 (2 bytes).
2:      movs        ip, r2, lsl #31
        it          mi
        strbmi      r1, [r0], #1
        itt         cs
        strbcs      r1, [r0], #1
        strbcs      r1, [r0], #1
        // Return the original destination pointer.
        ldmfd       sp!, {r0}
        bx          lr
END(memset)
191 .data
192 error_string:
193 .string "memset: prevented write past end of buffer"