Diffstat (limited to 'libcutils/arch-mips/android_memset.S')
-rw-r--r--	libcutils/arch-mips/android_memset.S	323
1 file changed, 323 insertions, 0 deletions
diff --git a/libcutils/arch-mips/android_memset.S b/libcutils/arch-mips/android_memset.S
new file mode 100644
index 000000000..6811de01e
--- /dev/null
+++ b/libcutils/arch-mips/android_memset.S
@@ -0,0 +1,323 @@
/*
 * Copyright (c) 2009
 * MIPS Technologies, Inc., California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/************************************************************************
 *
 * memset.S, version "64h" with 1 cache line horizon for "pref 30" and 14 nops
 * Version: "043009"
 *
 ************************************************************************/


/************************************************************************
 * Include files
 ************************************************************************/

#include <machine/asm.h>
#define END(f) .cfi_endproc; .size f, .-f; .end f

/*
 * This routine could be optimized for MIPS64. The current code only
 * uses MIPS32 instructions.
 */

#if defined(__MIPSEB__)
# define SWHI swl /* high part is left in big-endian */
# define SWLO swr /* low part is right in big-endian */
#endif

#if defined(__MIPSEL__)
# define SWHI swr /* high part is right in little-endian */
# define SWLO swl /* low part is left in little-endian */
#endif

#if !(defined(XGPROF) || defined(XPROF))
#undef SETUP_GP
#define SETUP_GP
#endif

#ifdef NDEBUG
#define DBG #
#else
#define DBG
#endif

/*
 * void android_memset16(uint16_t* dst, uint16_t value, size_t size);
 */

LEAF(android_memset16,0)
	.set noreorder
DBG	/* Check parameters */
DBG	andi t0,a0,1	# a0 must be halfword aligned
DBG	tne t0,zero
DBG	andi t2,a2,1	# a2 must be even
DBG	tne t2,zero

#ifdef FIXARGS
	# ensure count is even
#if (__mips==32) && (__mips_isa_rev>=2)
	ins a2,zero,0,1
#else
	ori a2,1
	xori a2,1
#endif
#endif
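	# (note) the ori/xori pair clears bit 0 the same way ins a2,zero,0,1 does:
	# e.g. a2 = 7 -> ori gives 7, xori gives 6, so odd counts round down to even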

#if (__mips==32) && (__mips_isa_rev>=2)
	ins a1,a1,16,16
#else
	andi a1,0xffff
	sll t3,a1,16
	or a1,t3
#endif
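	# (note) a1 now holds the 16-bit value replicated into both halves,
	# e.g. a1 = 0x1234 becomes 0x12341234, so each full-word store below
	# writes two halfword elements at once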

	beqz a2,.Ldone
	andi t1,a0,2
	beqz t1,.Lalignok
	addu t0,a0,a2	# t0 is the "past the end" address
	sh a1,0(a0)	# store one halfword to get aligned
	addu a0,2
	subu a2,2
.Lalignok:
	slti t1,a2,4	# .Laligned for 4 or more bytes
	beqz t1,.Laligned
	sne t1,a2,2	# one more halfword?
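	# (note) .Laligned is the shared word-store path in memset below; when the
	# branch above falls through, a2 is 0 or 2 here, so at most one halfword remains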
	bnez t1,.Ldone
	nop
	sh a1,0(a0)
.Ldone:
	j ra
	nop
	.set reorder
END(android_memset16)

/*
 * void android_memset32(uint32_t* dst, uint32_t value, size_t size);
 */

LEAF(android_memset32,0)
	.set noreorder
DBG	/* Check parameters */
DBG	andi t0,a0,3	# a0 must be word aligned
DBG	tne t0,zero
DBG	andi t2,a2,3	# a2 must be a multiple of 4 bytes
DBG	tne t2,zero

#ifdef FIXARGS
	# ensure count is a multiple of 4
#if (__mips==32) && (__mips_isa_rev>=2)
	ins a2,zero,0,2
#else
	ori a2,3
	xori a2,3
#endif
#endif
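	# (note) same trick as in android_memset16: ori/xori clears the two low
	# bits (e.g. a2 = 13 -> 15 -> 12), matching what ins a2,zero,0,2 does above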

	bnez a2,.Laligned	# any work to do?
	addu t0,a0,a2	# t0 is the "past the end" address
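	# (note) the bulk of the work is shared with memset below: .Laligned expects
	# a0 word-aligned, a1 holding the 32-bit fill pattern, a2 the byte count,
	# and t0 = dst + size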

	j ra
	nop
	.set reorder
END(android_memset32)

LEAF(memset,0)

	.set noreorder
	.set noat

	addu t0,a0,a2	# t0 is the "past the end" address
	slti AT,a2,4	# is a2 less than 4?
	bne AT,zero,.Llast4	# if yes, go to last4
	move v0,a0	# memset returns the dst pointer

	beq a1,zero,.Lset0
	subu v1,zero,a0

	# smear the fill byte into a 32-bit word
#if (__mips==32) && (__mips_isa_rev>=2)
	ins a1, a1, 8, 8	# Replicate fill byte into half-word.
	ins a1, a1, 16, 16	# Replicate fill byte into word.
#else
	and a1,0xff
	sll AT,a1,8
	or a1,AT
	sll AT,a1,16
	or a1,AT
#endif
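	# (note) after the smear a1 holds the fill byte in all four byte lanes,
	# e.g. a1 = 0x5A becomes 0x5A5A5A5A (the smear is skipped when the byte is zero)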

.Lset0:
	andi v1,v1,0x3	# word-unaligned address?
	beq v1,zero,.Laligned	# v1 is the unalignment count
	subu a2,a2,v1
	SWHI a1,0(a0)
	addu a0,a0,v1
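	# (note) SWHI stores just the v1 (= 1-3) leading bytes, from a0 up to the
	# next word boundary; a2 was already reduced by v1 in the branch delay slot above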

# Here a0 is word-aligned (and stays so until "last4")
.Laligned:
	andi t8,a2,0x3f	# any 64-byte chunks?
	# t8 is the byte count past 64-byte chunks
	beq a2,t8,.Lchk8w	# when a2==t8, there are no 64-byte chunks
	# There will be at most one 32-byte chunk then
	subu a3,a2,t8	# subtract the remainder from a2
	# Here a3 counts bytes in 16-word chunks
	addu a3,a0,a3	# Now a3 is the final dst after the 64-byte chunks

# Find out whether there are any 64-byte chunks after which at least 96 bytes
# will still be left. The value 96 is the buffer needed for the
# "pref 30,64(a0)" prefetch, which can be used as "pref 30,0(a0)" after
# incrementing a0 by 64.
# For a2 below 160 there is no such "pref 30 safe" 64-byte chunk.
#
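# (note) the 160-byte threshold below is one 64-byte chunk plus the 96-byte
# "pref 30" safety margin described above (64 + 96 = 160)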
	sltiu v1,a2,160
	bgtz v1,.Lloop16w_nopref30	# skip "pref 30,0(a0)"
	subu t7,a2,96	# subtract the "pref 30 unsafe" region
	# below we have at least one 64-byte chunk which is "pref 30 safe"
	andi t6,t7,0x3f	# t6 is the remainder past the "64-byte safe" chunks
	subu t5,t7,t6	# subtract the remainder from t7
	# Here t5 counts bytes in 16-word "safe" chunks
	addu t4,a0,t5	# Now t4 is the dst after the 64-byte "safe" chunks

# Don't use "pref 30,0(a0)" when a0 is in the middle of a cache line
#	pref 30,0(a0)
# Here we are in the region where it is safe to use "pref 30,64(a0)"
.Lloop16w:
	addiu a0,a0,64
	pref 30,-32(a0)	# continue setting up the dest, addr 64-32
	sw a1,-64(a0)
	sw a1,-60(a0)
	sw a1,-56(a0)
	sw a1,-52(a0)
	sw a1,-48(a0)
	sw a1,-44(a0)
	sw a1,-40(a0)
	sw a1,-36(a0)
	nop
	nop	# the extra nop instructions help to balance
	nop	# the cycles needed for "store" + "fill" + "evict":
	nop	# a 64-byte store needs 8 fill
	nop	# and 8 evict cycles, i.e. at least 32 instructions
	nop
	nop
	pref 30,0(a0)	# continue setting up the dest, addr 64-0
	sw a1,-32(a0)
	sw a1,-28(a0)
	sw a1,-24(a0)
	sw a1,-20(a0)
	sw a1,-16(a0)
	sw a1,-12(a0)
	sw a1,-8(a0)
	sw a1,-4(a0)
	nop
	nop
	nop
	nop	# NOTE: using 14 nops instead of 12 nops
	nop	# gives better results for "fast" memory
	nop
	bne a0,t4,.Lloop16w
	nop

	beq a0,a3,.Lchk8w	# maybe there are no more 64-byte chunks?
	nop	# this delay slot is unused

.Lloop16w_nopref30:	# there can be up to three 64-byte "nopref30" chunks
	addiu a0,a0,64
	sw a1,-64(a0)
	sw a1,-60(a0)
	sw a1,-56(a0)
	sw a1,-52(a0)
	sw a1,-48(a0)
	sw a1,-44(a0)
	sw a1,-40(a0)
	sw a1,-36(a0)
	sw a1,-32(a0)
	sw a1,-28(a0)
	sw a1,-24(a0)
	sw a1,-20(a0)
	sw a1,-16(a0)
	sw a1,-12(a0)
	sw a1,-8(a0)
	bne a0,a3,.Lloop16w_nopref30
	sw a1,-4(a0)

.Lchk8w:	# t8 here is the byte count past 64-byte chunks

	andi t7,t8,0x1f	# is there a 32-byte chunk?
	# t7 is the remainder count past the 32-byte chunk
	beq t8,t7,.Lchk1w	# when t8==t7, there is no 32-byte chunk
	move a2,t7

	sw a1,0(a0)
	sw a1,4(a0)
	sw a1,8(a0)
	sw a1,12(a0)
	sw a1,16(a0)
	sw a1,20(a0)
	sw a1,24(a0)
	sw a1,28(a0)
	addiu a0,a0,32

.Lchk1w:
	andi t8,a2,0x3	# now t8 is the remainder past the 1-word chunks
	beq a2,t8,.Llast4aligned
	subu a3,a2,t8	# a3 is the count of bytes in 1-word chunks
	addu a3,a0,a3	# now a3 is the dst address past the 1-word chunks

# filling in words (4-byte chunks)
.LwordCopy_loop:
	addiu a0,a0,4
	bne a0,a3,.LwordCopy_loop
	sw a1,-4(a0)

# store the last 0-3 bytes
# this will repeat the last store if the memset finishes on a word boundary
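# (note) SWLO writes the 1-4 bytes of the destination word that end at t0-1,
# the last byte of the buffer, so the 0-3 remaining bytes are covered; when
# nothing remains it simply rewrites bytes that were already filled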
.Llast4aligned:
	j ra
	SWLO a1,-1(t0)

.Llast4:
	beq a0,t0,.Llast4e
.Llast4l:
	addiu a0,a0,1
	bne a0,t0,.Llast4l
	sb a1,-1(a0)
.Llast4e:
	j ra
	nop

	.set at
	.set reorder

END(memset)


/************************************************************************
 * Implementation: Static functions
 ************************************************************************/
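For reference, a minimal, hypothetical C caller sketch (not part of the patch; the buffer names and fill values are made up for illustration) based on the prototypes quoted in the comments above. The size argument is a byte count, so it must be even for android_memset16 and a multiple of 4 for android_memset32, and the destination must be halfword- or word-aligned respectively.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* prototypes as given in the comments in android_memset.S */
void android_memset16(uint16_t* dst, uint16_t value, size_t size);
void android_memset32(uint32_t* dst, uint32_t value, size_t size);

static uint16_t line565[320];   /* hypothetical RGB565 scanline   */
static uint32_t line8888[320];  /* hypothetical ARGB8888 scanline */

void fill_scanlines(void) {
    /* size is passed in bytes, not in elements */
    android_memset16(line565, 0xF800u, sizeof(line565));        /* 640 bytes, even          */
    android_memset32(line8888, 0xFF0000FFu, sizeof(line8888));  /* 1280 bytes, multiple of 4 */
    memset(line565, 0, sizeof(line565));                        /* plain byte fill          */
}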