1 /* $NetBSD: memcpy.S,v 1.3 1997/11/22 03:27:12 mark Exp $ */
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Neil A. Carson and Mark Brinicombe
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
39 /* This was modified by Jay Monkman <jmonkman@smoothsmoothie.com> to
40 * save and restore r12. This is necessary for RTEMS.
42 /* #include <machine/asm.h>*/
/*
 * Entry stubs (their labels are not visible in this excerpt). Each stub
 * pushes r0, r12 and lr on entry and pops the same three stack slots into
 * r0, r12 and pc on exit, so the pop doubles as the return.
 * NOTE(review): whatever runs between each push/pop pair is not shown
 * here; presumably a call into the core copy routine -- confirm.
 */
46 stmfd sp!, {r0, r12, lr} /* push r0, r12 and lr */
48 ldmfd sp!, {r0, r12, pc} /* pop r0 and r12; loading pc returns to the caller */
/* Second stub: same push/pop pair as the one above. */
53 stmfd sp!, {r0, r12, lr} /* push r0, r12 and lr */
55 ldmfd sp!, {r0, r12, pc} /* pop r0 and r12; loading pc returns to the caller */
60 * This is one fun bit of code ...
61 * Some easy listening music is suggested while trying to understand this
62 * code e.g. Iron Maiden
64 * For anyone attempting to understand it :
66 * The core code is implemented here with simple stubs for memcpy(),
67 * memmove() and bcopy().
69 * All local labels are prefixed with Lmemcpy_
70 * Following the prefix a label starting f is used in the forward copy code
71 * while a label using b is used in the backwards copy code
72 * The source and destination addresses determine whether a forward or
73 * backward copy is performed.
74 * Separate bits of code are used to deal with the following situations
75 * for both the forward and backwards copy.
76 * unaligned source address
77 * unaligned destination address
78 * Separate copy routines are used to produce an optimised result for each of these cases.
80 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
81 * a time where possible.
83 * Note: r12 (aka ip) can be trashed during the function along with
84 * r0-r3, although r0-r2 have defined uses throughout, i.e. dest, src, len.
85 * Additional registers are preserved prior to use i.e. r4, r5 & lr
87 * Apologies for the state of the comments;-)
92 /* Determine copy direction */
/*
 * NOTE(review): the instructions that set the condition flags for the
 * conditional moves and branches below (and the branch that selects the
 * backward path) are not visible in this excerpt. Confirm that EQ at the
 * moveq really corresponds to len == 0 as its comment says.
 */
96 moveq r0, #0 /* Quick abort for len=0: r0 is zeroed only when EQ holds */
99 stmdb sp!, {r0, lr} /* memcpy() returns dest addr: push r0 and lr */
101 blt Lmemcpy_fl4 /* less than 4 bytes */
103 bne Lmemcpy_fdestul /* oh unaligned destination addr */
105 bne Lmemcpy_fsrcul /* oh unaligned source addr */
108 /* We have aligned source and destination */
/*
 * Forward word copy. Every load below reads from r1 and every store writes
 * through r0, both in increment-after mode with writeback ('!'), so the two
 * pointers advance by the number of bytes moved. r3, r4, r12 and lr are the
 * data registers.
 * NOTE(review): the instructions that adjust the remaining count and set
 * the flags between these steps, the loop branches, and the labels that
 * the branches target are not visible in this excerpt.
 */
110 blt Lmemcpy_fl12 /* less than 12 bytes (4 from above) */
112 blt Lmemcpy_fl32 /* less than 32 bytes (12 from above) */
113 stmdb sp!, {r4} /* borrow r4: push it so it can carry data below */
115 /* blat 32 bytes at a time */
116 /* XXX for really big copies perhaps we should use more registers */
118 ldmia r1!, {r3, r4, r12, lr} /* first 16 bytes: load four words */
119 stmia r0!, {r3, r4, r12, lr} /* ... and store them */
120 ldmia r1!, {r3, r4, r12, lr} /* second 16 bytes: load four words */
121 stmia r0!, {r3, r4, r12, lr} /* ... and store them */
126 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes (only when GE) */
127 stmgeia r0!, {r3, r4, r12, lr} /* store those four words (only when GE) */
129 ldmia sp!, {r4} /* return r4: pop the value pushed above */
134 /* blat 12 bytes at a time */
136 ldmgeia r1!, {r3, r12, lr} /* when GE: load three words (12 bytes) */
137 stmgeia r0!, {r3, r12, lr} /* when GE: store three words */
148 ldmgeia r1!, {r3, r12} /* when GE: load two words (8 bytes) */
149 stmgeia r0!, {r3, r12} /* when GE: store two words */
153 /* less than 4 bytes to go */
155 ldmeqia sp!, {r0, pc} /* done: when EQ, pop r0 and return by loading pc */
157 /* copy the crud byte at a time */
/* NOTE(review): the byte-copy instructions themselves are not visible in this excerpt. */
167 /* erg - unaligned destination */
/*
 * NOTE(review): the byte-copy instructions that perform the alignment, and
 * the instructions that set the flags tested by the two branches below,
 * are not visible in this excerpt.
 */
172 /* align destination with byte copies */
180 blt Lmemcpy_fl4 /* less than 4 bytes */
183 beq Lmemcpy_ft8 /* we have an aligned source */
185 /* erg - unaligned source */
186 /* This is where it gets nasty ... */
/*
 * Forward copy, unaligned-source variant that merges adjacent words with
 * "lsl #24": each stored word is OR-ed with the next word shifted left by
 * 24 bits.
 * NOTE(review): only part of this sequence is visible. The two sibling
 * variants show a right shift of r12 (lsr #16 / lsr #24) ahead of their
 * last orr; the matching right shifts, the count updates that set the
 * flags, and the loads/stores of the 4-byte loop are presumably among the
 * missing lines -- confirm against the full file.
 */
194 blt Lmemcpy_fsrcul1loop4 /* LT: bypass the 16-byte loop */
198 Lmemcpy_fsrcul1loop16:
200 ldmia r1!, {r4, r5, r12, lr} /* load the next four source words; r1 += 16 */
201 orr r3, r3, r4, lsl #24 /* r3 |= r4 << 24 */
203 orr r4, r4, r5, lsl #24 /* r4 |= r5 << 24 */
205 orr r5, r5, r12, lsl #24 /* r5 |= r12 << 24 */
207 orr r12, r12, lr, lsl #24 /* r12 |= lr << 24 */
208 stmia r0!, {r3-r5, r12} /* store the four merged words; r0 += 16 */
210 bge Lmemcpy_fsrcul1loop16 /* GE: another 16-byte pass */
213 blt Lmemcpy_fsrcul1l4 /* LT: branch to Lmemcpy_fsrcul1l4 (label not visible here) */
215 Lmemcpy_fsrcul1loop4:
218 orr r12, r12, lr, lsl #24 /* r12 |= lr << 24 */
221 bge Lmemcpy_fsrcul1loop4 /* GE: another pass of this loop */
/*
 * Forward copy, unaligned-source variant that merges adjacent words with a
 * 16-bit shift: each stored word is OR-ed with the next word shifted left
 * by 16 bits.
 * NOTE(review): only part of this sequence is visible. One right shift
 * (mov r12, r12, lsr #16) is shown; the equivalent shifts for r3-r5, the
 * count updates that set the flags, and the loads/stores of the 4-byte loop
 * are presumably among the missing lines -- confirm against the full file.
 */
229 blt Lmemcpy_fsrcul2loop4 /* LT: bypass the 16-byte loop */
233 Lmemcpy_fsrcul2loop16:
235 ldmia r1!, {r4, r5, r12, lr} /* load the next four source words; r1 += 16 */
236 orr r3, r3, r4, lsl #16 /* r3 |= r4 << 16 */
238 orr r4, r4, r5, lsl #16 /* r4 |= r5 << 16 */
240 orr r5, r5, r12, lsl #16 /* r5 |= r12 << 16 */
241 mov r12, r12, lsr #16 /* r12 >>= 16 (logical), leaving its upper half in the low 16 bits */
242 orr r12, r12, lr, lsl #16 /* r12 |= lr << 16 */
243 stmia r0!, {r3-r5, r12} /* store the four merged words; r0 += 16 */
245 bge Lmemcpy_fsrcul2loop16 /* GE: another 16-byte pass */
248 blt Lmemcpy_fsrcul2l4 /* LT: branch to Lmemcpy_fsrcul2l4 (label not visible here) */
250 Lmemcpy_fsrcul2loop4:
253 orr r12, r12, lr, lsl #16 /* r12 |= lr << 16 */
256 bge Lmemcpy_fsrcul2loop4 /* GE: another pass of this loop */
/*
 * Forward copy, unaligned-source variant that merges adjacent words with
 * "lsl #8": each stored word is OR-ed with the next word shifted left by
 * 8 bits.
 * NOTE(review): only part of this sequence is visible. One right shift
 * (mov r12, r12, lsr #24) is shown; the equivalent shifts for r3-r5, the
 * count updates that set the flags, and the loads/stores of the 4-byte loop
 * are presumably among the missing lines -- confirm against the full file.
 */
264 blt Lmemcpy_fsrcul3loop4 /* LT: bypass the 16-byte loop */
268 Lmemcpy_fsrcul3loop16:
270 ldmia r1!, {r4, r5, r12, lr} /* load the next four source words; r1 += 16 */
271 orr r3, r3, r4, lsl #8 /* r3 |= r4 << 8 */
273 orr r4, r4, r5, lsl #8 /* r4 |= r5 << 8 */
275 orr r5, r5, r12, lsl #8 /* r5 |= r12 << 8 */
276 mov r12, r12, lsr #24 /* r12 >>= 24 (logical), leaving its top byte in the low 8 bits */
277 orr r12, r12, lr, lsl #8 /* r12 |= lr << 8 */
278 stmia r0!, {r3-r5, r12} /* store the four merged words; r0 += 16 */
280 bge Lmemcpy_fsrcul3loop16 /* GE: another 16-byte pass */
283 blt Lmemcpy_fsrcul3l4 /* LT: branch to Lmemcpy_fsrcul3l4 (label not visible here) */
285 Lmemcpy_fsrcul3loop4:
288 orr r12, r12, lr, lsl #8 /* r12 |= lr << 8 */
291 bge Lmemcpy_fsrcul3loop4 /* GE: another pass of this loop */
/*
 * Backward-copy dispatch (targets use the "b" label prefix). Three
 * conditional branches select the short-copy, unaligned-destination and
 * unaligned-source paths.
 * NOTE(review): the instructions that set the flags for each branch are
 * not visible in this excerpt.
 */
301 blt Lmemcpy_bl4 /* less than 4 bytes */
303 bne Lmemcpy_bdestul /* oh unaligned destination addr */
305 bne Lmemcpy_bsrcul /* oh unaligned source addr */
308 /* We have aligned source and destination */
/*
 * Backward word copy. Every multi-word load reads below r1 and every
 * multi-word store writes below r0, both in decrement-before mode with
 * writeback ('!'), so the two pointers step downwards by the number of
 * bytes moved. r3, r4, r12 and lr are the data registers.
 * NOTE(review): r4 and lr are overwritten by the loads here and lr is
 * later used for the return, but their save/restore is not visible in this
 * excerpt; neither are the loop branches, most count updates, or the
 * target labels. Confirm against the full file.
 */
310 blt Lmemcpy_bl12 /* less than 12 bytes (4 from above) */
312 subs r2, r2, #0x14 /* less than 32 bytes (12 from above): r2 -= 0x14 and set flags */
315 /* blat 32 bytes at a time */
316 /* XXX for really big copies perhaps we should use more registers */
318 ldmdb r1!, {r3, r4, r12, lr} /* first 16 bytes: load four words from below r1 */
319 stmdb r0!, {r3, r4, r12, lr} /* ... and store them below r0 */
320 ldmdb r1!, {r3, r4, r12, lr} /* second 16 bytes: load four words */
321 stmdb r0!, {r3, r4, r12, lr} /* ... and store them */
327 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes (only when GE) */
328 stmgedb r0!, {r3, r4, r12, lr} /* store those four words (only when GE) */
331 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes (only when GE) */
332 stmgedb r0!, {r3, r12, lr} /* store those three words (only when GE) */
342 ldmgedb r1!, {r3, r12} /* when GE: load two words (8 bytes) */
343 stmgedb r0!, {r3, r12} /* when GE: store two words */
347 /* less than 4 bytes to go */
349 moveq pc, lr /* done: when EQ, return by copying lr into pc */
351 /* copy the crud byte at a time */
355 ldrgeb r3, [r1, #-1]! /* when GE: r1 -= 1, then load one byte */
356 strgeb r3, [r0, #-1]! /* when GE: r0 -= 1, then store that byte */
357 ldrgtb r3, [r1, #-1]! /* when GT: r1 -= 1, then load one more byte */
358 strgtb r3, [r0, #-1]! /* when GT: r0 -= 1, then store it */
361 /* erg - unaligned destination */
/*
 * NOTE(review): the instructions that compute the misalignment and set the
 * flags used by the conditional byte copies and the two branches below are
 * not visible in this excerpt.
 */
365 /* align destination with byte copies */
368 ldrgeb r3, [r1, #-1]! /* when GE: r1 -= 1, then load one byte */
369 strgeb r3, [r0, #-1]! /* when GE: r0 -= 1, then store that byte */
370 ldrgtb r3, [r1, #-1]! /* when GT: r1 -= 1, then load one more byte */
371 strgtb r3, [r0, #-1]! /* when GT: r0 -= 1, then store it */
373 blt Lmemcpy_bl4 /* less than 4 bytes to go */
375 beq Lmemcpy_bt8 /* we have an aligned source */
377 /* erg - unaligned source */
378 /* This is where it gets nasty ... */
/*
 * Backward copy, unaligned-source variant that merges adjacent words with
 * "lsr #24": each stored word is OR-ed with the next lower word shifted
 * right by 24 bits. r4, r5 and lr are pushed before the 16-byte loop and
 * popped after it.
 * NOTE(review): only part of this sequence is visible. The sibling variants
 * show a left shift of r12 (lsl #16 / lsl #24) ahead of the matching orr;
 * the corresponding left shifts here, the count updates that set the
 * flags, and the loads/stores of the 4-byte loop are presumably among the
 * missing lines -- confirm against the full file.
 */
386 blt Lmemcpy_bsrcul3loop4 /* LT: bypass the 16-byte loop */
388 stmdb sp!, {r4, r5, lr} /* push r4, r5 and lr; the loop uses them as data registers */
390 Lmemcpy_bsrcul3loop16:
392 ldmdb r1!, {r3-r5, r12} /* load four words from below r1; r1 -= 16 */
393 orr lr, lr, r12, lsr #24 /* lr |= r12 >> 24 */
395 orr r12, r12, r5, lsr #24 /* r12 |= r5 >> 24 */
397 orr r5, r5, r4, lsr #24 /* r5 |= r4 >> 24 */
399 orr r4, r4, r3, lsr #24 /* r4 |= r3 >> 24 */
400 stmdb r0!, {r4, r5, r12, lr} /* store the four merged words below r0; r0 -= 16 */
402 bge Lmemcpy_bsrcul3loop16 /* GE: another 16-byte pass */
403 ldmia sp!, {r4, r5, lr} /* pop r4, r5 and lr pushed before the loop */
405 blt Lmemcpy_bsrcul3l4 /* LT: branch to Lmemcpy_bsrcul3l4 (label not visible here) */
407 Lmemcpy_bsrcul3loop4:
410 orr r12, r12, r3, lsr #24 /* r12 |= r3 >> 24 */
413 bge Lmemcpy_bsrcul3loop4 /* GE: another pass of this loop */
/*
 * Backward copy, unaligned-source variant that merges adjacent words with a
 * 16-bit shift: each stored word is OR-ed with the next lower word shifted
 * right by 16 bits. r4, r5 and lr are pushed before the 16-byte loop and
 * popped after it.
 * NOTE(review): only part of this sequence is visible. One left shift
 * (mov r12, r12, lsl #16) is shown; the equivalent shifts for the other
 * registers, the count updates that set the flags, and the loads/stores of
 * the 4-byte loop are presumably among the missing lines -- confirm against
 * the full file.
 */
421 blt Lmemcpy_bsrcul2loop4 /* LT: bypass the 16-byte loop */
423 stmdb sp!, {r4, r5, lr} /* push r4, r5 and lr; the loop uses them as data registers */
425 Lmemcpy_bsrcul2loop16:
427 ldmdb r1!, {r3-r5, r12} /* load four words from below r1; r1 -= 16 */
428 orr lr, lr, r12, lsr #16 /* lr |= r12 >> 16 */
429 mov r12, r12, lsl #16 /* r12 <<= 16, leaving its lower half in the high 16 bits */
430 orr r12, r12, r5, lsr #16 /* r12 |= r5 >> 16 */
432 orr r5, r5, r4, lsr #16 /* r5 |= r4 >> 16 */
434 orr r4, r4, r3, lsr #16 /* r4 |= r3 >> 16 */
435 stmdb r0!, {r4, r5, r12, lr} /* store the four merged words below r0; r0 -= 16 */
437 bge Lmemcpy_bsrcul2loop16 /* GE: another 16-byte pass */
438 ldmia sp!, {r4, r5, lr} /* pop r4, r5 and lr pushed before the loop */
440 blt Lmemcpy_bsrcul2l4 /* LT: branch to Lmemcpy_bsrcul2l4 (label not visible here) */
442 Lmemcpy_bsrcul2loop4:
445 orr r12, r12, r3, lsr #16 /* r12 |= r3 >> 16 */
448 bge Lmemcpy_bsrcul2loop4 /* GE: another pass of this loop */
/*
 * Backward copy, unaligned-source variant that merges adjacent words with
 * "lsr #8": each stored word is OR-ed with the next lower word shifted
 * right by 8 bits. r4, r5 and lr are pushed before the main loop and popped
 * after it.
 * NOTE(review): the main loop label ends in "loop32", but the visible body
 * loads and stores one group of four words (16 bytes) per pass, exactly
 * like the sibling loops named "...loop16". The name looks like a misnomer;
 * confirm against the full file before renaming.
 * NOTE(review): only part of this sequence is visible. One left shift
 * (mov r12, r12, lsl #24) is shown; the equivalent shifts for the other
 * registers, the count updates that set the flags, and the loads/stores of
 * the 4-byte loop are presumably among the missing lines.
 */
456 blt Lmemcpy_bsrcul1loop4 /* LT: bypass the main loop */
458 stmdb sp!, {r4, r5, lr} /* push r4, r5 and lr; the loop uses them as data registers */
460 Lmemcpy_bsrcul1loop32:
462 ldmdb r1!, {r3-r5, r12} /* load four words from below r1; r1 -= 16 */
463 orr lr, lr, r12, lsr #8 /* lr |= r12 >> 8 */
464 mov r12, r12, lsl #24 /* r12 <<= 24, leaving its low byte in the top 8 bits */
465 orr r12, r12, r5, lsr #8 /* r12 |= r5 >> 8 */
467 orr r5, r5, r4, lsr #8 /* r5 |= r4 >> 8 */
469 orr r4, r4, r3, lsr #8 /* r4 |= r3 >> 8 */
470 stmdb r0!, {r4, r5, r12, lr} /* store the four merged words below r0; r0 -= 16 */
472 bge Lmemcpy_bsrcul1loop32 /* GE: another pass of the main loop */
473 ldmia sp!, {r4, r5, lr} /* pop r4, r5 and lr pushed before the loop */
475 blt Lmemcpy_bsrcul1l4 /* LT: branch to Lmemcpy_bsrcul1l4 (label not visible here) */
477 Lmemcpy_bsrcul1loop4:
480 orr r12, r12, r3, lsr #8 /* r12 |= r3 >> 8 */
483 bge Lmemcpy_bsrcul1loop4 /* GE: another pass of this loop */