Skip to content

Commit 9dce93a

Browse files
armcc authored and richfelker committed
add big-endian support to ARM assembler memcpy
Allow the existing ARM assembler memcpy implementation to be used for both big and little endian targets.
1 parent 8ed2bd8 commit 9dce93a

File tree

3 files changed

+98
-8
lines changed

3 files changed

+98
-8
lines changed

COPYRIGHT

+1-1
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ Copyright © 2017-2018 Arm Limited
127127
and labelled as such in comments in the individual source files. All
128128
have been licensed under extremely permissive terms.
129129

130-
The ARM memcpy code (src/string/arm/memcpy_el.S) is Copyright © 2008
130+
The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
131131
The Android Open Source Project and is licensed under a two-clause BSD
132132
license. It was taken from Bionic libc, used on Android.
133133

src/string/arm/memcpy_le.S renamed to src/string/arm/memcpy.S

+97-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
#if !__ARMEB__
2-
31
/*
42
* Copyright (C) 2008 The Android Open Source Project
53
* All rights reserved.
@@ -42,7 +40,7 @@
4240
* code safely callable from thumb mode, adjusting the return
4341
* instructions to be compatible with pre-thumb ARM cpus, removal of
4442
* prefetch code that is not compatible with older cpus and support for
45-
* building as thumb 2.
43+
* building as thumb 2 and big-endian.
4644
*/
4745

4846
.syntax unified
@@ -227,24 +225,45 @@ non_congruent:
227225
* becomes aligned to 32 bits (r5 = number of bytes to copy for alignment)
228226
*/
229227
movs r5, r5, lsl #31
228+
229+
#if __ARMEB__
230+
movmi r3, r3, ror #24
231+
strbmi r3, [r0], #1
232+
movcs r3, r3, ror #24
233+
strbcs r3, [r0], #1
234+
movcs r3, r3, ror #24
235+
strbcs r3, [r0], #1
236+
#else
230237
strbmi r3, [r0], #1
231238
movmi r3, r3, lsr #8
232239
strbcs r3, [r0], #1
233240
movcs r3, r3, lsr #8
234241
strbcs r3, [r0], #1
235242
movcs r3, r3, lsr #8
243+
#endif
236244

237245
cmp r2, #4
238246
blo partial_word_tail
239247

248+
#if __ARMEB__
249+
mov r3, r3, lsr r12
250+
mov r3, r3, lsl r12
251+
#endif
252+
240253
/* Align destination to 32 bytes (cache line boundary) */
241254
1: tst r0, #0x1c
242255
beq 2f
243256
ldr r5, [r1], #4
244257
sub r2, r2, #4
258+
#if __ARMEB__
259+
mov r4, r5, lsr lr
260+
orr r4, r4, r3
261+
mov r3, r5, lsl r12
262+
#else
245263
mov r4, r5, lsl lr
246264
orr r4, r4, r3
247265
mov r3, r5, lsr r12
266+
#endif
248267
str r4, [r0], #4
249268
cmp r2, #4
250269
bhs 1b
@@ -270,6 +289,25 @@ loop16:
270289
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
271290
subs r2, r2, #32
272291
ldrhs r12, [r1], #4
292+
#if __ARMEB__
293+
orr r3, r3, r4, lsr #16
294+
mov r4, r4, lsl #16
295+
orr r4, r4, r5, lsr #16
296+
mov r5, r5, lsl #16
297+
orr r5, r5, r6, lsr #16
298+
mov r6, r6, lsl #16
299+
orr r6, r6, r7, lsr #16
300+
mov r7, r7, lsl #16
301+
orr r7, r7, r8, lsr #16
302+
mov r8, r8, lsl #16
303+
orr r8, r8, r9, lsr #16
304+
mov r9, r9, lsl #16
305+
orr r9, r9, r10, lsr #16
306+
mov r10, r10, lsl #16
307+
orr r10, r10, r11, lsr #16
308+
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
309+
mov r3, r11, lsl #16
310+
#else
273311
orr r3, r3, r4, lsl #16
274312
mov r4, r4, lsr #16
275313
orr r4, r4, r5, lsl #16
@@ -287,6 +325,7 @@ loop16:
287325
orr r10, r10, r11, lsl #16
288326
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
289327
mov r3, r11, lsr #16
328+
#endif
290329
bhs 1b
291330
b less_than_thirtytwo
292331

@@ -296,6 +335,25 @@ loop8:
296335
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
297336
subs r2, r2, #32
298337
ldrhs r12, [r1], #4
338+
#if __ARMEB__
339+
orr r3, r3, r4, lsr #24
340+
mov r4, r4, lsl #8
341+
orr r4, r4, r5, lsr #24
342+
mov r5, r5, lsl #8
343+
orr r5, r5, r6, lsr #24
344+
mov r6, r6, lsl #8
345+
orr r6, r6, r7, lsr #24
346+
mov r7, r7, lsl #8
347+
orr r7, r7, r8, lsr #24
348+
mov r8, r8, lsl #8
349+
orr r8, r8, r9, lsr #24
350+
mov r9, r9, lsl #8
351+
orr r9, r9, r10, lsr #24
352+
mov r10, r10, lsl #8
353+
orr r10, r10, r11, lsr #24
354+
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
355+
mov r3, r11, lsl #8
356+
#else
299357
orr r3, r3, r4, lsl #24
300358
mov r4, r4, lsr #8
301359
orr r4, r4, r5, lsl #24
@@ -313,6 +371,7 @@ loop8:
313371
orr r10, r10, r11, lsl #24
314372
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
315373
mov r3, r11, lsr #8
374+
#endif
316375
bhs 1b
317376
b less_than_thirtytwo
318377

@@ -322,6 +381,25 @@ loop24:
322381
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
323382
subs r2, r2, #32
324383
ldrhs r12, [r1], #4
384+
#if __ARMEB__
385+
orr r3, r3, r4, lsr #8
386+
mov r4, r4, lsl #24
387+
orr r4, r4, r5, lsr #8
388+
mov r5, r5, lsl #24
389+
orr r5, r5, r6, lsr #8
390+
mov r6, r6, lsl #24
391+
orr r6, r6, r7, lsr #8
392+
mov r7, r7, lsl #24
393+
orr r7, r7, r8, lsr #8
394+
mov r8, r8, lsl #24
395+
orr r8, r8, r9, lsr #8
396+
mov r9, r9, lsl #24
397+
orr r9, r9, r10, lsr #8
398+
mov r10, r10, lsl #24
399+
orr r10, r10, r11, lsr #8
400+
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
401+
mov r3, r11, lsl #24
402+
#else
325403
orr r3, r3, r4, lsl #8
326404
mov r4, r4, lsr #24
327405
orr r4, r4, r5, lsl #8
@@ -339,6 +417,7 @@ loop24:
339417
orr r10, r10, r11, lsl #8
340418
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
341419
mov r3, r11, lsr #24
420+
#endif
342421
bhs 1b
343422

344423
less_than_thirtytwo:
@@ -350,21 +429,36 @@ less_than_thirtytwo:
350429

351430
1: ldr r5, [r1], #4
352431
sub r2, r2, #4
432+
#if __ARMEB__
433+
mov r4, r5, lsr lr
434+
orr r4, r4, r3
435+
mov r3, r5, lsl r12
436+
#else
353437
mov r4, r5, lsl lr
354438
orr r4, r4, r3
355439
mov r3, r5, lsr r12
440+
#endif
356441
str r4, [r0], #4
357442
cmp r2, #4
358443
bhs 1b
359444

360445
partial_word_tail:
361446
/* we have a partial word in the input buffer */
362447
movs r5, lr, lsl #(31-3)
448+
#if __ARMEB__
449+
movmi r3, r3, ror #24
450+
strbmi r3, [r0], #1
451+
movcs r3, r3, ror #24
452+
strbcs r3, [r0], #1
453+
movcs r3, r3, ror #24
454+
strbcs r3, [r0], #1
455+
#else
363456
strbmi r3, [r0], #1
364457
movmi r3, r3, lsr #8
365458
strbcs r3, [r0], #1
366459
movcs r3, r3, lsr #8
367460
strbcs r3, [r0], #1
461+
#endif
368462

369463
/* Refill spilled registers from the stack. Don't update sp. */
370464
ldmfd sp, {r5-r11}
@@ -383,4 +477,3 @@ copy_last_3_and_return:
383477
ldmfd sp!, {r0, r4, lr}
384478
bx lr
385479

386-
#endif

src/string/arm/memcpy.c

-3
This file was deleted.

0 commit comments

Comments
 (0)