--- a/sysdeps/arm/arm-features.h.orig 2018-07-31 14:36:22.745319871 +0200
+++ b/sysdeps/arm/arm-features.h 2018-07-31 14:36:33.421362248 +0200
@@ -56,4 +56,7 @@
/* An OS-specific arm-features.h file may define ARM_NO_INDEX_REGISTER to
indicate that the two-register addressing modes must never be used. */
+#define NO_THUMB
+
+

#endif /* arm-features.h */
--- a/sysdeps/arm/armv6t2/strlen.S
+++ /dev/null
@@ -1,141 +0,0 @@
-/* Copyright (C) 2010-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library. If not, see
- <http://www.gnu.org/licenses/>. */
-
-/*
- Assumes:
- ARMv6T2, AArch32
-
- */
-
-#include <sysdep.h>
-#include <arm-features.h>
-
-#ifdef __ARMEB__
-#define S2LO lsl
-#define S2HI lsr
-#else
-#define S2LO lsr
-#define S2HI lsl
-#endif
-
-/* This code is best on Thumb. */
- .thumb
-
-/* Parameters and result. */
-#define srcin r0
-#define result r0
-
-/* Internal variables. */
-#define src r1
-#define data1a r2
-#define data1b r3
-#define const_m1 r12
-#define const_0 r4
-#define tmp1 r4 /* Overlaps const_0 */
-#define tmp2 r5
-
- .text
- .p2align 6
-ENTRY(strlen)
- pld [srcin, #0]
- strd r4, r5, [sp, #-8]!
- cfi_adjust_cfa_offset (8)
- cfi_rel_offset (r4, 0)
- cfi_rel_offset (r5, 4)
- cfi_remember_state
- bic src, srcin, #7
- mvn const_m1, #0
- ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
- pld [src, #32]
- bne.w .Lmisaligned8
- mov const_0, #0
- mov result, #-8
-.Lloop_aligned:
- /* Bytes 0-7. */
- ldrd data1a, data1b, [src]
- pld [src, #64]
- add result, result, #8
-.Lstart_realigned:
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cbnz data1b, .Lnull_found
-
- /* Bytes 8-15. */
- ldrd data1a, data1b, [src, #8]
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- add result, result, #8
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cbnz data1b, .Lnull_found
-
- /* Bytes 16-23. */
- ldrd data1a, data1b, [src, #16]
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- add result, result, #8
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cbnz data1b, .Lnull_found
-
- /* Bytes 24-31. */
- ldrd data1a, data1b, [src, #24]
- add src, src, #32
- uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */
- add result, result, #8
- sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */
- uadd8 data1b, data1b, const_m1
- sel data1b, data1a, const_m1 /* Only used if d1a == 0. */
- cmp data1b, #0
- beq .Lloop_aligned
-
-.Lnull_found:
- cmp data1a, #0
- itt eq
- addeq result, result, #4
- moveq data1a, data1b
-#ifndef __ARMEB__
- rev data1a, data1a
-#endif
- clz data1a, data1a
- ldrd r4, r5, [sp], #8
- cfi_adjust_cfa_offset (-8)
- cfi_restore (r4)
- cfi_restore (r5)
- add result, result, data1a, lsr #3 /* Bits -> Bytes. */
- DO_RET(lr)
-
-.Lmisaligned8:
- cfi_restore_state
- ldrd data1a, data1b, [src]
- and tmp2, tmp1, #3
- rsb result, tmp1, #0
- lsl tmp2, tmp2, #3 /* Bytes -> bits. */
- tst tmp1, #4
- pld [src, #64]
- S2HI tmp2, const_m1, tmp2
- orn data1a, data1a, tmp2
- itt ne
- ornne data1b, data1b, tmp2
- movne data1a, const_m1
- mov const_0, #0
- b .Lstart_realigned
-
-END(strlen)
-libc_hidden_builtin_def (strlen)
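
A hypothetical C model (not part of the patch, and not glibc code) of the
trick the deleted strlen.S relies on: uadd8 of a word against 0xffffffff
sets GE<i> exactly when byte i is nonzero (the per-byte add carries), and
sel with const_0/const_m1 then leaves 0xff only in the lanes that held a
NUL.  On little-endian, the rev/clz pair in the epilogue is equivalent to
counting trailing zero bits, and ">> 3" converts bits to bytes:

#include <stdint.h>
#include <stdio.h>

/* Number of bytes preceding the first NUL in a little-endian word,
   or 4 if the word contains no NUL.  */
static int
bytes_before_nul (uint32_t word)
{
  uint32_t flags = 0;
  /* Models the uadd8/sel pair: 0xff in each lane whose byte is 0.  */
  for (int i = 0; i < 4; i++)
    if (((word >> (8 * i)) & 0xff) == 0)
      flags |= 0xffu << (8 * i);
  if (flags == 0)
    return 4;			/* No NUL; the loop would continue.  */
  /* rev + clz in the assembly == count trailing zeros here.  */
  return __builtin_ctz (flags) >> 3;
}

int
main (void)
{
  printf ("%d\n", bytes_before_nul (0x00434241));	/* "ABC" -> 3 */
  return 0;
}
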
diff --git a/sysdeps/arm/armv7/strcmp.S b/sysdeps/arm/armv7/strcmp.S
deleted file mode 100644
index 2626fdf..0000000
--- a/sysdeps/arm/armv7/strcmp.S
+++ /dev/null
@@ -1,496 +0,0 @@
-/* strcmp implementation for ARMv7-A, optimized for Cortex-A15.
- Copyright (C) 2012-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <arm-features.h>
-#include <sysdep.h>
-
-/* Implementation of strcmp for ARMv7 when DSP instructions are
- available. Use ldrd to support wider loads, provided the data
- is sufficiently aligned. Use saturating arithmetic to optimize
- the compares. */
-
-/* Build Options:
- STRCMP_PRECHECK: Run a quick pre-check of the first byte in the
- string. If comparing completely random strings the pre-check will
- save time, since there is a very high probability of a mismatch in
- the first character: we save significant overhead if this is the
- common case. However, if strings are likely to be identical (e.g.
- because we're verifying a hit in a hash table), then this check
- is largely redundant. */
-
-#define STRCMP_PRECHECK 1
-
- .syntax unified
-
-#ifdef __ARM_BIG_ENDIAN
-# define S2LO lsl
-# define S2LOEQ lsleq
-# define S2HI lsr
-# define MSB 0x000000ff
-# define LSB 0xff000000
-# define BYTE0_OFFSET 24
-# define BYTE1_OFFSET 16
-# define BYTE2_OFFSET 8
-# define BYTE3_OFFSET 0
-#else /* not __ARM_BIG_ENDIAN */
-# define S2LO lsr
-# define S2LOEQ lsreq
-# define S2HI lsl
-# define BYTE0_OFFSET 0
-# define BYTE1_OFFSET 8
-# define BYTE2_OFFSET 16
-# define BYTE3_OFFSET 24
-# define MSB 0xff000000
-# define LSB 0x000000ff
-#endif /* not __ARM_BIG_ENDIAN */
-
-/* Parameters and result. */
-#define src1 r0
-#define src2 r1
-#define result r0 /* Overlaps src1. */
-
-/* Internal variables. */
-#define tmp1 r4
-#define tmp2 r5
-#define const_m1 r12
-
-/* Additional internal variables for 64-bit aligned data. */
-#define data1a r2
-#define data1b r3
-#define data2a r6
-#define data2b r7
-#define syndrome_a tmp1
-#define syndrome_b tmp2
-
-/* Additional internal variables for 32-bit aligned data. */
-#define data1 r2
-#define data2 r3
-#define syndrome tmp2
-
-
- .thumb
-
-/* In Thumb code we can't use MVN with a register shift, but we do have ORN. */
-.macro prepare_mask mask_reg, nbits_reg
- S2HI \mask_reg, const_m1, \nbits_reg
-.endm
-.macro apply_mask data_reg, mask_reg
- orn \data_reg, \data_reg, \mask_reg
-.endm
-
- /* Macro to compute and return the result value for word-aligned
- cases. */
- .macro strcmp_epilogue_aligned synd d1 d2 restore_r6
-#ifdef __ARM_BIG_ENDIAN
- /* If data1 contains a zero byte, then syndrome will contain a 1 in
- bit 7 of that byte. Otherwise, the highest set bit in the
- syndrome will highlight the first different bit. It is therefore
- sufficient to extract the eight bits starting with the syndrome
- bit. */
- clz tmp1, \synd
- lsl r1, \d2, tmp1
- .if \restore_r6
- ldrd r6, r7, [sp, #8]
- .endif
- lsl \d1, \d1, tmp1
- lsr result, \d1, #24
- ldrd r4, r5, [sp], #16
- cfi_remember_state
- cfi_def_cfa_offset (0)
- cfi_restore (r4)
- cfi_restore (r5)
- cfi_restore (r6)
- cfi_restore (r7)
- sub result, result, r1, lsr #24
- bx lr
-#else
- /* To use the big-endian trick we'd have to reverse all three words.
- that's slower than this approach. */
- rev \synd, \synd
- clz tmp1, \synd
- bic tmp1, tmp1, #7
- lsr r1, \d2, tmp1
- .if \restore_r6
- ldrd r6, r7, [sp, #8]
- .endif
- lsr \d1, \d1, tmp1
- and result, \d1, #255
- and r1, r1, #255
- ldrd r4, r5, [sp], #16
- cfi_remember_state
- cfi_def_cfa_offset (0)
- cfi_restore (r4)
- cfi_restore (r5)
- cfi_restore (r6)
- cfi_restore (r7)
- sub result, result, r1
-
- bx lr
-#endif
- .endm
-
- .text
- .p2align 5
-.Lstrcmp_start_addr:
-#if STRCMP_PRECHECK == 1
-.Lfastpath_exit:
- sub r0, r2, r3
- bx lr
- nop
-#endif
-ENTRY (strcmp)
-#if STRCMP_PRECHECK == 1
- ldrb r2, [src1]
- ldrb r3, [src2]
- cmp r2, #1
- it cs
- cmpcs r2, r3
- bne .Lfastpath_exit
-#endif
- strd r4, r5, [sp, #-16]!
- cfi_def_cfa_offset (16)
- cfi_offset (r4, -16)
- cfi_offset (r5, -12)
- orr tmp1, src1, src2
- strd r6, r7, [sp, #8]
- cfi_offset (r6, -8)
- cfi_offset (r7, -4)
- mvn const_m1, #0
- lsl r2, tmp1, #29
- cbz r2, .Lloop_aligned8
-
-.Lnot_aligned:
- eor tmp1, src1, src2
- tst tmp1, #7
- bne .Lmisaligned8
-
- /* Deal with mutual misalignment by aligning downwards and then
- masking off the unwanted loaded data to prevent a difference. */
- and tmp1, src1, #7
- bic src1, src1, #7
- and tmp2, tmp1, #3
- bic src2, src2, #7
- lsl tmp2, tmp2, #3 /* Bytes -> bits. */
- ldrd data1a, data1b, [src1], #16
- tst tmp1, #4
- ldrd data2a, data2b, [src2], #16
- prepare_mask tmp1, tmp2
- apply_mask data1a, tmp1
- apply_mask data2a, tmp1
- beq .Lstart_realigned8
- apply_mask data1b, tmp1
- mov data1a, const_m1
- apply_mask data2b, tmp1
- mov data2a, const_m1
- b .Lstart_realigned8
-
- /* Unwind the inner loop by a factor of 2, giving 16 bytes per
- pass. */
- .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */
- .p2align 2 /* Always word aligned. */
-.Lloop_aligned8:
- ldrd data1a, data1b, [src1], #16
- ldrd data2a, data2b, [src2], #16
-.Lstart_realigned8:
- uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
- eor syndrome_a, data1a, data2a
- sel syndrome_a, syndrome_a, const_m1
- cbnz syndrome_a, .Ldiff_in_a
- uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
- eor syndrome_b, data1b, data2b
- sel syndrome_b, syndrome_b, const_m1
- cbnz syndrome_b, .Ldiff_in_b
-
- ldrd data1a, data1b, [src1, #-8]
- ldrd data2a, data2b, [src2, #-8]
- uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */
- eor syndrome_a, data1a, data2a
- sel syndrome_a, syndrome_a, const_m1
- uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */
- eor syndrome_b, data1b, data2b
- sel syndrome_b, syndrome_b, const_m1
- /* Can't use CBZ for backwards branch. */
- orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
- beq .Lloop_aligned8
-
-.Ldiff_found:
- cbnz syndrome_a, .Ldiff_in_a
-
-.Ldiff_in_b:
- strcmp_epilogue_aligned syndrome_b, data1b, data2b 1
-
-.Ldiff_in_a:
- cfi_restore_state
- strcmp_epilogue_aligned syndrome_a, data1a, data2a 1
-
- cfi_restore_state
-.Lmisaligned8:
- tst tmp1, #3
- bne .Lmisaligned4
- ands tmp1, src1, #3
- bne .Lmutual_align4
-
- /* Unrolled by a factor of 2, to reduce the number of post-increment
- operations. */
-.Lloop_aligned4:
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-.Lstart_realigned4:
- uadd8 syndrome, data1, const_m1 /* Only need GE bits. */
- eor syndrome, data1, data2
- sel syndrome, syndrome, const_m1
- cbnz syndrome, .Laligned4_done
- ldr data1, [src1, #-4]
- ldr data2, [src2, #-4]
- uadd8 syndrome, data1, const_m1
- eor syndrome, data1, data2
- sel syndrome, syndrome, const_m1
- cmp syndrome, #0
- beq .Lloop_aligned4
-
-.Laligned4_done:
- strcmp_epilogue_aligned syndrome, data1, data2, 0
-
-.Lmutual_align4:
- cfi_restore_state
- /* Deal with mutual misalignment by aligning downwards and then
- masking off the unwanted loaded data to prevent a difference. */
- lsl tmp1, tmp1, #3 /* Bytes -> bits. */
- bic src1, src1, #3
- ldr data1, [src1], #8
- bic src2, src2, #3
- ldr data2, [src2], #8
-
- prepare_mask tmp1, tmp1
- apply_mask data1, tmp1
- apply_mask data2, tmp1
- b .Lstart_realigned4
-
-.Lmisaligned4:
- ands tmp1, src1, #3
- beq .Lsrc1_aligned
- sub src2, src2, tmp1
- bic src1, src1, #3
- lsls tmp1, tmp1, #31
- ldr data1, [src1], #4
- beq .Laligned_m2
- bcs .Laligned_m1
-
-#if STRCMP_PRECHECK == 0
- ldrb data2, [src2, #1]
- uxtb tmp1, data1, ror #BYTE1_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- cbz data2, .Lmisaligned_exit
-
-.Laligned_m2:
- ldrb data2, [src2, #2]
- uxtb tmp1, data1, ror #BYTE2_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- cbz data2, .Lmisaligned_exit
-
-.Laligned_m1:
- ldrb data2, [src2, #3]
- uxtb tmp1, data1, ror #BYTE3_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- add src2, src2, #4
- cbnz data2, .Lsrc1_aligned
-#else /* STRCMP_PRECHECK */
- /* If we've done the pre-check, then we don't need to check the
- first byte again here. */
- ldrb data2, [src2, #2]
- uxtb tmp1, data1, ror #BYTE2_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- cbz data2, .Lmisaligned_exit
-
-.Laligned_m2:
- ldrb data2, [src2, #3]
- uxtb tmp1, data1, ror #BYTE3_OFFSET
- subs tmp1, tmp1, data2
- bne .Lmisaligned_exit
- cbnz data2, .Laligned_m1
-#endif
-
-.Lmisaligned_exit:
- mov result, tmp1
- ldr r4, [sp], #16
- cfi_remember_state
- cfi_def_cfa_offset (0)
- cfi_restore (r4)
- cfi_restore (r5)
- cfi_restore (r6)
- cfi_restore (r7)
- bx lr
-
-#if STRCMP_PRECHECK == 1
-.Laligned_m1:
- add src2, src2, #4
-#endif
-.Lsrc1_aligned:
- cfi_restore_state
- /* src1 is word aligned, but src2 has no common alignment
- with it. */
- ldr data1, [src1], #4
- lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */
-
- bic src2, src2, #3
- ldr data2, [src2], #4
- bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */
- bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */
-
- /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. */
-.Loverlap3:
- bic tmp1, data1, #MSB
- uadd8 syndrome, data1, const_m1
- eors syndrome, tmp1, data2, S2LO #8
- sel syndrome, syndrome, const_m1
- bne 4f
- cbnz syndrome, 5f
- ldr data2, [src2], #4
- eor tmp1, tmp1, data1
- cmp tmp1, data2, S2HI #24
- bne 6f
- ldr data1, [src1], #4
- b .Loverlap3
-4:
- S2LO data2, data2, #8
- b .Lstrcmp_tail
-
-5:
- bics syndrome, syndrome, #MSB
- bne .Lstrcmp_done_equal
-
- /* We can only get here if the MSB of data1 contains 0, so
- fast-path the exit. */
- ldrb result, [src2]
- ldrd r4, r5, [sp], #16
- cfi_remember_state
- cfi_def_cfa_offset (0)
- cfi_restore (r4)
- cfi_restore (r5)
- /* R6/7 Not used in this sequence. */
- cfi_restore (r6)
- cfi_restore (r7)
- neg result, result
- bx lr
-
-6:
- cfi_restore_state
- S2LO data1, data1, #24
- and data2, data2, #LSB
- b .Lstrcmp_tail
-
- .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
-.Loverlap2:
- and tmp1, data1, const_m1, S2LO #16
- uadd8 syndrome, data1, const_m1
- eors syndrome, tmp1, data2, S2LO #16
- sel syndrome, syndrome, const_m1
- bne 4f
- cbnz syndrome, 5f
- ldr data2, [src2], #4
- eor tmp1, tmp1, data1
- cmp tmp1, data2, S2HI #16
- bne 6f
- ldr data1, [src1], #4
- b .Loverlap2
-4:
- S2LO data2, data2, #16
- b .Lstrcmp_tail
-5:
- ands syndrome, syndrome, const_m1, S2LO #16
- bne .Lstrcmp_done_equal
-
- ldrh data2, [src2]
- S2LO data1, data1, #16
-#ifdef __ARM_BIG_ENDIAN
- lsl data2, data2, #16
-#endif
- b .Lstrcmp_tail
-
-6:
- S2LO data1, data1, #16
- and data2, data2, const_m1, S2LO #16
- b .Lstrcmp_tail
-
- .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */
-.Loverlap1:
- and tmp1, data1, #LSB
- uadd8 syndrome, data1, const_m1
- eors syndrome, tmp1, data2, S2LO #24
- sel syndrome, syndrome, const_m1
- bne 4f
- cbnz syndrome, 5f
- ldr data2, [src2], #4
- eor tmp1, tmp1, data1
- cmp tmp1, data2, S2HI #8
- bne 6f
- ldr data1, [src1], #4
- b .Loverlap1
-4:
- S2LO data2, data2, #24
- b .Lstrcmp_tail
-5:
- tst syndrome, #LSB
- bne .Lstrcmp_done_equal
- ldr data2, [src2]
-6:
- S2LO data1, data1, #8
- bic data2, data2, #MSB
- b .Lstrcmp_tail
-
-.Lstrcmp_done_equal:
- mov result, #0
- ldrd r4, r5, [sp], #16
- cfi_remember_state
- cfi_def_cfa_offset (0)
- cfi_restore (r4)
- cfi_restore (r5)
- /* R6/7 not used in this sequence. */
- cfi_restore (r6)
- cfi_restore (r7)
- bx lr
-
-.Lstrcmp_tail:
- cfi_restore_state
-#ifndef __ARM_BIG_ENDIAN
- rev data1, data1
- rev data2, data2
- /* Now everything looks big-endian... */
-#endif
- uadd8 tmp1, data1, const_m1
- eor tmp1, data1, data2
- sel syndrome, tmp1, const_m1
- clz tmp1, syndrome
- lsl data1, data1, tmp1
- lsl data2, data2, tmp1
- lsr result, data1, #24
- ldrd r4, r5, [sp], #16
- cfi_def_cfa_offset (0)
- cfi_restore (r4)
- cfi_restore (r5)
- /* R6/7 not used in this sequence. */
- cfi_restore (r6)
- cfi_restore (r7)
- sub result, result, data2, lsr #24
- bx lr
-END (strcmp)
-libc_hidden_builtin_def (strcmp)
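
The same uadd8/sel idiom drives the deleted strcmp.S: each aligned word
pair is reduced to a "syndrome" that is nonzero when the words differ or
when data1 contains a NUL.  A hypothetical C model (not part of the patch
or of glibc) of that reduction -- sel keeps the XOR byte where data1's
byte is nonzero and substitutes 0xff where it is NUL, which is the same
as OR-ing in a per-NUL 0xff mask:

#include <stdint.h>
#include <stdio.h>

static uint32_t
word_syndrome (uint32_t data1, uint32_t data2)
{
  uint32_t diff = data1 ^ data2;	/* eor: difference bits.  */
  uint32_t nul = 0;
  for (int i = 0; i < 4; i++)		/* uadd8 GE bits from data1.  */
    if (((data1 >> (8 * i)) & 0xff) == 0)
      nul |= 0xffu << (8 * i);
  return diff | nul;			/* sel against const_m1.  */
}

int
main (void)
{
  /* "abcd" vs "abed" as little-endian words: byte 2 is flagged.  */
  printf ("%08x\n", word_syndrome (0x64636261, 0x64656261));
  return 0;
}

The strcmp_epilogue_aligned macro then locates the lowest flagged byte
(rev + clz on little-endian) and returns the difference of the two bytes
at that position.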
--- a/sysdeps/arm/armv6t2/memchr.S
+++ /dev/null
@@ -1,161 +0,0 @@
-/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Code contributed by Dave Gilbert <david.gilbert@linaro.org>
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-@ This memchr routine is optimised on a Cortex-A9 and should work on all ARMv7
-@ and ARMv6T2 processors. It has a fast path for short sizes, and has an
-@ optimised path for large data sets; the worst case is finding the match early
-@ in a large data set.
-@ Note: The use of cbz/cbnz means it's Thumb only
-
-@ 2011-07-15 david.gilbert@linaro.org
-@ Copy from Cortex strings release 21 and change license
-@ http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/view/head:/src/linaro-a9/memchr.S
-@ Change function declarations/entry/exit
-@ 2011-12-01 david.gilbert@linaro.org
-@ Add some fixes from comments received (including use of ldrd instead ldm)
-@ 2011-12-07 david.gilbert@linaro.org
-@ Removed cbz from align loop - can't be taken
-
-@ this lets us check a flag in a 00/ff byte easily in either endianness
-#ifdef __ARMEB__
-#define CHARTSTMASK(c) 1<<(31-(c*8))
-#else
-#define CHARTSTMASK(c) 1<<(c*8)
-#endif
- .syntax unified
-
- .text
- .thumb
- .thumb_func
- .global memchr
- .type memchr,%function
-ENTRY(memchr)
- @ r0 = start of memory to scan
- @ r1 = character to look for
- @ r2 = length
- @ returns r0 = pointer to character or NULL if not found
- and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
-
- cmp r2,#16 @ If it's short don't bother with anything clever
- blt 20f
-
- tst r0, #7 @ If it's already aligned skip the next bit
- beq 10f
-
- @ Work up to an aligned point
-5:
- ldrb r3, [r0],#1
- subs r2, r2, #1
- cmp r3, r1
- beq 50f @ If it matches exit found
- tst r0, #7
- bne 5b @ If not aligned yet then do next byte
-
-10:
- @ At this point, we are aligned, we know we have at least 8 bytes to work with
- push {r4,r5,r6,r7}
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (r4, 0)
- cfi_rel_offset (r5, 4)
- cfi_rel_offset (r6, 8)
- cfi_rel_offset (r7, 12)
-
- cfi_remember_state
-
- orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
- orr r1, r1, r1, lsl #16
- bic r6, r2, #7 @ Number of double words to work with * 8
- mvns r7, #0 @ all F's
- movs r3, #0
-
-15:
- ldrd r4,r5, [r0],#8
- subs r6, r6, #8
- eor r4,r4, r1 @ Get it so that r4,r5 have 00's where the bytes match the target
- eor r5,r5, r1
- uadd8 r4, r4, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
- sel r4, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
- uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0
- sel r5, r4, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
- cbnz r5, 60f
- bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
-
- pop {r4,r5,r6,r7}
- cfi_adjust_cfa_offset (-16)
- cfi_restore (r4)
- cfi_restore (r5)
- cfi_restore (r6)
- cfi_restore (r7)
-
- and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
- and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
-
-20:
- cbz r2, 40f @ 0 length or hit the end already then not found
-
-21: @ Post aligned section, or just a short call
- ldrb r3,[r0],#1
- subs r2,r2,#1
- eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub
- cbz r3, 50f
- bne 21b @ on r2 flags
-
-40:
- movs r0,#0 @ not found
- DO_RET(lr)
-
-50:
- subs r0,r0,#1 @ found
- DO_RET(lr)
-
-60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
- @ r0 points to the start of the double word after the one that was tested
- @ r4 has the 00/ff pattern for the first word, r5 has the chained value
- cfi_restore_state
- cmp r4, #0
- itte eq
- moveq r4, r5 @ the end is in the 2nd word
- subeq r0,r0,#3 @ Points to 2nd byte of 2nd word
- subne r0,r0,#7 @ or 2nd byte of 1st word
-
- @ r0 currently points to the 2nd byte of the word containing the hit
- tst r4, # CHARTSTMASK(0) @ 1st character
- bne 61f
- adds r0,r0,#1
- tst r4, # CHARTSTMASK(1) @ 2nd character
- ittt eq
- addeq r0,r0,#1
- tsteq r4, # (3<<15) @ 2nd & 3rd character
- @ If not the 3rd must be the last one
- addeq r0,r0,#1
-
-61:
- pop {r4,r5,r6,r7}
- cfi_adjust_cfa_offset (-16)
- cfi_restore (r4)
- cfi_restore (r5)
- cfi_restore (r6)
- cfi_restore (r7)
-
- subs r0,r0,#1
- DO_RET(lr)
-
-END(memchr)
-libc_hidden_builtin_def (memchr)
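
A hypothetical C model (not part of the patch or of glibc) of the deleted
memchr.S inner loop: the target byte is first broadcast across a word
(orr r1, r1, r1, lsl #8 / lsl #16), XOR turns matching bytes into 00, and
the uadd8/sel pair then converts each 00 byte to 0xff and every other
byte to 00 -- the inversion the comments warn about:

#include <stdint.h>
#include <stdio.h>

static uint32_t
match_mask (uint32_t word, uint8_t c)
{
  uint32_t pat = c;
  pat |= pat << 8;		/* Expand the match byte...  */
  pat |= pat << 16;		/* ...across all four lanes.  */
  uint32_t x = word ^ pat;	/* 00 bytes exactly at matches.  */
  uint32_t mask = 0;
  for (int i = 0; i < 4; i++)	/* uadd8 r4, r4, r7; sel r4, r3, r7.  */
    if (((x >> (8 * i)) & 0xff) == 0)
      mask |= 0xffu << (8 * i);
  return mask;
}

int
main (void)
{
  /* Little-endian word holding "bcAd"; searching for 'A' flags byte 2.  */
  printf ("%08x\n", match_mask (0x64416362, 'A'));	/* 00ff0000 */
  return 0;
}
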
diff --git a/sysdeps/arm/armv7/multiarch/Makefile b/sysdeps/arm/armv7/multiarch/Makefile
index 6e5851f..2a5f843 100644
--- a/sysdeps/arm/armv7/multiarch/Makefile
+++ b/sysdeps/arm/armv7/multiarch/Makefile
@@ -1,4 +1,3 @@
ifeq ($(subdir),string)
-sysdep_routines += memcpy_neon memcpy_vfp memchr_neon memcpy_arm \
- memchr_noneon
+sysdep_routines += memcpy_neon memcpy_vfp memcpy_arm
endif
diff --git a/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c b/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
index 48e43da..8ce9878 100644
--- a/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
+++ b/sysdeps/arm/armv7/multiarch/ifunc-impl-list.c
@@ -34,7 +34,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
bool use_neon = true;
#ifdef __ARM_NEON__
# define __memcpy_neon memcpy
-# define __memchr_neon memchr
#else
use_neon = (GLRO(dl_hwcap) & HWCAP_ARM_NEON) != 0;
#endif
@@ -53,9 +52,5 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#endif
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_arm));

- IFUNC_IMPL (i, name, memchr,
- IFUNC_IMPL_ADD (array, i, memchr, use_neon, __memchr_neon)
- IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_noneon));
-
return i;
}
diff --git a/sysdeps/arm/armv7/multiarch/memchr.c b/sysdeps/arm/armv7/multiarch/memchr.c
deleted file mode 100644
index ff1cc5d..0000000
--- a/sysdeps/arm/armv7/multiarch/memchr.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Multiple versions of memchr.
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2017-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-/* For __ARM_NEON__ memchr_neon.S defines memchr directly and ifunc
- is not used. */
-#if IS_IN (libc) && !defined (__ARM_NEON__)
-# define memchr __redirect_memchr
-# include <string.h>
-# undef memchr
-
-# include <arm-ifunc.h>
-
-# define SYMBOL_NAME memchr
-# include "ifunc-memchr.h"
-
-arm_libc_ifunc_redirected (__redirect_memchr, memchr, IFUNC_SELECTOR);
-
-arm_libc_ifunc_hidden_def (__redirect_memchr, memchr);
-#endif
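
For reference, a stand-alone sketch of the GNU ifunc dispatch the deleted
memchr.c set up, using the generic __attribute__ ((ifunc)) rather than
glibc's internal macros; the my_* names and the have_neon stub are
hypothetical, and the example assumes GCC on an ifunc-capable target such
as Linux:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

typedef void *memchr_fn (const void *, int, size_t);

/* Stand-ins for __memchr_neon and __memchr_noneon.  */
static void *
memchr_fast (const void *s, int c, size_t n)
{
  return memchr (s, c, n);
}

static void *
memchr_plain (const void *s, int c, size_t n)
{
  return memchr (s, c, n);
}

static int
have_neon (void)
{
  return 0;			/* Stand-in for the HWCAP_ARM_NEON check.  */
}

/* The resolver runs once, at relocation time.  */
static memchr_fn *
resolve_memchr (void)
{
  return have_neon () ? memchr_fast : memchr_plain;
}

void *my_memchr (const void *, int, size_t)
  __attribute__ ((ifunc ("resolve_memchr")));

int
main (void)
{
  const char buf[] = "hello";
  printf ("%s\n", (char *) my_memchr (buf, 'l', sizeof buf));
  return 0;
}
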
diff --git a/sysdeps/arm/armv7/multiarch/memchr_neon.S b/sysdeps/arm/armv7/multiarch/memchr_neon.S
deleted file mode 100644
index 6fbf9b8..0000000
--- a/sysdeps/arm/armv7/multiarch/memchr_neon.S
+++ /dev/null
@@ -1,202 +0,0 @@
-/* memchr implemented using NEON.
- Copyright (C) 2011-2018 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-
-/* For __ARM_NEON__ this file defines memchr. */
-#ifndef __ARM_NEON__
-# define memchr __memchr_neon
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(a)
-#endif
-
- .arch armv7-a
- .fpu neon
-
-
-/* Arguments */
-#define srcin r0
-#define chrin r1
-#define cntin r2
-
-/* Retval */
-#define result r0 /* Live range does not overlap with srcin */
-
-/* Working registers */
-#define src r1 /* Live range does not overlap with chrin */
-#define tmp r3
-#define synd r0 /* No overlap with srcin or result */
-#define soff r12
-
-/* Working NEON registers */
-#define vrepchr q0
-#define vdata0 q1
-#define vdata0_0 d2 /* Lower half of vdata0 */
-#define vdata0_1 d3 /* Upper half of vdata0 */
-#define vdata1 q2
-#define vdata1_0 d4 /* Lower half of vhas_chr0 */
-#define vdata1_1 d5 /* Upper half of vhas_chr0 */
-#define vrepmask q3
-#define vrepmask0 d6
-#define vrepmask1 d7
-#define vend q4
-#define vend0 d8
-#define vend1 d9
-
-/*
- * Core algorithm:
- *
- * For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per
- * byte. Each bit is set if the relevant byte matched the requested character
- * and cleared otherwise. Since the bits in the syndrome reflect exactly the
- * order in which things occur in the original string, counting trailing zeros
- * allows to identify exactly which byte has matched.
- */
-
- .thumb_func
- .p2align 4,,15
-
-ENTRY(memchr)
- /* Use a simple loop if there are less than 8 bytes to search. */
- cmp cntin, #7
- bhi .Llargestr
- and chrin, chrin, #0xff
-
-.Lsmallstr:
- subs cntin, cntin, #1
- blo .Lnotfound /* Return not found if reached end. */
- ldrb tmp, [srcin], #1
- cmp tmp, chrin
- bne .Lsmallstr /* Loop again if not found. */
- /* Otherwise fixup address and return. */
- sub result, srcin, #1
- bx lr
-
-
-.Llargestr:
- vdup.8 vrepchr, chrin /* Duplicate char across all lanes. */
- /*
- * Magic constant 0x8040201008040201 allows us to identify which lane
- * matches the requested byte.
- */
- movw tmp, #0x0201
- movt tmp, #0x0804
- lsl soff, tmp, #4
- vmov vrepmask0, tmp, soff
- vmov vrepmask1, tmp, soff
- /* Work with aligned 32-byte chunks */
- bic src, srcin, #31
- ands soff, srcin, #31
- beq .Lloopintro /* Go straight to main loop if it's aligned. */
-
- /*
- * Input string is not 32-byte aligned. We calculate the syndrome
- * value for the aligned 32 bytes block containing the first bytes
- * and mask the irrelevant part.
- */
- vld1.8 {vdata0, vdata1}, [src:256]!
- sub tmp, soff, #32
- adds cntin, cntin, tmp
- vceq.i8 vdata0, vdata0, vrepchr
- vceq.i8 vdata1, vdata1, vrepchr
- vand vdata0, vdata0, vrepmask
- vand vdata1, vdata1, vrepmask
- vpadd.i8 vdata0_0, vdata0_0, vdata0_1
- vpadd.i8 vdata1_0, vdata1_0, vdata1_1
- vpadd.i8 vdata0_0, vdata0_0, vdata1_0
- vpadd.i8 vdata0_0, vdata0_0, vdata0_0
- vmov synd, vdata0_0[0]
-
- /* Clear the soff lower bits */
- lsr synd, synd, soff
- lsl synd, synd, soff
- /* The first block can also be the last */
- bls .Lmasklast
- /* Have we found something already? */
- cbnz synd, .Ltail
-
-
-.Lloopintro:
- vpush {vend}
- /* 264/265 correspond to d8/d9 for q4 */
- cfi_adjust_cfa_offset (16)
- cfi_rel_offset (264, 0)
- cfi_rel_offset (265, 8)
- .p2align 3,,7
-.Lloop:
- vld1.8 {vdata0, vdata1}, [src:256]!
- subs cntin, cntin, #32
- vceq.i8 vdata0, vdata0, vrepchr
- vceq.i8 vdata1, vdata1, vrepchr
- /* If we're out of data we finish regardless of the result. */
- bls .Lend
- /* Use a fast check for the termination condition. */
- vorr vend, vdata0, vdata1
- vorr vend0, vend0, vend1
- vmov synd, tmp, vend0
- orrs synd, synd, tmp
- /* We're not out of data, loop if we haven't found the character. */
- beq .Lloop
-
-.Lend:
- vpop {vend}
- cfi_adjust_cfa_offset (-16)
- cfi_restore (264)
- cfi_restore (265)
-
- /* Termination condition found, let's calculate the syndrome value. */
- vand vdata0, vdata0, vrepmask
- vand vdata1, vdata1, vrepmask
- vpadd.i8 vdata0_0, vdata0_0, vdata0_1
- vpadd.i8 vdata1_0, vdata1_0, vdata1_1
- vpadd.i8 vdata0_0, vdata0_0, vdata1_0
- vpadd.i8 vdata0_0, vdata0_0, vdata0_0
- vmov synd, vdata0_0[0]
- cbz synd, .Lnotfound
- bhi .Ltail /* Uses the condition code from
- subs cntin, cntin, #32 above. */
-
-
-.Lmasklast:
- /* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */
- neg cntin, cntin
- lsl synd, synd, cntin
- lsrs synd, synd, cntin
- it eq
- moveq src, #0 /* If no match, set src to 0 so the retval is 0. */
-
-
-.Ltail:
- /* Count the trailing zeros using bit reversing */
- rbit synd, synd
- /* Compensate the last post-increment */
- sub src, src, #32
- /* Count the leading zeros */
- clz synd, synd
- /* Compute the potential result and return */
- add result, src, synd
- bx lr
-
-
-.Lnotfound:
- /* Set result to NULL if not found and return */
- mov result, #0
- bx lr
-
-END(memchr)
-libc_hidden_builtin_def (memchr)
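
The deleted NEON memchr reduces each 32-byte chunk to a 32-bit syndrome,
one bit per byte, via the 0x8040201008040201 constant (a distinct bit per
lane of a d-register) and the vpadd.i8 accumulation tree; rbit + clz then
index the first match.  A hypothetical C model (not part of the patch or
of glibc):

#include <stdint.h>
#include <stdio.h>

static uint32_t
syndrome32 (const uint8_t chunk[32], uint8_t c)
{
  uint32_t synd = 0;
  for (int i = 0; i < 32; i++)	/* vceq + lane masks + vpadd tree.  */
    if (chunk[i] == c)
      synd |= (uint32_t) 1 << i;
  return synd;
}

int
main (void)
{
  const uint8_t buf[32] = "abcdefghijklmnopqrstuvwxyz01234";
  uint32_t s = syndrome32 (buf, 'k');
  /* __builtin_ctz plays the role of the rbit/clz pair.  */
  printf ("first 'k' at offset %d\n", s ? (int) __builtin_ctz (s) : -1);
  return 0;
}
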
diff --git a/sysdeps/arm/armv7/multiarch/memchr_noneon.S b/sysdeps/arm/armv7/multiarch/memchr_noneon.S
deleted file mode 100644
index b1fb540..0000000
--- a/sysdeps/arm/armv7/multiarch/memchr_noneon.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#define memchr __memchr_noneon
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-#include <sysdeps/arm/armv6t2/memchr.S>
diff --git a/sysdeps/arm/armv7/multiarch/rtld-memchr.S b/sysdeps/arm/armv7/multiarch/rtld-memchr.S
deleted file mode 100644
index ae8e5f0..0000000
--- a/sysdeps/arm/armv7/multiarch/rtld-memchr.S
+++ /dev/null
@@ -1 +0,0 @@
-#include <sysdeps/arm/armv6t2/memchr.S>
--
2.30.0