1 /* Multiple versions of strlen
2 Copyright (C) 2009 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22 #include <ifunc-defines.h>
24 /* Define multiple versions only for the definition in libc and for the
25 DSO. In static binaries, we need strlen before the initialization
27 #if defined SHARED && !defined NOT_IN_libc
/* Declaration of the PC thunk __i686.get_pc_thunk.bx.  It lives in its own
   .gnu.linkonce.t.* section (flags "ax": allocatable, executable), is
   exported with .globl but marked .hidden, and is typed as a function.
   The strlen resolver calls it immediately before adding
   $_GLOBAL_OFFSET_TABLE_ to %ebx.  */
28 .section .gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
29 .globl __i686.get_pc_thunk.bx
30 .hidden __i686.get_pc_thunk.bx
32 .type __i686.get_pc_thunk.bx,@function
/* NOTE(review): going by its name and by how the resolver uses %ebx after
   the call, the thunk presumably returns its caller's PC in %ebx --
   confirm against the thunk body.  */
33 __i686.get_pc_thunk.bx:
/* IFUNC resolver for strlen.  The symbol is typed @gnu_indirect_function;
   the code below leaves the address of the selected implementation in
   %eax: __strlen_ia32 by default, __strlen_sse2 as the alternative.  */
39 .type strlen, @gnu_indirect_function
/* CFI bookkeeping for a 4-byte stack slot: the CFA offset grows by 4 and
   the caller's %ebx is recorded as saved at offset 0 from the CFA
   register.  */
41 cfi_adjust_cfa_offset (4)
42 cfi_rel_offset (ebx, 0)
/* PIC setup: after the thunk call, adding $_GLOBAL_OFFSET_TABLE_ forms in
   %ebx the base that every @GOTOFF operand below is relative to.  */
43 call __i686.get_pc_thunk.bx
44 addl $_GLOBAL_OFFSET_TABLE_, %ebx
/* Compare the field at KIND_OFFSET inside __cpu_features with zero.
   NOTE(review): presumably zero means the CPU data has not been filled in
   yet and __init_cpu_features is meant to run only in that case -- confirm
   the conditional jump between these two instructions.  */
45 cmpl $0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
47 call __init_cpu_features
/* Default choice: %eax = address of __strlen_ia32.  */
48 1: leal __strlen_ia32@GOTOFF(%ebx), %eax
/* Test bit 26 of the EDX word stored for CPUID index 1 in __cpu_features;
   in CPUID leaf 1 that EDX bit is the SSE2 feature flag.  */
49 testl $(1<<26), CPUID_OFFSET+COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_EDX_OFFSET+__cpu_features@GOTOFF(%ebx)
/* SSE2 choice: overwrite %eax with the address of __strlen_sse2.
   NOTE(review): this must be skipped when the bit tested above is clear --
   confirm the conditional jump guarding it.  */
51 leal __strlen_sse2@GOTOFF(%ebx), %eax
/* CFI bookkeeping for releasing the 4-byte stack slot again.  */
53 cfi_adjust_cfa_offset (-4);
/* CFI_POP (REG): unwind bookkeeping that accompanies a 4-byte pop; its
   first step moves the CFA offset back by 4.
   RETURN: pops %esi, applies CFI_POP (esi) and returns to the caller.  */
58 #define CFI_POP(REG) \
59 cfi_adjust_cfa_offset (-4); \
62 #define RETURN popl %esi; CFI_POP (esi); ret
67 * This implementation uses SSE instructions to compare up to 16 bytes
68 * at a time looking for the end of string (null char).
/* SSE2 string-length scan.  NOTE(review): presumably this is the body of
   __strlen_sse2, the symbol the strlen resolver can select -- confirm.
   Register roles, as far as the instructions below show them:
     %esi   address of the 16-byte chunk to test; advanced by 16 right
            after each chunk has been compared
     %xmm0  all-zero pattern going into each pcmpeqb, compare result after
     %edx   mask from pmovmskb: one bit per byte that compared equal to 0
     %ecx   shift count / byte adjustment on the unaligned path, later the
            index of the NUL inside its chunk
     %eax   combined with %esi in the final length computation  */
/* CFI bookkeeping for a 4-byte stack slot: the CFA offset grows by 4 and
   the caller's %esi is recorded as saved at offset 0 from the CFA
   register.  */
71 cfi_adjust_cfa_offset (4)
72 cfi_rel_offset (%esi, 0)
75 pxor %xmm0, %xmm0 /* 16 null chars */
78 jz 1f /* string is 16 byte aligned */
81 * Unaligned case. Round down to 16-byte boundary before comparing
82 * 16 bytes for a null char. The code then compensates for any extra chars
83 * preceding the start of the string.
91 shr %cl, %edx /* Compensate for bytes preceding the string */
/* The shift above discards the low %cl bits of the mask, i.e. any matches
   on bytes that lie in front of the string.
   NOTE(review): this relies on %ecx holding the string's offset within
   its 16-byte chunk -- confirm where %ecx is set.  */
94 sub %ecx, %esi /* no null, adjust to next 16-byte boundary */
/* A zero byte among the discarded leading bytes would leave non-zero bytes
   in %xmm0 even though the shifted mask is 0, so the zero pattern is
   rebuilt before the aligned scan.  */
95 pxor %xmm0, %xmm0 /* clear xmm0, may have been changed... */
/* Aligned scan.  pcmpeqb sets each byte of %xmm0 to 0xff where the memory
   byte equals the byte already in %xmm0 (zero) and to 0 elsewhere;
   pmovmskb gathers the top bit of every byte into %edx.  */
98 1: /* 16 byte aligned */
99 pcmpeqb (%esi), %xmm0 /* look for null bytes */
100 pmovmskb %xmm0, %edx /* move each byte mask of %xmm0 to edx */
/* %esi is advanced before the mask is tested, so whenever the "found a
   null" branch is taken %esi already points 16 bytes past the chunk that
   contains the NUL.  */
102 add $16, %esi /* prepare to search next 16 bytes */
103 test %edx, %edx /* if no null byte, %edx must be 0 */
104 jnz 2f /* found a null */
/* Falling through means no byte matched, so the compare left %xmm0 all
   zero and it serves as the zero pattern for the next chunk without
   another pxor.  The same compare is repeated on the following chunks.  */
106 pcmpeqb (%esi), %xmm0
112 pcmpeqb (%esi), %xmm0
118 pcmpeqb (%esi), %xmm0
/* %eax = %eax + %esi - 16; the -16 undoes the advance of %esi that was
   applied after the matching chunk had been compared.
   NOTE(review): for the sum to be a length, %eax has to hold the negated
   start address of the string at this point -- confirm.  */
126 lea -16(%eax, %esi), %eax /* calculate exact offset */
/* bsf leaves in %ecx the index of the lowest set mask bit, i.e. the
   position of the first NUL inside its 16-byte chunk.  */
127 bsf %edx, %ecx /* Least significant 1 bit is index of null */
/* CFI bookkeeping for releasing the 4-byte stack slot again.  */
130 cfi_adjust_cfa_offset (-4)
/* Redefine the assembler helper macros so that they ignore their NAME
   argument and always refer to __strlen_ia32:
     - ENTRY emits a global function label __strlen_ia32 and opens a CFI
       region with cfi_startproc;
     - the closing side ends the CFI region with cfi_endproc and sets the
       size of __strlen_ia32;
     - libc_hidden_builtin_def makes __GI_strlen a global alias of
       __strlen_ia32.
   NOTE(review): presumably this is what turns the strlen defined by the
   "../../i586/strlen.S" include that follows into __strlen_ia32 --
   confirm against that file.  */
137 # define ENTRY(name) \
138 .type __strlen_ia32, @function; \
139 .globl __strlen_ia32; \
141 __strlen_ia32: cfi_startproc; \
145 cfi_endproc; .size __strlen_ia32, .-__strlen_ia32
146 # undef libc_hidden_builtin_def
147 /* IFUNC doesn't work with the hidden functions in shared library since
148 they will be called without setting up EBX needed for PLT which is
150 # define libc_hidden_builtin_def(name) \
151 .globl __GI_strlen; __GI_strlen = __strlen_ia32
154 #include "../../i586/strlen.S"