diff -Nrup a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile --- a/sysdeps/i386/i686/multiarch/Makefile 2012-01-01 05:16:32.000000000 -0700 +++ b/sysdeps/i386/i686/multiarch/Makefile 2012-07-03 06:54:52.816446222 -0600 @@ -1,5 +1,6 @@ ifeq ($(subdir),csu) aux += init-arch +tests += test-multiarch gen-as-const-headers += ifunc-defines.sym endif diff -Nrup a/sysdeps/i386/i686/multiarch/test-multiarch.c b/sysdeps/i386/i686/multiarch/test-multiarch.c --- a/sysdeps/i386/i686/multiarch/test-multiarch.c 1969-12-31 17:00:00.000000000 -0700 +++ b/sysdeps/i386/i686/multiarch/test-multiarch.c 2012-07-03 06:54:58.989413307 -0600 @@ -0,0 +1 @@ +#include <sysdeps/x86_64/multiarch/test-multiarch.c> diff -Nrup a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c --- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c 2012-07-03 06:52:21.747254987 -0600 +++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c 2012-07-03 10:26:59.261017426 -0600 @@ -14,7 +14,7 @@ extern double __ieee754_atan2_fma4 (doub libm_ifunc (__ieee754_atan2, HAS_FMA4 ? __ieee754_atan2_fma4 - : (HAS_YMM_USABLE ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); + : (HAS_AVX ? __ieee754_atan2_avx : __ieee754_atan2_sse2)); strong_alias (__ieee754_atan2, __atan2_finite) # define __ieee754_atan2 __ieee754_atan2_sse2 diff -Nrup a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c --- a/sysdeps/x86_64/fpu/multiarch/e_exp.c 2012-07-03 06:52:21.748254982 -0600 +++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c 2012-07-03 10:26:59.261017426 -0600 @@ -14,7 +14,7 @@ extern double __ieee754_exp_fma4 (double libm_ifunc (__ieee754_exp, HAS_FMA4 ? __ieee754_exp_fma4 - : (HAS_YMM_USABLE ? __ieee754_exp_avx : __ieee754_exp_sse2)); + : (HAS_AVX ? 
__ieee754_exp_avx : __ieee754_exp_sse2)); strong_alias (__ieee754_exp, __exp_finite) # define __ieee754_exp __ieee754_exp_sse2 diff -Nrup a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c --- a/sysdeps/x86_64/fpu/multiarch/e_log.c 2012-07-03 06:52:21.748254982 -0600 +++ b/sysdeps/x86_64/fpu/multiarch/e_log.c 2012-07-03 10:26:59.262017420 -0600 @@ -14,8 +14,7 @@ extern double __ieee754_log_fma4 (double libm_ifunc (__ieee754_log, HAS_FMA4 ? __ieee754_log_fma4 - : (HAS_YMM_USABLE ? __ieee754_log_avx - : __ieee754_log_sse2)); + : (HAS_AVX ? __ieee754_log_avx : __ieee754_log_sse2)); strong_alias (__ieee754_log, __log_finite) # define __ieee754_log __ieee754_log_sse2 diff -Nrup a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c --- a/sysdeps/x86_64/fpu/multiarch/s_atan.c 2012-07-03 06:52:21.749254977 -0600 +++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c 2012-07-03 10:26:59.263017414 -0600 @@ -13,7 +13,7 @@ extern double __atan_fma4 (double); # endif libm_ifunc (atan, (HAS_FMA4 ? __atan_fma4 : - HAS_YMM_USABLE ? __atan_avx : __atan_sse2)); + HAS_AVX ? __atan_avx : __atan_sse2)); # define atan __atan_sse2 #endif diff -Nrup a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c --- a/sysdeps/x86_64/fpu/multiarch/s_sin.c 2012-07-03 06:52:21.749254977 -0600 +++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c 2012-07-03 10:26:59.263017414 -0600 @@ -18,11 +18,11 @@ extern double __sin_fma4 (double); # endif libm_ifunc (__cos, (HAS_FMA4 ? __cos_fma4 : - HAS_YMM_USABLE ? __cos_avx : __cos_sse2)); + HAS_AVX ? __cos_avx : __cos_sse2)); weak_alias (__cos, cos) libm_ifunc (__sin, (HAS_FMA4 ? __sin_fma4 : - HAS_YMM_USABLE ? __sin_avx : __sin_sse2)); + HAS_AVX ? 
__sin_avx : __sin_sse2)); weak_alias (__sin, sin) # define __cos __cos_sse2 diff -Nrup a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c --- a/sysdeps/x86_64/fpu/multiarch/s_tan.c 2012-07-03 06:52:21.750254972 -0600 +++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c 2012-07-03 10:26:59.264017408 -0600 @@ -13,7 +13,7 @@ extern double __tan_fma4 (double); # endif libm_ifunc (tan, (HAS_FMA4 ? __tan_fma4 : - HAS_YMM_USABLE ? __tan_avx : __tan_sse2)); + HAS_AVX ? __tan_avx : __tan_sse2)); # define tan __tan_sse2 #endif diff -Nrup a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile --- a/sysdeps/x86_64/multiarch/Makefile 2012-01-01 05:16:32.000000000 -0700 +++ b/sysdeps/x86_64/multiarch/Makefile 2012-07-03 06:53:48.964787267 -0600 @@ -1,5 +1,6 @@ ifeq ($(subdir),csu) aux += init-arch +tests += test-multiarch gen-as-const-headers += ifunc-defines.sym endif diff -Nrup a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c --- a/sysdeps/x86_64/multiarch/init-arch.c 2012-07-03 06:52:21.792254746 -0600 +++ b/sysdeps/x86_64/multiarch/init-arch.c 2012-07-03 06:53:20.227941127 -0600 @@ -1,6 +1,6 @@ /* Initialize CPU feature data. This file is part of the GNU C Library. - Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 2008-2012 Free Software Foundation, Inc. Contributed by Ulrich Drepper <drepper@redhat.com>. The GNU C Library is free software; you can redistribute it and/or @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ #include <atomic.h> #include <cpuid.h> @@ -144,18 +143,23 @@ __init_cpu_features (void) else kind = arch_kind_other; - if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX) + /* Can we call xgetbv? */ + if (CPUID_OSXSAVE) { - /* Reset the AVX bit in case OSXSAVE is disabled. */ - if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0 - && ({ unsigned int xcrlow; - unsigned int xcrhigh; - asm ("xgetbv" - : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); - (xcrlow & 6) == 6; })) - __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable; - else - __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx &= ~bit_AVX; + unsigned int xcrlow; + unsigned int xcrhigh; + asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); + /* Is YMM and XMM state usable? */ + if ((xcrlow & (bit_YMM_state | bit_XMM_state)) == + (bit_YMM_state | bit_XMM_state)) + { + /* Determine if AVX is usable. */ + if (CPUID_AVX) + __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable; + /* Determine if FMA4 is usable. */ + if (CPUID_FMA4) + __cpu_features.feature[index_FMA4_Usable] |= bit_FMA4_Usable; + } } __cpu_features.family = family; diff -Nrup a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h --- a/sysdeps/x86_64/multiarch/init-arch.h 2012-07-03 06:52:21.751254967 -0600 +++ b/sysdeps/x86_64/multiarch/init-arch.h 2012-07-03 06:53:10.548993004 -0600 @@ -1,5 +1,5 @@ /* This file is part of the GNU C Library. - Copyright (C) 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 2008-2012 Free Software Foundation, Inc. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -12,9 +12,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. 
*/ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #define bit_Fast_Rep_String (1 << 0) #define bit_Fast_Copy_Backward (1 << 1) @@ -22,8 +21,10 @@ #define bit_Prefer_SSE_for_memop (1 << 3) #define bit_Fast_Unaligned_Load (1 << 4) #define bit_Prefer_PMINUB_for_stringop (1 << 5) -#define bit_YMM_Usable (1 << 6) +#define bit_AVX_Usable (1 << 6) +#define bit_FMA4_Usable (1 << 7) +/* CPUID Feature flags. */ #define bit_SSE2 (1 << 26) #define bit_SSSE3 (1 << 9) #define bit_SSE4_1 (1 << 19) @@ -34,6 +35,10 @@ #define bit_FMA (1 << 12) #define bit_FMA4 (1 << 16) +/* XCR0 Feature flags. */ +#define bit_XMM_state (1 << 1) +#define bit_YMM_state (2 << 1) /* Same value as (1 << 2). */ + #ifdef __ASSEMBLER__ # include <ifunc-defines.h> @@ -50,7 +55,8 @@ # define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE # define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE -# define index_YMM_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_AVX_Usable FEATURE_INDEX_1*FEATURE_SIZE +# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE #else /* __ASSEMBLER__ */ @@ -114,35 +120,45 @@ extern const struct cpu_features *__get_ /* Following are the feature tests used throughout libc. */ +/* CPUID_* evaluates to true if the feature flag is enabled. + We always use &__cpu_features because the CPUID_* macros + are called only within __init_cpu_features, where we can't + call __get_cpu_features without infinite recursion. */ +# define HAS_CPUID_FLAG(idx, reg, bit) \ + (((&__cpu_features)->cpuid[idx].reg & (bit)) != 0) + +# define CPUID_OSXSAVE \ + HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_OSXSAVE) +# define CPUID_AVX \ + HAS_CPUID_FLAG (COMMON_CPUID_INDEX_1, ecx, bit_AVX) +# define CPUID_FMA4 \ + HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4) + +/* HAS_* evaluates to true if we may use the feature at runtime.
*/ # define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2) # define HAS_POPCOUNT HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_POPCOUNT) # define HAS_SSSE3 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSSE3) # define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1) # define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2) # define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA) -# define HAS_AVX HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX) -# define HAS_FMA4 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4) # define index_Fast_Rep_String FEATURE_INDEX_1 # define index_Fast_Copy_Backward FEATURE_INDEX_1 # define index_Slow_BSF FEATURE_INDEX_1 # define index_Prefer_SSE_for_memop FEATURE_INDEX_1 # define index_Fast_Unaligned_Load FEATURE_INDEX_1 -# define index_YMM_Usable FEATURE_INDEX_1 +# define index_AVX_Usable FEATURE_INDEX_1 +# define index_FMA4_Usable FEATURE_INDEX_1 # define HAS_ARCH_FEATURE(name) \ ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0) -# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String) - -# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward) - -# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF) - -# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop) - -# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) - -# define HAS_YMM_USABLE HAS_ARCH_FEATURE (YMM_Usable) +# define HAS_FAST_REP_STRING HAS_ARCH_FEATURE (Fast_Rep_String) +# define HAS_FAST_COPY_BACKWARD HAS_ARCH_FEATURE (Fast_Copy_Backward) +# define HAS_SLOW_BSF HAS_ARCH_FEATURE (Slow_BSF) +# define HAS_PREFER_SSE_FOR_MEMOP HAS_ARCH_FEATURE (Prefer_SSE_for_memop) +# define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load) +# define HAS_AVX HAS_ARCH_FEATURE (AVX_Usable) +# define HAS_FMA4 HAS_ARCH_FEATURE (FMA4_Usable) #endif /* __ASSEMBLER__ */ diff -Nrup a/sysdeps/x86_64/multiarch/strcmp.S 
b/sysdeps/x86_64/multiarch/strcmp.S --- a/sysdeps/x86_64/multiarch/strcmp.S 2012-01-01 05:16:32.000000000 -0700 +++ b/sysdeps/x86_64/multiarch/strcmp.S 2012-07-03 06:53:42.956819413 -0600 @@ -1,5 +1,5 @@ /* strcmp with SSE4.2 - Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2009-2012 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include <sysdep.h> #include <init-arch.h> @@ -84,6 +83,7 @@ .text ENTRY(STRCMP) .type STRCMP, @gnu_indirect_function + /* Manually inlined call to __get_cpu_features. */ cmpl $0, __cpu_features+KIND_OFFSET(%rip) jne 1f call __init_cpu_features @@ -101,13 +101,14 @@ END(STRCMP) # ifdef USE_AS_STRCASECMP_L ENTRY(__strcasecmp) .type __strcasecmp, @gnu_indirect_function + /* Manually inlined call to __get_cpu_features. */ cmpl $0, __cpu_features+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: # ifdef HAVE_AVX_SUPPORT leaq __strcasecmp_avx(%rip), %rax - testl $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip) + testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip) jnz 2f # endif leaq __strcasecmp_sse42(%rip), %rax @@ -124,13 +125,14 @@ weak_alias (__strcasecmp, strcasecmp) # ifdef USE_AS_STRNCASECMP_L ENTRY(__strncasecmp) .type __strncasecmp, @gnu_indirect_function + /* Manually inlined call to __get_cpu_features. 
*/ cmpl $0, __cpu_features+KIND_OFFSET(%rip) jne 1f call __init_cpu_features 1: # ifdef HAVE_AVX_SUPPORT leaq __strncasecmp_avx(%rip), %rax - testl $bit_AVX, __cpu_features+CPUID_OFFSET+index_AVX(%rip) + testl $bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip) jnz 2f # endif leaq __strncasecmp_sse42(%rip), %rax diff -Nrup a/sysdeps/x86_64/multiarch/test-multiarch.c b/sysdeps/x86_64/multiarch/test-multiarch.c --- a/sysdeps/x86_64/multiarch/test-multiarch.c 1969-12-31 17:00:00.000000000 -0700 +++ b/sysdeps/x86_64/multiarch/test-multiarch.c 2012-07-03 06:53:54.418758092 -0600 @@ -0,0 +1,90 @@ +/* Test CPU feature data. + This file is part of the GNU C Library. + Copyright (C) 2012 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <init-arch.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +static char *cpu_flags; + +/* Search for flags in /proc/cpuinfo and store line + in cpu_flags. 
*/
+void
+get_cpuinfo (void)
+{
+  char *line = NULL;
+  size_t len = 0;
+  FILE *f = fopen ("/proc/cpuinfo", "r");
+
+  if (f == NULL)
+    {
+      printf ("cannot open /proc/cpuinfo\n");
+      exit (1);
+    }
+
+  /* Keep only the first line that starts with "flags".  */
+  while (getline (&line, &len, f) != -1)
+    if (strncmp (line, "flags", 5) == 0)
+      {
+        cpu_flags = strdup (line);
+        break;
+      }
+  fclose (f);
+  free (line);
+}
+
+/* Report whether PROC_NAME occurs in the saved flags line and compare
+   that with FLAG, the init-arch value of the macro called NAME.
+   cpu_flags is still NULL when get_cpuinfo found no flags line; that
+   counts as "not listed".  Return 1 on a mismatch, 0 otherwise.  */
+int
+check_proc (const char *proc_name, int flag, const char *name)
+{
+  int found = cpu_flags != NULL && strstr (cpu_flags, proc_name) != NULL;
+
+  printf ("Checking %s:\n", name);
+  printf (" init-arch %d\n", flag);
+  printf (" cpuinfo (%s) %d\n", proc_name, found);
+
+  if (found != flag)
+    printf (" *** failure ***\n");
+
+  return (found != flag);
+}
+
+/* Compare the feature macros with /proc/cpuinfo; nonzero on mismatch.  */
+static int
+do_test (int argc, char **argv)
+{
+  int fails;
+
+  get_cpuinfo ();
+  fails = check_proc ("avx", HAS_AVX, "HAS_AVX");
+  fails += check_proc ("fma4", HAS_FMA4, "HAS_FMA4");
+  fails += check_proc ("sse4_2", HAS_SSE4_2, "HAS_SSE4_2");
+  fails += check_proc ("sse4_1", HAS_SSE4_1, "HAS_SSE4_1");
+  fails += check_proc ("ssse3", HAS_SSSE3, "HAS_SSSE3");
+  fails += check_proc ("popcnt", HAS_POPCOUNT, "HAS_POPCOUNT");
+
+  printf ("%d differences between /proc/cpuinfo and glibc code.\n", fails);
+
+  return (fails != 0);
+}
+
+#include "../../../test-skeleton.c"