From 2339767f2e6796590081d9a2ab2de4dca1ed2a68 Mon Sep 17 00:00:00 2001 From: hjl Date: Tue, 23 Aug 2011 17:02:15 +0000 Subject: [PATCH] Add BMI2 support. gcc/ 2011-08-23 Uros Bizjak * config/i386/i386.md (type): Add imulx, ishiftx and rotatex. (length_immediate): Handle imulx, ishiftx and rotatex. (imm_disp): Ditto. (isa): Add bmi2. (enabled): Handle bmi2. (w): New mode attribute. (*mul3): Split from *mul3. (*umul3): Ditto. Add imulx BMI2 alternative. (*bmi2_umulditi3_1): New insn pattern. (*bmi2_umulsidi3_1): Ditto. (*umul3 splitter): New splitter to avoid flags dependency. (*bmi2_ashl3_1): New insn pattern. (*ashl3_1): Add ishiftx BMI2 alternative. (*ashl3_1 splitter): New splitter to avoid flags dependency. (*bmi2_ashlsi3_1_zext): New insn pattern. (*ashlsi3_1_zext): Add ishiftx BMI2 alternative. (*ashlsi3_1_zext splitter): New splitter to avoid flags dependency. (*bmi2_3_1): New insn pattern. (*3_1): Add ishiftx BMI2 alternative. (*3_1 splitter): New splitter to avoid flags dependency. (*bmi2_si3_1_zext): New insn pattern. (*si3_1_zext): Add ishiftx BMI2 alternative. (*si3_1_zext splitter): New splitter to avoid flags dependency. (*bmi2_rorx3_1): New insn pattern. (*3_1): Add rotatex BMI2 alternative. (*rotate3_1 splitter): New splitter to avoid flags dependency. (*rotatert3_1 splitter): Ditto. (*bmi2_rorxsi3_1_zext): New insn pattern. (*si3_1_zext): Add rotatex BMI2 alternative. (*rotatesi3_1_zext splitter): New splitter to avoid flags dependency. (*rotatertsi3_1_zext splitter): Ditto. 2011-08-23 Kirill Yukhin * common/config/i386/i386-common.c (OPTION_MASK_ISA_BMI2_SET): New. (OPTION_MASK_ISA_BMI2_UNSET): Likewise. (ix86_handle_option): Handle OPT_mbmi2 case. * config.gcc (i[34567]86-*-*): Add bmi2intrin.h. (x86_64-*-*): Likewise. * config/i386/bmi2intrin.h: New file. * config/i386/cpuid.h (bit_BMI2): New. * config/i386/driver-i386.c (host_detect_local_cpu): Detect BMI2 feature. * config/i386/i386-c.c (ix86_target_macros_internal): Conditionally define __BMI2__. * config/i386/i386.c (ix86_option_override_internal): Define PTA_BMI2. Handle BMI2 option. (ix86_valid_target_attribute_inner_p): Handle BMI2 option. (print_reg): New code. (ix86_print_operand): Likewise. (ix86_builtins): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64, IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32, IX86_BUILTIN_PEXT64. (bdesc_args): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64, IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32, IX86_BUILTIN_PEXT64. * config/i386/i386.h (TARGET_BMI2): New. * config/i386/i386.md (UNSPEC_PDEP): New. (UNSPEC_PEXT): Likewise. (*bmi2_bzhi_3): Likewise. (*bmi2_pdep_3): Likewise. (*bmi2_pext_3): Likewise. * config/i386/i386.opt (mbmi2): New. * config/i386/x86intrin.h: Include bmi2intrin.h when __BMI2__ is defined. * doc/extend.texi: Document BMI2 built-in functions. * doc/invoke.texi: Document -mbmi2. gcc/testsuite/ 2011-08-23 Kirill Yukhin * g++.dg/other/i386-2.C: Add -mbmi2 check. * g++.dg/other/i386-3.C: Likewise. * gcc.target/i386/bmi2-bzhi32-1.c: New testcase. * gcc.target/i386/bmi2-bzhi32-1a.c: Likewise. * gcc.target/i386/bmi2-bzhi64-1.c: Likewise. * gcc.target/i386/bmi2-bzhi64-1a.c: Likewise. * gcc.target/i386/bmi2-mulx32-1.c: Likewise. * gcc.target/i386/bmi2-mulx32-1a.c: Likewise. * gcc.target/i386/bmi2-mulx64-1.c: Likewise. * gcc.target/i386/bmi2-mulx64-1a.c: Likewise. * gcc.target/i386/bmi2-pdep32-1.c: Likewise. * gcc.target/i386/bmi2-pdep32-1a.c: Likewise. * gcc.target/i386/bmi2-pdep64-1.c: Likewise. * gcc.target/i386/bmi2-pdep64-1a.c: Likewise. * gcc.target/i386/bmi2-pext32-1.c: Likewise. * gcc.target/i386/bmi2-pext32-1a.c: Likewise. * gcc.target/i386/bmi2-pext64-1.c: Likewise. * gcc.target/i386/bmi2-pext64-1a.c: Likewise. * gcc.target/i386/bmi2-rorx32-1.c: Likewise. * gcc.target/i386/bmi2-rorx32-1a.c: Likewise. * gcc.target/i386/bmi2-rorx64-1.c: Likewise. * gcc.target/i386/bmi2-rorx64-1a.c: Likewise. * gcc.target/i386/bmi2-sarx32-1.c: Likewise. * gcc.target/i386/bmi2-sarx32-1a.c: Likewise. * gcc.target/i386/bmi2-sarx64-1.c: Likewise. * gcc.target/i386/bmi2-sarx64-1a.c: Likewise. * gcc.target/i386/bmi2-shlx32-1.c: Likewise. * gcc.target/i386/bmi2-shlx32-1a.c: Likewise. * gcc.target/i386/bmi2-shlx64-1.c: Likewise. * gcc.target/i386/bmi2-shlx64-1a.c: Likewise. * gcc.target/i386/bmi2-shrx32-1.c: Likewise. * gcc.target/i386/bmi2-shrx32-1a.c: Likewise. * gcc.target/i386/bmi2-shrx64-1.c: Likewise. * gcc.target/i386/bmi2-shrx64-1a.c: Likewise. * gcc.target/i386/i386.exp (check_effective_target_bmi2): New. * gcc.target/i386/sse-12.c: Add BMI2. * gcc.target/i386/sse-13.c: Likewise. * gcc.target/i386/sse-14.c: Likewise. * gcc.target/i386/sse-22.c: Likewise. * gcc.target/i386/sse-23.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@178001 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 78 ++++ gcc/common/config/i386/i386-common.c | 15 + gcc/config.gcc | 6 +- gcc/config/i386/bmi2intrin.h | 81 +++++ gcc/config/i386/cpuid.h | 1 + gcc/config/i386/driver-i386.c | 8 +- gcc/config/i386/i386-c.c | 2 + gcc/config/i386/i386.c | 25 +- gcc/config/i386/i386.h | 1 + gcc/config/i386/i386.md | 476 ++++++++++++++++++++++--- gcc/config/i386/i386.opt | 4 + gcc/config/i386/x86intrin.h | 4 + gcc/doc/extend.texi | 11 + gcc/doc/invoke.texi | 9 +- gcc/testsuite/ChangeLog | 43 +++ gcc/testsuite/g++.dg/other/i386-2.C | 2 +- gcc/testsuite/g++.dg/other/i386-3.C | 2 +- gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c | 35 ++ gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c | 35 ++ gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-check.h | 36 ++ gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c | 47 +++ gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c | 36 ++ gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c | 39 ++ gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c | 38 ++ gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c | 39 ++ gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c | 39 ++ gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c | 36 ++ gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c | 36 ++ gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c | 34 ++ gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c | 34 ++ gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c | 34 ++ gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c | 34 ++ gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c | 6 + gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c | 34 ++ gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c | 6 + gcc/testsuite/gcc.target/i386/i386.exp | 11 + gcc/testsuite/gcc.target/i386/sse-12.c | 2 +- gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- gcc/testsuite/gcc.target/i386/sse-22.c | 6 +- gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- 54 files changed, 1397 insertions(+), 72 deletions(-) create mode 100644 gcc/config/i386/bmi2intrin.h create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-check.h create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c create mode 100644 gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6007136d81e..2fed6fa063c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,81 @@ +2011-08-23 Uros Bizjak + + * config/i386/i386.md (type): Add imulx, ishiftx and rotatex. + (length_immediate): Handle imulx, ishiftx and rotatex. + (imm_disp): Ditto. + (isa): Add bmi2. + (enabled): Handle bmi2. + (w): New mode attribute. + (*mul3): Split from *mul3. + (*umul3): Ditto. Add imulx BMI2 alternative. + (*bmi2_umulditi3_1): New insn pattern. + (*bmi2_umulsidi3_1): Ditto. + (*umul3 splitter): New splitter to avoid flags + dependency. + (*bmi2_ashl3_1): New insn pattern. + (*ashl3_1): Add ishiftx BMI2 alternative. + (*ashl3_1 splitter): New splitter to avoid flags + dependency. + (*bmi2_ashlsi3_1_zext): New insn pattern. + (*ashlsi3_1_zext): Add ishiftx BMI2 alternative. + (*ashlsi3_1_zext splitter): New splitter to avoid flags + dependency. + (*bmi2_3_1): New insn pattern. + (*3_1): Add ishiftx BMI2 alternative. + (*3_1 splitter): New splitter to avoid + flags dependency. + (*bmi2_si3_1_zext): New insn pattern. + (*si3_1_zext): Add ishiftx BMI2 alternative. + (*si3_1_zext splitter): New splitter to avoid + flags dependency. + (*bmi2_rorx3_1): New insn pattern. + (*3_1): Add rotatex BMI2 alternative. + (*rotate3_1 splitter): New splitter to avoid flags + dependency. + (*rotatert3_1 splitter): Ditto. + (*bmi2_rorxsi3_1_zext): New insn pattern. + (*si3_1_zext): Add rotatex BMI2 alternative. + (*rotatesi3_1_zext splitter): New splitter to avoid flags + dependency. + (*rotatertsi3_1_zext splitter): Ditto. + +2011-08-23 Kirill Yukhin + + * common/config/i386/i386-common.c (OPTION_MASK_ISA_BMI2_SET): + New. + (OPTION_MASK_ISA_BMI2_UNSET): Likewise. + (ix86_handle_option): Handle OPT_mbmi2 case. + * config.gcc (i[34567]86-*-*): Add bmi2intrin.h. + (x86_64-*-*): Likewise. + * config/i386/bmi2intrin.h: New file. + * config/i386/cpuid.h (bit_BMI2): New. + * config/i386/driver-i386.c (host_detect_local_cpu): Detect + BMI2 feature. + * config/i386/i386-c.c (ix86_target_macros_internal): + Conditionally define __BMI2__. + * config/i386/i386.c (ix86_option_override_internal): Define PTA_BMI2. + Handle BMI2 option. + (ix86_valid_target_attribute_inner_p): Handle BMI2 option. + (print_reg): New code. + (ix86_print_operand): Likewise. + (ix86_builtins): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64, + IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32, + IX86_BUILTIN_PEXT64. + (bdesc_args): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64, + IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32, + IX86_BUILTIN_PEXT64. + * config/i386/i386.h (TARGET_BMI2): New. + * config/i386/i386.md (UNSPEC_PDEP): New. + (UNSPEC_PEXT): Likewise. + (*bmi2_bzhi_3): Likewise. + (*bmi2_pdep_3): Likewise. + (*bmi2_pext_3): Likewise. + * config/i386/i386.opt (mbmi2): New. + * config/i386/x86intrin.h: Include bmi2intrin.h when __BMI2__ + is defined. + * doc/extend.texi: Document BMI2 built-in functions. + * doc/invoke.texi: Document -mbmi2. + 2011-08-23 Jakub Jelinek PR middle-end/50161 diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index b2018357dfd..99643d65f9b 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT) #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI +#define OPTION_MASK_ISA_BMI2_SET OPTION_MASK_ISA_BMI2 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16 @@ -137,6 +138,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI +#define OPTION_MASK_ISA_BMI2_UNSET OPTION_MASK_ISA_BMI2 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16 @@ -395,6 +397,19 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mbmi2: + if (value) + { + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI2_SET; + } + else + { + opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI2_UNSET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI2_UNSET; + } + return true; + case OPT_mtbm: if (value) { diff --git a/gcc/config.gcc b/gcc/config.gcc index b8addaf6457..67aae86d740 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -352,7 +352,8 @@ i[34567]86-*-*) nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h - lzcntintrin.h bmiintrin.h tbmintrin.h avx2intrin.h" + lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h + avx2intrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -364,7 +365,8 @@ x86_64-*-*) nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h immintrin.h x86intrin.h avxintrin.h xopintrin.h ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h - lzcntintrin.h bmiintrin.h tbmintrin.h avx2intrin.h" + lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h + avx2intrin.h" need_64bit_hwint=yes ;; ia64-*-*) diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h new file mode 100644 index 00000000000..f3ffa52a916 --- /dev/null +++ b/gcc/config/i386/bmi2intrin.h @@ -0,0 +1,81 @@ +/* Copyright (C) 2010, 2011 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _X86INTRIN_H_INCLUDED +# error "Never use directly; include instead." +#endif + +#ifndef __BMI2__ +# error "BMI2 instruction set not enabled" +#endif /* __BMI2__ */ + +#ifndef _BMI2INTRIN_H_INCLUDED +#define _BMI2INTRIN_H_INCLUDED + +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bzhi_u32 (unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_bzhi_si (__X, __Y); +} + +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pdep_u32 (unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_pdep_si (__X, __Y); +} + +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pext_u32 (unsigned int __X, unsigned int __Y) +{ + return __builtin_ia32_pext_si (__X, __Y); +} + +#ifdef __x86_64__ + +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bzhi_u64 (unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_bzhi_di (__X, __Y); +} + +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pdep_u64 (unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_pdep_di (__X, __Y); +} + +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pext_u64 (unsigned long long __X, unsigned long long __Y) +{ + return __builtin_ia32_pext_di (__X, __Y); +} + +#endif /* __x86_64__ */ + +#endif /* _BMI2INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index d53743fa158..5da8fd2e9f4 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -67,6 +67,7 @@ #define bit_FSGSBASE (1 << 0) #define bit_BMI (1 << 3) #define bit_AVX2 (1 << 5) +#define bit_BMI2 (1 << 8) #if defined(__i386__) && defined(__PIC__) /* %ebx may be the PIC register. */ diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index b7a1f523c13..8107ecee7c9 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -396,7 +396,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0; unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0; unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0; - unsigned int has_bmi = 0, has_tbm = 0, has_lzcnt = 0; + unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0; bool arch; @@ -475,6 +475,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_bmi = ebx & bit_BMI; has_avx2 = ebx & bit_AVX2; + has_bmi2 = ebx & bit_BMI2; } if (!arch) @@ -715,6 +716,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4"; const char *xop = has_xop ? " -mxop" : " -mno-xop"; const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi"; + const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2"; const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm"; const char *avx = has_avx ? " -mavx" : " -mno-avx"; const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2"; @@ -723,8 +725,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt"; options = concat (options, cx16, sahf, movbe, ase, pclmul, - popcnt, abm, lwp, fma, fma4, xop, bmi, tbm, - avx2, avx, sse4_2, sse4_1, lzcnt, NULL); + popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2, + tbm, avx, avx2, sse4_2, sse4_1, lzcnt, NULL); } done: diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 5c1dfe61038..d4b0b086b39 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -273,6 +273,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__ABM__"); if (isa_flag & OPTION_MASK_ISA_BMI) def_or_undef (parse_in, "__BMI__"); + if (isa_flag & OPTION_MASK_ISA_BMI2) + def_or_undef (parse_in, "__BMI2__"); if (isa_flag & OPTION_MASK_ISA_LZCNT) def_or_undef (parse_in, "__LZCNT__"); if (isa_flag & OPTION_MASK_ISA_TBM) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index ef02673bf86..61f2c5c87e7 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2664,6 +2664,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch, { "-mmmx", OPTION_MASK_ISA_MMX }, { "-mabm", OPTION_MASK_ISA_ABM }, { "-mbmi", OPTION_MASK_ISA_BMI }, + { "-mbmi2", OPTION_MASK_ISA_BMI2 }, { "-mlzcnt", OPTION_MASK_ISA_LZCNT }, { "-mtbm", OPTION_MASK_ISA_TBM }, { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, @@ -2921,6 +2922,7 @@ ix86_option_override_internal (bool main_args_p) #define PTA_TBM (HOST_WIDE_INT_1 << 28) #define PTA_XOP (HOST_WIDE_INT_1 << 29) #define PTA_AVX2 (HOST_WIDE_INT_1 << 30) +#define PTA_BMI2 (HOST_WIDE_INT_1 << 31) /* if this reaches 64, need to widen struct pta flags below */ static struct pta @@ -2978,8 +2980,8 @@ ix86_option_override_internal (bool main_args_p) PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE - | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_LZCNT | PTA_FMA - | PTA_MOVBE}, + | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT + | PTA_FMA | PTA_MOVBE}, {"atom", PROCESSOR_ATOM, CPU_ATOM, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE}, @@ -3300,6 +3302,9 @@ ix86_option_override_internal (bool main_args_p) if (processor_alias_table[i].flags & PTA_TBM && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM)) ix86_isa_flags |= OPTION_MASK_ISA_TBM; + if (processor_alias_table[i].flags & PTA_BMI2 + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2)) + ix86_isa_flags |= OPTION_MASK_ISA_BMI2; if (processor_alias_table[i].flags & PTA_CX16 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) ix86_isa_flags |= OPTION_MASK_ISA_CX16; @@ -4053,6 +4058,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("3dnow", OPT_m3dnow), IX86_ATTR_ISA ("abm", OPT_mabm), IX86_ATTR_ISA ("bmi", OPT_mbmi), + IX86_ATTR_ISA ("bmi2", OPT_mbmi2), IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt), IX86_ATTR_ISA ("tbm", OPT_mtbm), IX86_ATTR_ISA ("aes", OPT_maes), @@ -24242,6 +24248,13 @@ enum ix86_builtins IX86_BUILTIN_BEXTRI32, IX86_BUILTIN_BEXTRI64, + /* BMI2 instructions. */ + IX86_BUILTIN_BZHI32, + IX86_BUILTIN_BZHI64, + IX86_BUILTIN_PDEP32, + IX86_BUILTIN_PDEP64, + IX86_BUILTIN_PEXT32, + IX86_BUILTIN_PEXT64, /* FSGSBASE instructions. */ IX86_BUILTIN_RDFSBASE32, @@ -25375,6 +25388,14 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI }, { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT }, { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT }, + + /* BMI2 */ + { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, + { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, + { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, + { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, + { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, + { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, }; /* FMA4 and XOP. */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index f8a35ba99ae..47442a0e50f 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -62,6 +62,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_ROUND OPTION_ISA_ROUND #define TARGET_ABM OPTION_ISA_ABM #define TARGET_BMI OPTION_ISA_BMI +#define TARGET_BMI2 OPTION_ISA_BMI2 #define TARGET_LZCNT OPTION_ISA_LZCNT #define TARGET_TBM OPTION_ISA_TBM #define TARGET_POPCNT OPTION_ISA_POPCNT diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d343fc2a036..3ce2a015e3f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -244,6 +244,10 @@ ;; For RDRAND support UNSPEC_RDRAND + + ;; For BMI2 support + UNSPEC_PDEP + UNSPEC_PEXT ]) (define_c_enum "unspecv" [ @@ -385,7 +389,7 @@ (define_attr "type" "other,multi, alu,alu1,negnot,imov,imovx,lea, - incdec,ishift,ishift1,rotate,rotate1,imul,idiv, + incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,imul,imulx,idiv, icmp,test,ibr,setcc,icmov, push,pop,call,callv,leave, str,bitmanip, @@ -418,12 +422,12 @@ ;; The (bounding maximum) length of an instruction immediate. (define_attr "length_immediate" "" (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave, - bitmanip") + bitmanip,imulx") (const_int 0) (eq_attr "unit" "i387,sse,mmx") (const_int 0) - (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1, - imul,icmp,push,pop") + (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1, + rotate,rotatex,rotate1,imul,icmp,push,pop") (symbol_ref "ix86_attr_length_immediate_default (insn, true)") (eq_attr "type" "imov,test") (symbol_ref "ix86_attr_length_immediate_default (insn, false)") @@ -683,7 +687,7 @@ (and (match_operand 0 "memory_displacement_operand" "") (match_operand 1 "immediate_operand" ""))) (const_string "true") - (and (eq_attr "type" "alu,ishift,rotate,imul,idiv") + (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv") (and (match_operand 0 "memory_displacement_operand" "") (match_operand 2 "immediate_operand" ""))) (const_string "true") @@ -707,12 +711,13 @@ (define_attr "movu" "0,1" (const_string "0")) ;; Used to control the "enabled" attribute on a per-instruction basis. -(define_attr "isa" "base,noavx,avx" +(define_attr "isa" "base,noavx,avx,bmi2" (const_string "base")) (define_attr "enabled" "" (cond [(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX") (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX") + (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2") ] (const_int 1))) @@ -6853,16 +6858,103 @@ (clobber (reg:CC FLAGS_REG))])] "TARGET_QIMODE_MATH") -(define_insn "*mul3_1" +(define_insn "*bmi2_umulditi3_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (mult:DI + (match_operand:DI 2 "nonimmediate_operand" "%d") + (match_operand:DI 3 "nonimmediate_operand" "rm"))) + (set (match_operand:DI 1 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI (match_dup 2)) + (zero_extend:TI (match_dup 3))) + (const_int 64))))] + "TARGET_64BIT && TARGET_BMI2 + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mulx\t{%3, %0, %1|%1, %0, %3}" + [(set_attr "type" "imulx") + (set_attr "prefix" "vex") + (set_attr "mode" "DI")]) + +(define_insn "*bmi2_umulsidi3_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (mult:SI + (match_operand:SI 2 "nonimmediate_operand" "%d") + (match_operand:SI 3 "nonimmediate_operand" "rm"))) + (set (match_operand:SI 1 "register_operand" "=r") + (truncate:SI + (lshiftrt:DI + (mult:DI (zero_extend:DI (match_dup 2)) + (zero_extend:DI (match_dup 3))) + (const_int 32))))] + "!TARGET_64BIT && TARGET_BMI2 + && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "mulx\t{%3, %0, %1|%1, %0, %3}" + [(set_attr "type" "imulx") + (set_attr "prefix" "vex") + (set_attr "mode" "SI")]) + +(define_insn "*umul3_1" + [(set (match_operand: 0 "register_operand" "=A,r") + (mult: + (zero_extend: + (match_operand:DWIH 1 "nonimmediate_operand" "%0,d")) + (zero_extend: + (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm")))) + (clobber (reg:CC FLAGS_REG))] + "!(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + mul{}\t%2 + #" + [(set_attr "isa" "base,bmi2") + (set_attr "type" "imul,imulx") + (set_attr "length_immediate" "0,*") + (set (attr "athlon_decode") + (cond [(eq_attr "alternative" "0") + (if_then_else (eq_attr "cpu" "athlon") + (const_string "vector") + (const_string "double"))] + (const_string "*"))) + (set_attr "amdfam10_decode" "double,*") + (set_attr "bdver1_decode" "direct,*") + (set_attr "prefix" "orig,vex") + (set_attr "mode" "")]) + +;; Convert mul to the mulx pattern to avoid flags dependency. +(define_split + [(set (match_operand: 0 "register_operand" "") + (mult: + (zero_extend: + (match_operand:DWIH 1 "register_operand" "")) + (zero_extend: + (match_operand:DWIH 2 "nonimmediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2 && reload_completed + && true_regnum (operands[1]) == DX_REG" + [(parallel [(set (match_dup 3) + (mult:DWIH (match_dup 1) (match_dup 2))) + (set (match_dup 4) + (truncate:DWIH + (lshiftrt: + (mult: (zero_extend: (match_dup 1)) + (zero_extend: (match_dup 2))) + (match_dup 5))))])] +{ + split_double_mode (mode, &operands[0], 1, &operands[3], &operands[4]); + + operands[5] = GEN_INT (GET_MODE_BITSIZE (mode)); +}) + +(define_insn "*mul3_1" [(set (match_operand: 0 "register_operand" "=A") (mult: - (any_extend: + (sign_extend: (match_operand:DWIH 1 "nonimmediate_operand" "%0")) - (any_extend: + (sign_extend: (match_operand:DWIH 2 "nonimmediate_operand" "rm")))) (clobber (reg:CC FLAGS_REG))] "!(MEM_P (operands[1]) && MEM_P (operands[2]))" - "mul{}\t%2" + "imul{}\t%2" [(set_attr "type" "imul") (set_attr "length_immediate" "0") (set (attr "athlon_decode") @@ -9060,16 +9152,26 @@ [(set_attr "type" "ishift") (set_attr "mode" "")]) +(define_insn "*bmi2_ashl3_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:SWI48 2 "register_operand" "r")))] + "TARGET_BMI2" + "shlx\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ishiftx") + (set_attr "mode" "")]) + (define_insn "*ashl3_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") - (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l") - (match_operand:QI 2 "nonmemory_operand" "c,M"))) + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r") + (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm") + (match_operand:QI 2 "nonmemory_operand" "c,M,r"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, mode, operands)" { switch (get_attr_type (insn)) { case TYPE_LEA: + case TYPE_ISHIFTX: return "#"; case TYPE_ALU: @@ -9085,9 +9187,12 @@ return "sal{}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "base,base,bmi2") + (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") + (eq_attr "alternative" "2") + (const_string "ishiftx") (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") (const_int 0)) (match_operand 0 "register_operand" "")) @@ -9106,17 +9211,39 @@ (const_string "*"))) (set_attr "mode" "")]) +;; Convert shift to the shiftx pattern to avoid flags dependency. +(define_split + [(set (match_operand:SWI48 0 "register_operand" "") + (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "") + (match_operand:QI 2 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (ashift:SWI48 (match_dup 1) (match_dup 2)))] + "operands[2] = gen_lowpart (mode, operands[2]);") + +(define_insn "*bmi2_ashlsi3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "register_operand" "r"))))] + "TARGET_64BIT && TARGET_BMI2" + "shlx\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "type" "ishiftx") + (set_attr "mode" "SI")]) + (define_insn "*ashlsi3_1_zext" - [(set (match_operand:DI 0 "register_operand" "=r,r") + [(set (match_operand:DI 0 "register_operand" "=r,r,r") (zero_extend:DI - (ashift:SI (match_operand:SI 1 "register_operand" "0,l") - (match_operand:QI 2 "nonmemory_operand" "cI,M")))) + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,M,r")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)" { switch (get_attr_type (insn)) { case TYPE_LEA: + case TYPE_ISHIFTX: return "#"; case TYPE_ALU: @@ -9131,9 +9258,12 @@ return "sal{l}\t{%2, %k0|%k0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "base,base,bmi2") + (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") + (eq_attr "alternative" "2") + (const_string "ishiftx") (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD") (const_int 0)) (match_operand 2 "const1_operand" "")) @@ -9151,6 +9281,18 @@ (const_string "*"))) (set_attr "mode" "SI")]) +;; Convert shift to the shiftx pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))] + "operands[2] = gen_lowpart (SImode, operands[2]);") + (define_insn "*ashlhi3_1" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm") (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0") @@ -9767,20 +9909,38 @@ DONE; }) +(define_insn "*bmi2_3_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:SWI48 2 "register_operand" "r")))] + "TARGET_BMI2" + "x\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ishiftx") + (set_attr "mode" "")]) + (define_insn "*3_1" - [(set (match_operand:SWI 0 "nonimmediate_operand" "=m") - (any_shiftrt:SWI (match_operand:SWI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "c"))) + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") + (any_shiftrt:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "nonmemory_operand" "c,r"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (, mode, operands)" { - if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "{}\t%0"; - else - return "{}\t{%2, %0|%0, %2}"; + switch (get_attr_type (insn)) + { + case TYPE_ISHIFTX: + return "#"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "{}\t%0"; + else + return "{}\t{%2, %0|%0, %2}"; + } } - [(set_attr "type" "ishift") + [(set_attr "isa" "base,bmi2") + (set_attr "type" "ishift,ishiftx") (set (attr "length_immediate") (if_then_else (and (match_operand 2 "const1_operand" "") @@ -9790,19 +9950,84 @@ (const_string "*"))) (set_attr "mode" "")]) -(define_insn "*si3_1_zext" +;; Convert shift to the shiftx pattern to avoid flags dependency. +(define_split + [(set (match_operand:SWI48 0 "register_operand" "") + (any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "") + (match_operand:QI 2 "register_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))] + "operands[2] = gen_lowpart (mode, operands[2]);") + +(define_insn "*bmi2_si3_1_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "cI")))) + (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:SI 2 "register_operand" "r"))))] + "TARGET_64BIT && TARGET_BMI2" + "x\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "type" "ishiftx") + (set_attr "mode" "SI")]) + +(define_insn "*si3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,r")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" { + switch (get_attr_type (insn)) + { + case TYPE_ISHIFTX: + return "#"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "{l}\t%k0"; + else + return "{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set_attr "isa" "base,bmi2") + (set_attr "type" "ishift,ishiftx") + (set (attr "length_immediate") + (if_then_else + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "SI")]) + +;; Convert shift to the shiftx pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "register_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))] + "operands[2] = gen_lowpart (SImode, operands[2]);") + +(define_insn "*3_1" + [(set (match_operand:SWI12 0 "nonimmediate_operand" "=m") + (any_shiftrt:SWI12 + (match_operand:SWI12 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (, mode, operands)" +{ if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "{l}\t%k0"; + return "{}\t%0"; else - return "{l}\t{%2, %k0|%k0, %2}"; + return "{}\t{%2, %0|%0, %2}"; } [(set_attr "type" "ishift") (set (attr "length_immediate") @@ -9812,7 +10037,7 @@ (const_int 0))) (const_string "0") (const_string "*"))) - (set_attr "mode" "SI")]) + (set_attr "mode" "")]) (define_insn "*qi3_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) @@ -10064,42 +10289,151 @@ split_double_mode (mode, &operands[0], 1, &operands[4], &operands[5]); }) +(define_insn "*bmi2_rorx3_1" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:QI 2 "immediate_operand" "")))] + "TARGET_BMI2" + "rorx\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "rotatex") + (set_attr "mode" "")]) + (define_insn "*3_1" - [(set (match_operand:SWI 0 "nonimmediate_operand" "=m") - (any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "c"))) + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") + (any_rotate:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "nonmemory_operand" "c,"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (, mode, operands)" { - if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "{}\t%0"; - else - return "{}\t{%2, %0|%0, %2}"; + switch (get_attr_type (insn)) + { + case TYPE_ROTATEX: + return "#"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "{}\t%0"; + else + return "{}\t{%2, %0|%0, %2}"; + } } - [(set_attr "type" "rotate") + [(set_attr "isa" "base,bmi2") + (set_attr "type" "rotate,rotatex") (set (attr "length_immediate") (if_then_else - (and (match_operand 2 "const1_operand" "") - (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") - (const_int 0))) + (and (eq_attr "type" "rotate") + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0)))) (const_string "0") (const_string "*"))) (set_attr "mode" "")]) -(define_insn "*si3_1_zext" +;; Convert rotate to the rotatex pattern to avoid flags dependency. +(define_split + [(set (match_operand:SWI48 0 "register_operand" "") + (rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "") + (match_operand:QI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (rotatert:SWI48 (match_dup 1) (match_dup 2)))] +{ + operands[2] + = GEN_INT (GET_MODE_BITSIZE (mode) - INTVAL (operands[2])); +}) + +(define_split + [(set (match_operand:SWI48 0 "register_operand" "") + (rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "") + (match_operand:QI 2 "immediate_operand" ""))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (rotatert:SWI48 (match_dup 1) (match_dup 2)))]) + +(define_insn "*bmi2_rorxsi3_1_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI - (any_rotate:SI (match_operand:SI 1 "register_operand" "0") - (match_operand:QI 2 "nonmemory_operand" "cI")))) + (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm") + (match_operand:QI 2 "immediate_operand" "I"))))] + "TARGET_64BIT && TARGET_BMI2" + "rorx\t{%2, %1, %k0|%k0, %1, %2}" + [(set_attr "type" "rotatex") + (set_attr "mode" "SI")]) + +(define_insn "*si3_1_zext" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI + (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm") + (match_operand:QI 2 "nonmemory_operand" "cI,I")))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (, SImode, operands)" { - if (operands[2] == const1_rtx - && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "{l}\t%k0"; + switch (get_attr_type (insn)) + { + case TYPE_ROTATEX: + return "#"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "{l}\t%k0"; + else + return "{l}\t{%2, %k0|%k0, %2}"; + } +} + [(set_attr "isa" "base,bmi2") + (set_attr "type" "rotate,rotatex") + (set (attr "length_immediate") + (if_then_else + (and (eq_attr "type" "rotate") + (and (match_operand 2 "const1_operand" "") + (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)") + (const_int 0)))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "SI")]) + +;; Convert rotate to the rotatex pattern to avoid flags dependency. +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "immediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))] +{ + operands[2] + = GEN_INT (GET_MODE_BITSIZE (SImode) - INTVAL (operands[2])); +}) + +(define_split + [(set (match_operand:DI 0 "register_operand" "") + (zero_extend:DI + (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "") + (match_operand:QI 2 "immediate_operand" "")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_64BIT && TARGET_BMI2 && reload_completed" + [(set (match_dup 0) + (zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]) + +(define_insn "*3_1" + [(set (match_operand:SWI12 0 "nonimmediate_operand" "=m") + (any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0") + (match_operand:QI 2 "nonmemory_operand" "c"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (, mode, operands)" +{ + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "{}\t%0"; else - return "{l}\t{%2, %k0|%k0, %2}"; + return "{}\t{%2, %0|%0, %2}"; } [(set_attr "type" "rotate") (set (attr "length_immediate") @@ -10109,7 +10443,7 @@ (const_int 0))) (const_string "0") (const_string "*"))) - (set_attr "mode" "SI")]) + (set_attr "mode" "")]) (define_insn "*qi3_1_slp" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) @@ -11951,6 +12285,41 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "")]) +;; BMI2 instructions. +(define_insn "bmi2_bzhi_3" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm") + (lshiftrt:SWI48 (const_int -1) + (match_operand:SWI48 2 "register_operand" "r")))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_BMI2" + "bzhi\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "bmi2_pdep_3" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:SWI48 2 "register_operand" "r")] + UNSPEC_PDEP))] + "TARGET_BMI2" + "pdep\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + +(define_insn "bmi2_pext_3" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm") + (match_operand:SWI48 2 "register_operand" "r")] + UNSPEC_PEXT))] + "TARGET_BMI2" + "pext\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "bitmanip") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + ;; TBM instructions. (define_insn "tbm_bextri_" [(set (match_operand:SWI48 0 "register_operand" "=r") @@ -12350,6 +12719,7 @@ "xor{b}\t{%h0, %b0|%b0, %h0}" [(set_attr "length" "2") (set_attr "mode" "HI")]) + ;; Thread-local storage patterns for ELF. ;; diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 54d7af1d250..8e4d51b3f9f 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -493,6 +493,10 @@ mbmi Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save Support BMI built-in functions and code generation +mbmi2 +Target Report Mask(ISA_BMI2) Var(ix86_isa_flags) Save +Support BMI2 built-in functions and code generation + mlzcnt Target Report Mask(ISA_LZCNT) Var(ix86_isa_flags) Save Support LZCNT built-in function and code generation diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h index 88456f9e9ef..e01ecd2a1ea 100644 --- a/gcc/config/i386/x86intrin.h +++ b/gcc/config/i386/x86intrin.h @@ -81,6 +81,10 @@ #include #endif +#ifdef __BMI2__ +#include +#endif + #ifdef __TBM__ #include #endif diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 29c02b8354d..0b3633689e9 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -9883,6 +9883,17 @@ unsigned int __builtin_ia32_bextr_u32(unsigned int, unsigned int); unsigned long long __builtin_ia32_bextr_u64 (unsigned long long, unsigned long long); @end smallexample +The following built-in functions are available when @option{-mbmi2} is used. +All of them generate the machine instruction that is part of the name. +@smallexample +unsigned int _bzhi_u32 (unsigned int, unsigned int) +unsigned int _pdep_u32 (unsigned int, unsigned int) +unsigned int _pext_u32 (unsigned int, unsigned int) +unsigned long long _bzhi_u64 (unsigned long long, unsigned long long) +unsigned long long _pdep_u64 (unsigned long long, unsigned long long) +unsigned long long _pext_u64 (unsigned long long, unsigned long long) +@end smallexample + The following built-in functions are available when @option{-mlzcnt} is used. All of them generate the machine instruction that is part of the name. @smallexample diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index fdc32979a57..cbf42766fe6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -607,7 +607,7 @@ Objective-C and Objective-C++ Dialects}. -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol -mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol -msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol --mlwp -mthreads -mno-align-stringops -minline-all-stringops @gol +-mbmi2 -mlwp -mthreads -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol -m96bit-long-double -mregparm=@var{num} -msseregparm @gol @@ -12697,7 +12697,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mabm @itemx -mno-abm @itemx -mbmi +@itemx -mbmi2 @itemx -mno-bmi +@itemx -mno-bmi2 @itemx -mlzcnt @itemx -mno-lzcnt @itemx -mtbm @@ -12709,8 +12711,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @opindex m3dnow @opindex mno-3dnow These switches enable or disable the use of instructions in the MMX, SSE, -SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, -SSE4A, FMA4, XOP, LWP, ABM, BMI, LZCNT or 3DNow!@: extended instruction sets. +SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C, +FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2, LZCNT or 3DNow! +@: extended instruction sets. These extensions are also available as built-in functions: see @ref{X86 Built-in Functions}, for details of the functions enabled and disabled by these switches. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 633af2b8c63..19e4579b544 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,46 @@ +2011-08-23 Kirill Yukhin + + * g++.dg/other/i386-2.C: Add -mbmi2 check. + * g++.dg/other/i386-3.C: Likewise. + * gcc.target/i386/bmi2-bzhi32-1.c: New testcase. + * gcc.target/i386/bmi2-bzhi32-1a.c: Likewise. + * gcc.target/i386/bmi2-bzhi64-1.c: Likewise. + * gcc.target/i386/bmi2-bzhi64-1a.c: Likewise. + * gcc.target/i386/bmi2-mulx32-1.c: Likewise. + * gcc.target/i386/bmi2-mulx32-1a.c: Likewise. + * gcc.target/i386/bmi2-mulx64-1.c: Likewise. + * gcc.target/i386/bmi2-mulx64-1a.c: Likewise. + * gcc.target/i386/bmi2-pdep32-1.c: Likewise. + * gcc.target/i386/bmi2-pdep32-1a.c: Likewise. + * gcc.target/i386/bmi2-pdep64-1.c: Likewise. + * gcc.target/i386/bmi2-pdep64-1a.c: Likewise. + * gcc.target/i386/bmi2-pext32-1.c: Likewise. + * gcc.target/i386/bmi2-pext32-1a.c: Likewise. + * gcc.target/i386/bmi2-pext64-1.c: Likewise. + * gcc.target/i386/bmi2-pext64-1a.c: Likewise. + * gcc.target/i386/bmi2-rorx32-1.c: Likewise. + * gcc.target/i386/bmi2-rorx32-1a.c: Likewise. + * gcc.target/i386/bmi2-rorx64-1.c: Likewise. + * gcc.target/i386/bmi2-rorx64-1a.c: Likewise. + * gcc.target/i386/bmi2-sarx32-1.c: Likewise. + * gcc.target/i386/bmi2-sarx32-1a.c: Likewise. + * gcc.target/i386/bmi2-sarx64-1.c: Likewise. + * gcc.target/i386/bmi2-sarx64-1a.c: Likewise. + * gcc.target/i386/bmi2-shlx32-1.c: Likewise. + * gcc.target/i386/bmi2-shlx32-1a.c: Likewise. + * gcc.target/i386/bmi2-shlx64-1.c: Likewise. + * gcc.target/i386/bmi2-shlx64-1a.c: Likewise. + * gcc.target/i386/bmi2-shrx32-1.c: Likewise. + * gcc.target/i386/bmi2-shrx32-1a.c: Likewise. + * gcc.target/i386/bmi2-shrx64-1.c: Likewise. + * gcc.target/i386/bmi2-shrx64-1a.c: Likewise. + * gcc.target/i386/i386.exp (check_effective_target_bmi2): New. + * gcc.target/i386/sse-12.c: Add BMI2. + * gcc.target/i386/sse-13.c: Likewise. + * gcc.target/i386/sse-14.c: Likewise. + * gcc.target/i386/sse-22.c: Likewise. + * gcc.target/i386/sse-23.c: Likewise. + 2011-08-23 Jason Merrill * g++.dg/template/crash7.C: Adjust expected errors. diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index ed183c7633d..5f2eaf96b1e 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 626f972463a..76d4d19c1f1 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c new file mode 100644 index 00000000000..68df8b71d18 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c @@ -0,0 +1,35 @@ +/* { dg-do run { target { bmi2 } } } */ +/* { dg-options "-mbmi2 -O2" } */ + +#include +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned +calc_bzhi_u32 (unsigned a, int l) +{ + unsigned res = a; + int i; + for (i = 0; i < 32 - l; ++i) + res &= ~(1 << (31 - i)); + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned src = 0xce7ace0f; + unsigned res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_bzhi_u32 (src, i * 2); + res = _bzhi_u32 (src, i * 2); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c new file mode 100644 index 00000000000..05be7a83710 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-bzhi32-1.c" + +/* { dg-final { scan-assembler-times "bmi2_bzhi_si3" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c new file mode 100644 index 00000000000..1ffe135b437 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c @@ -0,0 +1,35 @@ +/* { dg-do run { target { bmi2 && { ! ia32 } } } } */ +/* { dg-options "-mbmi2 -O2" } */ + +#include +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_bzhi_u64 (unsigned long long a, int l) +{ + unsigned long long res = a; + int i; + for (i = 0; i < 64 - l; ++i) + res &= ~(1LL << (63 - i)); + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned long long src = 0xce7ace0ce7ace0ff; + unsigned long long res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_bzhi_u64 (src, i * 2); + res = _bzhi_u64 (src, i * 2); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c new file mode 100644 index 00000000000..dc4a94cc369 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-bzhi64-1.c" + +/* { dg-final { scan-assembler-times "bmi2_bzhi_di3" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-check.h b/gcc/testsuite/gcc.target/i386/bmi2-check.h new file mode 100644 index 00000000000..5ffce44fc6e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-check.h @@ -0,0 +1,36 @@ +#include +#include + +#include "cpuid.h" + +static void bmi2_test (void); + +static void +__attribute__ ((noinline)) +do_test (void) +{ + bmi2_test (); +} + +int +main () +{ + unsigned int eax, ebx, ecx, edx; + + __cpuid_count (7, 0, eax, ebx, ecx, edx); + + /* Run BMI2 test only if host has BMI2 support. */ + if (ebx & bit_BMI2) + { + do_test (); +#ifdef DEBUG + printf ("PASSED\n"); +#endif + } +#ifdef DEBUG + else + printf ("SKIPPED\n"); +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c new file mode 100644 index 00000000000..a90ff1a511f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c @@ -0,0 +1,47 @@ +/* { dg-do run { target { bmi2 && { ia32 } } } } */ +/* { dg-options "-mbmi2 -O2" } */ + +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_mul_u32 (unsigned volatile a, unsigned b) +{ + unsigned long long res = 0; + int i; + for (i = 0; i < b; ++i) + res += a; + + return res; +} + +__attribute__((noinline)) +unsigned long long +gen_mulx (unsigned a, unsigned b) +{ + unsigned long long res; + + res = (unsigned long long)a * b; + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned a = 0xce7ace0; + unsigned b = 0xfacefff; + unsigned long long res, res_ref; + + for (i = 0; i < 5; ++i) { + a = a * (i + 1); + b = b / (i + 1); + + res_ref = calc_mul_u32 (a, b); + res = gen_mulx (a, b); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c new file mode 100644 index 00000000000..cf3bb085cfd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ia32 } } } */ +/* { dg-options "-O2 -mbmi2 -dp" } */ + +#include "bmi2-mulx32-1.c" + +/* { dg-final { scan-assembler-times "bmi2_umulsidi3_1" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c new file mode 100644 index 00000000000..68449466ceb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c @@ -0,0 +1,36 @@ +/* { dg-do run { target { bmi2 && { ! ia32 } } } } */ +/* { dg-options "-mbmi2 -O2" } */ + +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned __int128 +calc_mul_u64 (unsigned long long volatile a, unsigned long long b) +{ + unsigned __int128 res = 0; + int i; + for (i = 0; i < b; ++i) + res += (unsigned __int128) a; + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned long long a = 0xce7ace0ce7ace0; + unsigned long long b = 0xface; + unsigned __int128 res, res_ref; + + for (i=0; i<5; ++i) { + a = a * (i + 1); + b = b / (i + 1); + + res_ref = calc_mul_u64 (a, b); + res = (unsigned __int128) a * b; + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c new file mode 100644 index 00000000000..592d713e96c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mbmi2 -dp" } */ + +#include "bmi2-mulx64-1.c" + +/* { dg-final { scan-assembler-times "bmi2_umulditi3_1" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c new file mode 100644 index 00000000000..5aecf5717dc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c @@ -0,0 +1,39 @@ +/* { dg-do run { target { bmi2 } } } */ +/* { dg-options "-mbmi2 -O2" } */ + +#include +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned +calc_pdep_u32 (unsigned a, int mask) +{ + unsigned res = 0; + int i, k = 0; + + for (i = 0; i < 32; ++i) + if (mask & (1 << i)) { + res |= ((a & (1 << k)) >> k) << i; + ++k; + } + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned src = 0xce7acc; + unsigned res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_pdep_u32 (src, i * 3); + res = _pdep_u32 (src, i * 3); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c new file mode 100644 index 00000000000..87888fcff82 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-pdep32-1.c" + +/* { dg-final { scan-assembler-times "bmi2_pdep_si3" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c new file mode 100644 index 00000000000..f718b2f3587 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c @@ -0,0 +1,38 @@ +/* { dg-do run { target { bmi2 && { ! ia32 } } } } */ +/* { dg-options "-mbmi2 -O2" } */ + +#include +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_pdep_u64 (unsigned long long a, unsigned long long mask) +{ + unsigned long long res = 0; + unsigned long long i, k = 0; + + for (i = 0; i < 64; ++i) + if (mask & (1LL << i)) { + res |= ((a & (1LL << k)) >> k) << i; + ++k; + } + return res; +} + +static void +bmi2_test () +{ + unsigned long long i; + unsigned long long src = 0xce7acce7acce7ac; + unsigned long long res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_pdep_u64 (src, ~(i * 3)); + res = _pdep_u64 (src, ~(i * 3)); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c new file mode 100644 index 00000000000..8163c4062a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-pdep64-1.c" + +/* { dg-final { scan-assembler-times "bmi2_pdep_di3" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c new file mode 100644 index 00000000000..7fe78378ebf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c @@ -0,0 +1,39 @@ +/* { dg-do run { target { bmi2 } } } */ +/* { dg-options "-mbmi2 -O2" } */ + +#include +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned +calc_pext_u32 (unsigned a, unsigned mask) +{ + unsigned res = 0; + int i, k = 0; + + for (i = 0; i < 32; ++i) + if (mask & (1 << i)) { + res |= ((a & (1 << i)) >> i) << k; + ++k; + } + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned src = 0xce7acc; + unsigned res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_pext_u32 (src, ~(i * 3)); + res = _pext_u32 (src, ~(i * 3)); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c new file mode 100644 index 00000000000..c4a6deecaed --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-pext32-1.c" + +/* { dg-final { scan-assembler-times "bmi2_pext_si3" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c new file mode 100644 index 00000000000..6850749660f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c @@ -0,0 +1,39 @@ +/* { dg-do run { target { bmi2 && { ! ia32 } } } } */ +/* { dg-options "-mbmi2 -O2" } */ + +#include +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_pext_u64 (unsigned long long a, unsigned long long mask) +{ + unsigned long long res = 0; + int i, k = 0; + + for (i = 0; i < 64; ++i) + if (mask & (1LL << i)) { + res |= ((a & (1LL << i)) >> i) << k; + ++k; + } + + return res; +} + +static void +bmi2_test () +{ + unsigned long long i; + unsigned long long src = 0xce7acce7acce7ac; + unsigned long long res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_pext_u64 (src, ~(i * 3)); + res = _pext_u64 (src, ~(i * 3)); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c new file mode 100644 index 00000000000..aaf06c1f206 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-pext64-1.c" + +/* { dg-final { scan-assembler-times "bmi2_pext_di3" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c new file mode 100644 index 00000000000..d7f6f3b62db --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c @@ -0,0 +1,36 @@ +/* { dg-do run { target { bmi2 } } } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned +calc_rorx_u32 (unsigned a, int l) +{ + unsigned volatile res = a; + int i; + for (i = 0; i < l; ++i) + res = (res >> 1) | ((res & 1) << 31); + + return res; +} + +#define SHIFT_VAL 0x0e + +static void +bmi2_test () +{ + unsigned i; + unsigned src = 0xce7ace0; + unsigned res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_rorx_u32 (src, SHIFT_VAL); + res = (src >> SHIFT_VAL) | (src << (32 - SHIFT_VAL)); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c new file mode 100644 index 00000000000..bb3b28d6c7f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbmi2 -dp" } */ + +#include "bmi2-rorx32-1.c" + +/* { dg-final { scan-assembler-times "bmi2_rorxsi3_1" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c new file mode 100644 index 00000000000..ccd60c28a5c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c @@ -0,0 +1,36 @@ +/* { dg-do run { target { bmi2 && { ! ia32 } } } } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_rorx_u64 (unsigned long long a, int l) +{ + unsigned long long volatile res = a; + int i; + for (i = 0; i < l; ++i) + res = (res >> 1) | ((res&1)<< 63); + + return res; +} + +#define SHIFT_VAL 0x1e + +static void +bmi2_test () +{ + unsigned i; + unsigned long long src = 0xce7ace0ce7ace0; + unsigned long long res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_rorx_u64 (src, SHIFT_VAL); + res = (src >> SHIFT_VAL) | (src << (64 - SHIFT_VAL)); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c new file mode 100644 index 00000000000..2a7a7a08ced --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mbmi2 -dp" } */ + +#include "bmi2-rorx64-1.c" + +/* { dg-final { scan-assembler-times "bmi2_rorxdi3_1" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c new file mode 100644 index 00000000000..8224b6f60de --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c @@ -0,0 +1,34 @@ +/* { dg-do run { target { bmi2 } } } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-check.h" + +__attribute__((noinline)) +int +calc_sarx_u32 (int a, int l) +{ + int volatile res = a; + int i; + for (i = 0; i < l; ++i) + res >>= 1; + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + int src = 0xfce7ace0; + int res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_sarx_u32 (src, i + 1); + res = src >> (i + 1); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c new file mode 100644 index 00000000000..f10d60b3efa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbmi2 -dp" } */ + +#include "bmi2-sarx32-1.c" + +/* { dg-final { scan-assembler-times "bmi2_ashrsi3" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c new file mode 100644 index 00000000000..a43b2025d3b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c @@ -0,0 +1,34 @@ +/* { dg-do run { target { bmi2 && { ! ia32 } } } } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-check.h" + +__attribute__((noinline)) +long long +calc_sarx_u64 (long long a, int l) +{ + long long volatile res = a; + int i; + for (i = 0; i < l; ++i) + res >>= 1; + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + long long src = 0xfce7ace0ce7ace0; + long long res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_sarx_u64 (src, i + 1); + res = src >> (i + 1); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c new file mode 100644 index 00000000000..bcf0fd44c51 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mbmi2 -dp" } */ + +#include "bmi2-sarx64-1.c" + +/* { dg-final { scan-assembler-times "bmi2_ashrdi3" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c new file mode 100644 index 00000000000..0bf970282f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c @@ -0,0 +1,34 @@ +/* { dg-do run { target { bmi2 } } } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-check.h" + +__attribute__((noinline)) +int +calc_shlx_u32 (int a, int l) +{ + int volatile res = a; + int i; + for (i = 0; i < l; ++i) + res <<= 1; + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + int src = 0xfce7ace0; + int res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_shlx_u32 (src, i + 1); + res = src << (i + 1); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c new file mode 100644 index 00000000000..215e5d3d79e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbmi2 -dp" } */ + +#include "bmi2-shlx32-1.c" + +/* { dg-final { scan-assembler-times "bmi2_ashlsi3" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c new file mode 100644 index 00000000000..2d2ec155e6b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c @@ -0,0 +1,34 @@ +/* { dg-do run { target { bmi2 } } } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned +calc_shrx_u32 (unsigned a, int l) +{ + unsigned volatile res = a; + int i; + for (i = 0; i < l; ++i) + res >>= 1; + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned src = 0xce7ace0; + unsigned res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_shrx_u32 (src, i + 1); + res = src >> (i + 1); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c new file mode 100644 index 00000000000..24c53d45807 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mbmi2 -dp" } */ + +#include "bmi2-shrx32-1.c" + +/* { dg-final { scan-assembler-times "bmi2_lshrsi3" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c new file mode 100644 index 00000000000..81d232e765f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c @@ -0,0 +1,34 @@ +/* { dg-do run { target { bmi2 && { ! ia32 } } } } */ +/* { dg-options "-mbmi2 -O2 -dp" } */ + +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_shrx_u64 (unsigned long long a, int l) +{ + unsigned long long volatile res = a; + int i; + for (i = 0; i < l; ++i) + res >>= 1; + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned long long src = 0xce7ace0ce7ace0; + unsigned long long res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_shrx_u64 (src, i + 1); + res = src >> (i + 1); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c new file mode 100644 index 00000000000..783043935fb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mbmi2 -dp" } */ + +#include "bmi2-shrx64-1.c" + +/* { dg-final { scan-assembler-times "bmi2_lshrdi3" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp index 167b79b5d2a..cff8a9a3cba 100644 --- a/gcc/testsuite/gcc.target/i386/i386.exp +++ b/gcc/testsuite/gcc.target/i386/i386.exp @@ -206,6 +206,17 @@ proc check_effective_target_bmi { } { } "-mbmi" ] } +# Return 1 if bmi2 instructions can be compiled. +proc check_effective_target_bmi2 { } { + return [check_no_compiler_messages bmi2 object { + unsigned int + _bzhi_u32 (unsigned int __X, unsigned int __Y) + { + return __builtin_ia32_bzhi_si (__X, __Y); + } + } "-mbmi2" ] +} + # If the linker used understands -M , pass it to clear hardware # capabilities set by the Sun assembler. set clearcap_ldflags "-Wl,-M,$srcdir/$subdir/clearcap.map" diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index 59e659ebec1..db949839eed 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 836272d7e62..6b02df7b4ba 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index af42781e2a2..e5216b1aeaa 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 0a7af039e1e..4660ba0b15a 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -46,7 +46,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,tbm,lwp,fsgsbase,rdrnd,f16c") +#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c") #endif /* Following intrinsics require immediate arguments. They @@ -220,9 +220,9 @@ test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1) #endif #include -/* x86intrin.h (FMA4/XOP/LWP/BMI/TBM/LZCNT). */ +/* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT). */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("fma4,xop,lwp,bmi,tbm,lzcnt") +#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt") #endif #include /* xopintrin.h */ diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 8d0c32336c7..b2e52550c16 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -147,7 +147,7 @@ #define __builtin_ia32_bextri_u32(X, Y) __builtin_ia32_bextr_u32 (X, 1) #define __builtin_ia32_bextri_u64(X, Y) __builtin_ia32_bextr_u64 (X, 1) -#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,tbm,lwp,fsgsbase,rdrnd,f16c") +#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c") #include #include #include -- 2.11.0