From 754c06ac94e6ba7a8f0027e801b0eaa167739340 Mon Sep 17 00:00:00 2001 From: hjl Date: Wed, 25 Jun 2003 22:28:27 +0000 Subject: [PATCH] 2003-06-25 H.J. Lu * doc/extend.texi: Document new builtin functions for Intel Prescott New Intrunctions. * doc/invoke.texi: Document new command-line options, -mpni and -mno-pni, for Intel Prescott New Intrunctions. * config.gcc (extra_headers): Add pmmintrin.h for i[34567]86-*-*. * config/i386/i386.c (override_options): Turn on MASK_SSE2 for -mpni. Turn on MASK_SSE for -msse2. (bdesc_2arg): Add PNI builtins with 2 args. (bdesc_1arg): Add PNI builtins with 1 arg. (ix86_init_mmx_sse_builtins): Handle PNI builtins. (ix86_expand_builtin): Likewise. * config/i386/i386.h (MASK_3DNOW, MASK_3DNOW_A, MASK_128BIT_LONG_DOUBLE, MASK_64BIT, MASK_MS_BITFIELD_LAYOUT, MASK_TLS_DIRECT_SEG_REFS): Renumbered. (TARGET_PNI): New. (TARGET_SWITCHES): Don't enable MASK_SSE for -msse2 here. Add -mpni and -mno-pni. (TARGET_CPU_CPP_BUILTINS): Defined __PNI__ for PNI. (ix86_builtins): Add PNI builtins. (config/i386/i386.md): Add PNI patterns. * config/i386/pmmintrin.h: New file. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@68502 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 29 ++++++++++ gcc/config/i386/i386.c | 87 +++++++++++++++++++++++++++- gcc/config/i386/i386.h | 38 ++++++++++--- gcc/config/i386/i386.md | 135 ++++++++++++++++++++++++++++++++++++++++++++ gcc/config/i386/pmmintrin.h | 132 +++++++++++++++++++++++++++++++++++++++++++ gcc/doc/extend.texi | 25 ++++++++ gcc/doc/invoke.texi | 11 +++- 7 files changed, 447 insertions(+), 10 deletions(-) create mode 100644 gcc/config/i386/pmmintrin.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index dce8c6e2a30..2c57303e303 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,32 @@ +2003-06-25 H.J. Lu + + * doc/extend.texi: Document new builtin functions for Intel + Prescott New Intrunctions. + + * doc/invoke.texi: Document new command-line options, -mpni and + -mno-pni, for Intel Prescott New Intrunctions. + + * config.gcc (extra_headers): Add pmmintrin.h for i[34567]86-*-*. + + * config/i386/i386.c (override_options): Turn on MASK_SSE2 + for -mpni. Turn on MASK_SSE for -msse2. + (bdesc_2arg): Add PNI builtins with 2 args. + (bdesc_1arg): Add PNI builtins with 1 arg. + (ix86_init_mmx_sse_builtins): Handle PNI builtins. + (ix86_expand_builtin): Likewise. + + * config/i386/i386.h (MASK_3DNOW, MASK_3DNOW_A, + MASK_128BIT_LONG_DOUBLE, MASK_64BIT, MASK_MS_BITFIELD_LAYOUT, + MASK_TLS_DIRECT_SEG_REFS): Renumbered. + (TARGET_PNI): New. + (TARGET_SWITCHES): Don't enable MASK_SSE for -msse2 here. Add + -mpni and -mno-pni. + (TARGET_CPU_CPP_BUILTINS): Defined __PNI__ for PNI. + (ix86_builtins): Add PNI builtins. + (config/i386/i386.md): Add PNI patterns. + + * config/i386/pmmintrin.h: New file. + 2003-06-25 Kazu Hirata * config/h8300/h8300.md (call): Fix the insn lengths. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 56c5ce5e2be..8d9c08f99fa 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1373,6 +1373,14 @@ override_options () if (x86_arch_always_fancy_math_387 & (1 << ix86_arch)) target_flags &= ~MASK_NO_FANCY_MATH_387; + /* Turn on SSE2 builtins for -mpni. */ + if (TARGET_PNI) + target_flags |= MASK_SSE2; + + /* Turn on SSE builtins for -msse2. */ + if (TARGET_SSE2) + target_flags |= MASK_SSE; + if (TARGET_64BIT) { if (TARGET_ALIGN_DOUBLE) @@ -13058,7 +13066,15 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 }, { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 }, { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 }, - { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 } + { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }, + + /* PNI MMX */ + { MASK_PNI, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 }, + { MASK_PNI, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 }, + { MASK_PNI, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 }, + { MASK_PNI, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 }, + { MASK_PNI, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 }, + { MASK_PNI, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 } }; static const struct builtin_description bdesc_1arg[] = @@ -13104,7 +13120,12 @@ static const struct builtin_description bdesc_1arg[] = { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 }, { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 } + { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 }, + + /* PNI */ + { MASK_PNI, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 }, + { MASK_PNI, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 }, + { MASK_PNI, CODE_FOR_movddup, 0, IX86_BUILTIN_MOVDDUP, 0, 0 } }; void @@ -13195,6 +13216,13 @@ ix86_init_mmx_sse_builtins () = build_function_type (void_type_node, void_list_node); tree void_ftype_unsigned = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); + tree void_ftype_unsigned_unsigned + = build_function_type_list (void_type_node, unsigned_type_node, + unsigned_type_node, NULL_TREE); + tree void_ftype_pcvoid_unsigned_unsigned + = build_function_type_list (void_type_node, const_ptr_type_node, + unsigned_type_node, unsigned_type_node, + NULL_TREE); tree unsigned_ftype_void = build_function_type (unsigned_type_node, void_list_node); tree di_ftype_void @@ -13700,6 +13728,26 @@ ix86_init_mmx_sse_builtins () def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); + + /* Prescott New Instructions. */ + def_builtin (MASK_PNI, "__builtin_ia32_monitor", + void_ftype_pcvoid_unsigned_unsigned, + IX86_BUILTIN_MONITOR); + def_builtin (MASK_PNI, "__builtin_ia32_mwait", + void_ftype_unsigned_unsigned, + IX86_BUILTIN_MWAIT); + def_builtin (MASK_PNI, "__builtin_ia32_movshdup", + v4sf_ftype_v4sf, + IX86_BUILTIN_MOVSHDUP); + def_builtin (MASK_PNI, "__builtin_ia32_movsldup", + v4sf_ftype_v4sf, + IX86_BUILTIN_MOVSLDUP); + def_builtin (MASK_PNI, "__builtin_ia32_lddqu", + v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); + def_builtin (MASK_PNI, "__builtin_ia32_loadddup", + v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP); + def_builtin (MASK_PNI, "__builtin_ia32_movddup", + v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP); } /* Errors in the source file can cause expand_expr to return const0_rtx @@ -14509,6 +14557,41 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) case IX86_BUILTIN_STORED: return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist); + case IX86_BUILTIN_MONITOR: + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + if (!REG_P (op2)) + op2 = copy_to_mode_reg (SImode, op2); + emit_insn (gen_monitor (op0, op1, op2)); + return 0; + + case IX86_BUILTIN_MWAIT: + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + if (!REG_P (op0)) + op0 = copy_to_mode_reg (SImode, op0); + if (!REG_P (op1)) + op1 = copy_to_mode_reg (SImode, op1); + emit_insn (gen_mwait (op0, op1)); + return 0; + + case IX86_BUILTIN_LOADDDUP: + return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1); + + case IX86_BUILTIN_LDDQU: + return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target, + 1); + default: break; } diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 25b9acfe07b..5f390f0e155 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -121,12 +121,13 @@ extern int target_flags; #define MASK_MMX 0x00002000 /* Support MMX regs/builtins */ #define MASK_SSE 0x00004000 /* Support SSE regs/builtins */ #define MASK_SSE2 0x00008000 /* Support SSE2 regs/builtins */ -#define MASK_3DNOW 0x00010000 /* Support 3Dnow builtins */ -#define MASK_3DNOW_A 0x00020000 /* Support Athlon 3Dnow builtins */ -#define MASK_128BIT_LONG_DOUBLE 0x00040000 /* long double size is 128bit */ -#define MASK_64BIT 0x00080000 /* Produce 64bit code */ -#define MASK_MS_BITFIELD_LAYOUT 0x00100000 /* Use native (MS) bitfield layout */ -#define MASK_TLS_DIRECT_SEG_REFS 0x00200000 /* Avoid adding %gs:0 */ +#define MASK_PNI 0x00010000 /* Support PNI regs/builtins */ +#define MASK_3DNOW 0x00020000 /* Support 3Dnow builtins */ +#define MASK_3DNOW_A 0x00040000 /* Support Athlon 3Dnow builtins */ +#define MASK_128BIT_LONG_DOUBLE 0x00080000 /* long double size is 128bit */ +#define MASK_64BIT 0x00100000 /* Produce 64bit code */ +#define MASK_MS_BITFIELD_LAYOUT 0x00200000 /* Use native (MS) bitfield layout */ +#define MASK_TLS_DIRECT_SEG_REFS 0x00400000 /* Avoid adding %gs:0 */ /* Unused: 0x03e0000 */ @@ -302,6 +303,7 @@ extern int x86_prefetch_sse; #define TARGET_SSE ((target_flags & MASK_SSE) != 0) #define TARGET_SSE2 ((target_flags & MASK_SSE2) != 0) +#define TARGET_PNI ((target_flags & MASK_PNI) != 0) #define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0) #define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \ && (ix86_fpmath & FPMATH_387)) @@ -393,10 +395,14 @@ extern int x86_prefetch_sse; N_("Support MMX and SSE built-in functions and code generation") }, \ { "no-sse", -MASK_SSE, \ N_("Do not support MMX and SSE built-in functions and code generation") },\ - { "sse2", (MASK_SSE2|MASK_SSE), \ + { "sse2", MASK_SSE2, \ N_("Support MMX, SSE and SSE2 built-in functions and code generation") }, \ { "no-sse2", -MASK_SSE2, \ N_("Do not support MMX, SSE and SSE2 built-in functions and code generation") }, \ + { "pni", MASK_PNI, \ + N_("Support MMX, SSE, SSE2 and PNI built-in functions and code generation") },\ + { "no-pni", -MASK_PNI, \ + N_("Do not support MMX, SSE, SSE2 and PNI built-in functions and code generation") },\ { "128bit-long-double", MASK_128BIT_LONG_DOUBLE, \ N_("sizeof(long double) is 16") }, \ { "96bit-long-double", -MASK_128BIT_LONG_DOUBLE, \ @@ -611,6 +617,8 @@ extern int x86_prefetch_sse; builtin_define ("__SSE__"); \ if (TARGET_SSE2) \ builtin_define ("__SSE2__"); \ + if (TARGET_PNI) \ + builtin_define ("__PNI__"); \ if (TARGET_SSE_MATH && TARGET_SSE) \ builtin_define ("__SSE_MATH__"); \ if (TARGET_SSE_MATH && TARGET_SSE2) \ @@ -2516,6 +2524,22 @@ enum ix86_builtins IX86_BUILTIN_MFENCE, IX86_BUILTIN_LFENCE, + /* Prescott New Instructions. */ + IX86_BUILTIN_ADDSUBPS, + IX86_BUILTIN_HADDPS, + IX86_BUILTIN_HSUBPS, + IX86_BUILTIN_MOVSHDUP, + IX86_BUILTIN_MOVSLDUP, + IX86_BUILTIN_ADDSUBPD, + IX86_BUILTIN_HADDPD, + IX86_BUILTIN_HSUBPD, + IX86_BUILTIN_LOADDDUP, + IX86_BUILTIN_MOVDDUP, + IX86_BUILTIN_LDDQU, + + IX86_BUILTIN_MONITOR, + IX86_BUILTIN_MWAIT, + IX86_BUILTIN_MAX }; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 4218fb3a306..dc4ece8534a 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -109,6 +109,13 @@ (UNSPEC_MFENCE 59) (UNSPEC_LFENCE 60) (UNSPEC_PSADBW 61) + (UNSPEC_ADDSUB 71) + (UNSPEC_HADD 72) + (UNSPEC_HSUB 73) + (UNSPEC_MOVSHDUP 74) + (UNSPEC_MOVSLDUP 75) + (UNSPEC_LDQQU 76) + (UNSPEC_MOVDDUP 77) ; x87 Floating point (UNSPEC_FPATAN 65) @@ -130,6 +137,8 @@ (UNSPECV_FEMMS 46) (UNSPECV_CLFLUSH 57) (UNSPECV_ALIGN 68) + (UNSPECV_MONITOR 69) + (UNSPECV_MWAIT 70) ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls @@ -23512,3 +23521,129 @@ "lfence" [(set_attr "type" "sse") (set_attr "memory" "unknown")]) + +;; PNI + +(define_insn "mwait" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") + (match_operand:SI 1 "register_operand" "c")] + UNSPECV_MWAIT)] + "TARGET_PNI" + "mwait\t%0, %1" + [(set_attr "length" "3")]) + +(define_insn "monitor" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") + (match_operand:SI 1 "register_operand" "c") + (match_operand:SI 2 "register_operand" "d")] + UNSPECV_MONITOR)] + "TARGET_PNI" + "monitor\t%0, %1, %2" + [(set_attr "length" "3")]) + +;; PNI arithmetic + +(define_insn "addsubv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")] + UNSPEC_ADDSUB))] + "TARGET_PNI" + "addsubps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "addsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")] + UNSPEC_ADDSUB))] + "TARGET_PNI" + "addsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "haddv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")] + UNSPEC_HADD))] + "TARGET_PNI" + "haddps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "haddv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")] + UNSPEC_HADD))] + "TARGET_PNI" + "haddpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "hsubv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")] + UNSPEC_HSUB))] + "TARGET_PNI" + "hsubps\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V4SF")]) + +(define_insn "hsubv2df3" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0") + (match_operand:V2DF 2 "nonimmediate_operand" "xm")] + UNSPEC_HSUB))] + "TARGET_PNI" + "hsubpd\t{%2, %0|%0, %2}" + [(set_attr "type" "sseadd") + (set_attr "mode" "V2DF")]) + +(define_insn "movshdup" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSHDUP))] + "TARGET_PNI" + "movshdup\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "movsldup" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF + [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSLDUP))] + "TARGET_PNI" + "movsldup\t{%1, %0|%0, %1}" + [(set_attr "type" "sse") + (set_attr "mode" "V4SF")]) + +(define_insn "lddqu" + [(set (match_operand:V16QI 0 "register_operand" "=x") + (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")] + UNSPEC_LDQQU))] + "TARGET_PNI" + "lddqu\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "TI")]) + +(define_insn "loadddup" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")))] + "TARGET_PNI" + "movddup\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) + +(define_insn "movddup" + [(set (match_operand:V2DF 0 "register_operand" "=x") + (vec_duplicate:V2DF + (vec_select:DF (match_operand:V2DF 1 "register_operand" "x") + (parallel [(const_int 0)]))))] + "TARGET_PNI" + "movddup\t{%1, %0|%0, %1}" + [(set_attr "type" "ssecvt") + (set_attr "mode" "DF")]) diff --git a/gcc/config/i386/pmmintrin.h b/gcc/config/i386/pmmintrin.h new file mode 100644 index 00000000000..5649c00d673 --- /dev/null +++ b/gcc/config/i386/pmmintrin.h @@ -0,0 +1,132 @@ +/* Copyright (C) 2003 Free Software Foundation, Inc. + + This file is part of GNU CC. + + GNU CC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU CC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU CC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 8.0. */ + +#ifndef _PMMINTRIN_H_INCLUDED +#define _PMMINTRIN_H_INCLUDED + +#ifdef __PNI__ +#include +#include + +/* Additional bits in the MXCSR. */ +#define _MM_DENORMALS_ZERO_MASK 0x0040 +#define _MM_DENORMALS_ZERO_ON 0x0040 +#define _MM_DENORMALS_ZERO_OFF 0x0000 + +#define _MM_SET_DENORMALS_ZERO_MODE(mode) \ + _mm_setcsr ((_mm_getcsr () & ~_MM_DENORMALS_ZERO_MASK) | (mode)) +#define _MM_GET_DENORMALS_ZERO_MODE() \ + (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) + +static __inline __m128 +_mm_addsub_ps (__m128 __X, __m128 __Y) +{ + return (__m128) __builtin_ia32_addsubps ((__v4sf)__X, (__v4sf)__Y); +} + +static __inline __m128 +_mm_hadd_ps (__m128 __X, __m128 __Y) +{ + return (__m128) __builtin_ia32_haddps ((__v4sf)__X, (__v4sf)__Y); +} + +static __inline __m128 +_mm_hsub_ps (__m128 __X, __m128 __Y) +{ + return (__m128) __builtin_ia32_hsubps ((__v4sf)__X, (__v4sf)__Y); +} + +static __inline __m128 +_mm_movehdup_ps (__m128 __X) +{ + return (__m128) __builtin_ia32_movshdup ((__v4sf)__X); +} + +static __inline __m128 +_mm_moveldup_ps (__m128 __X) +{ + return (__m128) __builtin_ia32_movsldup ((__v4sf)__X); +} + +static __inline __m128d +_mm_addsub_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) __builtin_ia32_addsubpd ((__v2df)__X, (__v2df)__Y); +} + +static __inline __m128d +_mm_hadd_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) __builtin_ia32_haddpd ((__v2df)__X, (__v2df)__Y); +} + +static __inline __m128d +_mm_hsub_pd (__m128d __X, __m128d __Y) +{ + return (__m128d) __builtin_ia32_hsubpd ((__v2df)__X, (__v2df)__Y); +} + +static __inline __m128d +_mm_loaddup_pd (double const *__P) +{ + return (__m128d) __builtin_ia32_loadddup (__P); +} + +static __inline __m128d +_mm_movedup_pd (__m128d __X) +{ + return (__m128d) __builtin_ia32_movddup ((__v2df)__X); +} + +static __inline __m128i +_mm_lddqu_si128 (__m128i const *__P) +{ + return (__m128i) __builtin_ia32_lddqu ((char const *)__P); +} + +#if 0 +static __inline void +_mm_monitor (void const * __P, unsigned int __E, unsigned int __H) +{ + __builtin_ia32_monitor (__P, __E, __H); +} + +static __inline void +_mm_mwait (unsigned int __E, unsigned int __H) +{ + __builtin_ia32_mwait (__E, __H); +} +#else +#define _mm_monitor(P, E, H) __builtin_ia32_monitor ((P), (E), (H)) +#define _mm_mwait(E, H) __builtin_ia32_mwait ((E), (H)) +#endif + +#endif /* __PNI__ */ + +#endif /* _PMMINTRIN_H_INCLUDED */ diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index bd5183a1e7b..e7e87126525 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -5592,6 +5592,31 @@ Generates the @code{movhps} machine instruction as a store to memory. Generates the @code{movlps} machine instruction as a store to memory. @end table +The following built-in functions are available when @option{-mpni} is used. +All of them generate the machine instruction that is part of the name. + +@example +v2df __builtin_ia32_addsubpd (v2df, v2df) +v2df __builtin_ia32_addsubps (v2df, v2df) +v2df __builtin_ia32_haddpd (v2df, v2df) +v2df __builtin_ia32_haddps (v2df, v2df) +v2df __builtin_ia32_hsubpd (v2df, v2df) +v2df __builtin_ia32_hsubps (v2df, v2df) +v16qi __builtin_ia32_lddqu (char const *) +void __builtin_ia32_monitor (void *, unsigned int, unsigned int) +v2df __builtin_ia32_movddup (v2df) +v4sf __builtin_ia32_movshdup (v4sf) +v4sf __builtin_ia32_movsldup (v4sf) +void __builtin_ia32_mwait (unsigned int, unsigned int) +@end example + +The following built-in functions are available when @option{-mpni} is used. + +@table @code +@item v2df __builtin_ia32_loadddup (double const *) +Generates the @code{movddup} machine instruction as a load from memory. +@end table + The following built-in functions are available when @option{-m3dnow} is used. All of them generate the machine instruction that is part of the name. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index fb85612d180..d4ecfa2d898 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -487,7 +487,7 @@ in the following sections. -mno-fp-ret-in-387 -msoft-float -msvr3-shlib @gol -mno-wide-multiply -mrtd -malign-double @gol -mpreferred-stack-boundary=@var{num} @gol --mmmx -msse -msse2 -m3dnow @gol +-mmmx -msse -msse2 -mpni -m3dnow @gol -mthreads -mno-align-stringops -minline-all-stringops @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol -m96bit-long-double -mregparm=@var{num} -momit-leaf-frame-pointer @gol @@ -8217,6 +8217,13 @@ code that expects temporaries to be 80bit. This is the default choice for x86-64 compiler. +@item pni +Use all SSE extensions enabled by @option{-msse2} as well as the new +SSE extensions in Prescott New Intrunctions. @option{-mpni} also +enables 2 builtin functions, @code{__builtin_ia32_monitor} and +@code{__builtin_ia32_mwait}, for new intrunctions @code{monitor} and +@code{mwait}. + @item sse,387 Attempt to utilize both instruction sets at once. This effectively double the amount of available registers and on chips with separate execution units for @@ -8398,6 +8405,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mno-sse @item -msse2 @itemx -mno-sse2 +@item -mpni +@itemx -mno-pni @item -m3dnow @itemx -mno-3dnow @opindex mmmx -- 2.11.0