OSDN Git Service

Revert my previous commit.
[pf3gnuchains/gcc-fork.git] / gcc / config / i386 / i386.h
index e319b77..98cb72a 100644 (file)
@@ -1,13 +1,13 @@
 /* Definitions of target machine for GCC for IA-32.
    Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-   2001, 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation,
-   Inc.
+   2001, 2002, 2003, 2004, 2005, 2006, 2007
+   Free Software Foundation, Inc.
 
 This file is part of GCC.
 
 GCC is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
+the Free Software Foundation; either version 3, or (at your option)
 any later version.
 
 GCC is distributed in the hope that it will be useful,
@@ -16,9 +16,45 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING.  If not, write to
-the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA.  */
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* The purpose of this file is to define the characteristics of the i386,
+   independent of assembler syntax or operating system.
+
+   Three other files build on this one to describe a specific assembler syntax:
+   bsd386.h, att386.h, and sun386.h.
+
+   The actual tm.h file for a particular system should include
+   this file, and then the file for the appropriate assembler syntax.
+
+   Many macros that specify assembler syntax are omitted entirely from
+   this file because they really belong in the files for particular
+   assemblers.  These include RP, IP, LPREFIX, PUT_OP_SIZE, USE_STAR,
+   ADDR_BEG, ADDR_END, PRINT_IREG, PRINT_SCALE, PRINT_B_I_S, and many
+   that start with ASM_ or end in ASM_OP.  */
+
+/* Redefines for option macros.  */
+
+#define TARGET_64BIT   OPTION_ISA_64BIT
+#define TARGET_MMX     OPTION_ISA_MMX
+#define TARGET_3DNOW   OPTION_ISA_3DNOW
+#define TARGET_3DNOW_A OPTION_ISA_3DNOW_A
+#define TARGET_SSE     OPTION_ISA_SSE
+#define TARGET_SSE2    OPTION_ISA_SSE2
+#define TARGET_SSE3    OPTION_ISA_SSE3
+#define TARGET_SSSE3   OPTION_ISA_SSSE3
+#define TARGET_SSE4_1  OPTION_ISA_SSE4_1
+#define TARGET_SSE4_2  OPTION_ISA_SSE4_2
+#define TARGET_SSE4A   OPTION_ISA_SSE4A
+#define TARGET_SSE5    OPTION_ISA_SSE5
+#define TARGET_ROUND   OPTION_ISA_ROUND
+
+/* SSE5 and SSE4.1 define the same round instructions */
+#define        OPTION_MASK_ISA_ROUND   (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE5)
+#define        OPTION_ISA_ROUND        ((ix86_isa_flags & OPTION_MASK_ISA_ROUND) != 0)
+
+#include "config/vxworks-dummy.h"
 
 /* Algorithm to expand string function with.  */
 enum stringop_alg
@@ -32,17 +68,18 @@ enum stringop_alg
    loop,
    unrolled_loop
 };
+
 #define NAX_STRINGOP_ALGS 4
+
 /* Specify what algorithm to use for stringops on known size.
    When size is unknown, the UNKNOWN_SIZE alg is used.  When size is
    known at compile time or estimated via feedback, the SIZE array
    is walked in order until MAX is greater then the estimate (or -1
-   means infinity).  Corresponding ALG is used then.  
+   means infinity).  Corresponding ALG is used then.
    For example initializer:
-    {{256, loop}, {-1, rep_prefix_4_byte}}             
+    {{256, loop}, {-1, rep_prefix_4_byte}}
    will use loop for blocks smaller or equal to 256 bytes, rep prefix will
-   be used otherwise.
-*/
+   be used otherwise.  */
 struct stringop_algs
 {
   const enum stringop_alg unknown_size;
@@ -52,21 +89,6 @@ struct stringop_algs
   } size [NAX_STRINGOP_ALGS];
 };
 
-/* The purpose of this file is to define the characteristics of the i386,
-   independent of assembler syntax or operating system.
-
-   Three other files build on this one to describe a specific assembler syntax:
-   bsd386.h, att386.h, and sun386.h.
-
-   The actual tm.h file for a particular system should include
-   this file, and then the file for the appropriate assembler syntax.
-
-   Many macros that specify assembler syntax are omitted entirely from
-   this file because they really belong in the files for particular
-   assemblers.  These include RP, IP, LPREFIX, PUT_OP_SIZE, USE_STAR,
-   ADDR_BEG, ADDR_END, PRINT_IREG, PRINT_SCALE, PRINT_B_I_S, and many
-   that start with ASM_ or end in ASM_OP.  */
-
 /* Define the specific costs for a given cpu */
 
 struct processor_costs {
@@ -107,6 +129,8 @@ struct processor_costs {
                                   in SImode, DImode and TImode*/
   const int mmxsse_to_integer; /* cost of moving mmxsse register to
                                   integer and vice versa.  */
+  const int l1_cache_size;     /* size of l1 cache, in kilobytes.  */
+  const int l2_cache_size;     /* size of l2 cache, in kilobytes.  */
   const int prefetch_block;    /* bytes moved to cache for prefetch.  */
   const int simultaneous_prefetches; /* number of parallel prefetch
                                   operations.  */
@@ -120,6 +144,22 @@ struct processor_costs {
                                /* Specify what algorithm
                                   to use for stringops on unknown size.  */
   struct stringop_algs memcpy[2], memset[2];
+  const int scalar_stmt_cost;   /* Cost of any scalar operation, excluding
+                                  load and store.  */
+  const int scalar_load_cost;   /* Cost of scalar load.  */
+  const int scalar_store_cost;  /* Cost of scalar store.  */
+  const int vec_stmt_cost;      /* Cost of any vector operation, excluding
+                                   load, store, vector-to-scalar and
+                                   scalar-to-vector operation.  */
+  const int vec_to_scalar_cost;    /* Cost of vect-to-scalar operation.  */
+  const int scalar_to_vec_cost;    /* Cost of scalar-to-vector operation.  */
+  const int vec_align_load_cost;   /* Cost of aligned vector load.  */
+  const int vec_unalign_load_cost; /* Cost of unaligned vector load.  */
+  const int vec_store_cost;        /* Cost of vector store.  */
+  const int cond_taken_branch_cost;    /* Cost of taken branch for vectorizer
+                                         cost model.  */
+  const int cond_not_taken_branch_cost;/* Cost of not taken branch for
+                                         vectorizer cost model.  */
 };
 
 extern const struct processor_costs *ix86_cost;
@@ -179,110 +219,195 @@ extern const struct processor_costs *ix86_cost;
 #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
 #define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
 
-extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
-extern const int x86_use_bit_test, x86_cmove, x86_deep_branch;
-extern const int x86_branch_hints, x86_unroll_strlen;
-extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx;
-extern const int x86_use_himode_fiop, x86_use_simode_fiop;
-extern const int x86_use_mov0, x86_use_cltd, x86_use_xchgb;
-extern const int x86_read_modify_write, x86_read_modify, x86_split_long_moves;
-extern const int x86_promote_QImode, x86_single_stringop, x86_fast_prefix;
-extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
-extern const int x86_promote_hi_regs, x86_integer_DFmode_moves;
-extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
-extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
-extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
-extern const int x86_epilogue_using_move, x86_decompose_lea;
-extern const int x86_arch_always_fancy_math_387, x86_shift1;
-extern const int x86_sse_partial_reg_dependency, x86_sse_split_regs;
-extern const int x86_sse_unaligned_move_optimal;
-extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
-extern const int x86_use_ffreep;
-extern const int x86_inter_unit_moves, x86_schedule;
-extern const int x86_use_bt;
-extern const int x86_cmpxchg, x86_cmpxchg8b, x86_xadd;
-extern const int x86_use_incdec;
-extern const int x86_pad_returns;
-extern const int x86_bswap;
-extern const int x86_partial_flag_reg_stall;
-extern int x86_prefetch_sse, x86_cmpxchg16b;
-
-#define TUNEMASK ix86_tune_mask
-#define ARCHMASK ix86_arch_mask
-
-#define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)
-#define TARGET_PUSH_MEMORY (x86_push_memory & TUNEMASK)
-#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & TUNEMASK)
-#define TARGET_USE_BIT_TEST (x86_use_bit_test & TUNEMASK)
-#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & TUNEMASK)
-/* For sane SSE instruction set generation we need fcomi instruction.  It is
-   safe to enable all CMOVE instructions.  */
-#define TARGET_CMOVE ((x86_cmove & ARCHMASK) || TARGET_SSE)
-#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
-#define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & TUNEMASK)
-#define TARGET_BRANCH_PREDICTION_HINTS (x86_branch_hints & TUNEMASK)
-#define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & TUNEMASK)
-#define TARGET_USE_SAHF ((x86_use_sahf & TUNEMASK) && !TARGET_64BIT)
-#define TARGET_MOVX (x86_movx & TUNEMASK)
-#define TARGET_PARTIAL_REG_STALL (x86_partial_reg_stall & TUNEMASK)
-#define TARGET_PARTIAL_FLAG_REG_STALL (x86_partial_flag_reg_stall & TUNEMASK)
-#define TARGET_USE_HIMODE_FIOP (x86_use_himode_fiop & TUNEMASK)
-#define TARGET_USE_SIMODE_FIOP (x86_use_simode_fiop & TUNEMASK)
-#define TARGET_USE_MOV0 (x86_use_mov0 & TUNEMASK)
-#define TARGET_USE_CLTD (x86_use_cltd & TUNEMASK)
-#define TARGET_USE_XCHGB (x86_use_xchgb & TUNEMASK)
-#define TARGET_SPLIT_LONG_MOVES (x86_split_long_moves & TUNEMASK)
-#define TARGET_READ_MODIFY_WRITE (x86_read_modify_write & TUNEMASK)
-#define TARGET_READ_MODIFY (x86_read_modify & TUNEMASK)
-#define TARGET_PROMOTE_QImode (x86_promote_QImode & TUNEMASK)
-#define TARGET_FAST_PREFIX (x86_fast_prefix & TUNEMASK)
-#define TARGET_SINGLE_STRINGOP (x86_single_stringop & TUNEMASK)
-#define TARGET_QIMODE_MATH (x86_qimode_math & TUNEMASK)
-#define TARGET_HIMODE_MATH (x86_himode_math & TUNEMASK)
-#define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & TUNEMASK)
-#define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & TUNEMASK)
-#define TARGET_ADD_ESP_4 (x86_add_esp_4 & TUNEMASK)
-#define TARGET_ADD_ESP_8 (x86_add_esp_8 & TUNEMASK)
-#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & TUNEMASK)
-#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & TUNEMASK)
-#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & TUNEMASK)
-#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & TUNEMASK)
+/* Feature tests against the various tunings.  */
+enum ix86_tune_indices {
+  X86_TUNE_USE_LEAVE,
+  X86_TUNE_PUSH_MEMORY,
+  X86_TUNE_ZERO_EXTEND_WITH_AND,
+  X86_TUNE_USE_BIT_TEST,
+  X86_TUNE_UNROLL_STRLEN,
+  X86_TUNE_DEEP_BRANCH_PREDICTION,
+  X86_TUNE_BRANCH_PREDICTION_HINTS,
+  X86_TUNE_DOUBLE_WITH_ADD,
+  X86_TUNE_USE_SAHF,
+  X86_TUNE_MOVX,
+  X86_TUNE_PARTIAL_REG_STALL,
+  X86_TUNE_PARTIAL_FLAG_REG_STALL,
+  X86_TUNE_USE_HIMODE_FIOP,
+  X86_TUNE_USE_SIMODE_FIOP,
+  X86_TUNE_USE_MOV0,
+  X86_TUNE_USE_CLTD,
+  X86_TUNE_USE_XCHGB,
+  X86_TUNE_SPLIT_LONG_MOVES,
+  X86_TUNE_READ_MODIFY_WRITE,
+  X86_TUNE_READ_MODIFY,
+  X86_TUNE_PROMOTE_QIMODE,
+  X86_TUNE_FAST_PREFIX,
+  X86_TUNE_SINGLE_STRINGOP,
+  X86_TUNE_QIMODE_MATH,
+  X86_TUNE_HIMODE_MATH,
+  X86_TUNE_PROMOTE_QI_REGS,
+  X86_TUNE_PROMOTE_HI_REGS,
+  X86_TUNE_ADD_ESP_4,
+  X86_TUNE_ADD_ESP_8,
+  X86_TUNE_SUB_ESP_4,
+  X86_TUNE_SUB_ESP_8,
+  X86_TUNE_INTEGER_DFMODE_MOVES,
+  X86_TUNE_PARTIAL_REG_DEPENDENCY,
+  X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY,
+  X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL,
+  X86_TUNE_SSE_SPLIT_REGS,
+  X86_TUNE_SSE_TYPELESS_STORES,
+  X86_TUNE_SSE_LOAD0_BY_PXOR,
+  X86_TUNE_MEMORY_MISMATCH_STALL,
+  X86_TUNE_PROLOGUE_USING_MOVE,
+  X86_TUNE_EPILOGUE_USING_MOVE,
+  X86_TUNE_SHIFT1,
+  X86_TUNE_USE_FFREEP,
+  X86_TUNE_INTER_UNIT_MOVES,
+  X86_TUNE_INTER_UNIT_CONVERSIONS,
+  X86_TUNE_FOUR_JUMP_LIMIT,
+  X86_TUNE_SCHEDULE,
+  X86_TUNE_USE_BT,
+  X86_TUNE_USE_INCDEC,
+  X86_TUNE_PAD_RETURNS,
+  X86_TUNE_EXT_80387_CONSTANTS,
+  X86_TUNE_SHORTEN_X87_SSE,
+  X86_TUNE_AVOID_VECTOR_DECODE,
+  X86_TUNE_PROMOTE_HIMODE_IMUL,
+  X86_TUNE_SLOW_IMUL_IMM32_MEM,
+  X86_TUNE_SLOW_IMUL_IMM8,
+  X86_TUNE_MOVE_M1_VIA_OR,
+  X86_TUNE_NOT_UNPAIRABLE,
+  X86_TUNE_NOT_VECTORMODE,
+  X86_TUNE_USE_VECTOR_CONVERTS,
+
+  X86_TUNE_LAST
+};
+
+extern unsigned int ix86_tune_features[X86_TUNE_LAST];
+
+#define TARGET_USE_LEAVE       ix86_tune_features[X86_TUNE_USE_LEAVE]
+#define TARGET_PUSH_MEMORY     ix86_tune_features[X86_TUNE_PUSH_MEMORY]
+#define TARGET_ZERO_EXTEND_WITH_AND \
+       ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
+#define TARGET_USE_BIT_TEST    ix86_tune_features[X86_TUNE_USE_BIT_TEST]
+#define TARGET_UNROLL_STRLEN   ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
+#define TARGET_DEEP_BRANCH_PREDICTION \
+       ix86_tune_features[X86_TUNE_DEEP_BRANCH_PREDICTION]
+#define TARGET_BRANCH_PREDICTION_HINTS \
+       ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
+#define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
+#define TARGET_USE_SAHF                ix86_tune_features[X86_TUNE_USE_SAHF]
+#define TARGET_MOVX            ix86_tune_features[X86_TUNE_MOVX]
+#define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
+#define TARGET_PARTIAL_FLAG_REG_STALL \
+       ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
+#define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
+#define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
+#define TARGET_USE_MOV0                ix86_tune_features[X86_TUNE_USE_MOV0]
+#define TARGET_USE_CLTD                ix86_tune_features[X86_TUNE_USE_CLTD]
+#define TARGET_USE_XCHGB       ix86_tune_features[X86_TUNE_USE_XCHGB]
+#define TARGET_SPLIT_LONG_MOVES        ix86_tune_features[X86_TUNE_SPLIT_LONG_MOVES]
+#define TARGET_READ_MODIFY_WRITE ix86_tune_features[X86_TUNE_READ_MODIFY_WRITE]
+#define TARGET_READ_MODIFY     ix86_tune_features[X86_TUNE_READ_MODIFY]
+#define TARGET_PROMOTE_QImode  ix86_tune_features[X86_TUNE_PROMOTE_QIMODE]
+#define TARGET_FAST_PREFIX     ix86_tune_features[X86_TUNE_FAST_PREFIX]
+#define TARGET_SINGLE_STRINGOP ix86_tune_features[X86_TUNE_SINGLE_STRINGOP]
+#define TARGET_QIMODE_MATH     ix86_tune_features[X86_TUNE_QIMODE_MATH]
+#define TARGET_HIMODE_MATH     ix86_tune_features[X86_TUNE_HIMODE_MATH]
+#define TARGET_PROMOTE_QI_REGS ix86_tune_features[X86_TUNE_PROMOTE_QI_REGS]
+#define TARGET_PROMOTE_HI_REGS ix86_tune_features[X86_TUNE_PROMOTE_HI_REGS]
+#define TARGET_ADD_ESP_4       ix86_tune_features[X86_TUNE_ADD_ESP_4]
+#define TARGET_ADD_ESP_8       ix86_tune_features[X86_TUNE_ADD_ESP_8]
+#define TARGET_SUB_ESP_4       ix86_tune_features[X86_TUNE_SUB_ESP_4]
+#define TARGET_SUB_ESP_8       ix86_tune_features[X86_TUNE_SUB_ESP_8]
+#define TARGET_INTEGER_DFMODE_MOVES \
+       ix86_tune_features[X86_TUNE_INTEGER_DFMODE_MOVES]
+#define TARGET_PARTIAL_REG_DEPENDENCY \
+       ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY]
 #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
-                                     (x86_sse_partial_reg_dependency & TUNEMASK)
+       ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY]
 #define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
-                                     (x86_sse_unaligned_move_optimal & TUNEMASK)
-#define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & TUNEMASK)
-#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & TUNEMASK)
-#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & TUNEMASK)
-#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & TUNEMASK)
-#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & TUNEMASK)
-#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & TUNEMASK)
-#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
-#define TARGET_SHIFT1 (x86_shift1 & TUNEMASK)
-#define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK)
-#define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & TUNEMASK)
-#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & TUNEMASK)
-#define TARGET_SCHEDULE (x86_schedule & TUNEMASK)
-#define TARGET_USE_BT (x86_use_bt & TUNEMASK)
-#define TARGET_USE_INCDEC (x86_use_incdec & TUNEMASK)
-#define TARGET_PAD_RETURNS (x86_pad_returns & TUNEMASK)
-
-#define ASSEMBLER_DIALECT (ix86_asm_dialect)
-
-#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
-#define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \
-                            && (ix86_fpmath & FPMATH_387))
-
-#define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU)
-#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
-#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
-#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)
-
-#define TARGET_CMPXCHG (x86_cmpxchg & ARCHMASK)
-#define TARGET_CMPXCHG8B (x86_cmpxchg8b & ARCHMASK)
-#define TARGET_CMPXCHG16B (x86_cmpxchg16b)
-#define TARGET_XADD (x86_xadd & ARCHMASK)
-#define TARGET_BSWAP (x86_bswap & ARCHMASK)
+       ix86_tune_features[X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL]
+#define TARGET_SSE_SPLIT_REGS  ix86_tune_features[X86_TUNE_SSE_SPLIT_REGS]
+#define TARGET_SSE_TYPELESS_STORES \
+       ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES]
+#define TARGET_SSE_LOAD0_BY_PXOR ix86_tune_features[X86_TUNE_SSE_LOAD0_BY_PXOR]
+#define TARGET_MEMORY_MISMATCH_STALL \
+       ix86_tune_features[X86_TUNE_MEMORY_MISMATCH_STALL]
+#define TARGET_PROLOGUE_USING_MOVE \
+       ix86_tune_features[X86_TUNE_PROLOGUE_USING_MOVE]
+#define TARGET_EPILOGUE_USING_MOVE \
+       ix86_tune_features[X86_TUNE_EPILOGUE_USING_MOVE]
+#define TARGET_SHIFT1          ix86_tune_features[X86_TUNE_SHIFT1]
+#define TARGET_USE_FFREEP      ix86_tune_features[X86_TUNE_USE_FFREEP]
+#define TARGET_INTER_UNIT_MOVES        ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES]
+#define TARGET_INTER_UNIT_CONVERSIONS\
+       ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
+#define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
+#define TARGET_SCHEDULE                ix86_tune_features[X86_TUNE_SCHEDULE]
+#define TARGET_USE_BT          ix86_tune_features[X86_TUNE_USE_BT]
+#define TARGET_USE_INCDEC      ix86_tune_features[X86_TUNE_USE_INCDEC]
+#define TARGET_PAD_RETURNS     ix86_tune_features[X86_TUNE_PAD_RETURNS]
+#define TARGET_EXT_80387_CONSTANTS \
+       ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
+#define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
+#define TARGET_AVOID_VECTOR_DECODE \
+       ix86_tune_features[X86_TUNE_AVOID_VECTOR_DECODE]
+#define TARGET_TUNE_PROMOTE_HIMODE_IMUL \
+       ix86_tune_features[X86_TUNE_PROMOTE_HIMODE_IMUL]
+#define TARGET_SLOW_IMUL_IMM32_MEM \
+       ix86_tune_features[X86_TUNE_SLOW_IMUL_IMM32_MEM]
+#define TARGET_SLOW_IMUL_IMM8  ix86_tune_features[X86_TUNE_SLOW_IMUL_IMM8]
+#define        TARGET_MOVE_M1_VIA_OR   ix86_tune_features[X86_TUNE_MOVE_M1_VIA_OR]
+#define TARGET_NOT_UNPAIRABLE  ix86_tune_features[X86_TUNE_NOT_UNPAIRABLE]
+#define TARGET_NOT_VECTORMODE  ix86_tune_features[X86_TUNE_NOT_VECTORMODE]
+#define TARGET_USE_VECTOR_CONVERTS ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
+
+/* Feature tests against the various architecture variations.  */
+enum ix86_arch_indices {
+  X86_ARCH_CMOVE,              /* || TARGET_SSE */
+  X86_ARCH_CMPXCHG,
+  X86_ARCH_CMPXCHG8B,
+  X86_ARCH_XADD,
+  X86_ARCH_BSWAP,
+
+  X86_ARCH_LAST
+};
+
+extern unsigned int ix86_arch_features[X86_ARCH_LAST];
+
+#define TARGET_CMOVE           ix86_arch_features[X86_ARCH_CMOVE]
+#define TARGET_CMPXCHG         ix86_arch_features[X86_ARCH_CMPXCHG]
+#define TARGET_CMPXCHG8B       ix86_arch_features[X86_ARCH_CMPXCHG8B]
+#define TARGET_XADD            ix86_arch_features[X86_ARCH_XADD]
+#define TARGET_BSWAP           ix86_arch_features[X86_ARCH_BSWAP]
+
+#define TARGET_FISTTP          (TARGET_SSE3 && TARGET_80387)
+
+extern int x86_prefetch_sse;
+
+#define TARGET_ABM             x86_abm
+#define TARGET_CMPXCHG16B      x86_cmpxchg16b
+#define TARGET_POPCNT          x86_popcnt
+#define TARGET_PREFETCH_SSE    x86_prefetch_sse
+#define TARGET_SAHF            x86_sahf
+#define TARGET_RECIP           x86_recip
+#define TARGET_FUSED_MADD      x86_fused_muladd
+
+#define ASSEMBLER_DIALECT      (ix86_asm_dialect)
+
+#define TARGET_SSE_MATH                ((ix86_fpmath & FPMATH_SSE) != 0)
+#define TARGET_MIX_SSE_I387 \
+ ((ix86_fpmath & (FPMATH_SSE | FPMATH_387)) == (FPMATH_SSE | FPMATH_387))
+
+#define TARGET_GNU_TLS         (ix86_tls_dialect == TLS_DIALECT_GNU)
+#define TARGET_GNU2_TLS                (ix86_tls_dialect == TLS_DIALECT_GNU2)
+#define TARGET_ANY_GNU_TLS     (TARGET_GNU_TLS || TARGET_GNU2_TLS)
+#define TARGET_SUN_TLS         (ix86_tls_dialect == TLS_DIALECT_SUN)
+
+extern int ix86_isa_flags;
 
 #ifndef TARGET_64BIT_DEFAULT
 #define TARGET_64BIT_DEFAULT 0
@@ -291,16 +416,36 @@ extern int x86_prefetch_sse, x86_cmpxchg16b;
 #define TARGET_TLS_DIRECT_SEG_REFS_DEFAULT 0
 #endif
 
+/* Fence to use after loop using storent.  */
+
+extern tree x86_mfence;
+#define FENCE_FOLLOWING_MOVNT x86_mfence
+
 /* Once GDB has been enhanced to deal with functions without frame
    pointers, we can change this to allow for elimination of
    the frame pointer in leaf functions.  */
 #define TARGET_DEFAULT 0
 
+/* Extra bits to force.  */
+#define TARGET_SUBTARGET_DEFAULT 0
+#define TARGET_SUBTARGET_ISA_DEFAULT 0
+
+/* Extra bits to force on w/ 32-bit mode.  */
+#define TARGET_SUBTARGET32_DEFAULT 0
+#define TARGET_SUBTARGET32_ISA_DEFAULT 0
+
+/* Extra bits to force on w/ 64-bit mode.  */
+#define TARGET_SUBTARGET64_DEFAULT 0
+#define TARGET_SUBTARGET64_ISA_DEFAULT 0
+
 /* This is not really a target flag, but is done this way so that
    it's analogous to similar code for Mach-O on PowerPC.  darwin.h
    redefines this to 1.  */
 #define TARGET_MACHO 0
 
+/* Likewise, for the Windows 64-bit ABI.  */
+#define TARGET_64BIT_MS_ABI 0
+
 /* Subtargets may reset this to 1 in order to enable 96-bit long double
    with the rounding mode forced to 53 bits.  */
 #define TARGET_96_ROUND_53_LONG_DOUBLE 0
@@ -334,26 +479,17 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 /* Support for configure-time defaults of some command line options.
    The order here is important so that -march doesn't squash the
    tune or cpu values.  */
-#define OPTION_DEFAULT_SPECS \
+#define OPTION_DEFAULT_SPECS                                      \
   {"tune", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \
-  {"cpu", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" }, \
+  {"cpu", "%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" },  \
   {"arch", "%{!march=*:-march=%(VALUE)}"}
 
 /* Specs for the compiler proper */
 
 #ifndef CC1_CPU_SPEC
 #define CC1_CPU_SPEC_1 "\
-%{!mtune*: \
-%{m386:mtune=i386 \
-%n`-m386' is deprecated. Use `-march=i386' or `-mtune=i386' instead.\n} \
-%{m486:-mtune=i486 \
-%n`-m486' is deprecated. Use `-march=i486' or `-mtune=i486' instead.\n} \
-%{mpentium:-mtune=pentium \
-%n`-mpentium' is deprecated. Use `-march=pentium' or `-mtune=pentium' instead.\n} \
-%{mpentiumpro:-mtune=pentiumpro \
-%n`-mpentiumpro' is deprecated. Use `-march=pentiumpro' or `-mtune=pentiumpro' instead.\n} \
 %{mcpu=*:-mtune=%* \
-%n`-mcpu=' is deprecated. Use `-mtune=' or '-march=' instead.\n}} \
+%n`-mcpu=' is deprecated. Use `-mtune=' or '-march=' instead.\n} \
 %<mcpu=* \
 %{mintel-syntax:-masm=intel \
 %n`-mintel-syntax' is deprecated. Use `-masm=intel' instead.\n} \
@@ -377,7 +513,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
       size_t arch_len = strlen (ix86_arch_string);             \
       size_t tune_len = strlen (ix86_tune_string);             \
       int last_arch_char = ix86_arch_string[arch_len - 1];     \
-      int last_tune_char = ix86_tune_string[tune_len - 1];             \
+      int last_tune_char = ix86_tune_string[tune_len - 1];     \
                                                                \
       if (TARGET_64BIT)                                                \
        {                                                       \
@@ -395,21 +531,90 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
          builtin_define_std ("i386");                          \
        }                                                       \
                                                                \
-      /* Built-ins based on -mtune= (or -march= if no          \
-        -mtune= given).  */                                    \
-      if (TARGET_386)                                          \
-       builtin_define ("__tune_i386__");                       \
-      else if (TARGET_486)                                     \
-       builtin_define ("__tune_i486__");                       \
-      else if (TARGET_PENTIUM)                                 \
+      /* Built-ins based on -march=.  */                       \
+      switch (ix86_arch)                                       \
        {                                                       \
+       case PROCESSOR_I386:                                    \
+         break;                                                \
+       case PROCESSOR_I486:                                    \
+         builtin_define ("__i486");                            \
+         builtin_define ("__i486__");                          \
+         break;                                                \
+       case PROCESSOR_PENTIUM:                                 \
+         builtin_define ("__i586");                            \
+         builtin_define ("__i586__");                          \
+         builtin_define ("__pentium");                         \
+         builtin_define ("__pentium__");                       \
+         if (last_arch_char == 'x')                            \
+           builtin_define ("__pentium_mmx__");                 \
+         break;                                                \
+       case PROCESSOR_PENTIUMPRO:                              \
+         builtin_define ("__i686");                            \
+         builtin_define ("__i686__");                          \
+         builtin_define ("__pentiumpro");                      \
+         builtin_define ("__pentiumpro__");                    \
+         break;                                                \
+       case PROCESSOR_GEODE:                                   \
+         builtin_define ("__geode");                           \
+         builtin_define ("__geode__");                         \
+         break;                                                \
+       case PROCESSOR_K6:                                      \
+         builtin_define ("__k6");                              \
+         builtin_define ("__k6__");                            \
+         if (last_arch_char == '2')                            \
+           builtin_define ("__k6_2__");                        \
+         else if (last_arch_char == '3')                       \
+           builtin_define ("__k6_3__");                        \
+         break;                                                \
+       case PROCESSOR_ATHLON:                                  \
+         builtin_define ("__athlon");                          \
+         builtin_define ("__athlon__");                        \
+         /* Only plain "athlon" lacks SSE.  */                 \
+         if (last_arch_char != 'n')                            \
+           builtin_define ("__athlon_sse__");                  \
+         break;                                                \
+       case PROCESSOR_K8:                                      \
+         builtin_define ("__k8");                              \
+         builtin_define ("__k8__");                            \
+         break;                                                \
+       case PROCESSOR_AMDFAM10:                                \
+         builtin_define ("__amdfam10");                        \
+         builtin_define ("__amdfam10__");                      \
+         break;                                                \
+       case PROCESSOR_PENTIUM4:                                \
+         builtin_define ("__pentium4");                        \
+         builtin_define ("__pentium4__");                      \
+         break;                                                \
+       case PROCESSOR_NOCONA:                                  \
+         builtin_define ("__nocona");                          \
+         builtin_define ("__nocona__");                        \
+         break;                                                \
+       case PROCESSOR_CORE2:                                   \
+         builtin_define ("__core2");                           \
+         builtin_define ("__core2__");                         \
+         break;                                                \
+       case PROCESSOR_GENERIC32:                               \
+       case PROCESSOR_GENERIC64:                               \
+       case PROCESSOR_max:                                     \
+         gcc_unreachable ();                                   \
+       }                                                       \
+                                                               \
+      /* Built-ins based on -mtune=.  */                       \
+      switch (ix86_tune)                                       \
+       {                                                       \
+       case PROCESSOR_I386:                                    \
+         builtin_define ("__tune_i386__");                     \
+         break;                                                \
+       case PROCESSOR_I486:                                    \
+         builtin_define ("__tune_i486__");                     \
+         break;                                                \
+       case PROCESSOR_PENTIUM:                                 \
          builtin_define ("__tune_i586__");                     \
          builtin_define ("__tune_pentium__");                  \
          if (last_tune_char == 'x')                            \
            builtin_define ("__tune_pentium_mmx__");            \
-       }                                                       \
-      else if (TARGET_PENTIUMPRO)                              \
-       {                                                       \
+         break;                                                \
+       case PROCESSOR_PENTIUMPRO:                              \
          builtin_define ("__tune_i686__");                     \
          builtin_define ("__tune_pentiumpro__");               \
          switch (last_tune_char)                               \
@@ -421,36 +626,44 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
              builtin_define ("__tune_pentium2__");             \
              break;                                            \
            }                                                   \
-       }                                                       \
-      else if (TARGET_GEODE)                                   \
-       {                                                       \
+         break;                                                \
+       case PROCESSOR_GEODE:                                   \
          builtin_define ("__tune_geode__");                    \
-       }                                                       \
-      else if (TARGET_K6)                                      \
-       {                                                       \
+         break;                                                \
+       case PROCESSOR_K6:                                      \
          builtin_define ("__tune_k6__");                       \
          if (last_tune_char == '2')                            \
            builtin_define ("__tune_k6_2__");                   \
          else if (last_tune_char == '3')                       \
            builtin_define ("__tune_k6_3__");                   \
-       }                                                       \
-      else if (TARGET_ATHLON)                                  \
-       {                                                       \
+         break;                                                \
+       case PROCESSOR_ATHLON:                                  \
          builtin_define ("__tune_athlon__");                   \
          /* Only plain "athlon" lacks SSE.  */                 \
          if (last_tune_char != 'n')                            \
            builtin_define ("__tune_athlon_sse__");             \
+         break;                                                \
+       case PROCESSOR_K8:                                      \
+         builtin_define ("__tune_k8__");                       \
+         break;                                                \
+       case PROCESSOR_AMDFAM10:                                \
+         builtin_define ("__tune_amdfam10__");                 \
+         break;                                                \
+       case PROCESSOR_PENTIUM4:                                \
+         builtin_define ("__tune_pentium4__");                 \
+         break;                                                \
+        case PROCESSOR_NOCONA:                                 \
+         builtin_define ("__tune_nocona__");                   \
+         break;                                                \
+       case PROCESSOR_CORE2:                                   \
+         builtin_define ("__tune_core2__");                    \
+         break;                                                \
+       case PROCESSOR_GENERIC32:                               \
+       case PROCESSOR_GENERIC64:                               \
+         break;                                                \
+       case PROCESSOR_max:                                     \
+         gcc_unreachable ();                                   \
        }                                                       \
-      else if (TARGET_K8)                                      \
-       builtin_define ("__tune_k8__");                         \
-      else if (TARGET_AMDFAM10)                                        \
-       builtin_define ("__tune_amdfam10__");                   \
-      else if (TARGET_PENTIUM4)                                        \
-       builtin_define ("__tune_pentium4__");                   \
-      else if (TARGET_NOCONA)                                  \
-       builtin_define ("__tune_nocona__");                     \
-      else if (TARGET_CORE2)                                   \
-       builtin_define ("__tune_core2__");                      \
                                                                \
       if (TARGET_MMX)                                          \
        builtin_define ("__MMX__");                             \
@@ -466,114 +679,49 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
        builtin_define ("__SSE3__");                            \
       if (TARGET_SSSE3)                                                \
        builtin_define ("__SSSE3__");                           \
-      if (TARGET_SSE4A)                                        \
+      if (TARGET_SSE4_1)                                       \
+       builtin_define ("__SSE4_1__");                          \
+      if (TARGET_SSE4_2)                                       \
+       builtin_define ("__SSE4_2__");                          \
+      if (TARGET_SSE4A)                                                \
        builtin_define ("__SSE4A__");                           \
+      if (TARGET_SSE5)                                         \
+       builtin_define ("__SSE5__");                            \
       if (TARGET_SSE_MATH && TARGET_SSE)                       \
        builtin_define ("__SSE_MATH__");                        \
       if (TARGET_SSE_MATH && TARGET_SSE2)                      \
        builtin_define ("__SSE2_MATH__");                       \
-                                                               \
-      /* Built-ins based on -march=.  */                       \
-      if (ix86_arch == PROCESSOR_I486)                         \
-       {                                                       \
-         builtin_define ("__i486");                            \
-         builtin_define ("__i486__");                          \
-       }                                                       \
-      else if (ix86_arch == PROCESSOR_PENTIUM)                 \
-       {                                                       \
-         builtin_define ("__i586");                            \
-         builtin_define ("__i586__");                          \
-         builtin_define ("__pentium");                         \
-         builtin_define ("__pentium__");                       \
-         if (last_arch_char == 'x')                            \
-           builtin_define ("__pentium_mmx__");                 \
-       }                                                       \
-      else if (ix86_arch == PROCESSOR_PENTIUMPRO)              \
-       {                                                       \
-         builtin_define ("__i686");                            \
-         builtin_define ("__i686__");                          \
-         builtin_define ("__pentiumpro");                      \
-         builtin_define ("__pentiumpro__");                    \
-       }                                                       \
-      else if (ix86_arch == PROCESSOR_GEODE)                   \
-       {                                                       \
-         builtin_define ("__geode");                           \
-         builtin_define ("__geode__");                         \
-       }                                                       \
-      else if (ix86_arch == PROCESSOR_K6)                      \
-       {                                                       \
-                                                               \
-         builtin_define ("__k6");                              \
-         builtin_define ("__k6__");                            \
-         if (last_arch_char == '2')                            \
-           builtin_define ("__k6_2__");                        \
-         else if (last_arch_char == '3')                       \
-           builtin_define ("__k6_3__");                        \
-       }                                                       \
-      else if (ix86_arch == PROCESSOR_ATHLON)                  \
-       {                                                       \
-         builtin_define ("__athlon");                          \
-         builtin_define ("__athlon__");                        \
-         /* Only plain "athlon" lacks SSE.  */                 \
-         if (last_arch_char != 'n')                            \
-           builtin_define ("__athlon_sse__");                  \
-       }                                                       \
-      else if (ix86_arch == PROCESSOR_K8)                      \
-       {                                                       \
-         builtin_define ("__k8");                              \
-         builtin_define ("__k8__");                            \
-       }                                                       \
-      else if (ix86_arch == PROCESSOR_AMDFAM10)                        \
-       {                                                       \
-         builtin_define ("__amdfam10");                        \
-         builtin_define ("__amdfam10__");                      \
-       }                                                       \
-      else if (ix86_arch == PROCESSOR_PENTIUM4)                        \
-       {                                                       \
-         builtin_define ("__pentium4");                        \
-         builtin_define ("__pentium4__");                      \
-       }                                                       \
-      else if (ix86_arch == PROCESSOR_NOCONA)                  \
-       {                                                       \
-         builtin_define ("__nocona");                          \
-         builtin_define ("__nocona__");                        \
-       }                                                       \
-      else if (ix86_arch == PROCESSOR_CORE2)                   \
-       {                                                       \
-         builtin_define ("__core2");                           \
-         builtin_define ("__core2__");                         \
-       }                                                       \
     }                                                          \
   while (0)
 
-#define TARGET_CPU_DEFAULT_i386 0
-#define TARGET_CPU_DEFAULT_i486 1
-#define TARGET_CPU_DEFAULT_pentium 2
-#define TARGET_CPU_DEFAULT_pentium_mmx 3
-#define TARGET_CPU_DEFAULT_pentiumpro 4
-#define TARGET_CPU_DEFAULT_pentium2 5
-#define TARGET_CPU_DEFAULT_pentium3 6
-#define TARGET_CPU_DEFAULT_pentium4 7
-#define TARGET_CPU_DEFAULT_geode 8
-#define TARGET_CPU_DEFAULT_k6 9
-#define TARGET_CPU_DEFAULT_k6_2 10
-#define TARGET_CPU_DEFAULT_k6_3 11
-#define TARGET_CPU_DEFAULT_athlon 12
-#define TARGET_CPU_DEFAULT_athlon_sse 13
-#define TARGET_CPU_DEFAULT_k8 14
-#define TARGET_CPU_DEFAULT_pentium_m 15
-#define TARGET_CPU_DEFAULT_prescott 16
-#define TARGET_CPU_DEFAULT_nocona 17
-#define TARGET_CPU_DEFAULT_core2 18
-#define TARGET_CPU_DEFAULT_generic 19
-#define TARGET_CPU_DEFAULT_amdfam10 20
-
-#define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
-                                 "pentiumpro", "pentium2", "pentium3", \
-                                  "pentium4", "geode", "k6", "k6-2", "k6-3", \
-                                 "athlon", "athlon-4", "k8", \
-                                 "pentium-m", "prescott", "nocona", \
-                                 "core2", "generic", "amdfam10"}
+enum target_cpu_default
+{
+  TARGET_CPU_DEFAULT_generic = 0,
+
+  TARGET_CPU_DEFAULT_i386,
+  TARGET_CPU_DEFAULT_i486,
+  TARGET_CPU_DEFAULT_pentium,
+  TARGET_CPU_DEFAULT_pentium_mmx,
+  TARGET_CPU_DEFAULT_pentiumpro,
+  TARGET_CPU_DEFAULT_pentium2,
+  TARGET_CPU_DEFAULT_pentium3,
+  TARGET_CPU_DEFAULT_pentium4,
+  TARGET_CPU_DEFAULT_pentium_m,
+  TARGET_CPU_DEFAULT_prescott,
+  TARGET_CPU_DEFAULT_nocona,
+  TARGET_CPU_DEFAULT_core2,
+
+  TARGET_CPU_DEFAULT_geode,
+  TARGET_CPU_DEFAULT_k6,
+  TARGET_CPU_DEFAULT_k6_2,
+  TARGET_CPU_DEFAULT_k6_3,
+  TARGET_CPU_DEFAULT_athlon,
+  TARGET_CPU_DEFAULT_athlon_sse,
+  TARGET_CPU_DEFAULT_k8,
+  TARGET_CPU_DEFAULT_amdfam10,
+
+  TARGET_CPU_DEFAULT_max
+};
 
 #ifndef CC1_SPEC
 #define CC1_SPEC "%(cc1_cpu) "
@@ -597,9 +745,6 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
   { "cc1_cpu",  CC1_CPU_SPEC },                                                \
   SUBTARGET_EXTRA_SPECS
 \f
-/* target machine storage layout */
-
-#define LONG_DOUBLE_TYPE_SIZE 80
 
 /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
    FPU, assume that the fpcw is set to extended precision; when using
@@ -609,12 +754,17 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 #define TARGET_FLT_EVAL_METHOD \
   (TARGET_MIX_SSE_I387 ? -1 : TARGET_SSE_MATH ? 0 : 2)
 
+/* target machine storage layout */
+
 #define SHORT_TYPE_SIZE 16
 #define INT_TYPE_SIZE 32
 #define FLOAT_TYPE_SIZE 32
 #define LONG_TYPE_SIZE BITS_PER_WORD
 #define DOUBLE_TYPE_SIZE 64
 #define LONG_LONG_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 80
+
+#define WIDEST_HARDWARE_FP_SIZE LONG_DOUBLE_TYPE_SIZE
 
 #if defined (TARGET_BI_ARCH) || TARGET_64BIT_DEFAULT
 #define MAX_BITS_PER_WORD 64
@@ -770,6 +920,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
    for details.  */
 
 #define STACK_REGS
+
 #define IS_STACK_MODE(MODE)                                    \
   (((MODE) == SFmode && (!TARGET_SSE || !TARGET_SSE_MATH))     \
    || ((MODE) == DFmode && (!TARGET_SSE2 || !TARGET_SSE_MATH))  \
@@ -820,7 +971,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 /*  r8,  r9, r10, r11, r12, r13, r14, r15*/                    \
      2,   2,   2,   2,   2,   2,   2,   2,                     \
 /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/              \
-     2,   2,    2,    2,    2,    2,    2,    2}
+     2,   2,    2,    2,    2,    2,    2,    2 }
 
 
 /* 1 for registers not available across function calls.
@@ -848,7 +999,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 /*  r8,  r9, r10, r11, r12, r13, r14, r15*/                    \
      1,   1,   1,   1,   2,   2,   2,   2,                     \
 /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/              \
-     1,   1,    1,    1,    1,    1,    1,    1}               \
+     1,   1,    1,    1,    1,    1,    1,    1 }
 
 /* Order in which to allocate registers.  Each register must be
    listed once, even those in FIXED_REGISTERS.  List frame pointer
@@ -886,10 +1037,10 @@ do {                                                                     \
                               == (TARGET_64BIT ? 3 : 2));              \
       }                                                                        \
     j = PIC_OFFSET_TABLE_REGNUM;                                       \
-    if (j != INVALID_REGNUM)                   \
+    if (j != INVALID_REGNUM)                                           \
       {                                                                        \
-       fixed_regs[j] = 1;                      \
-       call_used_regs[j] = 1;                  \
+       fixed_regs[j] = 1;                                              \
+       call_used_regs[j] = 1;                                          \
       }                                                                        \
     if (! TARGET_MMX)                                                  \
       {                                                                        \
@@ -922,6 +1073,11 @@ do {                                                                      \
        for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)         \
          reg_names[i] = "";                                            \
       }                                                                        \
+    if (TARGET_64BIT_MS_ABI)                                           \
+      {                                                                        \
+        call_used_regs[4 /*RSI*/] = 0;                                  \
+        call_used_regs[5 /*RDI*/] = 0;                                  \
+      }                                                                        \
   } while (0)
 
 /* Return number of consecutive hard regs needed starting at reg REGNO
@@ -934,7 +1090,7 @@ do {                                                                       \
    applied to them.
    */
 
-#define HARD_REGNO_NREGS(REGNO, MODE)   \
+#define HARD_REGNO_NREGS(REGNO, MODE)                                  \
   (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO)    \
    ? (COMPLEX_MODE_P (MODE) ? 2 : 1)                                   \
    : ((MODE) == XFmode                                                 \
@@ -952,40 +1108,44 @@ do {                                                                     \
 
 #define HARD_REGNO_NREGS_WITH_PADDING(REGNO, MODE) ((MODE) == XFmode ? 4 : 8)
 
-#define VALID_SSE2_REG_MODE(MODE) \
-    ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode    \
-     || (MODE) == V2DImode || (MODE) == DFmode)
+#define VALID_SSE2_REG_MODE(MODE)                                      \
+  ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode     \
+   || (MODE) == V2DImode || (MODE) == DFmode)
 
 #define VALID_SSE_REG_MODE(MODE)                                       \
-    ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode      \
-     || (MODE) == SFmode || (MODE) == TFmode)
+  ((MODE) == TImode || (MODE) == V4SFmode || (MODE) == V4SImode                \
+   || (MODE) == SFmode || (MODE) == TFmode)
 
 #define VALID_MMX_REG_MODE_3DNOW(MODE) \
-    ((MODE) == V2SFmode || (MODE) == SFmode)
+  ((MODE) == V2SFmode || (MODE) == SFmode)
 
 #define VALID_MMX_REG_MODE(MODE)                                       \
-    ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode      \
-     || (MODE) == V2SImode || (MODE) == SImode)
+  ((MODE == V1DImode) || (MODE) == DImode                              \
+   || (MODE) == V2SImode || (MODE) == SImode                           \
+   || (MODE) == V4HImode || (MODE) == V8QImode)
 
 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
    place emms and femms instructions.  */
 #define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD)
 
+#define VALID_DFP_MODE_P(MODE) \
+  ((MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode)
+
 #define VALID_FP_MODE_P(MODE)                                          \
-    ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode          \
-     || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode)      \
+  ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode            \
+   || (MODE) == SCmode || (MODE) == DCmode || (MODE) == XCmode)                \
 
 #define VALID_INT_MODE_P(MODE)                                         \
-    ((MODE) == QImode || (MODE) == HImode || (MODE) == SImode          \
-     || (MODE) == DImode                                               \
-     || (MODE) == CQImode || (MODE) == CHImode || (MODE) == CSImode    \
-     || (MODE) == CDImode                                              \
-     || (TARGET_64BIT && ((MODE) == TImode || (MODE) == CTImode                \
-         || (MODE) == TFmode || (MODE) == TCmode)))
+  ((MODE) == QImode || (MODE) == HImode || (MODE) == SImode            \
+   || (MODE) == DImode                                                 \
+   || (MODE) == CQImode || (MODE) == CHImode || (MODE) == CSImode      \
+   || (MODE) == CDImode                                                        \
+   || (TARGET_64BIT && ((MODE) == TImode || (MODE) == CTImode          \
+                       || (MODE) == TFmode || (MODE) == TCmode)))
 
 /* Return true for modes passed in SSE registers.  */
-#define SSE_REG_MODE_P(MODE) \
((MODE) == TImode || (MODE) == V16QImode || (MODE) == TFmode          \
+#define SSE_REG_MODE_P(MODE)                                           \
 ((MODE) == TImode || (MODE) == V16QImode || (MODE) == TFmode         \
    || (MODE) == V8HImode || (MODE) == V2DFmode || (MODE) == V2DImode   \
    || (MODE) == V4SFmode || (MODE) == V4SImode)
 
@@ -1013,10 +1173,11 @@ do {                                                                    \
 #define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE)                        \
   (CC_REGNO_P (REGNO) ? VOIDmode                                       \
    : (MODE) == VOIDmode && (NREGS) != 1 ? VOIDmode                     \
-   : (MODE) == VOIDmode ? choose_hard_reg_mode ((REGNO), (NREGS), false)\
+   : (MODE) == VOIDmode ? choose_hard_reg_mode ((REGNO), (NREGS), false) \
    : (MODE) == HImode && !TARGET_PARTIAL_REG_STALL ? SImode            \
    : (MODE) == QImode && (REGNO) >= 4 && !TARGET_64BIT ? SImode        \
    : (MODE))
+
 /* Specify the registers used for certain standard purposes.
    The values of these macros are register numbers.  */
 
@@ -1158,6 +1319,7 @@ enum reg_class
   GENERAL_REGS,                        /* %eax %ebx %ecx %edx %esi %edi %ebp %esp %r8 - %r15*/
   FP_TOP_REG, FP_SECOND_REG,   /* %st(0) %st(1) */
   FLOAT_REGS,
+  SSE_FIRST_REG,
   SSE_REGS,
   MMX_REGS,
   FP_TOP_SSE_REGS,
@@ -1176,7 +1338,7 @@ enum reg_class
 #define FLOAT_CLASS_P(CLASS) \
   reg_class_subset_p ((CLASS), FLOAT_REGS)
 #define SSE_CLASS_P(CLASS) \
-  ((CLASS) == SSE_REGS)
+  reg_class_subset_p ((CLASS), SSE_REGS)
 #define MMX_CLASS_P(CLASS) \
   ((CLASS) == MMX_REGS)
 #define MAYBE_INTEGER_CLASS_P(CLASS) \
@@ -1204,6 +1366,7 @@ enum reg_class
    "GENERAL_REGS",                     \
    "FP_TOP_REG", "FP_SECOND_REG",      \
    "FLOAT_REGS",                       \
+   "SSE_FIRST_REG",                    \
    "SSE_REGS",                         \
    "MMX_REGS",                         \
    "FP_TOP_SSE_REGS",                  \
@@ -1231,6 +1394,7 @@ enum reg_class
   { 0x1100ff,  0x1fe0 },               /* GENERAL_REGS */              \
      { 0x100,     0x0 }, { 0x0200, 0x0 },/* FP_TOP_REG, FP_SECOND_REG */\
     { 0xff00,     0x0 },               /* FLOAT_REGS */                \
+  { 0x200000,     0x0 },               /* SSE_FIRST_REG */             \
 { 0x1fe00000,0x1fe000 },               /* SSE_REGS */                  \
 { 0xe0000000,    0x1f },               /* MMX_REGS */                  \
 { 0x1fe00100,0x1fe000 },               /* FP_TOP_SSE_REG */            \
@@ -1274,6 +1438,9 @@ enum reg_class
 #define ANY_FP_REG_P(X) (REG_P (X) && ANY_FP_REGNO_P (REGNO (X)))
 #define ANY_FP_REGNO_P(N) (FP_REGNO_P (N) || SSE_REGNO_P (N))
 
+#define X87_FLOAT_MODE_P(MODE) \
+  (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))
+
 #define SSE_REG_P(N) (REG_P (N) && SSE_REGNO_P (REGNO (N)))
 #define SSE_REGNO_P(N)                                         \
   (IN_RANGE ((N), FIRST_SSE_REG, LAST_SSE_REG)                 \
@@ -1288,6 +1455,9 @@ enum reg_class
 #define SSE_FLOAT_MODE_P(MODE) \
   ((TARGET_SSE && (MODE) == SFmode) || (TARGET_SSE2 && (MODE) == DFmode))
 
+#define SSE_VEC_FLOAT_MODE_P(MODE) \
+  ((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode))
+
 #define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))
 #define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG)
 
@@ -1548,10 +1718,6 @@ typedef struct ix86_args {
 #define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \
   function_arg (&(CUM), (MODE), (TYPE), (NAMED))
 
-/* Implement `va_start' for varargs and stdarg.  */
-#define EXPAND_BUILTIN_VA_START(VALIST, NEXTARG) \
-  ix86_va_start (VALIST, NEXTARG)
-
 #define TARGET_ASM_FILE_END ix86_file_end
 #define NEED_INDICATE_EXEC_STACK 0
 
@@ -1620,7 +1786,7 @@ typedef struct ix86_args {
    All other eliminations are valid.  */
 
 #define CAN_ELIMINATE(FROM, TO) \
-  ((TO) == STACK_POINTER_REGNUM ? ! frame_pointer_needed : 1)
+  ((TO) == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1)
 
 /* Define the offset between two registers, one to be eliminated, and the other
    its replacement, at the start of a routine.  */
@@ -1796,7 +1962,8 @@ do {                                                                      \
 \f
 /* Specify the machine mode that this machine uses
    for the index in the tablejump instruction.  */
-#define CASE_VECTOR_MODE (!TARGET_64BIT || flag_pic ? SImode : DImode)
+#define CASE_VECTOR_MODE \
+ (!TARGET_64BIT || (flag_pic && ix86_cmodel != CM_LARGE_PIC) ? SImode : DImode)
 
 /* Define this as 1 if `char' should by default be signed; else as 0.  */
 #define DEFAULT_SIGNED_CHAR 1
@@ -1822,8 +1989,7 @@ do {                                                                      \
 /* If a clear memory operation would take CLEAR_RATIO or more simple
    move-instruction sequences, we will do a clrmem or libcall instead.  */
 
-#define CLEAR_RATIO (optimize_size ? 2 \
-                    : ix86_cost->move_ratio > 6 ? 6 : ix86_cost->move_ratio)
+#define CLEAR_RATIO (optimize_size ? 2 : MIN (6, ix86_cost->move_ratio))
 
 /* Define if shifts truncate the shift count
    which implies one can omit a sign-extension or zero-extension
@@ -2095,7 +2261,7 @@ do {                                                                      \
    print_operand function.  */
 
 #define PRINT_OPERAND_PUNCT_VALID_P(CODE) \
-  ((CODE) == '*' || (CODE) == '+' || (CODE) == '&')
+  ((CODE) == '*' || (CODE) == '+' || (CODE) == '&' || (CODE) == ';')
 
 #define PRINT_OPERAND(FILE, X, CODE)  \
   print_operand ((FILE), (X), (CODE))
@@ -2114,7 +2280,7 @@ do {                                              \
 
 enum processor_type
 {
-  PROCESSOR_I386,                      /* 80386 */
+  PROCESSOR_I386 = 0,                  /* 80386 */
   PROCESSOR_I486,                      /* 80486DX, 80486SX, 80486DX[24] */
   PROCESSOR_PENTIUM,
   PROCESSOR_PENTIUMPRO,
@@ -2132,10 +2298,7 @@ enum processor_type
 };
 
 extern enum processor_type ix86_tune;
-extern int ix86_tune_mask;
-
 extern enum processor_type ix86_arch;
-extern int ix86_arch_mask;
 
 enum fpmath_unit
 {
@@ -2161,7 +2324,8 @@ enum cmodel {
   CM_MEDIUM,   /* Assumes code fits in the low 31 bits; data unlimited.  */
   CM_LARGE,    /* No assumptions.  */
   CM_SMALL_PIC,        /* Assumes code+data+got/plt fits in a 31 bit region.  */
-  CM_MEDIUM_PIC        /* Assumes code+got/plt fits in a 31 bit region.  */
+  CM_MEDIUM_PIC,/* Assumes code+got/plt fits in a 31 bit region.  */
+  CM_LARGE_PIC /* No assumptions.  */
 };
 
 extern enum cmodel ix86_cmodel;
@@ -2213,7 +2377,8 @@ enum ix86_entity
 
 enum ix86_stack_slot
 {
-  SLOT_TEMP = 0,
+  SLOT_VIRTUAL = 0,
+  SLOT_TEMP,
   SLOT_CW_STORED,
   SLOT_CW_TRUNC,
   SLOT_CW_FLOOR,
@@ -2272,8 +2437,6 @@ enum ix86_stack_slot
   (! IN_RANGE ((SRC), FIRST_STACK_REG, LAST_STACK_REG))
 
 \f
-#define DLL_IMPORT_EXPORT_PREFIX '#'
-
 #define FASTCALL_PREFIX '@'
 \f
 struct machine_function GTY(())
@@ -2313,7 +2476,7 @@ struct machine_function GTY(())
    verify whether there's any such instruction live by testing that
    REG_SP is live.  */
 #define ix86_current_function_calls_tls_descriptor \
-  (ix86_tls_descriptor_calls_expanded_in_cfun && regs_ever_live[SP_REG])
+  (ix86_tls_descriptor_calls_expanded_in_cfun && df_regs_ever_live_p (SP_REG))
 
 /* Control behavior of x86_file_start.  */
 #define X86_FILE_START_VERSION_DIRECTIVE false
@@ -2323,6 +2486,68 @@ struct machine_function GTY(())
 #define SYMBOL_FLAG_FAR_ADDR           (SYMBOL_FLAG_MACH_DEP << 0)
 #define SYMBOL_REF_FAR_ADDR_P(X)       \
        ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_FAR_ADDR) != 0)
+
+/* Flags to mark dllimport/dllexport.  Used by PE ports, but handy to
+   have defined always, to avoid ifdefing.  */
+#define SYMBOL_FLAG_DLLIMPORT          (SYMBOL_FLAG_MACH_DEP << 1)
+#define SYMBOL_REF_DLLIMPORT_P(X) \
+       ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_DLLIMPORT) != 0)
+
+#define SYMBOL_FLAG_DLLEXPORT          (SYMBOL_FLAG_MACH_DEP << 2)
+#define SYMBOL_REF_DLLEXPORT_P(X) \
+       ((SYMBOL_REF_FLAGS (X) & SYMBOL_FLAG_DLLEXPORT) != 0)
+
+/* Model costs for vectorizer.  */
+
+/* Cost of conditional branch.  */
+#undef TARG_COND_BRANCH_COST
+#define TARG_COND_BRANCH_COST           ix86_cost->branch_cost
+
+/* Cost of any scalar operation, excluding load and store.  */
+#undef TARG_SCALAR_STMT_COST
+#define TARG_SCALAR_STMT_COST           ix86_cost->scalar_stmt_cost
+
+/* Cost of scalar load.  */
+#undef TARG_SCALAR_LOAD_COST
+#define TARG_SCALAR_LOAD_COST           ix86_cost->scalar_load_cost
+
+/* Cost of scalar store.  */
+#undef TARG_SCALAR_STORE_COST
+#define TARG_SCALAR_STORE_COST          ix86_cost->scalar_store_cost
+
+/* Cost of any vector operation, excluding load, store or vector to scalar
+   operation.  */
+#undef TARG_VEC_STMT_COST
+#define TARG_VEC_STMT_COST              ix86_cost->vec_stmt_cost
+
+/* Cost of vector to scalar operation.  */
+#undef TARG_VEC_TO_SCALAR_COST
+#define TARG_VEC_TO_SCALAR_COST         ix86_cost->vec_to_scalar_cost
+
+/* Cost of scalar to vector operation.  */
+#undef TARG_SCALAR_TO_VEC_COST
+#define TARG_SCALAR_TO_VEC_COST         ix86_cost->scalar_to_vec_cost
+
+/* Cost of aligned vector load.  */
+#undef TARG_VEC_LOAD_COST
+#define TARG_VEC_LOAD_COST              ix86_cost->vec_align_load_cost
+
+/* Cost of misaligned vector load.  */
+#undef TARG_VEC_UNALIGNED_LOAD_COST
+#define TARG_VEC_UNALIGNED_LOAD_COST    ix86_cost->vec_unalign_load_cost
+
+/* Cost of vector store.  */
+#undef TARG_VEC_STORE_COST
+#define TARG_VEC_STORE_COST             ix86_cost->vec_store_cost
+
+/* Cost of conditional taken branch for vectorizer cost model.  */
+#undef TARG_COND_TAKEN_BRANCH_COST
+#define TARG_COND_TAKEN_BRANCH_COST     ix86_cost->cond_taken_branch_cost
+
+/* Cost of conditional not taken branch for vectorizer cost model.  */
+#undef TARG_COND_NOT_TAKEN_BRANCH_COST
+#define TARG_COND_NOT_TAKEN_BRANCH_COST ix86_cost->cond_not_taken_branch_cost
+
 /*
 Local variables:
 version-control: t