gcc/config/i386/i386.c

   1 /* Subroutines used for code generation on IA-32.
   2    Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   3    2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   4    Free Software Foundation, Inc.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GCC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "rtl.h"
  27 #include "tree.h"
  28 #include "tm_p.h"
  29 #include "regs.h"
  30 #include "hard-reg-set.h"
  31 #include "real.h"
  32 #include "insn-config.h"
  33 #include "conditions.h"
  34 #include "output.h"
  35 #include "insn-codes.h"
  36 #include "insn-attr.h"
  37 #include "flags.h"
  38 #include "except.h"
  39 #include "function.h"
  40 #include "recog.h"
  41 #include "expr.h"
  42 #include "optabs.h"
  43 #include "toplev.h"
  44 #include "basic-block.h"
  45 #include "ggc.h"
  46 #include "target.h"
  47 #include "target-def.h"
  48 #include "langhooks.h"
  49 #include "cgraph.h"
  50 #include "gimple.h"
  51 #include "dwarf2.h"
  52 #include "df.h"
  53 #include "tm-constrs.h"
  54 #include "params.h"
  55 #include "cselib.h"
  56
  57 static rtx legitimize_dllimport_symbol (rtx, bool);  /* Forward declaration.  */
  58 /* Default CHECK_STACK_LIMIT to -1 unless it has already been defined.  */
  59 #ifndef CHECK_STACK_LIMIT
  60 #define CHECK_STACK_LIMIT (-1)
  61 #endif
  62
  63 /* Return the index of MODE in the multiply and divide cost tables: QImode 0, HImode 1, SImode 2, DImode 3, anything else 4.  */
  64 #define MODE_INDEX(mode)                                        \
  65   ((mode) == QImode ? 0                                         \
  66    : (mode) == HImode ? 1                                       \
  67    : (mode) == SImode ? 2                                       \
  68    : (mode) == DImode ? 3                                       \
  69    : 4)  /* MODE may be evaluated up to four times.  */
  70
  71 /* Processor costs (relative to an add) */
  72 /* Size-based cost of an N-byte insn.  We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes, so COSTS_N_BYTES (2) matches COSTS_N_INSNS (1).  */
  73 #define COSTS_N_BYTES(N) ((N) * 2)
  74 /* Filler string-operation entry that names libcall throughout; used below as the second entry of strategy pairs.  */
  75 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
  76
  77 const
  78 struct processor_costs ix86_size_cost = {/* costs for tuning for size; insn costs use COSTS_N_BYTES */
  79   COSTS_N_BYTES (2),                    /* cost of an add instruction */
  80   COSTS_N_BYTES (3),                    /* cost of a lea instruction */
  81   COSTS_N_BYTES (2),                    /* variable shift costs */
  82   COSTS_N_BYTES (3),                    /* constant shift costs */
  83   {COSTS_N_BYTES (3),                   /* cost of starting multiply for QI */
  84    COSTS_N_BYTES (3),                   /*                               HI */
  85    COSTS_N_BYTES (3),                   /*                               SI */
  86    COSTS_N_BYTES (3),                   /*                               DI */
  87    COSTS_N_BYTES (5)},                  /*                            other */
  88   0,                                    /* cost of multiply per each bit set */
  89   {COSTS_N_BYTES (3),                   /* cost of a divide/mod for QI */
  90    COSTS_N_BYTES (3),                   /*                          HI */
  91    COSTS_N_BYTES (3),                   /*                          SI */
  92    COSTS_N_BYTES (3),                   /*                          DI */
  93    COSTS_N_BYTES (5)},                  /*                       other */
  94   COSTS_N_BYTES (3),                    /* cost of movsx */
  95   COSTS_N_BYTES (3),                    /* cost of movzx */
  96   0,                                    /* "large" insn */
  97   2,                                    /* MOVE_RATIO */
  98   2,                                    /* cost for loading QImode using movzbl */
  99   {2, 2, 2},                            /* cost of loading integer registers
 100                                            in QImode, HImode and SImode.
 101                                            Relative to reg-reg move (2).  */
 102   {2, 2, 2},                            /* cost of storing integer registers */
 103   2,                                    /* cost of reg,reg fld/fst */
 104   {2, 2, 2},                            /* cost of loading fp registers
 105                                            in SFmode, DFmode and XFmode */
 106   {2, 2, 2},                            /* cost of storing fp registers
 107                                            in SFmode, DFmode and XFmode */
 108   3,                                    /* cost of moving MMX register */
 109   {3, 3},                               /* cost of loading MMX registers
 110                                            in SImode and DImode */
 111   {3, 3},                               /* cost of storing MMX registers
 112                                            in SImode and DImode */
 113   3,                                    /* cost of moving SSE register */
 114   {3, 3, 3},                            /* cost of loading SSE registers
 115                                            in SImode, DImode and TImode */
 116   {3, 3, 3},                            /* cost of storing SSE registers
 117                                            in SImode, DImode and TImode */
 118   3,                                    /* MMX or SSE register to integer */
 119   0,                                    /* size of l1 cache  */
 120   0,                                    /* size of l2 cache  */
 121   0,                                    /* size of prefetch block */
 122   0,                                    /* number of parallel prefetches */
 123   2,                                    /* Branch cost */
 124   COSTS_N_BYTES (2),                    /* cost of FADD and FSUB insns.  */
 125   COSTS_N_BYTES (2),                    /* cost of FMUL instruction.  */
 126   COSTS_N_BYTES (2),                    /* cost of FDIV instruction.  */
 127   COSTS_N_BYTES (2),                    /* cost of FABS instruction.  */
 128   COSTS_N_BYTES (2),                    /* cost of FCHS instruction.  */
 129   COSTS_N_BYTES (2),                    /* cost of FSQRT instruction.  */
 130   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},      /* presumably memcpy strategies; TODO confirm */
 131    {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
 132   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},      /* presumably memset strategies; TODO confirm */
 133    {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
 134   1,                                    /* scalar_stmt_cost.  */
 135   1,                                    /* scalar_load_cost.  */
 136   1,                                    /* scalar_store_cost.  */
 137   1,                                    /* vec_stmt_cost.  */
 138   1,                                    /* vec_to_scalar_cost.  */
 139   1,                                    /* scalar_to_vec_cost.  */
 140   1,                                    /* vec_align_load_cost.  */
 141   1,                                    /* vec_unalign_load_cost.  */
 142   1,                                    /* vec_store_cost.  */
 143   1,                                    /* cond_taken_branch_cost.  */
 144   1,                                    /* cond_not_taken_branch_cost.  */
 145 };
 146
 147 /* Processor costs (relative to an add) */
 148 static const
 149 struct processor_costs i386_cost = {    /* 386 specific costs */
 150   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 151   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 152   COSTS_N_INSNS (3),                    /* variable shift costs */
 153   COSTS_N_INSNS (2),                    /* constant shift costs */
 154   {COSTS_N_INSNS (6),                   /* cost of starting multiply for QI */
 155    COSTS_N_INSNS (6),                   /*                               HI */
 156    COSTS_N_INSNS (6),                   /*                               SI */
 157    COSTS_N_INSNS (6),                   /*                               DI */
 158    COSTS_N_INSNS (6)},                  /*                               other */
 159   COSTS_N_INSNS (1),                    /* cost of multiply per each bit set */
 160   {COSTS_N_INSNS (23),                  /* cost of a divide/mod for QI */
 161    COSTS_N_INSNS (23),                  /*                          HI */
 162    COSTS_N_INSNS (23),                  /*                          SI */
 163    COSTS_N_INSNS (23),                  /*                          DI */
 164    COSTS_N_INSNS (23)},                 /*                          other */
 165   COSTS_N_INSNS (3),                    /* cost of movsx */
 166   COSTS_N_INSNS (2),                    /* cost of movzx */
 167   15,                                   /* "large" insn */
 168   3,                                    /* MOVE_RATIO */
 169   4,                                    /* cost for loading QImode using movzbl */
 170   {2, 4, 2},                            /* cost of loading integer registers
 171                                            in QImode, HImode and SImode.
 172                                            Relative to reg-reg move (2).  */
 173   {2, 4, 2},                            /* cost of storing integer registers */
 174   2,                                    /* cost of reg,reg fld/fst */
 175   {8, 8, 8},                            /* cost of loading fp registers
 176                                            in SFmode, DFmode and XFmode */
 177   {8, 8, 8},                            /* cost of storing fp registers
 178                                            in SFmode, DFmode and XFmode */
 179   2,                                    /* cost of moving MMX register */
 180   {4, 8},                               /* cost of loading MMX registers
 181                                            in SImode and DImode */
 182   {4, 8},                               /* cost of storing MMX registers
 183                                            in SImode and DImode */
 184   2,                                    /* cost of moving SSE register */
 185   {4, 8, 16},                           /* cost of loading SSE registers
 186                                            in SImode, DImode and TImode */
 187   {4, 8, 16},                           /* cost of storing SSE registers
 188                                            in SImode, DImode and TImode */
 189   3,                                    /* MMX or SSE register to integer */
 190   0,                                    /* size of l1 cache  */
 191   0,                                    /* size of l2 cache  */
 192   0,                                    /* size of prefetch block */
 193   0,                                    /* number of parallel prefetches */
 194   1,                                    /* Branch cost */
 195   COSTS_N_INSNS (23),                   /* cost of FADD and FSUB insns.  */
 196   COSTS_N_INSNS (27),                   /* cost of FMUL instruction.  */
 197   COSTS_N_INSNS (88),                   /* cost of FDIV instruction.  */
 198   COSTS_N_INSNS (22),                   /* cost of FABS instruction.  */
 199   COSTS_N_INSNS (24),                   /* cost of FCHS instruction.  */
 200   COSTS_N_INSNS (122),                  /* cost of FSQRT instruction.  */
 201   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},      /* presumably memcpy strategies; TODO confirm */
 202    DUMMY_STRINGOP_ALGS},
 203   {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},      /* presumably memset strategies; TODO confirm */
 204    DUMMY_STRINGOP_ALGS},
 205   1,                                    /* scalar_stmt_cost.  */
 206   1,                                    /* scalar_load_cost.  */
 207   1,                                    /* scalar_store_cost.  */
 208   1,                                    /* vec_stmt_cost.  */
 209   1,                                    /* vec_to_scalar_cost.  */
 210   1,                                    /* scalar_to_vec_cost.  */
 211   1,                                    /* vec_align_load_cost.  */
 212   2,                                    /* vec_unalign_load_cost.  */
 213   1,                                    /* vec_store_cost.  */
 214   3,                                    /* cond_taken_branch_cost.  */
 215   1,                                    /* cond_not_taken_branch_cost.  */
 216 };
 217
 218 static const
 219 struct processor_costs i486_cost = {    /* 486 specific costs */
 220   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 221   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 222   COSTS_N_INSNS (3),                    /* variable shift costs */
 223   COSTS_N_INSNS (2),                    /* constant shift costs */
 224   {COSTS_N_INSNS (12),                  /* cost of starting multiply for QI */
 225    COSTS_N_INSNS (12),                  /*                               HI */
 226    COSTS_N_INSNS (12),                  /*                               SI */
 227    COSTS_N_INSNS (12),                  /*                               DI */
 228    COSTS_N_INSNS (12)},                 /*                               other */
 229   1,                                    /* cost of multiply per each bit set */
 230   {COSTS_N_INSNS (40),                  /* cost of a divide/mod for QI */
 231    COSTS_N_INSNS (40),                  /*                          HI */
 232    COSTS_N_INSNS (40),                  /*                          SI */
 233    COSTS_N_INSNS (40),                  /*                          DI */
 234    COSTS_N_INSNS (40)},                 /*                          other */
 235   COSTS_N_INSNS (3),                    /* cost of movsx */
 236   COSTS_N_INSNS (2),                    /* cost of movzx */
 237   15,                                   /* "large" insn */
 238   3,                                    /* MOVE_RATIO */
 239   4,                                    /* cost for loading QImode using movzbl */
 240   {2, 4, 2},                            /* cost of loading integer registers
 241                                            in QImode, HImode and SImode.
 242                                            Relative to reg-reg move (2).  */
 243   {2, 4, 2},                            /* cost of storing integer registers */
 244   2,                                    /* cost of reg,reg fld/fst */
 245   {8, 8, 8},                            /* cost of loading fp registers
 246                                            in SFmode, DFmode and XFmode */
 247   {8, 8, 8},                            /* cost of storing fp registers
 248                                            in SFmode, DFmode and XFmode */
 249   2,                                    /* cost of moving MMX register */
 250   {4, 8},                               /* cost of loading MMX registers
 251                                            in SImode and DImode */
 252   {4, 8},                               /* cost of storing MMX registers
 253                                            in SImode and DImode */
 254   2,                                    /* cost of moving SSE register */
 255   {4, 8, 16},                           /* cost of loading SSE registers
 256                                            in SImode, DImode and TImode */
 257   {4, 8, 16},                           /* cost of storing SSE registers
 258                                            in SImode, DImode and TImode */
 259   3,                                    /* MMX or SSE register to integer */
 260   4,                                    /* size of l1 cache.  486 has 8kB cache
 261                                            shared for code and data, so 4kB is
 262                                            not really precise.  */
 263   4,                                    /* size of l2 cache  */
 264   0,                                    /* size of prefetch block */
 265   0,                                    /* number of parallel prefetches */
 266   1,                                    /* Branch cost */
 267   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
 268   COSTS_N_INSNS (16),                   /* cost of FMUL instruction.  */
 269   COSTS_N_INSNS (73),                   /* cost of FDIV instruction.  */
 270   COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
 271   COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
 272   COSTS_N_INSNS (83),                   /* cost of FSQRT instruction.  */
 273   {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},      /* presumably memcpy strategies; TODO confirm */
 274    DUMMY_STRINGOP_ALGS},
 275   {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},      /* presumably memset strategies; TODO confirm */
 276    DUMMY_STRINGOP_ALGS},
 277   1,                                    /* scalar_stmt_cost.  */
 278   1,                                    /* scalar_load_cost.  */
 279   1,                                    /* scalar_store_cost.  */
 280   1,                                    /* vec_stmt_cost.  */
 281   1,                                    /* vec_to_scalar_cost.  */
 282   1,                                    /* scalar_to_vec_cost.  */
 283   1,                                    /* vec_align_load_cost.  */
 284   2,                                    /* vec_unalign_load_cost.  */
 285   1,                                    /* vec_store_cost.  */
 286   3,                                    /* cond_taken_branch_cost.  */
 287   1,                                    /* cond_not_taken_branch_cost.  */
 288 };
 289
 290 static const
 291 struct processor_costs pentium_cost = {    /* Pentium specific costs */
 292   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 293   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 294   COSTS_N_INSNS (4),                    /* variable shift costs */
 295   COSTS_N_INSNS (1),                    /* constant shift costs */
 296   {COSTS_N_INSNS (11),                  /* cost of starting multiply for QI */
 297    COSTS_N_INSNS (11),                  /*                               HI */
 298    COSTS_N_INSNS (11),                  /*                               SI */
 299    COSTS_N_INSNS (11),                  /*                               DI */
 300    COSTS_N_INSNS (11)},                 /*                               other */
 301   0,                                    /* cost of multiply per each bit set */
 302   {COSTS_N_INSNS (25),                  /* cost of a divide/mod for QI */
 303    COSTS_N_INSNS (25),                  /*                          HI */
 304    COSTS_N_INSNS (25),                  /*                          SI */
 305    COSTS_N_INSNS (25),                  /*                          DI */
 306    COSTS_N_INSNS (25)},                 /*                          other */
 307   COSTS_N_INSNS (3),                    /* cost of movsx */
 308   COSTS_N_INSNS (2),                    /* cost of movzx */
 309   8,                                    /* "large" insn */
 310   6,                                    /* MOVE_RATIO */
 311   6,                                    /* cost for loading QImode using movzbl */
 312   {2, 4, 2},                            /* cost of loading integer registers
 313                                            in QImode, HImode and SImode.
 314                                            Relative to reg-reg move (2).  */
 315   {2, 4, 2},                            /* cost of storing integer registers */
 316   2,                                    /* cost of reg,reg fld/fst */
 317   {2, 2, 6},                            /* cost of loading fp registers
 318                                            in SFmode, DFmode and XFmode */
 319   {4, 4, 6},                            /* cost of storing fp registers
 320                                            in SFmode, DFmode and XFmode */
 321   8,                                    /* cost of moving MMX register */
 322   {8, 8},                               /* cost of loading MMX registers
 323                                            in SImode and DImode */
 324   {8, 8},                               /* cost of storing MMX registers
 325                                            in SImode and DImode */
 326   2,                                    /* cost of moving SSE register */
 327   {4, 8, 16},                           /* cost of loading SSE registers
 328                                            in SImode, DImode and TImode */
 329   {4, 8, 16},                           /* cost of storing SSE registers
 330                                            in SImode, DImode and TImode */
 331   3,                                    /* MMX or SSE register to integer */
 332   8,                                    /* size of l1 cache.  */
 333   8,                                    /* size of l2 cache  */
 334   0,                                    /* size of prefetch block */
 335   0,                                    /* number of parallel prefetches */
 336   2,                                    /* Branch cost */
 337   COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
 338   COSTS_N_INSNS (3),                    /* cost of FMUL instruction.  */
 339   COSTS_N_INSNS (39),                   /* cost of FDIV instruction.  */
 340   COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
 341   COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
 342   COSTS_N_INSNS (70),                   /* cost of FSQRT instruction.  */
 343   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},      /* presumably memcpy strategies; TODO confirm */
 344    DUMMY_STRINGOP_ALGS},
 345   {{libcall, {{-1, rep_prefix_4_byte}}},      /* presumably memset strategies; TODO confirm */
 346    DUMMY_STRINGOP_ALGS},
 347   1,                                    /* scalar_stmt_cost.  */
 348   1,                                    /* scalar_load_cost.  */
 349   1,                                    /* scalar_store_cost.  */
 350   1,                                    /* vec_stmt_cost.  */
 351   1,                                    /* vec_to_scalar_cost.  */
 352   1,                                    /* scalar_to_vec_cost.  */
 353   1,                                    /* vec_align_load_cost.  */
 354   2,                                    /* vec_unalign_load_cost.  */
 355   1,                                    /* vec_store_cost.  */
 356   3,                                    /* cond_taken_branch_cost.  */
 357   1,                                    /* cond_not_taken_branch_cost.  */
 358 };
 359
 360 static const
 361 struct processor_costs pentiumpro_cost = {    /* PentiumPro specific costs */
 362   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 363   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 364   COSTS_N_INSNS (1),                    /* variable shift costs */
 365   COSTS_N_INSNS (1),                    /* constant shift costs */
 366   {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
 367    COSTS_N_INSNS (4),                   /*                               HI */
 368    COSTS_N_INSNS (4),                   /*                               SI */
 369    COSTS_N_INSNS (4),                   /*                               DI */
 370    COSTS_N_INSNS (4)},                  /*                               other */
 371   0,                                    /* cost of multiply per each bit set */
 372   {COSTS_N_INSNS (17),                  /* cost of a divide/mod for QI */
 373    COSTS_N_INSNS (17),                  /*                          HI */
 374    COSTS_N_INSNS (17),                  /*                          SI */
 375    COSTS_N_INSNS (17),                  /*                          DI */
 376    COSTS_N_INSNS (17)},                 /*                          other */
 377   COSTS_N_INSNS (1),                    /* cost of movsx */
 378   COSTS_N_INSNS (1),                    /* cost of movzx */
 379   8,                                    /* "large" insn */
 380   6,                                    /* MOVE_RATIO */
 381   2,                                    /* cost for loading QImode using movzbl */
 382   {4, 4, 4},                            /* cost of loading integer registers
 383                                            in QImode, HImode and SImode.
 384                                            Relative to reg-reg move (2).  */
 385   {2, 2, 2},                            /* cost of storing integer registers */
 386   2,                                    /* cost of reg,reg fld/fst */
 387   {2, 2, 6},                            /* cost of loading fp registers
 388                                            in SFmode, DFmode and XFmode */
 389   {4, 4, 6},                            /* cost of storing fp registers
 390                                            in SFmode, DFmode and XFmode */
 391   2,                                    /* cost of moving MMX register */
 392   {2, 2},                               /* cost of loading MMX registers
 393                                            in SImode and DImode */
 394   {2, 2},                               /* cost of storing MMX registers
 395                                            in SImode and DImode */
 396   2,                                    /* cost of moving SSE register */
 397   {2, 2, 8},                            /* cost of loading SSE registers
 398                                            in SImode, DImode and TImode */
 399   {2, 2, 8},                            /* cost of storing SSE registers
 400                                            in SImode, DImode and TImode */
 401   3,                                    /* MMX or SSE register to integer */
 402   8,                                    /* size of l1 cache.  */
 403   256,                                  /* size of l2 cache  */
 404   32,                                   /* size of prefetch block */
 405   6,                                    /* number of parallel prefetches */
 406   2,                                    /* Branch cost */
 407   COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
 408   COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
 409   COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
 410   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 411   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 412   COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
 413   /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
 414      the alignment).  For small blocks an inline loop is still a noticeable win; for bigger
 415      blocks either rep movsl or rep movsb is the way to go.  Rep movsb apparently has a
 416      more expensive startup time in the CPU, but after 4K the difference is down in the noise.
 417    */
 418   {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},      /* presumably memcpy strategies; TODO confirm */
 419                         {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
 420    DUMMY_STRINGOP_ALGS},
 421   {{rep_prefix_4_byte, {{1024, unrolled_loop},      /* presumably memset strategies; TODO confirm */
 422                         {8192, rep_prefix_4_byte}, {-1, libcall}}},
 423    DUMMY_STRINGOP_ALGS},
 424   1,                                    /* scalar_stmt_cost.  */
 425   1,                                    /* scalar_load_cost.  */
 426   1,                                    /* scalar_store_cost.  */
 427   1,                                    /* vec_stmt_cost.  */
 428   1,                                    /* vec_to_scalar_cost.  */
 429   1,                                    /* scalar_to_vec_cost.  */
 430   1,                                    /* vec_align_load_cost.  */
 431   2,                                    /* vec_unalign_load_cost.  */
 432   1,                                    /* vec_store_cost.  */
 433   3,                                    /* cond_taken_branch_cost.  */
 434   1,                                    /* cond_not_taken_branch_cost.  */
 435 };
 436
 437 static const
 438 struct processor_costs geode_cost = {    /* Geode specific costs */
 439   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 440   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 441   COSTS_N_INSNS (2),                    /* variable shift costs */
 442   COSTS_N_INSNS (1),                    /* constant shift costs */
 443   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 444    COSTS_N_INSNS (4),                   /*                               HI */
 445    COSTS_N_INSNS (7),                   /*                               SI */
 446    COSTS_N_INSNS (7),                   /*                               DI */
 447    COSTS_N_INSNS (7)},                  /*                               other */
 448   0,                                    /* cost of multiply per each bit set */
 449   {COSTS_N_INSNS (15),                  /* cost of a divide/mod for QI */
 450    COSTS_N_INSNS (23),                  /*                          HI */
 451    COSTS_N_INSNS (39),                  /*                          SI */
 452    COSTS_N_INSNS (39),                  /*                          DI */
 453    COSTS_N_INSNS (39)},                 /*                          other */
 454   COSTS_N_INSNS (1),                    /* cost of movsx */
 455   COSTS_N_INSNS (1),                    /* cost of movzx */
 456   8,                                    /* "large" insn */
 457   4,                                    /* MOVE_RATIO */
 458   1,                                    /* cost for loading QImode using movzbl */
 459   {1, 1, 1},                            /* cost of loading integer registers
 460                                            in QImode, HImode and SImode.
 461                                            Relative to reg-reg move (2).  */
 462   {1, 1, 1},                            /* cost of storing integer registers */
 463   1,                                    /* cost of reg,reg fld/fst */
 464   {1, 1, 1},                            /* cost of loading fp registers
 465                                            in SFmode, DFmode and XFmode */
 466   {4, 6, 6},                            /* cost of storing fp registers
 467                                            in SFmode, DFmode and XFmode */
 468
 469   1,                                    /* cost of moving MMX register */
 470   {1, 1},                               /* cost of loading MMX registers
 471                                            in SImode and DImode */
 472   {1, 1},                               /* cost of storing MMX registers
 473                                            in SImode and DImode */
 474   1,                                    /* cost of moving SSE register */
 475   {1, 1, 1},                            /* cost of loading SSE registers
 476                                            in SImode, DImode and TImode */
 477   {1, 1, 1},                            /* cost of storing SSE registers
 478                                            in SImode, DImode and TImode */
 479   1,                                    /* MMX or SSE register to integer */
 480   64,                                   /* size of l1 cache.  */
 481   128,                                  /* size of l2 cache.  */
 482   32,                                   /* size of prefetch block */
 483   1,                                    /* number of parallel prefetches */
 484   1,                                    /* Branch cost */
 485   COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
 486   COSTS_N_INSNS (11),                   /* cost of FMUL instruction.  */
 487   COSTS_N_INSNS (47),                   /* cost of FDIV instruction.  */
 488   COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
 489   COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
 490   COSTS_N_INSNS (54),                   /* cost of FSQRT instruction.  */
 491   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},      /* presumably memcpy strategies; TODO confirm */
 492    DUMMY_STRINGOP_ALGS},
 493   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},      /* presumably memset strategies; TODO confirm */
 494    DUMMY_STRINGOP_ALGS},
 495   1,                                    /* scalar_stmt_cost.  */
 496   1,                                    /* scalar_load_cost.  */
 497   1,                                    /* scalar_store_cost.  */
 498   1,                                    /* vec_stmt_cost.  */
 499   1,                                    /* vec_to_scalar_cost.  */
 500   1,                                    /* scalar_to_vec_cost.  */
 501   1,                                    /* vec_align_load_cost.  */
 502   2,                                    /* vec_unalign_load_cost.  */
 503   1,                                    /* vec_store_cost.  */
 504   3,                                    /* cond_taken_branch_cost.  */
 505   1,                                    /* cond_not_taken_branch_cost.  */
 506 };
 507
 508 static const
 509 struct processor_costs k6_cost = {
       /* Cost parameters describing the k6 processor.  The trailing comment on
          each initializer names the quantity that initializer sets.  */
 510   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 511   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 512   COSTS_N_INSNS (1),                    /* variable shift costs */
 513   COSTS_N_INSNS (1),                    /* constant shift costs */
 514   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 515    COSTS_N_INSNS (3),                   /*                               HI */
 516    COSTS_N_INSNS (3),                   /*                               SI */
 517    COSTS_N_INSNS (3),                   /*                               DI */
 518    COSTS_N_INSNS (3)},                  /*                               other */
 519   0,                                    /* cost of multiply per each bit set */
 520   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
 521    COSTS_N_INSNS (18),                  /*                          HI */
 522    COSTS_N_INSNS (18),                  /*                          SI */
 523    COSTS_N_INSNS (18),                  /*                          DI */
 524    COSTS_N_INSNS (18)},                 /*                          other */
 525   COSTS_N_INSNS (2),                    /* cost of movsx */
 526   COSTS_N_INSNS (2),                    /* cost of movzx */
 527   8,                                    /* "large" insn */
 528   4,                                    /* MOVE_RATIO */
 529   3,                                    /* cost for loading QImode using movzbl */
 530   {4, 5, 4},                            /* cost of loading integer registers
 531                                            in QImode, HImode and SImode.
 532                                            Relative to reg-reg move (2).  */
 533   {2, 3, 2},                            /* cost of storing integer registers */
 534   4,                                    /* cost of reg,reg fld/fst */
 535   {6, 6, 6},                            /* cost of loading fp registers
 536                                            in SFmode, DFmode and XFmode */
 537   {4, 4, 4},                            /* cost of storing fp registers
 538                                            in SFmode, DFmode and XFmode */
 539   2,                                    /* cost of moving MMX register */
 540   {2, 2},                               /* cost of loading MMX registers
 541                                            in SImode and DImode */
 542   {2, 2},                               /* cost of storing MMX registers
 543                                            in SImode and DImode */
 544   2,                                    /* cost of moving SSE register */
 545   {2, 2, 8},                            /* cost of loading SSE registers
 546                                            in SImode, DImode and TImode */
 547   {2, 2, 8},                            /* cost of storing SSE registers
 548                                            in SImode, DImode and TImode */
 549   6,                                    /* MMX or SSE register to integer */
 550   32,                                   /* size of l1 cache.  */
 551   32,                                   /* size of l2 cache.  Some models
 552                                            have integrated l2 cache, but
 553                                            optimizing for k6 is not important
 554                                            enough to worry about that.  */
 555   32,                                   /* size of prefetch block */
 556   1,                                    /* number of parallel prefetches */
 557   1,                                    /* Branch cost */
 558   COSTS_N_INSNS (2),                    /* cost of FADD and FSUB insns.  */
 559   COSTS_N_INSNS (2),                    /* cost of FMUL instruction.  */
 560   COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
 561   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 562   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 563   COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
       /* NOTE(review): the two unlabelled tables below are presumably the
          memcpy and memset strategy lists, in that order, each holding a
          32-bit and a 64-bit variant -- confirm against the declaration of
          struct processor_costs.  */
 564   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
 565    DUMMY_STRINGOP_ALGS},
 566   {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
 567    DUMMY_STRINGOP_ALGS},
 568   1,                                    /* scalar_stmt_cost.  */
 569   1,                                    /* scalar_load_cost.  */
 570   1,                                    /* scalar_store_cost.  */
 571   1,                                    /* vec_stmt_cost.  */
 572   1,                                    /* vec_to_scalar_cost.  */
 573   1,                                    /* scalar_to_vec_cost.  */
 574   1,                                    /* vec_align_load_cost.  */
 575   2,                                    /* vec_unalign_load_cost.  */
 576   1,                                    /* vec_store_cost.  */
 577   3,                                    /* cond_taken_branch_cost.  */
 578   1,                                    /* cond_not_taken_branch_cost.  */
 579 };
 580
 581 static const
 582 struct processor_costs athlon_cost = {
       /* Cost parameters describing the athlon processor.  The trailing
          comment on each initializer names the quantity that initializer
          sets.  */
 583   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 584   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 585   COSTS_N_INSNS (1),                    /* variable shift costs */
 586   COSTS_N_INSNS (1),                    /* constant shift costs */
 587   {COSTS_N_INSNS (5),                   /* cost of starting multiply for QI */
 588    COSTS_N_INSNS (5),                   /*                               HI */
 589    COSTS_N_INSNS (5),                   /*                               SI */
 590    COSTS_N_INSNS (5),                   /*                               DI */
 591    COSTS_N_INSNS (5)},                  /*                               other */
 592   0,                                    /* cost of multiply per each bit set */
 593   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
 594    COSTS_N_INSNS (26),                  /*                          HI */
 595    COSTS_N_INSNS (42),                  /*                          SI */
 596    COSTS_N_INSNS (74),                  /*                          DI */
 597    COSTS_N_INSNS (74)},                 /*                          other */
 598   COSTS_N_INSNS (1),                    /* cost of movsx */
 599   COSTS_N_INSNS (1),                    /* cost of movzx */
 600   8,                                    /* "large" insn */
 601   9,                                    /* MOVE_RATIO */
 602   4,                                    /* cost for loading QImode using movzbl */
 603   {3, 4, 3},                            /* cost of loading integer registers
 604                                            in QImode, HImode and SImode.
 605                                            Relative to reg-reg move (2).  */
 606   {3, 4, 3},                            /* cost of storing integer registers */
 607   4,                                    /* cost of reg,reg fld/fst */
 608   {4, 4, 12},                           /* cost of loading fp registers
 609                                            in SFmode, DFmode and XFmode */
 610   {6, 6, 8},                            /* cost of storing fp registers
 611                                            in SFmode, DFmode and XFmode */
 612   2,                                    /* cost of moving MMX register */
 613   {4, 4},                               /* cost of loading MMX registers
 614                                            in SImode and DImode */
 615   {4, 4},                               /* cost of storing MMX registers
 616                                            in SImode and DImode */
 617   2,                                    /* cost of moving SSE register */
 618   {4, 4, 6},                            /* cost of loading SSE registers
 619                                            in SImode, DImode and TImode */
 620   {4, 4, 5},                            /* cost of storing SSE registers
 621                                            in SImode, DImode and TImode */
 622   5,                                    /* MMX or SSE register to integer */
 623   64,                                   /* size of l1 cache.  */
 624   256,                                  /* size of l2 cache.  */
 625   64,                                   /* size of prefetch block */
 626   6,                                    /* number of parallel prefetches */
 627   5,                                    /* Branch cost */
 628   COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
 629   COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
 630   COSTS_N_INSNS (24),                   /* cost of FDIV instruction.  */
 631   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 632   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 633   COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
 634   /* For some reason, Athlon deals better with REP prefix (relative to loops)
 635      compared to K8. Alignment becomes important after 8 bytes for memcpy and
 636      128 bytes for memset.  */
       /* NOTE(review): the two unlabelled tables below are presumably the
          memcpy and memset strategy lists, in that order, each holding a
          32-bit and a 64-bit variant -- confirm against the declaration of
          struct processor_costs.  */
 637   {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
 638    DUMMY_STRINGOP_ALGS},
 639   {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
 640    DUMMY_STRINGOP_ALGS},
 641   1,                                    /* scalar_stmt_cost.  */
 642   1,                                    /* scalar_load_cost.  */
 643   1,                                    /* scalar_store_cost.  */
 644   1,                                    /* vec_stmt_cost.  */
 645   1,                                    /* vec_to_scalar_cost.  */
 646   1,                                    /* scalar_to_vec_cost.  */
 647   1,                                    /* vec_align_load_cost.  */
 648   2,                                    /* vec_unalign_load_cost.  */
 649   1,                                    /* vec_store_cost.  */
 650   3,                                    /* cond_taken_branch_cost.  */
 651   1,                                    /* cond_not_taken_branch_cost.  */
 652 };
 653
 654 static const
 655 struct processor_costs k8_cost = {
       /* Cost parameters describing the k8 processor.  The trailing comment on
          each initializer names the quantity that initializer sets.  */
 656   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 657   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 658   COSTS_N_INSNS (1),                    /* variable shift costs */
 659   COSTS_N_INSNS (1),                    /* constant shift costs */
 660   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 661    COSTS_N_INSNS (4),                   /*                               HI */
 662    COSTS_N_INSNS (3),                   /*                               SI */
 663    COSTS_N_INSNS (4),                   /*                               DI */
 664    COSTS_N_INSNS (5)},                  /*                               other */
 665   0,                                    /* cost of multiply per each bit set */
 666   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
 667    COSTS_N_INSNS (26),                  /*                          HI */
 668    COSTS_N_INSNS (42),                  /*                          SI */
 669    COSTS_N_INSNS (74),                  /*                          DI */
 670    COSTS_N_INSNS (74)},                 /*                          other */
 671   COSTS_N_INSNS (1),                    /* cost of movsx */
 672   COSTS_N_INSNS (1),                    /* cost of movzx */
 673   8,                                    /* "large" insn */
 674   9,                                    /* MOVE_RATIO */
 675   4,                                    /* cost for loading QImode using movzbl */
 676   {3, 4, 3},                            /* cost of loading integer registers
 677                                            in QImode, HImode and SImode.
 678                                            Relative to reg-reg move (2).  */
 679   {3, 4, 3},                            /* cost of storing integer registers */
 680   4,                                    /* cost of reg,reg fld/fst */
 681   {4, 4, 12},                           /* cost of loading fp registers
 682                                            in SFmode, DFmode and XFmode */
 683   {6, 6, 8},                            /* cost of storing fp registers
 684                                            in SFmode, DFmode and XFmode */
 685   2,                                    /* cost of moving MMX register */
 686   {3, 3},                               /* cost of loading MMX registers
 687                                            in SImode and DImode */
 688   {4, 4},                               /* cost of storing MMX registers
 689                                            in SImode and DImode */
 690   2,                                    /* cost of moving SSE register */
 691   {4, 3, 6},                            /* cost of loading SSE registers
 692                                            in SImode, DImode and TImode */
 693   {4, 4, 5},                            /* cost of storing SSE registers
 694                                            in SImode, DImode and TImode */
 695   5,                                    /* MMX or SSE register to integer */
 696   64,                                   /* size of l1 cache.  */
 697   512,                                  /* size of l2 cache.  */
 698   64,                                   /* size of prefetch block */
 699   /* New AMD processors never drop prefetches; if they cannot be performed
 700      immediately, they are queued.  We set the number of simultaneous
 701      prefetches to a large constant to reflect this (leaving the number of
 702      prefetches completely unlimited is probably not a good idea, as
 703      executing them also takes some time).  */
 704   100,                                  /* number of parallel prefetches */
 705   3,                                    /* Branch cost */
 706   COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
 707   COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
 708   COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
 709   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 710   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 711   COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
 712   /* K8 has an optimized REP instruction for medium sized blocks, but for very
 713      small blocks it is better to use a loop.  For large blocks, a libcall can
 714      do non-temporal accesses and beat inline code considerably.  */
       /* NOTE(review): the two unlabelled tables below are presumably the
          memcpy and memset strategy lists, in that order, each holding a
          32-bit and a 64-bit variant -- confirm against the declaration of
          struct processor_costs.  */
 715   {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
 716    {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 717   {{libcall, {{8, loop}, {24, unrolled_loop},
 718               {2048, rep_prefix_4_byte}, {-1, libcall}}},
 719    {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 720   4,                                    /* scalar_stmt_cost.  */
 721   2,                                    /* scalar_load_cost.  */
 722   2,                                    /* scalar_store_cost.  */
 723   5,                                    /* vec_stmt_cost.  */
 724   0,                                    /* vec_to_scalar_cost.  */
 725   2,                                    /* scalar_to_vec_cost.  */
 726   2,                                    /* vec_align_load_cost.  */
 727   3,                                    /* vec_unalign_load_cost.  */
 728   3,                                    /* vec_store_cost.  */
 729   3,                                    /* cond_taken_branch_cost.  */
 730   2,                                    /* cond_not_taken_branch_cost.  */
 731 };
 732
 733 struct processor_costs amdfam10_cost = {
       /* Cost parameters describing the amdfam10 processor.  The trailing
          comment on each initializer names the quantity that initializer
          sets.
          NOTE(review): unlike the neighbouring cost tables, this one is
          defined without `static const'; confirm whether the external
          linkage and mutability are intentional.  */
 734   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 735   COSTS_N_INSNS (2),                    /* cost of a lea instruction */
 736   COSTS_N_INSNS (1),                    /* variable shift costs */
 737   COSTS_N_INSNS (1),                    /* constant shift costs */
 738   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 739    COSTS_N_INSNS (4),                   /*                               HI */
 740    COSTS_N_INSNS (3),                   /*                               SI */
 741    COSTS_N_INSNS (4),                   /*                               DI */
 742    COSTS_N_INSNS (5)},                  /*                               other */
 743   0,                                    /* cost of multiply per each bit set */
 744   {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
 745    COSTS_N_INSNS (35),                  /*                          HI */
 746    COSTS_N_INSNS (51),                  /*                          SI */
 747    COSTS_N_INSNS (83),                  /*                          DI */
 748    COSTS_N_INSNS (83)},                 /*                          other */
 749   COSTS_N_INSNS (1),                    /* cost of movsx */
 750   COSTS_N_INSNS (1),                    /* cost of movzx */
 751   8,                                    /* "large" insn */
 752   9,                                    /* MOVE_RATIO */
 753   4,                                    /* cost for loading QImode using movzbl */
 754   {3, 4, 3},                            /* cost of loading integer registers
 755                                            in QImode, HImode and SImode.
 756                                            Relative to reg-reg move (2).  */
 757   {3, 4, 3},                            /* cost of storing integer registers */
 758   4,                                    /* cost of reg,reg fld/fst */
 759   {4, 4, 12},                           /* cost of loading fp registers
 760                                            in SFmode, DFmode and XFmode */
 761   {6, 6, 8},                            /* cost of storing fp registers
 762                                            in SFmode, DFmode and XFmode */
 763   2,                                    /* cost of moving MMX register */
 764   {3, 3},                               /* cost of loading MMX registers
 765                                            in SImode and DImode */
 766   {4, 4},                               /* cost of storing MMX registers
 767                                            in SImode and DImode */
 768   2,                                    /* cost of moving SSE register */
 769   {4, 4, 3},                            /* cost of loading SSE registers
 770                                            in SImode, DImode and TImode */
 771   {4, 4, 5},                            /* cost of storing SSE registers
 772                                            in SImode, DImode and TImode */
 773   3,                                    /* MMX or SSE register to integer */
 774                                         /* On K8
 775                                             MOVD reg64, xmmreg  Double  FSTORE 4
 776                                             MOVD reg32, xmmreg  Double  FSTORE 4
 777                                            On AMDFAM10
 778                                             MOVD reg64, xmmreg  Double  FADD 3
 779                                                                 1/1  1/1
 780                                             MOVD reg32, xmmreg  Double  FADD 3
 781                                                                 1/1  1/1 */
 782   64,                                   /* size of l1 cache.  */
 783   512,                                  /* size of l2 cache.  */
 784   64,                                   /* size of prefetch block */
 785   /* New AMD processors never drop prefetches; if they cannot be performed
 786      immediately, they are queued.  We set the number of simultaneous
 787      prefetches to a large constant to reflect this (leaving the number of
 788      prefetches completely unlimited is probably not a good idea, as
 789      executing them also takes some time).  */
 790   100,                                  /* number of parallel prefetches */
 791   2,                                    /* Branch cost */
 792   COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
 793   COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
 794   COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
 795   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 796   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 797   COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
 798
 799   /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but
 800      for very small blocks it is better to use a loop.  For large blocks, a
 801      libcall can do non-temporal accesses and beat inline code considerably.  */
       /* NOTE(review): the two unlabelled tables below are presumably the
          memcpy and memset strategy lists, in that order, each holding a
          32-bit and a 64-bit variant -- confirm against the declaration of
          struct processor_costs.  */
 802   {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
 803    {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 804   {{libcall, {{8, loop}, {24, unrolled_loop},
 805               {2048, rep_prefix_4_byte}, {-1, libcall}}},
 806    {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 807   4,                                    /* scalar_stmt_cost.  */
 808   2,                                    /* scalar_load_cost.  */
 809   2,                                    /* scalar_store_cost.  */
 810   6,                                    /* vec_stmt_cost.  */
 811   0,                                    /* vec_to_scalar_cost.  */
 812   2,                                    /* scalar_to_vec_cost.  */
 813   2,                                    /* vec_align_load_cost.  */
 814   2,                                    /* vec_unalign_load_cost.  */
 815   2,                                    /* vec_store_cost.  */
 816   2,                                    /* cond_taken_branch_cost.  */
 817   1,                                    /* cond_not_taken_branch_cost.  */
 818 };
 819
 820 static const
 821 struct processor_costs pentium4_cost = {
       /* Cost parameters describing the pentium4 processor.  The trailing
          comment on each initializer names the quantity that initializer
          sets.  */
 822   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 823   COSTS_N_INSNS (3),                    /* cost of a lea instruction */
 824   COSTS_N_INSNS (4),                    /* variable shift costs */
 825   COSTS_N_INSNS (4),                    /* constant shift costs */
 826   {COSTS_N_INSNS (15),                  /* cost of starting multiply for QI */
 827    COSTS_N_INSNS (15),                  /*                               HI */
 828    COSTS_N_INSNS (15),                  /*                               SI */
 829    COSTS_N_INSNS (15),                  /*                               DI */
 830    COSTS_N_INSNS (15)},                 /*                               other */
 831   0,                                    /* cost of multiply per each bit set */
 832   {COSTS_N_INSNS (56),                  /* cost of a divide/mod for QI */
 833    COSTS_N_INSNS (56),                  /*                          HI */
 834    COSTS_N_INSNS (56),                  /*                          SI */
 835    COSTS_N_INSNS (56),                  /*                          DI */
 836    COSTS_N_INSNS (56)},                 /*                          other */
 837   COSTS_N_INSNS (1),                    /* cost of movsx */
 838   COSTS_N_INSNS (1),                    /* cost of movzx */
 839   16,                                   /* "large" insn */
 840   6,                                    /* MOVE_RATIO */
 841   2,                                    /* cost for loading QImode using movzbl */
 842   {4, 5, 4},                            /* cost of loading integer registers
 843                                            in QImode, HImode and SImode.
 844                                            Relative to reg-reg move (2).  */
 845   {2, 3, 2},                            /* cost of storing integer registers */
 846   2,                                    /* cost of reg,reg fld/fst */
 847   {2, 2, 6},                            /* cost of loading fp registers
 848                                            in SFmode, DFmode and XFmode */
 849   {4, 4, 6},                            /* cost of storing fp registers
 850                                            in SFmode, DFmode and XFmode */
 851   2,                                    /* cost of moving MMX register */
 852   {2, 2},                               /* cost of loading MMX registers
 853                                            in SImode and DImode */
 854   {2, 2},                               /* cost of storing MMX registers
 855                                            in SImode and DImode */
 856   12,                                   /* cost of moving SSE register */
 857   {12, 12, 12},                         /* cost of loading SSE registers
 858                                            in SImode, DImode and TImode */
 859   {2, 2, 8},                            /* cost of storing SSE registers
 860                                            in SImode, DImode and TImode */
 861   10,                                   /* MMX or SSE register to integer */
 862   8,                                    /* size of l1 cache.  */
 863   256,                                  /* size of l2 cache.  */
 864   64,                                   /* size of prefetch block */
 865   6,                                    /* number of parallel prefetches */
 866   2,                                    /* Branch cost */
 867   COSTS_N_INSNS (5),                    /* cost of FADD and FSUB insns.  */
 868   COSTS_N_INSNS (7),                    /* cost of FMUL instruction.  */
 869   COSTS_N_INSNS (43),                   /* cost of FDIV instruction.  */
 870   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 871   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 872   COSTS_N_INSNS (43),                   /* cost of FSQRT instruction.  */
       /* NOTE(review): the two unlabelled tables below are presumably the
          memcpy and memset strategy lists, in that order, each holding a
          32-bit and a 64-bit variant -- confirm against the declaration of
          struct processor_costs.  */
 873   {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
 874    DUMMY_STRINGOP_ALGS},
 875   {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
 876    {-1, libcall}}},
 877    DUMMY_STRINGOP_ALGS},
 878   1,                                    /* scalar_stmt_cost.  */
 879   1,                                    /* scalar_load_cost.  */
 880   1,                                    /* scalar_store_cost.  */
 881   1,                                    /* vec_stmt_cost.  */
 882   1,                                    /* vec_to_scalar_cost.  */
 883   1,                                    /* scalar_to_vec_cost.  */
 884   1,                                    /* vec_align_load_cost.  */
 885   2,                                    /* vec_unalign_load_cost.  */
 886   1,                                    /* vec_store_cost.  */
 887   3,                                    /* cond_taken_branch_cost.  */
 888   1,                                    /* cond_not_taken_branch_cost.  */
 889 };
 890
 891 static const
 892 struct processor_costs nocona_cost = {
 893   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 894   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 895   COSTS_N_INSNS (1),                    /* variable shift costs */
 896   COSTS_N_INSNS (1),                    /* constant shift costs */
 897   {COSTS_N_INSNS (10),                  /* cost of starting multiply for QI */
 898    COSTS_N_INSNS (10),                  /*                               HI */
 899    COSTS_N_INSNS (10),                  /*                               SI */
 900    COSTS_N_INSNS (10),                  /*                               DI */
 901    COSTS_N_INSNS (10)},                 /*                               other */
 902   0,                                    /* cost of multiply per each bit set */
 903   {COSTS_N_INSNS (66),                  /* cost of a divide/mod for QI */
 904    COSTS_N_INSNS (66),                  /*                          HI */
 905    COSTS_N_INSNS (66),                  /*                          SI */
 906    COSTS_N_INSNS (66),                  /*                          DI */
 907    COSTS_N_INSNS (66)},                 /*                          other */
 908   COSTS_N_INSNS (1),                    /* cost of movsx */
 909   COSTS_N_INSNS (1),                    /* cost of movzx */
 910   16,                                   /* "large" insn */
 911   17,                                   /* MOVE_RATIO */
 912   4,                                    /* cost for loading QImode using movzbl */
 913   {4, 4, 4},                            /* cost of loading integer registers
 914                                            in QImode, HImode and SImode.
 915                                            Relative to reg-reg move (2).  */
 916   {4, 4, 4},                            /* cost of storing integer registers */
 917   3,                                    /* cost of reg,reg fld/fst */
 918   {12, 12, 12},                         /* cost of loading fp registers
 919                                            in SFmode, DFmode and XFmode */
 920   {4, 4, 4},                            /* cost of storing fp registers
 921                                            in SFmode, DFmode and XFmode */
 922   6,                                    /* cost of moving MMX register */
 923   {12, 12},                             /* cost of loading MMX registers
 924                                            in SImode and DImode */
 925   {12, 12},                             /* cost of storing MMX registers
 926                                            in SImode and DImode */
 927   6,                                    /* cost of moving SSE register */
 928   {12, 12, 12},                         /* cost of loading SSE registers
 929                                            in SImode, DImode and TImode */
 930   {12, 12, 12},                         /* cost of storing SSE registers
 931                                            in SImode, DImode and TImode */
 932   8,                                    /* MMX or SSE register to integer */
 933   8,                                    /* size of l1 cache.  */
 934   1024,                                 /* size of l2 cache.  */
 935   128,                                  /* size of prefetch block */
 936   8,                                    /* number of parallel prefetches */
 937   1,                                    /* Branch cost */
 938   COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
 939   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
 940   COSTS_N_INSNS (40),                   /* cost of FDIV instruction.  */
 941   COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
 942   COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
 943   COSTS_N_INSNS (44),                   /* cost of FSQRT instruction.  */
 944   {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
 945    {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
 946               {100000, unrolled_loop}, {-1, libcall}}}},
 947   {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
 948    {-1, libcall}}},
 949    {libcall, {{24, loop}, {64, unrolled_loop},
 950               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 951   1,                                    /* scalar_stmt_cost.  */
 952   1,                                    /* scalar load_cost.  */
 953   1,                                    /* scalar_store_cost.  */
 954   1,                                    /* vec_stmt_cost.  */
 955   1,                                    /* vec_to_scalar_cost.  */
 956   1,                                    /* scalar_to_vec_cost.  */
 957   1,                                    /* vec_align_load_cost.  */
 958   2,                                    /* vec_unalign_load_cost.  */
 959   1,                                    /* vec_store_cost.  */
 960   3,                                    /* cond_taken_branch_cost.  */
 961   1,                                    /* cond_not_taken_branch_cost.  */
 962 };
 963
 964 static const  /* Cost table core2_cost; initializers are positional.  */
 965 struct processor_costs core2_cost = {
 966   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 967   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
 968   COSTS_N_INSNS (1),                    /* variable shift costs */
 969   COSTS_N_INSNS (1),                    /* constant shift costs */
 970   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 971    COSTS_N_INSNS (3),                   /*                               HI */
 972    COSTS_N_INSNS (3),                   /*                               SI */
 973    COSTS_N_INSNS (3),                   /*                               DI */
 974    COSTS_N_INSNS (3)},                  /*                               other */
 975   0,                                    /* cost of multiply per each bit set */
 976   {COSTS_N_INSNS (22),                  /* cost of a divide/mod for QI */
 977    COSTS_N_INSNS (22),                  /*                          HI */
 978    COSTS_N_INSNS (22),                  /*                          SI */
 979    COSTS_N_INSNS (22),                  /*                          DI */
 980    COSTS_N_INSNS (22)},                 /*                          other */
 981   COSTS_N_INSNS (1),                    /* cost of movsx */
 982   COSTS_N_INSNS (1),                    /* cost of movzx */
 983   8,                                    /* "large" insn */
 984   16,                                   /* MOVE_RATIO */
 985   2,                                    /* cost for loading QImode using movzbl */
 986   {6, 6, 6},                            /* cost of loading integer registers
 987                                            in QImode, HImode and SImode.
 988                                            Relative to reg-reg move (2).  */
 989   {4, 4, 4},                            /* cost of storing integer registers */
 990   2,                                    /* cost of reg,reg fld/fst */
 991   {6, 6, 6},                            /* cost of loading fp registers
 992                                            in SFmode, DFmode and XFmode */
 993   {4, 4, 4},                            /* cost of storing fp registers
 994                                            in SFmode, DFmode and XFmode */
 995   2,                                    /* cost of moving MMX register */
 996   {6, 6},                               /* cost of loading MMX registers
 997                                            in SImode and DImode */
 998   {4, 4},                               /* cost of storing MMX registers
 999                                            in SImode and DImode */
1000   2,                                    /* cost of moving SSE register */
1001   {6, 6, 6},                            /* cost of loading SSE registers
1002                                            in SImode, DImode and TImode */
1003   {4, 4, 4},                            /* cost of storing SSE registers
1004                                            in SImode, DImode and TImode */
1005   2,                                    /* MMX or SSE register to integer */
1006   32,                                   /* size of l1 cache.  */
1007   2048,                                 /* size of l2 cache.  */
1008   128,                                  /* size of prefetch block */
1009   8,                                    /* number of parallel prefetches */
1010   3,                                    /* Branch cost */
1011   COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
1012   COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
1013   COSTS_N_INSNS (32),                   /* cost of FDIV instruction.  */
1014   COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
1015   COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
1016   COSTS_N_INSNS (58),                   /* cost of FSQRT instruction.  */
1017   {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},  /* NOTE(review): presumably memcpy algorithms; confirm.  */
1018    {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020   {{libcall, {{8, loop}, {15, unrolled_loop},  /* NOTE(review): presumably memset algorithms; confirm.  */
1021               {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022    {libcall, {{24, loop}, {32, unrolled_loop},
1023               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024   1,                                    /* scalar_stmt_cost.  */
1025   1,                                    /* scalar_load_cost.  */
1026   1,                                    /* scalar_store_cost.  */
1027   1,                                    /* vec_stmt_cost.  */
1028   1,                                    /* vec_to_scalar_cost.  */
1029   1,                                    /* scalar_to_vec_cost.  */
1030   1,                                    /* vec_align_load_cost.  */
1031   2,                                    /* vec_unalign_load_cost.  */
1032   1,                                    /* vec_store_cost.  */
1033   3,                                    /* cond_taken_branch_cost.  */
1034   1,                                    /* cond_not_taken_branch_cost.  */
1035 };
1036
1037 static const  /* Cost table atom_cost; initializers are positional.  */
1038 struct processor_costs atom_cost = {
1039   COSTS_N_INSNS (1),                    /* cost of an add instruction */
1040   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
1041   COSTS_N_INSNS (1),                    /* variable shift costs */
1042   COSTS_N_INSNS (1),                    /* constant shift costs */
1043   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
1044    COSTS_N_INSNS (4),                   /*                               HI */
1045    COSTS_N_INSNS (3),                   /*                               SI */
1046    COSTS_N_INSNS (4),                   /*                               DI */
1047    COSTS_N_INSNS (2)},                  /*                               other */
1048   0,                                    /* cost of multiply per each bit set */
1049   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
1050    COSTS_N_INSNS (26),                  /*                          HI */
1051    COSTS_N_INSNS (42),                  /*                          SI */
1052    COSTS_N_INSNS (74),                  /*                          DI */
1053    COSTS_N_INSNS (74)},                 /*                          other */
1054   COSTS_N_INSNS (1),                    /* cost of movsx */
1055   COSTS_N_INSNS (1),                    /* cost of movzx */
1056   8,                                    /* "large" insn */
1057   17,                                   /* MOVE_RATIO */
1058   2,                                    /* cost for loading QImode using movzbl */
1059   {4, 4, 4},                            /* cost of loading integer registers
1060                                            in QImode, HImode and SImode.
1061                                            Relative to reg-reg move (2).  */
1062   {4, 4, 4},                            /* cost of storing integer registers */
1063   4,                                    /* cost of reg,reg fld/fst */
1064   {12, 12, 12},                         /* cost of loading fp registers
1065                                            in SFmode, DFmode and XFmode */
1066   {6, 6, 8},                            /* cost of storing fp registers
1067                                            in SFmode, DFmode and XFmode */
1068   2,                                    /* cost of moving MMX register */
1069   {8, 8},                               /* cost of loading MMX registers
1070                                            in SImode and DImode */
1071   {8, 8},                               /* cost of storing MMX registers
1072                                            in SImode and DImode */
1073   2,                                    /* cost of moving SSE register */
1074   {8, 8, 8},                            /* cost of loading SSE registers
1075                                            in SImode, DImode and TImode */
1076   {8, 8, 8},                            /* cost of storing SSE registers
1077                                            in SImode, DImode and TImode */
1078   5,                                    /* MMX or SSE register to integer */
1079   32,                                   /* size of l1 cache.  */
1080   256,                                  /* size of l2 cache.  */
1081   64,                                   /* size of prefetch block */
1082   6,                                    /* number of parallel prefetches */
1083   3,                                    /* Branch cost */
1084   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
1085   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
1086   COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
1087   COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
1088   COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
1089   COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
1090   {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},  /* NOTE(review): presumably memcpy algorithms; confirm.  */
1091    {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1092           {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1093   {{libcall, {{8, loop}, {15, unrolled_loop},  /* NOTE(review): presumably memset algorithms; confirm.  */
1094           {2048, rep_prefix_4_byte}, {-1, libcall}}},
1095    {libcall, {{24, loop}, {32, unrolled_loop},
1096           {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1097   1,                                    /* scalar_stmt_cost.  */
1098   1,                                    /* scalar_load_cost.  */
1099   1,                                    /* scalar_store_cost.  */
1100   1,                                    /* vec_stmt_cost.  */
1101   1,                                    /* vec_to_scalar_cost.  */
1102   1,                                    /* scalar_to_vec_cost.  */
1103   1,                                    /* vec_align_load_cost.  */
1104   2,                                    /* vec_unalign_load_cost.  */
1105   1,                                    /* vec_store_cost.  */
1106   3,                                    /* cond_taken_branch_cost.  */
1107   1,                                    /* cond_not_taken_branch_cost.  */
1108 };
1109
1110 /* Generic64 should produce code tuned for Nocona and K8.  */
1111 static const  /* Initializers below are positional.  */
1112 struct processor_costs generic64_cost = {
1113   COSTS_N_INSNS (1),                    /* cost of an add instruction */
1114   /* On all chips taken into consideration lea takes 2 cycles or more.  With
1115      this cost however our current implementation of synth_mult results in
1116      use of unnecessary temporary registers causing regression on several
1117      SPECfp benchmarks.  */
1118   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
1119   COSTS_N_INSNS (1),                    /* variable shift costs */
1120   COSTS_N_INSNS (1),                    /* constant shift costs */
1121   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
1122    COSTS_N_INSNS (4),                   /*                               HI */
1123    COSTS_N_INSNS (3),                   /*                               SI */
1124    COSTS_N_INSNS (4),                   /*                               DI */
1125    COSTS_N_INSNS (2)},                  /*                               other */
1126   0,                                    /* cost of multiply per each bit set */
1127   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
1128    COSTS_N_INSNS (26),                  /*                          HI */
1129    COSTS_N_INSNS (42),                  /*                          SI */
1130    COSTS_N_INSNS (74),                  /*                          DI */
1131    COSTS_N_INSNS (74)},                 /*                          other */
1132   COSTS_N_INSNS (1),                    /* cost of movsx */
1133   COSTS_N_INSNS (1),                    /* cost of movzx */
1134   8,                                    /* "large" insn */
1135   17,                                   /* MOVE_RATIO */
1136   4,                                    /* cost for loading QImode using movzbl */
1137   {4, 4, 4},                            /* cost of loading integer registers
1138                                            in QImode, HImode and SImode.
1139                                            Relative to reg-reg move (2).  */
1140   {4, 4, 4},                            /* cost of storing integer registers */
1141   4,                                    /* cost of reg,reg fld/fst */
1142   {12, 12, 12},                         /* cost of loading fp registers
1143                                            in SFmode, DFmode and XFmode */
1144   {6, 6, 8},                            /* cost of storing fp registers
1145                                            in SFmode, DFmode and XFmode */
1146   2,                                    /* cost of moving MMX register */
1147   {8, 8},                               /* cost of loading MMX registers
1148                                            in SImode and DImode */
1149   {8, 8},                               /* cost of storing MMX registers
1150                                            in SImode and DImode */
1151   2,                                    /* cost of moving SSE register */
1152   {8, 8, 8},                            /* cost of loading SSE registers
1153                                            in SImode, DImode and TImode */
1154   {8, 8, 8},                            /* cost of storing SSE registers
1155                                            in SImode, DImode and TImode */
1156   5,                                    /* MMX or SSE register to integer */
1157   32,                                   /* size of l1 cache.  */
1158   512,                                  /* size of l2 cache.  */
1159   64,                                   /* size of prefetch block */
1160   6,                                    /* number of parallel prefetches */
1161   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1162      value is increased to the perhaps more appropriate value of 5.  */
1163   3,                                    /* Branch cost */
1164   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
1165   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
1166   COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
1167   COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
1168   COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
1169   COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
1170   {DUMMY_STRINGOP_ALGS,                 /* NOTE(review): presumably memcpy algorithms; confirm.  */
1171    {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1172   {DUMMY_STRINGOP_ALGS,                 /* NOTE(review): presumably memset algorithms; confirm.  */
1173    {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1174   1,                                    /* scalar_stmt_cost.  */
1175   1,                                    /* scalar_load_cost.  */
1176   1,                                    /* scalar_store_cost.  */
1177   1,                                    /* vec_stmt_cost.  */
1178   1,                                    /* vec_to_scalar_cost.  */
1179   1,                                    /* scalar_to_vec_cost.  */
1180   1,                                    /* vec_align_load_cost.  */
1181   2,                                    /* vec_unalign_load_cost.  */
1182   1,                                    /* vec_store_cost.  */
1183   3,                                    /* cond_taken_branch_cost.  */
1184   1,                                    /* cond_not_taken_branch_cost.  */
1185 };
1186
1187 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
1188 static const  /* Initializers below are positional.  */
1189 struct processor_costs generic32_cost = {
1190   COSTS_N_INSNS (1),                    /* cost of an add instruction */
1191   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
1192   COSTS_N_INSNS (1),                    /* variable shift costs */
1193   COSTS_N_INSNS (1),                    /* constant shift costs */
1194   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
1195    COSTS_N_INSNS (4),                   /*                               HI */
1196    COSTS_N_INSNS (3),                   /*                               SI */
1197    COSTS_N_INSNS (4),                   /*                               DI */
1198    COSTS_N_INSNS (2)},                  /*                               other */
1199   0,                                    /* cost of multiply per each bit set */
1200   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
1201    COSTS_N_INSNS (26),                  /*                          HI */
1202    COSTS_N_INSNS (42),                  /*                          SI */
1203    COSTS_N_INSNS (74),                  /*                          DI */
1204    COSTS_N_INSNS (74)},                 /*                          other */
1205   COSTS_N_INSNS (1),                    /* cost of movsx */
1206   COSTS_N_INSNS (1),                    /* cost of movzx */
1207   8,                                    /* "large" insn */
1208   17,                                   /* MOVE_RATIO */
1209   4,                                    /* cost for loading QImode using movzbl */
1210   {4, 4, 4},                            /* cost of loading integer registers
1211                                            in QImode, HImode and SImode.
1212                                            Relative to reg-reg move (2).  */
1213   {4, 4, 4},                            /* cost of storing integer registers */
1214   4,                                    /* cost of reg,reg fld/fst */
1215   {12, 12, 12},                         /* cost of loading fp registers
1216                                            in SFmode, DFmode and XFmode */
1217   {6, 6, 8},                            /* cost of storing fp registers
1218                                            in SFmode, DFmode and XFmode */
1219   2,                                    /* cost of moving MMX register */
1220   {8, 8},                               /* cost of loading MMX registers
1221                                            in SImode and DImode */
1222   {8, 8},                               /* cost of storing MMX registers
1223                                            in SImode and DImode */
1224   2,                                    /* cost of moving SSE register */
1225   {8, 8, 8},                            /* cost of loading SSE registers
1226                                            in SImode, DImode and TImode */
1227   {8, 8, 8},                            /* cost of storing SSE registers
1228                                            in SImode, DImode and TImode */
1229   5,                                    /* MMX or SSE register to integer */
1230   32,                                   /* size of l1 cache.  */
1231   256,                                  /* size of l2 cache.  */
1232   64,                                   /* size of prefetch block */
1233   6,                                    /* number of parallel prefetches */
1234   3,                                    /* Branch cost */
1235   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
1236   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
1237   COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
1238   COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
1239   COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
1240   COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
1241   {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},  /* NOTE(review): presumably memcpy algorithms; confirm.  */
1242    DUMMY_STRINGOP_ALGS},
1243   {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},  /* NOTE(review): presumably memset algorithms; confirm.  */
1244    DUMMY_STRINGOP_ALGS},
1245   1,                                    /* scalar_stmt_cost.  */
1246   1,                                    /* scalar_load_cost.  */
1247   1,                                    /* scalar_store_cost.  */
1248   1,                                    /* vec_stmt_cost.  */
1249   1,                                    /* vec_to_scalar_cost.  */
1250   1,                                    /* scalar_to_vec_cost.  */
1251   1,                                    /* vec_align_load_cost.  */
1252   2,                                    /* vec_unalign_load_cost.  */
1253   1,                                    /* vec_store_cost.  */
1254   3,                                    /* cond_taken_branch_cost.  */
1255   1,                                    /* cond_not_taken_branch_cost.  */
1256 };
1257
1258 const struct processor_costs *ix86_cost = &pentium_cost;  /* Cost table pointer; initially points at pentium_cost.  */
1259
1260 /* Processor feature/optimization bitmasks: one bit per PROCESSOR_* value.  */
1261 #define m_386 (1<<PROCESSOR_I386)
1262 #define m_486 (1<<PROCESSOR_I486)
1263 #define m_PENT (1<<PROCESSOR_PENTIUM)
1264 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1265 #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
1266 #define m_NOCONA  (1<<PROCESSOR_NOCONA)
1267 #define m_CORE2  (1<<PROCESSOR_CORE2)
1268 #define m_ATOM  (1<<PROCESSOR_ATOM)
1269
1270 #define m_GEODE  (1<<PROCESSOR_GEODE)
1271 #define m_K6  (1<<PROCESSOR_K6)
1272 #define m_K6_GEODE  (m_K6 | m_GEODE)
1273 #define m_K8  (1<<PROCESSOR_K8)
1274 #define m_ATHLON  (1<<PROCESSOR_ATHLON)
1275 #define m_ATHLON_K8  (m_K8 | m_ATHLON)
1276 #define m_AMDFAM10  (1<<PROCESSOR_AMDFAM10)
1277 #define m_AMD_MULTIPLE  (m_K8 | m_ATHLON | m_AMDFAM10)
1278
1279 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1280 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1281
1282 /* Generic instruction choice should be the common subset of supported CPUs
1283    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  m_GENERIC covers both generic masks.  */
1284 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1285
1286 /* Feature tests against the various tunings; X86_TUNE_LAST elements.  */
1287 unsigned char ix86_tune_features[X86_TUNE_LAST];
1288
1289 /* Processor masks, one per tuning (each entry is labelled with its X86_TUNE_*
1290    name), used to create ix86_tune_features based on the processor mask.  */
1291 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1292   /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1293      negatively, so enabling for Generic64 seems like a good code size
1294      tradeoff.  We can't enable it for 32bit generic because it does not
1295      work well with PPro based chips.  */
1296   m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1297
1298   /* X86_TUNE_PUSH_MEMORY */
1299   m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1300   | m_NOCONA | m_CORE2 | m_GENERIC,
1301
1302   /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1303   m_486 | m_PENT,
1304
1305   /* X86_TUNE_UNROLL_STRLEN */
1306   m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1307   | m_CORE2 | m_GENERIC,
1308
1309   /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1310   m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1311
1312   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1313      on simulation results.  But after P4 was made, no performance benefit
1314      was observed with branch hints.  It also increases the code size.
1315      As a result, icc never generates branch hints.  */
1316   0,
1317
1318   /* X86_TUNE_DOUBLE_WITH_ADD */
1319   ~m_386,
1320
1321   /* X86_TUNE_USE_SAHF */
1322   m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1323   | m_NOCONA | m_CORE2 | m_GENERIC,
1324
1325   /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1326      partial dependencies.  */
1327   m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1328   | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1329
1330   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1331      register stalls on Generic32 compilation setting as well.  However
1332      in current implementation the partial register stalls are not eliminated
1333      very well - they can be introduced via subregs synthesized by combine
1334      and can happen in caller/callee saving sequences.  Because this option
1335      pays back little on PPro based chips and is in conflict with partial reg
1336      dependencies used by Athlon/P4 based chips, it is better to leave it off
1337      for generic32 for now.  */
1338   m_PPRO,
1339
1340   /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1341   m_CORE2 | m_GENERIC,
1342
1343   /* X86_TUNE_USE_HIMODE_FIOP */
1344   m_386 | m_486 | m_K6_GEODE,
1345
1346   /* X86_TUNE_USE_SIMODE_FIOP */
1347   ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1348
1349   /* X86_TUNE_USE_MOV0 */
1350   m_K6,
1351
1352   /* X86_TUNE_USE_CLTD */
1353   ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1354
1355   /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
1356   m_PENT4,
1357
1358   /* X86_TUNE_SPLIT_LONG_MOVES */
1359   m_PPRO,
1360
1361   /* X86_TUNE_READ_MODIFY_WRITE */
1362   ~m_PENT,
1363
1364   /* X86_TUNE_READ_MODIFY */
1365   ~(m_PENT | m_PPRO),
1366
1367   /* X86_TUNE_PROMOTE_QIMODE */
1368   m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1369   | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1370
1371   /* X86_TUNE_FAST_PREFIX */
1372   ~(m_PENT | m_486 | m_386),
1373
1374   /* X86_TUNE_SINGLE_STRINGOP */
1375   m_386 | m_PENT4 | m_NOCONA,
1376
1377   /* X86_TUNE_QIMODE_MATH */
1378   ~0,
1379
1380   /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1381      register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
1382      might be considered for Generic32 if our scheme for avoiding partial
1383      stalls was more effective.  */
1384   ~m_PPRO,
1385
1386   /* X86_TUNE_PROMOTE_QI_REGS */
1387   0,
1388
1389   /* X86_TUNE_PROMOTE_HI_REGS */
1390   m_PPRO,
1391
1392   /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
1393   m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1394   | m_CORE2 | m_GENERIC,
1395
1396   /* X86_TUNE_ADD_ESP_8 */
1397   m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1398   | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1399
1400   /* X86_TUNE_SUB_ESP_4 */
1401   m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1402   | m_GENERIC,
1403
1404   /* X86_TUNE_SUB_ESP_8 */
1405   m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1406   | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1407
1408   /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1409      for DFmode copies */
1410   ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1411     | m_GENERIC | m_GEODE),
1412
1413   /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1414   m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1415
1416   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1417      conflict here between PPro/Pentium4 based chips that treat 128bit
1418      SSE registers as single units versus K8 based chips that divide SSE
1419      registers into two 64bit halves.  This knob promotes all store destinations
1420      to be 128bit to allow register renaming on 128bit SSE units, but usually
1421      results in one extra microop on 64bit SSE units.  Experimental results
1422      show that disabling this option on P4 brings over 20% SPECfp regression,
1423      while enabling it on K8 brings roughly 2.4% regression that can be partly
1424      masked by careful scheduling of moves.  */
1425   m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1426   | m_AMDFAM10,
1427
1428   /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1429   m_AMDFAM10,
1430
1431   /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1432      are resolved on SSE register parts instead of whole registers, so we may
1433      maintain just lower part of scalar values in proper format leaving the
1434      upper part undefined.  */
1435   m_ATHLON_K8,
1436
1437   /* X86_TUNE_SSE_TYPELESS_STORES */
1438   m_AMD_MULTIPLE,
1439
1440   /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1441   m_PPRO | m_PENT4 | m_NOCONA,
1442
1443   /* X86_TUNE_MEMORY_MISMATCH_STALL */
1444   m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1445
1446   /* X86_TUNE_PROLOGUE_USING_MOVE */
1447   m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1448
1449   /* X86_TUNE_EPILOGUE_USING_MOVE */
1450   m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1451
1452   /* X86_TUNE_SHIFT1 */
1453   ~m_486,
1454
1455   /* X86_TUNE_USE_FFREEP */
1456   m_AMD_MULTIPLE,
1457
1458   /* X86_TUNE_INTER_UNIT_MOVES */
1459   ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1460
1461   /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1462   ~(m_AMDFAM10),
1463
1464   /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1465      than 4 branch instructions in the 16 byte window.  */
1466   m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1467   | m_GENERIC,
1468
1469   /* X86_TUNE_SCHEDULE */
1470   m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1471   | m_GENERIC,
1472
1473   /* X86_TUNE_USE_BT */
1474   m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1475
1476   /* X86_TUNE_USE_INCDEC */
1477   ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1478
1479   /* X86_TUNE_PAD_RETURNS */
1480   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1481
1482   /* X86_TUNE_EXT_80387_CONSTANTS */
1483   m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1484   | m_CORE2 | m_GENERIC,
1485
1486   /* X86_TUNE_SHORTEN_X87_SSE */
1487   ~m_K8,
1488
1489   /* X86_TUNE_AVOID_VECTOR_DECODE */
1490   m_K8 | m_GENERIC64,
1491
1492   /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1493      and SImode multiply, but 386 and 486 do HImode multiply faster.  */
1494   ~(m_386 | m_486),
1495
1496   /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1497      vector path on AMD machines.  */
1498   m_K8 | m_GENERIC64 | m_AMDFAM10,
1499
1500   /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1501      machines.  */
1502   m_K8 | m_GENERIC64 | m_AMDFAM10,
1503
1504   /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1505      than a MOV.  */
1506   m_PENT,
1507
1508   /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1509      but one byte longer.  */
1510   m_PENT,
1511
1512   /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1513      operand that cannot be represented using a modRM byte.  The XOR
1514      replacement is long decoded, so this split helps here as well.  */
1515   m_K6,
1516
1517   /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1518      from FP to FP. */
1519   m_AMDFAM10 | m_GENERIC,
1520
1521   /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1522      from integer to FP. */
1523   m_AMDFAM10,
1524
1525   /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1526      with a subsequent conditional jump instruction into a single
1527      compare-and-branch uop.  */
1528   m_CORE2,
1529
1530   /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1531      will impact LEA instruction selection. */
1532   m_ATOM,
1533 };
1534
1535 /* Feature tests against the various architecture variations.  Holds one
        unsigned char per X86_ARCH_* index (X86_ARCH_LAST elements in total).  */
1536 unsigned char ix86_arch_features[X86_ARCH_LAST];
1537
1538 /* Feature tests against the various architecture variations, used to create
1539    ix86_arch_features based on the processor mask.
        Each initializer is a mask built from m_* processor bits; a
        complemented mask (~...) selects every processor except the ones
        listed.  The entries are positional, in the order of the X86_ARCH_*
        names given in the per-entry comments below.  */
1540 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1541   /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
1542   ~(m_386 | m_486 | m_PENT | m_K6),
1543
1544   /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
1545   ~m_386,
1546
1547   /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1548   ~(m_386 | m_486),
1549
1550   /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
1551   ~m_386,
1552
1553   /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
1554   ~m_386,
1555 };
1556
/* Processor mask (m_* bits).  NOTE(review): presumably the set of tunings
   for which accumulating outgoing arguments is enabled by default --
   confirm at the use site.  */
1557 static const unsigned int x86_accumulate_outgoing_args
1558   = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1559     | m_GENERIC;
1560
/* Processor mask (m_* bits).  NOTE(review): presumably the set of
   architectures on which the "fancy math" 387 instructions are assumed to
   be available -- confirm at the use site.  */
1561 static const unsigned int x86_arch_always_fancy_math_387
1562   = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1563     | m_NOCONA | m_CORE2 | m_GENERIC;
1564
/* String operation algorithm selection; starts out as no_stringop.  */
1565 static enum stringop_alg stringop_alg = no_stringop;
1566
1567 /* When the average insn count for a single function invocation is
1568    lower than this constant, emit the fast (but longer) prologue and
1569    epilogue code.  */
1570 #define FAST_PROLOGUE_INSN_COUNT 20
1571
1572 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively.
        The contents come from the *_REGISTER_NAMES macros, which are defined
        outside this file section.  */
1573 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1574 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1575 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1576
1577 /* Array of the smallest class containing reg number REGNO, indexed by
1578    REGNO.  Used by REGNO_REG_CLASS in i386.h.
        The initializer is positional: entry N is the class of hard register
        N, so the rows must stay in hard-register-number order.  The comment
        above each row names the registers that row covers.  */
1579
1580 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1581 {
1582   /* ax, dx, cx, bx */
1583   AREG, DREG, CREG, BREG,
1584   /* si, di, bp, sp */
1585   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1586   /* FP registers */
1587   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1588   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1589   /* arg pointer */
1590   NON_Q_REGS,
1591   /* flags, fpsr, fpcr, frame */
1592   NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1593   /* SSE registers */
1594   SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1595   SSE_REGS, SSE_REGS,
1596   /* MMX registers */
1597   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1598   MMX_REGS, MMX_REGS,
1599   /* REX registers */
1600   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1601   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602   /* SSE REX registers */
1603   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1604   SSE_REGS, SSE_REGS,
1605 };
1606
1607 /* The "default" register map used in 32bit mode.
        One entry per hard register (FIRST_PSEUDO_REGISTER entries), laid out
        in the register groups named by the trailing comments.
        NOTE(review): -1 presumably marks a register that has no debugger
        register number in this map -- confirm at the use site.  */
1608
1609 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1610 {
1611   0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
1612   12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
1613   -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
1614   21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
1615   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
1616   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
1617   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
1618 };
1619
1620 /* The "default" register map used in 64bit mode.
        Same layout as the 32bit map: one entry per hard register, grouped as
        named by the trailing comments.  Unlike the 32bit map, the extended
        integer and extended SSE registers have numbers assigned here.
        NOTE(review): -1 presumably marks a register that has no debugger
        register number -- confirm at the use site.  */
1621
1622 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1623 {
1624   0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
1625   33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
1626   -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
1627   17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
1628   41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
1629   8,9,10,11,12,13,14,15,                /* extended integer registers */
1630   25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
1631 };
1632
1633 /* Define the register numbers to be used in Dwarf debugging information.
1634    The SVR4 reference port C compiler uses the following register numbers
1635    in its Dwarf output code:
1636         0 for %eax (gcc regno = 0)
1637         1 for %ecx (gcc regno = 2)
1638         2 for %edx (gcc regno = 1)
1639         3 for %ebx (gcc regno = 3)
1640         4 for %esp (gcc regno = 7)
1641         5 for %ebp (gcc regno = 6)
1642         6 for %esi (gcc regno = 4)
1643         7 for %edi (gcc regno = 5)
1644    The following three DWARF register numbers are never generated by
1645    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1646    believes these numbers have these meanings.
1647         8  for %eip    (no gcc equivalent)
1648         9  for %eflags (gcc regno = 17)
1649         10 for %trapno (no gcc equivalent)
1650    It is not at all clear how we should number the FP stack registers
1651    for the x86 architecture.  If the version of SDB on x86/svr4 were
1652    a bit less brain dead with respect to floating-point then we would
1653    have a precedent to follow with respect to DWARF register numbers
1654    for x86 FP registers, but the SDB on x86/svr4 is so completely
1655    broken with respect to FP registers that it is hardly worth thinking
1656    of it as something to strive for compatibility with.
1657    The version of x86/svr4 SDB I have at the moment does (partially)
1658    seem to believe that DWARF register number 11 is associated with
1659    the x86 register %st(0), but that's about all.  Higher DWARF
1660    register numbers don't seem to be associated with anything in
1661    particular, and even for DWARF regno 11, SDB only seems to under-
1662    stand that it should say that a variable lives in %st(0) (when
1663    asked via an `=' command) if we said it was in DWARF regno 11,
1664    but SDB still prints garbage when asked for the value of the
1665    variable in question (via a `/' command).
1666    (Also note that the labels SDB prints for various FP stack regs
1667    when doing an `x' command are all wrong.)
1668    Note that these problems generally don't affect the native SVR4
1669    C compiler because it doesn't allow the use of -O with -g and
1670    because when it is *not* optimizing, it allocates a memory
1671    location for each floating-point variable, and the memory
1672    location is what gets described in the DWARF AT_location
1673    attribute for the variable in question.
1674    Regardless of the severe mental illness of the x86/svr4 SDB, we
1675    do something sensible here and we use the following DWARF
1676    register numbers.  Note that these are all stack-top-relative
1677    numbers.
1678         11 for %st(0) (gcc regno = 8)
1679         12 for %st(1) (gcc regno = 9)
1680         13 for %st(2) (gcc regno = 10)
1681         14 for %st(3) (gcc regno = 11)
1682         15 for %st(4) (gcc regno = 12)
1683         16 for %st(5) (gcc regno = 13)
1684         17 for %st(6) (gcc regno = 14)
1685         18 for %st(7) (gcc regno = 15)
1686 */
1687 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1688 {
     /* Indexed by gcc regno; each value is the DWARF register number listed
        for that gcc regno in the comment preceding this table.  The flags
        register is the only entry in the "arg, flags, ..." row with a number
        (9, the %eflags value from that comment).  */
1689   0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
1690   11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
1691   -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, fpcr, frame */
1692   21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
1693   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
1694   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
1695   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
1696 };
1697
1698 /* Test and compare insns in i386.md store the information needed to
1699    generate branch and scc insns here.  Both operands start out as
        NULL_RTX.  */
1700
1701 rtx ix86_compare_op0 = NULL_RTX;
1702 rtx ix86_compare_op1 = NULL_RTX;
1703
1704 /* Define parameter passing and return registers.  Each table lists hard
        register numbers (*_REG constants) in a fixed order.
        NOTE(review): the order is presumably the order in which successive
        integer arguments / return-value parts are assigned -- confirm
        against the users of these tables.  */
1705
1706 static int const x86_64_int_parameter_registers[6] =
1707 {
1708   DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1709 };
1710
     /* Integer parameter registers for the MS ABI variant (four entries).  */
1711 static int const x86_64_ms_abi_int_parameter_registers[4] =
1712 {
1713   CX_REG, DX_REG, R8_REG, R9_REG
1714 };
1715
     /* Integer return registers (four entries).  */
1716 static int const x86_64_int_return_registers[4] =
1717 {
1718   AX_REG, DX_REG, DI_REG, SI_REG
1719 };
1720
1721 /* Define the structure for the machine field in struct function.
        Entries are chained through NEXT, forming a singly linked list.
        NOTE(review): MODE presumably holds a machine mode and N a slot
        number identifying the stack local whose rtx is cached in RTL --
        confirm against the code that builds the list.  */
1722
1723 struct GTY(()) stack_local_entry {
1724   unsigned short mode;
1725   unsigned short n;
1726   rtx rtl;
1727   struct stack_local_entry *next;
1728 };
1729
1730 /* Structure describing stack frame layout.
1731    Stack grows downward:
1732
1733    [arguments]
1734                                               <- ARG_POINTER
1735    saved pc
1736
1737    saved frame pointer if frame_pointer_needed
1738                                               <- HARD_FRAME_POINTER
1739    [saved regs]
1740
1741    [padding0]
1742
1743    [saved SSE regs]
1744
1745    [padding1]          \
1746                         )
1747    [va_arg registers]  (
1748                         > to_allocate         <- FRAME_POINTER
1749    [frame]             (
1750                         )
1751    [padding2]          /
1752   */
1753 struct ix86_frame
1754 {
     /* The fields down to red_zone_size are named after the regions of the
        stack layout drawn in the comment preceding this struct (padding0,
        saved SSE regs, saved regs, padding1, va_arg registers, frame,
        padding2).
        NOTE(review): nsseregs/nregs are presumably register counts and the
        remaining fields sizes in bytes -- confirm in
        ix86_compute_frame_layout.  */
1755   int padding0;
1756   int nsseregs;
1757   int nregs;
1758   int padding1;
1759   int va_arg_size;
1760   HOST_WIDE_INT frame;
1761   int padding2;
1762   int outgoing_arguments_size;
1763   int red_zone_size;
1764
     /* Size of the region marked "to_allocate" in the layout diagram.  */
1765   HOST_WIDE_INT to_allocate;
1766   /* The offsets relative to ARG_POINTER.  */
1767   HOST_WIDE_INT frame_pointer_offset;
1768   HOST_WIDE_INT hard_frame_pointer_offset;
1769   HOST_WIDE_INT stack_pointer_offset;
1770
1771   /* When save_regs_using_mov is set, emit prologue using
1772      move instead of push instructions.  */
1773   bool save_regs_using_mov;
1774 };
1775
1776 /* Code model option.  */
1777 enum cmodel ix86_cmodel;
1778 /* Asm dialect.  Defaults to AT&T syntax (ASM_ATT).  */
1779 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1780 /* TLS dialect.  Defaults to the GNU dialect (TLS_DIALECT_GNU).  */
1781 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1782
1783 /* Which unit we are generating floating point math for.  */
1784 enum fpmath_unit ix86_fpmath;
1785
1786 /* Which cpu are we scheduling for.  */
1787 enum attr_cpu ix86_schedule;
1788
1789 /* Which cpu are we optimizing for.  */
1790 enum processor_type ix86_tune;
1791
1792 /* Which instruction set architecture to use.  */
1793 enum processor_type ix86_arch;
1794
1795 /* Nonzero if the SSE prefetch instruction is not a no-op.  */
1796 int x86_prefetch_sse;
1797
1798 /* ix86_regparm_string as a number.  */
1799 static int ix86_regparm;
1800
1801 /* -mstackrealign option.  The flag variable itself is defined elsewhere
        (hence the extern declaration); the string is the name
        "force_align_arg_pointer".
        NOTE(review): presumably the name of the matching function
        attribute -- confirm where the string is used.  */
1802 extern int ix86_force_align_arg_pointer;
1803 static const char ix86_force_align_arg_pointer_string[]
1804   = "force_align_arg_pointer";
1805
     /* Pointers to insn generator functions.  They have no initializer here.
        NOTE(review): presumably assigned during option processing to the
        word-size-specific generator -- confirm where they are set.  */
1806 static rtx (*ix86_gen_leave) (void);
1807 static rtx (*ix86_gen_pop1) (rtx);
1808 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1809 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1810 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1811 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1812 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1813 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1814
1815 /* Preferred alignment for stack boundary in bits.  */
1816 unsigned int ix86_preferred_stack_boundary;
1817
1818 /* Alignment for incoming stack boundary in bits specified at
1819    command line.  */
1820 static unsigned int ix86_user_incoming_stack_boundary;
1821
1822 /* Default alignment for incoming stack boundary in bits.  */
1823 static unsigned int ix86_default_incoming_stack_boundary;
1824
1825 /* Alignment for incoming stack boundary in bits.  */
1826 unsigned int ix86_incoming_stack_boundary;
1827
1828 /* The ABI used by the target.  */
1829 enum calling_abi ix86_abi;
1830
1831 /* Values 1-5: see jump.c */
1832 int ix86_branch_cost;
1833
1834 /* Calling-ABI-specific va_list type nodes (one for SysV, one for MS).  */
1835 static GTY(()) tree sysv_va_list_type_node;
1836 static GTY(()) tree ms_va_list_type_node;
1837
1838 /* Variables which are this size or smaller are put in the data/bss
1839    or ldata/lbss sections.  The default threshold is 65536.  */
1840
1841 int ix86_section_threshold = 65536;
1842
1843 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL, and its length.  The
        buffer holds at most 16 chars.  */
1844 char internal_label_prefix[16];
1845 int internal_label_prefix_len;
1846
1847 /* Fence to use after loop using movnt.  */
1848 tree x86_mfence;
1849
1850 /* Register class used for passing a given 64-bit part of an argument.
1851    These are the classes documented by the PS ABI, with the exception
1852    of the SSESF and SSEDF classes: those are basically the SSE class, but
1853    gcc uses an SFmode or DFmode move instead of DImode to avoid
        reformatting penalties.
1854
1855    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1856    whenever possible (upper half does contain padding).  */
1857 enum x86_64_reg_class
1858   {
1859     X86_64_NO_CLASS,
1860     X86_64_INTEGER_CLASS,
1861     X86_64_INTEGERSI_CLASS,
1862     X86_64_SSE_CLASS,
1863     X86_64_SSESF_CLASS,
1864     X86_64_SSEDF_CLASS,
1865     X86_64_SSEUP_CLASS,
1866     X86_64_X87_CLASS,
1867     X86_64_X87UP_CLASS,
1868     X86_64_COMPLEX_X87_CLASS,
1869     X86_64_MEMORY_CLASS
1870   };
1871
/* NOTE(review): presumably the maximum number of x86_64_reg_class entries
   used to describe one argument -- confirm at the use sites.  */
1872 #define MAX_CLASSES 4
1873
1874 /* Table of constants used by fldpi, fldln2, etc....  The table has five
        entries.  ext_80387_constants_init starts out as 0.
        NOTE(review): presumably it is set once the table has been filled
        in -- confirm in the code that initializes the table.  */
1875 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1876 static bool ext_80387_constants_init = 0;
1877
1878 \f
/* Forward declarations of file-local (static) functions, so they can be
   referenced before their definitions.  */
1879 static struct machine_function * ix86_init_machine_status (void);
1880 static rtx ix86_function_value (const_tree, const_tree, bool);
1881 static rtx ix86_static_chain (const_tree, bool);
1882 static int ix86_function_regparm (const_tree, const_tree);
1883 static void ix86_compute_frame_layout (struct ix86_frame *);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1885                                                  rtx, rtx, int);
1886 static void ix86_add_new_builtins (int);
1887 static rtx ix86_expand_vec_perm_builtin (tree);
1888
     /* Indices for the per-function option strings (arch, tune, fpmath);
        IX86_FUNCTION_SPECIFIC_MAX is the number of such strings.  */
1889 enum ix86_function_specific_strings
1890 {
1891   IX86_FUNCTION_SPECIFIC_ARCH,
1892   IX86_FUNCTION_SPECIFIC_TUNE,
1893   IX86_FUNCTION_SPECIFIC_FPMATH,
1894   IX86_FUNCTION_SPECIFIC_MAX
1895 };
1896
     /* Forward declarations of further file-local (static) functions.
        ix86_debug_options is marked ATTRIBUTE_UNUSED.  */
1897 static char *ix86_target_string (int, int, const char *, const char *,
1898                                  const char *, bool);
1899 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1900 static void ix86_function_specific_save (struct cl_target_option *);
1901 static void ix86_function_specific_restore (struct cl_target_option *);
1902 static void ix86_function_specific_print (FILE *, int,
1903                                           struct cl_target_option *);
1904 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1905 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1906 static bool ix86_can_inline_p (tree, tree);
1907 static void ix86_set_current_function (tree);
1908 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
1909
1910 static enum calling_abi ix86_function_abi (const_tree);
1911
1912 \f
1913 /* The svr4 ABI for the i386 says that records and unions are returned
1914    in memory.  The default of 1 applies only when the macro has not
        already been defined.  */
1915 #ifndef DEFAULT_PCC_STRUCT_RETURN
1916 #define DEFAULT_PCC_STRUCT_RETURN 1
1917 #endif
1918
1919 /* Whether -mtune= or -march= were specified.  */
1920 static int ix86_tune_defaulted;
1921 static int ix86_arch_specified;
1922
1923 /* Bit flags that specify the ISA we are compiling for.  The initial value
        combines the 64-bit default with the subtarget's default ISA bits.  */
1924 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1925
1926 /* A mask of ix86_isa_flags that includes bit X if X
1927    was set or cleared on the command line.  */
1928 static int ix86_isa_flags_explicit;
1929
1930 /* Define a set of ISAs which are available when a given ISA is
1931    enabled.  MMX and SSE ISAs are handled separately.
        Each FOO_SET mask is FOO's own bit ORed with the _SET mask of the ISA
        it builds on, so the masks chain: for example SSE3_SET contains SSE3,
        SSE2 and SSE.  ISAs that imply nothing else have a _SET mask equal to
        their own bit.  */
1932
1933 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1934 #define OPTION_MASK_ISA_3DNOW_SET \
1935   (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1936
1937 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1938 #define OPTION_MASK_ISA_SSE2_SET \
1939   (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1940 #define OPTION_MASK_ISA_SSE3_SET \
1941   (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1942 #define OPTION_MASK_ISA_SSSE3_SET \
1943   (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_1_SET \
1945   (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1946 #define OPTION_MASK_ISA_SSE4_2_SET \
1947   (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1948 #define OPTION_MASK_ISA_AVX_SET \
1949   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1950 #define OPTION_MASK_ISA_FMA_SET \
1951   (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1952
1953 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1954    as -msse4.2.  */
1955 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1956
     /* FMA4 pulls in both the SSE4A chain and the AVX chain; XOP builds on
        FMA4.  */
1957 #define OPTION_MASK_ISA_SSE4A_SET \
1958   (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1959 #define OPTION_MASK_ISA_FMA4_SET \
1960   (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
1961    | OPTION_MASK_ISA_AVX_SET)
1962 #define OPTION_MASK_ISA_XOP_SET \
1963   (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
1964 #define OPTION_MASK_ISA_LWP_SET \
1965   OPTION_MASK_ISA_LWP
1966
1967 /* AES and PCLMUL need SSE2 because they use xmm registers.  */
1968 #define OPTION_MASK_ISA_AES_SET \
1969   (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1970 #define OPTION_MASK_ISA_PCLMUL_SET \
1971   (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1972
     /* Enabling ABM also enables POPCNT.  */
1973 #define OPTION_MASK_ISA_ABM_SET \
1974   (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1975
1976 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1977 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1978 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1979 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1980 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
1981
1982 /* Define a set of ISAs which aren't available when a given ISA is
1983    disabled.  MMX and SSE ISAs are handled separately.
        Each FOO_UNSET mask is FOO's own bit ORed with the _UNSET masks of the
        ISAs that build on FOO, so the masks chain in the opposite direction
        to the _SET masks: for example SSE_UNSET covers SSE, SSE2, SSE3 and
        everything layered on top of them.  Some macros refer to _UNSET
        macros that are defined further down; that is fine because macros are
        expanded at the point of use, not at the point of definition.  */
1984
1985 #define OPTION_MASK_ISA_MMX_UNSET \
1986   (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1987 #define OPTION_MASK_ISA_3DNOW_UNSET \
1988   (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1989 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1990
1991 #define OPTION_MASK_ISA_SSE_UNSET \
1992   (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1993 #define OPTION_MASK_ISA_SSE2_UNSET \
1994   (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1995 #define OPTION_MASK_ISA_SSE3_UNSET \
1996   (OPTION_MASK_ISA_SSE3 \
1997    | OPTION_MASK_ISA_SSSE3_UNSET \
1998    | OPTION_MASK_ISA_SSE4A_UNSET )
1999 #define OPTION_MASK_ISA_SSSE3_UNSET \
2000   (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2001 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2002   (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2003 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2004   (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2005 #define OPTION_MASK_ISA_AVX_UNSET \
2006   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2007    | OPTION_MASK_ISA_FMA4_UNSET)
2008 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2009
2010 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2011    as -mno-sse4.1.  */
2012 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2013
2014 #define OPTION_MASK_ISA_SSE4A_UNSET \
2015   (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2016
2017 #define OPTION_MASK_ISA_FMA4_UNSET \
2018   (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2019 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2020 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2021
     /* The remaining ISAs have nothing layered on top of them here, so
        disabling one clears only its own bit.  In particular, disabling ABM
        does not clear POPCNT even though enabling ABM sets it.  */
2022 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2023 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2024 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2025 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2026 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2027 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2028 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2029 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2030
2031 /* Vectorization library interface and handlers.  ix86_veclib_handler
        starts out as NULL; the two static functions declared below share its
        signature.
        NOTE(review): presumably one of them is installed in the pointer when
        a vector library ABI is selected -- confirm in option handling.  */
2032 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2033 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2034 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2035
2036 /* Processor target table, indexed by processor number.  Each entry pairs
        a processor's cost table with its default alignment settings.  */
2037 struct ptt
2038 {
2039   const struct processor_costs *cost;           /* Processor costs */
2040   const int align_loop;                         /* Default alignments.  */
2041   const int align_loop_max_skip;
2042   const int align_jump;
2043   const int align_jump_max_skip;
2044   const int align_func;
2045 };
2046
/* One row per processor (PROCESSOR_max rows).  Each row is
   {cost, align_loop, align_loop_max_skip, align_jump, align_jump_max_skip,
   align_func}, in struct ptt field order.  Rows are positional, so their
   order must match the processor numbering used to index the table.  */
2047 static const struct ptt processor_target_table[PROCESSOR_max] =
2048 {
2049   {&i386_cost, 4, 3, 4, 3, 4},
2050   {&i486_cost, 16, 15, 16, 15, 16},
2051   {&pentium_cost, 16, 7, 16, 7, 16},
2052   {&pentiumpro_cost, 16, 15, 16, 10, 16},
2053   {&geode_cost, 0, 0, 0, 0, 0},
2054   {&k6_cost, 32, 7, 32, 7, 32},
2055   {&athlon_cost, 16, 7, 16, 7, 16},
2056   {&pentium4_cost, 0, 0, 0, 0, 0},
2057   {&k8_cost, 16, 7, 16, 7, 16},
2058   {&nocona_cost, 0, 0, 0, 0, 0},
2059   {&core2_cost, 16, 10, 16, 10, 16},
2060   {&generic32_cost, 16, 7, 16, 7, 16},
2061   {&generic64_cost, 16, 10, 16, 10, 16},
2062   {&amdfam10_cost, 32, 24, 32, 7, 32},
2063   {&atom_cost, 16, 7, 16, 7, 16}
2064 };
2065
/* CPU name strings, one per default-CPU index (TARGET_CPU_DEFAULT_max
   entries).  The list is positional.
   NOTE(review): the order presumably has to match the TARGET_CPU_DEFAULT_*
   values defined elsewhere -- confirm before reordering or inserting.  */
2066 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2067 {
2068   "generic",
2069   "i386",
2070   "i486",
2071   "pentium",
2072   "pentium-mmx",
2073   "pentiumpro",
2074   "pentium2",
2075   "pentium3",
2076   "pentium4",
2077   "pentium-m",
2078   "prescott",
2079   "nocona",
2080   "core2",
2081   "atom",
2082   "geode",
2083   "k6",
2084   "k6-2",
2085   "k6-3",
2086   "athlon",
2087   "athlon-4",
2088   "k8",
2089   "amdfam10"
2090 };
2091 \f
2092 /* Implement TARGET_HANDLE_OPTION.

        CODE is the OPT_* code of the option being processed.  ARG is unused.
        A nonzero VALUE takes the enabling branch and a zero VALUE the
        disabling branch; the OPT_msse4 / OPT_mno_sse4 pair uses two separate
        codes and does not look at VALUE.

        For every ISA option handled here, enabling ORs the option's *_SET
        mask into ix86_isa_flags and disabling clears its *_UNSET mask from
        ix86_isa_flags.  In both cases the affected mask is also ORed into
        ix86_isa_flags_explicit, which records the bits that were touched.

        Returns false only for OPT_m3dnowa, which changes no flags.  Every
        other code, including codes with no case in the switch, returns true.
        NOTE(review): false presumably tells the option machinery that the
        option was not accepted -- confirm against the TARGET_HANDLE_OPTION
        contract.  */
2093
2094 static bool
2095 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2096 {
2097   switch (code)
2098     {
2099     case OPT_mmmx:
2100       if (value)
2101         {
2102           ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2103           ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2104         }
2105       else
2106         {
               /* Clearing still counts as an explicit choice, so the cleared
                  bits are recorded in ix86_isa_flags_explicit as well.  The
                  same pattern repeats for every option below.  */
2107           ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2108           ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2109         }
2110       return true;
2111
2112     case OPT_m3dnow:
2113       if (value)
2114         {
2115           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2116           ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2117         }
2118       else
2119         {
2120           ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2121           ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2122         }
2123       return true;
2124
         /* The only case that returns false; no ISA flags are changed.  */
2125     case OPT_m3dnowa:
2126       return false;
2127
2128     case OPT_msse:
2129       if (value)
2130         {
2131           ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2132           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2133         }
2134       else
2135         {
2136           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2137           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2138         }
2139       return true;
2140
2141     case OPT_msse2:
2142       if (value)
2143         {
2144           ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2145           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2146         }
2147       else
2148         {
2149           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2150           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2151         }
2152       return true;
2153
2154     case OPT_msse3:
2155       if (value)
2156         {
2157           ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2158           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2159         }
2160       else
2161         {
2162           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2163           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2164         }
2165       return true;
2166
2167     case OPT_mssse3:
2168       if (value)
2169         {
2170           ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2171           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2172         }
2173       else
2174         {
2175           ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2176           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2177         }
2178       return true;
2179
2180     case OPT_msse4_1:
2181       if (value)
2182         {
2183           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2184           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2185         }
2186       else
2187         {
2188           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2189           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2190         }
2191       return true;
2192
2193     case OPT_msse4_2:
2194       if (value)
2195         {
2196           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2197           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2198         }
2199       else
2200         {
2201           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2202           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2203         }
2204       return true;
2205
2206     case OPT_mavx:
2207       if (value)
2208         {
2209           ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2210           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2211         }
2212       else
2213         {
2214           ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2215           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2216         }
2217       return true;
2218
2219     case OPT_mfma:
2220       if (value)
2221         {
2222           ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2223           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2224         }
2225       else
2226         {
2227           ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2228           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2229         }
2230       return true;
2231
         /* -msse4 and -mno-sse4 arrive as two distinct option codes, so
            neither case consults VALUE.  */
2232     case OPT_msse4:
2233       ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2234       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2235       return true;
2236
2237     case OPT_mno_sse4:
2238       ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2239       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2240       return true;
2241
2242     case OPT_msse4a:
2243       if (value)
2244         {
2245           ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2246           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2247         }
2248       else
2249         {
2250           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2251           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2252         }
2253       return true;
2254
2255     case OPT_mfma4:
2256       if (value)
2257         {
2258           ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2259           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2260         }
2261       else
2262         {
2263           ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2264           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2265         }
2266       return true;
2267
2268    case OPT_mxop:
2269       if (value)
2270         {
2271           ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2272           ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2273         }
2274       else
2275         {
2276           ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2277           ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2278         }
2279       return true;
2280
2281    case OPT_mlwp:
2282       if (value)
2283         {
2284           ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2285           ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2286         }
2287       else
2288         {
2289           ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2290           ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2291         }
2292       return true;
2293
2294     case OPT_mabm:
2295       if (value)
2296         {
2297           ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2298           ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2299         }
2300       else
2301         {
2302           ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2303           ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2304         }
2305       return true;
2306
2307     case OPT_mpopcnt:
2308       if (value)
2309         {
2310           ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2311           ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2312         }
2313       else
2314         {
2315           ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2316           ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2317         }
2318       return true;
2319
2320     case OPT_msahf:
2321       if (value)
2322         {
2323           ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2324           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2325         }
2326       else
2327         {
2328           ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2329           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2330         }
2331       return true;
2332
2333     case OPT_mcx16:
2334       if (value)
2335         {
2336           ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2337           ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2338         }
2339       else
2340         {
2341           ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2342           ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2343         }
2344       return true;
2345
2346     case OPT_mmovbe:
2347       if (value)
2348         {
2349           ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2350           ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2351         }
2352       else
2353         {
2354           ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2355           ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2356         }
2357       return true;
2358
2359     case OPT_mcrc32:
2360       if (value)
2361         {
2362           ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2363           ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2364         }
2365       else
2366         {
2367           ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2368           ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2369         }
2370       return true;
2371
2372     case OPT_maes:
2373       if (value)
2374         {
2375           ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2376           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2377         }
2378       else
2379         {
2380           ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2381           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2382         }
2383       return true;
2384
2385     case OPT_mpclmul:
2386       if (value)
2387         {
2388           ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2389           ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2390         }
2391       else
2392         {
2393           ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2394           ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2395         }
2396       return true;
2397
         /* Any option code without its own case changes no ISA flags and
            returns true.  */
2398     default:
2399       return true;
2400     }
2401 }
2402 \f
2403 /* Return a string the documents the current -m options.  The caller is
2404    responsible for freeing the string.  */
2405
2406 static char *
2407 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2408                     const char *fpmath, bool add_nl_p)
2409 {
2410   struct ix86_target_opts
2411   {
2412     const char *option;         /* option string */
2413     int mask;                   /* isa mask options */
2414   };
2415
2416   /* This table is ordered so that options like -msse4.2 that imply
2417      preceding options while match those first.  */
2418   static struct ix86_target_opts isa_opts[] =
2419   {
2420     { "-m64",           OPTION_MASK_ISA_64BIT },
2421     { "-mfma4",         OPTION_MASK_ISA_FMA4 },
2422     { "-mxop",          OPTION_MASK_ISA_XOP },
2423     { "-mlwp",          OPTION_MASK_ISA_LWP },
2424     { "-msse4a",        OPTION_MASK_ISA_SSE4A },
2425     { "-msse4.2",       OPTION_MASK_ISA_SSE4_2 },
2426     { "-msse4.1",       OPTION_MASK_ISA_SSE4_1 },
2427     { "-mssse3",        OPTION_MASK_ISA_SSSE3 },
2428     { "-msse3",         OPTION_MASK_ISA_SSE3 },
2429     { "-msse2",         OPTION_MASK_ISA_SSE2 },
2430     { "-msse",          OPTION_MASK_ISA_SSE },
2431     { "-m3dnow",        OPTION_MASK_ISA_3DNOW },
2432     { "-m3dnowa",       OPTION_MASK_ISA_3DNOW_A },
2433     { "-mmmx",          OPTION_MASK_ISA_MMX },
2434     { "-mabm",          OPTION_MASK_ISA_ABM },
2435     { "-mpopcnt",       OPTION_MASK_ISA_POPCNT },
2436     { "-mmovbe",        OPTION_MASK_ISA_MOVBE },
2437     { "-mcrc32",        OPTION_MASK_ISA_CRC32 },
2438     { "-maes",          OPTION_MASK_ISA_AES },
2439     { "-mpclmul",       OPTION_MASK_ISA_PCLMUL },
2440   };
2441
2442   /* Flag options.  */
2443   static struct ix86_target_opts flag_opts[] =
2444   {
2445     { "-m128bit-long-double",           MASK_128BIT_LONG_DOUBLE },
2446     { "-m80387",                        MASK_80387 },
2447     { "-maccumulate-outgoing-args",     MASK_ACCUMULATE_OUTGOING_ARGS },
2448     { "-malign-double",                 MASK_ALIGN_DOUBLE },
2449     { "-mcld",                          MASK_CLD },
2450     { "-mfp-ret-in-387",                MASK_FLOAT_RETURNS },
2451     { "-mieee-fp",                      MASK_IEEE_FP },
2452     { "-minline-all-stringops",         MASK_INLINE_ALL_STRINGOPS },
2453     { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2454     { "-mms-bitfields",                 MASK_MS_BITFIELD_LAYOUT },
2455     { "-mno-align-stringops",           MASK_NO_ALIGN_STRINGOPS },
2456     { "-mno-fancy-math-387",            MASK_NO_FANCY_MATH_387 },
2457     { "-mno-push-args",                 MASK_NO_PUSH_ARGS },
2458     { "-mno-red-zone",                  MASK_NO_RED_ZONE },
2459     { "-momit-leaf-frame-pointer",      MASK_OMIT_LEAF_FRAME_POINTER },
2460     { "-mrecip",                        MASK_RECIP },
2461     { "-mrtd",                          MASK_RTD },
2462     { "-msseregparm",                   MASK_SSEREGPARM },
2463     { "-mstack-arg-probe",              MASK_STACK_PROBE },
2464     { "-mtls-direct-seg-refs",          MASK_TLS_DIRECT_SEG_REFS },
2465   };
2466
2467   const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2468
2469   char isa_other[40];
2470   char target_other[40];
2471   unsigned num = 0;
2472   unsigned i, j;
2473   char *ret;
2474   char *ptr;
2475   size_t len;
2476   size_t line_len;
2477   size_t sep_len;
2478
2479   memset (opts, '\0', sizeof (opts));
2480
2481   /* Add -march= option.  */
2482   if (arch)
2483     {
2484       opts[num][0] = "-march=";
2485       opts[num++][1] = arch;
2486     }
2487
2488   /* Add -mtune= option.  */
2489   if (tune)
2490     {
2491       opts[num][0] = "-mtune=";
2492       opts[num++][1] = tune;
2493     }
2494
2495   /* Pick out the options in isa options.  */
2496   for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2497     {
2498       if ((isa & isa_opts[i].mask) != 0)
2499         {
2500           opts[num++][0] = isa_opts[i].option;
2501           isa &= ~ isa_opts[i].mask;
2502         }
2503     }
2504
2505   if (isa && add_nl_p)
2506     {
2507       opts[num++][0] = isa_other;
2508       sprintf (isa_other, "(other isa: 0x%x)", isa);
2509     }
2510
2511   /* Add flag options.  */
2512   for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2513     {
2514       if ((flags & flag_opts[i].mask) != 0)
2515         {
2516           opts[num++][0] = flag_opts[i].option;
2517           flags &= ~ flag_opts[i].mask;
2518         }
2519     }
2520
2521   if (flags && add_nl_p)
2522     {
2523       opts[num++][0] = target_other;
2524       sprintf (target_other, "(other flags: 0x%x)", isa);
2525     }
2526
2527   /* Add -fpmath= option.  */
2528   if (fpmath)
2529     {
2530       opts[num][0] = "-mfpmath=";
2531       opts[num++][1] = fpmath;
2532     }
2533
2534   /* Any options?  */
2535   if (num == 0)
2536     return NULL;
2537
2538   gcc_assert (num < ARRAY_SIZE (opts));
2539
2540   /* Size the string.  */
2541   len = 0;
2542   sep_len = (add_nl_p) ? 3 : 1;
2543   for (i = 0; i < num; i++)
2544     {
2545       len += sep_len;
2546       for (j = 0; j < 2; j++)
2547         if (opts[i][j])
2548           len += strlen (opts[i][j]);
2549     }
2550
2551   /* Build the string.  */
2552   ret = ptr = (char *) xmalloc (len);
2553   line_len = 0;
2554
2555   for (i = 0; i < num; i++)
2556     {
2557       size_t len2[2];
2558
2559       for (j = 0; j < 2; j++)
2560         len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2561
2562       if (i != 0)
2563         {
2564           *ptr++ = ' ';
2565           line_len++;
2566
2567           if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2568             {
2569               *ptr++ = '\\';
2570               *ptr++ = '\n';
2571               line_len = 0;
2572             }
2573         }
2574
2575       for (j = 0; j < 2; j++)
2576         if (opts[i][j])
2577           {
2578             memcpy (ptr, opts[i][j], len2[j]);
2579             ptr += len2[j];
2580             line_len += len2[j];
2581           }
2582     }
2583
2584   *ptr = '\0';
2585   gcc_assert (ret + len >= ptr);
2586
2587   return ret;
2588 }
2589
2590 /* Function that is callable from the debugger to print the current
2591    options.  */
2592 void
2593 ix86_debug_options (void)
2594 {
2595   char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2596                                    ix86_arch_string, ix86_tune_string,
2597                                    ix86_fpmath_string, true);
2598
2599   if (opts)
2600     {
2601       fprintf (stderr, "%s\n\n", opts);
2602       free (opts);
2603     }
2604   else
2605     fputs ("<no options>\n\n", stderr);
2606
2607   return;
2608 }
2609 \f
2610 /* Sometimes certain combinations of command options do not make
2611    sense on a particular target machine.  You can define a macro
2612    `OVERRIDE_OPTIONS' to take account of this.  This macro, if
2613    defined, is executed once just after all the command options have
2614    been parsed.
2615
2616    Don't use this macro to turn on various extra optimizations for
2617    `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
2618
2619 void
2620 override_options (bool main_args_p)
2621 {
2622   int i;
2623   unsigned int ix86_arch_mask, ix86_tune_mask;
2624   const char *prefix;
2625   const char *suffix;
2626   const char *sw;
2627
2628   /* Comes from final.c -- no real reason to change it.  */
2629 #define MAX_CODE_ALIGN 16
2630
2631   enum pta_flags
2632     {
2633       PTA_SSE = 1 << 0,
2634       PTA_SSE2 = 1 << 1,
2635       PTA_SSE3 = 1 << 2,
2636       PTA_MMX = 1 << 3,
2637       PTA_PREFETCH_SSE = 1 << 4,
2638       PTA_3DNOW = 1 << 5,
2639       PTA_3DNOW_A = 1 << 6,
2640       PTA_64BIT = 1 << 7,
2641       PTA_SSSE3 = 1 << 8,
2642       PTA_CX16 = 1 << 9,
2643       PTA_POPCNT = 1 << 10,
2644       PTA_ABM = 1 << 11,
2645       PTA_SSE4A = 1 << 12,
2646       PTA_NO_SAHF = 1 << 13,
2647       PTA_SSE4_1 = 1 << 14,
2648       PTA_SSE4_2 = 1 << 15,
2649       PTA_AES = 1 << 16,
2650       PTA_PCLMUL = 1 << 17,
2651       PTA_AVX = 1 << 18,
2652       PTA_FMA = 1 << 19,
2653       PTA_MOVBE = 1 << 20,
2654       PTA_FMA4 = 1 << 21,
2655       PTA_XOP = 1 << 22,
2656       PTA_LWP = 1 << 23
2657     };
2658
2659   static struct pta
2660     {
2661       const char *const name;           /* processor name or nickname.  */
2662       const enum processor_type processor;
2663       const enum attr_cpu schedule;
2664       const unsigned /*enum pta_flags*/ flags;
2665     }
2666   const processor_alias_table[] =
2667     {
2668       {"i386", PROCESSOR_I386, CPU_NONE, 0},
2669       {"i486", PROCESSOR_I486, CPU_NONE, 0},
2670       {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2671       {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2672       {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2673       {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2674       {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2675       {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2676       {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2677       {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2678       {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2679       {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2680       {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2681         PTA_MMX | PTA_SSE},
2682       {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2683         PTA_MMX | PTA_SSE},
2684       {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2685         PTA_MMX | PTA_SSE | PTA_SSE2},
2686       {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2687         PTA_MMX |PTA_SSE | PTA_SSE2},
2688       {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2689         PTA_MMX | PTA_SSE | PTA_SSE2},
2690       {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2691         PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2692       {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2693         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2694         | PTA_CX16 | PTA_NO_SAHF},
2695       {"core2", PROCESSOR_CORE2, CPU_CORE2,
2696         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2697         | PTA_SSSE3 | PTA_CX16},
2698       {"atom", PROCESSOR_ATOM, CPU_ATOM,
2699         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2700         | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2701       {"geode", PROCESSOR_GEODE, CPU_GEODE,
2702         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2703       {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2704       {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2705       {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2706       {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2707         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2708       {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2709         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2710       {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2711         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2712       {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2713         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2714       {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2715         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2716       {"x86-64", PROCESSOR_K8, CPU_K8,
2717         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2718       {"k8", PROCESSOR_K8, CPU_K8,
2719         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2720         | PTA_SSE2 | PTA_NO_SAHF},
2721       {"k8-sse3", PROCESSOR_K8, CPU_K8,
2722         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2723         | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2724       {"opteron", PROCESSOR_K8, CPU_K8,
2725         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2726         | PTA_SSE2 | PTA_NO_SAHF},
2727       {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2728         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2729         | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2730       {"athlon64", PROCESSOR_K8, CPU_K8,
2731         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2732         | PTA_SSE2 | PTA_NO_SAHF},
2733       {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2734         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2735         | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2736       {"athlon-fx", PROCESSOR_K8, CPU_K8,
2737         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2738         | PTA_SSE2 | PTA_NO_SAHF},
2739       {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2740         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2741         | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2742       {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2743         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2744         | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2745       {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2746         0 /* flags are only used for -march switch.  */ },
2747       {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2748         PTA_64BIT /* flags are only used for -march switch.  */ },
2749     };
2750
2751   int const pta_size = ARRAY_SIZE (processor_alias_table);
2752
2753   /* Set up prefix/suffix so the error messages refer to either the command
2754      line argument, or the attribute(target).  */
2755   if (main_args_p)
2756     {
2757       prefix = "-m";
2758       suffix = "";
2759       sw = "switch";
2760     }
2761   else
2762     {
2763       prefix = "option(\"";
2764       suffix = "\")";
2765       sw = "attribute";
2766     }
2767
2768 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2769   SUBTARGET_OVERRIDE_OPTIONS;
2770 #endif
2771
2772 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2773   SUBSUBTARGET_OVERRIDE_OPTIONS;
2774 #endif
2775
2776   /* -fPIC is the default for x86_64.  */
2777   if (TARGET_MACHO && TARGET_64BIT)
2778     flag_pic = 2;
2779
2780   /* Set the default values for switches whose default depends on TARGET_64BIT
2781      in case they weren't overwritten by command line options.  */
2782   if (TARGET_64BIT)
2783     {
2784       /* Mach-O doesn't support omitting the frame pointer for now.  */
2785       if (flag_omit_frame_pointer == 2)
2786         flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2787       if (flag_asynchronous_unwind_tables == 2)
2788         flag_asynchronous_unwind_tables = 1;
2789       if (flag_pcc_struct_return == 2)
2790         flag_pcc_struct_return = 0;
2791     }
2792   else
2793     {
2794       if (flag_omit_frame_pointer == 2)
2795         flag_omit_frame_pointer = 0;
2796       if (flag_asynchronous_unwind_tables == 2)
2797         flag_asynchronous_unwind_tables = 0;
2798       if (flag_pcc_struct_return == 2)
2799         flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2800     }
2801
2802   /* Need to check -mtune=generic first.  */
2803   if (ix86_tune_string)
2804     {
2805       if (!strcmp (ix86_tune_string, "generic")
2806           || !strcmp (ix86_tune_string, "i686")
2807           /* As special support for cross compilers we read -mtune=native
2808              as -mtune=generic.  With native compilers we won't see the
2809              -mtune=native, as it was changed by the driver.  */
2810           || !strcmp (ix86_tune_string, "native"))
2811         {
2812           if (TARGET_64BIT)
2813             ix86_tune_string = "generic64";
2814           else
2815             ix86_tune_string = "generic32";
2816         }
2817       /* If this call is for setting the option attribute, allow the
2818          generic32/generic64 that was previously set.  */
2819       else if (!main_args_p
2820                && (!strcmp (ix86_tune_string, "generic32")
2821                    || !strcmp (ix86_tune_string, "generic64")))
2822         ;
2823       else if (!strncmp (ix86_tune_string, "generic", 7))
2824         error ("bad value (%s) for %stune=%s %s",
2825                ix86_tune_string, prefix, suffix, sw);
2826     }
2827   else
2828     {
2829       if (ix86_arch_string)
2830         ix86_tune_string = ix86_arch_string;
2831       if (!ix86_tune_string)
2832         {
2833           ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2834           ix86_tune_defaulted = 1;
2835         }
2836
2837       /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
2838          need to use a sensible tune option.  */
2839       if (!strcmp (ix86_tune_string, "generic")
2840           || !strcmp (ix86_tune_string, "x86-64")
2841           || !strcmp (ix86_tune_string, "i686"))
2842         {
2843           if (TARGET_64BIT)
2844             ix86_tune_string = "generic64";
2845           else
2846             ix86_tune_string = "generic32";
2847         }
2848     }
2849   if (ix86_stringop_string)
2850     {
2851       if (!strcmp (ix86_stringop_string, "rep_byte"))
2852         stringop_alg = rep_prefix_1_byte;
2853       else if (!strcmp (ix86_stringop_string, "libcall"))
2854         stringop_alg = libcall;
2855       else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2856         stringop_alg = rep_prefix_4_byte;
2857       else if (!strcmp (ix86_stringop_string, "rep_8byte")
2858                && TARGET_64BIT)
2859         /* rep; movq isn't available in 32-bit code.  */
2860         stringop_alg = rep_prefix_8_byte;
2861       else if (!strcmp (ix86_stringop_string, "byte_loop"))
2862         stringop_alg = loop_1_byte;
2863       else if (!strcmp (ix86_stringop_string, "loop"))
2864         stringop_alg = loop;
2865       else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2866         stringop_alg = unrolled_loop;
2867       else
2868         error ("bad value (%s) for %sstringop-strategy=%s %s",
2869                ix86_stringop_string, prefix, suffix, sw);
2870     }
2871   if (!strcmp (ix86_tune_string, "x86-64"))
2872     warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated.  Use "
2873              "%stune=k8%s or %stune=generic%s instead as appropriate.",
2874              prefix, suffix, prefix, suffix, prefix, suffix);
2875
2876   if (!ix86_arch_string)
2877     ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2878   else
2879     ix86_arch_specified = 1;
2880
2881   if (!strcmp (ix86_arch_string, "generic"))
2882     error ("generic CPU can be used only for %stune=%s %s",
2883            prefix, suffix, sw);
2884   if (!strncmp (ix86_arch_string, "generic", 7))
2885     error ("bad value (%s) for %sarch=%s %s",
2886            ix86_arch_string, prefix, suffix, sw);
2887
2888   /* Validate -mabi= value.  */
2889   if (ix86_abi_string)
2890     {
2891       if (strcmp (ix86_abi_string, "sysv") == 0)
2892         ix86_abi = SYSV_ABI;
2893       else if (strcmp (ix86_abi_string, "ms") == 0)
2894         ix86_abi = MS_ABI;
2895       else
2896         error ("unknown ABI (%s) for %sabi=%s %s",
2897                ix86_abi_string, prefix, suffix, sw);
2898     }
2899   else
2900     ix86_abi = DEFAULT_ABI;
2901
2902   if (ix86_cmodel_string != 0)
2903     {
2904       if (!strcmp (ix86_cmodel_string, "small"))
2905         ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2906       else if (!strcmp (ix86_cmodel_string, "medium"))
2907         ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2908       else if (!strcmp (ix86_cmodel_string, "large"))
2909         ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2910       else if (flag_pic)
2911         error ("code model %s does not support PIC mode", ix86_cmodel_string);
2912       else if (!strcmp (ix86_cmodel_string, "32"))
2913         ix86_cmodel = CM_32;
2914       else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2915         ix86_cmodel = CM_KERNEL;
2916       else
2917         error ("bad value (%s) for %scmodel=%s %s",
2918                ix86_cmodel_string, prefix, suffix, sw);
2919     }
2920   else
2921     {
2922       /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2923          use of rip-relative addressing.  This eliminates fixups that
2924          would otherwise be needed if this object is to be placed in a
2925          DLL, and is essentially just as efficient as direct addressing.  */
2926       if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2927         ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2928       else if (TARGET_64BIT)
2929         ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2930       else
2931         ix86_cmodel = CM_32;
2932     }
2933   if (ix86_asm_string != 0)
2934     {
2935       if (! TARGET_MACHO
2936           && !strcmp (ix86_asm_string, "intel"))
2937         ix86_asm_dialect = ASM_INTEL;
2938       else if (!strcmp (ix86_asm_string, "att"))
2939         ix86_asm_dialect = ASM_ATT;
2940       else
2941         error ("bad value (%s) for %sasm=%s %s",
2942                ix86_asm_string, prefix, suffix, sw);
2943     }
2944   if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2945     error ("code model %qs not supported in the %s bit mode",
2946            ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2947   if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2948     sorry ("%i-bit mode not compiled in",
2949            (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2950
2951   for (i = 0; i < pta_size; i++)
2952     if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2953       {
2954         ix86_schedule = processor_alias_table[i].schedule;
2955         ix86_arch = processor_alias_table[i].processor;
2956         /* Default cpu tuning to the architecture.  */
2957         ix86_tune = ix86_arch;
2958
2959         if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2960           error ("CPU you selected does not support x86-64 "
2961                  "instruction set");
2962
2963         if (processor_alias_table[i].flags & PTA_MMX
2964             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2965           ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2966         if (processor_alias_table[i].flags & PTA_3DNOW
2967             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2968           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2969         if (processor_alias_table[i].flags & PTA_3DNOW_A
2970             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2971           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2972         if (processor_alias_table[i].flags & PTA_SSE
2973             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2974           ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2975         if (processor_alias_table[i].flags & PTA_SSE2
2976             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2977           ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2978         if (processor_alias_table[i].flags & PTA_SSE3
2979             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2980           ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2981         if (processor_alias_table[i].flags & PTA_SSSE3
2982             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2983           ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2984         if (processor_alias_table[i].flags & PTA_SSE4_1
2985             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2986           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2987         if (processor_alias_table[i].flags & PTA_SSE4_2
2988             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2989           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2990         if (processor_alias_table[i].flags & PTA_AVX
2991             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2992           ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2993         if (processor_alias_table[i].flags & PTA_FMA
2994             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2995           ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2996         if (processor_alias_table[i].flags & PTA_SSE4A
2997             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2998           ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2999         if (processor_alias_table[i].flags & PTA_FMA4
3000             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3001           ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3002         if (processor_alias_table[i].flags & PTA_XOP
3003             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3004           ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3005         if (processor_alias_table[i].flags & PTA_LWP
3006             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3007           ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3008         if (processor_alias_table[i].flags & PTA_ABM
3009             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3010           ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3011         if (processor_alias_table[i].flags & PTA_CX16
3012             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3013           ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3014         if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3015             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3016           ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3017         if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3018             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3019           ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3020         if (processor_alias_table[i].flags & PTA_MOVBE
3021             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3022           ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3023         if (processor_alias_table[i].flags & PTA_AES
3024             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3025           ix86_isa_flags |= OPTION_MASK_ISA_AES;
3026         if (processor_alias_table[i].flags & PTA_PCLMUL
3027             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3028           ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3029         if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3030           x86_prefetch_sse = true;
3031
3032         break;
3033       }
3034
3035   if (i == pta_size)
3036     error ("bad value (%s) for %sarch=%s %s",
3037            ix86_arch_string, prefix, suffix, sw);
3038
3039   ix86_arch_mask = 1u << ix86_arch;
3040   for (i = 0; i < X86_ARCH_LAST; ++i)
3041     ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3042
3043   for (i = 0; i < pta_size; i++)
3044     if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3045       {
3046         ix86_schedule = processor_alias_table[i].schedule;
3047         ix86_tune = processor_alias_table[i].processor;
3048         if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3049           {
3050             if (ix86_tune_defaulted)
3051               {
3052                 ix86_tune_string = "x86-64";
3053                 for (i = 0; i < pta_size; i++)
3054                   if (! strcmp (ix86_tune_string,
3055                                 processor_alias_table[i].name))
3056                     break;
3057                 ix86_schedule = processor_alias_table[i].schedule;
3058                 ix86_tune = processor_alias_table[i].processor;
3059               }
3060             else
3061               error ("CPU you selected does not support x86-64 "
3062                      "instruction set");
3063           }
3064         /* Intel CPUs have always interpreted SSE prefetch instructions as
3065            NOPs; so, we can enable SSE prefetch instructions even when
3066            -mtune (rather than -march) points us to a processor that has them.
3067            However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3068            higher processors.  */
3069         if (TARGET_CMOVE
3070             && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3071           x86_prefetch_sse = true;
3072         break;
3073       }
3074   if (i == pta_size)
3075     error ("bad value (%s) for %stune=%s %s",
3076            ix86_tune_string, prefix, suffix, sw);
3077
3078   ix86_tune_mask = 1u << ix86_tune;
3079   for (i = 0; i < X86_TUNE_LAST; ++i)
3080     ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3081
3082   if (optimize_size)
3083     ix86_cost = &ix86_size_cost;
3084   else
3085     ix86_cost = processor_target_table[ix86_tune].cost;
3086
3087   /* Arrange to set up i386_stack_locals for all functions.  */
3088   init_machine_status = ix86_init_machine_status;
3089
3090   /* Validate -mregparm= value.  */
3091   if (ix86_regparm_string)
3092     {
3093       if (TARGET_64BIT)
3094         warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3095       i = atoi (ix86_regparm_string);
3096       if (i < 0 || i > REGPARM_MAX)
3097         error ("%sregparm=%d%s is not between 0 and %d",
3098                prefix, i, suffix, REGPARM_MAX);
3099       else
3100         ix86_regparm = i;
3101     }
3102   if (TARGET_64BIT)
3103     ix86_regparm = REGPARM_MAX;
3104
3105   /* If the user has provided any of the -malign-* options,
3106      warn and use that value only if -falign-* is not set.
3107      Remove this code in GCC 3.2 or later.  */
3108   if (ix86_align_loops_string)
3109     {
3110       warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3111                prefix, suffix, suffix);
3112       if (align_loops == 0)
3113         {
3114           i = atoi (ix86_align_loops_string);
3115           if (i < 0 || i > MAX_CODE_ALIGN)
3116             error ("%salign-loops=%d%s is not between 0 and %d",
3117                    prefix, i, suffix, MAX_CODE_ALIGN);
3118           else
3119             align_loops = 1 << i;
3120         }
3121     }
3122
3123   if (ix86_align_jumps_string)
3124     {
3125       warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3126                prefix, suffix, suffix);
3127       if (align_jumps == 0)
3128         {
3129           i = atoi (ix86_align_jumps_string);
3130           if (i < 0 || i > MAX_CODE_ALIGN)
3131             error ("%salign-loops=%d%s is not between 0 and %d",
3132                    prefix, i, suffix, MAX_CODE_ALIGN);
3133           else
3134             align_jumps = 1 << i;
3135         }
3136     }
3137
3138   if (ix86_align_funcs_string)
3139     {
3140       warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3141                prefix, suffix, suffix);
3142       if (align_functions == 0)
3143         {
3144           i = atoi (ix86_align_funcs_string);
3145           if (i < 0 || i > MAX_CODE_ALIGN)
3146             error ("%salign-loops=%d%s is not between 0 and %d",
3147                    prefix, i, suffix, MAX_CODE_ALIGN);
3148           else
3149             align_functions = 1 << i;
3150         }
3151     }
3152
3153   /* Default align_* from the processor table.  */
3154   if (align_loops == 0)
3155     {
3156       align_loops = processor_target_table[ix86_tune].align_loop;
3157       align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3158     }
3159   if (align_jumps == 0)
3160     {
3161       align_jumps = processor_target_table[ix86_tune].align_jump;
3162       align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3163     }
3164   if (align_functions == 0)
3165     {
3166       align_functions = processor_target_table[ix86_tune].align_func;
3167     }
3168
3169   /* Validate -mbranch-cost= value, or provide default.  */
3170   ix86_branch_cost = ix86_cost->branch_cost;
3171   if (ix86_branch_cost_string)
3172     {
3173       i = atoi (ix86_branch_cost_string);
3174       if (i < 0 || i > 5)
3175         error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3176       else
3177         ix86_branch_cost = i;
3178     }
3179   if (ix86_section_threshold_string)
3180     {
3181       i = atoi (ix86_section_threshold_string);
3182       if (i < 0)
3183         error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3184       else
3185         ix86_section_threshold = i;
3186     }
3187
3188   if (ix86_tls_dialect_string)
3189     {
3190       if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3191         ix86_tls_dialect = TLS_DIALECT_GNU;
3192       else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3193         ix86_tls_dialect = TLS_DIALECT_GNU2;
3194       else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3195         ix86_tls_dialect = TLS_DIALECT_SUN;
3196       else
3197         error ("bad value (%s) for %stls-dialect=%s %s",
3198                ix86_tls_dialect_string, prefix, suffix, sw);
3199     }
3200
3201   if (ix87_precision_string)
3202     {
3203       i = atoi (ix87_precision_string);
3204       if (i != 32 && i != 64 && i != 80)
3205         error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3206     }
3207
3208   if (TARGET_64BIT)
3209     {
3210       target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3211
3212       /* Enable by default the SSE and MMX builtins.  Do allow the user to
3213          explicitly disable any of these.  In particular, disabling SSE and
3214          MMX for kernel code is extremely useful.  */
3215       if (!ix86_arch_specified)
3216       ix86_isa_flags
3217         |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3218              | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3219
3220       if (TARGET_RTD)
3221         warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3222     }
3223   else
3224     {
3225       target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3226
3227       if (!ix86_arch_specified)
3228       ix86_isa_flags
3229         |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3230
3231       /* i386 ABI does not specify red zone.  It still makes sense to use it
3232          when programmer takes care to stack from being destroyed.  */
3233       if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3234         target_flags |= MASK_NO_RED_ZONE;
3235     }
3236
3237   /* Keep nonleaf frame pointers.  */
3238   if (flag_omit_frame_pointer)
3239     target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3240   else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3241     flag_omit_frame_pointer = 1;
3242
3243   /* If we're doing fast math, we don't care about comparison order
3244      wrt NaNs.  This lets us use a shorter comparison sequence.  */
3245   if (flag_finite_math_only)
3246     target_flags &= ~MASK_IEEE_FP;
3247
3248   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3249      since the insns won't need emulation.  */
3250   if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3251     target_flags &= ~MASK_NO_FANCY_MATH_387;
3252
3253   /* Likewise, if the target doesn't have a 387, or we've specified
3254      software floating point, don't use 387 inline intrinsics.  */
3255   if (!TARGET_80387)
3256     target_flags |= MASK_NO_FANCY_MATH_387;
3257
3258   /* Turn on MMX builtins for -msse.  */
3259   if (TARGET_SSE)
3260     {
3261       ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3262       x86_prefetch_sse = true;
3263     }
3264
3265   /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
3266   if (TARGET_SSE4_2 || TARGET_ABM)
3267     ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3268
3269   /* Validate -mpreferred-stack-boundary= value or default it to
3270      PREFERRED_STACK_BOUNDARY_DEFAULT.  */
3271   ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3272   if (ix86_preferred_stack_boundary_string)
3273     {
3274       i = atoi (ix86_preferred_stack_boundary_string);
3275       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3276         error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3277                prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3278       else
3279         ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3280     }
3281
3282   /* Set the default value for -mstackrealign.  */
3283   if (ix86_force_align_arg_pointer == -1)
3284     ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3285
3286   ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3287
3288   /* Validate -mincoming-stack-boundary= value or default it to
3289      MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
3290   ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3291   if (ix86_incoming_stack_boundary_string)
3292     {
3293       i = atoi (ix86_incoming_stack_boundary_string);
3294       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3295         error ("-mincoming-stack-boundary=%d is not between %d and 12",
3296                i, TARGET_64BIT ? 4 : 2);
3297       else
3298         {
3299           ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3300           ix86_incoming_stack_boundary
3301             = ix86_user_incoming_stack_boundary;
3302         }
3303     }
3304
3305   /* Accept -msseregparm only if at least SSE support is enabled.  */
3306   if (TARGET_SSEREGPARM
3307       && ! TARGET_SSE)
3308     error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3309
3310   ix86_fpmath = TARGET_FPMATH_DEFAULT;
3311   if (ix86_fpmath_string != 0)
3312     {
3313       if (! strcmp (ix86_fpmath_string, "387"))
3314         ix86_fpmath = FPMATH_387;
3315       else if (! strcmp (ix86_fpmath_string, "sse"))
3316         {
3317           if (!TARGET_SSE)
3318             {
3319               warning (0, "SSE instruction set disabled, using 387 arithmetics");
3320               ix86_fpmath = FPMATH_387;
3321             }
3322           else
3323             ix86_fpmath = FPMATH_SSE;
3324         }
3325       else if (! strcmp (ix86_fpmath_string, "387,sse")
3326                || ! strcmp (ix86_fpmath_string, "387+sse")
3327                || ! strcmp (ix86_fpmath_string, "sse,387")
3328                || ! strcmp (ix86_fpmath_string, "sse+387")
3329                || ! strcmp (ix86_fpmath_string, "both"))
3330         {
3331           if (!TARGET_SSE)
3332             {
3333               warning (0, "SSE instruction set disabled, using 387 arithmetics");
3334               ix86_fpmath = FPMATH_387;
3335             }
3336           else if (!TARGET_80387)
3337             {
3338               warning (0, "387 instruction set disabled, using SSE arithmetics");
3339               ix86_fpmath = FPMATH_SSE;
3340             }
3341           else
3342             ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3343         }
3344       else
3345         error ("bad value (%s) for %sfpmath=%s %s",
3346                ix86_fpmath_string, prefix, suffix, sw);
3347     }
3348
3349   /* If the i387 is disabled, then do not return values in it. */
3350   if (!TARGET_80387)
3351     target_flags &= ~MASK_FLOAT_RETURNS;
3352
3353   /* Use external vectorized library in vectorizing intrinsics.  */
3354   if (ix86_veclibabi_string)
3355     {
3356       if (strcmp (ix86_veclibabi_string, "svml") == 0)
3357         ix86_veclib_handler = ix86_veclibabi_svml;
3358       else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3359         ix86_veclib_handler = ix86_veclibabi_acml;
3360       else
3361         error ("unknown vectorization library ABI type (%s) for "
3362                "%sveclibabi=%s %s", ix86_veclibabi_string,
3363                prefix, suffix, sw);
3364     }
3365
3366   if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3367       && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3368       && !optimize_size)
3369     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3370
3371   /* ??? Unwind info is not correct around the CFG unless either a frame
3372      pointer is present or M_A_O_A is set.  Fixing this requires rewriting
3373      unwind info generation to be aware of the CFG and propagating states
3374      around edges.  */
3375   if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3376        || flag_exceptions || flag_non_call_exceptions)
3377       && flag_omit_frame_pointer
3378       && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3379     {
3380       if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3381         warning (0, "unwind tables currently require either a frame pointer "
3382                  "or %saccumulate-outgoing-args%s for correctness",
3383                  prefix, suffix);
3384       target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3385     }
3386
3387   /* If stack probes are required, the space used for large function
3388      arguments on the stack must also be probed, so enable
3389      -maccumulate-outgoing-args so this happens in the prologue.  */
3390   if (TARGET_STACK_PROBE
3391       && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3392     {
3393       if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3394         warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3395                  "for correctness", prefix, suffix);
3396       target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3397     }
3398
3399   /* For sane SSE instruction set generation we need fcomi instruction.
3400      It is safe to enable all CMOVE instructions.  */
3401   if (TARGET_SSE)
3402     TARGET_CMOVE = 1;
3403
3404   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
3405   {
3406     char *p;
3407     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3408     p = strchr (internal_label_prefix, 'X');
3409     internal_label_prefix_len = p - internal_label_prefix;
3410     *p = '\0';
3411   }
3412
3413   /* When scheduling description is not available, disable scheduler pass
3414      so it won't slow down the compilation and make x87 code slower.  */
3415   if (!TARGET_SCHEDULE)
3416     flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3417
3418   if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3419     set_param_value ("simultaneous-prefetches",
3420                      ix86_cost->simultaneous_prefetches);
3421   if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3422     set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3423   if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3424     set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3425   if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3426     set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3427
3428   /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3429      can be optimized to ap = __builtin_next_arg (0).  */
3430   if (!TARGET_64BIT)
3431     targetm.expand_builtin_va_start = NULL;
3432
3433   if (TARGET_64BIT)
3434     {
3435       ix86_gen_leave = gen_leave_rex64;
3436       ix86_gen_pop1 = gen_popdi1;
3437       ix86_gen_add3 = gen_adddi3;
3438       ix86_gen_sub3 = gen_subdi3;
3439       ix86_gen_sub3_carry = gen_subdi3_carry;
3440       ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3441       ix86_gen_monitor = gen_sse3_monitor64;
3442       ix86_gen_andsp = gen_anddi3;
3443     }
3444   else
3445     {
3446       ix86_gen_leave = gen_leave;
3447       ix86_gen_pop1 = gen_popsi1;
3448       ix86_gen_add3 = gen_addsi3;
3449       ix86_gen_sub3 = gen_subsi3;
3450       ix86_gen_sub3_carry = gen_subsi3_carry;
3451       ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3452       ix86_gen_monitor = gen_sse3_monitor;
3453       ix86_gen_andsp = gen_andsi3;
3454     }
3455
3456 #ifdef USE_IX86_CLD
3457   /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
3458   if (!TARGET_64BIT)
3459     target_flags |= MASK_CLD & ~target_flags_explicit;
3460 #endif
3461
3462   /* Save the initial options in case the user does function specific options */
3463   if (main_args_p)
3464     target_option_default_node = target_option_current_node
3465       = build_target_option_node ();
3466 }
3467
3468 /* Update register usage after having seen the compiler flags.  */
3469
3470 void
3471 ix86_conditional_register_usage (void)
3472 {
3473   int i;
3474   unsigned int j;
3475
3476   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3477     {
3478       if (fixed_regs[i] > 1)
3479         fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3480       if (call_used_regs[i] > 1)
3481         call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3482     }
3483
3484   /* The PIC register, if it exists, is fixed.  */
3485   j = PIC_OFFSET_TABLE_REGNUM;
3486   if (j != INVALID_REGNUM)
3487     fixed_regs[j] = call_used_regs[j] = 1;
3488
3489   /* The MS_ABI changes the set of call-used registers.  */
3490   if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3491     {
3492       call_used_regs[SI_REG] = 0;
3493       call_used_regs[DI_REG] = 0;
3494       call_used_regs[XMM6_REG] = 0;
3495       call_used_regs[XMM7_REG] = 0;
3496       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3497         call_used_regs[i] = 0;
3498     }
3499
3500   /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3501      other call-clobbered regs for 64-bit.  */
3502   if (TARGET_64BIT)
3503     {
3504       CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3505
3506       for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3507         if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3508             && call_used_regs[i])
3509           SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3510     }
3511
3512   /* If MMX is disabled, squash the registers.  */
3513   if (! TARGET_MMX)
3514     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3515       if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3516         fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3517
3518   /* If SSE is disabled, squash the registers.  */
3519   if (! TARGET_SSE)
3520     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3521       if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3522         fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3523
3524   /* If the FPU is disabled, squash the registers.  */
3525   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3526     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3527       if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3528         fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3529
3530   /* If 32-bit, squash the 64-bit registers.  */
3531   if (! TARGET_64BIT)
3532     {
3533       for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3534         reg_names[i] = "";
3535       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3536         reg_names[i] = "";
3537     }
3538 }
3539
3540 \f
3541 /* Save the current options */
3542
3543 static void
3544 ix86_function_specific_save (struct cl_target_option *ptr)
3545 {
3546   ptr->arch = ix86_arch;
3547   ptr->schedule = ix86_schedule;
3548   ptr->tune = ix86_tune;
3549   ptr->fpmath = ix86_fpmath;
3550   ptr->branch_cost = ix86_branch_cost;
3551   ptr->tune_defaulted = ix86_tune_defaulted;
3552   ptr->arch_specified = ix86_arch_specified;
3553   ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3554   ptr->target_flags_explicit = target_flags_explicit;
3555
3556   /* The fields are char but the variables are not; make sure the
3557      values fit in the fields.  */
3558   gcc_assert (ptr->arch == ix86_arch);
3559   gcc_assert (ptr->schedule == ix86_schedule);
3560   gcc_assert (ptr->tune == ix86_tune);
3561   gcc_assert (ptr->fpmath == ix86_fpmath);
3562   gcc_assert (ptr->branch_cost == ix86_branch_cost);
3563 }
3564
3565 /* Restore the current options */
3566
3567 static void
3568 ix86_function_specific_restore (struct cl_target_option *ptr)
3569 {
3570   enum processor_type old_tune = ix86_tune;
3571   enum processor_type old_arch = ix86_arch;
3572   unsigned int ix86_arch_mask, ix86_tune_mask;
3573   int i;
3574
3575   ix86_arch = (enum processor_type) ptr->arch;
3576   ix86_schedule = (enum attr_cpu) ptr->schedule;
3577   ix86_tune = (enum processor_type) ptr->tune;
3578   ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3579   ix86_branch_cost = ptr->branch_cost;
3580   ix86_tune_defaulted = ptr->tune_defaulted;
3581   ix86_arch_specified = ptr->arch_specified;
3582   ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3583   target_flags_explicit = ptr->target_flags_explicit;
3584
3585   /* Recreate the arch feature tests if the arch changed */
3586   if (old_arch != ix86_arch)
3587     {
3588       ix86_arch_mask = 1u << ix86_arch;
3589       for (i = 0; i < X86_ARCH_LAST; ++i)
3590         ix86_arch_features[i]
3591           = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3592     }
3593
3594   /* Recreate the tune optimization tests */
3595   if (old_tune != ix86_tune)
3596     {
3597       ix86_tune_mask = 1u << ix86_tune;
3598       for (i = 0; i < X86_TUNE_LAST; ++i)
3599         ix86_tune_features[i]
3600           = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3601     }
3602 }
3603
3604 /* Print the current options */
3605
3606 static void
3607 ix86_function_specific_print (FILE *file, int indent,
3608                               struct cl_target_option *ptr)
3609 {
3610   char *target_string
3611     = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3612                           NULL, NULL, NULL, false);
3613
3614   fprintf (file, "%*sarch = %d (%s)\n",
3615            indent, "",
3616            ptr->arch,
3617            ((ptr->arch < TARGET_CPU_DEFAULT_max)
3618             ? cpu_names[ptr->arch]
3619             : "<unknown>"));
3620
3621   fprintf (file, "%*stune = %d (%s)\n",
3622            indent, "",
3623            ptr->tune,
3624            ((ptr->tune < TARGET_CPU_DEFAULT_max)
3625             ? cpu_names[ptr->tune]
3626             : "<unknown>"));
3627
3628   fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3629            (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3630            (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3631   fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3632
3633   if (target_string)
3634     {
3635       fprintf (file, "%*s%s\n", indent, "", target_string);
3636       free (target_string);
3637     }
3638 }
3639
3640 \f
3641 /* Inner function to process the attribute((target(...))), take an argument and
3642    set the current options from the argument. If we have a list, recursively go
3643    over the list.  */
3644
3645 static bool
3646 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3647 {
3648   char *next_optstr;
3649   bool ret = true;
3650
3651 #define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3652 #define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3653 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3654 #define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }
3655
3656   enum ix86_opt_type
3657   {
3658     ix86_opt_unknown,
3659     ix86_opt_yes,
3660     ix86_opt_no,
3661     ix86_opt_str,
3662     ix86_opt_isa
3663   };
3664
3665   static const struct
3666   {
3667     const char *string;
3668     size_t len;
3669     enum ix86_opt_type type;
3670     int opt;
3671     int mask;
3672   } attrs[] = {
3673     /* isa options */
3674     IX86_ATTR_ISA ("3dnow",     OPT_m3dnow),
3675     IX86_ATTR_ISA ("abm",       OPT_mabm),
3676     IX86_ATTR_ISA ("aes",       OPT_maes),
3677     IX86_ATTR_ISA ("avx",       OPT_mavx),
3678     IX86_ATTR_ISA ("mmx",       OPT_mmmx),
3679     IX86_ATTR_ISA ("pclmul",    OPT_mpclmul),
3680     IX86_ATTR_ISA ("popcnt",    OPT_mpopcnt),
3681     IX86_ATTR_ISA ("sse",       OPT_msse),
3682     IX86_ATTR_ISA ("sse2",      OPT_msse2),
3683     IX86_ATTR_ISA ("sse3",      OPT_msse3),
3684     IX86_ATTR_ISA ("sse4",      OPT_msse4),
3685     IX86_ATTR_ISA ("sse4.1",    OPT_msse4_1),
3686     IX86_ATTR_ISA ("sse4.2",    OPT_msse4_2),
3687     IX86_ATTR_ISA ("sse4a",     OPT_msse4a),
3688     IX86_ATTR_ISA ("ssse3",     OPT_mssse3),
3689     IX86_ATTR_ISA ("fma4",      OPT_mfma4),
3690     IX86_ATTR_ISA ("xop",       OPT_mxop),
3691     IX86_ATTR_ISA ("lwp",       OPT_mlwp),
3692
3693     /* string options */
3694     IX86_ATTR_STR ("arch=",     IX86_FUNCTION_SPECIFIC_ARCH),
3695     IX86_ATTR_STR ("fpmath=",   IX86_FUNCTION_SPECIFIC_FPMATH),
3696     IX86_ATTR_STR ("tune=",     IX86_FUNCTION_SPECIFIC_TUNE),
3697
3698     /* flag options */
3699     IX86_ATTR_YES ("cld",
3700                    OPT_mcld,
3701                    MASK_CLD),
3702
3703     IX86_ATTR_NO ("fancy-math-387",
3704                   OPT_mfancy_math_387,
3705                   MASK_NO_FANCY_MATH_387),
3706
3707     IX86_ATTR_YES ("ieee-fp",
3708                    OPT_mieee_fp,
3709                    MASK_IEEE_FP),
3710
3711     IX86_ATTR_YES ("inline-all-stringops",
3712                    OPT_minline_all_stringops,
3713                    MASK_INLINE_ALL_STRINGOPS),
3714
3715     IX86_ATTR_YES ("inline-stringops-dynamically",
3716                    OPT_minline_stringops_dynamically,
3717                    MASK_INLINE_STRINGOPS_DYNAMICALLY),
3718
3719     IX86_ATTR_NO ("align-stringops",
3720                   OPT_mno_align_stringops,
3721                   MASK_NO_ALIGN_STRINGOPS),
3722
3723     IX86_ATTR_YES ("recip",
3724                    OPT_mrecip,
3725                    MASK_RECIP),
3726
3727   };
3728
3729   /* If this is a list, recurse to get the options.  */
3730   if (TREE_CODE (args) == TREE_LIST)
3731     {
3732       bool ret = true;
3733
3734       for (; args; args = TREE_CHAIN (args))
3735         if (TREE_VALUE (args)
3736             && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3737           ret = false;
3738
3739       return ret;
3740     }
3741
3742   else if (TREE_CODE (args) != STRING_CST)
3743     gcc_unreachable ();
3744
3745   /* Handle multiple arguments separated by commas.  */
3746   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3747
3748   while (next_optstr && *next_optstr != '\0')
3749     {
3750       char *p = next_optstr;
3751       char *orig_p = p;
3752       char *comma = strchr (next_optstr, ',');
3753       const char *opt_string;
3754       size_t len, opt_len;
3755       int opt;
3756       bool opt_set_p;
3757       char ch;
3758       unsigned i;
3759       enum ix86_opt_type type = ix86_opt_unknown;
3760       int mask = 0;
3761
3762       if (comma)
3763         {
3764           *comma = '\0';
3765           len = comma - next_optstr;
3766           next_optstr = comma + 1;
3767         }
3768       else
3769         {
3770           len = strlen (p);
3771           next_optstr = NULL;
3772         }
3773
3774       /* Recognize no-xxx.  */
3775       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3776         {
3777           opt_set_p = false;
3778           p += 3;
3779           len -= 3;
3780         }
3781       else
3782         opt_set_p = true;
3783
3784       /* Find the option.  */
3785       ch = *p;
3786       opt = N_OPTS;
3787       for (i = 0; i < ARRAY_SIZE (attrs); i++)
3788         {
3789           type = attrs[i].type;
3790           opt_len = attrs[i].len;
3791           if (ch == attrs[i].string[0]
3792               && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3793               && memcmp (p, attrs[i].string, opt_len) == 0)
3794             {
3795               opt = attrs[i].opt;
3796               mask = attrs[i].mask;
3797               opt_string = attrs[i].string;
3798               break;
3799             }
3800         }
3801
3802       /* Process the option.  */
3803       if (opt == N_OPTS)
3804         {
3805           error ("attribute(target(\"%s\")) is unknown", orig_p);
3806           ret = false;
3807         }
3808
3809       else if (type == ix86_opt_isa)
3810         ix86_handle_option (opt, p, opt_set_p);
3811
3812       else if (type == ix86_opt_yes || type == ix86_opt_no)
3813         {
3814           if (type == ix86_opt_no)
3815             opt_set_p = !opt_set_p;
3816
3817           if (opt_set_p)
3818             target_flags |= mask;
3819           else
3820             target_flags &= ~mask;
3821         }
3822
3823       else if (type == ix86_opt_str)
3824         {
3825           if (p_strings[opt])
3826             {
3827               error ("option(\"%s\") was already specified", opt_string);
3828               ret = false;
3829             }
3830           else
3831             p_strings[opt] = xstrdup (p + opt_len);
3832         }
3833
3834       else
3835         gcc_unreachable ();
3836     }
3837
3838   return ret;
3839 }
3840
3841 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
3842
3843 tree
3844 ix86_valid_target_attribute_tree (tree args)
3845 {
3846   const char *orig_arch_string = ix86_arch_string;
3847   const char *orig_tune_string = ix86_tune_string;
3848   const char *orig_fpmath_string = ix86_fpmath_string;
3849   int orig_tune_defaulted = ix86_tune_defaulted;
3850   int orig_arch_specified = ix86_arch_specified;
3851   char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3852   tree t = NULL_TREE;
3853   int i;
3854   struct cl_target_option *def
3855     = TREE_TARGET_OPTION (target_option_default_node);
3856
3857   /* Process each of the options on the chain.  */
3858   if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3859     return NULL_TREE;
3860
3861   /* If the changed options are different from the default, rerun override_options,
3862      and then save the options away.  The string options are are attribute options,
3863      and will be undone when we copy the save structure.  */
3864   if (ix86_isa_flags != def->ix86_isa_flags
3865       || target_flags != def->target_flags
3866       || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3867       || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3868       || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3869     {
3870       /* If we are using the default tune= or arch=, undo the string assigned,
3871          and use the default.  */
3872       if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3873         ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3874       else if (!orig_arch_specified)
3875         ix86_arch_string = NULL;
3876
3877       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3878         ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3879       else if (orig_tune_defaulted)
3880         ix86_tune_string = NULL;
3881
3882       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
3883       if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3884         ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3885       else if (!TARGET_64BIT && TARGET_SSE)
3886         ix86_fpmath_string = "sse,387";
3887
3888       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
3889       override_options (false);
3890
3891       /* Add any builtin functions with the new isa if any.  */
3892       ix86_add_new_builtins (ix86_isa_flags);
3893
3894       /* Save the current options unless we are validating options for
3895          #pragma.  */
3896       t = build_target_option_node ();
3897
3898       ix86_arch_string = orig_arch_string;
3899       ix86_tune_string = orig_tune_string;
3900       ix86_fpmath_string = orig_fpmath_string;
3901
3902       /* Free up memory allocated to hold the strings */
3903       for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3904         if (option_strings[i])
3905           free (option_strings[i]);
3906     }
3907
3908   return t;
3909 }
3910
3911 /* Hook to validate attribute((target("string"))).  */
3912
3913 static bool
3914 ix86_valid_target_attribute_p (tree fndecl,
3915                                tree ARG_UNUSED (name),
3916                                tree args,
3917                                int ARG_UNUSED (flags))
3918 {
3919   struct cl_target_option cur_target;
3920   bool ret = true;
3921   tree old_optimize = build_optimization_node ();
3922   tree new_target, new_optimize;
3923   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3924
3925   /* If the function changed the optimization levels as well as setting target
3926      options, start with the optimizations specified.  */
3927   if (func_optimize && func_optimize != old_optimize)
3928     cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3929
3930   /* The target attributes may also change some optimization flags, so update
3931      the optimization options if necessary.  */
3932   cl_target_option_save (&cur_target);
3933   new_target = ix86_valid_target_attribute_tree (args);
3934   new_optimize = build_optimization_node ();
3935
3936   if (!new_target)
3937     ret = false;
3938
3939   else if (fndecl)
3940     {
3941       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3942
3943       if (old_optimize != new_optimize)
3944         DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3945     }
3946
3947   cl_target_option_restore (&cur_target);
3948
3949   if (old_optimize != new_optimize)
3950     cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3951
3952   return ret;
3953 }
3954
3955 \f
3956 /* Hook to determine if one function can safely inline another.  */
3957
3958 static bool
3959 ix86_can_inline_p (tree caller, tree callee)
3960 {
3961   bool ret = false;
3962   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3963   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3964
3965   /* If callee has no option attributes, then it is ok to inline.  */
3966   if (!callee_tree)
3967     ret = true;
3968
3969   /* If caller has no option attributes, but callee does then it is not ok to
3970      inline.  */
3971   else if (!caller_tree)
3972     ret = false;
3973
3974   else
3975     {
3976       struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3977       struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3978
3979       /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
3980          can inline a SSE2 function but a SSE2 function can't inline a SSE4
3981          function.  */
3982       if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3983           != callee_opts->ix86_isa_flags)
3984         ret = false;
3985
3986       /* See if we have the same non-isa options.  */
3987       else if (caller_opts->target_flags != callee_opts->target_flags)
3988         ret = false;
3989
3990       /* See if arch, tune, etc. are the same.  */
3991       else if (caller_opts->arch != callee_opts->arch)
3992         ret = false;
3993
3994       else if (caller_opts->tune != callee_opts->tune)
3995         ret = false;
3996
3997       else if (caller_opts->fpmath != callee_opts->fpmath)
3998         ret = false;
3999
4000       else if (caller_opts->branch_cost != callee_opts->branch_cost)
4001         ret = false;
4002
4003       else
4004         ret = true;
4005     }
4006
4007   return ret;
4008 }
4009
4010 \f
4011 /* Remember the last target of ix86_set_current_function.  */
4012 static GTY(()) tree ix86_previous_fndecl;
4013
4014 /* Establish appropriate back-end context for processing the function
4015    FNDECL.  The argument might be NULL to indicate processing at top
4016    level, outside of any function scope.  */
4017 static void
4018 ix86_set_current_function (tree fndecl)
4019 {
4020   /* Only change the context if the function changes.  This hook is called
4021      several times in the course of compiling a function, and we don't want to
4022      slow things down too much or call target_reinit when it isn't safe.  */
4023   if (fndecl && fndecl != ix86_previous_fndecl)
4024     {
4025       tree old_tree = (ix86_previous_fndecl
4026                        ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4027                        : NULL_TREE);
4028
4029       tree new_tree = (fndecl
4030                        ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4031                        : NULL_TREE);
4032
4033       ix86_previous_fndecl = fndecl;
4034       if (old_tree == new_tree)
4035         ;
4036
4037       else if (new_tree)
4038         {
4039           cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4040           target_reinit ();
4041         }
4042
4043       else if (old_tree)
4044         {
4045           struct cl_target_option *def
4046             = TREE_TARGET_OPTION (target_option_current_node);
4047
4048           cl_target_option_restore (def);
4049           target_reinit ();
4050         }
4051     }
4052 }
4053
4054 \f
4055 /* Return true if this goes in large data/bss.  */
4056
4057 static bool
4058 ix86_in_large_data_p (tree exp)
4059 {
4060   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4061     return false;
4062
4063   /* Functions are never large data.  */
4064   if (TREE_CODE (exp) == FUNCTION_DECL)
4065     return false;
4066
4067   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4068     {
4069       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4070       if (strcmp (section, ".ldata") == 0
4071           || strcmp (section, ".lbss") == 0)
4072         return true;
4073       return false;
4074     }
4075   else
4076     {
4077       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4078
4079       /* If this is an incomplete type with size 0, then we can't put it
4080          in data because it might be too big when completed.  */
4081       if (!size || size > ix86_section_threshold)
4082         return true;
4083     }
4084
4085   return false;
4086 }
4087
4088 /* Switch to the appropriate section for output of DECL.
4089    DECL is either a `VAR_DECL' node or a constant of some sort.
4090    RELOC indicates whether forming the initial value of DECL requires
4091    link-time relocations.  */
4092
4093 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4094         ATTRIBUTE_UNUSED;
4095
4096 static section *
4097 x86_64_elf_select_section (tree decl, int reloc,
4098                            unsigned HOST_WIDE_INT align)
4099 {
4100   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4101       && ix86_in_large_data_p (decl))
4102     {
4103       const char *sname = NULL;
4104       unsigned int flags = SECTION_WRITE;
4105       switch (categorize_decl_for_section (decl, reloc))
4106         {
4107         case SECCAT_DATA:
4108           sname = ".ldata";
4109           break;
4110         case SECCAT_DATA_REL:
4111           sname = ".ldata.rel";
4112           break;
4113         case SECCAT_DATA_REL_LOCAL:
4114           sname = ".ldata.rel.local";
4115           break;
4116         case SECCAT_DATA_REL_RO:
4117           sname = ".ldata.rel.ro";
4118           break;
4119         case SECCAT_DATA_REL_RO_LOCAL:
4120           sname = ".ldata.rel.ro.local";
4121           break;
4122         case SECCAT_BSS:
4123           sname = ".lbss";
4124           flags |= SECTION_BSS;
4125           break;
4126         case SECCAT_RODATA:
4127         case SECCAT_RODATA_MERGE_STR:
4128         case SECCAT_RODATA_MERGE_STR_INIT:
4129         case SECCAT_RODATA_MERGE_CONST:
4130           sname = ".lrodata";
4131           flags = 0;
4132           break;
4133         case SECCAT_SRODATA:
4134         case SECCAT_SDATA:
4135         case SECCAT_SBSS:
4136           gcc_unreachable ();
4137         case SECCAT_TEXT:
4138         case SECCAT_TDATA:
4139         case SECCAT_TBSS:
4140           /* We don't split these for medium model.  Place them into
4141              default sections and hope for best.  */
4142           break;
4143         case SECCAT_EMUTLS_VAR:
4144         case SECCAT_EMUTLS_TMPL:
4145           gcc_unreachable ();
4146         }
4147       if (sname)
4148         {
4149           /* We might get called with string constants, but get_named_section
4150              doesn't like them as they are not DECLs.  Also, we need to set
4151              flags in that case.  */
4152           if (!DECL_P (decl))
4153             return get_section (sname, flags, NULL);
4154           return get_named_section (decl, sname, reloc);
4155         }
4156     }
4157   return default_elf_select_section (decl, reloc, align);
4158 }
4159
4160 /* Build up a unique section name, expressed as a
4161    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4162    RELOC indicates whether the initial value of EXP requires
4163    link-time relocations.  */
4164
4165 static void ATTRIBUTE_UNUSED
4166 x86_64_elf_unique_section (tree decl, int reloc)
4167 {
4168   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4169       && ix86_in_large_data_p (decl))
4170     {
4171       const char *prefix = NULL;
4172       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
4173       bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4174
4175       switch (categorize_decl_for_section (decl, reloc))
4176         {
4177         case SECCAT_DATA:
4178         case SECCAT_DATA_REL:
4179         case SECCAT_DATA_REL_LOCAL:
4180         case SECCAT_DATA_REL_RO:
4181         case SECCAT_DATA_REL_RO_LOCAL:
4182           prefix = one_only ? ".ld" : ".ldata";
4183           break;
4184         case SECCAT_BSS:
4185           prefix = one_only ? ".lb" : ".lbss";
4186           break;
4187         case SECCAT_RODATA:
4188         case SECCAT_RODATA_MERGE_STR:
4189         case SECCAT_RODATA_MERGE_STR_INIT:
4190         case SECCAT_RODATA_MERGE_CONST:
4191           prefix = one_only ? ".lr" : ".lrodata";
4192           break;
4193         case SECCAT_SRODATA:
4194         case SECCAT_SDATA:
4195         case SECCAT_SBSS:
4196           gcc_unreachable ();
4197         case SECCAT_TEXT:
4198         case SECCAT_TDATA:
4199         case SECCAT_TBSS:
4200           /* We don't split these for medium model.  Place them into
4201              default sections and hope for best.  */
4202           break;
4203         case SECCAT_EMUTLS_VAR:
4204           prefix = targetm.emutls.var_section;
4205           break;
4206         case SECCAT_EMUTLS_TMPL:
4207           prefix = targetm.emutls.tmpl_section;
4208           break;
4209         }
4210       if (prefix)
4211         {
4212           const char *name, *linkonce;
4213           char *string;
4214
4215           name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4216           name = targetm.strip_name_encoding (name);
4217
4218           /* If we're using one_only, then there needs to be a .gnu.linkonce
4219              prefix to the section name.  */
4220           linkonce = one_only ? ".gnu.linkonce" : "";
4221
4222           string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4223
4224           DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4225           return;
4226         }
4227     }
4228   default_unique_section (decl, reloc);
4229 }
4230
#ifdef COMMON_ASM_OP
/* Output to FILE the assembler code declaring an uninitialized
   external linkage data object NAME of SIZE bytes.

   For medium model x86-64, objects larger than ix86_section_threshold
   are declared with the .largecomm directive; everything else uses
   COMMON_ASM_OP.  ALIGN is divided by BITS_PER_UNIT before being
   printed.  */
void
x86_elf_aligned_common (FILE *file,
                        const char *name, unsigned HOST_WIDE_INT size,
                        int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  /* The cast makes the argument type agree with the %u conversion.  */
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
           size, (unsigned int) (align / BITS_PER_UNIT));
}
#endif
4252
4253 /* Utility function for targets to use in implementing
4254    ASM_OUTPUT_ALIGNED_BSS.  */
4255
4256 void
4257 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4258                         const char *name, unsigned HOST_WIDE_INT size,
4259                         int align)
4260 {
4261   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4262       && size > (unsigned int)ix86_section_threshold)
4263     switch_to_section (get_named_section (decl, ".lbss", 0));
4264   else
4265     switch_to_section (bss_section);
4266   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4267 #ifdef ASM_DECLARE_OBJECT_NAME
4268   last_assemble_variable_decl = decl;
4269   ASM_DECLARE_OBJECT_NAME (file, name, decl);
4270 #else
4271   /* Standard thing is just output label for the object.  */
4272   ASM_OUTPUT_LABEL (file, name);
4273 #endif /* ASM_DECLARE_OBJECT_NAME */
4274   ASM_OUTPUT_SKIP (file, size ? size : 1);
4275 }
4276 \f
4277 void
4278 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4279 {
4280   /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
4281      make the problem with not enough registers even worse.  */
4282 #ifdef INSN_SCHEDULING
4283   if (level > 1)
4284     flag_schedule_insns = 0;
4285 #endif
4286
4287   if (TARGET_MACHO)
4288     /* The Darwin libraries never set errno, so we might as well
4289        avoid calling them when that's the only reason we would.  */
4290     flag_errno_math = 0;
4291
4292   /* The default values of these switches depend on the TARGET_64BIT
4293      that is not known at this moment.  Mark these values with 2 and
4294      let user the to override these.  In case there is no command line option
4295      specifying them, we will set the defaults in override_options.  */
4296   if (optimize >= 1)
4297     flag_omit_frame_pointer = 2;
4298   flag_pcc_struct_return = 2;
4299   flag_asynchronous_unwind_tables = 2;
4300   flag_vect_cost_model = 1;
4301 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4302   SUBTARGET_OPTIMIZATION_OPTIONS;
4303 #endif
4304 }
4305 \f
4306 /* Decide whether we can make a sibling call to a function.  DECL is the
4307    declaration of the function being targeted by the call and EXP is the
4308    CALL_EXPR representing the call.  */
4309
4310 static bool
4311 ix86_function_ok_for_sibcall (tree decl, tree exp)
4312 {
4313   tree type, decl_or_type;
4314   rtx a, b;
4315
4316   /* If we are generating position-independent code, we cannot sibcall
4317      optimize any indirect call, or a direct call to a global function,
4318      as the PLT requires %ebx be live.  */
4319   if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4320     return false;
4321
4322   /* If we need to align the outgoing stack, then sibcalling would
4323      unalign the stack, which may break the called function.  */
4324   if (ix86_minimum_incoming_stack_boundary (true)
4325       < PREFERRED_STACK_BOUNDARY)
4326     return false;
4327
4328   if (decl)
4329     {
4330       decl_or_type = decl;
4331       type = TREE_TYPE (decl);
4332     }
4333   else
4334     {
4335       /* We're looking at the CALL_EXPR, we need the type of the function.  */
4336       type = CALL_EXPR_FN (exp);                /* pointer expression */
4337       type = TREE_TYPE (type);                  /* pointer type */
4338       type = TREE_TYPE (type);                  /* function type */
4339       decl_or_type = type;
4340     }
4341
4342   /* Check that the return value locations are the same.  Like
4343      if we are returning floats on the 80387 register stack, we cannot
4344      make a sibcall from a function that doesn't return a float to a
4345      function that does or, conversely, from a function that does return
4346      a float to a function that doesn't; the necessary stack adjustment
4347      would not be executed.  This is also the place we notice
4348      differences in the return value ABI.  Note that it is ok for one
4349      of the functions to have void return type as long as the return
4350      value of the other is passed in a register.  */
4351   a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4352   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4353                            cfun->decl, false);
4354   if (STACK_REG_P (a) || STACK_REG_P (b))
4355     {
4356       if (!rtx_equal_p (a, b))
4357         return false;
4358     }
4359   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4360     ;
4361   else if (!rtx_equal_p (a, b))
4362     return false;
4363
4364   if (TARGET_64BIT)
4365     {
4366       /* The SYSV ABI has more call-clobbered registers;
4367          disallow sibcalls from MS to SYSV.  */
4368       if (cfun->machine->call_abi == MS_ABI
4369           && ix86_function_type_abi (type) == SYSV_ABI)
4370         return false;
4371     }
4372   else
4373     {
4374       /* If this call is indirect, we'll need to be able to use a
4375          call-clobbered register for the address of the target function.
4376          Make sure that all such registers are not used for passing
4377          parameters.  Note that DLLIMPORT functions are indirect.  */
4378       if (!decl
4379           || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4380         {
4381           if (ix86_function_regparm (type, NULL) >= 3)
4382             {
4383               /* ??? Need to count the actual number of registers to be used,
4384                  not the possible number of registers.  Fix later.  */
4385               return false;
4386             }
4387         }
4388     }
4389
4390   /* Otherwise okay.  That also includes certain types of indirect calls.  */
4391   return true;
4392 }
4393
4394 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4395    calling convention attributes;
4396    arguments as in struct attribute_spec.handler.  */
4397
4398 static tree
4399 ix86_handle_cconv_attribute (tree *node, tree name,
4400                                    tree args,
4401                                    int flags ATTRIBUTE_UNUSED,
4402                                    bool *no_add_attrs)
4403 {
4404   if (TREE_CODE (*node) != FUNCTION_TYPE
4405       && TREE_CODE (*node) != METHOD_TYPE
4406       && TREE_CODE (*node) != FIELD_DECL
4407       && TREE_CODE (*node) != TYPE_DECL)
4408     {
4409       warning (OPT_Wattributes, "%qE attribute only applies to functions",
4410                name);
4411       *no_add_attrs = true;
4412       return NULL_TREE;
4413     }
4414
4415   /* Can combine regparm with all attributes but fastcall.  */
4416   if (is_attribute_p ("regparm", name))
4417     {
4418       tree cst;
4419
4420       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4421         {
4422           error ("fastcall and regparm attributes are not compatible");
4423         }
4424
4425       cst = TREE_VALUE (args);
4426       if (TREE_CODE (cst) != INTEGER_CST)
4427         {
4428           warning (OPT_Wattributes,
4429                    "%qE attribute requires an integer constant argument",
4430                    name);
4431           *no_add_attrs = true;
4432         }
4433       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4434         {
4435           warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4436                    name, REGPARM_MAX);
4437           *no_add_attrs = true;
4438         }
4439
4440       return NULL_TREE;
4441     }
4442
4443   if (TARGET_64BIT)
4444     {
4445       /* Do not warn when emulating the MS ABI.  */
4446       if (TREE_CODE (*node) != FUNCTION_TYPE
4447           || ix86_function_type_abi (*node) != MS_ABI)
4448         warning (OPT_Wattributes, "%qE attribute ignored",
4449                  name);
4450       *no_add_attrs = true;
4451       return NULL_TREE;
4452     }
4453
4454   /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
4455   if (is_attribute_p ("fastcall", name))
4456     {
4457       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4458         {
4459           error ("fastcall and cdecl attributes are not compatible");
4460         }
4461       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4462         {
4463           error ("fastcall and stdcall attributes are not compatible");
4464         }
4465       if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4466         {
4467           error ("fastcall and regparm attributes are not compatible");
4468         }
4469     }
4470
4471   /* Can combine stdcall with fastcall (redundant), regparm and
4472      sseregparm.  */
4473   else if (is_attribute_p ("stdcall", name))
4474     {
4475       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4476         {
4477           error ("stdcall and cdecl attributes are not compatible");
4478         }
4479       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4480         {
4481           error ("stdcall and fastcall attributes are not compatible");
4482         }
4483     }
4484
4485   /* Can combine cdecl with regparm and sseregparm.  */
4486   else if (is_attribute_p ("cdecl", name))
4487     {
4488       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4489         {
4490           error ("stdcall and cdecl attributes are not compatible");
4491         }
4492       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4493         {
4494           error ("fastcall and cdecl attributes are not compatible");
4495         }
4496     }
4497
4498   /* Can combine sseregparm with all attributes.  */
4499
4500   return NULL_TREE;
4501 }
4502
4503 /* Return 0 if the attributes for two types are incompatible, 1 if they
4504    are compatible, and 2 if they are nearly compatible (which causes a
4505    warning to be generated).  */
4506
4507 static int
4508 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4509 {
4510   /* Check for mismatch of non-default calling convention.  */
4511   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4512
4513   if (TREE_CODE (type1) != FUNCTION_TYPE
4514       && TREE_CODE (type1) != METHOD_TYPE)
4515     return 1;
4516
4517   /* Check for mismatched fastcall/regparm types.  */
4518   if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4519        != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4520       || (ix86_function_regparm (type1, NULL)
4521           != ix86_function_regparm (type2, NULL)))
4522     return 0;
4523
4524   /* Check for mismatched sseregparm types.  */
4525   if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4526       != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4527     return 0;
4528
4529   /* Check for mismatched return types (cdecl vs stdcall).  */
4530   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4531       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4532     return 0;
4533
4534   return 1;
4535 }
4536 \f
4537 /* Return the regparm value for a function with the indicated TYPE and DECL.
4538    DECL may be NULL when calling function indirectly
4539    or considering a libcall.  */
4540
4541 static int
4542 ix86_function_regparm (const_tree type, const_tree decl)
4543 {
4544   tree attr;
4545   int regparm;
4546
4547   if (TARGET_64BIT)
4548     return (ix86_function_type_abi (type) == SYSV_ABI
4549             ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4550
4551   regparm = ix86_regparm;
4552   attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4553   if (attr)
4554     {
4555       regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4556       return regparm;
4557     }
4558
4559   if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4560     return 2;
4561
4562   /* Use register calling convention for local functions when possible.  */
4563   if (decl
4564       && TREE_CODE (decl) == FUNCTION_DECL
4565       && optimize
4566       && !profile_flag)
4567     {
4568       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
4569       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4570       if (i && i->local)
4571         {
4572           int local_regparm, globals = 0, regno;
4573
4574           /* Make sure no regparm register is taken by a
4575              fixed register variable.  */
4576           for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4577             if (fixed_regs[local_regparm])
4578               break;
4579
4580           /* We don't want to use regparm(3) for nested functions as
4581              these use a static chain pointer in the third argument.  */
4582           if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4583             local_regparm = 2;
4584
4585           /* Each fixed register usage increases register pressure,
4586              so less registers should be used for argument passing.
4587              This functionality can be overriden by an explicit
4588              regparm value.  */
4589           for (regno = 0; regno <= DI_REG; regno++)
4590             if (fixed_regs[regno])
4591               globals++;
4592
4593           local_regparm
4594             = globals < local_regparm ? local_regparm - globals : 0;
4595
4596           if (local_regparm > regparm)
4597             regparm = local_regparm;
4598         }
4599     }
4600
4601   return regparm;
4602 }
4603
4604 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4605    DFmode (2) arguments in SSE registers for a function with the
4606    indicated TYPE and DECL.  DECL may be NULL when calling function
4607    indirectly or considering a libcall.  Otherwise return 0.  */
4608
4609 static int
4610 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4611 {
4612   gcc_assert (!TARGET_64BIT);
4613
4614   /* Use SSE registers to pass SFmode and DFmode arguments if requested
4615      by the sseregparm attribute.  */
4616   if (TARGET_SSEREGPARM
4617       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4618     {
4619       if (!TARGET_SSE)
4620         {
4621           if (warn)
4622             {
4623               if (decl)
4624                 error ("Calling %qD with attribute sseregparm without "
4625                        "SSE/SSE2 enabled", decl);
4626               else
4627                 error ("Calling %qT with attribute sseregparm without "
4628                        "SSE/SSE2 enabled", type);
4629             }
4630           return 0;
4631         }
4632
4633       return 2;
4634     }
4635
4636   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4637      (and DFmode for SSE2) arguments in SSE registers.  */
4638   if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4639     {
4640       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
4641       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4642       if (i && i->local)
4643         return TARGET_SSE2 ? 2 : 1;
4644     }
4645
4646   return 0;
4647 }
4648
4649 /* Return true if EAX is live at the start of the function.  Used by
4650    ix86_expand_prologue to determine if we need special help before
4651    calling allocate_stack_worker.  */
4652
4653 static bool
4654 ix86_eax_live_at_start_p (void)
4655 {
4656   /* Cheat.  Don't bother working forward from ix86_function_regparm
4657      to the function type to whether an actual argument is located in
4658      eax.  Instead just look at cfg info, which is still close enough
4659      to correct at this point.  This gives false positives for broken
4660      functions that might use uninitialized data that happens to be
4661      allocated in eax, but who cares?  */
4662   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4663 }
4664
4665 /* Value is the number of bytes of arguments automatically
4666    popped when returning from a subroutine call.
4667    FUNDECL is the declaration node of the function (as a tree),
4668    FUNTYPE is the data type of the function (as a tree),
4669    or for a library call it is an identifier node for the subroutine name.
4670    SIZE is the number of bytes of arguments passed on the stack.
4671
4672    On the 80386, the RTD insn may be used to pop them if the number
4673      of args is fixed, but if the number is variable then the caller
4674      must pop them all.  RTD can't be used for library calls now
4675      because the library is compiled with the Unix compiler.
4676    Use of RTD is a selectable option, since it is incompatible with
4677    standard Unix calling sequences.  If the option is not selected,
4678    the caller must always pop the args.
4679
4680    The attribute stdcall is equivalent to RTD on a per module basis.  */
4681
4682 int
4683 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4684 {
4685   int rtd;
4686
4687   /* None of the 64-bit ABIs pop arguments.  */
4688   if (TARGET_64BIT)
4689     return 0;
4690
4691   rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4692
4693   /* Cdecl functions override -mrtd, and never pop the stack.  */
4694   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4695     {
4696       /* Stdcall and fastcall functions will pop the stack if not
4697          variable args.  */
4698       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4699           || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4700         rtd = 1;
4701
4702       if (rtd && ! stdarg_p (funtype))
4703         return size;
4704     }
4705
4706   /* Lose any fake structure return argument if it is passed on the stack.  */
4707   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4708       && !KEEP_AGGREGATE_RETURN_POINTER)
4709     {
4710       int nregs = ix86_function_regparm (funtype, fundecl);
4711       if (nregs == 0)
4712         return GET_MODE_SIZE (Pmode);
4713     }
4714
4715   return 0;
4716 }
4717 \f
4718 /* Argument support functions.  */
4719
4720 /* Return true when register may be used to pass function parameters.  */
4721 bool
4722 ix86_function_arg_regno_p (int regno)
4723 {
4724   int i;
4725   const int *parm_regs;
4726
4727   if (!TARGET_64BIT)
4728     {
4729       if (TARGET_MACHO)
4730         return (regno < REGPARM_MAX
4731                 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4732       else
4733         return (regno < REGPARM_MAX
4734                 || (TARGET_MMX && MMX_REGNO_P (regno)
4735                     && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4736                 || (TARGET_SSE && SSE_REGNO_P (regno)
4737                     && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4738     }
4739
4740   if (TARGET_MACHO)
4741     {
4742       if (SSE_REGNO_P (regno) && TARGET_SSE)
4743         return true;
4744     }
4745   else
4746     {
4747       if (TARGET_SSE && SSE_REGNO_P (regno)
4748           && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4749         return true;
4750     }
4751
4752   /* TODO: The function should depend on current function ABI but
4753      builtins.c would need updating then. Therefore we use the
4754      default ABI.  */
4755
4756   /* RAX is used as hidden argument to va_arg functions.  */
4757   if (ix86_abi == SYSV_ABI && regno == AX_REG)
4758     return true;
4759
4760   if (ix86_abi == MS_ABI)
4761     parm_regs = x86_64_ms_abi_int_parameter_registers;
4762   else
4763     parm_regs = x86_64_int_parameter_registers;
4764   for (i = 0; i < (ix86_abi == MS_ABI
4765                    ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
4766     if (regno == parm_regs[i])
4767       return true;
4768   return false;
4769 }
4770
4771 /* Return if we do not know how to pass TYPE solely in registers.  */
4772
4773 static bool
4774 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4775 {
4776   if (must_pass_in_stack_var_size_or_pad (mode, type))
4777     return true;
4778
4779   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
4780      The layout_type routine is crafty and tries to trick us into passing
4781      currently unsupported vector types on the stack by using TImode.  */
4782   return (!TARGET_64BIT && mode == TImode
4783           && type && TREE_CODE (type) != VECTOR_TYPE);
4784 }
4785
4786 /* It returns the size, in bytes, of the area reserved for arguments passed
4787    in registers for the function represented by fndecl dependent to the used
4788    abi format.  */
4789 int
4790 ix86_reg_parm_stack_space (const_tree fndecl)
4791 {
4792   enum calling_abi call_abi = SYSV_ABI;
4793   if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4794     call_abi = ix86_function_abi (fndecl);
4795   else
4796     call_abi = ix86_function_type_abi (fndecl);
4797   if (call_abi == MS_ABI)
4798     return 32;
4799   return 0;
4800 }
4801
4802 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4803    call abi used.  */
4804 enum calling_abi
4805 ix86_function_type_abi (const_tree fntype)
4806 {
4807   if (TARGET_64BIT && fntype != NULL)
4808     {
4809       enum calling_abi abi = ix86_abi;
4810       if (abi == SYSV_ABI)
4811         {
4812           if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4813             abi = MS_ABI;
4814         }
4815       else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
4816         abi = SYSV_ABI;
4817       return abi;
4818     }
4819   return ix86_abi;
4820 }
4821
4822 static bool
4823 ix86_function_ms_hook_prologue (const_tree fntype)
4824 {
4825   if (!TARGET_64BIT)
4826     {
4827       if (lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
4828         {
4829           if (decl_function_context (fntype) != NULL_TREE)
4830           {
4831             error_at (DECL_SOURCE_LOCATION (fntype),
4832                 "ms_hook_prologue is not compatible with nested function");
4833           }
4834
4835           return true;
4836         }
4837     }
4838   return false;
4839 }
4840
4841 static enum calling_abi
4842 ix86_function_abi (const_tree fndecl)
4843 {
4844   /* A null FNDECL gets ix86_abi; otherwise the decl's type decides.  */
4845   return (fndecl
4846           ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi);
4847 }
4848
4849 /* Return the calling ABI (SYSV_ABI or MS_ABI) recorded for cfun, or
4850    ix86_abi when there is no current function or the target is not 64-bit.  */
4851 enum calling_abi
4852 ix86_cfun_abi (void)
4853 {
4854   if (cfun && TARGET_64BIT)
4855     return cfun->machine->call_abi;
4856   return ix86_abi;
4857 }
4858
4859 /* regclass.c: external declaration of init_regs.
        NOTE(review): no call to init_regs is visible in this part of the
        file (ix86_maybe_switch_abi calls reinit_regs instead); confirm
        whether this declaration is still needed.  */
4860 extern void init_regs (void);
4861
4862 /* Implementation of the call ABI switching target hook.  Record in
4863    cfun->machine->call_abi the ABI given by the type of FNDECL (ix86_abi if
4864    FNDECL is null).  See also CONDITIONAL_REGISTER_USAGE for more details.  */
4865 void
4866 ix86_call_abi_override (const_tree fndecl)
4867 {
4868   enum calling_abi abi = ix86_abi;
4869   if (fndecl != NULL_TREE)
4870     abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4871   cfun->machine->call_abi = abi;
4872 }
4873
4874 /* MS and SYSV ABI have different sets of call used registers.  Avoid the
4875    expensive register re-initialization on every function context switch, as
4876    it is needed only during RTL expansion: test the SI_REG entry first.  */
4877 static void
4878 ix86_maybe_switch_abi (void)
4879 {
4880   const bool want_ms = TARGET_64BIT && cfun->machine->call_abi == MS_ABI;
4881   if (TARGET_64BIT && call_used_regs[SI_REG] == want_ms)
4882     reinit_regs ();
4883 }
4884
4885 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4886    for a call to a function whose data type is FNTYPE.
4887    For a library call, FNTYPE is 0.

        CUM is zeroed first; then the call ABI, the number of integer, SSE
        and MMX argument registers, the warn_* flags and maybe_vaarg are
        filled in.  On 32-bit targets the register counts are then adjusted
        for stdarg functions and for the fastcall / regparm / sseregparm
        conventions.  */
4888
4889 void
4890 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
4891                       tree fntype,      /* function type, or 0 for a library call */
4892                       rtx libname,      /* SYMBOL_REF of library name or 0 */
4893                       tree fndecl)      /* decl of the callee, or 0 */
4894 {
4895   struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4896   memset (cum, 0, sizeof (*cum));
4897
       /* Take the ABI from the declaration when there is one, otherwise from
          the function type.  */
4898   if (fndecl)
4899    cum->call_abi = ix86_function_abi (fndecl);
4900   else
4901    cum->call_abi = ix86_function_type_abi (fntype);
4902   /* Set up the number of registers to use for passing arguments.  */
4903
4904   if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4905     sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
4906            "or subtarget optimization implying it");
       /* Start from ix86_regparm.  In 64-bit mode a call whose ABI differs
          from ix86_abi gets the register count of that other ABI.  */
4907   cum->nregs = ix86_regparm;
4908   if (TARGET_64BIT)
4909     {
4910       if (cum->call_abi != ix86_abi)
4911         cum->nregs = (ix86_abi != SYSV_ABI
4912                       ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4913     }
       /* Same scheme for the SSE argument registers.  */
4914   if (TARGET_SSE)
4915     {
4916       cum->sse_nregs = SSE_REGPARM_MAX;
4917       if (TARGET_64BIT)
4918         {
4919           if (cum->call_abi != ix86_abi)
4920             cum->sse_nregs = (ix86_abi != SYSV_ABI
4921                               ? X86_64_SSE_REGPARM_MAX
4922                               : X86_64_MS_SSE_REGPARM_MAX);
4923         }
4924     }
4925   if (TARGET_MMX)
4926     cum->mmx_nregs = MMX_REGPARM_MAX;
4927   cum->warn_avx = true;
4928   cum->warn_sse = true;
4929   cum->warn_mmx = true;
4930
4931   /* Because type might mismatch in between caller and callee, we need to
4932      use actual type of function for local calls.
4933      FIXME: cgraph_analyze can be told to actually record if function uses
4934      va_start so for local functions maybe_vaarg can be made aggressive
4935      helping K&R code.
4936      FIXME: once the type system is fixed, we won't need this code anymore.  */
4937   if (i && i->local)
4938     fntype = TREE_TYPE (fndecl);
       /* With a type: variadic or unprototyped functions may take varargs.
          Without a type: only when this is not a library call.  */
4939   cum->maybe_vaarg = (fntype
4940                       ? (!prototype_p (fntype) || stdarg_p (fntype))
4941                       : !libname);
4942
4943   if (!TARGET_64BIT)
4944     {
4945       /* If there are variable arguments, then we won't pass anything
4946          in registers in 32-bit mode. */
4947       if (stdarg_p (fntype))
4948         {
4949           cum->nregs = 0;
4950           cum->sse_nregs = 0;
4951           cum->mmx_nregs = 0;
4952           cum->warn_avx = 0;
4953           cum->warn_sse = 0;
4954           cum->warn_mmx = 0;
4955           return;
4956         }
4957
4958       /* Use ecx and edx registers if function has fastcall attribute,
4959          else look for regparm information.  */
4960       if (fntype)
4961         {
4962           if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4963             {
4964               cum->nregs = 2;
4965               cum->fastcall = 1;
4966             }
4967           else
4968             cum->nregs = ix86_function_regparm (fntype, fndecl);
4969         }
4970
4971       /* Set up the number of SSE registers used for passing SFmode
4972          and DFmode arguments.  Warn for mismatching ABI.  */
4973       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4974     }
4975 }
4976
4977 /* Return the "natural" mode for TYPE.  For most types this is simply
4978    TYPE_MODE; for vector types it is a vector mode whenever one is found.
4979
4980    With only part of the vector ISA extensions enabled, there are modes for
4981    which vector_mode_supported_p is false, and the generic vector support
4982    in gcc then implements the type with some non-vector mode.  Computing
4983    the natural mode here lets us select the proper ABI location for the
4984    operand without depending on what the middle-end decided to do with
4985    such vector types.
4986
4987    The middle-end can't deal with vector types > 16 bytes.  For a 32-byte
4988    vector without AVX we therefore return the original TYPE_MODE and, when
4989    CUM is non-null and allows it, warn once about the ABI change.  */
4990
4991 static enum machine_mode
4992 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4993 {
4994   static bool warnedavx;
4995   enum machine_mode mode = TYPE_MODE (type);
4996   enum machine_mode innermode;
4997   HOST_WIDE_INT size;
4998
4999   /* Only vector types whose TYPE_MODE is not a vector mode need work.  */
5000   if (TREE_CODE (type) != VECTOR_TYPE || VECTOR_MODE_P (mode))
5001     return mode;
5002
5003   /* Sizes other than 8, 16 and 32 bytes keep TYPE_MODE as well.  */
5004   size = int_size_in_bytes (type);
5005   if (size != 8 && size != 16 && size != 32)
5006     return mode;
5007
5008   /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
5009   if (TYPE_VECTOR_SUBPARTS (type) <= 1)
5010     return mode;
5011
5012   /* Begin the search at MIN_MODE_VECTOR_FLOAT or MIN_MODE_VECTOR_INT.  */
5013   innermode = TYPE_MODE (TREE_TYPE (type));
5014   mode = (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
5015           ? MIN_MODE_VECTOR_FLOAT : MIN_MODE_VECTOR_INT);
5016
5017   /* Get the mode which has this inner mode and number of units.  */
5018   for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5019     {
5020       if (GET_MODE_NUNITS (mode) != TYPE_VECTOR_SUBPARTS (type)
5021           || GET_MODE_INNER (mode) != innermode)
5022         continue;
5023
5024       /* Found it.  Anything but a 32-byte vector without AVX uses it.  */
5025       if (size != 32 || TARGET_AVX)
5026         return mode;
5027
5028       /* Warn at most once, and only if CUM asks for the AVX warning.  */
5029       if (cum && !warnedavx && cum->warn_avx)
5030         {
5031           warnedavx = true;
5032           warning (0, "AVX vector argument without AVX "
5033                    "enabled changes the ABI");
5034         }
5035       return TYPE_MODE (type);
5036     }
5037   gcc_unreachable ();
5038 }
5039
5040 /* Return an rtx for passing a value in hard register REGNO.  MODE is the
5041    value's "natural" mode and ORIG_MODE the mode chosen by the type system.
5042    When ORIG_MODE is not BLKmode a plain REG in ORIG_MODE is returned;
5043    otherwise the REG is built in MODE and wrapped in a one-entry PARALLEL.  */
5044
5045 static rtx
5046 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5047                      unsigned int regno)
5048 {
5049   rtx reg, piece;
5050
5051   /* The type system's mode can be used directly.  */
5052   if (orig_mode != BLKmode)
5053     return gen_rtx_REG (orig_mode, regno);
5054
5055   /* BLKmode: describe the value as a single register piece in MODE,
5056      paired with const0_rtx (presumably its byte offset).  */
5057   reg = gen_rtx_REG (mode, regno);
5058   piece = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
5059
5060   return gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, piece));
5061 }
5062
5063 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
5064    The goal of this code is to classify each eightbyte of an incoming argument
5065    by register class and to assign registers accordingly.  */
5066
5067 /* Merge CLASS1 and CLASS2 into the one class that covers both, applying
5068    the rules below in order.  See the x86-64 PS ABI for details.  */
5069
5070 static enum x86_64_reg_class
5071 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5072 {
5073   /* Rule #1: identical classes merge to themselves.  */
5074   if (class2 == class1)
5075     return class1;
5076
5077   /* Rule #2: NO_CLASS is neutral; the result is the other class.  */
5078   if (class1 == X86_64_NO_CLASS || class2 == X86_64_NO_CLASS)
5079     return class1 == X86_64_NO_CLASS ? class2 : class1;
5080
5081   /* Rule #3: MEMORY on either side makes the result MEMORY.  */
5082   if (class2 == X86_64_MEMORY_CLASS || class1 == X86_64_MEMORY_CLASS)
5083     return X86_64_MEMORY_CLASS;
5084
5085   /* Rule #4: an integer class on either side makes the result integer.
5086      INTEGERSI is kept only when it is paired with SSESF.  */
5087   if (class1 == X86_64_INTEGERSI_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5088     {
5089       enum x86_64_reg_class other
5090         = class1 == X86_64_INTEGERSI_CLASS ? class2 : class1;
5091
5092       return (other == X86_64_SSESF_CLASS
5093               ? X86_64_INTEGERSI_CLASS : X86_64_INTEGER_CLASS);
5094     }
5095   if (class1 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGER_CLASS)
5096     return X86_64_INTEGER_CLASS;
5097
5098   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5099      MEMORY is used.  */
5100   if (class1 == X86_64_X87_CLASS || class2 == X86_64_X87_CLASS
5101       || class1 == X86_64_X87UP_CLASS || class2 == X86_64_X87UP_CLASS
5102       || class1 == X86_64_COMPLEX_X87_CLASS
5103       || class2 == X86_64_COMPLEX_X87_CLASS)
5104     return X86_64_MEMORY_CLASS;
5105
5106   /* Rule #6: Otherwise class SSE is used.  */
5107   return X86_64_SSE_CLASS;
5108 }
5109
5110 /* Classify the argument of type TYPE and mode MODE.
5111    CLASSES will be filled by the register class used to pass each word
5112    of the operand.  The number of words is returned.  In case the parameter
5113    should be passed in memory, 0 is returned.  As a special case for zero
5114    sized containers, classes[0] will be NO_CLASS and 1 is returned.
5115
5116    BIT_OFFSET is used internally for handling records and specifies the
5117    offset of the operand in bits, taken modulo 256 to avoid overflow cases.
5118
5119    See the x86-64 PS ABI for details.
5120 */
5121
5122 static int
5123 classify_argument (enum machine_mode mode, const_tree type,
5124                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5125 {
5126   HOST_WIDE_INT bytes =
5127     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
       /* Number of words touched, counted from the start of the word that
          contains BIT_OFFSET.  */
5128   int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5129
5130   /* Variable sized entities are always passed/returned in memory.  */
5131   if (bytes < 0)
5132     return 0;
5133
5134   if (mode != VOIDmode
5135       && targetm.calls.must_pass_in_stack (mode, type))
5136     return 0;
5137
5138   if (type && AGGREGATE_TYPE_P (type))
5139     {
5140       int i;
5141       tree field;
5142       enum x86_64_reg_class subclasses[MAX_CLASSES];
5143
5144       /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
5145       if (bytes > 32)
5146         return 0;
5147
5148       for (i = 0; i < words; i++)
5149         classes[i] = X86_64_NO_CLASS;
5150
5151       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
5152          signal memory class, so handle it as special case.  */
5153       if (!words)
5154         {
5155           classes[0] = X86_64_NO_CLASS;
5156           return 1;
5157         }
5158
5159       /* Classify each field of record and merge classes.  */
5160       switch (TREE_CODE (type))
5161         {
5162         case RECORD_TYPE:
5163           /* And now merge the fields of structure.  */
5164           for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5165             {
5166               if (TREE_CODE (field) == FIELD_DECL)
5167                 {
5168                   int num;
5169
5170                   if (TREE_TYPE (field) == error_mark_node)
5171                     continue;
5172
5173                   /* Bitfields are always classified as integer.  Handle them
5174                      early, since later code would consider them to be
5175                      misaligned integers.  */
5176                   if (DECL_BIT_FIELD (field))
5177                     {
5178                       for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5179                            i < ((int_bit_position (field) + (bit_offset % 64))
5180                                 + tree_low_cst (DECL_SIZE (field), 0)
5181                                 + 63) / 8 / 8; i++)
5182                         classes[i] =
5183                           merge_classes (X86_64_INTEGER_CLASS,
5184                                          classes[i]);
5185                     }
5186                   else
5187                     {
5188                       int pos;
5189
                           /* NOTE: this overwrites the TYPE parameter with the
                              field's type for the rest of the function.  */
5190                       type = TREE_TYPE (field);
5191
5192                       /* Flexible array member is ignored.  */
5193                       if (TYPE_MODE (type) == BLKmode
5194                           && TREE_CODE (type) == ARRAY_TYPE
5195                           && TYPE_SIZE (type) == NULL_TREE
5196                           && TYPE_DOMAIN (type) != NULL_TREE
5197                           && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5198                               == NULL_TREE))
5199                         {
5200                           static bool warned;
5201
5202                           if (!warned && warn_psabi)
5203                             {
5204                               warned = true;
5205                               inform (input_location,
5206                                       "The ABI of passing struct with"
5207                                       " a flexible array member has"
5208                                       " changed in GCC 4.4");
5209                             }
5210                           continue;
5211                         }
                           /* Classify the field on its own, at its offset
                              within the enclosing object.  */
5212                       num = classify_argument (TYPE_MODE (type), type,
5213                                                subclasses,
5214                                                (int_bit_position (field)
5215                                                 + bit_offset) % 256);
5216                       if (!num)
5217                         return 0;
                           /* POS is the index in CLASSES of the first word
                              covered by this field.  */
5218                       pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5219                       for (i = 0; i < num && (i + pos) < words; i++)
5220                         classes[i + pos] =
5221                           merge_classes (subclasses[i], classes[i + pos]);
5222                     }
5223                 }
5224             }
5225           break;
5226
5227         case ARRAY_TYPE:
5228           /* Arrays are handled as small records.  */
5229           {
5230             int num;
5231             num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5232                                      TREE_TYPE (type), subclasses, bit_offset);
5233             if (!num)
5234               return 0;
5235
5236             /* The partial classes are now full classes.  */
5237             if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5238               subclasses[0] = X86_64_SSE_CLASS;
5239             if (subclasses[0] == X86_64_INTEGERSI_CLASS
5240                 && !((bit_offset % 64) == 0 && bytes == 4))
5241               subclasses[0] = X86_64_INTEGER_CLASS;
5242
                 /* Repeat the element's classes over every word of the
                    array.  */
5243             for (i = 0; i < words; i++)
5244               classes[i] = subclasses[i % num];
5245
5246             break;
5247           }
5248         case UNION_TYPE:
5249         case QUAL_UNION_TYPE:
5250           /* Unions are similar to RECORD_TYPE but offset is always 0.
5251              */
5252           for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5253             {
5254               if (TREE_CODE (field) == FIELD_DECL)
5255                 {
5256                   int num;
5257
5258                   if (TREE_TYPE (field) == error_mark_node)
5259                     continue;
5260
5261                   num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5262                                            TREE_TYPE (field), subclasses,
5263                                            bit_offset);
5264                   if (!num)
5265                     return 0;
5266                   for (i = 0; i < num; i++)
5267                     classes[i] = merge_classes (subclasses[i], classes[i]);
5268                 }
5269             }
5270           break;
5271
5272         default:
5273           gcc_unreachable ();
5274         }
5275
5276       if (words > 2)
5277         {
5278           /* When size > 16 bytes, if the first one isn't
5279              X86_64_SSE_CLASS or any other ones aren't
5280              X86_64_SSEUP_CLASS, everything should be passed in
5281              memory.  */
5282           if (classes[0] != X86_64_SSE_CLASS)
5283               return 0;
5284
5285           for (i = 1; i < words; i++)
5286             if (classes[i] != X86_64_SSEUP_CLASS)
5287               return 0;
5288         }
5289
5290       /* Final merger cleanup.  */
5291       for (i = 0; i < words; i++)
5292         {
5293           /* If one class is MEMORY, everything should be passed in
5294              memory.  */
5295           if (classes[i] == X86_64_MEMORY_CLASS)
5296             return 0;
5297
5298           /* The X86_64_SSEUP_CLASS should be always preceded by
5299              X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.
                  NOTE(review): if classes[0] ever were SSEUP, classes[i - 1]
                  would be read with i == 0 before the assert below can
                  fire.  */
5300           if (classes[i] == X86_64_SSEUP_CLASS
5301               && classes[i - 1] != X86_64_SSE_CLASS
5302               && classes[i - 1] != X86_64_SSEUP_CLASS)
5303             {
5304               /* The first one should never be X86_64_SSEUP_CLASS.  */
5305               gcc_assert (i != 0);
5306               classes[i] = X86_64_SSE_CLASS;
5307             }
5308
5309           /*  If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5310                everything should be passed in memory.  */
5311           if (classes[i] == X86_64_X87UP_CLASS
5312               && (classes[i - 1] != X86_64_X87_CLASS))
5313             {
5314               static bool warned;
5315
5316               /* The first one should never be X86_64_X87UP_CLASS.  */
5317               gcc_assert (i != 0);
5318               if (!warned && warn_psabi)
5319                 {
5320                   warned = true;
5321                   inform (input_location,
5322                           "The ABI of passing union with long double"
5323                           " has changed in GCC 4.4");
5324                 }
5325               return 0;
5326             }
5327         }
5328       return words;
5329     }
5330
5331   /* Compute alignment needed.  We align all types to natural boundaries with
5332      exception of XFmode that is aligned to 64bits.
          NOTE(review): the code below uses 128 for XFmode (and 256, halved to
          128, for XCmode), which does not match the "64bits" stated above;
          confirm which is intended.  */
5333   if (mode != VOIDmode && mode != BLKmode)
5334     {
5335       int mode_alignment = GET_MODE_BITSIZE (mode);
5336
5337       if (mode == XFmode)
5338         mode_alignment = 128;
5339       else if (mode == XCmode)
5340         mode_alignment = 256;
5341       if (COMPLEX_MODE_P (mode))
5342         mode_alignment /= 2;
5343       /* Misaligned fields are always returned in memory.  */
5344       if (bit_offset % mode_alignment)
5345         return 0;
5346     }
5347
5348   /* For V1xx modes, just use the base mode.  V1DImode and V1TImode are
          excluded and have their own case below.  */
5349   if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5350       && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5351     mode = GET_MODE_INNER (mode);
5352
5353   /* Classification of atomic types.  */
5354   switch (mode)
5355     {
5356     case SDmode:
5357     case DDmode:
5358       classes[0] = X86_64_SSE_CLASS;
5359       return 1;
5360     case TDmode:
5361       classes[0] = X86_64_SSE_CLASS;
5362       classes[1] = X86_64_SSEUP_CLASS;
5363       return 2;
5364     case DImode:
5365     case SImode:
5366     case HImode:
5367     case QImode:
5368     case CSImode:
5369     case CHImode:
5370     case CQImode:
5371       {
             /* SIZE is the bit position, within the containing word, just
                past the end of the value.  */
5372         int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5373
5374         if (size <= 32)
5375           {
5376             classes[0] = X86_64_INTEGERSI_CLASS;
5377             return 1;
5378           }
5379         else if (size <= 64)
5380           {
5381             classes[0] = X86_64_INTEGER_CLASS;
5382             return 1;
5383           }
5384         else if (size <= 64+32)
5385           {
5386             classes[0] = X86_64_INTEGER_CLASS;
5387             classes[1] = X86_64_INTEGERSI_CLASS;
5388             return 2;
5389           }
5390         else if (size <= 64+64)
5391           {
5392             classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5393             return 2;
5394           }
5395         else
5396           gcc_unreachable ();
5397       }
5398     case CDImode:
5399     case TImode:
5400       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5401       return 2;
5402     case COImode:
5403     case OImode:
5404       /* OImode shouldn't be used directly.  */
5405       gcc_unreachable ();
5406     case CTImode:
5407       return 0;
5408     case SFmode:
           /* A float at the start of a word gets the partial class SSESF;
              elsewhere in the word it gets SSE.  */
5409       if (!(bit_offset % 64))
5410         classes[0] = X86_64_SSESF_CLASS;
5411       else
5412         classes[0] = X86_64_SSE_CLASS;
5413       return 1;
5414     case DFmode:
5415       classes[0] = X86_64_SSEDF_CLASS;
5416       return 1;
5417     case XFmode:
5418       classes[0] = X86_64_X87_CLASS;
5419       classes[1] = X86_64_X87UP_CLASS;
5420       return 2;
5421     case TFmode:
5422       classes[0] = X86_64_SSE_CLASS;
5423       classes[1] = X86_64_SSEUP_CLASS;
5424       return 2;
5425     case SCmode:
5426       classes[0] = X86_64_SSE_CLASS;
5427       if (!(bit_offset % 64))
5428         return 1;
5429       else
5430         {
5431           static bool warned;
5432
5433           if (!warned && warn_psabi)
5434             {
5435               warned = true;
5436               inform (input_location,
5437                       "The ABI of passing structure with complex float"
5438                       " member has changed in GCC 4.4");
5439             }
5440           classes[1] = X86_64_SSESF_CLASS;
5441           return 2;
5442         }
5443     case DCmode:
5444       classes[0] = X86_64_SSEDF_CLASS;
5445       classes[1] = X86_64_SSEDF_CLASS;
5446       return 2;
5447     case XCmode:
5448       classes[0] = X86_64_COMPLEX_X87_CLASS;
5449       return 1;
5450     case TCmode:
5451       /* This mode is larger than 16 bytes.  */
5452       return 0;
5453     case V8SFmode:
5454     case V8SImode:
5455     case V32QImode:
5456     case V16HImode:
5457     case V4DFmode:
5458     case V4DImode:
5459       classes[0] = X86_64_SSE_CLASS;
5460       classes[1] = X86_64_SSEUP_CLASS;
5461       classes[2] = X86_64_SSEUP_CLASS;
5462       classes[3] = X86_64_SSEUP_CLASS;
5463       return 4;
5464     case V4SFmode:
5465     case V4SImode:
5466     case V16QImode:
5467     case V8HImode:
5468     case V2DFmode:
5469     case V2DImode:
5470       classes[0] = X86_64_SSE_CLASS;
5471       classes[1] = X86_64_SSEUP_CLASS;
5472       return 2;
5473     case V1TImode:
5474     case V1DImode:
5475     case V2SFmode:
5476     case V2SImode:
5477     case V4HImode:
5478     case V8QImode:
5479       classes[0] = X86_64_SSE_CLASS;
5480       return 1;
5481     case BLKmode:
5482     case VOIDmode:
5483       return 0;
5484     default:
           /* Any other mode must be a vector mode with integer elements, of
              at most 16 bytes; it is classified as integer.  */
5485       gcc_assert (VECTOR_MODE_P (mode));
5486
5487       if (bytes > 16)
5488         return 0;
5489
5490       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5491
           /* NOTE(review): unlike the scalar integer case above, BIT_OFFSET is
              used here without "% 64"; confirm that this is intended.  */
5492       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5493         classes[0] = X86_64_INTEGERSI_CLASS;
5494       else
5495         classes[0] = X86_64_INTEGER_CLASS;
5496       classes[1] = X86_64_INTEGER_CLASS;
5497       return 1 + (bytes > 8);
5498     }
5499 }
5500
5501 /* Count in *INT_NREGS and *SSE_NREGS the registers of each kind needed for
5502    (MODE, TYPE).  Return 0 iff it must go in memory; x87 needs IN_RETURN.  */
5503 static int
5504 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5505                   int *int_nregs, int *sse_nregs)
5506 {
5507   enum x86_64_reg_class word_class[MAX_CLASSES];
5508   int idx = classify_argument (mode, type, word_class, 0);
5509   /* Both counters are cleared before the memory-class early return.  */
5510   *sse_nregs = 0;
5511   *int_nregs = 0;
5512   if (idx == 0)
5513     return 0;
5514   while (--idx >= 0)
5515     switch (word_class[idx])
5516       {
5517       case X86_64_INTEGERSI_CLASS:
5518       case X86_64_INTEGER_CLASS:
5519         ++*int_nregs;
5520         break;
5521       case X86_64_SSEDF_CLASS:
5522       case X86_64_SSESF_CLASS:
5523       case X86_64_SSE_CLASS:
5524         ++*sse_nregs;
5525         break;
5526       case X86_64_SSEUP_CLASS:
5527       case X86_64_NO_CLASS:
5528         break;
5529       case X86_64_X87UP_CLASS:
5530       case X86_64_X87_CLASS:
5531         if (!in_return)
5532           return 0;
5533         break;
5534       case X86_64_COMPLEX_X87_CLASS:
5535         return in_return ? 2 : 0;
5536       case X86_64_MEMORY_CLASS:
5537         gcc_unreachable ();
5538       }
5539   return 1;
5540 }
5541
5542 /* Construct container for the argument used by GCC interface.  See
5543    FUNCTION_ARG for the detailed description.

        MODE and TYPE describe the value and are classified with
        classify_argument; ORIG_MODE is handed to gen_reg_or_parallel for
        single SSE registers.  IN_RETURN is nonzero for a return value.
        NINTREGS and NSSEREGS are the numbers of integer and SSE registers
        still available, INTREG points to the integer register numbers to
        use and SSE_REGNO is the index of the first SSE register to use.

        Return a REG or a PARALLEL, or NULL when the value is not placed in
        registers (including after an error has been issued).  */
5544
5545 static rtx
5546 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5547                      const_tree type, int in_return, int nintregs, int nsseregs,
5548                      const int *intreg, int sse_regno)
5549 {
5550   /* The following variables hold the static issued_error state.  */
5551   static bool issued_sse_arg_error;
5552   static bool issued_sse_ret_error;
5553   static bool issued_x87_ret_error;
5554
5555   enum machine_mode tmpmode;
5556   int bytes =
5557     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5558   enum x86_64_reg_class regclass[MAX_CLASSES];
5559   int n;
5560   int i;
5561   int nexps = 0;
5562   int needed_sseregs, needed_intregs;
5563   rtx exp[MAX_CLASSES];
5564   rtx ret;
5565
       /* Classify the value; zero words means it is passed in memory.  */
5566   n = classify_argument (mode, type, regclass, 0);
5567   if (!n)
5568     return NULL;
5569   if (!examine_argument (mode, type, in_return, &needed_intregs,
5570                          &needed_sseregs))
5571     return NULL;
       /* Not enough registers of the required kinds are left.  */
5572   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5573     return NULL;
5574
5575   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
5576      some less clueful developer tries to use floating-point anyway.  */
5577   if (needed_sseregs && !TARGET_SSE)
5578     {
5579       if (in_return)
5580         {
5581           if (!issued_sse_ret_error)
5582             {
5583               error ("SSE register return with SSE disabled");
5584               issued_sse_ret_error = true;
5585             }
5586         }
5587       else if (!issued_sse_arg_error)
5588         {
5589           error ("SSE register argument with SSE disabled");
5590           issued_sse_arg_error = true;
5591         }
5592       return NULL;
5593     }
5594
5595   /* Likewise, error if the ABI requires us to return values in the
5596      x87 registers and the user specified -mno-80387.  */
5597   if (!TARGET_80387 && in_return)
5598     for (i = 0; i < n; i++)
5599       if (regclass[i] == X86_64_X87_CLASS
5600           || regclass[i] == X86_64_X87UP_CLASS
5601           || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5602         {
5603           if (!issued_x87_ret_error)
5604             {
5605               error ("x87 register return with x87 disabled");
5606               issued_x87_ret_error = true;
5607             }
5608           return NULL;
5609         }
5610
5611   /* First construct simple cases.  Avoid SCmode, since we want to use
5612      single register to pass this type.  */
5613   if (n == 1 && mode != SCmode)
5614     switch (regclass[0])
5615       {
5616       case X86_64_INTEGER_CLASS:
5617       case X86_64_INTEGERSI_CLASS:
5618         return gen_rtx_REG (mode, intreg[0]);
5619       case X86_64_SSE_CLASS:
5620       case X86_64_SSESF_CLASS:
5621       case X86_64_SSEDF_CLASS:
             /* A BLKmode value falls through to the PARALLEL code below.  */
5622         if (mode != BLKmode)
5623           return gen_reg_or_parallel (mode, orig_mode,
5624                                       SSE_REGNO (sse_regno));
5625         break;
5626       case X86_64_X87_CLASS:
5627       case X86_64_COMPLEX_X87_CLASS:
5628         return gen_rtx_REG (mode, FIRST_STACK_REG);
5629       case X86_64_NO_CLASS:
5630         /* Zero sized array, struct or class.  */
5631         return NULL;
5632       default:
5633         gcc_unreachable ();
5634       }
       /* A non-BLKmode value classified SSE followed only by SSEUP (two or
          four words) is returned as one SSE register in MODE.  */
5635   if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5636       && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5637     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5638   if (n == 4
5639       && regclass[0] == X86_64_SSE_CLASS
5640       && regclass[1] == X86_64_SSEUP_CLASS
5641       && regclass[2] == X86_64_SSEUP_CLASS
5642       && regclass[3] == X86_64_SSEUP_CLASS
5643       && mode != BLKmode)
5644     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5645
5646   if (n == 2
5647       && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5648     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
       /* Two integer words in consecutively numbered registers can be a
          single REG for these 16-byte modes.  */
5649   if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5650       && regclass[1] == X86_64_INTEGER_CLASS
5651       && (mode == CDImode || mode == TImode || mode == TFmode)
5652       && intreg[0] + 1 == intreg[1])
5653     return gen_rtx_REG (mode, intreg[0]);
5654
5655   /* Otherwise figure out the entries of the PARALLEL.  Each entry pairs a
          register with the byte offset (word index times 8) of the part of
          the value it holds.  */
5656   for (i = 0; i < n; i++)
5657     {
5658       int pos;
5659
5660       switch (regclass[i])
5661         {
5662           case X86_64_NO_CLASS:
5663             break;
5664           case X86_64_INTEGER_CLASS:
5665           case X86_64_INTEGERSI_CLASS:
5666             /* Merge TImodes on aligned occasions here too.  */
                 /* The last word may be partial: pick an integer mode of
                    exactly the remaining size.  */
5667             if (i * 8 + 8 > bytes)
5668               tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5669             else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5670               tmpmode = SImode;
5671             else
5672               tmpmode = DImode;
5673             /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
5674             if (tmpmode == BLKmode)
5675               tmpmode = DImode;
5676             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5677                                                gen_rtx_REG (tmpmode, *intreg),
5678                                                GEN_INT (i*8));
                 /* Advance to the next integer register number.  */
5679             intreg++;
5680             break;
5681           case X86_64_SSESF_CLASS:
5682             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5683                                                gen_rtx_REG (SFmode,
5684                                                             SSE_REGNO (sse_regno)),
5685                                                GEN_INT (i*8));
5686             sse_regno++;
5687             break;
5688           case X86_64_SSEDF_CLASS:
5689             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5690                                                gen_rtx_REG (DFmode,
5691                                                             SSE_REGNO (sse_regno)),
5692                                                GEN_INT (i*8));
5693             sse_regno++;
5694             break;
5695           case X86_64_SSE_CLASS:
                 /* Remember the first word: I is advanced past any SSEUP
                    words that belong to the same register.  */
5696             pos = i;
5697             switch (n)
5698               {
5699               case 1:
5700                 tmpmode = DImode;
5701                 break;
5702               case 2:
5703                 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5704                   {
5705                     tmpmode = TImode;
5706                     i++;
5707                   }
5708                 else
5709                   tmpmode = DImode;
5710                 break;
5711               case 4:
5712                 gcc_assert (i == 0
5713                             && regclass[1] == X86_64_SSEUP_CLASS
5714                             && regclass[2] == X86_64_SSEUP_CLASS
5715                             && regclass[3] == X86_64_SSEUP_CLASS);
5716                 tmpmode = OImode;
5717                 i += 3;
5718                 break;
5719               default:
5720                 gcc_unreachable ();
5721               }
5722             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5723                                                gen_rtx_REG (tmpmode,
5724                                                             SSE_REGNO (sse_regno)),
5725                                                GEN_INT (pos*8));
5726             sse_regno++;
5727             break;
5728           default:
5729             gcc_unreachable ();
5730         }
5731     }
5732
5733   /* Empty aligned struct, union or class.  */
5734   if (nexps == 0)
5735     return NULL;
5736
       /* Collect the entries gathered above into the resulting PARALLEL.  */
5737   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5738   for (i = 0; i < nexps; i++)
5739     XVECEXP (ret, 0, i) = exp [i];
5740   return ret;
5741 }
5742
5743 /* Update the data in CUM to advance over an argument of mode MODE
5744    and data type TYPE.  (TYPE is null for libcalls where that information
5745    may not be available.)  */
5746
5747 static void
5748 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5749                          tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5750 {
5751   switch (mode)
5752     {
5753     default:
5754       break;
5755
5756     case BLKmode:
5757       if (bytes < 0)
5758         break;
5759       /* FALLTHRU */
5760
5761     case DImode:
5762     case SImode:
5763     case HImode:
5764     case QImode:
5765       cum->words += words;
5766       cum->nregs -= words;
5767       cum->regno += words;
5768
5769       if (cum->nregs <= 0)
5770         {
5771           cum->nregs = 0;
5772           cum->regno = 0;
5773         }
5774       break;
5775
5776     case OImode:
5777       /* OImode shouldn't be used directly.  */
5778       gcc_unreachable ();
5779
5780     case DFmode:
5781       if (cum->float_in_sse < 2)
5782         break;
5783     case SFmode:
5784       if (cum->float_in_sse < 1)
5785         break;
5786       /* FALLTHRU */
5787
5788     case V8SFmode:
5789     case V8SImode:
5790     case V32QImode:
5791     case V16HImode:
5792     case V4DFmode:
5793     case V4DImode:
5794     case TImode:
5795     case V16QImode:
5796     case V8HImode:
5797     case V4SImode:
5798     case V2DImode:
5799     case V4SFmode:
5800     case V2DFmode:
5801       if (!type || !AGGREGATE_TYPE_P (type))
5802         {
5803           cum->sse_words += words;
5804           cum->sse_nregs -= 1;
5805           cum->sse_regno += 1;
5806           if (cum->sse_nregs <= 0)
5807             {
5808               cum->sse_nregs = 0;
5809               cum->sse_regno = 0;
5810             }
5811         }
5812       break;
5813
5814     case V8QImode:
5815     case V4HImode:
5816     case V2SImode:
5817     case V2SFmode:
5818     case V1TImode:
5819     case V1DImode:
5820       if (!type || !AGGREGATE_TYPE_P (type))
5821         {
5822           cum->mmx_words += words;
5823           cum->mmx_nregs -= 1;
5824           cum->mmx_regno += 1;
5825           if (cum->mmx_nregs <= 0)
5826             {
5827               cum->mmx_nregs = 0;
5828               cum->mmx_regno = 0;
5829             }
5830         }
5831       break;
5832     }
5833 }
5834
5835 static void
5836 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5837                          tree type, HOST_WIDE_INT words, int named)
5838 {
5839   int int_nregs, sse_nregs;
5840
5841   /* Unnamed 256bit vector mode parameters are passed on stack.  */
5842   if (!named && VALID_AVX256_REG_MODE (mode))
5843     return;
5844
5845   if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5846     cum->words += words;
5847   else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5848     {
5849       cum->nregs -= int_nregs;
5850       cum->sse_nregs -= sse_nregs;
5851       cum->regno += int_nregs;
5852       cum->sse_regno += sse_nregs;
5853     }
5854   else
5855     cum->words += words;
5856 }
5857
5858 static void
5859 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5860                             HOST_WIDE_INT words)
5861 {
5862   /* Otherwise, this should be passed indirect.  */
5863   gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5864
5865   cum->words += words;
5866   if (cum->nregs > 0)
5867     {
5868       cum->nregs -= 1;
5869       cum->regno += 1;
5870     }
5871 }
5872
5873 void
5874 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5875                       tree type, int named)
5876 {
5877   HOST_WIDE_INT bytes, words;
5878
5879   if (mode == BLKmode)
5880     bytes = int_size_in_bytes (type);
5881   else
5882     bytes = GET_MODE_SIZE (mode);
5883   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5884
5885   if (type)
5886     mode = type_natural_mode (type, NULL);
5887
5888   if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5889     function_arg_advance_ms_64 (cum, bytes, words);
5890   else if (TARGET_64BIT)
5891     function_arg_advance_64 (cum, mode, type, words, named);
5892   else
5893     function_arg_advance_32 (cum, mode, type, bytes, words);
5894 }
5895
5896 /* Define where to put the arguments to a function.
5897    Value is zero to push the argument on the stack,
5898    or a hard register in which to store the argument.
5899
5900    MODE is the argument's machine mode.
5901    TYPE is the data type of the argument (as a tree).
5902     This is null for libcalls where that information may
5903     not be available.
5904    CUM is a variable of type CUMULATIVE_ARGS which gives info about
5905     the preceding args and about the function being called.
5906    NAMED is nonzero if this argument is a named parameter
5907     (otherwise it is an extra parameter matching an ellipsis).  */
5908
5909 static rtx
5910 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5911                  enum machine_mode orig_mode, tree type,
5912                  HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5913 {
5914   static bool warnedsse, warnedmmx;
5915
5916   /* Avoid the AL settings for the Unix64 ABI.  */
5917   if (mode == VOIDmode)
5918     return constm1_rtx;
5919
5920   switch (mode)
5921     {
5922     default:
5923       break;
5924
5925     case BLKmode:
5926       if (bytes < 0)
5927         break;
5928       /* FALLTHRU */
5929     case DImode:
5930     case SImode:
5931     case HImode:
5932     case QImode:
5933       if (words <= cum->nregs)
5934         {
5935           int regno = cum->regno;
5936
5937           /* Fastcall allocates the first two DWORD (SImode) or
5938             smaller arguments to ECX and EDX if it isn't an
5939             aggregate type .  */
5940           if (cum->fastcall)
5941             {
5942               if (mode == BLKmode
5943                   || mode == DImode
5944                   || (type && AGGREGATE_TYPE_P (type)))
5945                 break;
5946
5947               /* ECX not EAX is the first allocated register.  */
5948               if (regno == AX_REG)
5949                 regno = CX_REG;
5950             }
5951           return gen_rtx_REG (mode, regno);
5952         }
5953       break;
5954
5955     case DFmode:
5956       if (cum->float_in_sse < 2)
5957         break;
5958     case SFmode:
5959       if (cum->float_in_sse < 1)
5960         break;
5961       /* FALLTHRU */
5962     case TImode:
5963       /* In 32bit, we pass TImode in xmm registers.  */
5964     case V16QImode:
5965     case V8HImode:
5966     case V4SImode:
5967     case V2DImode:
5968     case V4SFmode:
5969     case V2DFmode:
5970       if (!type || !AGGREGATE_TYPE_P (type))
5971         {
5972           if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5973             {
5974               warnedsse = true;
5975               warning (0, "SSE vector argument without SSE enabled "
5976                        "changes the ABI");
5977             }
5978           if (cum->sse_nregs)
5979             return gen_reg_or_parallel (mode, orig_mode,
5980                                         cum->sse_regno + FIRST_SSE_REG);
5981         }
5982       break;
5983
5984     case OImode:
5985       /* OImode shouldn't be used directly.  */
5986       gcc_unreachable ();
5987
5988     case V8SFmode:
5989     case V8SImode:
5990     case V32QImode:
5991     case V16HImode:
5992     case V4DFmode:
5993     case V4DImode:
5994       if (!type || !AGGREGATE_TYPE_P (type))
5995         {
5996           if (cum->sse_nregs)
5997             return gen_reg_or_parallel (mode, orig_mode,
5998                                         cum->sse_regno + FIRST_SSE_REG);
5999         }
6000       break;
6001
6002     case V8QImode:
6003     case V4HImode:
6004     case V2SImode:
6005     case V2SFmode:
6006     case V1TImode:
6007     case V1DImode:
6008       if (!type || !AGGREGATE_TYPE_P (type))
6009         {
6010           if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6011             {
6012               warnedmmx = true;
6013               warning (0, "MMX vector argument without MMX enabled "
6014                        "changes the ABI");
6015             }
6016           if (cum->mmx_nregs)
6017             return gen_reg_or_parallel (mode, orig_mode,
6018                                         cum->mmx_regno + FIRST_MMX_REG);
6019         }
6020       break;
6021     }
6022
6023   return NULL_RTX;
6024 }
6025
6026 static rtx
6027 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6028                  enum machine_mode orig_mode, tree type, int named)
6029 {
6030   /* Handle a hidden AL argument containing number of registers
6031      for varargs x86-64 functions.  */
6032   if (mode == VOIDmode)
6033     return GEN_INT (cum->maybe_vaarg
6034                     ? (cum->sse_nregs < 0
6035                        ? (cum->call_abi == ix86_abi
6036                           ? SSE_REGPARM_MAX
6037                           : (ix86_abi != SYSV_ABI
6038                              ? X86_64_SSE_REGPARM_MAX
6039                              : X86_64_MS_SSE_REGPARM_MAX))
6040                        : cum->sse_regno)
6041                     : -1);
6042
6043   switch (mode)
6044     {
6045     default:
6046       break;
6047
6048     case V8SFmode:
6049     case V8SImode:
6050     case V32QImode:
6051     case V16HImode:
6052     case V4DFmode:
6053     case V4DImode:
6054       /* Unnamed 256bit vector mode parameters are passed on stack.  */
6055       if (!named)
6056         return NULL;
6057       break;
6058     }
6059
6060   return construct_container (mode, orig_mode, type, 0, cum->nregs,
6061                               cum->sse_nregs,
6062                               &x86_64_int_parameter_registers [cum->regno],
6063                               cum->sse_regno);
6064 }
6065
6066 static rtx
6067 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6068                     enum machine_mode orig_mode, int named,
6069                     HOST_WIDE_INT bytes)
6070 {
6071   unsigned int regno;
6072
6073   /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6074      We use value of -2 to specify that current function call is MSABI.  */
6075   if (mode == VOIDmode)
6076     return GEN_INT (-2);
6077
6078   /* If we've run out of registers, it goes on the stack.  */
6079   if (cum->nregs == 0)
6080     return NULL_RTX;
6081
6082   regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6083
6084   /* Only floating point modes are passed in anything but integer regs.  */
6085   if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6086     {
6087       if (named)
6088         regno = cum->regno + FIRST_SSE_REG;
6089       else
6090         {
6091           rtx t1, t2;
6092
6093           /* Unnamed floating parameters are passed in both the
6094              SSE and integer registers.  */
6095           t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6096           t2 = gen_rtx_REG (mode, regno);
6097           t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6098           t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6099           return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6100         }
6101     }
6102   /* Handle aggregated types passed in register.  */
6103   if (orig_mode == BLKmode)
6104     {
6105       if (bytes > 0 && bytes <= 8)
6106         mode = (bytes > 4 ? DImode : SImode);
6107       if (mode == BLKmode)
6108         mode = DImode;
6109     }
6110
6111   return gen_reg_or_parallel (mode, orig_mode, regno);
6112 }
6113
6114 rtx
6115 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6116               tree type, int named)
6117 {
6118   enum machine_mode mode = omode;
6119   HOST_WIDE_INT bytes, words;
6120
6121   if (mode == BLKmode)
6122     bytes = int_size_in_bytes (type);
6123   else
6124     bytes = GET_MODE_SIZE (mode);
6125   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6126
6127   /* To simplify the code below, represent vector types with a vector mode
6128      even if MMX/SSE are not active.  */
6129   if (type && TREE_CODE (type) == VECTOR_TYPE)
6130     mode = type_natural_mode (type, cum);
6131
6132   if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6133     return function_arg_ms_64 (cum, mode, omode, named, bytes);
6134   else if (TARGET_64BIT)
6135     return function_arg_64 (cum, mode, omode, type, named);
6136   else
6137     return function_arg_32 (cum, mode, omode, type, bytes, words);
6138 }
6139
6140 /* A C expression that indicates when an argument must be passed by
6141    reference.  If nonzero for an argument, a copy of that argument is
6142    made in memory and a pointer to the argument is passed instead of
6143    the argument itself.  The pointer is passed in whatever way is
6144    appropriate for passing a pointer to that type.  */
6145
6146 static bool
6147 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6148                         enum machine_mode mode ATTRIBUTE_UNUSED,
6149                         const_tree type, bool named ATTRIBUTE_UNUSED)
6150 {
6151   /* See Windows x64 Software Convention.  */
6152   if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6153     {
6154       int msize = (int) GET_MODE_SIZE (mode);
6155       if (type)
6156         {
6157           /* Arrays are passed by reference.  */
6158           if (TREE_CODE (type) == ARRAY_TYPE)
6159             return true;
6160
6161           if (AGGREGATE_TYPE_P (type))
6162             {
6163               /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6164                  are passed by reference.  */
6165               msize = int_size_in_bytes (type);
6166             }
6167         }
6168
6169       /* __m128 is passed by reference.  */
6170       switch (msize) {
6171       case 1: case 2: case 4: case 8:
6172         break;
6173       default:
6174         return true;
6175       }
6176     }
6177   else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6178     return 1;
6179
6180   return 0;
6181 }
6182
6183 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6184    ABI.  */
6185 static bool
6186 contains_aligned_value_p (tree type)
6187 {
6188   enum machine_mode mode = TYPE_MODE (type);
6189   if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6190        || mode == TDmode
6191        || mode == TFmode
6192        || mode == TCmode)
6193       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6194     return true;
6195   if (TYPE_ALIGN (type) < 128)
6196     return false;
6197
6198   if (AGGREGATE_TYPE_P (type))
6199     {
6200       /* Walk the aggregates recursively.  */
6201       switch (TREE_CODE (type))
6202         {
6203         case RECORD_TYPE:
6204         case UNION_TYPE:
6205         case QUAL_UNION_TYPE:
6206           {
6207             tree field;
6208
6209             /* Walk all the structure fields.  */
6210             for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6211               {
6212                 if (TREE_CODE (field) == FIELD_DECL
6213                     && contains_aligned_value_p (TREE_TYPE (field)))
6214                   return true;
6215               }
6216             break;
6217           }
6218
6219         case ARRAY_TYPE:
6220           /* Just for use if some languages passes arrays by value.  */
6221           if (contains_aligned_value_p (TREE_TYPE (type)))
6222             return true;
6223           break;
6224
6225         default:
6226           gcc_unreachable ();
6227         }
6228     }
6229   return false;
6230 }
6231
6232 /* Gives the alignment boundary, in bits, of an argument with the
6233    specified mode and type.  */
6234
6235 int
6236 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6237 {
6238   int align;
6239   if (type)
6240     {
6241       /* Since canonical type is used for call, we convert it to
6242          canonical type if needed.  */
6243       if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6244         type = TYPE_CANONICAL (type);
6245       align = TYPE_ALIGN (type);
6246     }
6247   else
6248     align = GET_MODE_ALIGNMENT (mode);
6249   if (align < PARM_BOUNDARY)
6250     align = PARM_BOUNDARY;
6251   /* In 32bit, only _Decimal128 and __float128 are aligned to their
6252      natural boundaries.  */
6253   if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6254     {
6255       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
6256          make an exception for SSE modes since these require 128bit
6257          alignment.
6258
6259          The handling here differs from field_alignment.  ICC aligns MMX
6260          arguments to 4 byte boundaries, while structure fields are aligned
6261          to 8 byte boundaries.  */
6262       if (!type)
6263         {
6264           if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6265             align = PARM_BOUNDARY;
6266         }
6267       else
6268         {
6269           if (!contains_aligned_value_p (type))
6270             align = PARM_BOUNDARY;
6271         }
6272     }
6273   if (align > BIGGEST_ALIGNMENT)
6274     align = BIGGEST_ALIGNMENT;
6275   return align;
6276 }
6277
/* Return true if REGNO is a possible register number of a function value.  */
6279
6280 bool
6281 ix86_function_value_regno_p (int regno)
6282 {
6283   switch (regno)
6284     {
6285     case 0:
6286       return true;
6287
6288     case FIRST_FLOAT_REG:
6289       /* TODO: The function should depend on current function ABI but
6290        builtins.c would need updating then. Therefore we use the
6291        default ABI.  */
6292       if (TARGET_64BIT && ix86_abi == MS_ABI)
6293         return false;
6294       return TARGET_FLOAT_RETURNS_IN_80387;
6295
6296     case FIRST_SSE_REG:
6297       return TARGET_SSE;
6298
6299     case FIRST_MMX_REG:
6300       if (TARGET_MACHO || TARGET_64BIT)
6301         return false;
6302       return TARGET_MMX;
6303     }
6304
6305   return false;
6306 }
6307
6308 /* Define how to find the value returned by a function.
6309    VALTYPE is the data type of the value (as a tree).
6310    If the precise function being called is known, FUNC is its FUNCTION_DECL;
6311    otherwise, FUNC is 0.  */
6312
6313 static rtx
6314 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6315                    const_tree fntype, const_tree fn)
6316 {
6317   unsigned int regno;
6318
6319   /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6320      we normally prevent this case when mmx is not available.  However
6321      some ABIs may require the result to be returned like DImode.  */
6322   if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6323     regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6324
6325   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
6326      we prevent this case when sse is not available.  However some ABIs
6327      may require the result to be returned like integer TImode.  */
6328   else if (mode == TImode
6329            || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6330     regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6331
6332   /* 32-byte vector modes in %ymm0.   */
6333   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6334     regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6335
6336   /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
6337   else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6338     regno = FIRST_FLOAT_REG;
6339   else
6340     /* Most things go in %eax.  */
6341     regno = AX_REG;
6342
6343   /* Override FP return register with %xmm0 for local functions when
6344      SSE math is enabled or for functions with sseregparm attribute.  */
6345   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6346     {
6347       int sse_level = ix86_function_sseregparm (fntype, fn, false);
6348       if ((sse_level >= 1 && mode == SFmode)
6349           || (sse_level == 2 && mode == DFmode))
6350         regno = FIRST_SSE_REG;
6351     }
6352
6353   /* OImode shouldn't be used directly.  */
6354   gcc_assert (mode != OImode);
6355
6356   return gen_rtx_REG (orig_mode, regno);
6357 }
6358
6359 static rtx
6360 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6361                    const_tree valtype)
6362 {
6363   rtx ret;
6364
6365   /* Handle libcalls, which don't provide a type node.  */
6366   if (valtype == NULL)
6367     {
6368       switch (mode)
6369         {
6370         case SFmode:
6371         case SCmode:
6372         case DFmode:
6373         case DCmode:
6374         case TFmode:
6375         case SDmode:
6376         case DDmode:
6377         case TDmode:
6378           return gen_rtx_REG (mode, FIRST_SSE_REG);
6379         case XFmode:
6380         case XCmode:
6381           return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6382         case TCmode:
6383           return NULL;
6384         default:
6385           return gen_rtx_REG (mode, AX_REG);
6386         }
6387     }
6388
6389   ret = construct_container (mode, orig_mode, valtype, 1,
6390                              X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6391                              x86_64_int_return_registers, 0);
6392
6393   /* For zero sized structures, construct_container returns NULL, but we
6394      need to keep rest of compiler happy by returning meaningful value.  */
6395   if (!ret)
6396     ret = gen_rtx_REG (orig_mode, AX_REG);
6397
6398   return ret;
6399 }
6400
6401 static rtx
6402 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6403 {
6404   unsigned int regno = AX_REG;
6405
6406   if (TARGET_SSE)
6407     {
6408       switch (GET_MODE_SIZE (mode))
6409         {
6410         case 16:
6411           if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6412              && !COMPLEX_MODE_P (mode))
6413             regno = FIRST_SSE_REG;
6414           break;
6415         case 8:
6416         case 4:
6417           if (mode == SFmode || mode == DFmode)
6418             regno = FIRST_SSE_REG;
6419           break;
6420         default:
6421           break;
6422         }
6423     }
6424   return gen_rtx_REG (orig_mode, regno);
6425 }
6426
6427 static rtx
6428 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6429                        enum machine_mode orig_mode, enum machine_mode mode)
6430 {
6431   const_tree fn, fntype;
6432
6433   fn = NULL_TREE;
6434   if (fntype_or_decl && DECL_P (fntype_or_decl))
6435     fn = fntype_or_decl;
6436   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6437
6438   if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6439     return function_value_ms_64 (orig_mode, mode);
6440   else if (TARGET_64BIT)
6441     return function_value_64 (orig_mode, mode, valtype);
6442   else
6443     return function_value_32 (orig_mode, mode, fntype, fn);
6444 }
6445
6446 static rtx
6447 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6448                      bool outgoing ATTRIBUTE_UNUSED)
6449 {
6450   enum machine_mode mode, orig_mode;
6451
6452   orig_mode = TYPE_MODE (valtype);
6453   mode = type_natural_mode (valtype, NULL);
6454   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6455 }
6456
6457 rtx
6458 ix86_libcall_value (enum machine_mode mode)
6459 {
6460   return ix86_function_value_1 (NULL, NULL, mode, mode);
6461 }
6462
6463 /* Return true iff type is returned in memory.  */
6464
6465 static int ATTRIBUTE_UNUSED
6466 return_in_memory_32 (const_tree type, enum machine_mode mode)
6467 {
6468   HOST_WIDE_INT size;
6469
6470   if (mode == BLKmode)
6471     return 1;
6472
6473   size = int_size_in_bytes (type);
6474
6475   if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6476     return 0;
6477
6478   if (VECTOR_MODE_P (mode) || mode == TImode)
6479     {
6480       /* User-created vectors small enough to fit in EAX.  */
6481       if (size < 8)
6482         return 0;
6483
6484       /* MMX/3dNow values are returned in MM0,
6485          except when it doesn't exits.  */
6486       if (size == 8)
6487         return (TARGET_MMX ? 0 : 1);
6488
6489       /* SSE values are returned in XMM0, except when it doesn't exist.  */
6490       if (size == 16)
6491         return (TARGET_SSE ? 0 : 1);
6492
6493       /* AVX values are returned in YMM0, except when it doesn't exist.  */
6494       if (size == 32)
6495         return TARGET_AVX ? 0 : 1;
6496     }
6497
6498   if (mode == XFmode)
6499     return 0;
6500
6501   if (size > 12)
6502     return 1;
6503
6504   /* OImode shouldn't be used directly.  */
6505   gcc_assert (mode != OImode);
6506
6507   return 0;
6508 }
6509
6510 static int ATTRIBUTE_UNUSED
6511 return_in_memory_64 (const_tree type, enum machine_mode mode)
6512 {
6513   int needed_intregs, needed_sseregs;
6514   return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6515 }
6516
6517 static int ATTRIBUTE_UNUSED
6518 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6519 {
6520   HOST_WIDE_INT size = int_size_in_bytes (type);
6521
6522   /* __m128 is returned in xmm0.  */
6523   if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6524       && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6525     return 0;
6526
6527   /* Otherwise, the size must be exactly in [1248]. */
6528   return (size != 1 && size != 2 && size != 4 && size != 8);
6529 }
6530
6531 static bool
6532 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6533 {
6534 #ifdef SUBTARGET_RETURN_IN_MEMORY
6535   return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6536 #else
6537   const enum machine_mode mode = type_natural_mode (type, NULL);
6538
6539   if (TARGET_64BIT)
6540     {
6541       if (ix86_function_type_abi (fntype) == MS_ABI)
6542         return return_in_memory_ms_64 (type, mode);
6543       else
6544         return return_in_memory_64 (type, mode);
6545     }
6546   else
6547     return return_in_memory_32 (type, mode);
6548 #endif
6549 }
6550
/* Return true iff TYPE is returned in memory.  This version is used
   on Solaris 10.  It is similar to the generic ix86_return_in_memory,
   but differs notably in that when MMX is available, 8-byte vectors
   are returned in memory, rather than in MMX registers.  */
6555
6556 bool
6557 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6558 {
6559   int size;
6560   enum machine_mode mode = type_natural_mode (type, NULL);
6561
6562   if (TARGET_64BIT)
6563     return return_in_memory_64 (type, mode);
6564
6565   if (mode == BLKmode)
6566     return 1;
6567
6568   size = int_size_in_bytes (type);
6569
6570   if (VECTOR_MODE_P (mode))
6571     {
6572       /* Return in memory only if MMX registers *are* available.  This
6573          seems backwards, but it is consistent with the existing
6574          Solaris x86 ABI.  */
6575       if (size == 8)
6576         return TARGET_MMX;
6577       if (size == 16)
6578         return !TARGET_SSE;
6579     }
6580   else if (mode == TImode)
6581     return !TARGET_SSE;
6582   else if (mode == XFmode)
6583     return 0;
6584
6585   return size > 12;
6586 }
6587
6588 /* When returning SSE vector types, we have a choice of either
6589      (1) being abi incompatible with a -march switch, or
6590      (2) generating an error.
6591    Given no good solution, I think the safest thing is one warning.
6592    The user won't be able to use -Werror, but....
6593
6594    Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6595    called in response to actually generating a caller or callee that
6596    uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
6597    via aggregate_value_p for general type probing from tree-ssa.  */
6598
6599 static rtx
6600 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6601 {
6602   static bool warnedsse, warnedmmx;
6603
6604   if (!TARGET_64BIT && type)
6605     {
6606       /* Look at the return type of the function, not the function type.  */
6607       enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6608
6609       if (!TARGET_SSE && !warnedsse)
6610         {
6611           if (mode == TImode
6612               || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6613             {
6614               warnedsse = true;
6615               warning (0, "SSE vector return without SSE enabled "
6616                        "changes the ABI");
6617             }
6618         }
6619
6620       if (!TARGET_MMX && !warnedmmx)
6621         {
6622           if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6623             {
6624               warnedmmx = true;
6625               warning (0, "MMX vector return without MMX enabled "
6626                        "changes the ABI");
6627             }
6628         }
6629     }
6630
6631   return NULL;
6632 }
6633
6634 \f
6635 /* Create the va_list data type.  */
6636
/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
6639
6640 static tree
6641 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6642 {
6643   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6644
6645   /* For i386 we use plain pointer to argument area.  */
6646   if (!TARGET_64BIT || abi == MS_ABI)
6647     return build_pointer_type (char_type_node);
6648
6649   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6650   type_decl = build_decl (BUILTINS_LOCATION,
6651                           TYPE_DECL, get_identifier ("__va_list_tag"), record);
6652
6653   f_gpr = build_decl (BUILTINS_LOCATION,
6654                       FIELD_DECL, get_identifier ("gp_offset"),
6655                       unsigned_type_node);
6656   f_fpr = build_decl (BUILTINS_LOCATION,
6657                       FIELD_DECL, get_identifier ("fp_offset"),
6658                       unsigned_type_node);
6659   f_ovf = build_decl (BUILTINS_LOCATION,
6660                       FIELD_DECL, get_identifier ("overflow_arg_area"),
6661                       ptr_type_node);
6662   f_sav = build_decl (BUILTINS_LOCATION,
6663                       FIELD_DECL, get_identifier ("reg_save_area"),
6664                       ptr_type_node);
6665
6666   va_list_gpr_counter_field = f_gpr;
6667   va_list_fpr_counter_field = f_fpr;
6668
6669   DECL_FIELD_CONTEXT (f_gpr) = record;
6670   DECL_FIELD_CONTEXT (f_fpr) = record;
6671   DECL_FIELD_CONTEXT (f_ovf) = record;
6672   DECL_FIELD_CONTEXT (f_sav) = record;
6673
6674   TREE_CHAIN (record) = type_decl;
6675   TYPE_NAME (record) = type_decl;
6676   TYPE_FIELDS (record) = f_gpr;
6677   TREE_CHAIN (f_gpr) = f_fpr;
6678   TREE_CHAIN (f_fpr) = f_ovf;
6679   TREE_CHAIN (f_ovf) = f_sav;
6680
6681   layout_type (record);
6682
6683   /* The correct type is an array type of one element.  */
6684   return build_array_type (record, build_index_type (size_zero_node));
6685 }
6686
6687 /* Setup the builtin va_list data type and for 64-bit the additional
6688    calling convention specific va_list data types.  */
6689
6690 static tree
6691 ix86_build_builtin_va_list (void)
6692 {
6693   tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6694
6695   /* Initialize abi specific va_list builtin types.  */
6696   if (TARGET_64BIT)
6697     {
6698       tree t;
6699       if (ix86_abi == MS_ABI)
6700         {
6701           t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6702           if (TREE_CODE (t) != RECORD_TYPE)
6703             t = build_variant_type_copy (t);
6704           sysv_va_list_type_node = t;
6705         }
6706       else
6707         {
6708           t = ret;
6709           if (TREE_CODE (t) != RECORD_TYPE)
6710             t = build_variant_type_copy (t);
6711           sysv_va_list_type_node = t;
6712         }
6713       if (ix86_abi != MS_ABI)
6714         {
6715           t = ix86_build_builtin_va_list_abi (MS_ABI);
6716           if (TREE_CODE (t) != RECORD_TYPE)
6717             t = build_variant_type_copy (t);
6718           ms_va_list_type_node = t;
6719         }
6720       else
6721         {
6722           t = ret;
6723           if (TREE_CODE (t) != RECORD_TYPE)
6724             t = build_variant_type_copy (t);
6725           ms_va_list_type_node = t;
6726         }
6727     }
6728
6729   return ret;
6730 }
6731
6732 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
6733
6734 static void
6735 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6736 {
6737   rtx save_area, mem;
6738   rtx label;
6739   rtx label_ref;
6740   rtx tmp_reg;
6741   rtx nsse_reg;
6742   alias_set_type set;
6743   int i;
6744   int regparm = ix86_regparm;
6745
6746   if (cum->call_abi != ix86_abi)
6747     regparm = (ix86_abi != SYSV_ABI
6748                ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6749
6750   /* GPR size of varargs save area.  */
6751   if (cfun->va_list_gpr_size)
6752     ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6753   else
6754     ix86_varargs_gpr_size = 0;
6755
6756   /* FPR size of varargs save area.  We don't need it if we don't pass
6757      anything in SSE registers.  */
6758   if (cum->sse_nregs && cfun->va_list_fpr_size)
6759     ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6760   else
6761     ix86_varargs_fpr_size = 0;
6762
6763   if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6764     return;
6765
6766   save_area = frame_pointer_rtx;
6767   set = get_varargs_alias_set ();
6768
6769   for (i = cum->regno;
6770        i < regparm
6771        && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6772        i++)
6773     {
6774       mem = gen_rtx_MEM (Pmode,
6775                          plus_constant (save_area, i * UNITS_PER_WORD));
6776       MEM_NOTRAP_P (mem) = 1;
6777       set_mem_alias_set (mem, set);
6778       emit_move_insn (mem, gen_rtx_REG (Pmode,
6779                                         x86_64_int_parameter_registers[i]));
6780     }
6781
6782   if (ix86_varargs_fpr_size)
6783     {
6784       /* Now emit code to save SSE registers.  The AX parameter contains number
6785          of SSE parameter registers used to call this function.  We use
6786          sse_prologue_save insn template that produces computed jump across
6787          SSE saves.  We need some preparation work to get this working.  */
6788
6789       label = gen_label_rtx ();
6790       label_ref = gen_rtx_LABEL_REF (Pmode, label);
6791
6792       /* Compute address to jump to :
6793          label - eax*4 + nnamed_sse_arguments*4 Or
6794          label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
6795       tmp_reg = gen_reg_rtx (Pmode);
6796       nsse_reg = gen_reg_rtx (Pmode);
6797       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6798       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6799                               gen_rtx_MULT (Pmode, nsse_reg,
6800                                             GEN_INT (4))));
6801
6802       /* vmovaps is one byte longer than movaps.  */
6803       if (TARGET_AVX)
6804         emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6805                                 gen_rtx_PLUS (Pmode, tmp_reg,
6806                                               nsse_reg)));
6807
6808       if (cum->sse_regno)
6809         emit_move_insn
6810           (nsse_reg,
6811            gen_rtx_CONST (DImode,
6812                           gen_rtx_PLUS (DImode,
6813                                         label_ref,
6814                                         GEN_INT (cum->sse_regno
6815                                                  * (TARGET_AVX ? 5 : 4)))));
6816       else
6817         emit_move_insn (nsse_reg, label_ref);
6818       emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6819
6820       /* Compute address of memory block we save into.  We always use pointer
6821          pointing 127 bytes after first byte to store - this is needed to keep
6822          instruction size limited by 4 bytes (5 bytes for AVX) with one
6823          byte displacement.  */
6824       tmp_reg = gen_reg_rtx (Pmode);
6825       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6826                               plus_constant (save_area,
6827                                              ix86_varargs_gpr_size + 127)));
6828       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6829       MEM_NOTRAP_P (mem) = 1;
6830       set_mem_alias_set (mem, set);
6831       set_mem_align (mem, BITS_PER_WORD);
6832
6833       /* And finally do the dirty job!  */
6834       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6835                                         GEN_INT (cum->sse_regno), label));
6836     }
6837 }
6838
6839 static void
6840 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6841 {
6842   alias_set_type set = get_varargs_alias_set ();
6843   int i;
6844
6845   for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
6846     {
6847       rtx reg, mem;
6848
6849       mem = gen_rtx_MEM (Pmode,
6850                          plus_constant (virtual_incoming_args_rtx,
6851                                         i * UNITS_PER_WORD));
6852       MEM_NOTRAP_P (mem) = 1;
6853       set_mem_alias_set (mem, set);
6854
6855       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6856       emit_move_insn (mem, reg);
6857     }
6858 }
6859
6860 static void
6861 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6862                              tree type, int *pretend_size ATTRIBUTE_UNUSED,
6863                              int no_rtl)
6864 {
6865   CUMULATIVE_ARGS next_cum;
6866   tree fntype;
6867
6868   /* This argument doesn't appear to be used anymore.  Which is good,
6869      because the old code here didn't suppress rtl generation.  */
6870   gcc_assert (!no_rtl);
6871
6872   if (!TARGET_64BIT)
6873     return;
6874
6875   fntype = TREE_TYPE (current_function_decl);
6876
6877   /* For varargs, we do not want to skip the dummy va_dcl argument.
6878      For stdargs, we do want to skip the last named argument.  */
6879   next_cum = *cum;
6880   if (stdarg_p (fntype))
6881     function_arg_advance (&next_cum, mode, type, 1);
6882
6883   if (cum->call_abi == MS_ABI)
6884     setup_incoming_varargs_ms_64 (&next_cum);
6885   else
6886     setup_incoming_varargs_64 (&next_cum);
6887 }
6888
6889 /* Checks if TYPE is of kind va_list char *.  */
6890
6891 static bool
6892 is_va_list_char_pointer (tree type)
6893 {
6894   tree canonic;
6895
6896   /* For 32-bit it is always true.  */
6897   if (!TARGET_64BIT)
6898     return true;
6899   canonic = ix86_canonical_va_list_type (type);
6900   return (canonic == ms_va_list_type_node
6901           || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6902 }
6903
6904 /* Implement va_start.  */
6905
6906 static void
6907 ix86_va_start (tree valist, rtx nextarg)
6908 {
6909   HOST_WIDE_INT words, n_gpr, n_fpr;
6910   tree f_gpr, f_fpr, f_ovf, f_sav;
6911   tree gpr, fpr, ovf, sav, t;
6912   tree type;
6913
6914   /* Only 64bit target needs something special.  */
6915   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6916     {
6917       std_expand_builtin_va_start (valist, nextarg);
6918       return;
6919     }
6920
6921   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6922   f_fpr = TREE_CHAIN (f_gpr);
6923   f_ovf = TREE_CHAIN (f_fpr);
6924   f_sav = TREE_CHAIN (f_ovf);
6925
6926   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6927   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6928   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6929   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6930   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6931
6932   /* Count number of gp and fp argument registers used.  */
6933   words = crtl->args.info.words;
6934   n_gpr = crtl->args.info.regno;
6935   n_fpr = crtl->args.info.sse_regno;
6936
6937   if (cfun->va_list_gpr_size)
6938     {
6939       type = TREE_TYPE (gpr);
6940       t = build2 (MODIFY_EXPR, type,
6941                   gpr, build_int_cst (type, n_gpr * 8));
6942       TREE_SIDE_EFFECTS (t) = 1;
6943       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6944     }
6945
6946   if (TARGET_SSE && cfun->va_list_fpr_size)
6947     {
6948       type = TREE_TYPE (fpr);
6949       t = build2 (MODIFY_EXPR, type, fpr,
6950                   build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6951       TREE_SIDE_EFFECTS (t) = 1;
6952       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6953     }
6954
6955   /* Find the overflow area.  */
6956   type = TREE_TYPE (ovf);
6957   t = make_tree (type, crtl->args.internal_arg_pointer);
6958   if (words != 0)
6959     t = build2 (POINTER_PLUS_EXPR, type, t,
6960                 size_int (words * UNITS_PER_WORD));
6961   t = build2 (MODIFY_EXPR, type, ovf, t);
6962   TREE_SIDE_EFFECTS (t) = 1;
6963   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6964
6965   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6966     {
6967       /* Find the register save area.
6968          Prologue of the function save it right above stack frame.  */
6969       type = TREE_TYPE (sav);
6970       t = make_tree (type, frame_pointer_rtx);
6971       if (!ix86_varargs_gpr_size)
6972         t = build2 (POINTER_PLUS_EXPR, type, t,
6973                     size_int (-8 * X86_64_REGPARM_MAX));
6974       t = build2 (MODIFY_EXPR, type, sav, t);
6975       TREE_SIDE_EFFECTS (t) = 1;
6976       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6977     }
6978 }
6979
/* Implement va_arg.

   VALIST is the va_list object and TYPE the type of the argument to
   fetch.  The statements that locate the argument and advance the
   va_list are appended to *PRE_P; POST_P is only forwarded to
   std_gimplify_va_arg_expr.  The return value is an indirect reference
   through the computed address (with one extra indirection when the
   argument is passed by reference).

   32-bit targets and `char *' style va_lists are handled entirely by
   std_gimplify_va_arg_expr.  Otherwise the generated code first tries
   the register save area and falls back to the overflow area.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p)
{
  /* Register numbers handed to construct_container for the integer
     class.  Further down, REGNO * 8 is used as the offset of such a
     register within the integer part of the save area.  */
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  /* The four fields of the SYSV va_list record, in chain order.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = TREE_CHAIN (f_gpr);
  f_ovf = TREE_CHAIN (f_fpr);
  f_sav = TREE_CHAIN (f_ovf);

  /* GPR is built on its own indirect reference; the remaining three
     component references share the one stored back into VALIST.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
                build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* An argument passed by reference is fetched as a pointer to TYPE.  */
  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  /* Size of the argument in words, rounded up.  */
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (ix86_cfun_abi () == SYSV_ABI)
        {
          container = NULL;
          break;
        }

      /* Fall through when the function's ABI is not SYSV_ABI.  */
    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
                                       type, 0, X86_64_REGPARM_MAX,
                                       X86_64_SSE_REGPARM_MAX, intreg,
                                       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  /* A null CONTAINER skips the register path; the argument is then
     taken from the overflow area only.  */
  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      /* A temporary copy is required when the container is not a single
         register and TYPE's alignment exceeds 64 bits while integer
         registers are needed, or exceeds 128 bits in any case.  */
      need_temp = (!REG_P (container)
                   && ((needed_intregs && TYPE_ALIGN (type) > 64)
                       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
         on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
        {
          /* Verify that all registers are strictly consecutive  */
          if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
            {
              int i;

              /* Piece I must be SSE register I at byte offset I * 16.  */
              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 16)
                    need_temp = 1;
                }
            }
          else
            {
              int i;

              /* Piece I must be register number I at byte offset I * 8.  */
              for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
                {
                  rtx slot = XVECEXP (container, 0, i);
                  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
                      || INTVAL (XEXP (slot, 1)) != i * 8)
                    need_temp = 1;
                }
            }
        }
      /* Without a temporary the save-area address is the result itself;
         otherwise separate source pointers are used for the copy.  */
      if (!need_temp)
        {
          int_addr = addr;
          sse_addr = addr;
        }
      else
        {
          int_addr = create_tmp_var (ptr_type_node, "int_addr");
          sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
        }

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
        {
          /* Go to LAB_FALSE when gp_offset has reached the point where
             fewer than NEEDED_INTREGS integer slots remain.  */
          t = build_int_cst (TREE_TYPE (gpr),
                             (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
          t = build2 (GE_EXPR, boolean_type_node, gpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }
      if (needed_sseregs)
        {
          /* Same test for fp_offset, which is biased by the size of the
             integer part of the save area.  */
          t = build_int_cst (TREE_TYPE (fpr),
                             (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
                             + X86_64_REGPARM_MAX * 8);
          t = build2 (GE_EXPR, boolean_type_node, fpr, t);
          t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
          t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
          gimplify_and_add (t, pre_p);
        }

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
        {
          /* int_addr = gpr + sav; */
          t = fold_convert (sizetype, gpr);
          t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
          gimplify_assign (int_addr, t, pre_p);
        }
      if (needed_sseregs)
        {
          /* sse_addr = fpr + sav; */
          t = fold_convert (sizetype, fpr);
          t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
          gimplify_assign (sse_addr, t, pre_p);
        }
      if (need_temp)
        {
          int i;
          tree temp = create_tmp_var (type, "va_arg_tmp");

          /* addr = &temp; */
          t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
          gimplify_assign (addr, t, pre_p);

          /* Copy the argument piece by piece from the save area into
             the temporary.  */
          for (i = 0; i < XVECLEN (container, 0); i++)
            {
              rtx slot = XVECEXP (container, 0, i);
              rtx reg = XEXP (slot, 0);
              enum machine_mode mode = GET_MODE (reg);
              tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
              tree addr_type = build_pointer_type (piece_type);
              tree daddr_type = build_pointer_type_for_mode (piece_type,
                                                             ptr_mode, true);
              tree src_addr, src;
              int src_offset;
              tree dest_addr, dest;

              /* The source is the register's slot, relative to the SSE
                 or the integer pointer computed above.  */
              if (SSE_REGNO_P (REGNO (reg)))
                {
                  src_addr = sse_addr;
                  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
                }
              else
                {
                  src_addr = int_addr;
                  src_offset = REGNO (reg) * 8;
                }
              src_addr = fold_convert (addr_type, src_addr);
              src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
                                      size_int (src_offset));
              src = build_va_arg_indirect_ref (src_addr);

              /* The destination is the piece's byte offset within the
                 temporary, as recorded in the container.  */
              dest_addr = fold_convert (daddr_type, addr);
              dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
                                       size_int (INTVAL (XEXP (slot, 1))));
              dest = build_va_arg_indirect_ref (dest_addr);

              gimplify_assign (dest, src, pre_p);
            }
        }

      /* Advance gp_offset and fp_offset past the registers consumed.  */
      if (needed_intregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
                      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
          gimplify_assign (gpr, t, pre_p);
        }

      if (needed_sseregs)
        {
          t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
                      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
          gimplify_assign (fpr, t, pre_p);
        }

      /* The register path is complete: jump over the overflow-area
         code, which starts at LAB_FALSE.  */
      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64
      || integer_zerop (TYPE_SIZE (type)))
    t = ovf;
 else
    {
      /* Round the overflow pointer up to ALIGN bytes:
         (ovf + align - 1) & -align.  */
      HOST_WIDE_INT align = arg_boundary / 8;
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
                  size_int (align - 1));
      t = fold_convert (sizetype, t);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  size_int (-align));
      t = fold_convert (TREE_TYPE (ovf), t);
    }
  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  /* Advance overflow_arg_area past the argument, in whole words.  */
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
              size_int (rsize * UNITS_PER_WORD));
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  /* For by-reference arguments ADDR points at a pointer to the value,
     so dereference once more.  */
  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
7246 \f
7247 /* Return nonzero if OPNUM's MEM should be matched
7248    in movabs* patterns.  */
7249
7250 int
7251 ix86_check_movabs (rtx insn, int opnum)
7252 {
7253   rtx set, mem;
7254
7255   set = PATTERN (insn);
7256   if (GET_CODE (set) == PARALLEL)
7257     set = XVECEXP (set, 0, 0);
7258   gcc_assert (GET_CODE (set) == SET);
7259   mem = XEXP (set, opnum);
7260   while (GET_CODE (mem) == SUBREG)
7261     mem = SUBREG_REG (mem);
7262   gcc_assert (MEM_P (mem));
7263   return (volatile_ok || !MEM_VOLATILE_P (mem));
7264 }
7265 \f
7266 /* Initialize the table of extra 80387 mathematical constants.  */
7267
7268 static void
7269 init_ext_80387_constants (void)
7270 {
7271   static const char * cst[5] =
7272   {
7273     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
7274     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
7275     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
7276     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
7277     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
7278   };
7279   int i;
7280
7281   for (i = 0; i < 5; i++)
7282     {
7283       real_from_string (&ext_80387_constants_table[i], cst[i]);
7284       /* Ensure each constant is rounded to XFmode precision.  */
7285       real_convert (&ext_80387_constants_table[i],
7286                     XFmode, &ext_80387_constants_table[i]);
7287     }
7288
7289   ext_80387_constants_init = 1;
7290 }
7291
7292 /* Return true if the constant is something that can be loaded with
7293    a special instruction.  */
7294
7295 int
7296 standard_80387_constant_p (rtx x)
7297 {
7298   enum machine_mode mode = GET_MODE (x);
7299
7300   REAL_VALUE_TYPE r;
7301
7302   if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7303     return -1;
7304
7305   if (x == CONST0_RTX (mode))
7306     return 1;
7307   if (x == CONST1_RTX (mode))
7308     return 2;
7309
7310   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7311
7312   /* For XFmode constants, try to find a special 80387 instruction when
7313      optimizing for size or on those CPUs that benefit from them.  */
7314   if (mode == XFmode
7315       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7316     {
7317       int i;
7318
7319       if (! ext_80387_constants_init)
7320         init_ext_80387_constants ();
7321
7322       for (i = 0; i < 5; i++)
7323         if (real_identical (&r, &ext_80387_constants_table[i]))
7324           return i + 3;
7325     }
7326
7327   /* Load of the constant -0.0 or -1.0 will be split as
7328      fldz;fchs or fld1;fchs sequence.  */
7329   if (real_isnegzero (&r))
7330     return 8;
7331   if (real_identical (&r, &dconstm1))
7332     return 9;
7333
7334   return 0;
7335 }
7336
7337 /* Return the opcode of the special instruction to be used to load
7338    the constant X.  */
7339
7340 const char *
7341 standard_80387_constant_opcode (rtx x)
7342 {
7343   switch (standard_80387_constant_p (x))
7344     {
7345     case 1:
7346       return "fldz";
7347     case 2:
7348       return "fld1";
7349     case 3:
7350       return "fldlg2";
7351     case 4:
7352       return "fldln2";
7353     case 5:
7354       return "fldl2e";
7355     case 6:
7356       return "fldl2t";
7357     case 7:
7358       return "fldpi";
7359     case 8:
7360     case 9:
7361       return "#";
7362     default:
7363       gcc_unreachable ();
7364     }
7365 }
7366
7367 /* Return the CONST_DOUBLE representing the 80387 constant that is
7368    loaded by the specified special instruction.  The argument IDX
7369    matches the return value from standard_80387_constant_p.  */
7370
7371 rtx
7372 standard_80387_constant_rtx (int idx)
7373 {
7374   int i;
7375
7376   if (! ext_80387_constants_init)
7377     init_ext_80387_constants ();
7378
7379   switch (idx)
7380     {
7381     case 3:
7382     case 4:
7383     case 5:
7384     case 6:
7385     case 7:
7386       i = idx - 3;
7387       break;
7388
7389     default:
7390       gcc_unreachable ();
7391     }
7392
7393   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7394                                        XFmode);
7395 }
7396
7397 /* Return 1 if X is all 0s and 2 if x is all 1s
7398    in supported SSE vector mode.  */
7399
7400 int
7401 standard_sse_constant_p (rtx x)
7402 {
7403   enum machine_mode mode = GET_MODE (x);
7404
7405   if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7406     return 1;
7407   if (vector_all_ones_operand (x, mode))
7408     switch (mode)
7409       {
7410       case V16QImode:
7411       case V8HImode:
7412       case V4SImode:
7413       case V2DImode:
7414         if (TARGET_SSE2)
7415           return 2;
7416       default:
7417         break;
7418       }
7419
7420   return 0;
7421 }
7422
7423 /* Return the opcode of the special instruction to be used to load
7424    the constant X.  */
7425
7426 const char *
7427 standard_sse_constant_opcode (rtx insn, rtx x)
7428 {
7429   switch (standard_sse_constant_p (x))
7430     {
7431     case 1:
7432       switch (get_attr_mode (insn))
7433         {
7434         case MODE_V4SF:
7435           return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7436         case MODE_V2DF:
7437           return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7438         case MODE_TI:
7439           return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7440         case MODE_V8SF:
7441           return "vxorps\t%x0, %x0, %x0";
7442         case MODE_V4DF:
7443           return "vxorpd\t%x0, %x0, %x0";
7444         case MODE_OI:
7445           return "vpxor\t%x0, %x0, %x0";
7446         default:
7447           break;
7448         }
7449     case 2:
7450       return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7451     default:
7452       break;
7453     }
7454   gcc_unreachable ();
7455 }
7456
7457 /* Returns 1 if OP contains a symbol reference */
7458
7459 int
7460 symbolic_reference_mentioned_p (rtx op)
7461 {
7462   const char *fmt;
7463   int i;
7464
7465   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7466     return 1;
7467
7468   fmt = GET_RTX_FORMAT (GET_CODE (op));
7469   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7470     {
7471       if (fmt[i] == 'E')
7472         {
7473           int j;
7474
7475           for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7476             if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7477               return 1;
7478         }
7479
7480       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7481         return 1;
7482     }
7483
7484   return 0;
7485 }
7486
7487 /* Return 1 if it is appropriate to emit `ret' instructions in the
7488    body of a function.  Do this only if the epilogue is simple, needing a
7489    couple of insns.  Prior to reloading, we can't tell how many registers
7490    must be saved, so return 0 then.  Return 0 if there is no frame
7491    marker to de-allocate.  */
7492
7493 int
7494 ix86_can_use_return_insn_p (void)
7495 {
7496   struct ix86_frame frame;
7497
7498   if (! reload_completed || frame_pointer_needed)
7499     return 0;
7500
7501   /* Don't allow more than 32 pop, since that's all we can do
7502      with one instruction.  */
7503   if (crtl->args.pops_args
7504       && crtl->args.size >= 32768)
7505     return 0;
7506
7507   ix86_compute_frame_layout (&frame);
7508   return frame.to_allocate == 0 && frame.padding0 == 0
7509          && (frame.nregs + frame.nsseregs) == 0;
7510 }
7511 \f
7512 /* Value should be nonzero if functions must have frame pointers.
7513    Zero means the frame pointer need not be set up (and parms may
7514    be accessed via the stack pointer) in functions that seem suitable.  */
7515
7516 static bool
7517 ix86_frame_pointer_required (void)
7518 {
7519   /* If we accessed previous frames, then the generated code expects
7520      to be able to access the saved ebp value in our frame.  */
7521   if (cfun->machine->accesses_prev_frame)
7522     return true;
7523
7524   /* Several x86 os'es need a frame pointer for other reasons,
7525      usually pertaining to setjmp.  */
7526   if (SUBTARGET_FRAME_POINTER_REQUIRED)
7527     return true;
7528
7529   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7530      the frame pointer by default.  Turn it back on now if we've not
7531      got a leaf function.  */
7532   if (TARGET_OMIT_LEAF_FRAME_POINTER
7533       && (!current_function_is_leaf
7534           || ix86_current_function_calls_tls_descriptor))
7535     return true;
7536
7537   if (crtl->profile)
7538     return true;
7539
7540   return false;
7541 }
7542
7543 /* Record that the current function accesses previous call frames.  */
7544
7545 void
7546 ix86_setup_frame_addresses (void)
7547 {
7548   cfun->machine->accesses_prev_frame = 1;
7549 }
7550 \f
/* Unless already defined, USE_HIDDEN_LINKONCE is 1 when HAVE_GAS_HIDDEN
   is defined and SUPPORTS_ONE_ONLY is nonzero, or when TARGET_MACHO is
   nonzero; otherwise it is 0.  It selects the naming of pc thunks in
   get_pc_thunk_name and how ix86_file_end emits them.  */
#ifndef USE_HIDDEN_LINKONCE
# if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

/* Bit mask indexed by register number.  ix86_file_end tests bit REGNO
   and emits the pc thunk for that register only when the bit is set.  */
static int pic_labels_used;
7560
7561 /* Fills in the label name that should be used for a pc thunk for
7562    the given register.  */
7563
7564 static void
7565 get_pc_thunk_name (char name[32], unsigned int regno)
7566 {
7567   gcc_assert (!TARGET_64BIT);
7568
7569   if (USE_HIDDEN_LINKONCE)
7570     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7571   else
7572     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7573 }
7574
7575
7576 /* This function generates code for -fpic that loads %ebx with
7577    the return address of the caller and then returns.  */
7578
7579 void
7580 ix86_file_end (void)
7581 {
7582   rtx xops[2];
7583   int regno;
7584
7585   for (regno = 0; regno < 8; ++regno)
7586     {
7587       char name[32];
7588
7589       if (! ((pic_labels_used >> regno) & 1))
7590         continue;
7591
7592       get_pc_thunk_name (name, regno);
7593
7594 #if TARGET_MACHO
7595       if (TARGET_MACHO)
7596         {
7597           switch_to_section (darwin_sections[text_coal_section]);
7598           fputs ("\t.weak_definition\t", asm_out_file);
7599           assemble_name (asm_out_file, name);
7600           fputs ("\n\t.private_extern\t", asm_out_file);
7601           assemble_name (asm_out_file, name);
7602           fputs ("\n", asm_out_file);
7603           ASM_OUTPUT_LABEL (asm_out_file, name);
7604         }
7605       else
7606 #endif
7607       if (USE_HIDDEN_LINKONCE)
7608         {
7609           tree decl;
7610
7611           decl = build_decl (BUILTINS_LOCATION,
7612                              FUNCTION_DECL, get_identifier (name),
7613                              error_mark_node);
7614           TREE_PUBLIC (decl) = 1;
7615           TREE_STATIC (decl) = 1;
7616           DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7617
7618           (*targetm.asm_out.unique_section) (decl, 0);
7619           switch_to_section (get_named_section (decl, NULL, 0));
7620
7621           (*targetm.asm_out.globalize_label) (asm_out_file, name);
7622           fputs ("\t.hidden\t", asm_out_file);
7623           assemble_name (asm_out_file, name);
7624           putc ('\n', asm_out_file);
7625           ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7626         }
7627       else
7628         {
7629           switch_to_section (text_section);
7630           ASM_OUTPUT_LABEL (asm_out_file, name);
7631         }
7632
7633       xops[0] = gen_rtx_REG (Pmode, regno);
7634       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7635       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7636       output_asm_insn ("ret", xops);
7637     }
7638
7639   if (NEED_INDICATE_EXEC_STACK)
7640     file_end_indicate_exec_stack ();
7641 }
7642
7643 /* Emit code for the SET_GOT patterns.  */
7644
7645 const char *
7646 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7647 {
7648   rtx xops[3];
7649
7650   xops[0] = dest;
7651
7652   if (TARGET_VXWORKS_RTP && flag_pic)
7653     {
7654       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
7655       xops[2] = gen_rtx_MEM (Pmode,
7656                              gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7657       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7658
7659       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7660          Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7661          an unadorned address.  */
7662       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7663       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7664       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7665       return "";
7666     }
7667
7668   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7669
7670   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7671     {
7672       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7673
7674       if (!flag_pic)
7675         output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7676       else
7677         output_asm_insn ("call\t%a2", xops);
7678
7679 #if TARGET_MACHO
7680       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7681          is what will be referenced by the Mach-O PIC subsystem.  */
7682       if (!label)
7683         ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7684 #endif
7685
7686       (*targetm.asm_out.internal_label) (asm_out_file, "L",
7687                                  CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7688
7689       if (flag_pic)
7690         output_asm_insn ("pop%z0\t%0", xops);
7691     }
7692   else
7693     {
7694       char name[32];
7695       get_pc_thunk_name (name, REGNO (dest));
7696       pic_labels_used |= 1 << REGNO (dest);
7697
7698       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7699       xops[2] = gen_rtx_MEM (QImode, xops[2]);
7700       output_asm_insn ("call\t%X2", xops);
7701       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7702          is what will be referenced by the Mach-O PIC subsystem.  */
7703 #if TARGET_MACHO
7704       if (!label)
7705         ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7706       else
7707         targetm.asm_out.internal_label (asm_out_file, "L",
7708                                            CODE_LABEL_NUMBER (label));
7709 #endif
7710     }
7711
7712   if (TARGET_MACHO)
7713     return "";
7714
7715   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7716     output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7717   else
7718     output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7719
7720   return "";
7721 }
7722
7723 /* Generate an "push" pattern for input ARG.  */
7724
7725 static rtx
7726 gen_push (rtx arg)
7727 {
7728   if (ix86_cfa_state->reg == stack_pointer_rtx)
7729     ix86_cfa_state->offset += UNITS_PER_WORD;
7730
7731   return gen_rtx_SET (VOIDmode,
7732                       gen_rtx_MEM (Pmode,
7733                                    gen_rtx_PRE_DEC (Pmode,
7734                                                     stack_pointer_rtx)),
7735                       arg);
7736 }
7737
7738 /* Return >= 0 if there is an unused call-clobbered register available
7739    for the entire function.  */
7740
7741 static unsigned int
7742 ix86_select_alt_pic_regnum (void)
7743 {
7744   if (current_function_is_leaf && !crtl->profile
7745       && !ix86_current_function_calls_tls_descriptor)
7746     {
7747       int i, drap;
7748       /* Can't use the same register for both PIC and DRAP.  */
7749       if (crtl->drap_reg)
7750         drap = REGNO (crtl->drap_reg);
7751       else
7752         drap = -1;
7753       for (i = 2; i >= 0; --i)
7754         if (i != drap && !df_regs_ever_live_p (i))
7755           return i;
7756     }
7757
7758   return INVALID_REGNUM;
7759 }
7760
7761 /* Return 1 if we need to save REGNO.  */
7762 static int
7763 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7764 {
7765   if (pic_offset_table_rtx
7766       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7767       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7768           || crtl->profile
7769           || crtl->calls_eh_return
7770           || crtl->uses_const_pool))
7771     {
7772       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7773         return 0;
7774       return 1;
7775     }
7776
7777   if (crtl->calls_eh_return && maybe_eh_return)
7778     {
7779       unsigned i;
7780       for (i = 0; ; i++)
7781         {
7782           unsigned test = EH_RETURN_DATA_REGNO (i);
7783           if (test == INVALID_REGNUM)
7784             break;
7785           if (test == regno)
7786             return 1;
7787         }
7788     }
7789
7790   if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
7791     return 1;
7792
7793   return (df_regs_ever_live_p (regno)
7794           && !call_used_regs[regno]
7795           && !fixed_regs[regno]
7796           && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7797 }
7798
7799 /* Return number of saved general prupose registers.  */
7800
7801 static int
7802 ix86_nsaved_regs (void)
7803 {
7804   int nregs = 0;
7805   int regno;
7806
7807   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7808     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7809       nregs ++;
7810   return nregs;
7811 }
7812
7813 /* Return number of saved SSE registrers.  */
7814
7815 static int
7816 ix86_nsaved_sseregs (void)
7817 {
7818   int nregs = 0;
7819   int regno;
7820
7821   if (ix86_cfun_abi () != MS_ABI)
7822     return 0;
7823   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7824     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7825       nregs ++;
7826   return nregs;
7827 }
7828
7829 /* Given FROM and TO register numbers, say whether this elimination is
7830    allowed.  If stack alignment is needed, we can only replace argument
7831    pointer with hard frame pointer, or replace frame pointer with stack
7832    pointer.  Otherwise, frame pointer elimination is automatically
7833    handled and all other eliminations are valid.  */
7834
7835 static bool
7836 ix86_can_eliminate (const int from, const int to)
7837 {
7838   if (stack_realign_fp)
7839     return ((from == ARG_POINTER_REGNUM
7840              && to == HARD_FRAME_POINTER_REGNUM)
7841             || (from == FRAME_POINTER_REGNUM
7842                 && to == STACK_POINTER_REGNUM));
7843   else
7844     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
7845 }
7846
7847 /* Return the offset between two registers, one to be eliminated, and the other
7848    its replacement, at the start of a routine.  */
7849
7850 HOST_WIDE_INT
7851 ix86_initial_elimination_offset (int from, int to)
7852 {
7853   struct ix86_frame frame;
7854   ix86_compute_frame_layout (&frame);
7855
7856   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7857     return frame.hard_frame_pointer_offset;
7858   else if (from == FRAME_POINTER_REGNUM
7859            && to == HARD_FRAME_POINTER_REGNUM)
7860     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7861   else
7862     {
7863       gcc_assert (to == STACK_POINTER_REGNUM);
7864
7865       if (from == ARG_POINTER_REGNUM)
7866         return frame.stack_pointer_offset;
7867
7868       gcc_assert (from == FRAME_POINTER_REGNUM);
7869       return frame.stack_pointer_offset - frame.frame_pointer_offset;
7870     }
7871 }
7872
7873 /* In a dynamically-aligned function, we can't know the offset from
7874    stack pointer to frame pointer, so we must ensure that setjmp
7875    eliminates fp against the hard fp (%ebp) rather than trying to
7876    index from %esp up to the top of the frame across a gap that is
7877    of unknown (at compile-time) size.  */
7878 static rtx
7879 ix86_builtin_setjmp_frame_value (void)
7880 {
7881   return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7882 }
7883
7884 /* Fill structure ix86_frame about frame of currently computed function.  */
7885
7886 static void
7887 ix86_compute_frame_layout (struct ix86_frame *frame)
7888 {
7889   unsigned int stack_alignment_needed;
7890   HOST_WIDE_INT offset;
7891   unsigned int preferred_alignment;
7892   HOST_WIDE_INT size = get_frame_size ();
7893
7894   frame->nregs = ix86_nsaved_regs ();
7895   frame->nsseregs = ix86_nsaved_sseregs ();
7896
7897   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7898   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7899
7900   /* MS ABI seem to require stack alignment to be always 16 except for function
7901      prologues.  */
7902   if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7903     {
7904       preferred_alignment = 16;
7905       stack_alignment_needed = 16;
7906       crtl->preferred_stack_boundary = 128;
7907       crtl->stack_alignment_needed = 128;
7908     }
7909
7910   gcc_assert (!size || stack_alignment_needed);
7911   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7912   gcc_assert (preferred_alignment <= stack_alignment_needed);
7913
7914   /* During reload iteration the amount of registers saved can change.
7915      Recompute the value as needed.  Do not recompute when amount of registers
7916      didn't change as reload does multiple calls to the function and does not
7917      expect the decision to change within single iteration.  */
7918   if (!optimize_function_for_size_p (cfun)
7919       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7920     {
7921       int count = frame->nregs;
7922
7923       cfun->machine->use_fast_prologue_epilogue_nregs = count;
7924       /* The fast prologue uses move instead of push to save registers.  This
7925          is significantly longer, but also executes faster as modern hardware
7926          can execute the moves in parallel, but can't do that for push/pop.
7927
7928          Be careful about choosing what prologue to emit:  When function takes
7929          many instructions to execute we may use slow version as well as in
7930          case function is known to be outside hot spot (this is known with
7931          feedback only).  Weight the size of function by number of registers
7932          to save as it is cheap to use one or two push instructions but very
7933          slow to use many of them.  */
7934       if (count)
7935         count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7936       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7937           || (flag_branch_probabilities
7938               && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7939         cfun->machine->use_fast_prologue_epilogue = false;
7940       else
7941         cfun->machine->use_fast_prologue_epilogue
7942            = !expensive_function_p (count);
7943     }
7944   if (TARGET_PROLOGUE_USING_MOVE
7945       && cfun->machine->use_fast_prologue_epilogue)
7946     frame->save_regs_using_mov = true;
7947   else
7948     frame->save_regs_using_mov = false;
7949
7950   /* Skip return address.  */
7951   offset = UNITS_PER_WORD;
7952
7953   /* Skip pushed static chain.  */
7954   if (ix86_static_chain_on_stack)
7955     offset += UNITS_PER_WORD;
7956
7957   /* Skip saved base pointer.  */
7958   if (frame_pointer_needed)
7959     offset += UNITS_PER_WORD;
7960
7961   frame->hard_frame_pointer_offset = offset;
7962
7963   /* Set offset to aligned because the realigned frame starts from
7964      here.  */
7965   if (stack_realign_fp)
7966     offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7967
7968   /* Register save area */
7969   offset += frame->nregs * UNITS_PER_WORD;
7970
7971   /* Align SSE reg save area.  */
7972   if (frame->nsseregs)
7973     frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7974   else
7975     frame->padding0 = 0;
7976
7977   /* SSE register save area.  */
7978   offset += frame->padding0 + frame->nsseregs * 16;
7979
7980   /* Va-arg area */
7981   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7982   offset += frame->va_arg_size;
7983
7984   /* Align start of frame for local function.  */
7985   frame->padding1 = ((offset + stack_alignment_needed - 1)
7986                      & -stack_alignment_needed) - offset;
7987
7988   offset += frame->padding1;
7989
7990   /* Frame pointer points here.  */
7991   frame->frame_pointer_offset = offset;
7992
7993   offset += size;
7994
7995   /* Add outgoing arguments area.  Can be skipped if we eliminated
7996      all the function calls as dead code.
7997      Skipping is however impossible when function calls alloca.  Alloca
7998      expander assumes that last crtl->outgoing_args_size
7999      of stack frame are unused.  */
8000   if (ACCUMULATE_OUTGOING_ARGS
8001       && (!current_function_is_leaf || cfun->calls_alloca
8002           || ix86_current_function_calls_tls_descriptor))
8003     {
8004       offset += crtl->outgoing_args_size;
8005       frame->outgoing_arguments_size = crtl->outgoing_args_size;
8006     }
8007   else
8008     frame->outgoing_arguments_size = 0;
8009
8010   /* Align stack boundary.  Only needed if we're calling another function
8011      or using alloca.  */
8012   if (!current_function_is_leaf || cfun->calls_alloca
8013       || ix86_current_function_calls_tls_descriptor)
8014     frame->padding2 = ((offset + preferred_alignment - 1)
8015                        & -preferred_alignment) - offset;
8016   else
8017     frame->padding2 = 0;
8018
8019   offset += frame->padding2;
8020
8021   /* We've reached end of stack frame.  */
8022   frame->stack_pointer_offset = offset;
8023
8024   /* Size prologue needs to allocate.  */
8025   frame->to_allocate =
8026     (size + frame->padding1 + frame->padding2
8027      + frame->outgoing_arguments_size + frame->va_arg_size);
8028
8029   if ((!frame->to_allocate && frame->nregs <= 1)
8030       || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
8031     frame->save_regs_using_mov = false;
8032
8033   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8034       && current_function_sp_is_unchanging
8035       && current_function_is_leaf
8036       && !ix86_current_function_calls_tls_descriptor)
8037     {
8038       frame->red_zone_size = frame->to_allocate;
8039       if (frame->save_regs_using_mov)
8040         frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8041       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8042         frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8043     }
8044   else
8045     frame->red_zone_size = 0;
8046   frame->to_allocate -= frame->red_zone_size;
8047   frame->stack_pointer_offset -= frame->red_zone_size;
8048 }
8049
8050 /* Emit code to save registers in the prologue.  */
8051
8052 static void
8053 ix86_emit_save_regs (void)
8054 {
8055   unsigned int regno;
8056   rtx insn;
8057
8058   for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8059     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8060       {
8061         insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8062         RTX_FRAME_RELATED_P (insn) = 1;
8063       }
8064 }
8065
8066 /* Emit code to save registers using MOV insns.  First register
8067    is restored from POINTER + OFFSET.  */
8068 static void
8069 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8070 {
8071   unsigned int regno;
8072   rtx insn;
8073
8074   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8075     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8076       {
8077         insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8078                                                Pmode, offset),
8079                                gen_rtx_REG (Pmode, regno));
8080         RTX_FRAME_RELATED_P (insn) = 1;
8081         offset += UNITS_PER_WORD;
8082       }
8083 }
8084
8085 /* Emit code to save registers using MOV insns.  First register
8086    is restored from POINTER + OFFSET.  */
8087 static void
8088 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8089 {
8090   unsigned int regno;
8091   rtx insn;
8092   rtx mem;
8093
8094   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8095     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8096       {
8097         mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8098         set_mem_align (mem, 128);
8099         insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8100         RTX_FRAME_RELATED_P (insn) = 1;
8101         offset += 16;
8102       }
8103 }
8104
8105 static GTY(()) rtx queued_cfa_restores;
8106
8107 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
8108    manipulation insn.  Don't add it if the previously
8109    saved value will be left untouched within stack red-zone till return,
8110    as unwinders can find the same value in the register and
8111    on the stack.  */
8112
8113 static void
8114 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8115 {
8116   if (TARGET_RED_ZONE
8117       && !TARGET_64BIT_MS_ABI
8118       && red_offset + RED_ZONE_SIZE >= 0
8119       && crtl->args.pops_args < 65536)
8120     return;
8121
8122   if (insn)
8123     {
8124       add_reg_note (insn, REG_CFA_RESTORE, reg);
8125       RTX_FRAME_RELATED_P (insn) = 1;
8126     }
8127   else
8128     queued_cfa_restores
8129       = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8130 }
8131
8132 /* Add queued REG_CFA_RESTORE notes if any to INSN.  */
8133
8134 static void
8135 ix86_add_queued_cfa_restore_notes (rtx insn)
8136 {
8137   rtx last;
8138   if (!queued_cfa_restores)
8139     return;
8140   for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8141     ;
8142   XEXP (last, 1) = REG_NOTES (insn);
8143   REG_NOTES (insn) = queued_cfa_restores;
8144   queued_cfa_restores = NULL_RTX;
8145   RTX_FRAME_RELATED_P (insn) = 1;
8146 }
8147
8148 /* Expand prologue or epilogue stack adjustment.
8149    The pattern exist to put a dependency on all ebp-based memory accesses.
8150    STYLE should be negative if instructions should be marked as frame related,
8151    zero if %r11 register is live and cannot be freely used and positive
8152    otherwise.  */
8153
8154 static void
8155 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8156                            int style, bool set_cfa)
8157 {
8158   rtx insn;
8159
8160   if (! TARGET_64BIT)
8161     insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8162   else if (x86_64_immediate_operand (offset, DImode))
8163     insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8164   else
8165     {
8166       rtx r11;
8167       /* r11 is used by indirect sibcall return as well, set before the
8168          epilogue and used after the epilogue.  ATM indirect sibcall
8169          shouldn't be used together with huge frame sizes in one
8170          function because of the frame_size check in sibcall.c.  */
8171       gcc_assert (style);
8172       r11 = gen_rtx_REG (DImode, R11_REG);
8173       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8174       if (style < 0)
8175         RTX_FRAME_RELATED_P (insn) = 1;
8176       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8177                                                                offset));
8178     }
8179
8180   if (style >= 0)
8181     ix86_add_queued_cfa_restore_notes (insn);
8182
8183   if (set_cfa)
8184     {
8185       rtx r;
8186
8187       gcc_assert (ix86_cfa_state->reg == src);
8188       ix86_cfa_state->offset += INTVAL (offset);
8189       ix86_cfa_state->reg = dest;
8190
8191       r = gen_rtx_PLUS (Pmode, src, offset);
8192       r = gen_rtx_SET (VOIDmode, dest, r);
8193       add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8194       RTX_FRAME_RELATED_P (insn) = 1;
8195     }
8196   else if (style < 0)
8197     RTX_FRAME_RELATED_P (insn) = 1;
8198 }
8199
8200 /* Find an available register to be used as dynamic realign argument
8201    pointer regsiter.  Such a register will be written in prologue and
8202    used in begin of body, so it must not be
8203         1. parameter passing register.
8204         2. GOT pointer.
8205    We reuse static-chain register if it is available.  Otherwise, we
8206    use DI for i386 and R13 for x86-64.  We chose R13 since it has
8207    shorter encoding.
8208
8209    Return: the regno of chosen register.  */
8210
8211 static unsigned int
8212 find_drap_reg (void)
8213 {
8214   tree decl = cfun->decl;
8215
8216   if (TARGET_64BIT)
8217     {
8218       /* Use R13 for nested function or function need static chain.
8219          Since function with tail call may use any caller-saved
8220          registers in epilogue, DRAP must not use caller-saved
8221          register in such case.  */
8222       if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8223         return R13_REG;
8224
8225       return R10_REG;
8226     }
8227   else
8228     {
8229       /* Use DI for nested function or function need static chain.
8230          Since function with tail call may use any caller-saved
8231          registers in epilogue, DRAP must not use caller-saved
8232          register in such case.  */
8233       if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8234         return DI_REG;
8235
8236       /* Reuse static chain register if it isn't used for parameter
8237          passing.  */
8238       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8239           && !lookup_attribute ("fastcall",
8240                                 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8241         return CX_REG;
8242       else
8243         return DI_REG;
8244     }
8245 }
8246
8247 /* Return minimum incoming stack alignment.  */
8248
8249 static unsigned int
8250 ix86_minimum_incoming_stack_boundary (bool sibcall)
8251 {
8252   unsigned int incoming_stack_boundary;
8253
8254   /* Prefer the one specified at command line. */
8255   if (ix86_user_incoming_stack_boundary)
8256     incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8257   /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
8258      if -mstackrealign is used, it isn't used for sibcall check and
8259      estimated stack alignment is 128bit.  */
8260   else if (!sibcall
8261            && !TARGET_64BIT
8262            && ix86_force_align_arg_pointer
8263            && crtl->stack_alignment_estimated == 128)
8264     incoming_stack_boundary = MIN_STACK_BOUNDARY;
8265   else
8266     incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8267
8268   /* Incoming stack alignment can be changed on individual functions
8269      via force_align_arg_pointer attribute.  We use the smallest
8270      incoming stack boundary.  */
8271   if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8272       && lookup_attribute (ix86_force_align_arg_pointer_string,
8273                            TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8274     incoming_stack_boundary = MIN_STACK_BOUNDARY;
8275
8276   /* The incoming stack frame has to be aligned at least at
8277      parm_stack_boundary.  */
8278   if (incoming_stack_boundary < crtl->parm_stack_boundary)
8279     incoming_stack_boundary = crtl->parm_stack_boundary;
8280
8281   /* Stack at entrance of main is aligned by runtime.  We use the
8282      smallest incoming stack boundary. */
8283   if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8284       && DECL_NAME (current_function_decl)
8285       && MAIN_NAME_P (DECL_NAME (current_function_decl))
8286       && DECL_FILE_SCOPE_P (current_function_decl))
8287     incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8288
8289   return incoming_stack_boundary;
8290 }
8291
8292 /* Update incoming stack boundary and estimated stack alignment.  */
8293
8294 static void
8295 ix86_update_stack_boundary (void)
8296 {
8297   ix86_incoming_stack_boundary
8298     = ix86_minimum_incoming_stack_boundary (false);
8299
8300   /* x86_64 vararg needs 16byte stack alignment for register save
8301      area.  */
8302   if (TARGET_64BIT
8303       && cfun->stdarg
8304       && crtl->stack_alignment_estimated < 128)
8305     crtl->stack_alignment_estimated = 128;
8306 }
8307
8308 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8309    needed or an rtx for DRAP otherwise.  */
8310
8311 static rtx
8312 ix86_get_drap_rtx (void)
8313 {
8314   if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8315     crtl->need_drap = true;
8316
8317   if (stack_realign_drap)
8318     {
8319       /* Assign DRAP to vDRAP and returns vDRAP */
8320       unsigned int regno = find_drap_reg ();
8321       rtx drap_vreg;
8322       rtx arg_ptr;
8323       rtx seq, insn;
8324
8325       arg_ptr = gen_rtx_REG (Pmode, regno);
8326       crtl->drap_reg = arg_ptr;
8327
8328       start_sequence ();
8329       drap_vreg = copy_to_reg (arg_ptr);
8330       seq = get_insns ();
8331       end_sequence ();
8332
8333       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8334       RTX_FRAME_RELATED_P (insn) = 1;
8335       return drap_vreg;
8336     }
8337   else
8338     return NULL;
8339 }
8340
8341 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
8342
8343 static rtx
8344 ix86_internal_arg_pointer (void)
8345 {
8346   return virtual_incoming_args_rtx;
8347 }
8348
8349 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8350    to be generated in correct form.  */
8351 static void
8352 ix86_finalize_stack_realign_flags (void)
8353 {
8354   /* Check if stack realign is really needed after reload, and
8355      stores result in cfun */
8356   unsigned int incoming_stack_boundary
8357     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8358        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8359   unsigned int stack_realign = (incoming_stack_boundary
8360                                 < (current_function_is_leaf
8361                                    ? crtl->max_used_stack_slot_alignment
8362                                    : crtl->stack_alignment_needed));
8363
8364   if (crtl->stack_realign_finalized)
8365     {
8366       /* After stack_realign_needed is finalized, we can't no longer
8367          change it.  */
8368       gcc_assert (crtl->stack_realign_needed == stack_realign);
8369     }
8370   else
8371     {
8372       crtl->stack_realign_needed = stack_realign;
8373       crtl->stack_realign_finalized = true;
8374     }
8375 }
8376
8377 /* Expand the prologue into a bunch of separate insns.  */
8378
8379 void
8380 ix86_expand_prologue (void)
8381 {
8382   rtx insn;
8383   bool pic_reg_used;
8384   struct ix86_frame frame;
8385   HOST_WIDE_INT allocate;
8386   int gen_frame_pointer = frame_pointer_needed;
8387
8388   ix86_finalize_stack_realign_flags ();
8389
8390   /* DRAP should not coexist with stack_realign_fp */
8391   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8392
8393   /* Initialize CFA state for before the prologue.  */
8394   ix86_cfa_state->reg = stack_pointer_rtx;
8395   ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
8396
8397   ix86_compute_frame_layout (&frame);
8398
8399   if (ix86_function_ms_hook_prologue (current_function_decl))
8400     {
8401       rtx push, mov;
8402
8403       /* Make sure the function starts with
8404          8b ff     movl.s %edi,%edi
8405          55        push   %ebp
8406          8b ec     movl.s %esp,%ebp
8407
8408          This matches the hookable function prologue in Win32 API
8409          functions in Microsoft Windows XP Service Pack 2 and newer.
8410          Wine uses this to enable Windows apps to hook the Win32 API
8411          functions provided by Wine.  */
8412       insn = emit_insn (gen_vswapmov (gen_rtx_REG (SImode, DI_REG),
8413                                       gen_rtx_REG (SImode, DI_REG)));
8414       push = emit_insn (gen_push (hard_frame_pointer_rtx));
8415       mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
8416                                      stack_pointer_rtx));
8417
8418       if (frame_pointer_needed && !(crtl->drap_reg
8419                                     && crtl->stack_realign_needed))
8420         {
8421           /* The push %ebp and movl.s %esp, %ebp already set up
8422              the frame pointer.  No need to do this again. */
8423           gen_frame_pointer = 0;
8424           RTX_FRAME_RELATED_P (push) = 1;
8425           RTX_FRAME_RELATED_P (mov) = 1;
8426           if (ix86_cfa_state->reg == stack_pointer_rtx)
8427             ix86_cfa_state->reg = hard_frame_pointer_rtx;
8428         }
8429       else
8430         /* If the frame pointer is not needed, pop %ebp again. This
8431            could be optimized for cases where ebp needs to be backed up
8432            for some other reason.  If stack realignment is needed, pop
8433            the base pointer again, align the stack, and later regenerate
8434            the frame pointer setup.  The frame pointer generated by the
8435            hook prologue is not aligned, so it can't be used.  */
8436         insn = emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8437     }
8438
8439   /* The first insn of a function that accepts its static chain on the
8440      stack is to push the register that would be filled in by a direct
8441      call.  This insn will be skipped by the trampoline.  */
8442   if (ix86_static_chain_on_stack)
8443     {
8444       rtx t;
8445
8446       insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
8447       emit_insn (gen_blockage ());
8448
8449       /* We don't want to interpret this push insn as a register save,
8450          only as a stack adjustment.  The real copy of the register as
8451          a save will be done later, if needed.  */
8452       t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
8453       t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8454       add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8455       RTX_FRAME_RELATED_P (insn) = 1;
8456     }
8457
8458   /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8459      DRAP is needed and stack realignment is really needed after reload.  */
8460   if (crtl->drap_reg && crtl->stack_realign_needed)
8461     {
8462       rtx x, y;
8463       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8464       int param_ptr_offset = UNITS_PER_WORD;
8465
8466       if (ix86_static_chain_on_stack)
8467         param_ptr_offset += UNITS_PER_WORD;
8468       if (!call_used_regs[REGNO (crtl->drap_reg)])
8469         param_ptr_offset += UNITS_PER_WORD;
8470
8471       gcc_assert (stack_realign_drap);
8472
8473       /* Grab the argument pointer.  */
8474       x = plus_constant (stack_pointer_rtx, param_ptr_offset);
8475       y = crtl->drap_reg;
8476
8477       /* Only need to push parameter pointer reg if it is callee
8478          saved reg.  */
8479       if (!call_used_regs[REGNO (crtl->drap_reg)])
8480         {
8481           /* Push arg pointer reg */
8482           insn = emit_insn (gen_push (y));
8483           RTX_FRAME_RELATED_P (insn) = 1;
8484         }
8485
8486       insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8487       RTX_FRAME_RELATED_P (insn) = 1;
8488       ix86_cfa_state->reg = crtl->drap_reg;
8489
8490       /* Align the stack.  */
8491       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8492                                            stack_pointer_rtx,
8493                                            GEN_INT (-align_bytes)));
8494       RTX_FRAME_RELATED_P (insn) = 1;
8495
8496       /* Replicate the return address on the stack so that return
8497          address can be reached via (argp - 1) slot.  This is needed
8498          to implement macro RETURN_ADDR_RTX and intrinsic function
8499          expand_builtin_return_addr etc.  */
8500       x = crtl->drap_reg;
8501       x = gen_frame_mem (Pmode,
8502                          plus_constant (x, -UNITS_PER_WORD));
8503       insn = emit_insn (gen_push (x));
8504       RTX_FRAME_RELATED_P (insn) = 1;
8505     }
8506
8507   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8508      slower on all targets.  Also sdb doesn't like it.  */
8509
8510   if (gen_frame_pointer)
8511     {
8512       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8513       RTX_FRAME_RELATED_P (insn) = 1;
8514
8515       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8516       RTX_FRAME_RELATED_P (insn) = 1;
8517
8518       if (ix86_cfa_state->reg == stack_pointer_rtx)
8519         ix86_cfa_state->reg = hard_frame_pointer_rtx;
8520     }
8521
8522   if (stack_realign_fp)
8523     {
8524       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8525       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8526
8527       /* Align the stack.  */
8528       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8529                                            stack_pointer_rtx,
8530                                            GEN_INT (-align_bytes)));
8531       RTX_FRAME_RELATED_P (insn) = 1;
8532     }
8533
8534   allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8535
8536   if (!frame.save_regs_using_mov)
8537     ix86_emit_save_regs ();
8538   else
8539     allocate += frame.nregs * UNITS_PER_WORD;
8540
8541   /* When using red zone we may start register saving before allocating
8542      the stack frame saving one cycle of the prologue. However I will
8543      avoid doing this if I am going to have to probe the stack since
8544      at least on x86_64 the stack probe can turn into a call that clobbers
8545      a red zone location */
8546   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8547       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8548     ix86_emit_save_regs_using_mov ((frame_pointer_needed
8549                                      && !crtl->stack_realign_needed)
8550                                    ? hard_frame_pointer_rtx
8551                                    : stack_pointer_rtx,
8552                                    -frame.nregs * UNITS_PER_WORD);
8553
8554   if (allocate == 0)
8555     ;
8556   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8557     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8558                                GEN_INT (-allocate), -1,
8559                                ix86_cfa_state->reg == stack_pointer_rtx);
8560   else
8561     {
8562       /* Only valid for Win32.  */
8563       rtx eax = gen_rtx_REG (Pmode, AX_REG);
8564       bool eax_live;
8565       rtx t;
8566
8567       gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8568
8569       if (cfun->machine->call_abi == MS_ABI)
8570         eax_live = false;
8571       else
8572         eax_live = ix86_eax_live_at_start_p ();
8573
8574       if (eax_live)
8575         {
8576           emit_insn (gen_push (eax));
8577           allocate -= UNITS_PER_WORD;
8578         }
8579
8580       emit_move_insn (eax, GEN_INT (allocate));
8581
8582       if (TARGET_64BIT)
8583         insn = gen_allocate_stack_worker_64 (eax, eax);
8584       else
8585         insn = gen_allocate_stack_worker_32 (eax, eax);
8586       insn = emit_insn (insn);
8587
8588       if (ix86_cfa_state->reg == stack_pointer_rtx)
8589         {
8590           ix86_cfa_state->offset += allocate;
8591           t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8592           t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8593           add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8594           RTX_FRAME_RELATED_P (insn) = 1;
8595         }
8596
8597       if (eax_live)
8598         {
8599           if (frame_pointer_needed)
8600             t = plus_constant (hard_frame_pointer_rtx,
8601                                allocate
8602                                - frame.to_allocate
8603                                - frame.nregs * UNITS_PER_WORD);
8604           else
8605             t = plus_constant (stack_pointer_rtx, allocate);
8606           emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8607         }
8608     }
8609
8610   if (frame.save_regs_using_mov
8611       && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8612          && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8613     {
8614       if (!frame_pointer_needed
8615           || !(frame.to_allocate + frame.padding0)
8616           || crtl->stack_realign_needed)
8617         ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8618                                        frame.to_allocate
8619                                        + frame.nsseregs * 16 + frame.padding0);
8620       else
8621         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8622                                        -frame.nregs * UNITS_PER_WORD);
8623     }
8624   if (!frame_pointer_needed
8625       || !(frame.to_allocate + frame.padding0)
8626       || crtl->stack_realign_needed)
8627     ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8628                                        frame.to_allocate);
8629   else
8630     ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8631                                        - frame.nregs * UNITS_PER_WORD
8632                                        - frame.nsseregs * 16
8633                                        - frame.padding0);
8634
8635   pic_reg_used = false;
8636   if (pic_offset_table_rtx
8637       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8638           || crtl->profile))
8639     {
8640       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8641
8642       if (alt_pic_reg_used != INVALID_REGNUM)
8643         SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8644
8645       pic_reg_used = true;
8646     }
8647
8648   if (pic_reg_used)
8649     {
8650       if (TARGET_64BIT)
8651         {
8652           if (ix86_cmodel == CM_LARGE_PIC)
8653             {
8654               rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8655               rtx label = gen_label_rtx ();
8656               emit_label (label);
8657               LABEL_PRESERVE_P (label) = 1;
8658               gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8659               insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8660               insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8661               insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8662                                             pic_offset_table_rtx, tmp_reg));
8663             }
8664           else
8665             insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8666         }
8667       else
8668         insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8669     }
8670
8671   /* In the pic_reg_used case, make sure that the got load isn't deleted
8672      when mcount needs it.  Blockage to avoid call movement across mcount
8673      call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8674      note.  */
8675   if (crtl->profile && pic_reg_used)
8676     emit_insn (gen_prologue_use (pic_offset_table_rtx));
8677
8678   if (crtl->drap_reg && !crtl->stack_realign_needed)
8679     {
8680       /* vDRAP was set up, but after reload it turned out that stack
8681          realignment isn't necessary; here we emit prologue code to set
8682          up DRAP without the stack realignment adjustment.  */
8683       rtx x;
8684       int drap_bp_offset = UNITS_PER_WORD * 2;
8685
8686       if (ix86_static_chain_on_stack)
8687         drap_bp_offset += UNITS_PER_WORD;
8688       x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8689       insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8690     }
8691
8692   /* Prevent instructions from being scheduled into register save push
8693      sequence when access to the redzone area is done through frame pointer.
8694      The offset between the frame pointer and the stack pointer is calculated
8695      relative to the value of the stack pointer at the end of the function
8696      prologue, and moving instructions that access redzone area via frame
8697      pointer inside push sequence violates this assumption.  */
8698   if (frame_pointer_needed && frame.red_zone_size)
8699     emit_insn (gen_memory_blockage ());
8700
8701   /* Emit cld instruction if stringops are used in the function.  */
8702   if (TARGET_CLD && ix86_current_function_needs_cld)
8703     emit_insn (gen_cld ());
8704 }
8705
8706 /* Emit code to restore REG using a POP insn, and attach to that insn
        the notes that keep the recorded CFA (*ix86_cfa_state) in step
        with it.

        RED_OFFSET is not interpreted here; it is only forwarded to
        ix86_add_cfa_restore_note.

        The notes depend on what the CFA is currently based on:
          - the DRAP register, and REG is that register: the insn gets a
            REG_CFA_DEF_CFA note naming REG and we return at once, so
            ix86_add_cfa_restore_note is NOT called on this path;
          - the stack pointer: the recorded CFA offset shrinks by one
            word and the insn gets a REG_CFA_ADJUST_CFA note;
          - the hard frame pointer, and REG is the hard frame pointer:
            the CFA is redefined in terms of the stack pointer.
        On every path except the first, ix86_add_cfa_restore_note is
        called for REG at the end.  */
8707
8708 static void
8709 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
8710 {
8711   rtx insn = emit_insn (ix86_gen_pop1 (reg));
8712
8713   if (ix86_cfa_state->reg == crtl->drap_reg
8714       && REGNO (reg) == REGNO (crtl->drap_reg))
8715     {
8716       /* Previously we'd represented the CFA as an expression
8717          like *(%ebp - 8).  We've just popped that value from
8718          the stack, which means we need to reset the CFA to
8719          the drap register.  This will remain until we restore
8720          the stack pointer.  */
8721       add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8722       RTX_FRAME_RELATED_P (insn) = 1;
8723       return;
8724     }
8725
       /* The CFA is SP-relative.  The note is a copy of element 1 of the
          pop insn's pattern -- presumably the stack pointer update of the
          pop PARALLEL; confirm against the pop patterns in the machine
          description.  */
8726   if (ix86_cfa_state->reg == stack_pointer_rtx)
8727     {
8728       ix86_cfa_state->offset -= UNITS_PER_WORD;
8729       add_reg_note (insn, REG_CFA_ADJUST_CFA,
8730                     copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8731       RTX_FRAME_RELATED_P (insn) = 1;
8732     }
8733
8734   /* When the frame pointer is the CFA, and we pop it, we are
8735      swapping back to the stack pointer as the CFA.  This happens
8736      for stack frames that don't allocate other data, so we assume
8737      the stack pointer is now pointing at the return address, i.e.
8738      the function entry state, which makes the offset be 1 word.
        Note that REG is compared by rtx identity here, not by register
        number as in the DRAP test above.  */
8739   else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
8740            && reg == hard_frame_pointer_rtx)
8741     {
8742       ix86_cfa_state->reg = stack_pointer_rtx;
8743       ix86_cfa_state->offset -= UNITS_PER_WORD;
8744
8745       add_reg_note (insn, REG_CFA_DEF_CFA,
8746                     gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8747                                   GEN_INT (ix86_cfa_state->offset)));
8748       RTX_FRAME_RELATED_P (insn) = 1;
8749     }
8750
       /* Reached on every path except the DRAP early return above.  */
8751   ix86_add_cfa_restore_note (insn, reg, red_offset);
8752 }
8753
8754 /* Emit code to restore saved registers using POP insns.

        Hard registers are visited in ascending register-number order,
        from 0 up to (but not including) FIRST_PSEUDO_REGISTER.  A POP is
        emitted, via ix86_emit_restore_reg_using_pop, for every non-SSE
        register for which ix86_save_reg (regno, false) is true; note the
        second argument is always false here, unlike the MOV-based
        restore routines, which take it from their caller.

        RED_OFFSET is the value handed to ix86_emit_restore_reg_using_pop
        for the first register popped; it is advanced by UNITS_PER_WORD
        after each pop.  */
8755
8756 static void
8757 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
8758 {
8759   int regno;
8760
8761   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8762     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8763       {
8764         ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
8765                                          red_offset);
8766         red_offset += UNITS_PER_WORD;
8767       }
8768 }
8769
8770 /* Emit code and notes for the LEAVE instruction.

        ix86_add_queued_cfa_restore_notes is always called on the emitted
        insn (judging by its name, to attach restore notes queued by
        earlier restores -- confirm against its definition).

        The CFA bookkeeping is touched only when the CFA is currently
        based on the hard frame pointer.  In that case the CFA register
        becomes the stack pointer, the recorded offset shrinks by one
        word, the insn gets a REG_CFA_ADJUST_CFA note, and a restore note
        for the hard frame pointer is added using RED_OFFSET.  Otherwise
        RED_OFFSET is unused.  */
8771
8772 static void
8773 ix86_emit_leave (HOST_WIDE_INT red_offset)
8774 {
8775   rtx insn = emit_insn (ix86_gen_leave ());
8776
8777   ix86_add_queued_cfa_restore_notes (insn);
8778
8779   if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
8780     {
8781       ix86_cfa_state->reg = stack_pointer_rtx;
8782       ix86_cfa_state->offset -= UNITS_PER_WORD;
8783
           /* The note is a copy of element 0 of the leave insn's pattern
              -- presumably the set of the stack pointer; confirm against
              the leave patterns in the machine description.  */
8784       add_reg_note (insn, REG_CFA_ADJUST_CFA,
8785                     copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
8786       RTX_FRAME_RELATED_P (insn) = 1;
8787       ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
8788     }
8789 }
8790
8791 /* Emit code to restore saved registers using MOV insns.  First register
8792    is restored from POINTER + OFFSET.

        Only non-SSE registers are handled here.  Hard registers are
        visited in ascending register-number order, and a register is
        restored when ix86_save_reg (regno, MAYBE_EH_RETURN) is true.
        Each restored register consumes UNITS_PER_WORD bytes: OFFSET and
        RED_OFFSET both advance by that amount per register.

        RED_OFFSET is only forwarded to ix86_add_cfa_restore_note.  That
        function is called with a null insn here (presumably so the note
        is queued and attached to a later insn -- confirm against its
        definition).  The one exception is the DRAP register while the
        CFA is based on it: that move gets a REG_CFA_DEF_CFA note
        directly and no restore note.  */
8793 static void
8794 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8795                                   HOST_WIDE_INT red_offset,
8796                                   int maybe_eh_return)
8797 {
8798   unsigned int regno;
8799   rtx base_address = gen_rtx_MEM (Pmode, pointer);
8800   rtx insn;
8801
8802   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8803     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8804       {
8805         rtx reg = gen_rtx_REG (Pmode, regno);
8806
8807         /* Ensure that adjust_address won't be forced to produce pointer
8808            out of range allowed by x86-64 instruction set.
                If OFFSET does not survive truncation to SImode, compute
                POINTER + OFFSET into R11 and address through R11 with a
                zero offset instead.  BASE_ADDRESS and OFFSET keep their
                new values, so the remaining registers are addressed
                relative to R11 as well.  */
8809         if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8810           {
8811             rtx r11;
8812
8813             r11 = gen_rtx_REG (DImode, R11_REG);
8814             emit_move_insn (r11, GEN_INT (offset));
8815             emit_insn (gen_adddi3 (r11, r11, pointer));
8816             base_address = gen_rtx_MEM (Pmode, r11);
8817             offset = 0;
8818           }
8819         insn = emit_move_insn (reg,
8820                                adjust_address (base_address, Pmode, offset));
8821         offset += UNITS_PER_WORD;
8822
8823         if (ix86_cfa_state->reg == crtl->drap_reg
8824             && regno == REGNO (crtl->drap_reg))
8825           {
8826             /* Previously we'd represented the CFA as an expression
8827                like *(%ebp - 8).  We've just reloaded that value from
8828                the stack, which means we need to reset the CFA to
8829                the drap register.  This will remain until we restore
8830                the stack pointer.  */
8831             add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8832             RTX_FRAME_RELATED_P (insn) = 1;
8833           }
8834         else
8835           ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8836
8837         red_offset += UNITS_PER_WORD;
8838       }
8839 }
8840
8841 /* Emit code to restore saved SSE registers using MOV insns.  First
8842    register is restored from POINTER + OFFSET.

        This is the SSE counterpart of ix86_emit_restore_regs_using_mov.
        Only registers satisfying SSE_REGNO_P are handled, and a register
        is restored when ix86_save_reg (regno, MAYBE_EH_RETURN) is true.
        Each register is moved in TImode from a slot whose MEM is marked
        as 128-bit aligned, and occupies 16 bytes: OFFSET and RED_OFFSET
        both advance by 16 per register.

        RED_OFFSET is only forwarded to ix86_add_cfa_restore_note, which
        is called with a null insn for every restored register
        (presumably so the note is queued and attached to a later insn
        -- confirm against its definition).  */
8843 static void
8844 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8845                                       HOST_WIDE_INT red_offset,
8846                                       int maybe_eh_return)
8847 {
8848   int regno;
8849   rtx base_address = gen_rtx_MEM (TImode, pointer);
8850   rtx mem;
8851
8852   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8853     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8854       {
8855         rtx reg = gen_rtx_REG (TImode, regno);
8856
8857         /* Ensure that adjust_address won't be forced to produce pointer
8858            out of range allowed by x86-64 instruction set.
                If OFFSET does not survive truncation to SImode, compute
                POINTER + OFFSET into R11 and address through R11 with a
                zero offset instead.  BASE_ADDRESS and OFFSET keep their
                new values, so the remaining registers are addressed
                relative to R11 as well.  */
8859         if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8860           {
8861             rtx r11;
8862
8863             r11 = gen_rtx_REG (DImode, R11_REG);
8864             emit_move_insn (r11, GEN_INT (offset));
8865             emit_insn (gen_adddi3 (r11, r11, pointer));
8866             base_address = gen_rtx_MEM (TImode, r11);
8867             offset = 0;
8868           }
8869         mem = adjust_address (base_address, TImode, offset);
8870         set_mem_align (mem, 128);
8871         emit_move_insn (reg, mem);
8872         offset += 16;
8873
8874         ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8875
8876         red_offset += 16;
8877       }
8878 }
8879
8880 /* Restore function stack, frame, and registers.  */
8881
8882 void
8883 ix86_expand_epilogue (int style)
8884 {
8885   int sp_valid;
8886   struct ix86_frame frame;
8887   HOST_WIDE_INT offset, red_offset;
8888   struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
8889   bool using_drap;
8890
8891   ix86_finalize_stack_realign_flags ();
8892
8893  /* When stack is realigned, SP must be valid.  */
8894   sp_valid = (!frame_pointer_needed
8895               || current_function_sp_is_unchanging
8896               || stack_realign_fp);
8897
8898   ix86_compute_frame_layout (&frame);
8899
8900   /* See the comment about red zone and frame
8901      pointer usage in ix86_expand_prologue.  */
8902   if (frame_pointer_needed && frame.red_zone_size)
8903     emit_insn (gen_memory_blockage ());
8904
8905   using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8906   gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
8907
8908   /* Calculate start of saved registers relative to ebp.  Special care
8909      must be taken for the normal return case of a function using
8910      eh_return: the eax and edx registers are marked as saved, but not
8911      restored along this path.  */
8912   offset = frame.nregs;
8913   if (crtl->calls_eh_return && style != 2)
8914     offset -= 2;
8915   offset *= -UNITS_PER_WORD;
8916   offset -= frame.nsseregs * 16 + frame.padding0;
8917
8918   /* Calculate start of saved registers relative to esp on entry of the
8919      function.  When realigning stack, this needs to be the most negative
8920      value possible at runtime.  */
8921   red_offset = offset;
8922   if (using_drap)
8923     red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8924                   + UNITS_PER_WORD;
8925   else if (stack_realign_fp)
8926     red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8927                   - UNITS_PER_WORD;
8928   if (ix86_static_chain_on_stack)
8929     red_offset -= UNITS_PER_WORD;
8930   if (frame_pointer_needed)
8931     red_offset -= UNITS_PER_WORD;
8932
8933   /* If we're only restoring one register and sp is not valid then
8934      using a move instruction to restore the register since it's
8935      less work than reloading sp and popping the register.
8936
8937      The default code result in stack adjustment using add/lea instruction,
8938      while this code results in LEAVE instruction (or discrete equivalent),
8939      so it is profitable in some other cases as well.  Especially when there
8940      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8941      and there is exactly one register to pop. This heuristic may need some
8942      tuning in future.  */
8943   if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8944       || (TARGET_EPILOGUE_USING_MOVE
8945           && cfun->machine->use_fast_prologue_epilogue
8946           && ((frame.nregs + frame.nsseregs) > 1
8947               || (frame.to_allocate + frame.padding0) != 0))
8948       || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8949           && (frame.to_allocate + frame.padding0) != 0)
8950       || (frame_pointer_needed && TARGET_USE_LEAVE
8951           && cfun->machine->use_fast_prologue_epilogue
8952           && (frame.nregs + frame.nsseregs) == 1)
8953       || crtl->calls_eh_return)
8954     {
8955       /* Restore registers.  We can use ebp or esp to address the memory
8956          locations.  If both are available, default to ebp, since offsets
8957          are known to be small.  Only exception is esp pointing directly
8958          to the end of block of saved registers, where we may simplify
8959          addressing mode.
8960
8961          If we are realigning stack with bp and sp, regs restore can't
8962          be addressed by bp. sp must be used instead.  */
8963
8964       if (!frame_pointer_needed
8965           || (sp_valid && !(frame.to_allocate + frame.padding0))
8966           || stack_realign_fp)
8967         {
8968           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8969                                                 frame.to_allocate, red_offset,
8970                                                 style == 2);
8971           ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8972                                             frame.to_allocate
8973                                             + frame.nsseregs * 16
8974                                             + frame.padding0,
8975                                             red_offset
8976                                             + frame.nsseregs * 16
8977                                             + frame.padding0, style == 2);
8978         }
8979       else
8980         {
8981           ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8982                                                 offset, red_offset,
8983                                                 style == 2);
8984           ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8985                                             offset
8986                                             + frame.nsseregs * 16
8987                                             + frame.padding0,
8988                                             red_offset
8989                                             + frame.nsseregs * 16
8990                                             + frame.padding0, style == 2);
8991         }
8992
8993       red_offset -= offset;
8994
8995       /* eh_return epilogues need %ecx added to the stack pointer.  */
8996       if (style == 2)
8997         {
8998           rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8999
9000           /* Stack align doesn't work with eh_return.  */
9001           gcc_assert (!crtl->stack_realign_needed);
9002           /* Neither does regparm nested functions.  */
9003           gcc_assert (!ix86_static_chain_on_stack);
9004
9005           if (frame_pointer_needed)
9006             {
9007               tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9008               tmp = plus_constant (tmp, UNITS_PER_WORD);
9009               tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
9010
9011               tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
9012               tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
9013
9014               /* Note that we use SA as a temporary CFA, as the return
9015                  address is at the proper place relative to it.  We
9016                  pretend this happens at the FP restore insn because
9017                  prior to this insn the FP would be stored at the wrong
9018                  offset relative to SA, and after this insn we have no
9019                  other reasonable register to use for the CFA.  We don't
9020                  bother resetting the CFA to the SP for the duration of
9021                  the return insn.  */
9022               add_reg_note (tmp, REG_CFA_DEF_CFA,
9023                             plus_constant (sa, UNITS_PER_WORD));
9024               ix86_add_queued_cfa_restore_notes (tmp);
9025               add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
9026               RTX_FRAME_RELATED_P (tmp) = 1;
9027               ix86_cfa_state->reg = sa;
9028               ix86_cfa_state->offset = UNITS_PER_WORD;
9029
9030               pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9031                                          const0_rtx, style, false);
9032             }
9033           else
9034             {
9035               tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9036               tmp = plus_constant (tmp, (frame.to_allocate
9037                                          + frame.nregs * UNITS_PER_WORD
9038                                          + frame.nsseregs * 16
9039                                          + frame.padding0));
9040               tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
9041               ix86_add_queued_cfa_restore_notes (tmp);
9042
9043               gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9044               if (ix86_cfa_state->offset != UNITS_PER_WORD)
9045                 {
9046                   ix86_cfa_state->offset = UNITS_PER_WORD;
9047                   add_reg_note (tmp, REG_CFA_DEF_CFA,
9048                                 plus_constant (stack_pointer_rtx,
9049                                                UNITS_PER_WORD));
9050                   RTX_FRAME_RELATED_P (tmp) = 1;
9051                 }
9052             }
9053         }
9054       else if (!frame_pointer_needed)
9055         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9056                                    GEN_INT (frame.to_allocate
9057                                             + frame.nregs * UNITS_PER_WORD
9058                                             + frame.nsseregs * 16
9059                                             + frame.padding0),
9060                                    style, !using_drap);
9061       /* If not an i386, mov & pop is faster than "leave".  */
9062       else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
9063                || !cfun->machine->use_fast_prologue_epilogue)
9064         ix86_emit_leave (red_offset);
9065       else
9066         {
9067           pro_epilogue_adjust_stack (stack_pointer_rtx,
9068                                      hard_frame_pointer_rtx,
9069                                      const0_rtx, style, !using_drap);
9070
9071           ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
9072         }
9073     }
9074   else
9075     {
9076       /* First step is to deallocate the stack frame so that we can
9077          pop the registers.
9078
9079          If we realign stack with frame pointer, then stack pointer
9080          won't be able to recover via lea $offset(%bp), %sp, because
9081          there is a padding area between bp and sp for realign.
9082          "add $to_allocate, %sp" must be used instead.  */
9083       if (!sp_valid)
9084         {
9085           gcc_assert (frame_pointer_needed);
9086           gcc_assert (!stack_realign_fp);
9087           pro_epilogue_adjust_stack (stack_pointer_rtx,
9088                                      hard_frame_pointer_rtx,
9089                                      GEN_INT (offset), style, false);
9090           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9091                                                 0, red_offset,
9092                                                 style == 2);
9093           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9094                                      GEN_INT (frame.nsseregs * 16
9095                                               + frame.padding0),
9096                                      style, false);
9097         }
9098       else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
9099         {
9100           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9101                                                 frame.to_allocate, red_offset,
9102                                                 style == 2);
9103           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9104                                      GEN_INT (frame.to_allocate
9105                                               + frame.nsseregs * 16
9106                                               + frame.padding0), style,
9107                                      !using_drap && !frame_pointer_needed);
9108         }
9109
9110       ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
9111                                         + frame.padding0);
9112       red_offset -= offset;
9113
9114       if (frame_pointer_needed)
9115         {
9116           /* Leave results in shorter dependency chains on CPUs that are
9117              able to grok it fast.  */
9118           if (TARGET_USE_LEAVE)
9119             ix86_emit_leave (red_offset);
9120           else
9121             {
9122               /* When the stack really was realigned, the stack pointer
9123                  must be recovered from the hard frame pointer if we are
9124                  not using leave.  */
9125               if (stack_realign_fp)
9126                 pro_epilogue_adjust_stack (stack_pointer_rtx,
9127                                            hard_frame_pointer_rtx,
9128                                            const0_rtx, style, !using_drap);
9129               ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
9130                                                red_offset);
9131             }
9132         }
9133     }
9134
9135   if (using_drap)
9136     {
9137       int param_ptr_offset = UNITS_PER_WORD;
9138       rtx insn;
9139
9140       gcc_assert (stack_realign_drap);
9141
9142       if (ix86_static_chain_on_stack)
9143         param_ptr_offset += UNITS_PER_WORD;
9144       if (!call_used_regs[REGNO (crtl->drap_reg)])
9145         param_ptr_offset += UNITS_PER_WORD;
9146
9147       insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
9148                                           crtl->drap_reg,
9149                                           GEN_INT (-param_ptr_offset)));
9150
9151       ix86_cfa_state->reg = stack_pointer_rtx;
9152       ix86_cfa_state->offset = param_ptr_offset;
9153
9154       add_reg_note (insn, REG_CFA_DEF_CFA,
9155                     gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
9156                                   GEN_INT (ix86_cfa_state->offset)));
9157       RTX_FRAME_RELATED_P (insn) = 1;
9158
9159       if (!call_used_regs[REGNO (crtl->drap_reg)])
9160         ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
9161     }
9162
9163   /* Remove the saved static chain from the stack.  The use of ECX is
9164      merely as a scratch register, not as the actual static chain.  */
9165   if (ix86_static_chain_on_stack)
9166     {
9167       rtx r, insn;
9168
9169       gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9170       ix86_cfa_state->offset += UNITS_PER_WORD;
9171
9172       r = gen_rtx_REG (Pmode, CX_REG);
9173       insn = emit_insn (ix86_gen_pop1 (r));
9174
9175       r = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9176       r = gen_rtx_SET (VOIDmode, stack_pointer_rtx, r);
9177       add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9178       RTX_FRAME_RELATED_P (insn) = 1;
9179     }
9180
9181   /* Sibcall epilogues don't want a return instruction.  */
9182   if (style == 0)
9183     {
9184       *ix86_cfa_state = cfa_state_save;
9185       return;
9186     }
9187
9188   if (crtl->args.pops_args && crtl->args.size)
9189     {
9190       rtx popc = GEN_INT (crtl->args.pops_args);
9191
9192       /* i386 can only pop 64K bytes.  If asked to pop more, pop return
9193          address, do explicit add, and jump indirectly to the caller.  */
9194
9195       if (crtl->args.pops_args >= 65536)
9196         {
9197           rtx ecx = gen_rtx_REG (SImode, CX_REG);
9198           rtx insn;
9199
9200           /* There is no "pascal" calling convention in any 64bit ABI.  */
9201           gcc_assert (!TARGET_64BIT);
9202
9203           insn = emit_insn (gen_popsi1 (ecx));
9204           ix86_cfa_state->offset -= UNITS_PER_WORD;
9205
9206           add_reg_note (insn, REG_CFA_ADJUST_CFA,
9207                         copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9208           add_reg_note (insn, REG_CFA_REGISTER,
9209                         gen_rtx_SET (VOIDmode, ecx, pc_rtx));
9210           RTX_FRAME_RELATED_P (insn) = 1;
9211
9212           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9213                                      popc, -1, true);
9214           emit_jump_insn (gen_return_indirect_internal (ecx));
9215         }
9216       else
9217         emit_jump_insn (gen_return_pop_internal (popc));
9218     }
9219   else
9220     emit_jump_insn (gen_return_internal ());
9221
9222   /* Restore the state back to the state from the prologue,
9223      so that it's correct for the next epilogue.  */
9224   *ix86_cfa_state = cfa_state_save;
9225 }
9226
9227 /* Reset from the function's potential modifications.  */
9228
9229 static void
9230 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9231                                HOST_WIDE_INT size ATTRIBUTE_UNUSED)
9232 {
9233   if (pic_offset_table_rtx)
9234     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9235 #if TARGET_MACHO
9236   /* Mach-O doesn't support labels at the end of objects, so if
9237      it looks like we might want one, insert a NOP.  */
9238   {
9239     rtx insn = get_last_insn ();
9240     while (insn
9241            && NOTE_P (insn)
9242            && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9243       insn = PREV_INSN (insn);
9244     if (insn
9245         && (LABEL_P (insn)
9246             || (NOTE_P (insn)
9247                 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
9248       fputs ("\tnop\n", file);
9249   }
9250 #endif
9251
9252 }
9253 \f
9254 /* Extract the parts of an RTL expression that is a valid memory address
9255    for an instruction.  Return 0 if the structure of the address is
9256    grossly off.  Return -1 if the address contains ASHIFT, so it is not
9257    strictly valid, but still used for computing length of lea instruction.  */
9258
9259 int
9260 ix86_decompose_address (rtx addr, struct ix86_address *out)
9261 {
9262   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9263   rtx base_reg, index_reg;
9264   HOST_WIDE_INT scale = 1;
9265   rtx scale_rtx = NULL_RTX;
9266   int retval = 1;
9267   enum ix86_address_seg seg = SEG_DEFAULT;
9268
9269   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
9270     base = addr;
9271   else if (GET_CODE (addr) == PLUS)
9272     {
9273       rtx addends[4], op;
9274       int n = 0, i;
9275
9276       op = addr;
9277       do
9278         {
9279           if (n >= 4)
9280             return 0;
9281           addends[n++] = XEXP (op, 1);
9282           op = XEXP (op, 0);
9283         }
9284       while (GET_CODE (op) == PLUS);
9285       if (n >= 4)
9286         return 0;
9287       addends[n] = op;
9288
9289       for (i = n; i >= 0; --i)
9290         {
9291           op = addends[i];
9292           switch (GET_CODE (op))
9293             {
9294             case MULT:
9295               if (index)
9296                 return 0;
9297               index = XEXP (op, 0);
9298               scale_rtx = XEXP (op, 1);
9299               break;
9300
9301             case UNSPEC:
9302               if (XINT (op, 1) == UNSPEC_TP
9303                   && TARGET_TLS_DIRECT_SEG_REFS
9304                   && seg == SEG_DEFAULT)
9305                 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
9306               else
9307                 return 0;
9308               break;
9309
9310             case REG:
9311             case SUBREG:
9312               if (!base)
9313                 base = op;
9314               else if (!index)
9315                 index = op;
9316               else
9317                 return 0;
9318               break;
9319
9320             case CONST:
9321             case CONST_INT:
9322             case SYMBOL_REF:
9323             case LABEL_REF:
9324               if (disp)
9325                 return 0;
9326               disp = op;
9327               break;
9328
9329             default:
9330               return 0;
9331             }
9332         }
9333     }
9334   else if (GET_CODE (addr) == MULT)
9335     {
9336       index = XEXP (addr, 0);           /* index*scale */
9337       scale_rtx = XEXP (addr, 1);
9338     }
9339   else if (GET_CODE (addr) == ASHIFT)
9340     {
9341       rtx tmp;
9342
9343       /* We're called for lea too, which implements ashift on occasion.  */
9344       index = XEXP (addr, 0);
9345       tmp = XEXP (addr, 1);
9346       if (!CONST_INT_P (tmp))
9347         return 0;
9348       scale = INTVAL (tmp);
9349       if ((unsigned HOST_WIDE_INT) scale > 3)
9350         return 0;
9351       scale = 1 << scale;
9352       retval = -1;
9353     }
9354   else
9355     disp = addr;                        /* displacement */
9356
9357   /* Extract the integral value of scale.  */
9358   if (scale_rtx)
9359     {
9360       if (!CONST_INT_P (scale_rtx))
9361         return 0;
9362       scale = INTVAL (scale_rtx);
9363     }
9364
9365   base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
9366   index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
9367
9368   /* Avoid useless 0 displacement.  */
9369   if (disp == const0_rtx && (base || index))
9370     disp = NULL_RTX;
9371
9372   /* Allow arg pointer and stack pointer as index if there is not scaling.  */
9373   if (base_reg && index_reg && scale == 1
9374       && (index_reg == arg_pointer_rtx
9375           || index_reg == frame_pointer_rtx
9376           || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
9377     {
9378       rtx tmp;
9379       tmp = base, base = index, index = tmp;
9380       tmp = base_reg, base_reg = index_reg, index_reg = tmp;
9381     }
9382
9383   /* Special case: %ebp cannot be encoded as a base without a displacement.
9384      Similarly %r13.  */
9385   if (!disp
9386       && base_reg
9387       && (base_reg == hard_frame_pointer_rtx
9388           || base_reg == frame_pointer_rtx
9389           || base_reg == arg_pointer_rtx
9390           || (REG_P (base_reg)
9391               && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
9392                   || REGNO (base_reg) == R13_REG))))
9393     disp = const0_rtx;
9394
9395   /* Special case: on K6, [%esi] makes the instruction vector decoded.
9396      Avoid this by transforming to [%esi+0].
9397      Reload calls address legitimization without cfun defined, so we need
9398      to test cfun for being non-NULL. */
9399   if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9400       && base_reg && !index_reg && !disp
9401       && REG_P (base_reg)
9402       && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
9403     disp = const0_rtx;
9404
9405   /* Special case: encode reg+reg instead of reg*2.  */
9406   if (!base && index && scale == 2)
9407     base = index, base_reg = index_reg, scale = 1;
9408
9409   /* Special case: scaling cannot be encoded without base or displacement.  */
9410   if (!base && !disp && index && scale != 1)
9411     disp = const0_rtx;
9412
9413   out->base = base;
9414   out->index = index;
9415   out->disp = disp;
9416   out->scale = scale;
9417   out->seg = seg;
9418
9419   return retval;
9420 }
9421 \f
9422 /* Return cost of the memory address x.
9423    For i386, it is better to use a complex address than let gcc copy
9424    the address into a reg and make a new pseudo.  But not if the address
9425    requires to two regs - that would mean more pseudos with longer
9426    lifetimes.  */
9427 static int
9428 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9429 {
9430   struct ix86_address parts;
9431   int cost = 1;
9432   int ok = ix86_decompose_address (x, &parts);
9433
9434   gcc_assert (ok);
9435
9436   if (parts.base && GET_CODE (parts.base) == SUBREG)
9437     parts.base = SUBREG_REG (parts.base);
9438   if (parts.index && GET_CODE (parts.index) == SUBREG)
9439     parts.index = SUBREG_REG (parts.index);
9440
9441   /* Attempt to minimize number of registers in the address.  */
9442   if ((parts.base
9443        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9444       || (parts.index
9445           && (!REG_P (parts.index)
9446               || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9447     cost++;
9448
9449   if (parts.base
9450       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9451       && parts.index
9452       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9453       && parts.base != parts.index)
9454     cost++;
9455
9456   /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
9457      since it's predecode logic can't detect the length of instructions
9458      and it degenerates to vector decoded.  Increase cost of such
9459      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
9460      to split such addresses or even refuse such addresses at all.
9461
9462      Following addressing modes are affected:
9463       [base+scale*index]
9464       [scale*index+disp]
9465       [base+index]
9466
9467      The first and last case  may be avoidable by explicitly coding the zero in
9468      memory address, but I don't have AMD-K6 machine handy to check this
9469      theory.  */
9470
9471   if (TARGET_K6
9472       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9473           || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9474           || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9475     cost += 10;
9476
9477   return cost;
9478 }
9479 \f
9480 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9481    this is used for to form addresses to local data when -fPIC is in
9482    use.  */
9483
9484 static bool
9485 darwin_local_data_pic (rtx disp)
9486 {
9487   return (GET_CODE (disp) == UNSPEC
9488           && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9489 }
9490
9491 /* Determine if a given RTX is a valid constant.  We already know this
9492    satisfies CONSTANT_P.  */
9493
9494 bool
9495 legitimate_constant_p (rtx x)
9496 {
9497   switch (GET_CODE (x))
9498     {
9499     case CONST:
9500       x = XEXP (x, 0);
9501
9502       if (GET_CODE (x) == PLUS)
9503         {
9504           if (!CONST_INT_P (XEXP (x, 1)))
9505             return false;
9506           x = XEXP (x, 0);
9507         }
9508
9509       if (TARGET_MACHO && darwin_local_data_pic (x))
9510         return true;
9511
9512       /* Only some unspecs are valid as "constants".  */
9513       if (GET_CODE (x) == UNSPEC)
9514         switch (XINT (x, 1))
9515           {
9516           case UNSPEC_GOT:
9517           case UNSPEC_GOTOFF:
9518           case UNSPEC_PLTOFF:
9519             return TARGET_64BIT;
9520           case UNSPEC_TPOFF:
9521           case UNSPEC_NTPOFF:
9522             x = XVECEXP (x, 0, 0);
9523             return (GET_CODE (x) == SYMBOL_REF
9524                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9525           case UNSPEC_DTPOFF:
9526             x = XVECEXP (x, 0, 0);
9527             return (GET_CODE (x) == SYMBOL_REF
9528                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9529           default:
9530             return false;
9531           }
9532
9533       /* We must have drilled down to a symbol.  */
9534       if (GET_CODE (x) == LABEL_REF)
9535         return true;
9536       if (GET_CODE (x) != SYMBOL_REF)
9537         return false;
9538       /* FALLTHRU */
9539
9540     case SYMBOL_REF:
9541       /* TLS symbols are never valid.  */
9542       if (SYMBOL_REF_TLS_MODEL (x))
9543         return false;
9544
9545       /* DLLIMPORT symbols are never valid.  */
9546       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9547           && SYMBOL_REF_DLLIMPORT_P (x))
9548         return false;
9549       break;
9550
9551     case CONST_DOUBLE:
9552       if (GET_MODE (x) == TImode
9553           && x != CONST0_RTX (TImode)
9554           && !TARGET_64BIT)
9555         return false;
9556       break;
9557
9558     case CONST_VECTOR:
9559       if (!standard_sse_constant_p (x))
9560         return false;
9561
9562     default:
9563       break;
9564     }
9565
9566   /* Otherwise we handle everything else in the move patterns.  */
9567   return true;
9568 }
9569
9570 /* Determine if it's legal to put X into the constant pool.  This
9571    is not possible for the address of thread-local symbols, which
9572    is checked above.  */
9573
9574 static bool
9575 ix86_cannot_force_const_mem (rtx x)
9576 {
9577   /* We can always put integral constants and vectors in memory.  */
9578   switch (GET_CODE (x))
9579     {
9580     case CONST_INT:
9581     case CONST_DOUBLE:
9582     case CONST_VECTOR:
9583       return false;
9584
9585     default:
9586       break;
9587     }
9588   return !legitimate_constant_p (x);
9589 }
9590
9591
9592 /* Nonzero if the constant value X is a legitimate general operand
9593    when generating PIC code.  It is given that flag_pic is on and
9594    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
9595
9596 bool
9597 legitimate_pic_operand_p (rtx x)
9598 {
9599   rtx inner;
9600
9601   switch (GET_CODE (x))
9602     {
9603     case CONST:
9604       inner = XEXP (x, 0);
9605       if (GET_CODE (inner) == PLUS
9606           && CONST_INT_P (XEXP (inner, 1)))
9607         inner = XEXP (inner, 0);
9608
9609       /* Only some unspecs are valid as "constants".  */
9610       if (GET_CODE (inner) == UNSPEC)
9611         switch (XINT (inner, 1))
9612           {
9613           case UNSPEC_GOT:
9614           case UNSPEC_GOTOFF:
9615           case UNSPEC_PLTOFF:
9616             return TARGET_64BIT;
9617           case UNSPEC_TPOFF:
9618             x = XVECEXP (inner, 0, 0);
9619             return (GET_CODE (x) == SYMBOL_REF
9620                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9621           case UNSPEC_MACHOPIC_OFFSET:
9622             return legitimate_pic_address_disp_p (x);
9623           default:
9624             return false;
9625           }
9626       /* FALLTHRU */
9627
9628     case SYMBOL_REF:
9629     case LABEL_REF:
9630       return legitimate_pic_address_disp_p (x);
9631
9632     default:
9633       return true;
9634     }
9635 }
9636
9637 /* Determine if a given CONST RTX is a valid memory displacement
9638    in PIC mode.  */
9639
9640 int
9641 legitimate_pic_address_disp_p (rtx disp)
9642 {
9643   bool saw_plus;
9644
9645   /* In 64bit mode we can allow direct addresses of symbols and labels
9646      when they are not dynamic symbols.  */
9647   if (TARGET_64BIT)
9648     {
9649       rtx op0 = disp, op1;
9650
9651       switch (GET_CODE (disp))
9652         {
9653         case LABEL_REF:
9654           return true;
9655
9656         case CONST:
9657           if (GET_CODE (XEXP (disp, 0)) != PLUS)
9658             break;
9659           op0 = XEXP (XEXP (disp, 0), 0);
9660           op1 = XEXP (XEXP (disp, 0), 1);
9661           if (!CONST_INT_P (op1)
9662               || INTVAL (op1) >= 16*1024*1024
9663               || INTVAL (op1) < -16*1024*1024)
9664             break;
9665           if (GET_CODE (op0) == LABEL_REF)
9666             return true;
9667           if (GET_CODE (op0) != SYMBOL_REF)
9668             break;
9669           /* FALLTHRU */
9670
9671         case SYMBOL_REF:
9672           /* TLS references should always be enclosed in UNSPEC.  */
9673           if (SYMBOL_REF_TLS_MODEL (op0))
9674             return false;
9675           if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9676               && ix86_cmodel != CM_LARGE_PIC)
9677             return true;
9678           break;
9679
9680         default:
9681           break;
9682         }
9683     }
9684   if (GET_CODE (disp) != CONST)
9685     return 0;
9686   disp = XEXP (disp, 0);
9687
9688   if (TARGET_64BIT)
9689     {
9690       /* We are unsafe to allow PLUS expressions.  This limit allowed distance
9691          of GOT tables.  We should not need these anyway.  */
9692       if (GET_CODE (disp) != UNSPEC
9693           || (XINT (disp, 1) != UNSPEC_GOTPCREL
9694               && XINT (disp, 1) != UNSPEC_GOTOFF
9695               && XINT (disp, 1) != UNSPEC_PLTOFF))
9696         return 0;
9697
9698       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9699           && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9700         return 0;
9701       return 1;
9702     }
9703
9704   saw_plus = false;
9705   if (GET_CODE (disp) == PLUS)
9706     {
9707       if (!CONST_INT_P (XEXP (disp, 1)))
9708         return 0;
9709       disp = XEXP (disp, 0);
9710       saw_plus = true;
9711     }
9712
9713   if (TARGET_MACHO && darwin_local_data_pic (disp))
9714     return 1;
9715
9716   if (GET_CODE (disp) != UNSPEC)
9717     return 0;
9718
9719   switch (XINT (disp, 1))
9720     {
9721     case UNSPEC_GOT:
9722       if (saw_plus)
9723         return false;
9724       /* We need to check for both symbols and labels because VxWorks loads
9725          text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
9726          details.  */
9727       return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9728               || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9729     case UNSPEC_GOTOFF:
9730       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9731          While ABI specify also 32bit relocation but we don't produce it in
9732          small PIC model at all.  */
9733       if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9734            || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9735           && !TARGET_64BIT)
9736         return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9737       return false;
9738     case UNSPEC_GOTTPOFF:
9739     case UNSPEC_GOTNTPOFF:
9740     case UNSPEC_INDNTPOFF:
9741       if (saw_plus)
9742         return false;
9743       disp = XVECEXP (disp, 0, 0);
9744       return (GET_CODE (disp) == SYMBOL_REF
9745               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9746     case UNSPEC_NTPOFF:
9747       disp = XVECEXP (disp, 0, 0);
9748       return (GET_CODE (disp) == SYMBOL_REF
9749               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9750     case UNSPEC_DTPOFF:
9751       disp = XVECEXP (disp, 0, 0);
9752       return (GET_CODE (disp) == SYMBOL_REF
9753               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9754     }
9755
9756   return 0;
9757 }
9758
9759 /* Recognizes RTL expressions that are valid memory addresses for an
9760    instruction.  The MODE argument is the machine mode for the MEM
9761    expression that wants to use this address.
9762
9763    It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
9764    convert common non-canonical forms to canonical form so that they will
9765    be recognized.  */
9766
9767 static bool
9768 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9769                            rtx addr, bool strict)
9770 {
9771   struct ix86_address parts;
9772   rtx base, index, disp;
9773   HOST_WIDE_INT scale;
9774
9775   if (ix86_decompose_address (addr, &parts) <= 0)
9776     /* Decomposition failed.  */
9777     return false;
9778
9779   base = parts.base;
9780   index = parts.index;
9781   disp = parts.disp;
9782   scale = parts.scale;
9783
9784   /* Validate base register.
9785
9786      Don't allow SUBREG's that span more than a word here.  It can lead to spill
9787      failures when the base is one word out of a two word structure, which is
9788      represented internally as a DImode int.  */
9789
9790   if (base)
9791     {
9792       rtx reg;
9793
9794       if (REG_P (base))
9795         reg = base;
9796       else if (GET_CODE (base) == SUBREG
9797                && REG_P (SUBREG_REG (base))
9798                && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9799                   <= UNITS_PER_WORD)
9800         reg = SUBREG_REG (base);
9801       else
9802         /* Base is not a register.  */
9803         return false;
9804
9805       if (GET_MODE (base) != Pmode)
9806         /* Base is not in Pmode.  */
9807         return false;
9808
9809       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9810           || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9811         /* Base is not valid.  */
9812         return false;
9813     }
9814
9815   /* Validate index register.
9816
9817      Don't allow SUBREG's that span more than a word here -- same as above.  */
9818
9819   if (index)
9820     {
9821       rtx reg;
9822
9823       if (REG_P (index))
9824         reg = index;
9825       else if (GET_CODE (index) == SUBREG
9826                && REG_P (SUBREG_REG (index))
9827                && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9828                   <= UNITS_PER_WORD)
9829         reg = SUBREG_REG (index);
9830       else
9831         /* Index is not a register.  */
9832         return false;
9833
9834       if (GET_MODE (index) != Pmode)
9835         /* Index is not in Pmode.  */
9836         return false;
9837
9838       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9839           || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9840         /* Index is not valid.  */
9841         return false;
9842     }
9843
9844   /* Validate scale factor.  */
9845   if (scale != 1)
9846     {
9847       if (!index)
9848         /* Scale without index.  */
9849         return false;
9850
9851       if (scale != 2 && scale != 4 && scale != 8)
9852         /* Scale is not a valid multiplier.  */
9853         return false;
9854     }
9855
9856   /* Validate displacement.  */
9857   if (disp)
9858     {
9859       if (GET_CODE (disp) == CONST
9860           && GET_CODE (XEXP (disp, 0)) == UNSPEC
9861           && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9862         switch (XINT (XEXP (disp, 0), 1))
9863           {
9864           /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9865              used.  While ABI specify also 32bit relocations, we don't produce
9866              them at all and use IP relative instead.  */
9867           case UNSPEC_GOT:
9868           case UNSPEC_GOTOFF:
9869             gcc_assert (flag_pic);
9870             if (!TARGET_64BIT)
9871               goto is_legitimate_pic;
9872
9873             /* 64bit address unspec.  */
9874             return false;
9875
9876           case UNSPEC_GOTPCREL:
9877             gcc_assert (flag_pic);
9878             goto is_legitimate_pic;
9879
9880           case UNSPEC_GOTTPOFF:
9881           case UNSPEC_GOTNTPOFF:
9882           case UNSPEC_INDNTPOFF:
9883           case UNSPEC_NTPOFF:
9884           case UNSPEC_DTPOFF:
9885             break;
9886
9887           default:
9888             /* Invalid address unspec.  */
9889             return false;
9890           }
9891
9892       else if (SYMBOLIC_CONST (disp)
9893                && (flag_pic
9894                    || (TARGET_MACHO
9895 #if TARGET_MACHO
9896                        && MACHOPIC_INDIRECT
9897                        && !machopic_operand_p (disp)
9898 #endif
9899                )))
9900         {
9901
9902         is_legitimate_pic:
9903           if (TARGET_64BIT && (index || base))
9904             {
9905               /* foo@dtpoff(%rX) is ok.  */
9906               if (GET_CODE (disp) != CONST
9907                   || GET_CODE (XEXP (disp, 0)) != PLUS
9908                   || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9909                   || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9910                   || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9911                       && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9912                 /* Non-constant pic memory reference.  */
9913                 return false;
9914             }
9915           else if (! legitimate_pic_address_disp_p (disp))
9916             /* Displacement is an invalid pic construct.  */
9917             return false;
9918
9919           /* This code used to verify that a symbolic pic displacement
9920              includes the pic_offset_table_rtx register.
9921
9922              While this is good idea, unfortunately these constructs may
9923              be created by "adds using lea" optimization for incorrect
9924              code like:
9925
9926              int a;
9927              int foo(int i)
9928                {
9929                  return *(&a+i);
9930                }
9931
9932              This code is nonsensical, but results in addressing
9933              GOT table with pic_offset_table_rtx base.  We can't
9934              just refuse it easily, since it gets matched by
9935              "addsi3" pattern, that later gets split to lea in the
9936              case output register differs from input.  While this
9937              can be handled by separate addsi pattern for this case
9938              that never results in lea, this seems to be easier and
9939              correct fix for crash to disable this test.  */
9940         }
9941       else if (GET_CODE (disp) != LABEL_REF
9942                && !CONST_INT_P (disp)
9943                && (GET_CODE (disp) != CONST
9944                    || !legitimate_constant_p (disp))
9945                && (GET_CODE (disp) != SYMBOL_REF
9946                    || !legitimate_constant_p (disp)))
9947         /* Displacement is not constant.  */
9948         return false;
9949       else if (TARGET_64BIT
9950                && !x86_64_immediate_operand (disp, VOIDmode))
9951         /* Displacement is out of range.  */
9952         return false;
9953     }
9954
9955   /* Everything looks valid.  */
9956   return true;
9957 }
9958
9959 /* Determine if a given RTX is a valid constant address.  */
9960
9961 bool
9962 constant_address_p (rtx x)
9963 {
9964   return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
9965 }
9966 \f
9967 /* Return a unique alias set for the GOT.  */
9968
9969 static alias_set_type
9970 ix86_GOT_alias_set (void)
9971 {
9972   static alias_set_type set = -1;
9973   if (set == -1)
9974     set = new_alias_set ();
9975   return set;
9976 }
9977
9978 /* Return a legitimate reference for ORIG (an address) using the
9979    register REG.  If REG is 0, a new pseudo is generated.
9980
9981    There are two types of references that must be handled:
9982
9983    1. Global data references must load the address from the GOT, via
9984       the PIC reg.  An insn is emitted to do this load, and the reg is
9985       returned.
9986
9987    2. Static data references, constant pool addresses, and code labels
9988       compute the address as an offset from the GOT, whose base is in
9989       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
9990       differentiate them from global data objects.  The returned
9991       address is the PIC reg + an unspec constant.
9992
9993    TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9994    reg also appears in the address.  */
9995
9996 static rtx
9997 legitimize_pic_address (rtx orig, rtx reg)
9998 {
9999   rtx addr = orig;
10000   rtx new_rtx = orig;
10001   rtx base;
10002
10003 #if TARGET_MACHO
10004   if (TARGET_MACHO && !TARGET_64BIT)
10005     {
10006       if (reg == 0)
10007         reg = gen_reg_rtx (Pmode);
10008       /* Use the generic Mach-O PIC machinery.  */
10009       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10010     }
10011 #endif
10012
10013   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10014     new_rtx = addr;
10015   else if (TARGET_64BIT
10016            && ix86_cmodel != CM_SMALL_PIC
10017            && gotoff_operand (addr, Pmode))
10018     {
10019       rtx tmpreg;
10020       /* This symbol may be referenced via a displacement from the PIC
10021          base address (@GOTOFF).  */
10022
10023       if (reload_in_progress)
10024         df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10025       if (GET_CODE (addr) == CONST)
10026         addr = XEXP (addr, 0);
10027       if (GET_CODE (addr) == PLUS)
10028           {
10029             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10030                                       UNSPEC_GOTOFF);
10031             new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10032           }
10033         else
10034           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10035       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10036       if (!reg)
10037         tmpreg = gen_reg_rtx (Pmode);
10038       else
10039         tmpreg = reg;
10040       emit_move_insn (tmpreg, new_rtx);
10041
10042       if (reg != 0)
10043         {
10044           new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
10045                                          tmpreg, 1, OPTAB_DIRECT);
10046           new_rtx = reg;
10047         }
10048       else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
10049     }
10050   else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
10051     {
10052       /* This symbol may be referenced via a displacement from the PIC
10053          base address (@GOTOFF).  */
10054
10055       if (reload_in_progress)
10056         df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10057       if (GET_CODE (addr) == CONST)
10058         addr = XEXP (addr, 0);
10059       if (GET_CODE (addr) == PLUS)
10060           {
10061             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10062                                       UNSPEC_GOTOFF);
10063             new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10064           }
10065         else
10066           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10067       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10068       new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10069
10070       if (reg != 0)
10071         {
10072           emit_move_insn (reg, new_rtx);
10073           new_rtx = reg;
10074         }
10075     }
10076   else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10077            /* We can't use @GOTOFF for text labels on VxWorks;
10078               see gotoff_operand.  */
10079            || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10080     {
10081       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10082         {
10083           if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
10084             return legitimize_dllimport_symbol (addr, true);
10085           if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
10086               && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
10087               && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
10088             {
10089               rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
10090               return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
10091             }
10092         }
10093
10094       if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10095         {
10096           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
10097           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10098           new_rtx = gen_const_mem (Pmode, new_rtx);
10099           set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10100
10101           if (reg == 0)
10102             reg = gen_reg_rtx (Pmode);
10103           /* Use directly gen_movsi, otherwise the address is loaded
10104              into register for CSE.  We don't want to CSE this addresses,
10105              instead we CSE addresses from the GOT table, so skip this.  */
10106           emit_insn (gen_movsi (reg, new_rtx));
10107           new_rtx = reg;
10108         }
10109       else
10110         {
10111           /* This symbol must be referenced via a load from the
10112              Global Offset Table (@GOT).  */
10113
10114           if (reload_in_progress)
10115             df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10116           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10117           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10118           if (TARGET_64BIT)
10119             new_rtx = force_reg (Pmode, new_rtx);
10120           new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10121           new_rtx = gen_const_mem (Pmode, new_rtx);
10122           set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10123
10124           if (reg == 0)
10125             reg = gen_reg_rtx (Pmode);
10126           emit_move_insn (reg, new_rtx);
10127           new_rtx = reg;
10128         }
10129     }
10130   else
10131     {
10132       if (CONST_INT_P (addr)
10133           && !x86_64_immediate_operand (addr, VOIDmode))
10134         {
10135           if (reg)
10136             {
10137               emit_move_insn (reg, addr);
10138               new_rtx = reg;
10139             }
10140           else
10141             new_rtx = force_reg (Pmode, addr);
10142         }
10143       else if (GET_CODE (addr) == CONST)
10144         {
10145           addr = XEXP (addr, 0);
10146
10147           /* We must match stuff we generate before.  Assume the only
10148              unspecs that can get here are ours.  Not that we could do
10149              anything with them anyway....  */
10150           if (GET_CODE (addr) == UNSPEC
10151               || (GET_CODE (addr) == PLUS
10152                   && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10153             return orig;
10154           gcc_assert (GET_CODE (addr) == PLUS);
10155         }
10156       if (GET_CODE (addr) == PLUS)
10157         {
10158           rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10159
10160           /* Check first to see if this is a constant offset from a @GOTOFF
10161              symbol reference.  */
10162           if (gotoff_operand (op0, Pmode)
10163               && CONST_INT_P (op1))
10164             {
10165               if (!TARGET_64BIT)
10166                 {
10167                   if (reload_in_progress)
10168                     df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10169                   new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10170                                             UNSPEC_GOTOFF);
10171                   new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10172                   new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10173                   new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10174
10175                   if (reg != 0)
10176                     {
10177                       emit_move_insn (reg, new_rtx);
10178                       new_rtx = reg;
10179                     }
10180                 }
10181               else
10182                 {
10183                   if (INTVAL (op1) < -16*1024*1024
10184                       || INTVAL (op1) >= 16*1024*1024)
10185                     {
10186                       if (!x86_64_immediate_operand (op1, Pmode))
10187                         op1 = force_reg (Pmode, op1);
10188                       new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10189                     }
10190                 }
10191             }
10192           else
10193             {
10194               base = legitimize_pic_address (XEXP (addr, 0), reg);
10195               new_rtx  = legitimize_pic_address (XEXP (addr, 1),
10196                                                  base == reg ? NULL_RTX : reg);
10197
10198               if (CONST_INT_P (new_rtx))
10199                 new_rtx = plus_constant (base, INTVAL (new_rtx));
10200               else
10201                 {
10202                   if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
10203                     {
10204                       base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
10205                       new_rtx = XEXP (new_rtx, 1);
10206                     }
10207                   new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
10208                 }
10209             }
10210         }
10211     }
10212   return new_rtx;
10213 }
10214 \f
10215 /* Return the thread pointer as an UNSPEC_TP expression, or, when TO_REG is
10216    nonzero, a fresh Pmode pseudo that has just been set from it.  */
10217
10218 static rtx
10219 get_thread_pointer (int to_reg)
10220 {
10221   rtx thread_ptr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10222                                    UNSPEC_TP);
10223
10224   if (to_reg)
10225     {
10226       rtx pseudo = gen_reg_rtx (Pmode);
10227       emit_insn (gen_rtx_SET (VOIDmode, pseudo, thread_ptr));
10228       return pseudo;
10229     }
10230   return thread_ptr;
10231 }
10232
10233 /* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
10234    false if we expect this to be used for a memory address and true if
10235    we expect to load the address into a register.  */
      /* X is the TLS symbol and MODEL the access model to expand.  The two
         dynamic models always yield a register.  For initial-exec and
         local-exec the result is a bare (plus BASE OFF) expression when
         TARGET_64BIT or TARGET_ANY_GNU_TLS holds, and otherwise a register
         set by the subsi3 pattern.  Any other MODEL is a bug.  */
10236
10237 static rtx
10238 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
10239 {
10240   rtx dest, base, off, pic, tp;
10241   int type;
10242
10243   switch (model)
10244     {
10245     case TLS_MODEL_GLOBAL_DYNAMIC:
10246       dest = gen_reg_rtx (Pmode);
            /* The thread pointer is only loaded for TARGET_GNU2_TLS, where it
               is added to the value produced by the pattern below.  */
10247       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10248
10249       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10250         {
10251           rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
10252
                /* Build the call in a detached sequence so the whole sequence
                   can be handed to emit_libcall_block together with DEST,
                   RAX and X.  */
10253           start_sequence ();
10254           emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
10255           insns = get_insns ();
10256           end_sequence ();
10257
10258           RTL_CONST_CALL_P (insns) = 1;
10259           emit_libcall_block (insns, dest, rax, x);
10260         }
10261       else if (TARGET_64BIT && TARGET_GNU2_TLS)
10262         emit_insn (gen_tls_global_dynamic_64 (dest, x));
10263       else
10264         emit_insn (gen_tls_global_dynamic_32 (dest, x));
10265
10266       if (TARGET_GNU2_TLS)
10267         {
                /* Combine the pattern's result with TP and attach a REG_EQUIV
                   note for X to the last insn emitted for that sum.  */
10268           dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10269
10270           set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10271         }
10272       break;
10273
10274     case TLS_MODEL_LOCAL_DYNAMIC:
            /* BASE receives the result of the local-dynamic base pattern; the
               symbol's UNSPEC_DTPOFF offset is added to it further down.  */
10275       base = gen_reg_rtx (Pmode);
10276       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10277
10278       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10279         {
10280           rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
10281
10282           start_sequence ();
10283           emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
10284           insns = get_insns ();
10285           end_sequence ();
10286
                /* The value passed to emit_libcall_block here is a two-element
                   EXPR_LIST: ix86_tls_get_addr () followed by const0_rtx.  */
10287           note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
10288           note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
10289           RTL_CONST_CALL_P (insns) = 1;
10290           emit_libcall_block (insns, base, rax, note);
10291         }
10292       else if (TARGET_64BIT && TARGET_GNU2_TLS)
10293         emit_insn (gen_tls_local_dynamic_base_64 (base));
10294       else
10295         emit_insn (gen_tls_local_dynamic_base_32 (base));
10296
10297       if (TARGET_GNU2_TLS)
10298         {
                /* This X shadows the parameter inside this block only; the
                   code after the block refers to the parameter again.  */
10299           rtx x = ix86_tls_module_base ();
10300
10301           set_unique_reg_note (get_last_insn (), REG_EQUIV,
10302                                gen_rtx_MINUS (Pmode, x, tp));
10303         }
10304
10305       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10306       off = gen_rtx_CONST (Pmode, off);
10307
10308       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10309
10310       if (TARGET_GNU2_TLS)
10311         {
10312           dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10313
10314           set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10315         }
10316
10317       break;
10318
10319     case TLS_MODEL_INITIAL_EXEC:
            /* Pick the base used to address the GOT slot (PIC, possibly none)
               and the UNSPEC kind that wraps the symbol.  */
10320       if (TARGET_64BIT)
10321         {
10322           pic = NULL;
10323           type = UNSPEC_GOTNTPOFF;
10324         }
10325       else if (flag_pic)
10326         {
10327           if (reload_in_progress)
10328             df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10329           pic = pic_offset_table_rtx;
10330           type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10331         }
10332       else if (!TARGET_ANY_GNU_TLS)
10333         {
                /* Not PIC and not GNU TLS: emit set_got into a fresh pseudo
                   and use that pseudo as the base.  */
10334           pic = gen_reg_rtx (Pmode);
10335           emit_insn (gen_set_got (pic));
10336           type = UNSPEC_GOTTPOFF;
10337         }
10338       else
10339         {
10340           pic = NULL;
10341           type = UNSPEC_INDNTPOFF;
10342         }
10343
            /* OFF becomes a constant MEM, in the GOT alias set, addressed by
               the wrapped symbol, plus PIC when there is one.  */
10344       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
10345       off = gen_rtx_CONST (Pmode, off);
10346       if (pic)
10347         off = gen_rtx_PLUS (Pmode, pic, off);
10348       off = gen_const_mem (Pmode, off);
10349       set_mem_alias_set (off, ix86_GOT_alias_set ());
10350
10351       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10352         {
                /* The thread pointer is forced into a register when FOR_MOV
                   is set or TARGET_TLS_DIRECT_SEG_REFS is off; the sum is
                   returned without being loaded into a register.  */
10353           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10354           off = force_reg (Pmode, off);
10355           return gen_rtx_PLUS (Pmode, base, off);
10356         }
10357       else
10358         {
10359           base = get_thread_pointer (true);
10360           dest = gen_reg_rtx (Pmode);
10361           emit_insn (gen_subsi3 (dest, base, off));
10362         }
10363       break;
10364
10365     case TLS_MODEL_LOCAL_EXEC:
            /* The offset is the symbol wrapped in UNSPEC_NTPOFF or
               UNSPEC_TPOFF inside a CONST; no memory load is involved.  */
10366       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10367                             (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10368                             ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10369       off = gen_rtx_CONST (Pmode, off);
10370
10371       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10372         {
10373           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10374           return gen_rtx_PLUS (Pmode, base, off);
10375         }
10376       else
10377         {
10378           base = get_thread_pointer (true);
10379           dest = gen_reg_rtx (Pmode);
10380           emit_insn (gen_subsi3 (dest, base, off));
10381         }
10382       break;
10383
10384     default:
10385       gcc_unreachable ();
10386     }
10387
10388   return dest;
10389 }
10390
10391 /* Create or return the unique __imp_DECL dllimport symbol corresponding
10392    to symbol DECL.  */
      /* The result is an artificial, ignored, external, read-only VAR_DECL of
         ptr_type_node whose DECL_RTL is a constant MEM addressed by the
         "__imp_" symbol.  Results are cached in dllimport_map, keyed by DECL,
         so a repeated request finds the decl created earlier.  */
10393
10394 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10395   htab_t dllimport_map;
10396
10397 static tree
10398 get_dllimport_decl (tree decl)
10399 {
10400   struct tree_map *h, in;
10401   void **loc;
10402   const char *name;
10403   const char *prefix;
10404   size_t namelen, prefixlen;
10405   char *imp_name;
10406   tree to;
10407   rtx rtl;
10408
        /* The map is created lazily on first use.  */
10409   if (!dllimport_map)
10410     dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10411
        /* Look DECL up, reserving a slot (INSERT) in case it is new.  */
10412   in.hash = htab_hash_pointer (decl);
10413   in.base.from = decl;
10414   loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10415   h = (struct tree_map *) *loc;
10416   if (h)
10417     return h->to;
10418
        /* First request for DECL: fill the slot with a new mapping to a
           freshly built VAR_DECL.  */
10419   *loc = h = GGC_NEW (struct tree_map);
10420   h->hash = in.hash;
10421   h->base.from = decl;
10422   h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
10423                            VAR_DECL, NULL, ptr_type_node);
10424   DECL_ARTIFICIAL (to) = 1;
10425   DECL_IGNORED_P (to) = 1;
10426   DECL_EXTERNAL (to) = 1;
10427   TREE_READONLY (to) = 1;
10428
        /* Compose the import name: a prefix followed by DECL's assembler name
           with its encoding stripped.  The shorter prefix is used when that
           name starts with FASTCALL_PREFIX or user_label_prefix is empty.  */
10429   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10430   name = targetm.strip_name_encoding (name);
10431   prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10432     ? "*__imp_" : "*__imp__";
10433   namelen = strlen (name);
10434   prefixlen = strlen (prefix);
10435   imp_name = (char *) alloca (namelen + prefixlen + 1);
10436   memcpy (imp_name, prefix, prefixlen);
        /* NAMELEN + 1 also copies NAME's terminating NUL.  */
10437   memcpy (imp_name + prefixlen, name, namelen + 1);
10438
        /* The alloca buffer does not outlive this call, so the SYMBOL_REF is
           built from the copy made by ggc_alloc_string.  */
10439   name = ggc_alloc_string (imp_name, namelen + prefixlen);
10440   rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10441   SET_SYMBOL_REF_DECL (rtl, to);
10442   SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10443
        /* The decl's RTL is a constant MEM at that symbol, placed in the GOT
           alias set.  */
10444   rtl = gen_const_mem (Pmode, rtl);
10445   set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10446
10447   SET_DECL_RTL (to, rtl);
10448   SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10449
10450   return to;
10451 }
10452
10453 /* Return the RTL of the __imp_ decl that get_dllimport_decl associates with
10454    SYMBOL, which must carry a decl.  That RTL is first forced into a
10455    register when WANT_REG is true.  */
10456
10457 static rtx
10458 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10459 {
10460   tree import_decl;
10461   rtx import_rtl;
10462
10463   gcc_assert (SYMBOL_REF_DECL (symbol));
10464
10465   import_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10466   import_rtl = DECL_RTL (import_decl);
10467
10468   return want_reg ? force_reg (Pmode, import_rtl) : import_rtl;
10469 }
10470
10471 /* Try machine-dependent ways of modifying an illegitimate address
10472    to be legitimate.  If we find one, return the new, valid address.
10473    This macro is used in only one place: `memory_address' in explow.c.
10474
10475    OLDX is the address as it was before break_out_memory_refs was called.
10476    In some cases it is useful to look at this to decide what needs to be done.
10477
10478    It is always safe for this macro to do nothing.  It exists to recognize
10479    opportunities to optimize the output.
10480
10481    For the 80386, we handle X+REG by loading X into a register R and
10482    using R+REG.  R will go in a general reg and indexing will be used.
10483    However, if REG is a broken-out memory address or multiplication,
10484    nothing needs to be done because REG can certainly go in a general reg.
10485
10486    When -fpic is used, special handling is needed for symbolic references.
10487    See comments by legitimize_pic_address in i386.c for details.  */
      /* NOTE(review): the text above says "macro", but this is a static
         function.  OLDX is unused here.  MODE is only passed on to
         ix86_legitimate_address_p.  When X is a PLUS its operands may be
         rewritten in place, so the returned rtx can be X itself with
         modified operands.  */
10488
10489 static rtx
10490 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10491                          enum machine_mode mode)
10492 {
10493   int changed = 0;
10494   unsigned log;
10495
        /* LOG first holds the TLS model of a symbol (zero when X is not a
           TLS symbol) and is reused further down as a shift count.  */
10496   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10497   if (log)
10498     return legitimize_tls_address (x, (enum tls_model) log, false);
        /* A TLS symbol plus an offset inside a CONST: legitimize the symbol
           alone and add the offset back on.  */
10499   if (GET_CODE (x) == CONST
10500       && GET_CODE (XEXP (x, 0)) == PLUS
10501       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10502       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10503     {
10504       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10505                                       (enum tls_model) log, false);
10506       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10507     }
10508
        /* The same two shapes, bare symbol and symbol plus offset, for
           dllimport symbols.  */
10509   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10510     {
10511       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10512         return legitimize_dllimport_symbol (x, true);
10513       if (GET_CODE (x) == CONST
10514           && GET_CODE (XEXP (x, 0)) == PLUS
10515           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10516           && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10517         {
10518           rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10519           return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10520         }
10521     }
10522
10523   if (flag_pic && SYMBOLIC_CONST (x))
10524     return legitimize_pic_address (x, 0);
10525
10526   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10527   if (GET_CODE (x) == ASHIFT
10528       && CONST_INT_P (XEXP (x, 1))
10529       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10530     {
10531       changed = 1;
10532       log = INTVAL (XEXP (x, 1));
10533       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10534                         GEN_INT (1 << log));
10535     }
10536
10537   if (GET_CODE (x) == PLUS)
10538     {
10539       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
10540
10541       if (GET_CODE (XEXP (x, 0)) == ASHIFT
10542           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10543           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10544         {
10545           changed = 1;
10546           log = INTVAL (XEXP (XEXP (x, 0), 1));
10547           XEXP (x, 0) = gen_rtx_MULT (Pmode,
10548                                       force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10549                                       GEN_INT (1 << log));
10550         }
10551
10552       if (GET_CODE (XEXP (x, 1)) == ASHIFT
10553           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10554           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10555         {
10556           changed = 1;
10557           log = INTVAL (XEXP (XEXP (x, 1), 1));
10558           XEXP (x, 1) = gen_rtx_MULT (Pmode,
10559                                       force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10560                                       GEN_INT (1 << log));
10561         }
10562
10563       /* Put multiply first if it isn't already.  */
10564       if (GET_CODE (XEXP (x, 1)) == MULT)
10565         {
10566           rtx tmp = XEXP (x, 0);
10567           XEXP (x, 0) = XEXP (x, 1);
10568           XEXP (x, 1) = tmp;
10569           changed = 1;
10570         }
10571
10572       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10573          into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
10574          created by virtual register instantiation, register elimination, and
10575          similar optimizations.  */
10576       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10577         {
10578           changed = 1;
10579           x = gen_rtx_PLUS (Pmode,
10580                             gen_rtx_PLUS (Pmode, XEXP (x, 0),
10581                                           XEXP (XEXP (x, 1), 0)),
10582                             XEXP (XEXP (x, 1), 1));
10583         }
10584
10585       /* Canonicalize
10586          (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10587          into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
10588       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10589                && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10590                && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10591                && CONSTANT_P (XEXP (x, 1)))
10592         {
10593           rtx constant;
10594           rtx other = NULL_RTX;
10595
                /* Of the outer and the inner constant, at least one must be a
                   CONST_INT; it is folded into the other with plus_constant.
                   If neither is, CONSTANT stays null and nothing changes.  */
10596           if (CONST_INT_P (XEXP (x, 1)))
10597             {
10598               constant = XEXP (x, 1);
10599               other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10600             }
10601           else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10602             {
10603               constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10604               other = XEXP (x, 1);
10605             }
10606           else
10607             constant = 0;
10608
10609           if (constant)
10610             {
10611               changed = 1;
10612               x = gen_rtx_PLUS (Pmode,
10613                                 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10614                                               XEXP (XEXP (XEXP (x, 0), 1), 0)),
10615                                 plus_constant (other, INTVAL (constant)));
10616             }
10617         }
10618
            /* Stop as soon as a canonicalized form is already legitimate.  */
10619       if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10620         return x;
10621
            /* Otherwise hand any remaining MULT operand to force_operand.  */
10622       if (GET_CODE (XEXP (x, 0)) == MULT)
10623         {
10624           changed = 1;
10625           XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10626         }
10627
10628       if (GET_CODE (XEXP (x, 1)) == MULT)
10629         {
10630           changed = 1;
10631           XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10632         }
10633
10634       if (changed
10635           && REG_P (XEXP (x, 1))
10636           && REG_P (XEXP (x, 0)))
10637         return x;
10638
10639       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10640         {
10641           changed = 1;
10642           x = legitimize_pic_address (x, 0);
10643         }
10644
10645       if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10646         return x;
10647
            /* Last resort: when one operand is a register, compute the other
               one into a new pseudo and use that pseudo in its place.  */
10648       if (REG_P (XEXP (x, 0)))
10649         {
10650           rtx temp = gen_reg_rtx (Pmode);
10651           rtx val  = force_operand (XEXP (x, 1), temp);
10652           if (val != temp)
10653             emit_move_insn (temp, val);
10654
10655           XEXP (x, 1) = temp;
10656           return x;
10657         }
10658
10659       else if (REG_P (XEXP (x, 1)))
10660         {
10661           rtx temp = gen_reg_rtx (Pmode);
10662           rtx val  = force_operand (XEXP (x, 0), temp);
10663           if (val != temp)
10664             emit_move_insn (temp, val);
10665
10666           XEXP (x, 0) = temp;
10667           return x;
10668         }
10669     }
10670
10671   return x;
10672 }
10673 \f
10674 /* Print an integer constant expression in assembler syntax.  Addition
10675    and subtraction are the only arithmetic that may appear in these
10676    expressions.  FILE is the stdio stream to write to, X is the rtx, and
10677    CODE is the operand print code from the output string.  */
10678
10679 static void
10680 output_pic_addr_const (FILE *file, rtx x, int code)
10681 {
10682   char buf[256];
10683
10684   switch (GET_CODE (x))
10685     {
10686     case PC:
10687       gcc_assert (flag_pic);
10688       putc ('.', file);
10689       break;
10690
10691     case SYMBOL_REF:
10692       if (! TARGET_MACHO || TARGET_64BIT)
10693         output_addr_const (file, x);
10694       else
10695         {
10696           const char *name = XSTR (x, 0);
10697
10698           /* Mark the decl as referenced so that cgraph will
10699              output the function.  */
10700           if (SYMBOL_REF_DECL (x))
10701             mark_decl_referenced (SYMBOL_REF_DECL (x));
10702
10703 #if TARGET_MACHO
10704           if (MACHOPIC_INDIRECT
10705               && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10706             name = machopic_indirection_name (x, /*stub_p=*/true);
10707 #endif
10708           assemble_name (file, name);
10709         }
10710       if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10711           && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10712         fputs ("@PLT", file);
10713       break;
10714
10715     case LABEL_REF:
10716       x = XEXP (x, 0);
10717       /* FALLTHRU */
10718     case CODE_LABEL:
10719       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10720       assemble_name (asm_out_file, buf);
10721       break;
10722
10723     case CONST_INT:
10724       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10725       break;
10726
10727     case CONST:
10728       /* This used to output parentheses around the expression,
10729          but that does not work on the 386 (either ATT or BSD assembler).  */
10730       output_pic_addr_const (file, XEXP (x, 0), code);
10731       break;
10732
10733     case CONST_DOUBLE:
10734       if (GET_MODE (x) == VOIDmode)
10735         {
10736           /* We can use %d if the number is <32 bits and positive.  */
10737           if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10738             fprintf (file, "0x%lx%08lx",
10739                      (unsigned long) CONST_DOUBLE_HIGH (x),
10740                      (unsigned long) CONST_DOUBLE_LOW (x));
10741           else
10742             fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10743         }
10744       else
10745         /* We can't handle floating point constants;
10746            PRINT_OPERAND must handle them.  */
10747         output_operand_lossage ("floating constant misused");
10748       break;
10749
10750     case PLUS:
10751       /* Some assemblers need integer constants to appear first.  */
10752       if (CONST_INT_P (XEXP (x, 0)))
10753         {
10754           output_pic_addr_const (file, XEXP (x, 0), code);
10755           putc ('+', file);
10756           output_pic_addr_const (file, XEXP (x, 1), code);
10757         }
10758       else
10759         {
10760           gcc_assert (CONST_INT_P (XEXP (x, 1)));
10761           output_pic_addr_const (file, XEXP (x, 1), code);
10762           putc ('+', file);
10763           output_pic_addr_const (file, XEXP (x, 0), code);
10764         }
10765       break;
10766
10767     case MINUS:
10768       if (!TARGET_MACHO)
10769         putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10770       output_pic_addr_const (file, XEXP (x, 0), code);
10771       putc ('-', file);
10772       output_pic_addr_const (file, XEXP (x, 1), code);
10773       if (!TARGET_MACHO)
10774         putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10775       break;
10776
10777      case UNSPEC:
10778        gcc_assert (XVECLEN (x, 0) == 1);
10779        output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10780        switch (XINT (x, 1))
10781         {
10782         case UNSPEC_GOT:
10783           fputs ("@GOT", file);
10784           break;
10785         case UNSPEC_GOTOFF:
10786           fputs ("@GOTOFF", file);
10787           break;
10788         case UNSPEC_PLTOFF:
10789           fputs ("@PLTOFF", file);
10790           break;
10791         case UNSPEC_GOTPCREL:
10792           fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10793                  "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10794           break;
10795         case UNSPEC_GOTTPOFF:
10796           /* FIXME: This might be @TPOFF in Sun ld too.  */
10797           fputs ("@GOTTPOFF", file);
10798           break;
10799         case UNSPEC_TPOFF:
10800           fputs ("@TPOFF", file);
10801           break;
10802         case UNSPEC_NTPOFF:
10803           if (TARGET_64BIT)
10804             fputs ("@TPOFF", file);
10805           else
10806             fputs ("@NTPOFF", file);
10807           break;
10808         case UNSPEC_DTPOFF:
10809           fputs ("@DTPOFF", file);
10810           break;
10811         case UNSPEC_GOTNTPOFF:
10812           if (TARGET_64BIT)
10813             fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10814                    "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10815           else
10816             fputs ("@GOTNTPOFF", file);
10817           break;
10818         case UNSPEC_INDNTPOFF:
10819           fputs ("@INDNTPOFF", file);
10820           break;
10821 #if TARGET_MACHO
10822         case UNSPEC_MACHOPIC_OFFSET:
10823           putc ('-', file);
10824           machopic_output_function_base_name (file);
10825           break;
10826 #endif
10827         default:
10828           output_operand_lossage ("invalid UNSPEC as operand");
10829           break;
10830         }
10831        break;
10832
10833     default:
10834       output_operand_lossage ("invalid expression as operand");
10835     }
10836 }
10837
10838 /* Write a DTP-relative reference to X on FILE: ASM_LONG, the address
10839    constant X, then the "@DTPOFF" suffix.  SIZE must be 4 or 8; for 8 the
10840    entry is completed with ", 0".  dwarf2out.c reaches this function
10841    through TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
10842
10843 static void ATTRIBUTE_UNUSED
10844 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10845 {
10846   const int needs_zero_pad = (size == 8);
10847
10848   fputs (ASM_LONG, file);
10849   output_addr_const (file, x);
10850   fputs ("@DTPOFF", file);
10851
10852   /* A SIZE of 4 needs nothing after the suffix.  */
10853   if (needs_zero_pad)
10854     fputs (", 0", file);
10855   else if (size != 4)
10856     gcc_unreachable ();
10857 }
10858
10859 /* Decide whether X stands for the PIC register.  X is normally a REG, but
10860    ix86_find_base_term may hand over a cselib VALUE that has replaced it;
10861    such a VALUE is compared against pic_offset_table_rtx instead.  */
10862
10863 static bool
10864 ix86_pic_register_p (rtx x)
10865 {
10866   if (GET_CODE (x) != VALUE || !CSELIB_VAL_PTR (x))
10867     return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10868
10869   return (pic_offset_table_rtx != NULL_RTX
10870           && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10871 }
10872
10873 /* In the name of slightly smaller debug output, and to cater to
10874    general assembler lossage, recognize PIC+GOTOFF and turn it back
10875    into a direct symbol reference.
10876
10877    On Darwin, this is necessary to avoid a crash, because Darwin
10878    has a different PIC label for each routine but the DWARF debugging
10879    information is not associated with any particular routine, so it's
10880    necessary to remove references to the PIC label from RTL stored by
10881    the DWARF output code.  */
      /* X is first passed through delegitimize_mem_from_attrs.  If nothing
         below matches, that intermediate result (ORIG_X) is returned
         unchanged; otherwise the symbol wrapped by the UNSPEC is returned,
         with any constant and register addends put back around it.  */
10882
10883 static rtx
10884 ix86_delegitimize_address (rtx x)
10885 {
10886   rtx orig_x = delegitimize_mem_from_attrs (x);
10887   /* reg_addend is NULL or a multiple of some register.  */
10888   rtx reg_addend = NULL_RTX;
10889   /* const_addend is NULL or a const_int.  */
10890   rtx const_addend = NULL_RTX;
10891   /* This is the result, or NULL.  */
10892   rtx result = NULL_RTX;
10893
10894   x = orig_x;
10895
        /* For a MEM the address is what gets matched; ORIG_X still records
           whether the original was a MEM.  */
10896   if (MEM_P (x))
10897     x = XEXP (x, 0);
10898
10899   if (TARGET_64BIT)
10900     {
            /* Only a MEM whose address is (const (unspec [SYM] GOTPCREL)) is
               undone here; the result is SYM.  */
10901       if (GET_CODE (x) != CONST
10902           || GET_CODE (XEXP (x, 0)) != UNSPEC
10903           || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10904           || !MEM_P (orig_x))
10905         return orig_x;
10906       return XVECEXP (XEXP (x, 0), 0, 0);
10907     }
10908
        /* From here on the address must be (plus BASE (const ...)).  */
10909   if (GET_CODE (x) != PLUS
10910       || GET_CODE (XEXP (x, 1)) != CONST)
10911     return orig_x;
10912
10913   if (ix86_pic_register_p (XEXP (x, 0)))
10914     /* %ebx + GOT/GOTOFF */
10915     ;
10916   else if (GET_CODE (XEXP (x, 0)) == PLUS)
10917     {
10918       /* %ebx + %reg * scale + GOT/GOTOFF */
10919       reg_addend = XEXP (x, 0);
            /* Keep whichever operand is not the PIC register; it must be a
               REG, MULT or ASHIFT.  */
10920       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10921         reg_addend = XEXP (reg_addend, 1);
10922       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10923         reg_addend = XEXP (reg_addend, 0);
10924       else
10925         return orig_x;
10926       if (!REG_P (reg_addend)
10927           && GET_CODE (reg_addend) != MULT
10928           && GET_CODE (reg_addend) != ASHIFT)
10929         return orig_x;
10930     }
10931   else
10932     return orig_x;
10933
        /* Look inside the CONST and peel off a CONST_INT addend, if any.  */
10934   x = XEXP (XEXP (x, 1), 0);
10935   if (GET_CODE (x) == PLUS
10936       && CONST_INT_P (XEXP (x, 1)))
10937     {
10938       const_addend = XEXP (x, 1);
10939       x = XEXP (x, 0);
10940     }
10941
        /* UNSPEC_GOT is only accepted when the original was a MEM, and
           UNSPEC_GOTOFF only when it was not.  */
10942   if (GET_CODE (x) == UNSPEC
10943       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10944           || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10945     result = XVECEXP (x, 0, 0);
10946
10947   if (TARGET_MACHO && darwin_local_data_pic (x)
10948       && !MEM_P (orig_x))
10949     result = XVECEXP (x, 0, 0);
10950
10951   if (! result)
10952     return orig_x;
10953
        /* Put the addends that were stripped above back around the symbol.  */
10954   if (const_addend)
10955     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10956   if (reg_addend)
10957     result = gen_rtx_PLUS (Pmode, reg_addend, result);
10958   return result;
10959 }
10960
10961 /* If X is a machine specific address (i.e. a symbol or label being
10962    referenced as a displacement from the GOT implemented using an
10963    UNSPEC), then return the base term.  Otherwise return X.  */
10964
10965 rtx
10966 ix86_find_base_term (rtx x)
10967 {
10968   rtx term;
10969
10970   if (TARGET_64BIT)
10971     {
10972       if (GET_CODE (x) != CONST)
10973         return x;
10974       term = XEXP (x, 0);
10975       if (GET_CODE (term) == PLUS
10976           && (CONST_INT_P (XEXP (term, 1))
10977               || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10978         term = XEXP (term, 0);
10979       if (GET_CODE (term) != UNSPEC
10980           || XINT (term, 1) != UNSPEC_GOTPCREL)
10981         return x;
10982
10983       return XVECEXP (term, 0, 0);
10984     }
10985
10986   return ix86_delegitimize_address (x);
10987 }
10988 \f
10989 static void
10990 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10991                     int fp, FILE *file)
10992 {
10993   const char *suffix;
10994
10995   if (mode == CCFPmode || mode == CCFPUmode)
10996     {
10997       code = ix86_fp_compare_code_to_integer (code);
10998       mode = CCmode;
10999     }
11000   if (reverse)
11001     code = reverse_condition (code);
11002
11003   switch (code)
11004     {
11005     case EQ:
11006       switch (mode)
11007         {
11008         case CCAmode:
11009           suffix = "a";
11010           break;
11011
11012         case CCCmode:
11013           suffix = "c";
11014           break;
11015
11016         case CCOmode:
11017           suffix = "o";
11018           break;
11019
11020         case CCSmode:
11021           suffix = "s";
11022           break;
11023
11024         default:
11025           suffix = "e";
11026         }
11027       break;
11028     case NE:
11029       switch (mode)
11030         {
11031         case CCAmode:
11032           suffix = "na";
11033           break;
11034
11035         case CCCmode:
11036           suffix = "nc";
11037           break;
11038
11039         case CCOmode:
11040           suffix = "no";
11041           break;
11042
11043         case CCSmode:
11044           suffix = "ns";
11045           break;
11046
11047         default:
11048           suffix = "ne";
11049         }
11050       break;
11051     case GT:
11052       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
11053       suffix = "g";
11054       break;
11055     case GTU:
11056       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11057          Those same assemblers have the same but opposite lossage on cmov.  */
11058       if (mode == CCmode)
11059         suffix = fp ? "nbe" : "a";
11060       else if (mode == CCCmode)
11061         suffix = "b";
11062       else
11063         gcc_unreachable ();
11064       break;
11065     case LT:
11066       switch (mode)
11067         {
11068         case CCNOmode:
11069         case CCGOCmode:
11070           suffix = "s";
11071           break;
11072
11073         case CCmode:
11074         case CCGCmode:
11075           suffix = "l";
11076           break;
11077
11078         default:
11079           gcc_unreachable ();
11080         }
11081       break;
11082     case LTU:
11083       gcc_assert (mode == CCmode || mode == CCCmode);
11084       suffix = "b";
11085       break;
11086     case GE:
11087       switch (mode)
11088         {
11089         case CCNOmode:
11090         case CCGOCmode:
11091           suffix = "ns";
11092           break;
11093
11094         case CCmode:
11095         case CCGCmode:
11096           suffix = "ge";
11097           break;
11098
11099         default:
11100           gcc_unreachable ();
11101         }
11102       break;
11103     case GEU:
11104       /* ??? As above.  */
11105       gcc_assert (mode == CCmode || mode == CCCmode);
11106       suffix = fp ? "nb" : "ae";
11107       break;
11108     case LE:
11109       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11110       suffix = "le";
11111       break;
11112     case LEU:
11113       /* ??? As above.  */
11114       if (mode == CCmode)
11115         suffix = "be";
11116       else if (mode == CCCmode)
11117         suffix = fp ? "nb" : "ae";
11118       else
11119         gcc_unreachable ();
11120       break;
11121     case UNORDERED:
11122       suffix = fp ? "u" : "p";
11123       break;
11124     case ORDERED:
11125       suffix = fp ? "nu" : "np";
11126       break;
11127     default:
11128       gcc_unreachable ();
11129     }
11130   fputs (suffix, file);
11131 }
11132
11133 /* Print the name of register X to FILE based on its machine mode and number.
11134    If CODE is 'w', pretend the mode is HImode.
11135    If CODE is 'b', pretend the mode is QImode.
11136    If CODE is 'k', pretend the mode is SImode.
11137    If CODE is 'q', pretend the mode is DImode.
11138    If CODE is 'x', pretend the mode is V4SFmode.
11139    If CODE is 't', pretend the mode is V8SFmode.
11140    If CODE is 'h', pretend the reg is the 'high' byte register.
11141    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
11142    If CODE is 'd', duplicate the operand for AVX instruction.
11143  */
11144
11145 void
11146 print_reg (rtx x, int code, FILE *file)
11147 {
11148   const char *reg;
11149   bool duplicated = code == 'd' && TARGET_AVX;
11150
11151   gcc_assert (x == pc_rtx
11152               || (REGNO (x) != ARG_POINTER_REGNUM
11153                   && REGNO (x) != FRAME_POINTER_REGNUM
11154                   && REGNO (x) != FLAGS_REG
11155                   && REGNO (x) != FPSR_REG
11156                   && REGNO (x) != FPCR_REG));
11157
11158   if (ASSEMBLER_DIALECT == ASM_ATT)
11159     putc ('%', file);
11160
11161   if (x == pc_rtx)
11162     {
11163       gcc_assert (TARGET_64BIT);
11164       fputs ("rip", file);
11165       return;
11166     }
11167
11168   if (code == 'w' || MMX_REG_P (x))
11169     code = 2;
11170   else if (code == 'b')
11171     code = 1;
11172   else if (code == 'k')
11173     code = 4;
11174   else if (code == 'q')
11175     code = 8;
11176   else if (code == 'y')
11177     code = 3;
11178   else if (code == 'h')
11179     code = 0;
11180   else if (code == 'x')
11181     code = 16;
11182   else if (code == 't')
11183     code = 32;
11184   else
11185     code = GET_MODE_SIZE (GET_MODE (x));
11186
11187   /* Irritatingly, AMD extended registers use different naming convention
11188      from the normal registers.  */
11189   if (REX_INT_REG_P (x))
11190     {
11191       gcc_assert (TARGET_64BIT);
11192       switch (code)
11193         {
11194           case 0:
11195             error ("extended registers have no high halves");
11196             break;
11197           case 1:
11198             fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
11199             break;
11200           case 2:
11201             fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
11202             break;
11203           case 4:
11204             fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
11205             break;
11206           case 8:
11207             fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
11208             break;
11209           default:
11210             error ("unsupported operand size for extended register");
11211             break;
11212         }
11213       return;
11214     }
11215
11216   reg = NULL;
11217   switch (code)
11218     {
11219     case 3:
11220       if (STACK_TOP_P (x))
11221         {
11222           reg = "st(0)";
11223           break;
11224         }
11225       /* FALLTHRU */
11226     case 8:
11227     case 4:
11228     case 12:
11229       if (! ANY_FP_REG_P (x))
11230         putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
11231       /* FALLTHRU */
11232     case 16:
11233     case 2:
11234     normal:
11235       reg = hi_reg_name[REGNO (x)];
11236       break;
11237     case 1:
11238       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
11239         goto normal;
11240       reg = qi_reg_name[REGNO (x)];
11241       break;
11242     case 0:
11243       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
11244         goto normal;
11245       reg = qi_high_reg_name[REGNO (x)];
11246       break;
11247     case 32:
11248       if (SSE_REG_P (x))
11249         {
11250           gcc_assert (!duplicated);
11251           putc ('y', file);
11252           fputs (hi_reg_name[REGNO (x)] + 1, file);
11253           return;
11254         }
11255       break;
11256     default:
11257       gcc_unreachable ();
11258     }
11259
11260   fputs (reg, file);
11261   if (duplicated)
11262     {
11263       if (ASSEMBLER_DIALECT == ASM_ATT)
11264         fprintf (file, ", %%%s", reg);
11265       else
11266         fprintf (file, ", %s", reg);
11267     }
11268 }
11269
11270 /* Locate some local-dynamic symbol still in use by this function
11271    so that we can print its name in some tls_local_dynamic_base
11272    pattern.  */
11273
11274 static int
11275 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11276 {
11277   rtx x = *px;
11278
11279   if (GET_CODE (x) == SYMBOL_REF
11280       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11281     {
11282       cfun->machine->some_ld_name = XSTR (x, 0);
11283       return 1;
11284     }
11285
11286   return 0;
11287 }
11288
11289 static const char *
11290 get_some_local_dynamic_name (void)
11291 {
11292   rtx insn;
11293
11294   if (cfun->machine->some_ld_name)
11295     return cfun->machine->some_ld_name;
11296
11297   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11298     if (INSN_P (insn)
11299         && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11300       return cfun->machine->some_ld_name;
11301
11302   return NULL;
11303 }
11304
11305 /* Meaning of CODE:
11306    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11307    C -- print opcode suffix for set/cmov insn.
11308    c -- like C, but print reversed condition
11309    E,e -- likewise, but for compare-and-branch fused insn.
11310    F,f -- likewise, but for floating-point.
11311    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
11312         otherwise nothing
11313    R -- print the prefix for register names.
11314    z -- print the opcode suffix for the size of the current operand.
11315    Z -- likewise, with special suffixes for x87 instructions.
11316    * -- print a star (in certain assembler syntax)
11317    A -- print an absolute memory reference.
11318    w -- print the operand as if it's a "word" (HImode) even if it isn't.
11319    s -- print a shift double count, followed by the assemblers argument
11320         delimiter.
11321    b -- print the QImode name of the register for the indicated operand.
11322         %b0 would print %al if operands[0] is reg 0.
11323    w --  likewise, print the HImode name of the register.
11324    k --  likewise, print the SImode name of the register.
11325    q --  likewise, print the DImode name of the register.
11326    x --  likewise, print the V4SFmode name of the register.
11327    t --  likewise, print the V8SFmode name of the register.
11328    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11329    y -- print "st(0)" instead of "st" as a register.
11330    d -- print duplicated register operand for AVX instruction.
11331    D -- print condition for SSE cmp instruction.
11332    P -- if PIC, print an @PLT suffix.
11333    X -- don't print any sort of PIC '@' suffix for a symbol.
11334    & -- print some in-use local-dynamic symbol name.
11335    H -- print a memory address offset by 8; used for sse high-parts
11336    Y -- print condition for XOP pcom* instruction.
11337    + -- print a branch hint as 'cs' or 'ds' prefix
11338    ; -- print a semicolon (after prefixes due to bug in older gas).
11339  */
11340
11341 void
11342 print_operand (FILE *file, rtx x, int code)
11343 {
11344   if (code)
11345     {
11346       switch (code)
11347         {
11348         case '*':
11349           if (ASSEMBLER_DIALECT == ASM_ATT)
11350             putc ('*', file);
11351           return;
11352
11353         case '&':
11354           {
11355             const char *name = get_some_local_dynamic_name ();
11356             if (name == NULL)
11357               output_operand_lossage ("'%%&' used without any "
11358                                       "local dynamic TLS references");
11359             else
11360               assemble_name (file, name);
11361             return;
11362           }
11363
11364         case 'A':
11365           switch (ASSEMBLER_DIALECT)
11366             {
11367             case ASM_ATT:
11368               putc ('*', file);
11369               break;
11370
11371             case ASM_INTEL:
11372               /* Intel syntax. For absolute addresses, registers should not
11373                  be surrounded by braces.  */
11374               if (!REG_P (x))
11375                 {
11376                   putc ('[', file);
11377                   PRINT_OPERAND (file, x, 0);
11378                   putc (']', file);
11379                   return;
11380                 }
11381               break;
11382
11383             default:
11384               gcc_unreachable ();
11385             }
11386
11387           PRINT_OPERAND (file, x, 0);
11388           return;
11389
11390
11391         case 'L':
11392           if (ASSEMBLER_DIALECT == ASM_ATT)
11393             putc ('l', file);
11394           return;
11395
11396         case 'W':
11397           if (ASSEMBLER_DIALECT == ASM_ATT)
11398             putc ('w', file);
11399           return;
11400
11401         case 'B':
11402           if (ASSEMBLER_DIALECT == ASM_ATT)
11403             putc ('b', file);
11404           return;
11405
11406         case 'Q':
11407           if (ASSEMBLER_DIALECT == ASM_ATT)
11408             putc ('l', file);
11409           return;
11410
11411         case 'S':
11412           if (ASSEMBLER_DIALECT == ASM_ATT)
11413             putc ('s', file);
11414           return;
11415
11416         case 'T':
11417           if (ASSEMBLER_DIALECT == ASM_ATT)
11418             putc ('t', file);
11419           return;
11420
11421         case 'z':
11422           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11423             {
11424               /* Opcodes don't get size suffixes if using Intel opcodes.  */
11425               if (ASSEMBLER_DIALECT == ASM_INTEL)
11426                 return;
11427
11428               switch (GET_MODE_SIZE (GET_MODE (x)))
11429                 {
11430                 case 1:
11431                   putc ('b', file);
11432                   return;
11433
11434                 case 2:
11435                   putc ('w', file);
11436                   return;
11437
11438                 case 4:
11439                   putc ('l', file);
11440                   return;
11441
11442                 case 8:
11443                   putc ('q', file);
11444                   return;
11445
11446                 default:
11447                   output_operand_lossage
11448                     ("invalid operand size for operand code '%c'", code);
11449                   return;
11450                 }
11451             }
11452
11453           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11454             warning
11455               (0, "non-integer operand used with operand code '%c'", code);
11456           /* FALLTHRU */
11457
11458         case 'Z':
11459           /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
11460           if (ASSEMBLER_DIALECT == ASM_INTEL)
11461             return;
11462
11463           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11464             {
11465               switch (GET_MODE_SIZE (GET_MODE (x)))
11466                 {
11467                 case 2:
11468 #ifdef HAVE_AS_IX86_FILDS
11469                   putc ('s', file);
11470 #endif
11471                   return;
11472
11473                 case 4:
11474                   putc ('l', file);
11475                   return;
11476
11477                 case 8:
11478 #ifdef HAVE_AS_IX86_FILDQ
11479                   putc ('q', file);
11480 #else
11481                   fputs ("ll", file);
11482 #endif
11483                   return;
11484
11485                 default:
11486                   break;
11487                 }
11488             }
11489           else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11490             {
11491               /* 387 opcodes don't get size suffixes
11492                  if the operands are registers.  */
11493               if (STACK_REG_P (x))
11494                 return;
11495
11496               switch (GET_MODE_SIZE (GET_MODE (x)))
11497                 {
11498                 case 4:
11499                   putc ('s', file);
11500                   return;
11501
11502                 case 8:
11503                   putc ('l', file);
11504                   return;
11505
11506                 case 12:
11507                 case 16:
11508                   putc ('t', file);
11509                   return;
11510
11511                 default:
11512                   break;
11513                 }
11514             }
11515           else
11516             {
11517               output_operand_lossage
11518                 ("invalid operand type used with operand code '%c'", code);
11519               return;
11520             }
11521
11522           output_operand_lossage
11523             ("invalid operand size for operand code '%c'", code);
11524           return;
11525
11526         case 'd':
11527         case 'b':
11528         case 'w':
11529         case 'k':
11530         case 'q':
11531         case 'h':
11532         case 't':
11533         case 'y':
11534         case 'x':
11535         case 'X':
11536         case 'P':
11537           break;
11538
11539         case 's':
11540           if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11541             {
11542               PRINT_OPERAND (file, x, 0);
11543               fputs (", ", file);
11544             }
11545           return;
11546
11547         case 'D':
11548           /* Little bit of braindamage here.  The SSE compare instructions
11549              does use completely different names for the comparisons that the
11550              fp conditional moves.  */
11551           if (TARGET_AVX)
11552             {
11553               switch (GET_CODE (x))
11554                 {
11555                 case EQ:
11556                   fputs ("eq", file);
11557                   break;
11558                 case UNEQ:
11559                   fputs ("eq_us", file);
11560                   break;
11561                 case LT:
11562                   fputs ("lt", file);
11563                   break;
11564                 case UNLT:
11565                   fputs ("nge", file);
11566                   break;
11567                 case LE:
11568                   fputs ("le", file);
11569                   break;
11570                 case UNLE:
11571                   fputs ("ngt", file);
11572                   break;
11573                 case UNORDERED:
11574                   fputs ("unord", file);
11575                   break;
11576                 case NE:
11577                   fputs ("neq", file);
11578                   break;
11579                 case LTGT:
11580                   fputs ("neq_oq", file);
11581                   break;
11582                 case GE:
11583                   fputs ("ge", file);
11584                   break;
11585                 case UNGE:
11586                   fputs ("nlt", file);
11587                   break;
11588                 case GT:
11589                   fputs ("gt", file);
11590                   break;
11591                 case UNGT:
11592                   fputs ("nle", file);
11593                   break;
11594                 case ORDERED:
11595                   fputs ("ord", file);
11596                   break;
11597                 default:
11598                   output_operand_lossage ("operand is not a condition code, "
11599                                           "invalid operand code 'D'");
11600                   return;
11601                 }
11602             }
11603           else
11604             {
11605               switch (GET_CODE (x))
11606                 {
11607                 case EQ:
11608                 case UNEQ:
11609                   fputs ("eq", file);
11610                   break;
11611                 case LT:
11612                 case UNLT:
11613                   fputs ("lt", file);
11614                   break;
11615                 case LE:
11616                 case UNLE:
11617                   fputs ("le", file);
11618                   break;
11619                 case UNORDERED:
11620                   fputs ("unord", file);
11621                   break;
11622                 case NE:
11623                 case LTGT:
11624                   fputs ("neq", file);
11625                   break;
11626                 case UNGE:
11627                 case GE:
11628                   fputs ("nlt", file);
11629                   break;
11630                 case UNGT:
11631                 case GT:
11632                   fputs ("nle", file);
11633                   break;
11634                 case ORDERED:
11635                   fputs ("ord", file);
11636                   break;
11637                 default:
11638                   output_operand_lossage ("operand is not a condition code, "
11639                                           "invalid operand code 'D'");
11640                   return;
11641                 }
11642             }
11643           return;
11644         case 'O':
11645 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11646           if (ASSEMBLER_DIALECT == ASM_ATT)
11647             {
11648               switch (GET_MODE (x))
11649                 {
11650                 case HImode: putc ('w', file); break;
11651                 case SImode:
11652                 case SFmode: putc ('l', file); break;
11653                 case DImode:
11654                 case DFmode: putc ('q', file); break;
11655                 default: gcc_unreachable ();
11656                 }
11657               putc ('.', file);
11658             }
11659 #endif
11660           return;
11661         case 'C':
11662           if (!COMPARISON_P (x))
11663             {
11664               output_operand_lossage ("operand is neither a constant nor a "
11665                                       "condition code, invalid operand code "
11666                                       "'C'");
11667               return;
11668             }
11669           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11670           return;
11671         case 'F':
11672           if (!COMPARISON_P (x))
11673             {
11674               output_operand_lossage ("operand is neither a constant nor a "
11675                                       "condition code, invalid operand code "
11676                                       "'F'");
11677               return;
11678             }
11679 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11680           if (ASSEMBLER_DIALECT == ASM_ATT)
11681             putc ('.', file);
11682 #endif
11683           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11684           return;
11685
11686           /* Like above, but reverse condition */
11687         case 'c':
11688           /* Check to see if argument to %c is really a constant
11689              and not a condition code which needs to be reversed.  */
11690           if (!COMPARISON_P (x))
11691             {
11692               output_operand_lossage ("operand is neither a constant nor a "
11693                                       "condition code, invalid operand "
11694                                       "code 'c'");
11695               return;
11696             }
11697           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11698           return;
11699         case 'f':
11700           if (!COMPARISON_P (x))
11701             {
11702               output_operand_lossage ("operand is neither a constant nor a "
11703                                       "condition code, invalid operand "
11704                                       "code 'f'");
11705               return;
11706             }
11707 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11708           if (ASSEMBLER_DIALECT == ASM_ATT)
11709             putc ('.', file);
11710 #endif
11711           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11712           return;
11713
11714         case 'E':
11715           put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11716           return;
11717
11718         case 'e':
11719           put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11720           return;
11721
11722         case 'H':
11723           /* It doesn't actually matter what mode we use here, as we're
11724              only going to use this for printing.  */
11725           x = adjust_address_nv (x, DImode, 8);
11726           break;
11727
11728         case '+':
11729           {
11730             rtx x;
11731
11732             if (!optimize
11733                 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11734               return;
11735
11736             x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11737             if (x)
11738               {
11739                 int pred_val = INTVAL (XEXP (x, 0));
11740
11741                 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11742                     || pred_val > REG_BR_PROB_BASE * 55 / 100)
11743                   {
11744                     int taken = pred_val > REG_BR_PROB_BASE / 2;
11745                     int cputaken = final_forward_branch_p (current_output_insn) == 0;
11746
11747                     /* Emit hints only in the case default branch prediction
11748                        heuristics would fail.  */
11749                     if (taken != cputaken)
11750                       {
11751                         /* We use 3e (DS) prefix for taken branches and
11752                            2e (CS) prefix for not taken branches.  */
11753                         if (taken)
11754                           fputs ("ds ; ", file);
11755                         else
11756                           fputs ("cs ; ", file);
11757                       }
11758                   }
11759               }
11760             return;
11761           }
11762
11763         case 'Y':
11764           switch (GET_CODE (x))
11765             {
11766             case NE:
11767               fputs ("neq", file);
11768               break;
11769             case EQ:
11770               fputs ("eq", file);
11771               break;
11772             case GE:
11773             case GEU:
11774               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11775               break;
11776             case GT:
11777             case GTU:
11778               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11779               break;
11780             case LE:
11781             case LEU:
11782               fputs ("le", file);
11783               break;
11784             case LT:
11785             case LTU:
11786               fputs ("lt", file);
11787               break;
11788             case UNORDERED:
11789               fputs ("unord", file);
11790               break;
11791             case ORDERED:
11792               fputs ("ord", file);
11793               break;
11794             case UNEQ:
11795               fputs ("ueq", file);
11796               break;
11797             case UNGE:
11798               fputs ("nlt", file);
11799               break;
11800             case UNGT:
11801               fputs ("nle", file);
11802               break;
11803             case UNLE:
11804               fputs ("ule", file);
11805               break;
11806             case UNLT:
11807               fputs ("ult", file);
11808               break;
11809             case LTGT:
11810               fputs ("une", file);
11811               break;
11812             default:
11813               output_operand_lossage ("operand is not a condition code, "
11814                                       "invalid operand code 'Y'");
11815               return;
11816             }
11817           return;
11818
11819         case ';':
11820 #if TARGET_MACHO
11821           fputs (" ; ", file);
11822 #else
11823           putc (' ', file);
11824 #endif
11825           return;
11826
11827         default:
11828             output_operand_lossage ("invalid operand code '%c'", code);
11829         }
11830     }
11831
11832   if (REG_P (x))
11833     print_reg (x, code, file);
11834
11835   else if (MEM_P (x))
11836     {
11837       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
11838       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11839           && GET_MODE (x) != BLKmode)
11840         {
11841           const char * size;
11842           switch (GET_MODE_SIZE (GET_MODE (x)))
11843             {
11844             case 1: size = "BYTE"; break;
11845             case 2: size = "WORD"; break;
11846             case 4: size = "DWORD"; break;
11847             case 8: size = "QWORD"; break;
11848             case 12: size = "TBYTE"; break;
11849             case 16:
11850               if (GET_MODE (x) == XFmode)
11851                 size = "TBYTE";
11852               else
11853                 size = "XMMWORD";
11854               break;
11855             case 32: size = "YMMWORD"; break;
11856             default:
11857               gcc_unreachable ();
11858             }
11859
11860           /* Check for explicit size override (codes 'b', 'w' and 'k')  */
11861           if (code == 'b')
11862             size = "BYTE";
11863           else if (code == 'w')
11864             size = "WORD";
11865           else if (code == 'k')
11866             size = "DWORD";
11867
11868           fputs (size, file);
11869           fputs (" PTR ", file);
11870         }
11871
11872       x = XEXP (x, 0);
11873       /* Avoid (%rip) for call operands.  */
11874       if (CONSTANT_ADDRESS_P (x) && code == 'P'
11875           && !CONST_INT_P (x))
11876         output_addr_const (file, x);
11877       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11878         output_operand_lossage ("invalid constraints for operand");
11879       else
11880         output_address (x);
11881     }
11882
11883   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11884     {
11885       REAL_VALUE_TYPE r;
11886       long l;
11887
11888       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11889       REAL_VALUE_TO_TARGET_SINGLE (r, l);
11890
11891       if (ASSEMBLER_DIALECT == ASM_ATT)
11892         putc ('$', file);
11893       fprintf (file, "0x%08lx", (long unsigned int) l);
11894     }
11895
11896   /* These float cases don't actually occur as immediate operands.  */
11897   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11898     {
11899       char dstr[30];
11900
11901       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11902       fputs (dstr, file);
11903     }
11904
11905   else if (GET_CODE (x) == CONST_DOUBLE
11906            && GET_MODE (x) == XFmode)
11907     {
11908       char dstr[30];
11909
11910       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11911       fputs (dstr, file);
11912     }
11913
11914   else
11915     {
11916       /* We have patterns that allow zero sets of memory, for instance.
11917          In 64-bit mode, we should probably support all 8-byte vectors,
11918          since we can in fact encode that into an immediate.  */
11919       if (GET_CODE (x) == CONST_VECTOR)
11920         {
11921           gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11922           x = const0_rtx;
11923         }
11924
11925       if (code != 'P')
11926         {
11927           if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11928             {
11929               if (ASSEMBLER_DIALECT == ASM_ATT)
11930                 putc ('$', file);
11931             }
11932           else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11933                    || GET_CODE (x) == LABEL_REF)
11934             {
11935               if (ASSEMBLER_DIALECT == ASM_ATT)
11936                 putc ('$', file);
11937               else
11938                 fputs ("OFFSET FLAT:", file);
11939             }
11940         }
11941       if (CONST_INT_P (x))
11942         fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11943       else if (flag_pic)
11944         output_pic_addr_const (file, x, code);
11945       else
11946         output_addr_const (file, x);
11947     }
11948 }
11949 \f
11950 /* Print a memory operand whose address is ADDR.  */
11951
11952 void
11953 print_operand_address (FILE *file, rtx addr)
11954 {
11955   struct ix86_address parts;
11956   rtx base, index, disp;
11957   int scale;
11958   int ok = ix86_decompose_address (addr, &parts);
11959
11960   gcc_assert (ok);
11961
11962   base = parts.base;
11963   index = parts.index;
11964   disp = parts.disp;
11965   scale = parts.scale;
11966
11967   switch (parts.seg)
11968     {
11969     case SEG_DEFAULT:
11970       break;
11971     case SEG_FS:
11972     case SEG_GS:
11973       if (ASSEMBLER_DIALECT == ASM_ATT)
11974         putc ('%', file);
11975       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11976       break;
11977     default:
11978       gcc_unreachable ();
11979     }
11980
11981   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
11982   if (TARGET_64BIT && !base && !index)
11983     {
11984       rtx symbol = disp;
11985
11986       if (GET_CODE (disp) == CONST
11987           && GET_CODE (XEXP (disp, 0)) == PLUS
11988           && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11989         symbol = XEXP (XEXP (disp, 0), 0);
11990
11991       if (GET_CODE (symbol) == LABEL_REF
11992           || (GET_CODE (symbol) == SYMBOL_REF
11993               && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11994         base = pc_rtx;
11995     }
11996   if (!base && !index)
11997     {
11998       /* Displacement only requires special attention.  */
11999
12000       if (CONST_INT_P (disp))
12001         {
12002           if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
12003             fputs ("ds:", file);
12004           fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
12005         }
12006       else if (flag_pic)
12007         output_pic_addr_const (file, disp, 0);
12008       else
12009         output_addr_const (file, disp);
12010     }
12011   else
12012     {
12013       if (ASSEMBLER_DIALECT == ASM_ATT)
12014         {
12015           if (disp)
12016             {
12017               if (flag_pic)
12018                 output_pic_addr_const (file, disp, 0);
12019               else if (GET_CODE (disp) == LABEL_REF)
12020                 output_asm_label (disp);
12021               else
12022                 output_addr_const (file, disp);
12023             }
12024
12025           putc ('(', file);
12026           if (base)
12027             print_reg (base, 0, file);
12028           if (index)
12029             {
12030               putc (',', file);
12031               print_reg (index, 0, file);
12032               if (scale != 1)
12033                 fprintf (file, ",%d", scale);
12034             }
12035           putc (')', file);
12036         }
12037       else
12038         {
12039           rtx offset = NULL_RTX;
12040
12041           if (disp)
12042             {
12043               /* Pull out the offset of a symbol; print any symbol itself.  */
12044               if (GET_CODE (disp) == CONST
12045                   && GET_CODE (XEXP (disp, 0)) == PLUS
12046                   && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12047                 {
12048                   offset = XEXP (XEXP (disp, 0), 1);
12049                   disp = gen_rtx_CONST (VOIDmode,
12050                                         XEXP (XEXP (disp, 0), 0));
12051                 }
12052
12053               if (flag_pic)
12054                 output_pic_addr_const (file, disp, 0);
12055               else if (GET_CODE (disp) == LABEL_REF)
12056                 output_asm_label (disp);
12057               else if (CONST_INT_P (disp))
12058                 offset = disp;
12059               else
12060                 output_addr_const (file, disp);
12061             }
12062
12063           putc ('[', file);
12064           if (base)
12065             {
12066               print_reg (base, 0, file);
12067               if (offset)
12068                 {
12069                   if (INTVAL (offset) >= 0)
12070                     putc ('+', file);
12071                   fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12072                 }
12073             }
12074           else if (offset)
12075             fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12076           else
12077             putc ('0', file);
12078
12079           if (index)
12080             {
12081               putc ('+', file);
12082               print_reg (index, 0, file);
12083               if (scale != 1)
12084                 fprintf (file, "*%d", scale);
12085             }
12086           putc (']', file);
12087         }
12088     }
12089 }
12090
12091 bool
12092 output_addr_const_extra (FILE *file, rtx x)
12093 {
12094   rtx op;
12095
12096   if (GET_CODE (x) != UNSPEC)
12097     return false;
12098
12099   op = XVECEXP (x, 0, 0);
12100   switch (XINT (x, 1))
12101     {
12102     case UNSPEC_GOTTPOFF:
12103       output_addr_const (file, op);
12104       /* FIXME: This might be @TPOFF in Sun ld.  */
12105       fputs ("@GOTTPOFF", file);
12106       break;
12107     case UNSPEC_TPOFF:
12108       output_addr_const (file, op);
12109       fputs ("@TPOFF", file);
12110       break;
12111     case UNSPEC_NTPOFF:
12112       output_addr_const (file, op);
12113       if (TARGET_64BIT)
12114         fputs ("@TPOFF", file);
12115       else
12116         fputs ("@NTPOFF", file);
12117       break;
12118     case UNSPEC_DTPOFF:
12119       output_addr_const (file, op);
12120       fputs ("@DTPOFF", file);
12121       break;
12122     case UNSPEC_GOTNTPOFF:
12123       output_addr_const (file, op);
12124       if (TARGET_64BIT)
12125         fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12126                "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
12127       else
12128         fputs ("@GOTNTPOFF", file);
12129       break;
12130     case UNSPEC_INDNTPOFF:
12131       output_addr_const (file, op);
12132       fputs ("@INDNTPOFF", file);
12133       break;
12134 #if TARGET_MACHO
12135     case UNSPEC_MACHOPIC_OFFSET:
12136       output_addr_const (file, op);
12137       putc ('-', file);
12138       machopic_output_function_base_name (file);
12139       break;
12140 #endif
12141
12142     default:
12143       return false;
12144     }
12145
12146   return true;
12147 }
12148 \f
12149 /* Split one or more DImode RTL references into pairs of SImode
12150    references.  The RTL can be REG, offsettable MEM, integer constant, or
12151    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
12152    split and "num" is its length.  lo_half and hi_half are output arrays
12153    that parallel "operands".  */
12154
12155 void
12156 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12157 {
12158   while (num--)
12159     {
12160       rtx op = operands[num];
12161
12162       /* simplify_subreg refuse to split volatile memory addresses,
12163          but we still have to handle it.  */
12164       if (MEM_P (op))
12165         {
12166           lo_half[num] = adjust_address (op, SImode, 0);
12167           hi_half[num] = adjust_address (op, SImode, 4);
12168         }
12169       else
12170         {
12171           lo_half[num] = simplify_gen_subreg (SImode, op,
12172                                               GET_MODE (op) == VOIDmode
12173                                               ? DImode : GET_MODE (op), 0);
12174           hi_half[num] = simplify_gen_subreg (SImode, op,
12175                                               GET_MODE (op) == VOIDmode
12176                                               ? DImode : GET_MODE (op), 4);
12177         }
12178     }
12179 }
12180 /* Split one or more TImode RTL references into pairs of DImode
12181    references.  The RTL can be REG, offsettable MEM, integer constant, or
12182    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
12183    split and "num" is its length.  lo_half and hi_half are output arrays
12184    that parallel "operands".  */
12185
12186 void
12187 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12188 {
12189   while (num--)
12190     {
12191       rtx op = operands[num];
12192
12193       /* simplify_subreg refuse to split volatile memory addresses, but we
12194          still have to handle it.  */
12195       if (MEM_P (op))
12196         {
12197           lo_half[num] = adjust_address (op, DImode, 0);
12198           hi_half[num] = adjust_address (op, DImode, 8);
12199         }
12200       else
12201         {
12202           lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
12203           hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
12204         }
12205     }
12206 }
12207 \f
12208 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
12209    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
12210    is the expression of the binary operation.  The output may either be
12211    emitted here, or returned to the caller, like all output_* functions.
12212
12213    There is no guarantee that the operands are the same mode, as they
12214    might be within FLOAT or FLOAT_EXTEND expressions.  */
12215
12216 #ifndef SYSV386_COMPAT
12217 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
12218    wants to fix the assemblers because that causes incompatibility
12219    with gcc.  No-one wants to fix gcc because that causes
12220    incompatibility with assemblers...  You can use the option of
12221    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
12222 #define SYSV386_COMPAT 1
12223 #endif
12224
12225 const char *
12226 output_387_binary_op (rtx insn, rtx *operands)
12227 {
12228   static char buf[40];
12229   const char *p;
12230   const char *ssep;
12231   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
12232
12233 #ifdef ENABLE_CHECKING
12234   /* Even if we do not want to check the inputs, this documents input
12235      constraints.  Which helps in understanding the following code.  */
12236   if (STACK_REG_P (operands[0])
12237       && ((REG_P (operands[1])
12238            && REGNO (operands[0]) == REGNO (operands[1])
12239            && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
12240           || (REG_P (operands[2])
12241               && REGNO (operands[0]) == REGNO (operands[2])
12242               && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
12243       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
12244     ; /* ok */
12245   else
12246     gcc_assert (is_sse);
12247 #endif
12248
12249   switch (GET_CODE (operands[3]))
12250     {
12251     case PLUS:
12252       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12253           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12254         p = "fiadd";
12255       else
12256         p = "fadd";
12257       ssep = "vadd";
12258       break;
12259
12260     case MINUS:
12261       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12262           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12263         p = "fisub";
12264       else
12265         p = "fsub";
12266       ssep = "vsub";
12267       break;
12268
12269     case MULT:
12270       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12271           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12272         p = "fimul";
12273       else
12274         p = "fmul";
12275       ssep = "vmul";
12276       break;
12277
12278     case DIV:
12279       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12280           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12281         p = "fidiv";
12282       else
12283         p = "fdiv";
12284       ssep = "vdiv";
12285       break;
12286
12287     default:
12288       gcc_unreachable ();
12289     }
12290
12291   if (is_sse)
12292    {
12293      if (TARGET_AVX)
12294        {
12295          strcpy (buf, ssep);
12296          if (GET_MODE (operands[0]) == SFmode)
12297            strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
12298          else
12299            strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
12300        }
12301      else
12302        {
12303          strcpy (buf, ssep + 1);
12304          if (GET_MODE (operands[0]) == SFmode)
12305            strcat (buf, "ss\t{%2, %0|%0, %2}");
12306          else
12307            strcat (buf, "sd\t{%2, %0|%0, %2}");
12308        }
12309       return buf;
12310    }
12311   strcpy (buf, p);
12312
12313   switch (GET_CODE (operands[3]))
12314     {
12315     case MULT:
12316     case PLUS:
12317       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
12318         {
12319           rtx temp = operands[2];
12320           operands[2] = operands[1];
12321           operands[1] = temp;
12322         }
12323
12324       /* know operands[0] == operands[1].  */
12325
12326       if (MEM_P (operands[2]))
12327         {
12328           p = "%Z2\t%2";
12329           break;
12330         }
12331
12332       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12333         {
12334           if (STACK_TOP_P (operands[0]))
12335             /* How is it that we are storing to a dead operand[2]?
12336                Well, presumably operands[1] is dead too.  We can't
12337                store the result to st(0) as st(0) gets popped on this
12338                instruction.  Instead store to operands[2] (which I
12339                think has to be st(1)).  st(1) will be popped later.
12340                gcc <= 2.8.1 didn't have this check and generated
12341                assembly code that the Unixware assembler rejected.  */
12342             p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
12343           else
12344             p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
12345           break;
12346         }
12347
12348       if (STACK_TOP_P (operands[0]))
12349         p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
12350       else
12351         p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
12352       break;
12353
12354     case MINUS:
12355     case DIV:
12356       if (MEM_P (operands[1]))
12357         {
12358           p = "r%Z1\t%1";
12359           break;
12360         }
12361
12362       if (MEM_P (operands[2]))
12363         {
12364           p = "%Z2\t%2";
12365           break;
12366         }
12367
12368       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12369         {
12370 #if SYSV386_COMPAT
12371           /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12372              derived assemblers, confusingly reverse the direction of
12373              the operation for fsub{r} and fdiv{r} when the
12374              destination register is not st(0).  The Intel assembler
12375              doesn't have this brain damage.  Read !SYSV386_COMPAT to
12376              figure out what the hardware really does.  */
12377           if (STACK_TOP_P (operands[0]))
12378             p = "{p\t%0, %2|rp\t%2, %0}";
12379           else
12380             p = "{rp\t%2, %0|p\t%0, %2}";
12381 #else
12382           if (STACK_TOP_P (operands[0]))
12383             /* As above for fmul/fadd, we can't store to st(0).  */
12384             p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
12385           else
12386             p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
12387 #endif
12388           break;
12389         }
12390
12391       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
12392         {
12393 #if SYSV386_COMPAT
12394           if (STACK_TOP_P (operands[0]))
12395             p = "{rp\t%0, %1|p\t%1, %0}";
12396           else
12397             p = "{p\t%1, %0|rp\t%0, %1}";
12398 #else
12399           if (STACK_TOP_P (operands[0]))
12400             p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
12401           else
12402             p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
12403 #endif
12404           break;
12405         }
12406
12407       if (STACK_TOP_P (operands[0]))
12408         {
12409           if (STACK_TOP_P (operands[1]))
12410             p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
12411           else
12412             p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
12413           break;
12414         }
12415       else if (STACK_TOP_P (operands[1]))
12416         {
12417 #if SYSV386_COMPAT
12418           p = "{\t%1, %0|r\t%0, %1}";
12419 #else
12420           p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
12421 #endif
12422         }
12423       else
12424         {
12425 #if SYSV386_COMPAT
12426           p = "{r\t%2, %0|\t%0, %2}";
12427 #else
12428           p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
12429 #endif
12430         }
12431       break;
12432
12433     default:
12434       gcc_unreachable ();
12435     }
12436
12437   strcat (buf, p);
12438   return buf;
12439 }
12440
12441 /* Return needed mode for entity in optimize_mode_switching pass.  */
12442
12443 int
12444 ix86_mode_needed (int entity, rtx insn)
12445 {
12446   enum attr_i387_cw mode;
12447
12448   /* The mode UNINITIALIZED is used to store control word after a
12449      function call or ASM pattern.  The mode ANY specify that function
12450      has no requirements on the control word and make no changes in the
12451      bits we are interested in.  */
12452
12453   if (CALL_P (insn)
12454       || (NONJUMP_INSN_P (insn)
12455           && (asm_noperands (PATTERN (insn)) >= 0
12456               || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12457     return I387_CW_UNINITIALIZED;
12458
12459   if (recog_memoized (insn) < 0)
12460     return I387_CW_ANY;
12461
12462   mode = get_attr_i387_cw (insn);
12463
12464   switch (entity)
12465     {
12466     case I387_TRUNC:
12467       if (mode == I387_CW_TRUNC)
12468         return mode;
12469       break;
12470
12471     case I387_FLOOR:
12472       if (mode == I387_CW_FLOOR)
12473         return mode;
12474       break;
12475
12476     case I387_CEIL:
12477       if (mode == I387_CW_CEIL)
12478         return mode;
12479       break;
12480
12481     case I387_MASK_PM:
12482       if (mode == I387_CW_MASK_PM)
12483         return mode;
12484       break;
12485
12486     default:
12487       gcc_unreachable ();
12488     }
12489
12490   return I387_CW_ANY;
12491 }
12492
12493 /* Output code to initialize control word copies used by trunc?f?i and
12494    rounding patterns.  CURRENT_MODE is set to current control word,
12495    while NEW_MODE is set to new control word.  */
12496
12497 void
12498 emit_i387_cw_initialization (int mode)
12499 {
12500   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12501   rtx new_mode;
12502
12503   enum ix86_stack_slot slot;
12504
12505   rtx reg = gen_reg_rtx (HImode);
12506
12507   emit_insn (gen_x86_fnstcw_1 (stored_mode));
12508   emit_move_insn (reg, copy_rtx (stored_mode));
12509
12510   if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12511       || optimize_function_for_size_p (cfun))
12512     {
12513       switch (mode)
12514         {
12515         case I387_CW_TRUNC:
12516           /* round toward zero (truncate) */
12517           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12518           slot = SLOT_CW_TRUNC;
12519           break;
12520
12521         case I387_CW_FLOOR:
12522           /* round down toward -oo */
12523           emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12524           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12525           slot = SLOT_CW_FLOOR;
12526           break;
12527
12528         case I387_CW_CEIL:
12529           /* round up toward +oo */
12530           emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12531           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12532           slot = SLOT_CW_CEIL;
12533           break;
12534
12535         case I387_CW_MASK_PM:
12536           /* mask precision exception for nearbyint() */
12537           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12538           slot = SLOT_CW_MASK_PM;
12539           break;
12540
12541         default:
12542           gcc_unreachable ();
12543         }
12544     }
12545   else
12546     {
12547       switch (mode)
12548         {
12549         case I387_CW_TRUNC:
12550           /* round toward zero (truncate) */
12551           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12552           slot = SLOT_CW_TRUNC;
12553           break;
12554
12555         case I387_CW_FLOOR:
12556           /* round down toward -oo */
12557           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12558           slot = SLOT_CW_FLOOR;
12559           break;
12560
12561         case I387_CW_CEIL:
12562           /* round up toward +oo */
12563           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12564           slot = SLOT_CW_CEIL;
12565           break;
12566
12567         case I387_CW_MASK_PM:
12568           /* mask precision exception for nearbyint() */
12569           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12570           slot = SLOT_CW_MASK_PM;
12571           break;
12572
12573         default:
12574           gcc_unreachable ();
12575         }
12576     }
12577
12578   gcc_assert (slot < MAX_386_STACK_LOCALS);
12579
12580   new_mode = assign_386_stack_local (HImode, slot);
12581   emit_move_insn (new_mode, reg);
12582 }
12583
12584 /* Output code for INSN to convert a float to a signed int.  OPERANDS
12585    are the insn operands.  The output may be [HSD]Imode and the input
12586    operand may be [SDX]Fmode.  */
12587
12588 const char *
12589 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12590 {
12591   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12592   int dimode_p = GET_MODE (operands[0]) == DImode;
12593   int round_mode = get_attr_i387_cw (insn);
12594
12595   /* Jump through a hoop or two for DImode, since the hardware has no
12596      non-popping instruction.  We used to do this a different way, but
12597      that was somewhat fragile and broke with post-reload splitters.  */
12598   if ((dimode_p || fisttp) && !stack_top_dies)
12599     output_asm_insn ("fld\t%y1", operands);
12600
12601   gcc_assert (STACK_TOP_P (operands[1]));
12602   gcc_assert (MEM_P (operands[0]));
12603   gcc_assert (GET_MODE (operands[1]) != TFmode);
12604
12605   if (fisttp)
12606       output_asm_insn ("fisttp%Z0\t%0", operands);
12607   else
12608     {
12609       if (round_mode != I387_CW_ANY)
12610         output_asm_insn ("fldcw\t%3", operands);
12611       if (stack_top_dies || dimode_p)
12612         output_asm_insn ("fistp%Z0\t%0", operands);
12613       else
12614         output_asm_insn ("fist%Z0\t%0", operands);
12615       if (round_mode != I387_CW_ANY)
12616         output_asm_insn ("fldcw\t%2", operands);
12617     }
12618
12619   return "";
12620 }
12621
12622 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
12623    have the values zero or one, indicates the ffreep insn's operand
12624    from the OPERANDS array.  */
12625
12626 static const char *
12627 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12628 {
12629   if (TARGET_USE_FFREEP)
12630 #ifdef HAVE_AS_IX86_FFREEP
12631     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12632 #else
12633     {
12634       static char retval[32];
12635       int regno = REGNO (operands[opno]);
12636
12637       gcc_assert (FP_REGNO_P (regno));
12638
12639       regno -= FIRST_STACK_REG;
12640
12641       snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
12642       return retval;
12643     }
12644 #endif
12645
12646   return opno ? "fstp\t%y1" : "fstp\t%y0";
12647 }
12648
12649
12650 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
12651    should be used.  UNORDERED_P is true when fucom should be used.  */
12652
12653 const char *
12654 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12655 {
12656   int stack_top_dies;
12657   rtx cmp_op0, cmp_op1;
12658   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12659
12660   if (eflags_p)
12661     {
12662       cmp_op0 = operands[0];
12663       cmp_op1 = operands[1];
12664     }
12665   else
12666     {
12667       cmp_op0 = operands[1];
12668       cmp_op1 = operands[2];
12669     }
12670
12671   if (is_sse)
12672     {
12673       static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12674       static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12675       static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12676       static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12677
12678       if (GET_MODE (operands[0]) == SFmode)
12679         if (unordered_p)
12680           return &ucomiss[TARGET_AVX ? 0 : 1];
12681         else
12682           return &comiss[TARGET_AVX ? 0 : 1];
12683       else
12684         if (unordered_p)
12685           return &ucomisd[TARGET_AVX ? 0 : 1];
12686         else
12687           return &comisd[TARGET_AVX ? 0 : 1];
12688     }
12689
12690   gcc_assert (STACK_TOP_P (cmp_op0));
12691
12692   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12693
12694   if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12695     {
12696       if (stack_top_dies)
12697         {
12698           output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12699           return output_387_ffreep (operands, 1);
12700         }
12701       else
12702         return "ftst\n\tfnstsw\t%0";
12703     }
12704
12705   if (STACK_REG_P (cmp_op1)
12706       && stack_top_dies
12707       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12708       && REGNO (cmp_op1) != FIRST_STACK_REG)
12709     {
12710       /* If both the top of the 387 stack dies, and the other operand
12711          is also a stack register that dies, then this must be a
12712          `fcompp' float compare */
12713
12714       if (eflags_p)
12715         {
12716           /* There is no double popping fcomi variant.  Fortunately,
12717              eflags is immune from the fstp's cc clobbering.  */
12718           if (unordered_p)
12719             output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12720           else
12721             output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12722           return output_387_ffreep (operands, 0);
12723         }
12724       else
12725         {
12726           if (unordered_p)
12727             return "fucompp\n\tfnstsw\t%0";
12728           else
12729             return "fcompp\n\tfnstsw\t%0";
12730         }
12731     }
12732   else
12733     {
12734       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
12735
12736       static const char * const alt[16] =
12737       {
12738         "fcom%Z2\t%y2\n\tfnstsw\t%0",
12739         "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12740         "fucom%Z2\t%y2\n\tfnstsw\t%0",
12741         "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12742
12743         "ficom%Z2\t%y2\n\tfnstsw\t%0",
12744         "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12745         NULL,
12746         NULL,
12747
12748         "fcomi\t{%y1, %0|%0, %y1}",
12749         "fcomip\t{%y1, %0|%0, %y1}",
12750         "fucomi\t{%y1, %0|%0, %y1}",
12751         "fucomip\t{%y1, %0|%0, %y1}",
12752
12753         NULL,
12754         NULL,
12755         NULL,
12756         NULL
12757       };
12758
12759       int mask;
12760       const char *ret;
12761
12762       mask  = eflags_p << 3;
12763       mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12764       mask |= unordered_p << 1;
12765       mask |= stack_top_dies;
12766
12767       gcc_assert (mask < 16);
12768       ret = alt[mask];
12769       gcc_assert (ret);
12770
12771       return ret;
12772     }
12773 }
12774
12775 void
12776 ix86_output_addr_vec_elt (FILE *file, int value)
12777 {
12778   const char *directive = ASM_LONG;
12779
12780 #ifdef ASM_QUAD
12781   if (TARGET_64BIT)
12782     directive = ASM_QUAD;
12783 #else
12784   gcc_assert (!TARGET_64BIT);
12785 #endif
12786
12787   fprintf (file, "%s" LPREFIX "%d\n", directive, value);
12788 }
12789
12790 void
12791 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12792 {
12793   const char *directive = ASM_LONG;
12794
12795 #ifdef ASM_QUAD
12796   if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12797     directive = ASM_QUAD;
12798 #else
12799   gcc_assert (!TARGET_64BIT);
12800 #endif
12801   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
12802   if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12803     fprintf (file, "%s" LPREFIX "%d-" LPREFIX "%d\n",
12804              directive, value, rel);
12805   else if (HAVE_AS_GOTOFF_IN_DATA)
12806     fprintf (file, ASM_LONG LPREFIX "%d@GOTOFF\n", value);
12807 #if TARGET_MACHO
12808   else if (TARGET_MACHO)
12809     {
12810       fprintf (file, ASM_LONG LPREFIX "%d-", value);
12811       machopic_output_function_base_name (file);
12812       putc ('\n', file);
12813     }
12814 #endif
12815   else
12816     asm_fprintf (file, ASM_LONG "%U%s+[.-" LPREFIX "%d]\n",
12817                  GOT_SYMBOL_NAME, value);
12818 }
12819 \f
12820 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12821    for the target.  */
12822
12823 void
12824 ix86_expand_clear (rtx dest)
12825 {
12826   rtx tmp;
12827
12828   /* We play register width games, which are only valid after reload.  */
12829   gcc_assert (reload_completed);
12830
12831   /* Avoid HImode and its attendant prefix byte.  */
12832   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12833     dest = gen_rtx_REG (SImode, REGNO (dest));
12834   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12835
12836   /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
12837   if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
12838     {
12839       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12840       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12841     }
12842
12843   emit_insn (tmp);
12844 }
12845
12846 /* X is an unchanging MEM.  If it is a constant pool reference, return
12847    the constant pool rtx, else NULL.  */
12848
12849 rtx
12850 maybe_get_pool_constant (rtx x)
12851 {
12852   x = ix86_delegitimize_address (XEXP (x, 0));
12853
12854   if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12855     return get_pool_constant (x);
12856
12857   return NULL_RTX;
12858 }
12859
12860 void
12861 ix86_expand_move (enum machine_mode mode, rtx operands[])
12862 {
12863   rtx op0, op1;
12864   enum tls_model model;
12865
12866   op0 = operands[0];
12867   op1 = operands[1];
12868
12869   if (GET_CODE (op1) == SYMBOL_REF)
12870     {
12871       model = SYMBOL_REF_TLS_MODEL (op1);
12872       if (model)
12873         {
12874           op1 = legitimize_tls_address (op1, model, true);
12875           op1 = force_operand (op1, op0);
12876           if (op1 == op0)
12877             return;
12878         }
12879       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12880                && SYMBOL_REF_DLLIMPORT_P (op1))
12881         op1 = legitimize_dllimport_symbol (op1, false);
12882     }
12883   else if (GET_CODE (op1) == CONST
12884            && GET_CODE (XEXP (op1, 0)) == PLUS
12885            && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12886     {
12887       rtx addend = XEXP (XEXP (op1, 0), 1);
12888       rtx symbol = XEXP (XEXP (op1, 0), 0);
12889       rtx tmp = NULL;
12890
12891       model = SYMBOL_REF_TLS_MODEL (symbol);
12892       if (model)
12893         tmp = legitimize_tls_address (symbol, model, true);
12894       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12895                && SYMBOL_REF_DLLIMPORT_P (symbol))
12896         tmp = legitimize_dllimport_symbol (symbol, true);
12897
12898       if (tmp)
12899         {
12900           tmp = force_operand (tmp, NULL);
12901           tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12902                                      op0, 1, OPTAB_DIRECT);
12903           if (tmp == op0)
12904             return;
12905         }
12906     }
12907
12908   if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12909     {
12910       if (TARGET_MACHO && !TARGET_64BIT)
12911         {
12912 #if TARGET_MACHO
12913           if (MACHOPIC_PURE)
12914             {
12915               rtx temp = ((reload_in_progress
12916                            || ((op0 && REG_P (op0))
12917                                && mode == Pmode))
12918                           ? op0 : gen_reg_rtx (Pmode));
12919               op1 = machopic_indirect_data_reference (op1, temp);
12920               op1 = machopic_legitimize_pic_address (op1, mode,
12921                                                      temp == op1 ? 0 : temp);
12922             }
12923           else if (MACHOPIC_INDIRECT)
12924             op1 = machopic_indirect_data_reference (op1, 0);
12925           if (op0 == op1)
12926             return;
12927 #endif
12928         }
12929       else
12930         {
12931           if (MEM_P (op0))
12932             op1 = force_reg (Pmode, op1);
12933           else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12934             {
12935               rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
12936               op1 = legitimize_pic_address (op1, reg);
12937               if (op0 == op1)
12938                 return;
12939             }
12940         }
12941     }
12942   else
12943     {
12944       if (MEM_P (op0)
12945           && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12946               || !push_operand (op0, mode))
12947           && MEM_P (op1))
12948         op1 = force_reg (mode, op1);
12949
12950       if (push_operand (op0, mode)
12951           && ! general_no_elim_operand (op1, mode))
12952         op1 = copy_to_mode_reg (mode, op1);
12953
12954       /* Force large constants in 64bit compilation into register
12955          to get them CSEed.  */
12956       if (can_create_pseudo_p ()
12957           && (mode == DImode) && TARGET_64BIT
12958           && immediate_operand (op1, mode)
12959           && !x86_64_zext_immediate_operand (op1, VOIDmode)
12960           && !register_operand (op0, mode)
12961           && optimize)
12962         op1 = copy_to_mode_reg (mode, op1);
12963
12964       if (can_create_pseudo_p ()
12965           && FLOAT_MODE_P (mode)
12966           && GET_CODE (op1) == CONST_DOUBLE)
12967         {
12968           /* If we are loading a floating point constant to a register,
12969              force the value to memory now, since we'll get better code
12970              out the back end.  */
12971
12972           op1 = validize_mem (force_const_mem (mode, op1));
12973           if (!register_operand (op0, mode))
12974             {
12975               rtx temp = gen_reg_rtx (mode);
12976               emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12977               emit_move_insn (op0, temp);
12978               return;
12979             }
12980         }
12981     }
12982
12983   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12984 }
12985
12986 void
12987 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12988 {
12989   rtx op0 = operands[0], op1 = operands[1];
12990   unsigned int align = GET_MODE_ALIGNMENT (mode);
12991
12992   /* Force constants other than zero into memory.  We do not know how
12993      the instructions used to build constants modify the upper 64 bits
12994      of the register, once we have that information we may be able
12995      to handle some of them more efficiently.  */
12996   if (can_create_pseudo_p ()
12997       && register_operand (op0, mode)
12998       && (CONSTANT_P (op1)
12999           || (GET_CODE (op1) == SUBREG
13000               && CONSTANT_P (SUBREG_REG (op1))))
13001       && !standard_sse_constant_p (op1))
13002     op1 = validize_mem (force_const_mem (mode, op1));
13003
13004   /* We need to check memory alignment for SSE mode since attribute
13005      can make operands unaligned.  */
13006   if (can_create_pseudo_p ()
13007       && SSE_REG_MODE_P (mode)
13008       && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
13009           || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
13010     {
13011       rtx tmp[2];
13012
13013       /* ix86_expand_vector_move_misalign() does not like constants ... */
13014       if (CONSTANT_P (op1)
13015           || (GET_CODE (op1) == SUBREG
13016               && CONSTANT_P (SUBREG_REG (op1))))
13017         op1 = validize_mem (force_const_mem (mode, op1));
13018
13019       /* ... nor both arguments in memory.  */
13020       if (!register_operand (op0, mode)
13021           && !register_operand (op1, mode))
13022         op1 = force_reg (mode, op1);
13023
13024       tmp[0] = op0; tmp[1] = op1;
13025       ix86_expand_vector_move_misalign (mode, tmp);
13026       return;
13027     }
13028
13029   /* Make operand1 a register if it isn't already.  */
13030   if (can_create_pseudo_p ()
13031       && !register_operand (op0, mode)
13032       && !register_operand (op1, mode))
13033     {
13034       emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
13035       return;
13036     }
13037
13038   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13039 }
13040
13041 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
13042    straight to ix86_expand_vector_move.  */
13043 /* Code generation for scalar reg-reg moves of single and double precision data:
13044      if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
13045        movaps reg, reg
13046      else
13047        movss reg, reg
13048      if (x86_sse_partial_reg_dependency == true)
13049        movapd reg, reg
13050      else
13051        movsd reg, reg
13052
13053    Code generation for scalar loads of double precision data:
13054      if (x86_sse_split_regs == true)
13055        movlpd mem, reg      (gas syntax)
13056      else
13057        movsd mem, reg
13058
13059    Code generation for unaligned packed loads of single precision data
13060    (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
13061      if (x86_sse_unaligned_move_optimal)
13062        movups mem, reg
13063
13064      if (x86_sse_partial_reg_dependency == true)
13065        {
13066          xorps  reg, reg
13067          movlps mem, reg
13068          movhps mem+8, reg
13069        }
13070      else
13071        {
13072          movlps mem, reg
13073          movhps mem+8, reg
13074        }
13075
13076    Code generation for unaligned packed loads of double precision data
13077    (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
13078      if (x86_sse_unaligned_move_optimal)
13079        movupd mem, reg
13080
13081      if (x86_sse_split_regs == true)
13082        {
13083          movlpd mem, reg
13084          movhpd mem+8, reg
13085        }
13086      else
13087        {
13088          movsd  mem, reg
13089          movhpd mem+8, reg
13090        }
13091  */
13092
13093 void
13094 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
13095 {
13096   rtx op0, op1, m;
13097
13098   op0 = operands[0];
13099   op1 = operands[1];
13100
13101   if (TARGET_AVX)
13102     {
13103       switch (GET_MODE_CLASS (mode))
13104         {
13105         case MODE_VECTOR_INT:
13106         case MODE_INT:
13107           switch (GET_MODE_SIZE (mode))
13108             {
13109             case 16:
13110               op0 = gen_lowpart (V16QImode, op0);
13111               op1 = gen_lowpart (V16QImode, op1);
13112               emit_insn (gen_avx_movdqu (op0, op1));
13113               break;
13114             case 32:
13115               op0 = gen_lowpart (V32QImode, op0);
13116               op1 = gen_lowpart (V32QImode, op1);
13117               emit_insn (gen_avx_movdqu256 (op0, op1));
13118               break;
13119             default:
13120               gcc_unreachable ();
13121             }
13122           break;
13123         case MODE_VECTOR_FLOAT:
13124           op0 = gen_lowpart (mode, op0);
13125           op1 = gen_lowpart (mode, op1);
13126
13127           switch (mode)
13128             {
13129             case V4SFmode:
13130               emit_insn (gen_avx_movups (op0, op1));
13131               break;
13132             case V8SFmode:
13133               emit_insn (gen_avx_movups256 (op0, op1));
13134               break;
13135             case V2DFmode:
13136               emit_insn (gen_avx_movupd (op0, op1));
13137               break;
13138             case V4DFmode:
13139               emit_insn (gen_avx_movupd256 (op0, op1));
13140               break;
13141             default:
13142               gcc_unreachable ();
13143             }
13144           break;
13145
13146         default:
13147           gcc_unreachable ();
13148         }
13149
13150       return;
13151     }
13152
13153   if (MEM_P (op1))
13154     {
13155       /* If we're optimizing for size, movups is the smallest.  */
13156       if (optimize_insn_for_size_p ())
13157         {
13158           op0 = gen_lowpart (V4SFmode, op0);
13159           op1 = gen_lowpart (V4SFmode, op1);
13160           emit_insn (gen_sse_movups (op0, op1));
13161           return;
13162         }
13163
13164       /* ??? If we have typed data, then it would appear that using
13165          movdqu is the only way to get unaligned data loaded with
13166          integer type.  */
13167       if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13168         {
13169           op0 = gen_lowpart (V16QImode, op0);
13170           op1 = gen_lowpart (V16QImode, op1);
13171           emit_insn (gen_sse2_movdqu (op0, op1));
13172           return;
13173         }
13174
13175       if (TARGET_SSE2 && mode == V2DFmode)
13176         {
13177           rtx zero;
13178
13179           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13180             {
13181               op0 = gen_lowpart (V2DFmode, op0);
13182               op1 = gen_lowpart (V2DFmode, op1);
13183               emit_insn (gen_sse2_movupd (op0, op1));
13184               return;
13185             }
13186
13187           /* When SSE registers are split into halves, we can avoid
13188              writing to the top half twice.  */
13189           if (TARGET_SSE_SPLIT_REGS)
13190             {
13191               emit_clobber (op0);
13192               zero = op0;
13193             }
13194           else
13195             {
13196               /* ??? Not sure about the best option for the Intel chips.
13197                  The following would seem to satisfy; the register is
13198                  entirely cleared, breaking the dependency chain.  We
13199                  then store to the upper half, with a dependency depth
13200                  of one.  A rumor has it that Intel recommends two movsd
13201                  followed by an unpacklpd, but this is unconfirmed.  And
13202                  given that the dependency depth of the unpacklpd would
13203                  still be one, I'm not sure why this would be better.  */
13204               zero = CONST0_RTX (V2DFmode);
13205             }
13206
13207           m = adjust_address (op1, DFmode, 0);
13208           emit_insn (gen_sse2_loadlpd (op0, zero, m));
13209           m = adjust_address (op1, DFmode, 8);
13210           emit_insn (gen_sse2_loadhpd (op0, op0, m));
13211         }
13212       else
13213         {
13214           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13215             {
13216               op0 = gen_lowpart (V4SFmode, op0);
13217               op1 = gen_lowpart (V4SFmode, op1);
13218               emit_insn (gen_sse_movups (op0, op1));
13219               return;
13220             }
13221
13222           if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
13223             emit_move_insn (op0, CONST0_RTX (mode));
13224           else
13225             emit_clobber (op0);
13226
13227           if (mode != V4SFmode)
13228             op0 = gen_lowpart (V4SFmode, op0);
13229           m = adjust_address (op1, V2SFmode, 0);
13230           emit_insn (gen_sse_loadlps (op0, op0, m));
13231           m = adjust_address (op1, V2SFmode, 8);
13232           emit_insn (gen_sse_loadhps (op0, op0, m));
13233         }
13234     }
13235   else if (MEM_P (op0))
13236     {
13237       /* If we're optimizing for size, movups is the smallest.  */
13238       if (optimize_insn_for_size_p ())
13239         {
13240           op0 = gen_lowpart (V4SFmode, op0);
13241           op1 = gen_lowpart (V4SFmode, op1);
13242           emit_insn (gen_sse_movups (op0, op1));
13243           return;
13244         }
13245
13246       /* ??? Similar to above, only less clear because of quote
13247          typeless stores unquote.  */
13248       if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
13249           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13250         {
13251           op0 = gen_lowpart (V16QImode, op0);
13252           op1 = gen_lowpart (V16QImode, op1);
13253           emit_insn (gen_sse2_movdqu (op0, op1));
13254           return;
13255         }
13256
13257       if (TARGET_SSE2 && mode == V2DFmode)
13258         {
13259           m = adjust_address (op0, DFmode, 0);
13260           emit_insn (gen_sse2_storelpd (m, op1));
13261           m = adjust_address (op0, DFmode, 8);
13262           emit_insn (gen_sse2_storehpd (m, op1));
13263         }
13264       else
13265         {
13266           if (mode != V4SFmode)
13267             op1 = gen_lowpart (V4SFmode, op1);
13268           m = adjust_address (op0, V2SFmode, 0);
13269           emit_insn (gen_sse_storelps (m, op1));
13270           m = adjust_address (op0, V2SFmode, 8);
13271           emit_insn (gen_sse_storehps (m, op1));
13272         }
13273     }
13274   else
13275     gcc_unreachable ();
13276 }
13277
13278 /* Expand a push in MODE.  This is some mode for which we do not support
13279    proper push instructions, at least from the registers that we expect
13280    the value to live in.  */
13281
13282 void
13283 ix86_expand_push (enum machine_mode mode, rtx x)
13284 {
13285   rtx tmp;
13286
13287   tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
13288                              GEN_INT (-GET_MODE_SIZE (mode)),
13289                              stack_pointer_rtx, 1, OPTAB_DIRECT);
13290   if (tmp != stack_pointer_rtx)
13291     emit_move_insn (stack_pointer_rtx, tmp);
13292
13293   tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
13294
13295   /* When we push an operand onto stack, it has to be aligned at least
13296      at the function argument boundary.  However since we don't have
13297      the argument type, we can't determine the actual argument
13298      boundary.  */
13299   emit_move_insn (tmp, x);
13300 }
13301
13302 /* Helper function of ix86_fixup_binary_operands to canonicalize
13303    operand order.  Returns true if the operands should be swapped.  */
13304
13305 static bool
13306 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
13307                              rtx operands[])
13308 {
13309   rtx dst = operands[0];
13310   rtx src1 = operands[1];
13311   rtx src2 = operands[2];
13312
13313   /* If the operation is not commutative, we can't do anything.  */
13314   if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
13315     return false;
13316
13317   /* Highest priority is that src1 should match dst.  */
13318   if (rtx_equal_p (dst, src1))
13319     return false;
13320   if (rtx_equal_p (dst, src2))
13321     return true;
13322
13323   /* Next highest priority is that immediate constants come second.  */
13324   if (immediate_operand (src2, mode))
13325     return false;
13326   if (immediate_operand (src1, mode))
13327     return true;
13328
13329   /* Lowest priority is that memory references should come second.  */
13330   if (MEM_P (src2))
13331     return false;
13332   if (MEM_P (src1))
13333     return true;
13334
13335   return false;
13336 }
13337
13338
13339 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
13340    destination to use for the operation.  If different from the true
13341    destination in operands[0], a copy operation will be required.  */
13342
13343 rtx
13344 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
13345                             rtx operands[])
13346 {
13347   rtx dst = operands[0];
13348   rtx src1 = operands[1];
13349   rtx src2 = operands[2];
13350
13351   /* Canonicalize operand order.  */
13352   if (ix86_swap_binary_operands_p (code, mode, operands))
13353     {
13354       rtx temp;
13355
13356       /* It is invalid to swap operands of different modes.  */
13357       gcc_assert (GET_MODE (src1) == GET_MODE (src2));
13358
13359       temp = src1;
13360       src1 = src2;
13361       src2 = temp;
13362     }
13363
13364   /* Both source operands cannot be in memory.  */
13365   if (MEM_P (src1) && MEM_P (src2))
13366     {
13367       /* Optimization: Only read from memory once.  */
13368       if (rtx_equal_p (src1, src2))
13369         {
13370           src2 = force_reg (mode, src2);
13371           src1 = src2;
13372         }
13373       else
13374         src2 = force_reg (mode, src2);
13375     }
13376
13377   /* If the destination is memory, and we do not have matching source
13378      operands, do things in registers.  */
13379   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13380     dst = gen_reg_rtx (mode);
13381
13382   /* Source 1 cannot be a constant.  */
13383   if (CONSTANT_P (src1))
13384     src1 = force_reg (mode, src1);
13385
13386   /* Source 1 cannot be a non-matching memory.  */
13387   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13388     src1 = force_reg (mode, src1);
13389
13390   operands[1] = src1;
13391   operands[2] = src2;
13392   return dst;
13393 }
13394
13395 /* Similarly, but assume that the destination has already been
13396    set up properly.  */
13397
13398 void
13399 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
13400                                     enum machine_mode mode, rtx operands[])
13401 {
13402   rtx dst = ix86_fixup_binary_operands (code, mode, operands);
13403   gcc_assert (dst == operands[0]);
13404 }
13405
13406 /* Attempt to expand a binary operator.  Make the expansion closer to the
13407    actual machine, then just general_operand, which will allow 3 separate
13408    memory references (one output, two input) in a single insn.  */
13409
13410 void
13411 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
13412                              rtx operands[])
13413 {
13414   rtx src1, src2, dst, op, clob;
13415
13416   dst = ix86_fixup_binary_operands (code, mode, operands);
13417   src1 = operands[1];
13418   src2 = operands[2];
13419
13420  /* Emit the instruction.  */
13421
13422   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
13423   if (reload_in_progress)
13424     {
13425       /* Reload doesn't know about the flags register, and doesn't know that
13426          it doesn't want to clobber it.  We can only do this with PLUS.  */
13427       gcc_assert (code == PLUS);
13428       emit_insn (op);
13429     }
13430   else
13431     {
13432       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13433       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13434     }
13435
13436   /* Fix up the destination if needed.  */
13437   if (dst != operands[0])
13438     emit_move_insn (operands[0], dst);
13439 }
13440
13441 /* Return TRUE or FALSE depending on whether the binary operator meets the
13442    appropriate constraints.  */
13443
13444 int
13445 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13446                          rtx operands[3])
13447 {
13448   rtx dst = operands[0];
13449   rtx src1 = operands[1];
13450   rtx src2 = operands[2];
13451
13452   /* Both source operands cannot be in memory.  */
13453   if (MEM_P (src1) && MEM_P (src2))
13454     return 0;
13455
13456   /* Canonicalize operand order for commutative operators.  */
13457   if (ix86_swap_binary_operands_p (code, mode, operands))
13458     {
13459       rtx temp = src1;
13460       src1 = src2;
13461       src2 = temp;
13462     }
13463
13464   /* If the destination is memory, we must have a matching source operand.  */
13465   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13466       return 0;
13467
13468   /* Source 1 cannot be a constant.  */
13469   if (CONSTANT_P (src1))
13470     return 0;
13471
13472   /* Source 1 cannot be a non-matching memory.  */
13473   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13474     return 0;
13475
13476   return 1;
13477 }
13478
13479 /* Attempt to expand a unary operator.  Make the expansion closer to the
13480    actual machine, then just general_operand, which will allow 2 separate
13481    memory references (one output, one input) in a single insn.  */
13482
13483 void
13484 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13485                             rtx operands[])
13486 {
13487   int matching_memory;
13488   rtx src, dst, op, clob;
13489
13490   dst = operands[0];
13491   src = operands[1];
13492
13493   /* If the destination is memory, and we do not have matching source
13494      operands, do things in registers.  */
13495   matching_memory = 0;
13496   if (MEM_P (dst))
13497     {
13498       if (rtx_equal_p (dst, src))
13499         matching_memory = 1;
13500       else
13501         dst = gen_reg_rtx (mode);
13502     }
13503
13504   /* When source operand is memory, destination must match.  */
13505   if (MEM_P (src) && !matching_memory)
13506     src = force_reg (mode, src);
13507
13508   /* Emit the instruction.  */
13509
13510   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13511   if (reload_in_progress || code == NOT)
13512     {
13513       /* Reload doesn't know about the flags register, and doesn't know that
13514          it doesn't want to clobber it.  */
13515       gcc_assert (code == NOT);
13516       emit_insn (op);
13517     }
13518   else
13519     {
13520       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13521       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13522     }
13523
13524   /* Fix up the destination if needed.  */
13525   if (dst != operands[0])
13526     emit_move_insn (operands[0], dst);
13527 }
13528
13529 #define LEA_SEARCH_THRESHOLD 12
13530
13531 /* Search backward for non-agu definition of register number REGNO1
13532    or register number REGNO2 in INSN's basic block until
13533    1. Pass LEA_SEARCH_THRESHOLD instructions, or
13534    2. Reach BB boundary, or
13535    3. Reach agu definition.
13536    Returns the distance between the non-agu definition point and INSN.
13537    If no definition point, returns -1.  */
13538
13539 static int
13540 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13541                          rtx insn)
13542 {
13543   basic_block bb = BLOCK_FOR_INSN (insn);
13544   int distance = 0;
13545   df_ref *def_rec;
13546   enum attr_type insn_type;
13547
13548   if (insn != BB_HEAD (bb))
13549     {
13550       rtx prev = PREV_INSN (insn);
13551       while (prev && distance < LEA_SEARCH_THRESHOLD)
13552         {
13553           if (INSN_P (prev))
13554             {
13555               distance++;
13556               for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13557                 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13558                     && !DF_REF_IS_ARTIFICIAL (*def_rec)
13559                     && (regno1 == DF_REF_REGNO (*def_rec)
13560                         || regno2 == DF_REF_REGNO (*def_rec)))
13561                   {
13562                     insn_type = get_attr_type (prev);
13563                     if (insn_type != TYPE_LEA)
13564                       goto done;
13565                   }
13566             }
13567           if (prev == BB_HEAD (bb))
13568             break;
13569           prev = PREV_INSN (prev);
13570         }
13571     }
13572
13573   if (distance < LEA_SEARCH_THRESHOLD)
13574     {
13575       edge e;
13576       edge_iterator ei;
13577       bool simple_loop = false;
13578
13579       FOR_EACH_EDGE (e, ei, bb->preds)
13580         if (e->src == bb)
13581           {
13582             simple_loop = true;
13583             break;
13584           }
13585
13586       if (simple_loop)
13587         {
13588           rtx prev = BB_END (bb);
13589           while (prev
13590                  && prev != insn
13591                  && distance < LEA_SEARCH_THRESHOLD)
13592             {
13593               if (INSN_P (prev))
13594                 {
13595                   distance++;
13596                   for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13597                     if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13598                         && !DF_REF_IS_ARTIFICIAL (*def_rec)
13599                         && (regno1 == DF_REF_REGNO (*def_rec)
13600                             || regno2 == DF_REF_REGNO (*def_rec)))
13601                       {
13602                         insn_type = get_attr_type (prev);
13603                         if (insn_type != TYPE_LEA)
13604                           goto done;
13605                       }
13606                 }
13607               prev = PREV_INSN (prev);
13608             }
13609         }
13610     }
13611
13612   distance = -1;
13613
13614 done:
13615   /* get_attr_type may modify recog data.  We want to make sure
13616      that recog data is valid for instruction INSN, on which
13617      distance_non_agu_define is called.  INSN is unchanged here.  */
13618   extract_insn_cached (insn);
13619   return distance;
13620 }
13621
13622 /* Return the distance between INSN and the next insn that uses
13623    register number REGNO0 in memory address.  Return -1 if no such
13624    a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
13625
13626 static int
13627 distance_agu_use (unsigned int regno0, rtx insn)
13628 {
13629   basic_block bb = BLOCK_FOR_INSN (insn);
13630   int distance = 0;
13631   df_ref *def_rec;
13632   df_ref *use_rec;
13633
13634   if (insn != BB_END (bb))
13635     {
13636       rtx next = NEXT_INSN (insn);
13637       while (next && distance < LEA_SEARCH_THRESHOLD)
13638         {
13639           if (INSN_P (next))
13640             {
13641               distance++;
13642
13643               for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13644                 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13645                      || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13646                     && regno0 == DF_REF_REGNO (*use_rec))
13647                   {
13648                     /* Return DISTANCE if OP0 is used in memory
13649                        address in NEXT.  */
13650                     return distance;
13651                   }
13652
13653               for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13654                 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13655                     && !DF_REF_IS_ARTIFICIAL (*def_rec)
13656                     && regno0 == DF_REF_REGNO (*def_rec))
13657                   {
13658                     /* Return -1 if OP0 is set in NEXT.  */
13659                     return -1;
13660                   }
13661             }
13662           if (next == BB_END (bb))
13663             break;
13664           next = NEXT_INSN (next);
13665         }
13666     }
13667
13668   if (distance < LEA_SEARCH_THRESHOLD)
13669     {
13670       edge e;
13671       edge_iterator ei;
13672       bool simple_loop = false;
13673
13674       FOR_EACH_EDGE (e, ei, bb->succs)
13675         if (e->dest == bb)
13676           {
13677             simple_loop = true;
13678             break;
13679           }
13680
13681       if (simple_loop)
13682         {
13683           rtx next = BB_HEAD (bb);
13684           while (next
13685                  && next != insn
13686                  && distance < LEA_SEARCH_THRESHOLD)
13687             {
13688               if (INSN_P (next))
13689                 {
13690                   distance++;
13691
13692                   for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13693                     if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13694                          || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13695                         && regno0 == DF_REF_REGNO (*use_rec))
13696                       {
13697                         /* Return DISTANCE if OP0 is used in memory
13698                            address in NEXT.  */
13699                         return distance;
13700                       }
13701
13702                   for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13703                     if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13704                         && !DF_REF_IS_ARTIFICIAL (*def_rec)
13705                         && regno0 == DF_REF_REGNO (*def_rec))
13706                       {
13707                         /* Return -1 if OP0 is set in NEXT.  */
13708                         return -1;
13709                       }
13710
13711                 }
13712               next = NEXT_INSN (next);
13713             }
13714         }
13715     }
13716
13717   return -1;
13718 }
13719
13720 /* Define this macro to tune LEA priority vs ADD, it take effect when
13721    there is a dilemma of choicing LEA or ADD
13722    Negative value: ADD is more preferred than LEA
13723    Zero: Netrual
13724    Positive value: LEA is more preferred than ADD*/
13725 #define IX86_LEA_PRIORITY 2
13726
13727 /* Return true if it is ok to optimize an ADD operation to LEA
13728    operation to avoid flag register consumation.  For the processors
13729    like ATOM, if the destination register of LEA holds an actual
13730    address which will be used soon, LEA is better and otherwise ADD
13731    is better.  */
13732
13733 bool
13734 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13735                      rtx insn, rtx operands[])
13736 {
13737   unsigned int regno0 = true_regnum (operands[0]);
13738   unsigned int regno1 = true_regnum (operands[1]);
13739   unsigned int regno2;
13740
13741   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13742     return regno0 != regno1;
13743
13744   regno2 = true_regnum (operands[2]);
13745
13746   /* If a = b + c, (a!=b && a!=c), must use lea form. */
13747   if (regno0 != regno1 && regno0 != regno2)
13748     return true;
13749   else
13750     {
13751       int dist_define, dist_use;
13752       dist_define = distance_non_agu_define (regno1, regno2, insn);
13753       if (dist_define <= 0)
13754         return true;
13755
13756       /* If this insn has both backward non-agu dependence and forward
13757          agu dependence, the one with short distance take effect. */
13758       dist_use = distance_agu_use (regno0, insn);
13759       if (dist_use <= 0
13760           || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13761         return false;
13762
13763       return true;
13764     }
13765 }
13766
13767 /* Return true if destination reg of SET_BODY is shift count of
13768    USE_BODY.  */
13769
13770 static bool
13771 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13772 {
13773   rtx set_dest;
13774   rtx shift_rtx;
13775   int i;
13776
13777   /* Retrieve destination of SET_BODY.  */
13778   switch (GET_CODE (set_body))
13779     {
13780     case SET:
13781       set_dest = SET_DEST (set_body);
13782       if (!set_dest || !REG_P (set_dest))
13783         return false;
13784       break;
13785     case PARALLEL:
13786       for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13787         if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13788                                           use_body))
13789           return true;
13790     default:
13791       return false;
13792       break;
13793     }
13794
13795   /* Retrieve shift count of USE_BODY.  */
13796   switch (GET_CODE (use_body))
13797     {
13798     case SET:
13799       shift_rtx = XEXP (use_body, 1);
13800       break;
13801     case PARALLEL:
13802       for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13803         if (ix86_dep_by_shift_count_body (set_body,
13804                                           XVECEXP (use_body, 0, i)))
13805           return true;
13806     default:
13807       return false;
13808       break;
13809     }
13810
13811   if (shift_rtx
13812       && (GET_CODE (shift_rtx) == ASHIFT
13813           || GET_CODE (shift_rtx) == LSHIFTRT
13814           || GET_CODE (shift_rtx) == ASHIFTRT
13815           || GET_CODE (shift_rtx) == ROTATE
13816           || GET_CODE (shift_rtx) == ROTATERT))
13817     {
13818       rtx shift_count = XEXP (shift_rtx, 1);
13819
13820       /* Return true if shift count is dest of SET_BODY.  */
13821       if (REG_P (shift_count)
13822           && true_regnum (set_dest) == true_regnum (shift_count))
13823         return true;
13824     }
13825
13826   return false;
13827 }
13828
13829 /* Return true if destination reg of SET_INSN is shift count of
13830    USE_INSN.  */
13831
13832 bool
13833 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13834 {
13835   return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13836                                        PATTERN (use_insn));
13837 }
13838
13839 /* Return TRUE or FALSE depending on whether the unary operator meets the
13840    appropriate constraints.  */
13841
13842 int
13843 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13844                         enum machine_mode mode ATTRIBUTE_UNUSED,
13845                         rtx operands[2] ATTRIBUTE_UNUSED)
13846 {
13847   /* If one of operands is memory, source and destination must match.  */
13848   if ((MEM_P (operands[0])
13849        || MEM_P (operands[1]))
13850       && ! rtx_equal_p (operands[0], operands[1]))
13851     return FALSE;
13852   return TRUE;
13853 }
13854
13855 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
13856    are ok, keeping in mind the possible movddup alternative.  */
13857
13858 bool
13859 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
13860 {
13861   if (MEM_P (operands[0]))
13862     return rtx_equal_p (operands[0], operands[1 + high]);
13863   if (MEM_P (operands[1]) && MEM_P (operands[2]))
13864     return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
13865   return true;
13866 }
13867
13868 /* Post-reload splitter for converting an SF or DFmode value in an
13869    SSE register into an unsigned SImode.  */
13870
13871 void
13872 ix86_split_convert_uns_si_sse (rtx operands[])
13873 {
13874   enum machine_mode vecmode;
13875   rtx value, large, zero_or_two31, input, two31, x;
13876
13877   large = operands[1];
13878   zero_or_two31 = operands[2];
13879   input = operands[3];
13880   two31 = operands[4];
13881   vecmode = GET_MODE (large);
13882   value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13883
13884   /* Load up the value into the low element.  We must ensure that the other
13885      elements are valid floats -- zero is the easiest such value.  */
13886   if (MEM_P (input))
13887     {
13888       if (vecmode == V4SFmode)
13889         emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13890       else
13891         emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13892     }
13893   else
13894     {
13895       input = gen_rtx_REG (vecmode, REGNO (input));
13896       emit_move_insn (value, CONST0_RTX (vecmode));
13897       if (vecmode == V4SFmode)
13898         emit_insn (gen_sse_movss (value, value, input));
13899       else
13900         emit_insn (gen_sse2_movsd (value, value, input));
13901     }
13902
13903   emit_move_insn (large, two31);
13904   emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13905
13906   x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13907   emit_insn (gen_rtx_SET (VOIDmode, large, x));
13908
13909   x = gen_rtx_AND (vecmode, zero_or_two31, large);
13910   emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13911
13912   x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13913   emit_insn (gen_rtx_SET (VOIDmode, value, x));
13914
13915   large = gen_rtx_REG (V4SImode, REGNO (large));
13916   emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13917
13918   x = gen_rtx_REG (V4SImode, REGNO (value));
13919   if (vecmode == V4SFmode)
13920     emit_insn (gen_sse2_cvttps2dq (x, value));
13921   else
13922     emit_insn (gen_sse2_cvttpd2dq (x, value));
13923   value = x;
13924
13925   emit_insn (gen_xorv4si3 (value, value, large));
13926 }
13927
13928 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13929    Expects the 64-bit DImode to be supplied in a pair of integral
13930    registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
13931    -mfpmath=sse, !optimize_size only.  */
13932
13933 void
13934 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13935 {
13936   REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13937   rtx int_xmm, fp_xmm;
13938   rtx biases, exponents;
13939   rtx x;
13940
13941   int_xmm = gen_reg_rtx (V4SImode);
13942   if (TARGET_INTER_UNIT_MOVES)
13943     emit_insn (gen_movdi_to_sse (int_xmm, input));
13944   else if (TARGET_SSE_SPLIT_REGS)
13945     {
13946       emit_clobber (int_xmm);
13947       emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13948     }
13949   else
13950     {
13951       x = gen_reg_rtx (V2DImode);
13952       ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13953       emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13954     }
13955
13956   x = gen_rtx_CONST_VECTOR (V4SImode,
13957                             gen_rtvec (4, GEN_INT (0x43300000UL),
13958                                        GEN_INT (0x45300000UL),
13959                                        const0_rtx, const0_rtx));
13960   exponents = validize_mem (force_const_mem (V4SImode, x));
13961
13962   /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13963   emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
13964
13965   /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13966      yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13967      Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13968      (0x1.0p84 + double(fp_value_hi_xmm)).
13969      Note these exponents differ by 32.  */
13970
13971   fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13972
13973   /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13974      in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
13975   real_ldexp (&bias_lo_rvt, &dconst1, 52);
13976   real_ldexp (&bias_hi_rvt, &dconst1, 84);
13977   biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13978   x = const_double_from_real_value (bias_hi_rvt, DFmode);
13979   biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13980   biases = validize_mem (force_const_mem (V2DFmode, biases));
13981   emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13982
13983   /* Add the upper and lower DFmode values together.  */
13984   if (TARGET_SSE3)
13985     emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13986   else
13987     {
13988       x = copy_to_mode_reg (V2DFmode, fp_xmm);
13989       emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
13990       emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13991     }
13992
13993   ix86_expand_vector_extract (false, target, fp_xmm, 0);
13994 }
13995
/* Not used, but eases macroization of patterns.  Both arguments are
   ignored; reaching this function is treated as an internal error.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
                                  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
14003
14004 /* Convert an unsigned SImode value into a DFmode.  Only currently used
14005    for SSE, but applicable anywhere.  */
14006
14007 void
14008 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
14009 {
14010   REAL_VALUE_TYPE TWO31r;
14011   rtx x, fp;
14012
14013   x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
14014                            NULL, 1, OPTAB_DIRECT);
14015
14016   fp = gen_reg_rtx (DFmode);
14017   emit_insn (gen_floatsidf2 (fp, x));
14018
14019   real_ldexp (&TWO31r, &dconst1, 31);
14020   x = const_double_from_real_value (TWO31r, DFmode);
14021
14022   x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
14023   if (x != target)
14024     emit_move_insn (target, x);
14025 }
14026
14027 /* Convert a signed DImode value into a DFmode.  Only used for SSE in
14028    32-bit mode; otherwise we have a direct convert instruction.  */
14029
14030 void
14031 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
14032 {
14033   REAL_VALUE_TYPE TWO32r;
14034   rtx fp_lo, fp_hi, x;
14035
14036   fp_lo = gen_reg_rtx (DFmode);
14037   fp_hi = gen_reg_rtx (DFmode);
14038
14039   emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
14040
14041   real_ldexp (&TWO32r, &dconst1, 32);
14042   x = const_double_from_real_value (TWO32r, DFmode);
14043   fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
14044
14045   ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
14046
14047   x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
14048                            0, OPTAB_DIRECT);
14049   if (x != target)
14050     emit_move_insn (target, x);
14051 }
14052
14053 /* Convert an unsigned SImode value into a SFmode, using only SSE.
14054    For x86_32, -mfpmath=sse, !optimize_size only.  */
14055 void
14056 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
14057 {
14058   REAL_VALUE_TYPE ONE16r;
14059   rtx fp_hi, fp_lo, int_hi, int_lo, x;
14060
14061   real_ldexp (&ONE16r, &dconst1, 16);
14062   x = const_double_from_real_value (ONE16r, SFmode);
14063   int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
14064                                       NULL, 0, OPTAB_DIRECT);
14065   int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
14066                                       NULL, 0, OPTAB_DIRECT);
14067   fp_hi = gen_reg_rtx (SFmode);
14068   fp_lo = gen_reg_rtx (SFmode);
14069   emit_insn (gen_floatsisf2 (fp_hi, int_hi));
14070   emit_insn (gen_floatsisf2 (fp_lo, int_lo));
14071   fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
14072                                0, OPTAB_DIRECT);
14073   fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
14074                                0, OPTAB_DIRECT);
14075   if (!rtx_equal_p (target, fp_hi))
14076     emit_move_insn (target, fp_hi);
14077 }
14078
14079 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
14080    then replicate the value for all elements of the vector
14081    register.  */
14082
14083 rtx
14084 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
14085 {
14086   rtvec v;
14087   switch (mode)
14088     {
14089     case SImode:
14090       gcc_assert (vect);
14091       v = gen_rtvec (4, value, value, value, value);
14092       return gen_rtx_CONST_VECTOR (V4SImode, v);
14093
14094     case DImode:
14095       gcc_assert (vect);
14096       v = gen_rtvec (2, value, value);
14097       return gen_rtx_CONST_VECTOR (V2DImode, v);
14098
14099     case SFmode:
14100       if (vect)
14101         v = gen_rtvec (4, value, value, value, value);
14102       else
14103         v = gen_rtvec (4, value, CONST0_RTX (SFmode),
14104                        CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14105       return gen_rtx_CONST_VECTOR (V4SFmode, v);
14106
14107     case DFmode:
14108       if (vect)
14109         v = gen_rtvec (2, value, value);
14110       else
14111         v = gen_rtvec (2, value, CONST0_RTX (DFmode));
14112       return gen_rtx_CONST_VECTOR (V2DFmode, v);
14113
14114     default:
14115       gcc_unreachable ();
14116     }
14117 }
14118
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.

   The result is always a register: for SImode/SFmode/DImode/DFmode it
   holds a vector constant built by ix86_build_const_vector; for
   TImode/TFmode it holds a scalar value in MODE.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  /* Position of the sign bit within a 64-bit word.  */
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case SImode:
    case SFmode:
      imode = SImode;
      vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
      /* NOTE(review): "lo < 0" evaluates to 0 or 1, so HI becomes 1
         (not -1) when LO is negative; presumably HI is not significant
         for an SImode constant -- confirm against
         immed_double_const.  */
      lo = 0x80000000, hi = lo < 0;
      break;

    case DImode:
    case DFmode:
      imode = DImode;
      vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
      /* Bit 63 lives in LO when one HOST_WIDE_INT holds 64 bits,
         otherwise in HI.  */
      if (HOST_BITS_PER_WIDE_INT >= 64)
        lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
        lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      /* No vector mode: the mask is a scalar value in MODE.  */
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        {
          /* The sign bit is the top bit of the high word.  */
          imode = TImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << shift;
        }
      else
        {
          rtvec vec;

          /* A LO/HI pair cannot describe the whole constant here, so
             build it as a V2DImode vector of two DImode halves and
             reinterpret that vector in MODE.  */
          imode = DImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

          /* The first vector element is all-ones for an inverted mask
             and zero otherwise.  */
          if (invert)
            {
              lo = ~lo, hi = ~hi;
              v = constm1_rtx;
            }
          else
            v = const0_rtx;

          mask = immed_double_const (lo, hi, imode);

          vec = gen_rtvec (2, v, mask);
          v = gen_rtx_CONST_VECTOR (V2DImode, vec);
          v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

          return v;
        }
     break;

    default:
      gcc_unreachable ();
    }

  /* An inverted mask has every bit set except the sign bit.  */
  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  /* TImode/TFmode: return the scalar mask in a register.  */
  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (mode, vect, mask);
  return force_reg (vec_mode, v);
}
14204
14205 /* Generate code for floating point ABS or NEG.  */
14206
14207 void
14208 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
14209                                 rtx operands[])
14210 {
14211   rtx mask, set, use, clob, dst, src;
14212   bool use_sse = false;
14213   bool vector_mode = VECTOR_MODE_P (mode);
14214   enum machine_mode elt_mode = mode;
14215
14216   if (vector_mode)
14217     {
14218       elt_mode = GET_MODE_INNER (mode);
14219       use_sse = true;
14220     }
14221   else if (mode == TFmode)
14222     use_sse = true;
14223   else if (TARGET_SSE_MATH)
14224     use_sse = SSE_FLOAT_MODE_P (mode);
14225
14226   /* NEG and ABS performed with SSE use bitwise mask operations.
14227      Create the appropriate mask now.  */
14228   if (use_sse)
14229     mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
14230   else
14231     mask = NULL_RTX;
14232
14233   dst = operands[0];
14234   src = operands[1];
14235
14236   if (vector_mode)
14237     {
14238       set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
14239       set = gen_rtx_SET (VOIDmode, dst, set);
14240       emit_insn (set);
14241     }
14242   else
14243     {
14244       set = gen_rtx_fmt_e (code, mode, src);
14245       set = gen_rtx_SET (VOIDmode, dst, set);
14246       if (mask)
14247         {
14248           use = gen_rtx_USE (VOIDmode, mask);
14249           clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14250           emit_insn (gen_rtx_PARALLEL (VOIDmode,
14251                                        gen_rtvec (3, set, use, clob)));
14252         }
14253       else
14254         emit_insn (set);
14255     }
14256 }
14257
14258 /* Expand a copysign operation.  Special case operand 0 being a constant.  */
14259
14260 void
14261 ix86_expand_copysign (rtx operands[])
14262 {
14263   enum machine_mode mode;
14264   rtx dest, op0, op1, mask, nmask;
14265
14266   dest = operands[0];
14267   op0 = operands[1];
14268   op1 = operands[2];
14269
14270   mode = GET_MODE (dest);
14271
14272   if (GET_CODE (op0) == CONST_DOUBLE)
14273     {
14274       rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
14275
14276       if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
14277         op0 = simplify_unary_operation (ABS, mode, op0, mode);
14278
14279       if (mode == SFmode || mode == DFmode)
14280         {
14281           enum machine_mode vmode;
14282
14283           vmode = mode == SFmode ? V4SFmode : V2DFmode;
14284
14285           if (op0 == CONST0_RTX (mode))
14286             op0 = CONST0_RTX (vmode);
14287           else
14288             {
14289               rtx v = ix86_build_const_vector (mode, false, op0);
14290
14291               op0 = force_reg (vmode, v);
14292             }
14293         }
14294       else if (op0 != CONST0_RTX (mode))
14295         op0 = force_reg (mode, op0);
14296
14297       mask = ix86_build_signbit_mask (mode, 0, 0);
14298
14299       if (mode == SFmode)
14300         copysign_insn = gen_copysignsf3_const;
14301       else if (mode == DFmode)
14302         copysign_insn = gen_copysigndf3_const;
14303       else
14304         copysign_insn = gen_copysigntf3_const;
14305
14306         emit_insn (copysign_insn (dest, op0, op1, mask));
14307     }
14308   else
14309     {
14310       rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
14311
14312       nmask = ix86_build_signbit_mask (mode, 0, 1);
14313       mask = ix86_build_signbit_mask (mode, 0, 0);
14314
14315       if (mode == SFmode)
14316         copysign_insn = gen_copysignsf3_var;
14317       else if (mode == DFmode)
14318         copysign_insn = gen_copysigndf3_var;
14319       else
14320         copysign_insn = gen_copysigntf3_var;
14321
14322       emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
14323     }
14324 }
14325
14326 /* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
14327    be a constant, and so has already been expanded into a vector constant.  */
14328
14329 void
14330 ix86_split_copysign_const (rtx operands[])
14331 {
14332   enum machine_mode mode, vmode;
14333   rtx dest, op0, mask, x;
14334
14335   dest = operands[0];
14336   op0 = operands[1];
14337   mask = operands[3];
14338
14339   mode = GET_MODE (dest);
14340   vmode = GET_MODE (mask);
14341
14342   dest = simplify_gen_subreg (vmode, dest, mode, 0);
14343   x = gen_rtx_AND (vmode, dest, mask);
14344   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14345
14346   if (op0 != CONST0_RTX (vmode))
14347     {
14348       x = gen_rtx_IOR (vmode, dest, op0);
14349       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14350     }
14351 }
14352
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.

   operands[0] is the destination, operands[1] a scratch register,
   operands[2] and operands[3] the two inputs (OP0 and OP1), and
   operands[4] / operands[5] the two masks (NMASK and MASK).  Which
   operands share a hard register depends on the matched alternative,
   noted on each branch below.  SCRATCH receives the bits of OP1
   selected by MASK, DEST receives the bits of OP0 outside MASK (or
   selected by NMASK), and the two are finally OR-ed into DEST.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  /* MODE is the scalar mode of the result; VMODE, taken from MASK, is
     the mode in which the bitwise operations are performed.  */
  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      /* scratch (holding OP1) &= mask.  */
      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      /* DEST shares its register with MASK, so compute
         dest = ~mask & op0 in place.  */
      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      /* First half: scratch = op1 & mask; SCRATCH initially holds
         whichever of the two it shares a register with.  */
      if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else                                              /* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      /* Second half: dest = op0 & nmask; DEST initially holds
         whichever of the two it shares a register with.  */
      if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else                                              /* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  /* Combine the two masked halves.  */
  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
14425
14426 /* Return TRUE or FALSE depending on whether the first SET in INSN
14427    has source and destination with matching CC modes, and that the
14428    CC mode is at least as constrained as REQ_MODE.  */
14429
14430 int
14431 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
14432 {
14433   rtx set;
14434   enum machine_mode set_mode;
14435
14436   set = PATTERN (insn);
14437   if (GET_CODE (set) == PARALLEL)
14438     set = XVECEXP (set, 0, 0);
14439   gcc_assert (GET_CODE (set) == SET);
14440   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14441
14442   set_mode = GET_MODE (SET_DEST (set));
14443   switch (set_mode)
14444     {
14445     case CCNOmode:
14446       if (req_mode != CCNOmode
14447           && (req_mode != CCmode
14448               || XEXP (SET_SRC (set), 1) != const0_rtx))
14449         return 0;
14450       break;
14451     case CCmode:
14452       if (req_mode == CCGCmode)
14453         return 0;
14454       /* FALLTHRU */
14455     case CCGCmode:
14456       if (req_mode == CCGOCmode || req_mode == CCNOmode)
14457         return 0;
14458       /* FALLTHRU */
14459     case CCGOCmode:
14460       if (req_mode == CCZmode)
14461         return 0;
14462       /* FALLTHRU */
14463     case CCAmode:
14464     case CCCmode:
14465     case CCOmode:
14466     case CCSmode:
14467     case CCZmode:
14468       break;
14469
14470     default:
14471       gcc_unreachable ();
14472     }
14473
14474   return (GET_MODE (SET_SRC (set)) == set_mode);
14475 }
14476
14477 /* Generate insn patterns to do an integer compare of OPERANDS.  */
14478
14479 static rtx
14480 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14481 {
14482   enum machine_mode cmpmode;
14483   rtx tmp, flags;
14484
14485   cmpmode = SELECT_CC_MODE (code, op0, op1);
14486   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14487
14488   /* This is very simple, but making the interface the same as in the
14489      FP case makes the rest of the code easier.  */
14490   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14491   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14492
14493   /* Return the test that should be put into the flags user, i.e.
14494      the bcc, scc, or cmov instruction.  */
14495   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14496 }
14497
14498 /* Figure out whether to use ordered or unordered fp comparisons.
14499    Return the appropriate mode to use.  */
14500
14501 enum machine_mode
14502 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14503 {
14504   /* ??? In order to make all comparisons reversible, we do all comparisons
14505      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
14506      all forms trapping and nontrapping comparisons, we can make inequality
14507      comparisons trapping again, since it results in better code when using
14508      FCOM based compares.  */
14509   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
14510 }
14511
14512 enum machine_mode
14513 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14514 {
14515   enum machine_mode mode = GET_MODE (op0);
14516
14517   if (SCALAR_FLOAT_MODE_P (mode))
14518     {
14519       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14520       return ix86_fp_compare_mode (code);
14521     }
14522
14523   switch (code)
14524     {
14525       /* Only zero flag is needed.  */
14526     case EQ:                    /* ZF=0 */
14527     case NE:                    /* ZF!=0 */
14528       return CCZmode;
14529       /* Codes needing carry flag.  */
14530     case GEU:                   /* CF=0 */
14531     case LTU:                   /* CF=1 */
14532       /* Detect overflow checks.  They need just the carry flag.  */
14533       if (GET_CODE (op0) == PLUS
14534           && rtx_equal_p (op1, XEXP (op0, 0)))
14535         return CCCmode;
14536       else
14537         return CCmode;
14538     case GTU:                   /* CF=0 & ZF=0 */
14539     case LEU:                   /* CF=1 | ZF=1 */
14540       /* Detect overflow checks.  They need just the carry flag.  */
14541       if (GET_CODE (op0) == MINUS
14542           && rtx_equal_p (op1, XEXP (op0, 0)))
14543         return CCCmode;
14544       else
14545         return CCmode;
14546       /* Codes possibly doable only with sign flag when
14547          comparing against zero.  */
14548     case GE:                    /* SF=OF   or   SF=0 */
14549     case LT:                    /* SF<>OF  or   SF=1 */
14550       if (op1 == const0_rtx)
14551         return CCGOCmode;
14552       else
14553         /* For other cases Carry flag is not required.  */
14554         return CCGCmode;
14555       /* Codes doable only with sign flag when comparing
14556          against zero, but we miss jump instruction for it
14557          so we need to use relational tests against overflow
14558          that thus needs to be zero.  */
14559     case GT:                    /* ZF=0 & SF=OF */
14560     case LE:                    /* ZF=1 | SF<>OF */
14561       if (op1 == const0_rtx)
14562         return CCNOmode;
14563       else
14564         return CCGCmode;
14565       /* strcmp pattern do (use flags) and combine may ask us for proper
14566          mode.  */
14567     case USE:
14568       return CCmode;
14569     default:
14570       gcc_unreachable ();
14571     }
14572 }
14573
/* Return the fixed registers used for condition codes: store FLAGS_REG
   in *P1 and FPSR_REG in *P2, and always return true.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
14583
14584 /* If two condition code modes are compatible, return a condition code
14585    mode which is compatible with both.  Otherwise, return
14586    VOIDmode.  */
14587
14588 static enum machine_mode
14589 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14590 {
14591   if (m1 == m2)
14592     return m1;
14593
14594   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14595     return VOIDmode;
14596
14597   if ((m1 == CCGCmode && m2 == CCGOCmode)
14598       || (m1 == CCGOCmode && m2 == CCGCmode))
14599     return CCGCmode;
14600
14601   switch (m1)
14602     {
14603     default:
14604       gcc_unreachable ();
14605
14606     case CCmode:
14607     case CCGCmode:
14608     case CCGOCmode:
14609     case CCNOmode:
14610     case CCAmode:
14611     case CCCmode:
14612     case CCOmode:
14613     case CCSmode:
14614     case CCZmode:
14615       switch (m2)
14616         {
14617         default:
14618           return VOIDmode;
14619
14620         case CCmode:
14621         case CCGCmode:
14622         case CCGOCmode:
14623         case CCNOmode:
14624         case CCAmode:
14625         case CCCmode:
14626         case CCOmode:
14627         case CCSmode:
14628         case CCZmode:
14629           return CCmode;
14630         }
14631
14632     case CCFPmode:
14633     case CCFPUmode:
14634       /* These are only compatible with themselves, which we already
14635          checked above.  */
14636       return VOIDmode;
14637     }
14638 }
14639
14640
14641 /* Return a comparison we can do and that it is equivalent to
14642    swap_condition (code) apart possibly from orderedness.
14643    But, never change orderedness if TARGET_IEEE_FP, returning
14644    UNKNOWN in that case if necessary.  */
14645
14646 static enum rtx_code
14647 ix86_fp_swap_condition (enum rtx_code code)
14648 {
14649   switch (code)
14650     {
14651     case GT:                   /* GTU - CF=0 & ZF=0 */
14652       return TARGET_IEEE_FP ? UNKNOWN : UNLT;
14653     case GE:                   /* GEU - CF=0 */
14654       return TARGET_IEEE_FP ? UNKNOWN : UNLE;
14655     case UNLT:                 /* LTU - CF=1 */
14656       return TARGET_IEEE_FP ? UNKNOWN : GT;
14657     case UNLE:                 /* LEU - CF=1 | ZF=1 */
14658       return TARGET_IEEE_FP ? UNKNOWN : GE;
14659     default:
14660       return swap_condition (code);
14661     }
14662 }
14663
14664 /* Return cost of comparison CODE using the best strategy for performance.
14665    All following functions do use number of instructions as a cost metrics.
14666    In future this should be tweaked to compute bytes for optimize_size and
14667    take into account performance of various instructions on various CPUs.  */
14668
14669 static int
14670 ix86_fp_comparison_cost (enum rtx_code code)
14671 {
14672   int arith_cost;
14673
14674   /* The cost of code using bit-twiddling on %ah.  */
14675   switch (code)
14676     {
14677     case UNLE:
14678     case UNLT:
14679     case LTGT:
14680     case GT:
14681     case GE:
14682     case UNORDERED:
14683     case ORDERED:
14684     case UNEQ:
14685       arith_cost = 4;
14686       break;
14687     case LT:
14688     case NE:
14689     case EQ:
14690     case UNGE:
14691       arith_cost = TARGET_IEEE_FP ? 5 : 4;
14692       break;
14693     case LE:
14694     case UNGT:
14695       arith_cost = TARGET_IEEE_FP ? 6 : 4;
14696       break;
14697     default:
14698       gcc_unreachable ();
14699     }
14700
14701   switch (ix86_fp_comparison_strategy (code))
14702     {
14703     case IX86_FPCMP_COMI:
14704       return arith_cost > 4 ? 3 : 2;
14705     case IX86_FPCMP_SAHF:
14706       return arith_cost > 4 ? 4 : 3;
14707     default:
14708       return arith_cost;
14709     }
14710 }
14711
14712 /* Return strategy to use for floating-point.  We assume that fcomi is always
14713    preferrable where available, since that is also true when looking at size
14714    (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
14715
14716 enum ix86_fpcmp_strategy
14717 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
14718 {
14719   /* Do fcomi/sahf based test when profitable.  */
14720
14721   if (TARGET_CMOVE)
14722     return IX86_FPCMP_COMI;
14723
14724   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
14725     return IX86_FPCMP_SAHF;
14726
14727   return IX86_FPCMP_ARITH;
14728 }
14729
14730 /* Swap, force into registers, or otherwise massage the two operands
14731    to a fp comparison.  The operands are updated in place; the new
14732    comparison code is returned.  */
14733
14734 static enum rtx_code
14735 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14736 {
14737   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14738   rtx op0 = *pop0, op1 = *pop1;
14739   enum machine_mode op_mode = GET_MODE (op0);
14740   int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14741
14742   /* All of the unordered compare instructions only work on registers.
14743      The same is true of the fcomi compare instructions.  The XFmode
14744      compare instructions require registers except when comparing
14745      against zero or when converting operand 1 from fixed point to
14746      floating point.  */
14747
14748   if (!is_sse
14749       && (fpcmp_mode == CCFPUmode
14750           || (op_mode == XFmode
14751               && ! (standard_80387_constant_p (op0) == 1
14752                     || standard_80387_constant_p (op1) == 1)
14753               && GET_CODE (op1) != FLOAT)
14754           || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
14755     {
14756       op0 = force_reg (op_mode, op0);
14757       op1 = force_reg (op_mode, op1);
14758     }
14759   else
14760     {
14761       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
14762          things around if they appear profitable, otherwise force op0
14763          into a register.  */
14764
14765       if (standard_80387_constant_p (op0) == 0
14766           || (MEM_P (op0)
14767               && ! (standard_80387_constant_p (op1) == 0
14768                     || MEM_P (op1))))
14769         {
14770           enum rtx_code new_code = ix86_fp_swap_condition (code);
14771           if (new_code != UNKNOWN)
14772             {
14773               rtx tmp;
14774               tmp = op0, op0 = op1, op1 = tmp;
14775               code = new_code;
14776             }
14777         }
14778
14779       if (!REG_P (op0))
14780         op0 = force_reg (op_mode, op0);
14781
14782       if (CONSTANT_P (op1))
14783         {
14784           int tmp = standard_80387_constant_p (op1);
14785           if (tmp == 0)
14786             op1 = validize_mem (force_const_mem (op_mode, op1));
14787           else if (tmp == 1)
14788             {
14789               if (TARGET_CMOVE)
14790                 op1 = force_reg (op_mode, op1);
14791             }
14792           else
14793             op1 = force_reg (op_mode, op1);
14794         }
14795     }
14796
14797   /* Try to rearrange the comparison to make it cheaper.  */
14798   if (ix86_fp_comparison_cost (code)
14799       > ix86_fp_comparison_cost (swap_condition (code))
14800       && (REG_P (op1) || can_create_pseudo_p ()))
14801     {
14802       rtx tmp;
14803       tmp = op0, op0 = op1, op1 = tmp;
14804       code = swap_condition (code);
14805       if (!REG_P (op0))
14806         op0 = force_reg (op_mode, op0);
14807     }
14808
14809   *pop0 = op0;
14810   *pop1 = op1;
14811   return code;
14812 }
14813
14814 /* Convert comparison codes we use to represent FP comparison to integer
14815    code that will result in proper branch.  Return UNKNOWN if no such code
14816    is available.  */
14817
14818 enum rtx_code
14819 ix86_fp_compare_code_to_integer (enum rtx_code code)
14820 {
14821   switch (code)
14822     {
14823     case GT:
14824       return GTU;
14825     case GE:
14826       return GEU;
14827     case ORDERED:
14828     case UNORDERED:
14829       return code;
14830       break;
14831     case UNEQ:
14832       return EQ;
14833       break;
14834     case UNLT:
14835       return LTU;
14836       break;
14837     case UNLE:
14838       return LEU;
14839       break;
14840     case LTGT:
14841       return NE;
14842       break;
14843     default:
14844       return UNKNOWN;
14845     }
14846 }
14847
14848 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
14849
14850 static rtx
14851 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
14852 {
14853   enum machine_mode fpcmp_mode, intcmp_mode;
14854   rtx tmp, tmp2;
14855
14856   fpcmp_mode = ix86_fp_compare_mode (code);
14857   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14858
14859   /* Do fcomi/sahf based test when profitable.  */
14860   switch (ix86_fp_comparison_strategy (code))
14861     {
14862     case IX86_FPCMP_COMI:
14863       intcmp_mode = fpcmp_mode;
14864       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14865       tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14866                          tmp);
14867       emit_insn (tmp);
14868       break;
14869
14870     case IX86_FPCMP_SAHF:
14871       intcmp_mode = fpcmp_mode;
14872       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14873       tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14874                          tmp);
14875
14876       if (!scratch)
14877         scratch = gen_reg_rtx (HImode);
14878       tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14879       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14880       break;
14881
14882     case IX86_FPCMP_ARITH:
14883       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
14884       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14885       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14886       if (!scratch)
14887         scratch = gen_reg_rtx (HImode);
14888       emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14889
14890       /* In the unordered case, we have to check C2 for NaN's, which
14891          doesn't happen to work out to anything nice combination-wise.
14892          So do some bit twiddling on the value we've got in AH to come
14893          up with an appropriate set of condition codes.  */
14894
14895       intcmp_mode = CCNOmode;
14896       switch (code)
14897         {
14898         case GT:
14899         case UNGT:
14900           if (code == GT || !TARGET_IEEE_FP)
14901             {
14902               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14903               code = EQ;
14904             }
14905           else
14906             {
14907               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14908               emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14909               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14910               intcmp_mode = CCmode;
14911               code = GEU;
14912             }
14913           break;
14914         case LT:
14915         case UNLT:
14916           if (code == LT && TARGET_IEEE_FP)
14917             {
14918               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14919               emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
14920               intcmp_mode = CCmode;
14921               code = EQ;
14922             }
14923           else
14924             {
14925               emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
14926               code = NE;
14927             }
14928           break;
14929         case GE:
14930         case UNGE:
14931           if (code == GE || !TARGET_IEEE_FP)
14932             {
14933               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14934               code = EQ;
14935             }
14936           else
14937             {
14938               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14939               emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
14940               code = NE;
14941             }
14942           break;
14943         case LE:
14944         case UNLE:
14945           if (code == LE && TARGET_IEEE_FP)
14946             {
14947               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14948               emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14949               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14950               intcmp_mode = CCmode;
14951               code = LTU;
14952             }
14953           else
14954             {
14955               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14956               code = NE;
14957             }
14958           break;
14959         case EQ:
14960         case UNEQ:
14961           if (code == EQ && TARGET_IEEE_FP)
14962             {
14963               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14964               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14965               intcmp_mode = CCmode;
14966               code = EQ;
14967             }
14968           else
14969             {
14970               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14971               code = NE;
14972             }
14973           break;
14974         case NE:
14975         case LTGT:
14976           if (code == NE && TARGET_IEEE_FP)
14977             {
14978               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14979               emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14980                                              GEN_INT (0x40)));
14981               code = NE;
14982             }
14983           else
14984             {
14985               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14986               code = EQ;
14987             }
14988           break;
14989
14990         case UNORDERED:
14991           emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14992           code = NE;
14993           break;
14994         case ORDERED:
14995           emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14996           code = EQ;
14997           break;
14998
14999         default:
15000           gcc_unreachable ();
15001         }
15002         break;
15003
15004     default:
15005       gcc_unreachable();
15006     }
15007
15008   /* Return the test that should be put into the flags user, i.e.
15009      the bcc, scc, or cmov instruction.  */
15010   return gen_rtx_fmt_ee (code, VOIDmode,
15011                          gen_rtx_REG (intcmp_mode, FLAGS_REG),
15012                          const0_rtx);
15013 }
15014
15015 rtx
15016 ix86_expand_compare (enum rtx_code code)
15017 {
15018   rtx op0, op1, ret;
15019   op0 = ix86_compare_op0;
15020   op1 = ix86_compare_op1;
15021
15022   if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
15023     ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
15024
15025   else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
15026     {
15027       gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
15028       ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15029     }
15030   else
15031     ret = ix86_expand_int_compare (code, op0, op1);
15032
15033   return ret;
15034 }
15035
15036 void
15037 ix86_expand_branch (enum rtx_code code, rtx label)
15038 {
15039   rtx tmp;
15040
15041   switch (GET_MODE (ix86_compare_op0))
15042     {
15043     case SFmode:
15044     case DFmode:
15045     case XFmode:
15046     case QImode:
15047     case HImode:
15048     case SImode:
15049       simple:
15050       tmp = ix86_expand_compare (code);
15051       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15052                                   gen_rtx_LABEL_REF (VOIDmode, label),
15053                                   pc_rtx);
15054       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15055       return;
15056
15057     case DImode:
15058       if (TARGET_64BIT)
15059         goto simple;
15060     case TImode:
15061       /* Expand DImode branch into multiple compare+branch.  */
15062       {
15063         rtx lo[2], hi[2], label2;
15064         enum rtx_code code1, code2, code3;
15065         enum machine_mode submode;
15066
15067         if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
15068           {
15069             tmp = ix86_compare_op0;
15070             ix86_compare_op0 = ix86_compare_op1;
15071             ix86_compare_op1 = tmp;
15072             code = swap_condition (code);
15073           }
15074         if (GET_MODE (ix86_compare_op0) == DImode)
15075           {
15076             split_di (&ix86_compare_op0, 1, lo+0, hi+0);
15077             split_di (&ix86_compare_op1, 1, lo+1, hi+1);
15078             submode = SImode;
15079           }
15080         else
15081           {
15082             split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
15083             split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
15084             submode = DImode;
15085           }
15086
15087         /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
15088            avoid two branches.  This costs one extra insn, so disable when
15089            optimizing for size.  */
15090
15091         if ((code == EQ || code == NE)
15092             && (!optimize_insn_for_size_p ()
15093                 || hi[1] == const0_rtx || lo[1] == const0_rtx))
15094           {
15095             rtx xor0, xor1;
15096
15097             xor1 = hi[0];
15098             if (hi[1] != const0_rtx)
15099               xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
15100                                    NULL_RTX, 0, OPTAB_WIDEN);
15101
15102             xor0 = lo[0];
15103             if (lo[1] != const0_rtx)
15104               xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
15105                                    NULL_RTX, 0, OPTAB_WIDEN);
15106
15107             tmp = expand_binop (submode, ior_optab, xor1, xor0,
15108                                 NULL_RTX, 0, OPTAB_WIDEN);
15109
15110             ix86_compare_op0 = tmp;
15111             ix86_compare_op1 = const0_rtx;
15112             ix86_expand_branch (code, label);
15113             return;
15114           }
15115
15116         /* Otherwise, if we are doing less-than or greater-or-equal-than,
15117            op1 is a constant and the low word is zero, then we can just
15118            examine the high word.  Similarly for low word -1 and
15119            less-or-equal-than or greater-than.  */
15120
15121         if (CONST_INT_P (hi[1]))
15122           switch (code)
15123             {
15124             case LT: case LTU: case GE: case GEU:
15125               if (lo[1] == const0_rtx)
15126                 {
15127                   ix86_compare_op0 = hi[0];
15128                   ix86_compare_op1 = hi[1];
15129                   ix86_expand_branch (code, label);
15130                   return;
15131                 }
15132               break;
15133             case LE: case LEU: case GT: case GTU:
15134               if (lo[1] == constm1_rtx)
15135                 {
15136                   ix86_compare_op0 = hi[0];
15137                   ix86_compare_op1 = hi[1];
15138                   ix86_expand_branch (code, label);
15139                   return;
15140                 }
15141               break;
15142             default:
15143               break;
15144             }
15145
15146         /* Otherwise, we need two or three jumps.  */
15147
15148         label2 = gen_label_rtx ();
15149
15150         code1 = code;
15151         code2 = swap_condition (code);
15152         code3 = unsigned_condition (code);
15153
15154         switch (code)
15155           {
15156           case LT: case GT: case LTU: case GTU:
15157             break;
15158
15159           case LE:   code1 = LT;  code2 = GT;  break;
15160           case GE:   code1 = GT;  code2 = LT;  break;
15161           case LEU:  code1 = LTU; code2 = GTU; break;
15162           case GEU:  code1 = GTU; code2 = LTU; break;
15163
15164           case EQ:   code1 = UNKNOWN; code2 = NE;  break;
15165           case NE:   code2 = UNKNOWN; break;
15166
15167           default:
15168             gcc_unreachable ();
15169           }
15170
15171         /*
15172          * a < b =>
15173          *    if (hi(a) < hi(b)) goto true;
15174          *    if (hi(a) > hi(b)) goto false;
15175          *    if (lo(a) < lo(b)) goto true;
15176          *  false:
15177          */
15178
15179         ix86_compare_op0 = hi[0];
15180         ix86_compare_op1 = hi[1];
15181
15182         if (code1 != UNKNOWN)
15183           ix86_expand_branch (code1, label);
15184         if (code2 != UNKNOWN)
15185           ix86_expand_branch (code2, label2);
15186
15187         ix86_compare_op0 = lo[0];
15188         ix86_compare_op1 = lo[1];
15189         ix86_expand_branch (code3, label);
15190
15191         if (code2 != UNKNOWN)
15192           emit_label (label2);
15193         return;
15194       }
15195
15196     default:
15197       /* If we have already emitted a compare insn, go straight to simple.
15198          ix86_expand_compare won't emit anything if ix86_compare_emitted
15199          is non NULL.  */
15200       gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
15201       goto simple;
15202     }
15203 }
15204
15205 /* Split branch based on floating point condition.  */
15206 void
15207 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
15208                       rtx target1, rtx target2, rtx tmp, rtx pushed)
15209 {
15210   rtx condition;
15211   rtx i;
15212
15213   if (target2 != pc_rtx)
15214     {
15215       rtx tmp = target2;
15216       code = reverse_condition_maybe_unordered (code);
15217       target2 = target1;
15218       target1 = tmp;
15219     }
15220
15221   condition = ix86_expand_fp_compare (code, op1, op2,
15222                                       tmp);
15223
15224   /* Remove pushed operand from stack.  */
15225   if (pushed)
15226     ix86_free_from_memory (GET_MODE (pushed));
15227
15228   i = emit_jump_insn (gen_rtx_SET
15229                       (VOIDmode, pc_rtx,
15230                        gen_rtx_IF_THEN_ELSE (VOIDmode,
15231                                              condition, target1, target2)));
15232   if (split_branch_probability >= 0)
15233     add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
15234 }
15235
15236 void
15237 ix86_expand_setcc (enum rtx_code code, rtx dest)
15238 {
15239   rtx ret;
15240
15241   gcc_assert (GET_MODE (dest) == QImode);
15242
15243   ret = ix86_expand_compare (code);
15244   PUT_MODE (ret, QImode);
15245   emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
15246 }
15247
15248 /* Expand comparison setting or clearing carry flag.  Return true when
15249    successful and set pop for the operation.  */
15250 static bool
15251 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15252 {
15253   enum machine_mode mode =
15254     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15255
15256   /* Do not handle DImode compares that go through special path.  */
15257   if (mode == (TARGET_64BIT ? TImode : DImode))
15258     return false;
15259
15260   if (SCALAR_FLOAT_MODE_P (mode))
15261     {
15262       rtx compare_op, compare_seq;
15263
15264       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15265
15266       /* Shortcut:  following common codes never translate
15267          into carry flag compares.  */
15268       if (code == EQ || code == NE || code == UNEQ || code == LTGT
15269           || code == ORDERED || code == UNORDERED)
15270         return false;
15271
15272       /* These comparisons require zero flag; swap operands so they won't.  */
15273       if ((code == GT || code == UNLE || code == LE || code == UNGT)
15274           && !TARGET_IEEE_FP)
15275         {
15276           rtx tmp = op0;
15277           op0 = op1;
15278           op1 = tmp;
15279           code = swap_condition (code);
15280         }
15281
15282       /* Try to expand the comparison and verify that we end up with
15283          carry flag based comparison.  This fails to be true only when
15284          we decide to expand comparison using arithmetic that is not
15285          too common scenario.  */
15286       start_sequence ();
15287       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15288       compare_seq = get_insns ();
15289       end_sequence ();
15290
15291       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15292           || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15293         code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15294       else
15295         code = GET_CODE (compare_op);
15296
15297       if (code != LTU && code != GEU)
15298         return false;
15299
15300       emit_insn (compare_seq);
15301       *pop = compare_op;
15302       return true;
15303     }
15304
15305   if (!INTEGRAL_MODE_P (mode))
15306     return false;
15307
15308   switch (code)
15309     {
15310     case LTU:
15311     case GEU:
15312       break;
15313
15314     /* Convert a==0 into (unsigned)a<1.  */
15315     case EQ:
15316     case NE:
15317       if (op1 != const0_rtx)
15318         return false;
15319       op1 = const1_rtx;
15320       code = (code == EQ ? LTU : GEU);
15321       break;
15322
15323     /* Convert a>b into b<a or a>=b-1.  */
15324     case GTU:
15325     case LEU:
15326       if (CONST_INT_P (op1))
15327         {
15328           op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15329           /* Bail out on overflow.  We still can swap operands but that
15330              would force loading of the constant into register.  */
15331           if (op1 == const0_rtx
15332               || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15333             return false;
15334           code = (code == GTU ? GEU : LTU);
15335         }
15336       else
15337         {
15338           rtx tmp = op1;
15339           op1 = op0;
15340           op0 = tmp;
15341           code = (code == GTU ? LTU : GEU);
15342         }
15343       break;
15344
15345     /* Convert a>=0 into (unsigned)a<0x80000000.  */
15346     case LT:
15347     case GE:
15348       if (mode == DImode || op1 != const0_rtx)
15349         return false;
15350       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15351       code = (code == LT ? GEU : LTU);
15352       break;
15353     case LE:
15354     case GT:
15355       if (mode == DImode || op1 != constm1_rtx)
15356         return false;
15357       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15358       code = (code == LE ? GEU : LTU);
15359       break;
15360
15361     default:
15362       return false;
15363     }
15364   /* Swapping operands may cause constant to appear as first operand.  */
15365   if (!nonimmediate_operand (op0, VOIDmode))
15366     {
15367       if (!can_create_pseudo_p ())
15368         return false;
15369       op0 = force_reg (mode, op0);
15370     }
15371   ix86_compare_op0 = op0;
15372   ix86_compare_op1 = op1;
15373   *pop = ix86_expand_compare (code);
15374   gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
15375   return true;
15376 }
15377
15378 int
15379 ix86_expand_int_movcc (rtx operands[])
15380 {
15381   enum rtx_code code = GET_CODE (operands[1]), compare_code;
15382   rtx compare_seq, compare_op;
15383   enum machine_mode mode = GET_MODE (operands[0]);
15384   bool sign_bit_compare_p = false;;
15385
15386   start_sequence ();
15387   ix86_compare_op0 = XEXP (operands[1], 0);
15388   ix86_compare_op1 = XEXP (operands[1], 1);
15389   compare_op = ix86_expand_compare (code);
15390   compare_seq = get_insns ();
15391   end_sequence ();
15392
15393   compare_code = GET_CODE (compare_op);
15394
15395   if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15396       || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15397     sign_bit_compare_p = true;
15398
15399   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15400      HImode insns, we'd be swallowed in word prefix ops.  */
15401
15402   if ((mode != HImode || TARGET_FAST_PREFIX)
15403       && (mode != (TARGET_64BIT ? TImode : DImode))
15404       && CONST_INT_P (operands[2])
15405       && CONST_INT_P (operands[3]))
15406     {
15407       rtx out = operands[0];
15408       HOST_WIDE_INT ct = INTVAL (operands[2]);
15409       HOST_WIDE_INT cf = INTVAL (operands[3]);
15410       HOST_WIDE_INT diff;
15411
15412       diff = ct - cf;
15413       /*  Sign bit compares are better done using shifts than we do by using
15414           sbb.  */
15415       if (sign_bit_compare_p
15416           || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15417                                              ix86_compare_op1, &compare_op))
15418         {
15419           /* Detect overlap between destination and compare sources.  */
15420           rtx tmp = out;
15421
15422           if (!sign_bit_compare_p)
15423             {
15424               rtx flags;
15425               rtx (*insn)(rtx, rtx, rtx);
15426               bool fpcmp = false;
15427
15428               compare_code = GET_CODE (compare_op);
15429
15430               flags = XEXP (compare_op, 0);
15431
15432               if (GET_MODE (flags) == CCFPmode
15433                   || GET_MODE (flags) == CCFPUmode)
15434                 {
15435                   fpcmp = true;
15436                   compare_code
15437                     = ix86_fp_compare_code_to_integer (compare_code);
15438                 }
15439
15440               /* To simplify rest of code, restrict to the GEU case.  */
15441               if (compare_code == LTU)
15442                 {
15443                   HOST_WIDE_INT tmp = ct;
15444                   ct = cf;
15445                   cf = tmp;
15446                   compare_code = reverse_condition (compare_code);
15447                   code = reverse_condition (code);
15448                 }
15449               else
15450                 {
15451                   if (fpcmp)
15452                     PUT_CODE (compare_op,
15453                               reverse_condition_maybe_unordered
15454                                 (GET_CODE (compare_op)));
15455                   else
15456                     PUT_CODE (compare_op,
15457                               reverse_condition (GET_CODE (compare_op)));
15458                 }
15459               diff = ct - cf;
15460
15461               if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15462                   || reg_overlap_mentioned_p (out, ix86_compare_op1))
15463                 tmp = gen_reg_rtx (mode);
15464
15465               if (mode == DImode)
15466                 insn = gen_x86_movdicc_0_m1;
15467               else
15468                 insn = gen_x86_movsicc_0_m1;
15469
15470               emit_insn (insn (tmp, flags, compare_op));
15471             }
15472           else
15473             {
15474               if (code == GT || code == GE)
15475                 code = reverse_condition (code);
15476               else
15477                 {
15478                   HOST_WIDE_INT tmp = ct;
15479                   ct = cf;
15480                   cf = tmp;
15481                   diff = ct - cf;
15482                 }
15483               tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15484                                      ix86_compare_op1, VOIDmode, 0, -1);
15485             }
15486
15487           if (diff == 1)
15488             {
15489               /*
15490                * cmpl op0,op1
15491                * sbbl dest,dest
15492                * [addl dest, ct]
15493                *
15494                * Size 5 - 8.
15495                */
15496               if (ct)
15497                 tmp = expand_simple_binop (mode, PLUS,
15498                                            tmp, GEN_INT (ct),
15499                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
15500             }
15501           else if (cf == -1)
15502             {
15503               /*
15504                * cmpl op0,op1
15505                * sbbl dest,dest
15506                * orl $ct, dest
15507                *
15508                * Size 8.
15509                */
15510               tmp = expand_simple_binop (mode, IOR,
15511                                          tmp, GEN_INT (ct),
15512                                          copy_rtx (tmp), 1, OPTAB_DIRECT);
15513             }
15514           else if (diff == -1 && ct)
15515             {
15516               /*
15517                * cmpl op0,op1
15518                * sbbl dest,dest
15519                * notl dest
15520                * [addl dest, cf]
15521                *
15522                * Size 8 - 11.
15523                */
15524               tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15525               if (cf)
15526                 tmp = expand_simple_binop (mode, PLUS,
15527                                            copy_rtx (tmp), GEN_INT (cf),
15528                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
15529             }
15530           else
15531             {
15532               /*
15533                * cmpl op0,op1
15534                * sbbl dest,dest
15535                * [notl dest]
15536                * andl cf - ct, dest
15537                * [addl dest, ct]
15538                *
15539                * Size 8 - 11.
15540                */
15541
15542               if (cf == 0)
15543                 {
15544                   cf = ct;
15545                   ct = 0;
15546                   tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15547                 }
15548
15549               tmp = expand_simple_binop (mode, AND,
15550                                          copy_rtx (tmp),
15551                                          gen_int_mode (cf - ct, mode),
15552                                          copy_rtx (tmp), 1, OPTAB_DIRECT);
15553               if (ct)
15554                 tmp = expand_simple_binop (mode, PLUS,
15555                                            copy_rtx (tmp), GEN_INT (ct),
15556                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
15557             }
15558
15559           if (!rtx_equal_p (tmp, out))
15560             emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15561
15562           return 1; /* DONE */
15563         }
15564
15565       if (diff < 0)
15566         {
15567           enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15568
15569           HOST_WIDE_INT tmp;
15570           tmp = ct, ct = cf, cf = tmp;
15571           diff = -diff;
15572
15573           if (SCALAR_FLOAT_MODE_P (cmp_mode))
15574             {
15575               gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15576
15577               /* We may be reversing unordered compare to normal compare, that
15578                  is not valid in general (we may convert non-trapping condition
15579                  to trapping one), however on i386 we currently emit all
15580                  comparisons unordered.  */
15581               compare_code = reverse_condition_maybe_unordered (compare_code);
15582               code = reverse_condition_maybe_unordered (code);
15583             }
15584           else
15585             {
15586               compare_code = reverse_condition (compare_code);
15587               code = reverse_condition (code);
15588             }
15589         }
15590
15591       compare_code = UNKNOWN;
15592       if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15593           && CONST_INT_P (ix86_compare_op1))
15594         {
15595           if (ix86_compare_op1 == const0_rtx
15596               && (code == LT || code == GE))
15597             compare_code = code;
15598           else if (ix86_compare_op1 == constm1_rtx)
15599             {
15600               if (code == LE)
15601                 compare_code = LT;
15602               else if (code == GT)
15603                 compare_code = GE;
15604             }
15605         }
15606
15607       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
15608       if (compare_code != UNKNOWN
15609           && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15610           && (cf == -1 || ct == -1))
15611         {
15612           /* If lea code below could be used, only optimize
15613              if it results in a 2 insn sequence.  */
15614
15615           if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15616                  || diff == 3 || diff == 5 || diff == 9)
15617               || (compare_code == LT && ct == -1)
15618               || (compare_code == GE && cf == -1))
15619             {
15620               /*
15621                * notl op1       (if necessary)
15622                * sarl $31, op1
15623                * orl cf, op1
15624                */
15625               if (ct != -1)
15626                 {
15627                   cf = ct;
15628                   ct = -1;
15629                   code = reverse_condition (code);
15630                 }
15631
15632               out = emit_store_flag (out, code, ix86_compare_op0,
15633                                      ix86_compare_op1, VOIDmode, 0, -1);
15634
15635               out = expand_simple_binop (mode, IOR,
15636                                          out, GEN_INT (cf),
15637                                          out, 1, OPTAB_DIRECT);
15638               if (out != operands[0])
15639                 emit_move_insn (operands[0], out);
15640
15641               return 1; /* DONE */
15642             }
15643         }
15644
15645
15646       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15647            || diff == 3 || diff == 5 || diff == 9)
15648           && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15649           && (mode != DImode
15650               || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15651         {
15652           /*
15653            * xorl dest,dest
15654            * cmpl op1,op2
15655            * setcc dest
15656            * lea cf(dest*(ct-cf)),dest
15657            *
15658            * Size 14.
15659            *
15660            * This also catches the degenerate setcc-only case.
15661            */
15662
15663           rtx tmp;
15664           int nops;
15665
15666           out = emit_store_flag (out, code, ix86_compare_op0,
15667                                  ix86_compare_op1, VOIDmode, 0, 1);
15668
15669           nops = 0;
15670           /* On x86_64 the lea instruction operates on Pmode, so we need
15671              to get arithmetics done in proper mode to match.  */
15672           if (diff == 1)
15673             tmp = copy_rtx (out);
15674           else
15675             {
15676               rtx out1;
15677               out1 = copy_rtx (out);
15678               tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15679               nops++;
15680               if (diff & 1)
15681                 {
15682                   tmp = gen_rtx_PLUS (mode, tmp, out1);
15683                   nops++;
15684                 }
15685             }
15686           if (cf != 0)
15687             {
15688               tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15689               nops++;
15690             }
15691           if (!rtx_equal_p (tmp, out))
15692             {
15693               if (nops == 1)
15694                 out = force_operand (tmp, copy_rtx (out));
15695               else
15696                 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15697             }
15698           if (!rtx_equal_p (out, operands[0]))
15699             emit_move_insn (operands[0], copy_rtx (out));
15700
15701           return 1; /* DONE */
15702         }
15703
15704       /*
15705        * General case:                  Jumpful:
15706        *   xorl dest,dest               cmpl op1, op2
15707        *   cmpl op1, op2                movl ct, dest
15708        *   setcc dest                   jcc 1f
15709        *   decl dest                    movl cf, dest
15710        *   andl (cf-ct),dest            1:
15711        *   addl ct,dest
15712        *
15713        * Size 20.                       Size 14.
15714        *
15715        * This is reasonably steep, but branch mispredict costs are
15716        * high on modern cpus, so consider failing only if optimizing
15717        * for space.
15718        */
15719
15720       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15721           && BRANCH_COST (optimize_insn_for_speed_p (),
15722                           false) >= 2)
15723         {
15724           if (cf == 0)
15725             {
15726               enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15727
15728               cf = ct;
15729               ct = 0;
15730
15731               if (SCALAR_FLOAT_MODE_P (cmp_mode))
15732                 {
15733                   gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15734
15735                   /* We may be reversing unordered compare to normal compare,
15736                      that is not valid in general (we may convert non-trapping
15737                      condition to trapping one), however on i386 we currently
15738                      emit all comparisons unordered.  */
15739                   code = reverse_condition_maybe_unordered (code);
15740                 }
15741               else
15742                 {
15743                   code = reverse_condition (code);
15744                   if (compare_code != UNKNOWN)
15745                     compare_code = reverse_condition (compare_code);
15746                 }
15747             }
15748
15749           if (compare_code != UNKNOWN)
15750             {
15751               /* notl op1       (if needed)
15752                  sarl $31, op1
15753                  andl (cf-ct), op1
15754                  addl ct, op1
15755
15756                  For x < 0 (resp. x <= -1) there will be no notl,
15757                  so if possible swap the constants to get rid of the
15758                  complement.
15759                  True/false will be -1/0 while code below (store flag
15760                  followed by decrement) is 0/-1, so the constants need
15761                  to be exchanged once more.  */
15762
15763               if (compare_code == GE || !cf)
15764                 {
15765                   code = reverse_condition (code);
15766                   compare_code = LT;
15767                 }
15768               else
15769                 {
15770                   HOST_WIDE_INT tmp = cf;
15771                   cf = ct;
15772                   ct = tmp;
15773                 }
15774
15775               out = emit_store_flag (out, code, ix86_compare_op0,
15776                                      ix86_compare_op1, VOIDmode, 0, -1);
15777             }
15778           else
15779             {
15780               out = emit_store_flag (out, code, ix86_compare_op0,
15781                                      ix86_compare_op1, VOIDmode, 0, 1);
15782
15783               out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15784                                          copy_rtx (out), 1, OPTAB_DIRECT);
15785             }
15786
15787           out = expand_simple_binop (mode, AND, copy_rtx (out),
15788                                      gen_int_mode (cf - ct, mode),
15789                                      copy_rtx (out), 1, OPTAB_DIRECT);
15790           if (ct)
15791             out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15792                                        copy_rtx (out), 1, OPTAB_DIRECT);
15793           if (!rtx_equal_p (out, operands[0]))
15794             emit_move_insn (operands[0], copy_rtx (out));
15795
15796           return 1; /* DONE */
15797         }
15798     }
15799
15800   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15801     {
15802       /* Try a few things more with specific constants and a variable.  */
15803
15804       optab op;
15805       rtx var, orig_out, out, tmp;
15806
15807       if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15808         return 0; /* FAIL */
15809
15810       /* If one of the two operands is an interesting constant, load a
15811          constant with the above and mask it in with a logical operation.  */
15812
15813       if (CONST_INT_P (operands[2]))
15814         {
15815           var = operands[3];
15816           if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15817             operands[3] = constm1_rtx, op = and_optab;
15818           else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15819             operands[3] = const0_rtx, op = ior_optab;
15820           else
15821             return 0; /* FAIL */
15822         }
15823       else if (CONST_INT_P (operands[3]))
15824         {
15825           var = operands[2];
15826           if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15827             operands[2] = constm1_rtx, op = and_optab;
15828           else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15829             operands[2] = const0_rtx, op = ior_optab;
15830           else
15831             return 0; /* FAIL */
15832         }
15833       else
15834         return 0; /* FAIL */
15835
15836       orig_out = operands[0];
15837       tmp = gen_reg_rtx (mode);
15838       operands[0] = tmp;
15839
15840       /* Recurse to get the constant loaded.  */
15841       if (ix86_expand_int_movcc (operands) == 0)
15842         return 0; /* FAIL */
15843
15844       /* Mask in the interesting variable.  */
15845       out = expand_binop (mode, op, var, tmp, orig_out, 0,
15846                           OPTAB_WIDEN);
15847       if (!rtx_equal_p (out, orig_out))
15848         emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15849
15850       return 1; /* DONE */
15851     }
15852
15853   /*
15854    * For comparison with above,
15855    *
15856    * movl cf,dest
15857    * movl ct,tmp
15858    * cmpl op1,op2
15859    * cmovcc tmp,dest
15860    *
15861    * Size 15.
15862    */
15863
15864   if (! nonimmediate_operand (operands[2], mode))
15865     operands[2] = force_reg (mode, operands[2]);
15866   if (! nonimmediate_operand (operands[3], mode))
15867     operands[3] = force_reg (mode, operands[3]);
15868
15869   if (! register_operand (operands[2], VOIDmode)
15870       && (mode == QImode
15871           || ! register_operand (operands[3], VOIDmode)))
15872     operands[2] = force_reg (mode, operands[2]);
15873
15874   if (mode == QImode
15875       && ! register_operand (operands[3], VOIDmode))
15876     operands[3] = force_reg (mode, operands[3]);
15877
15878   emit_insn (compare_seq);
15879   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15880                           gen_rtx_IF_THEN_ELSE (mode,
15881                                                 compare_op, operands[2],
15882                                                 operands[3])));
15883
15884   return 1; /* DONE */
15885 }
15886
15887 /* Swap, force into registers, or otherwise massage the two operands
15888    to an sse comparison with a mask result.  Thus we differ a bit from
15889    ix86_prepare_fp_compare_args which expects to produce a flags result.
15890
15891    The DEST operand exists to help determine whether to commute commutative
15892    operators.  The POP0/POP1 operands are updated in place.  The new
15893    comparison code is returned, or UNKNOWN if not implementable.  */
15894
15895 static enum rtx_code
15896 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15897                                   rtx *pop0, rtx *pop1)
15898 {
15899   rtx tmp;
15900
15901   switch (code)
15902     {
15903     case LTGT:
15904     case UNEQ:
15905       /* We have no LTGT as an operator.  We could implement it with
15906          NE & ORDERED, but this requires an extra temporary.  It's
15907          not clear that it's worth it.  */
15908       return UNKNOWN;
15909
15910     case LT:
15911     case LE:
15912     case UNGT:
15913     case UNGE:
15914       /* These are supported directly.  */
15915       break;
15916
15917     case EQ:
15918     case NE:
15919     case UNORDERED:
15920     case ORDERED:
15921       /* For commutative operators, try to canonicalize the destination
15922          operand to be first in the comparison - this helps reload to
15923          avoid extra moves.  */
15924       if (!dest || !rtx_equal_p (dest, *pop1))
15925         break;
15926       /* FALLTHRU */
15927
15928     case GE:
15929     case GT:
15930     case UNLE:
15931     case UNLT:
15932       /* These are not supported directly.  Swap the comparison operands
15933          to transform into something that is supported.  */
15934       tmp = *pop0;
15935       *pop0 = *pop1;
15936       *pop1 = tmp;
15937       code = swap_condition (code);
15938       break;
15939
15940     default:
15941       gcc_unreachable ();
15942     }
15943
15944   return code;
15945 }
15946
15947 /* Detect conditional moves that exactly match min/max operational
15948    semantics.  Note that this is IEEE safe, as long as we don't
15949    interchange the operands.
15950
15951    Returns FALSE if this conditional move doesn't match a MIN/MAX,
15952    and TRUE if the operation is successful and instructions are emitted.  */
15953
15954 static bool
15955 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15956                            rtx cmp_op1, rtx if_true, rtx if_false)
15957 {
15958   enum machine_mode mode;
15959   bool is_min;
15960   rtx tmp;
15961
15962   if (code == LT)
15963     ;
15964   else if (code == UNGE)
15965     {
15966       tmp = if_true;
15967       if_true = if_false;
15968       if_false = tmp;
15969     }
15970   else
15971     return false;
15972
15973   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15974     is_min = true;
15975   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15976     is_min = false;
15977   else
15978     return false;
15979
15980   mode = GET_MODE (dest);
15981
15982   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15983      but MODE may be a vector mode and thus not appropriate.  */
15984   if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15985     {
15986       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15987       rtvec v;
15988
15989       if_true = force_reg (mode, if_true);
15990       v = gen_rtvec (2, if_true, if_false);
15991       tmp = gen_rtx_UNSPEC (mode, v, u);
15992     }
15993   else
15994     {
15995       code = is_min ? SMIN : SMAX;
15996       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15997     }
15998
15999   emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
16000   return true;
16001 }
16002
16003 /* Expand an sse vector comparison.  Return the register with the result.  */
16004
16005 static rtx
16006 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
16007                      rtx op_true, rtx op_false)
16008 {
16009   enum machine_mode mode = GET_MODE (dest);
16010   rtx x;
16011
16012   cmp_op0 = force_reg (mode, cmp_op0);
16013   if (!nonimmediate_operand (cmp_op1, mode))
16014     cmp_op1 = force_reg (mode, cmp_op1);
16015
16016   if (optimize
16017       || reg_overlap_mentioned_p (dest, op_true)
16018       || reg_overlap_mentioned_p (dest, op_false))
16019     dest = gen_reg_rtx (mode);
16020
16021   x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
16022   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16023
16024   return dest;
16025 }
16026
16027 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
16028    operations.  This is used for both scalar and vector conditional moves.  */
16029
16030 static void
16031 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
16032 {
16033   enum machine_mode mode = GET_MODE (dest);
16034   rtx t2, t3, x;
16035
16036   if (op_false == CONST0_RTX (mode))
16037     {
16038       op_true = force_reg (mode, op_true);
16039       x = gen_rtx_AND (mode, cmp, op_true);
16040       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16041     }
16042   else if (op_true == CONST0_RTX (mode))
16043     {
16044       op_false = force_reg (mode, op_false);
16045       x = gen_rtx_NOT (mode, cmp);
16046       x = gen_rtx_AND (mode, x, op_false);
16047       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16048     }
16049   else if (TARGET_XOP)
16050     {
16051       rtx pcmov = gen_rtx_SET (mode, dest,
16052                                gen_rtx_IF_THEN_ELSE (mode, cmp,
16053                                                      op_true,
16054                                                      op_false));
16055       emit_insn (pcmov);
16056     }
16057   else
16058     {
16059       op_true = force_reg (mode, op_true);
16060       op_false = force_reg (mode, op_false);
16061
16062       t2 = gen_reg_rtx (mode);
16063       if (optimize)
16064         t3 = gen_reg_rtx (mode);
16065       else
16066         t3 = dest;
16067
16068       x = gen_rtx_AND (mode, op_true, cmp);
16069       emit_insn (gen_rtx_SET (VOIDmode, t2, x));
16070
16071       x = gen_rtx_NOT (mode, cmp);
16072       x = gen_rtx_AND (mode, x, op_false);
16073       emit_insn (gen_rtx_SET (VOIDmode, t3, x));
16074
16075       x = gen_rtx_IOR (mode, t3, t2);
16076       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16077     }
16078 }
16079
16080 /* Expand a floating-point conditional move.  Return true if successful.  */
16081
16082 int
16083 ix86_expand_fp_movcc (rtx operands[])
16084 {
16085   enum machine_mode mode = GET_MODE (operands[0]);
16086   enum rtx_code code = GET_CODE (operands[1]);
16087   rtx tmp, compare_op;
16088
16089   ix86_compare_op0 = XEXP (operands[1], 0);
16090   ix86_compare_op1 = XEXP (operands[1], 1);
16091   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16092     {
16093       enum machine_mode cmode;
16094
16095       /* Since we've no cmove for sse registers, don't force bad register
16096          allocation just to gain access to it.  Deny movcc when the
16097          comparison mode doesn't match the move mode.  */
16098       cmode = GET_MODE (ix86_compare_op0);
16099       if (cmode == VOIDmode)
16100         cmode = GET_MODE (ix86_compare_op1);
16101       if (cmode != mode)
16102         return 0;
16103
16104       code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16105                                                &ix86_compare_op0,
16106                                                &ix86_compare_op1);
16107       if (code == UNKNOWN)
16108         return 0;
16109
16110       if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
16111                                      ix86_compare_op1, operands[2],
16112                                      operands[3]))
16113         return 1;
16114
16115       tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
16116                                  ix86_compare_op1, operands[2], operands[3]);
16117       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
16118       return 1;
16119     }
16120
16121   /* The floating point conditional move instructions don't directly
16122      support conditions resulting from a signed integer comparison.  */
16123
16124   compare_op = ix86_expand_compare (code);
16125   if (!fcmov_comparison_operator (compare_op, VOIDmode))
16126     {
16127       tmp = gen_reg_rtx (QImode);
16128       ix86_expand_setcc (code, tmp);
16129       code = NE;
16130       ix86_compare_op0 = tmp;
16131       ix86_compare_op1 = const0_rtx;
16132       compare_op = ix86_expand_compare (code);
16133     }
16134
16135   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16136                           gen_rtx_IF_THEN_ELSE (mode, compare_op,
16137                                                 operands[2], operands[3])));
16138
16139   return 1;
16140 }
16141
16142 /* Expand a floating-point vector conditional move; a vcond operation
16143    rather than a movcc operation.  */
16144
16145 bool
16146 ix86_expand_fp_vcond (rtx operands[])
16147 {
16148   enum rtx_code code = GET_CODE (operands[3]);
16149   rtx cmp;
16150
16151   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16152                                            &operands[4], &operands[5]);
16153   if (code == UNKNOWN)
16154     return false;
16155
16156   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
16157                                  operands[5], operands[1], operands[2]))
16158     return true;
16159
16160   cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
16161                              operands[1], operands[2]);
16162   ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
16163   return true;
16164 }
16165
16166 /* Expand a signed/unsigned integral vector conditional move.  */
16167
16168 bool
16169 ix86_expand_int_vcond (rtx operands[])
16170 {
16171   enum machine_mode mode = GET_MODE (operands[0]);
16172   enum rtx_code code = GET_CODE (operands[3]);
16173   bool negate = false;
16174   rtx x, cop0, cop1;
16175
16176   cop0 = operands[4];
16177   cop1 = operands[5];
16178
16179   /* XOP supports all of the comparisons on all vector int types.  */
16180   if (!TARGET_XOP)
16181     {
16182       /* Canonicalize the comparison to EQ, GT, GTU.  */
16183       switch (code)
16184         {
16185         case EQ:
16186         case GT:
16187         case GTU:
16188           break;
16189
16190         case NE:
16191         case LE:
16192         case LEU:
16193           code = reverse_condition (code);
16194           negate = true;
16195           break;
16196
16197         case GE:
16198         case GEU:
16199           code = reverse_condition (code);
16200           negate = true;
16201           /* FALLTHRU */
16202
16203         case LT:
16204         case LTU:
16205           code = swap_condition (code);
16206           x = cop0, cop0 = cop1, cop1 = x;
16207           break;
16208
16209         default:
16210           gcc_unreachable ();
16211         }
16212
16213       /* Only SSE4.1/SSE4.2 supports V2DImode.  */
16214       if (mode == V2DImode)
16215         {
16216           switch (code)
16217             {
16218             case EQ:
16219               /* SSE4.1 supports EQ.  */
16220               if (!TARGET_SSE4_1)
16221                 return false;
16222               break;
16223
16224             case GT:
16225             case GTU:
16226               /* SSE4.2 supports GT/GTU.  */
16227               if (!TARGET_SSE4_2)
16228                 return false;
16229               break;
16230
16231             default:
16232               gcc_unreachable ();
16233             }
16234         }
16235
16236       /* Unsigned parallel compare is not supported by the hardware.
16237          Play some tricks to turn this into a signed comparison
16238          against 0.  */
16239       if (code == GTU)
16240         {
16241           cop0 = force_reg (mode, cop0);
16242
16243           switch (mode)
16244             {
16245             case V4SImode:
16246             case V2DImode:
16247                 {
16248                   rtx t1, t2, mask;
16249                   rtx (*gen_sub3) (rtx, rtx, rtx);
16250
16251                   /* Subtract (-(INT MAX) - 1) from both operands to make
16252                      them signed.  */
16253                   mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16254                                                   true, false);
16255                   gen_sub3 = (mode == V4SImode
16256                               ? gen_subv4si3 : gen_subv2di3);
16257                   t1 = gen_reg_rtx (mode);
16258                   emit_insn (gen_sub3 (t1, cop0, mask));
16259
16260                   t2 = gen_reg_rtx (mode);
16261                   emit_insn (gen_sub3 (t2, cop1, mask));
16262
16263                   cop0 = t1;
16264                   cop1 = t2;
16265                   code = GT;
16266                 }
16267               break;
16268
16269             case V16QImode:
16270             case V8HImode:
16271               /* Perform a parallel unsigned saturating subtraction.  */
16272               x = gen_reg_rtx (mode);
16273               emit_insn (gen_rtx_SET (VOIDmode, x,
16274                                       gen_rtx_US_MINUS (mode, cop0, cop1)));
16275
16276               cop0 = x;
16277               cop1 = CONST0_RTX (mode);
16278               code = EQ;
16279               negate = !negate;
16280               break;
16281
16282             default:
16283               gcc_unreachable ();
16284             }
16285         }
16286     }
16287
16288   x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16289                            operands[1+negate], operands[2-negate]);
16290
16291   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16292                          operands[2-negate]);
16293   return true;
16294 }
16295
16296 /* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
16297    true if we should do zero extension, else sign extension.  HIGH_P is
16298    true if we want the N/2 high elements, else the low elements.  */
16299
16300 void
16301 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16302 {
16303   enum machine_mode imode = GET_MODE (operands[1]);
16304   rtx (*unpack)(rtx, rtx, rtx);
16305   rtx se, dest;
16306
16307   switch (imode)
16308     {
16309     case V16QImode:
16310       if (high_p)
16311         unpack = gen_vec_interleave_highv16qi;
16312       else
16313         unpack = gen_vec_interleave_lowv16qi;
16314       break;
16315     case V8HImode:
16316       if (high_p)
16317         unpack = gen_vec_interleave_highv8hi;
16318       else
16319         unpack = gen_vec_interleave_lowv8hi;
16320       break;
16321     case V4SImode:
16322       if (high_p)
16323         unpack = gen_vec_interleave_highv4si;
16324       else
16325         unpack = gen_vec_interleave_lowv4si;
16326       break;
16327     default:
16328       gcc_unreachable ();
16329     }
16330
16331   dest = gen_lowpart (imode, operands[0]);
16332
16333   if (unsigned_p)
16334     se = force_reg (imode, CONST0_RTX (imode));
16335   else
16336     se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16337                               operands[1], pc_rtx, pc_rtx);
16338
16339   emit_insn (unpack (dest, operands[1], se));
16340 }
16341
16342 /* This function performs the same task as ix86_expand_sse_unpack,
16343    but with SSE4.1 instructions.  */
16344
16345 void
16346 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16347 {
16348   enum machine_mode imode = GET_MODE (operands[1]);
16349   rtx (*unpack)(rtx, rtx);
16350   rtx src, dest;
16351
16352   switch (imode)
16353     {
16354     case V16QImode:
16355       if (unsigned_p)
16356         unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16357       else
16358         unpack = gen_sse4_1_extendv8qiv8hi2;
16359       break;
16360     case V8HImode:
16361       if (unsigned_p)
16362         unpack = gen_sse4_1_zero_extendv4hiv4si2;
16363       else
16364         unpack = gen_sse4_1_extendv4hiv4si2;
16365       break;
16366     case V4SImode:
16367       if (unsigned_p)
16368         unpack = gen_sse4_1_zero_extendv2siv2di2;
16369       else
16370         unpack = gen_sse4_1_extendv2siv2di2;
16371       break;
16372     default:
16373       gcc_unreachable ();
16374     }
16375
16376   dest = operands[0];
16377   if (high_p)
16378     {
16379       /* Shift higher 8 bytes to lower 8 bytes.  */
16380       src = gen_reg_rtx (imode);
16381       emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
16382                                      gen_lowpart (V1TImode, operands[1]),
16383                                      GEN_INT (64)));
16384     }
16385   else
16386     src = operands[1];
16387
16388   emit_insn (unpack (dest, src));
16389 }
16390
16391 /* Expand conditional increment or decrement using adb/sbb instructions.
16392    The default case using setcc followed by the conditional move can be
16393    done by generic code.  */
16394 int
16395 ix86_expand_int_addcc (rtx operands[])
16396 {
16397   enum rtx_code code = GET_CODE (operands[1]);
16398   rtx flags;
16399   rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
16400   rtx compare_op;
16401   rtx val = const0_rtx;
16402   bool fpcmp = false;
16403   enum machine_mode mode;
16404
16405   ix86_compare_op0 = XEXP (operands[1], 0);
16406   ix86_compare_op1 = XEXP (operands[1], 1);
16407   if (operands[3] != const1_rtx
16408       && operands[3] != constm1_rtx)
16409     return 0;
16410   if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16411                                        ix86_compare_op1, &compare_op))
16412      return 0;
16413   code = GET_CODE (compare_op);
16414
16415   flags = XEXP (compare_op, 0);
16416
16417   if (GET_MODE (flags) == CCFPmode
16418       || GET_MODE (flags) == CCFPUmode)
16419     {
16420       fpcmp = true;
16421       code = ix86_fp_compare_code_to_integer (code);
16422     }
16423
16424   if (code != LTU)
16425     {
16426       val = constm1_rtx;
16427       if (fpcmp)
16428         PUT_CODE (compare_op,
16429                   reverse_condition_maybe_unordered
16430                     (GET_CODE (compare_op)));
16431       else
16432         PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16433     }
16434
16435   mode = GET_MODE (operands[0]);
16436
16437   /* Construct either adc or sbb insn.  */
16438   if ((code == LTU) == (operands[3] == constm1_rtx))
16439     {
16440       switch (mode)
16441         {
16442           case QImode:
16443             insn = gen_subqi3_carry;
16444             break;
16445           case HImode:
16446             insn = gen_subhi3_carry;
16447             break;
16448           case SImode:
16449             insn = gen_subsi3_carry;
16450             break;
16451           case DImode:
16452             insn = gen_subdi3_carry;
16453             break;
16454           default:
16455             gcc_unreachable ();
16456         }
16457     }
16458   else
16459     {
16460       switch (mode)
16461         {
16462           case QImode:
16463             insn = gen_addqi3_carry;
16464             break;
16465           case HImode:
16466             insn = gen_addhi3_carry;
16467             break;
16468           case SImode:
16469             insn = gen_addsi3_carry;
16470             break;
16471           case DImode:
16472             insn = gen_adddi3_carry;
16473             break;
16474           default:
16475             gcc_unreachable ();
16476         }
16477     }
16478   emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
16479
16480   return 1; /* DONE */
16481 }
16482
16483
16484 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
16485    works for floating pointer parameters and nonoffsetable memories.
16486    For pushes, it returns just stack offsets; the values will be saved
16487    in the right order.  Maximally three parts are generated.  */
16488
16489 static int
16490 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16491 {
16492   int size;
16493
16494   if (!TARGET_64BIT)
16495     size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16496   else
16497     size = (GET_MODE_SIZE (mode) + 4) / 8;
16498
16499   gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16500   gcc_assert (size >= 2 && size <= 4);
16501
16502   /* Optimize constant pool reference to immediates.  This is used by fp
16503      moves, that force all constants to memory to allow combining.  */
16504   if (MEM_P (operand) && MEM_READONLY_P (operand))
16505     {
16506       rtx tmp = maybe_get_pool_constant (operand);
16507       if (tmp)
16508         operand = tmp;
16509     }
16510
16511   if (MEM_P (operand) && !offsettable_memref_p (operand))
16512     {
16513       /* The only non-offsetable memories we handle are pushes.  */
16514       int ok = push_operand (operand, VOIDmode);
16515
16516       gcc_assert (ok);
16517
16518       operand = copy_rtx (operand);
16519       PUT_MODE (operand, Pmode);
16520       parts[0] = parts[1] = parts[2] = parts[3] = operand;
16521       return size;
16522     }
16523
16524   if (GET_CODE (operand) == CONST_VECTOR)
16525     {
16526       enum machine_mode imode = int_mode_for_mode (mode);
16527       /* Caution: if we looked through a constant pool memory above,
16528          the operand may actually have a different mode now.  That's
16529          ok, since we want to pun this all the way back to an integer.  */
16530       operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16531       gcc_assert (operand != NULL);
16532       mode = imode;
16533     }
16534
16535   if (!TARGET_64BIT)
16536     {
16537       if (mode == DImode)
16538         split_di (&operand, 1, &parts[0], &parts[1]);
16539       else
16540         {
16541           int i;
16542
16543           if (REG_P (operand))
16544             {
16545               gcc_assert (reload_completed);
16546               for (i = 0; i < size; i++)
16547                 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16548             }
16549           else if (offsettable_memref_p (operand))
16550             {
16551               operand = adjust_address (operand, SImode, 0);
16552               parts[0] = operand;
16553               for (i = 1; i < size; i++)
16554                 parts[i] = adjust_address (operand, SImode, 4 * i);
16555             }
16556           else if (GET_CODE (operand) == CONST_DOUBLE)
16557             {
16558               REAL_VALUE_TYPE r;
16559               long l[4];
16560
16561               REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16562               switch (mode)
16563                 {
16564                 case TFmode:
16565                   real_to_target (l, &r, mode);
16566                   parts[3] = gen_int_mode (l[3], SImode);
16567                   parts[2] = gen_int_mode (l[2], SImode);
16568                   break;
16569                 case XFmode:
16570                   REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16571                   parts[2] = gen_int_mode (l[2], SImode);
16572                   break;
16573                 case DFmode:
16574                   REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16575                   break;
16576                 default:
16577                   gcc_unreachable ();
16578                 }
16579               parts[1] = gen_int_mode (l[1], SImode);
16580               parts[0] = gen_int_mode (l[0], SImode);
16581             }
16582           else
16583             gcc_unreachable ();
16584         }
16585     }
16586   else
16587     {
16588       if (mode == TImode)
16589         split_ti (&operand, 1, &parts[0], &parts[1]);
16590       if (mode == XFmode || mode == TFmode)
16591         {
16592           enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16593           if (REG_P (operand))
16594             {
16595               gcc_assert (reload_completed);
16596               parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16597               parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16598             }
16599           else if (offsettable_memref_p (operand))
16600             {
16601               operand = adjust_address (operand, DImode, 0);
16602               parts[0] = operand;
16603               parts[1] = adjust_address (operand, upper_mode, 8);
16604             }
16605           else if (GET_CODE (operand) == CONST_DOUBLE)
16606             {
16607               REAL_VALUE_TYPE r;
16608               long l[4];
16609
16610               REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16611               real_to_target (l, &r, mode);
16612
16613               /* Do not use shift by 32 to avoid warning on 32bit systems.  */
16614               if (HOST_BITS_PER_WIDE_INT >= 64)
16615                 parts[0]
16616                   = gen_int_mode
16617                       ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16618                        + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16619                        DImode);
16620               else
16621                 parts[0] = immed_double_const (l[0], l[1], DImode);
16622
16623               if (upper_mode == SImode)
16624                 parts[1] = gen_int_mode (l[2], SImode);
16625               else if (HOST_BITS_PER_WIDE_INT >= 64)
16626                 parts[1]
16627                   = gen_int_mode
16628                       ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16629                        + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16630                        DImode);
16631               else
16632                 parts[1] = immed_double_const (l[2], l[3], DImode);
16633             }
16634           else
16635             gcc_unreachable ();
16636         }
16637     }
16638
16639   return size;
16640 }
16641
16642 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16643    Return false when normal moves are needed; true when all required
16644    insns have been emitted.  Operands 2-4 contain the input values
16645    int the correct order; operands 5-7 contain the output values.  */
16646
16647 void
16648 ix86_split_long_move (rtx operands[])
16649 {
16650   rtx part[2][4];
16651   int nparts, i, j;
16652   int push = 0;
16653   int collisions = 0;
16654   enum machine_mode mode = GET_MODE (operands[0]);
16655   bool collisionparts[4];
16656
16657   /* The DFmode expanders may ask us to move double.
16658      For 64bit target this is single move.  By hiding the fact
16659      here we simplify i386.md splitters.  */
16660   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16661     {
16662       /* Optimize constant pool reference to immediates.  This is used by
16663          fp moves, that force all constants to memory to allow combining.  */
16664
16665       if (MEM_P (operands[1])
16666           && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16667           && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16668         operands[1] = get_pool_constant (XEXP (operands[1], 0));
16669       if (push_operand (operands[0], VOIDmode))
16670         {
16671           operands[0] = copy_rtx (operands[0]);
16672           PUT_MODE (operands[0], Pmode);
16673         }
16674       else
16675         operands[0] = gen_lowpart (DImode, operands[0]);
16676       operands[1] = gen_lowpart (DImode, operands[1]);
16677       emit_move_insn (operands[0], operands[1]);
16678       return;
16679     }
16680
16681   /* The only non-offsettable memory we handle is push.  */
16682   if (push_operand (operands[0], VOIDmode))
16683     push = 1;
16684   else
16685     gcc_assert (!MEM_P (operands[0])
16686                 || offsettable_memref_p (operands[0]));
16687
16688   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16689   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16690
16691   /* When emitting push, take care for source operands on the stack.  */
16692   if (push && MEM_P (operands[1])
16693       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16694     {
16695       rtx src_base = XEXP (part[1][nparts - 1], 0);
16696
16697       /* Compensate for the stack decrement by 4.  */
16698       if (!TARGET_64BIT && nparts == 3
16699           && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
16700         src_base = plus_constant (src_base, 4);
16701
16702       /* src_base refers to the stack pointer and is
16703          automatically decreased by emitted push.  */
16704       for (i = 0; i < nparts; i++)
16705         part[1][i] = change_address (part[1][i],
16706                                      GET_MODE (part[1][i]), src_base);
16707     }
16708
16709   /* We need to do copy in the right order in case an address register
16710      of the source overlaps the destination.  */
16711   if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16712     {
16713       rtx tmp;
16714
16715       for (i = 0; i < nparts; i++)
16716         {
16717           collisionparts[i]
16718             = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16719           if (collisionparts[i])
16720             collisions++;
16721         }
16722
16723       /* Collision in the middle part can be handled by reordering.  */
16724       if (collisions == 1 && nparts == 3 && collisionparts [1])
16725         {
16726           tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16727           tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16728         }
16729       else if (collisions == 1
16730                && nparts == 4
16731                && (collisionparts [1] || collisionparts [2]))
16732         {
16733           if (collisionparts [1])
16734             {
16735               tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16736               tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16737             }
16738           else
16739             {
16740               tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16741               tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16742             }
16743         }
16744
16745       /* If there are more collisions, we can't handle it by reordering.
16746          Do an lea to the last part and use only one colliding move.  */
16747       else if (collisions > 1)
16748         {
16749           rtx base;
16750
16751           collisions = 1;
16752
16753           base = part[0][nparts - 1];
16754
16755           /* Handle the case when the last part isn't valid for lea.
16756              Happens in 64-bit mode storing the 12-byte XFmode.  */
16757           if (GET_MODE (base) != Pmode)
16758             base = gen_rtx_REG (Pmode, REGNO (base));
16759
16760           emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16761           part[1][0] = replace_equiv_address (part[1][0], base);
16762           for (i = 1; i < nparts; i++)
16763             {
16764               tmp = plus_constant (base, UNITS_PER_WORD * i);
16765               part[1][i] = replace_equiv_address (part[1][i], tmp);
16766             }
16767         }
16768     }
16769
16770   if (push)
16771     {
16772       if (!TARGET_64BIT)
16773         {
16774           if (nparts == 3)
16775             {
16776               if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16777                 emit_insn (gen_addsi3 (stack_pointer_rtx,
16778                                        stack_pointer_rtx, GEN_INT (-4)));
16779               emit_move_insn (part[0][2], part[1][2]);
16780             }
16781           else if (nparts == 4)
16782             {
16783               emit_move_insn (part[0][3], part[1][3]);
16784               emit_move_insn (part[0][2], part[1][2]);
16785             }
16786         }
16787       else
16788         {
16789           /* In 64bit mode we don't have 32bit push available.  In case this is
16790              register, it is OK - we will just use larger counterpart.  We also
16791              retype memory - these comes from attempt to avoid REX prefix on
16792              moving of second half of TFmode value.  */
16793           if (GET_MODE (part[1][1]) == SImode)
16794             {
16795               switch (GET_CODE (part[1][1]))
16796                 {
16797                 case MEM:
16798                   part[1][1] = adjust_address (part[1][1], DImode, 0);
16799                   break;
16800
16801                 case REG:
16802                   part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16803                   break;
16804
16805                 default:
16806                   gcc_unreachable ();
16807                 }
16808
16809               if (GET_MODE (part[1][0]) == SImode)
16810                 part[1][0] = part[1][1];
16811             }
16812         }
16813       emit_move_insn (part[0][1], part[1][1]);
16814       emit_move_insn (part[0][0], part[1][0]);
16815       return;
16816     }
16817
16818   /* Choose correct order to not overwrite the source before it is copied.  */
16819   if ((REG_P (part[0][0])
16820        && REG_P (part[1][1])
16821        && (REGNO (part[0][0]) == REGNO (part[1][1])
16822            || (nparts == 3
16823                && REGNO (part[0][0]) == REGNO (part[1][2]))
16824            || (nparts == 4
16825                && REGNO (part[0][0]) == REGNO (part[1][3]))))
16826       || (collisions > 0
16827           && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16828     {
16829       for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16830         {
16831           operands[2 + i] = part[0][j];
16832           operands[6 + i] = part[1][j];
16833         }
16834     }
16835   else
16836     {
16837       for (i = 0; i < nparts; i++)
16838         {
16839           operands[2 + i] = part[0][i];
16840           operands[6 + i] = part[1][i];
16841         }
16842     }
16843
16844   /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
16845   if (optimize_insn_for_size_p ())
16846     {
16847       for (j = 0; j < nparts - 1; j++)
16848         if (CONST_INT_P (operands[6 + j])
16849             && operands[6 + j] != const0_rtx
16850             && REG_P (operands[2 + j]))
16851           for (i = j; i < nparts - 1; i++)
16852             if (CONST_INT_P (operands[7 + i])
16853                 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16854               operands[7 + i] = operands[2 + j];
16855     }
16856
16857   for (i = 0; i < nparts; i++)
16858     emit_move_insn (operands[2 + i], operands[6 + i]);
16859
16860   return;
16861 }
16862
16863 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16864    left shift by a constant, either using a single shift or
16865    a sequence of add instructions.  */
16866
16867 static void
16868 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16869 {
16870   if (count == 1)
16871     {
16872       emit_insn ((mode == DImode
16873                   ? gen_addsi3
16874                   : gen_adddi3) (operand, operand, operand));
16875     }
16876   else if (!optimize_insn_for_size_p ()
16877            && count * ix86_cost->add <= ix86_cost->shift_const)
16878     {
16879       int i;
16880       for (i=0; i<count; i++)
16881         {
16882           emit_insn ((mode == DImode
16883                       ? gen_addsi3
16884                       : gen_adddi3) (operand, operand, operand));
16885         }
16886     }
16887   else
16888     emit_insn ((mode == DImode
16889                 ? gen_ashlsi3
16890                 : gen_ashldi3) (operand, operand, GEN_INT (count)));
16891 }
16892
16893 void
16894 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16895 {
16896   rtx low[2], high[2];
16897   int count;
16898   const int single_width = mode == DImode ? 32 : 64;
16899
16900   if (CONST_INT_P (operands[2]))
16901     {
16902       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16903       count = INTVAL (operands[2]) & (single_width * 2 - 1);
16904
16905       if (count >= single_width)
16906         {
16907           emit_move_insn (high[0], low[1]);
16908           emit_move_insn (low[0], const0_rtx);
16909
16910           if (count > single_width)
16911             ix86_expand_ashl_const (high[0], count - single_width, mode);
16912         }
16913       else
16914         {
16915           if (!rtx_equal_p (operands[0], operands[1]))
16916             emit_move_insn (operands[0], operands[1]);
16917           emit_insn ((mode == DImode
16918                      ? gen_x86_shld
16919                      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16920           ix86_expand_ashl_const (low[0], count, mode);
16921         }
16922       return;
16923     }
16924
16925   (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16926
16927   if (operands[1] == const1_rtx)
16928     {
16929       /* Assuming we've chosen a QImode capable registers, then 1 << N
16930          can be done with two 32/64-bit shifts, no branches, no cmoves.  */
16931       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16932         {
16933           rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16934
16935           ix86_expand_clear (low[0]);
16936           ix86_expand_clear (high[0]);
16937           emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16938
16939           d = gen_lowpart (QImode, low[0]);
16940           d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16941           s = gen_rtx_EQ (QImode, flags, const0_rtx);
16942           emit_insn (gen_rtx_SET (VOIDmode, d, s));
16943
16944           d = gen_lowpart (QImode, high[0]);
16945           d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16946           s = gen_rtx_NE (QImode, flags, const0_rtx);
16947           emit_insn (gen_rtx_SET (VOIDmode, d, s));
16948         }
16949
16950       /* Otherwise, we can get the same results by manually performing
16951          a bit extract operation on bit 5/6, and then performing the two
16952          shifts.  The two methods of getting 0/1 into low/high are exactly
16953          the same size.  Avoiding the shift in the bit extract case helps
16954          pentium4 a bit; no one else seems to care much either way.  */
16955       else
16956         {
16957           rtx x;
16958
16959           if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16960             x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16961           else
16962             x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16963           emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16964
16965           emit_insn ((mode == DImode
16966                       ? gen_lshrsi3
16967                       : gen_lshrdi3) (high[0], high[0],
16968                                       GEN_INT (mode == DImode ? 5 : 6)));
16969           emit_insn ((mode == DImode
16970                       ? gen_andsi3
16971                       : gen_anddi3) (high[0], high[0], const1_rtx));
16972           emit_move_insn (low[0], high[0]);
16973           emit_insn ((mode == DImode
16974                       ? gen_xorsi3
16975                       : gen_xordi3) (low[0], low[0], const1_rtx));
16976         }
16977
16978       emit_insn ((mode == DImode
16979                     ? gen_ashlsi3
16980                     : gen_ashldi3) (low[0], low[0], operands[2]));
16981       emit_insn ((mode == DImode
16982                     ? gen_ashlsi3
16983                     : gen_ashldi3) (high[0], high[0], operands[2]));
16984       return;
16985     }
16986
16987   if (operands[1] == constm1_rtx)
16988     {
16989       /* For -1 << N, we can avoid the shld instruction, because we
16990          know that we're shifting 0...31/63 ones into a -1.  */
16991       emit_move_insn (low[0], constm1_rtx);
16992       if (optimize_insn_for_size_p ())
16993         emit_move_insn (high[0], low[0]);
16994       else
16995         emit_move_insn (high[0], constm1_rtx);
16996     }
16997   else
16998     {
16999       if (!rtx_equal_p (operands[0], operands[1]))
17000         emit_move_insn (operands[0], operands[1]);
17001
17002       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17003       emit_insn ((mode == DImode
17004                   ? gen_x86_shld
17005                   : gen_x86_64_shld) (high[0], low[0], operands[2]));
17006     }
17007
17008   emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
17009
17010   if (TARGET_CMOVE && scratch)
17011     {
17012       ix86_expand_clear (scratch);
17013       emit_insn ((mode == DImode
17014                   ? gen_x86_shift_adj_1
17015                   : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
17016                                              scratch));
17017     }
17018   else
17019     emit_insn ((mode == DImode
17020                 ? gen_x86_shift_adj_2
17021                 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
17022 }
17023
17024 void
17025 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17026 {
17027   rtx low[2], high[2];
17028   int count;
17029   const int single_width = mode == DImode ? 32 : 64;
17030
17031   if (CONST_INT_P (operands[2]))
17032     {
17033       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17034       count = INTVAL (operands[2]) & (single_width * 2 - 1);
17035
17036       if (count == single_width * 2 - 1)
17037         {
17038           emit_move_insn (high[0], high[1]);
17039           emit_insn ((mode == DImode
17040                       ? gen_ashrsi3
17041                       : gen_ashrdi3) (high[0], high[0],
17042                                       GEN_INT (single_width - 1)));
17043           emit_move_insn (low[0], high[0]);
17044
17045         }
17046       else if (count >= single_width)
17047         {
17048           emit_move_insn (low[0], high[1]);
17049           emit_move_insn (high[0], low[0]);
17050           emit_insn ((mode == DImode
17051                       ? gen_ashrsi3
17052                       : gen_ashrdi3) (high[0], high[0],
17053                                       GEN_INT (single_width - 1)));
17054           if (count > single_width)
17055             emit_insn ((mode == DImode
17056                         ? gen_ashrsi3
17057                         : gen_ashrdi3) (low[0], low[0],
17058                                         GEN_INT (count - single_width)));
17059         }
17060       else
17061         {
17062           if (!rtx_equal_p (operands[0], operands[1]))
17063             emit_move_insn (operands[0], operands[1]);
17064           emit_insn ((mode == DImode
17065                       ? gen_x86_shrd
17066                       : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17067           emit_insn ((mode == DImode
17068                       ? gen_ashrsi3
17069                       : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
17070         }
17071     }
17072   else
17073     {
17074       if (!rtx_equal_p (operands[0], operands[1]))
17075         emit_move_insn (operands[0], operands[1]);
17076
17077       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17078
17079       emit_insn ((mode == DImode
17080                   ? gen_x86_shrd
17081                   : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17082       emit_insn ((mode == DImode
17083                   ? gen_ashrsi3
17084                   : gen_ashrdi3)  (high[0], high[0], operands[2]));
17085
17086       if (TARGET_CMOVE && scratch)
17087         {
17088           emit_move_insn (scratch, high[0]);
17089           emit_insn ((mode == DImode
17090                       ? gen_ashrsi3
17091                       : gen_ashrdi3) (scratch, scratch,
17092                                       GEN_INT (single_width - 1)));
17093           emit_insn ((mode == DImode
17094                       ? gen_x86_shift_adj_1
17095                       : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17096                                                  scratch));
17097         }
17098       else
17099         emit_insn ((mode == DImode
17100                     ? gen_x86_shift_adj_3
17101                     : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
17102     }
17103 }
17104
17105 void
17106 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17107 {
17108   rtx low[2], high[2];
17109   int count;
17110   const int single_width = mode == DImode ? 32 : 64;
17111
17112   if (CONST_INT_P (operands[2]))
17113     {
17114       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17115       count = INTVAL (operands[2]) & (single_width * 2 - 1);
17116
17117       if (count >= single_width)
17118         {
17119           emit_move_insn (low[0], high[1]);
17120           ix86_expand_clear (high[0]);
17121
17122           if (count > single_width)
17123             emit_insn ((mode == DImode
17124                         ? gen_lshrsi3
17125                         : gen_lshrdi3) (low[0], low[0],
17126                                         GEN_INT (count - single_width)));
17127         }
17128       else
17129         {
17130           if (!rtx_equal_p (operands[0], operands[1]))
17131             emit_move_insn (operands[0], operands[1]);
17132           emit_insn ((mode == DImode
17133                       ? gen_x86_shrd
17134                       : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17135           emit_insn ((mode == DImode
17136                       ? gen_lshrsi3
17137                       : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
17138         }
17139     }
17140   else
17141     {
17142       if (!rtx_equal_p (operands[0], operands[1]))
17143         emit_move_insn (operands[0], operands[1]);
17144
17145       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17146
17147       emit_insn ((mode == DImode
17148                   ? gen_x86_shrd
17149                   : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17150       emit_insn ((mode == DImode
17151                   ? gen_lshrsi3
17152                   : gen_lshrdi3) (high[0], high[0], operands[2]));
17153
17154       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
17155       if (TARGET_CMOVE && scratch)
17156         {
17157           ix86_expand_clear (scratch);
17158           emit_insn ((mode == DImode
17159                       ? gen_x86_shift_adj_1
17160                       : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17161                                                  scratch));
17162         }
17163       else
17164         emit_insn ((mode == DImode
17165                     ? gen_x86_shift_adj_2
17166                     : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17167     }
17168 }
17169
17170 /* Predict just emitted jump instruction to be taken with probability PROB.  */
17171 static void
17172 predict_jump (int prob)
17173 {
17174   rtx insn = get_last_insn ();
17175   gcc_assert (JUMP_P (insn));
17176   add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17177 }
17178
17179 /* Helper function for the string operations below.  Dest VARIABLE whether
17180    it is aligned to VALUE bytes.  If true, jump to the label.  */
17181 static rtx
17182 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17183 {
17184   rtx label = gen_label_rtx ();
17185   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17186   if (GET_MODE (variable) == DImode)
17187     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17188   else
17189     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17190   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17191                            1, label);
17192   if (epilogue)
17193     predict_jump (REG_BR_PROB_BASE * 50 / 100);
17194   else
17195     predict_jump (REG_BR_PROB_BASE * 90 / 100);
17196   return label;
17197 }
17198
17199 /* Adjust COUNTER by the VALUE.  */
17200 static void
17201 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17202 {
17203   if (GET_MODE (countreg) == DImode)
17204     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17205   else
17206     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17207 }
17208
17209 /* Zero extend possibly SImode EXP to Pmode register.  */
17210 rtx
17211 ix86_zero_extend_to_Pmode (rtx exp)
17212 {
17213   rtx r;
17214   if (GET_MODE (exp) == VOIDmode)
17215     return force_reg (Pmode, exp);
17216   if (GET_MODE (exp) == Pmode)
17217     return copy_to_mode_reg (Pmode, exp);
17218   r = gen_reg_rtx (Pmode);
17219   emit_insn (gen_zero_extendsidi2 (r, exp));
17220   return r;
17221 }
17222
17223 /* Divide COUNTREG by SCALE.  */
17224 static rtx
17225 scale_counter (rtx countreg, int scale)
17226 {
17227   rtx sc;
17228
17229   if (scale == 1)
17230     return countreg;
17231   if (CONST_INT_P (countreg))
17232     return GEN_INT (INTVAL (countreg) / scale);
17233   gcc_assert (REG_P (countreg));
17234
17235   sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17236                             GEN_INT (exact_log2 (scale)),
17237                             NULL, 1, OPTAB_DIRECT);
17238   return sc;
17239 }
17240
17241 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
17242    DImode for constant loop counts.  */
17243
17244 static enum machine_mode
17245 counter_mode (rtx count_exp)
17246 {
17247   if (GET_MODE (count_exp) != VOIDmode)
17248     return GET_MODE (count_exp);
17249   if (!CONST_INT_P (count_exp))
17250     return Pmode;
17251   if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17252     return DImode;
17253   return SImode;
17254 }
17255
17256 /* When SRCPTR is non-NULL, output simple loop to move memory
17257    pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17258    overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
17259    equivalent loop to set memory by VALUE (supposed to be in MODE).
17260
17261    The size is rounded down to whole number of chunk size moved at once.
17262    SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */
17263
17264
17265 static void
17266 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17267                                rtx destptr, rtx srcptr, rtx value,
17268                                rtx count, enum machine_mode mode, int unroll,
17269                                int expected_size)
17270 {
17271   rtx out_label, top_label, iter, tmp;
17272   enum machine_mode iter_mode = counter_mode (count);
17273   rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17274   rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17275   rtx size;
17276   rtx x_addr;
17277   rtx y_addr;
17278   int i;
17279
17280   top_label = gen_label_rtx ();
17281   out_label = gen_label_rtx ();
17282   iter = gen_reg_rtx (iter_mode);
17283
17284   size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17285                               NULL, 1, OPTAB_DIRECT);
17286   /* Those two should combine.  */
17287   if (piece_size == const1_rtx)
17288     {
17289       emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17290                                true, out_label);
17291       predict_jump (REG_BR_PROB_BASE * 10 / 100);
17292     }
17293   emit_move_insn (iter, const0_rtx);
17294
17295   emit_label (top_label);
17296
17297   tmp = convert_modes (Pmode, iter_mode, iter, true);
17298   x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17299   destmem = change_address (destmem, mode, x_addr);
17300
17301   if (srcmem)
17302     {
17303       y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17304       srcmem = change_address (srcmem, mode, y_addr);
17305
17306       /* When unrolling for chips that reorder memory reads and writes,
17307          we can save registers by using single temporary.
17308          Also using 4 temporaries is overkill in 32bit mode.  */
17309       if (!TARGET_64BIT && 0)
17310         {
17311           for (i = 0; i < unroll; i++)
17312             {
17313               if (i)
17314                 {
17315                   destmem =
17316                     adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17317                   srcmem =
17318                     adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17319                 }
17320               emit_move_insn (destmem, srcmem);
17321             }
17322         }
17323       else
17324         {
17325           rtx tmpreg[4];
17326           gcc_assert (unroll <= 4);
17327           for (i = 0; i < unroll; i++)
17328             {
17329               tmpreg[i] = gen_reg_rtx (mode);
17330               if (i)
17331                 {
17332                   srcmem =
17333                     adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17334                 }
17335               emit_move_insn (tmpreg[i], srcmem);
17336             }
17337           for (i = 0; i < unroll; i++)
17338             {
17339               if (i)
17340                 {
17341                   destmem =
17342                     adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17343                 }
17344               emit_move_insn (destmem, tmpreg[i]);
17345             }
17346         }
17347     }
17348   else
17349     for (i = 0; i < unroll; i++)
17350       {
17351         if (i)
17352           destmem =
17353             adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17354         emit_move_insn (destmem, value);
17355       }
17356
17357   tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17358                              true, OPTAB_LIB_WIDEN);
17359   if (tmp != iter)
17360     emit_move_insn (iter, tmp);
17361
17362   emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17363                            true, top_label);
17364   if (expected_size != -1)
17365     {
17366       expected_size /= GET_MODE_SIZE (mode) * unroll;
17367       if (expected_size == 0)
17368         predict_jump (0);
17369       else if (expected_size > REG_BR_PROB_BASE)
17370         predict_jump (REG_BR_PROB_BASE - 1);
17371       else
17372         predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17373     }
17374   else
17375     predict_jump (REG_BR_PROB_BASE * 80 / 100);
17376   iter = ix86_zero_extend_to_Pmode (iter);
17377   tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17378                              true, OPTAB_LIB_WIDEN);
17379   if (tmp != destptr)
17380     emit_move_insn (destptr, tmp);
17381   if (srcptr)
17382     {
17383       tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17384                                  true, OPTAB_LIB_WIDEN);
17385       if (tmp != srcptr)
17386         emit_move_insn (srcptr, tmp);
17387     }
17388   emit_label (out_label);
17389 }
17390
17391 /* Output "rep; mov" instruction.
17392    Arguments have same meaning as for previous function */
17393 static void
17394 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17395                            rtx destptr, rtx srcptr,
17396                            rtx count,
17397                            enum machine_mode mode)
17398 {
17399   rtx destexp;
17400   rtx srcexp;
17401   rtx countreg;
17402
17403   /* If the size is known, it is shorter to use rep movs.  */
17404   if (mode == QImode && CONST_INT_P (count)
17405       && !(INTVAL (count) & 3))
17406     mode = SImode;
17407
17408   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17409     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17410   if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17411     srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17412   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17413   if (mode != QImode)
17414     {
17415       destexp = gen_rtx_ASHIFT (Pmode, countreg,
17416                                 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17417       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17418       srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17419                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17420       srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17421     }
17422   else
17423     {
17424       destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17425       srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17426     }
17427   if (CONST_INT_P (count))
17428     {
17429       count = GEN_INT (INTVAL (count)
17430                        & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17431       destmem = shallow_copy_rtx (destmem);
17432       srcmem = shallow_copy_rtx (srcmem);
17433       set_mem_size (destmem, count);
17434       set_mem_size (srcmem, count);
17435     }
17436   else
17437     {
17438       if (MEM_SIZE (destmem))
17439         set_mem_size (destmem, NULL_RTX);
17440       if (MEM_SIZE (srcmem))
17441         set_mem_size (srcmem, NULL_RTX);
17442     }
17443   emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17444                           destexp, srcexp));
17445 }
17446
17447 /* Output "rep; stos" instruction.
17448    Arguments have same meaning as for previous function */
17449 static void
17450 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17451                             rtx count, enum machine_mode mode,
17452                             rtx orig_value)
17453 {
17454   rtx destexp;
17455   rtx countreg;
17456
17457   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17458     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17459   value = force_reg (mode, gen_lowpart (mode, value));
17460   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17461   if (mode != QImode)
17462     {
17463       destexp = gen_rtx_ASHIFT (Pmode, countreg,
17464                                 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17465       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17466     }
17467   else
17468     destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17469   if (orig_value == const0_rtx && CONST_INT_P (count))
17470     {
17471       count = GEN_INT (INTVAL (count)
17472                        & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17473       destmem = shallow_copy_rtx (destmem);
17474       set_mem_size (destmem, count);
17475     }
17476   else if (MEM_SIZE (destmem))
17477     set_mem_size (destmem, NULL_RTX);
17478   emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17479 }
17480
17481 static void
17482 emit_strmov (rtx destmem, rtx srcmem,
17483              rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17484 {
17485   rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17486   rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17487   emit_insn (gen_strmov (destptr, dest, srcptr, src));
17488 }
17489
17490 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
17491 static void
17492 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17493                         rtx destptr, rtx srcptr, rtx count, int max_size)
17494 {
17495   rtx src, dest;
17496   if (CONST_INT_P (count))
17497     {
17498       HOST_WIDE_INT countval = INTVAL (count);
17499       int offset = 0;
17500
17501       if ((countval & 0x10) && max_size > 16)
17502         {
17503           if (TARGET_64BIT)
17504             {
17505               emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17506               emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17507             }
17508           else
17509             gcc_unreachable ();
17510           offset += 16;
17511         }
17512       if ((countval & 0x08) && max_size > 8)
17513         {
17514           if (TARGET_64BIT)
17515             emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17516           else
17517             {
17518               emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17519               emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17520             }
17521           offset += 8;
17522         }
17523       if ((countval & 0x04) && max_size > 4)
17524         {
17525           emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17526           offset += 4;
17527         }
17528       if ((countval & 0x02) && max_size > 2)
17529         {
17530           emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17531           offset += 2;
17532         }
17533       if ((countval & 0x01) && max_size > 1)
17534         {
17535           emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17536           offset += 1;
17537         }
17538       return;
17539     }
17540   if (max_size > 8)
17541     {
17542       count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17543                                     count, 1, OPTAB_DIRECT);
17544       expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17545                                      count, QImode, 1, 4);
17546       return;
17547     }
17548
17549   /* When there are stringops, we can cheaply increase dest and src pointers.
17550      Otherwise we save code size by maintaining offset (zero is readily
17551      available from preceding rep operation) and using x86 addressing modes.
17552    */
17553   if (TARGET_SINGLE_STRINGOP)
17554     {
17555       if (max_size > 4)
17556         {
17557           rtx label = ix86_expand_aligntest (count, 4, true);
17558           src = change_address (srcmem, SImode, srcptr);
17559           dest = change_address (destmem, SImode, destptr);
17560           emit_insn (gen_strmov (destptr, dest, srcptr, src));
17561           emit_label (label);
17562           LABEL_NUSES (label) = 1;
17563         }
17564       if (max_size > 2)
17565         {
17566           rtx label = ix86_expand_aligntest (count, 2, true);
17567           src = change_address (srcmem, HImode, srcptr);
17568           dest = change_address (destmem, HImode, destptr);
17569           emit_insn (gen_strmov (destptr, dest, srcptr, src));
17570           emit_label (label);
17571           LABEL_NUSES (label) = 1;
17572         }
17573       if (max_size > 1)
17574         {
17575           rtx label = ix86_expand_aligntest (count, 1, true);
17576           src = change_address (srcmem, QImode, srcptr);
17577           dest = change_address (destmem, QImode, destptr);
17578           emit_insn (gen_strmov (destptr, dest, srcptr, src));
17579           emit_label (label);
17580           LABEL_NUSES (label) = 1;
17581         }
17582     }
17583   else
17584     {
17585       rtx offset = force_reg (Pmode, const0_rtx);
17586       rtx tmp;
17587
17588       if (max_size > 4)
17589         {
17590           rtx label = ix86_expand_aligntest (count, 4, true);
17591           src = change_address (srcmem, SImode, srcptr);
17592           dest = change_address (destmem, SImode, destptr);
17593           emit_move_insn (dest, src);
17594           tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17595                                      true, OPTAB_LIB_WIDEN);
17596           if (tmp != offset)
17597             emit_move_insn (offset, tmp);
17598           emit_label (label);
17599           LABEL_NUSES (label) = 1;
17600         }
17601       if (max_size > 2)
17602         {
17603           rtx label = ix86_expand_aligntest (count, 2, true);
17604           tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17605           src = change_address (srcmem, HImode, tmp);
17606           tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17607           dest = change_address (destmem, HImode, tmp);
17608           emit_move_insn (dest, src);
17609           tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17610                                      true, OPTAB_LIB_WIDEN);
17611           if (tmp != offset)
17612             emit_move_insn (offset, tmp);
17613           emit_label (label);
17614           LABEL_NUSES (label) = 1;
17615         }
17616       if (max_size > 1)
17617         {
17618           rtx label = ix86_expand_aligntest (count, 1, true);
17619           tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17620           src = change_address (srcmem, QImode, tmp);
17621           tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17622           dest = change_address (destmem, QImode, tmp);
17623           emit_move_insn (dest, src);
17624           emit_label (label);
17625           LABEL_NUSES (label) = 1;
17626         }
17627     }
17628 }
17629
17630 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
17631 static void
17632 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17633                                  rtx count, int max_size)
17634 {
17635   count =
17636     expand_simple_binop (counter_mode (count), AND, count,
17637                          GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17638   expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17639                                  gen_lowpart (QImode, value), count, QImode,
17640                                  1, max_size / 2);
17641 }
17642
17643 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
17644 static void
17645 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17646 {
17647   rtx dest;
17648
17649   if (CONST_INT_P (count))
17650     {
17651       HOST_WIDE_INT countval = INTVAL (count);
17652       int offset = 0;
17653
17654       if ((countval & 0x10) && max_size > 16)
17655         {
17656           if (TARGET_64BIT)
17657             {
17658               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17659               emit_insn (gen_strset (destptr, dest, value));
17660               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17661               emit_insn (gen_strset (destptr, dest, value));
17662             }
17663           else
17664             gcc_unreachable ();
17665           offset += 16;
17666         }
17667       if ((countval & 0x08) && max_size > 8)
17668         {
17669           if (TARGET_64BIT)
17670             {
17671               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17672               emit_insn (gen_strset (destptr, dest, value));
17673             }
17674           else
17675             {
17676               dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17677               emit_insn (gen_strset (destptr, dest, value));
17678               dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17679               emit_insn (gen_strset (destptr, dest, value));
17680             }
17681           offset += 8;
17682         }
17683       if ((countval & 0x04) && max_size > 4)
17684         {
17685           dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17686           emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17687           offset += 4;
17688         }
17689       if ((countval & 0x02) && max_size > 2)
17690         {
17691           dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17692           emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17693           offset += 2;
17694         }
17695       if ((countval & 0x01) && max_size > 1)
17696         {
17697           dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17698           emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17699           offset += 1;
17700         }
17701       return;
17702     }
17703   if (max_size > 32)
17704     {
17705       expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17706       return;
17707     }
17708   if (max_size > 16)
17709     {
17710       rtx label = ix86_expand_aligntest (count, 16, true);
17711       if (TARGET_64BIT)
17712         {
17713           dest = change_address (destmem, DImode, destptr);
17714           emit_insn (gen_strset (destptr, dest, value));
17715           emit_insn (gen_strset (destptr, dest, value));
17716         }
17717       else
17718         {
17719           dest = change_address (destmem, SImode, destptr);
17720           emit_insn (gen_strset (destptr, dest, value));
17721           emit_insn (gen_strset (destptr, dest, value));
17722           emit_insn (gen_strset (destptr, dest, value));
17723           emit_insn (gen_strset (destptr, dest, value));
17724         }
17725       emit_label (label);
17726       LABEL_NUSES (label) = 1;
17727     }
17728   if (max_size > 8)
17729     {
17730       rtx label = ix86_expand_aligntest (count, 8, true);
17731       if (TARGET_64BIT)
17732         {
17733           dest = change_address (destmem, DImode, destptr);
17734           emit_insn (gen_strset (destptr, dest, value));
17735         }
17736       else
17737         {
17738           dest = change_address (destmem, SImode, destptr);
17739           emit_insn (gen_strset (destptr, dest, value));
17740           emit_insn (gen_strset (destptr, dest, value));
17741         }
17742       emit_label (label);
17743       LABEL_NUSES (label) = 1;
17744     }
17745   if (max_size > 4)
17746     {
17747       rtx label = ix86_expand_aligntest (count, 4, true);
17748       dest = change_address (destmem, SImode, destptr);
17749       emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17750       emit_label (label);
17751       LABEL_NUSES (label) = 1;
17752     }
17753   if (max_size > 2)
17754     {
17755       rtx label = ix86_expand_aligntest (count, 2, true);
17756       dest = change_address (destmem, HImode, destptr);
17757       emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17758       emit_label (label);
17759       LABEL_NUSES (label) = 1;
17760     }
17761   if (max_size > 1)
17762     {
17763       rtx label = ix86_expand_aligntest (count, 1, true);
17764       dest = change_address (destmem, QImode, destptr);
17765       emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17766       emit_label (label);
17767       LABEL_NUSES (label) = 1;
17768     }
17769 }
17770
17771 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
17772    DESIRED_ALIGNMENT.  */
17773 static void
17774 expand_movmem_prologue (rtx destmem, rtx srcmem,
17775                         rtx destptr, rtx srcptr, rtx count,
17776                         int align, int desired_alignment)
17777 {
17778   if (align <= 1 && desired_alignment > 1)
17779     {
17780       rtx label = ix86_expand_aligntest (destptr, 1, false);
17781       srcmem = change_address (srcmem, QImode, srcptr);
17782       destmem = change_address (destmem, QImode, destptr);
17783       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17784       ix86_adjust_counter (count, 1);
17785       emit_label (label);
17786       LABEL_NUSES (label) = 1;
17787     }
17788   if (align <= 2 && desired_alignment > 2)
17789     {
17790       rtx label = ix86_expand_aligntest (destptr, 2, false);
17791       srcmem = change_address (srcmem, HImode, srcptr);
17792       destmem = change_address (destmem, HImode, destptr);
17793       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17794       ix86_adjust_counter (count, 2);
17795       emit_label (label);
17796       LABEL_NUSES (label) = 1;
17797     }
17798   if (align <= 4 && desired_alignment > 4)
17799     {
17800       rtx label = ix86_expand_aligntest (destptr, 4, false);
17801       srcmem = change_address (srcmem, SImode, srcptr);
17802       destmem = change_address (destmem, SImode, destptr);
17803       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17804       ix86_adjust_counter (count, 4);
17805       emit_label (label);
17806       LABEL_NUSES (label) = 1;
17807     }
17808   gcc_assert (desired_alignment <= 8);
17809 }
17810
17811 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
17812    ALIGN_BYTES is how many bytes need to be copied.  */
17813 static rtx
17814 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17815                                  int desired_align, int align_bytes)
17816 {
17817   rtx src = *srcp;
17818   rtx src_size, dst_size;
17819   int off = 0;
17820   int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17821   if (src_align_bytes >= 0)
17822     src_align_bytes = desired_align - src_align_bytes;
17823   src_size = MEM_SIZE (src);
17824   dst_size = MEM_SIZE (dst);
17825   if (align_bytes & 1)
17826     {
17827       dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17828       src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17829       off = 1;
17830       emit_insn (gen_strmov (destreg, dst, srcreg, src));
17831     }
17832   if (align_bytes & 2)
17833     {
17834       dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17835       src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17836       if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17837         set_mem_align (dst, 2 * BITS_PER_UNIT);
17838       if (src_align_bytes >= 0
17839           && (src_align_bytes & 1) == (align_bytes & 1)
17840           && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17841         set_mem_align (src, 2 * BITS_PER_UNIT);
17842       off = 2;
17843       emit_insn (gen_strmov (destreg, dst, srcreg, src));
17844     }
17845   if (align_bytes & 4)
17846     {
17847       dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17848       src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17849       if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17850         set_mem_align (dst, 4 * BITS_PER_UNIT);
17851       if (src_align_bytes >= 0)
17852         {
17853           unsigned int src_align = 0;
17854           if ((src_align_bytes & 3) == (align_bytes & 3))
17855             src_align = 4;
17856           else if ((src_align_bytes & 1) == (align_bytes & 1))
17857             src_align = 2;
17858           if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17859             set_mem_align (src, src_align * BITS_PER_UNIT);
17860         }
17861       off = 4;
17862       emit_insn (gen_strmov (destreg, dst, srcreg, src));
17863     }
17864   dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17865   src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17866   if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17867     set_mem_align (dst, desired_align * BITS_PER_UNIT);
17868   if (src_align_bytes >= 0)
17869     {
17870       unsigned int src_align = 0;
17871       if ((src_align_bytes & 7) == (align_bytes & 7))
17872         src_align = 8;
17873       else if ((src_align_bytes & 3) == (align_bytes & 3))
17874         src_align = 4;
17875       else if ((src_align_bytes & 1) == (align_bytes & 1))
17876         src_align = 2;
17877       if (src_align > (unsigned int) desired_align)
17878         src_align = desired_align;
17879       if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17880         set_mem_align (src, src_align * BITS_PER_UNIT);
17881     }
17882   if (dst_size)
17883     set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17884   if (src_size)
17885     set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
17886   *srcp = src;
17887   return dst;
17888 }
17889
17890 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
17891    DESIRED_ALIGNMENT.  */
17892 static void
17893 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17894                         int align, int desired_alignment)
17895 {
17896   if (align <= 1 && desired_alignment > 1)
17897     {
17898       rtx label = ix86_expand_aligntest (destptr, 1, false);
17899       destmem = change_address (destmem, QImode, destptr);
17900       emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17901       ix86_adjust_counter (count, 1);
17902       emit_label (label);
17903       LABEL_NUSES (label) = 1;
17904     }
17905   if (align <= 2 && desired_alignment > 2)
17906     {
17907       rtx label = ix86_expand_aligntest (destptr, 2, false);
17908       destmem = change_address (destmem, HImode, destptr);
17909       emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17910       ix86_adjust_counter (count, 2);
17911       emit_label (label);
17912       LABEL_NUSES (label) = 1;
17913     }
17914   if (align <= 4 && desired_alignment > 4)
17915     {
17916       rtx label = ix86_expand_aligntest (destptr, 4, false);
17917       destmem = change_address (destmem, SImode, destptr);
17918       emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17919       ix86_adjust_counter (count, 4);
17920       emit_label (label);
17921       LABEL_NUSES (label) = 1;
17922     }
17923   gcc_assert (desired_alignment <= 8);
17924 }
17925
17926 /* Set enough from DST to align DST known to by aligned by ALIGN to
17927    DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
17928 static rtx
17929 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17930                                  int desired_align, int align_bytes)
17931 {
17932   int off = 0;
17933   rtx dst_size = MEM_SIZE (dst);
17934   if (align_bytes & 1)
17935     {
17936       dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17937       off = 1;
17938       emit_insn (gen_strset (destreg, dst,
17939                              gen_lowpart (QImode, value)));
17940     }
17941   if (align_bytes & 2)
17942     {
17943       dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17944       if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17945         set_mem_align (dst, 2 * BITS_PER_UNIT);
17946       off = 2;
17947       emit_insn (gen_strset (destreg, dst,
17948                              gen_lowpart (HImode, value)));
17949     }
17950   if (align_bytes & 4)
17951     {
17952       dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17953       if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17954         set_mem_align (dst, 4 * BITS_PER_UNIT);
17955       off = 4;
17956       emit_insn (gen_strset (destreg, dst,
17957                              gen_lowpart (SImode, value)));
17958     }
17959   dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17960   if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17961     set_mem_align (dst, desired_align * BITS_PER_UNIT);
17962   if (dst_size)
17963     set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17964   return dst;
17965 }
17966
17967 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
17968 static enum stringop_alg
17969 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17970             int *dynamic_check)
17971 {
17972   const struct stringop_algs * algs;
17973   bool optimize_for_speed;
17974   /* Algorithms using the rep prefix want at least edi and ecx;
17975      additionally, memset wants eax and memcpy wants esi.  Don't
17976      consider such algorithms if the user has appropriated those
17977      registers for their own purposes.  */
17978   bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17979                              || (memset
17980                                  ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17981
17982 #define ALG_USABLE_P(alg) (rep_prefix_usable                    \
17983                            || (alg != rep_prefix_1_byte         \
17984                                && alg != rep_prefix_4_byte      \
17985                                && alg != rep_prefix_8_byte))
17986   const struct processor_costs *cost;
17987
17988   /* Even if the string operation call is cold, we still might spend a lot
17989      of time processing large blocks.  */
17990   if (optimize_function_for_size_p (cfun)
17991       || (optimize_insn_for_size_p ()
17992           && expected_size != -1 && expected_size < 256))
17993     optimize_for_speed = false;
17994   else
17995     optimize_for_speed = true;
17996
17997   cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17998
17999   *dynamic_check = -1;
18000   if (memset)
18001     algs = &cost->memset[TARGET_64BIT != 0];
18002   else
18003     algs = &cost->memcpy[TARGET_64BIT != 0];
18004   if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18005     return stringop_alg;
18006   /* rep; movq or rep; movl is the smallest variant.  */
18007   else if (!optimize_for_speed)
18008     {
18009       if (!count || (count & 3))
18010         return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18011       else
18012         return rep_prefix_usable ? rep_prefix_4_byte : loop;
18013     }
18014   /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
18015    */
18016   else if (expected_size != -1 && expected_size < 4)
18017     return loop_1_byte;
18018   else if (expected_size != -1)
18019     {
18020       unsigned int i;
18021       enum stringop_alg alg = libcall;
18022       for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18023         {
18024           /* We get here if the algorithms that were not libcall-based
18025              were rep-prefix based and we are unable to use rep prefixes
18026              based on global register usage.  Break out of the loop and
18027              use the heuristic below.  */
18028           if (algs->size[i].max == 0)
18029             break;
18030           if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18031             {
18032               enum stringop_alg candidate = algs->size[i].alg;
18033
18034               if (candidate != libcall && ALG_USABLE_P (candidate))
18035                 alg = candidate;
18036               /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18037                  last non-libcall inline algorithm.  */
18038               if (TARGET_INLINE_ALL_STRINGOPS)
18039                 {
18040                   /* When the current size is best to be copied by a libcall,
18041                      but we are still forced to inline, run the heuristic below
18042                      that will pick code for medium sized blocks.  */
18043                   if (alg != libcall)
18044                     return alg;
18045                   break;
18046                 }
18047               else if (ALG_USABLE_P (candidate))
18048                 return candidate;
18049             }
18050         }
18051       gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18052     }
18053   /* When asked to inline the call anyway, try to pick meaningful choice.
18054      We look for maximal size of block that is faster to copy by hand and
18055      take blocks of at most of that size guessing that average size will
18056      be roughly half of the block.
18057
18058      If this turns out to be bad, we might simply specify the preferred
18059      choice in ix86_costs.  */
18060   if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18061       && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18062     {
18063       int max = -1;
18064       enum stringop_alg alg;
18065       int i;
18066       bool any_alg_usable_p = true;
18067
18068       for (i = 0; i < NAX_STRINGOP_ALGS; i++)
18069         {
18070           enum stringop_alg candidate = algs->size[i].alg;
18071           any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18072
18073           if (candidate != libcall && candidate
18074               && ALG_USABLE_P (candidate))
18075               max = algs->size[i].max;
18076         }
18077       /* If there aren't any usable algorithms, then recursing on
18078          smaller sizes isn't going to find anything.  Just return the
18079          simple byte-at-a-time copy loop.  */
18080       if (!any_alg_usable_p)
18081         {
18082           /* Pick something reasonable.  */
18083           if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18084             *dynamic_check = 128;
18085           return loop_1_byte;
18086         }
18087       if (max == -1)
18088         max = 4096;
18089       alg = decide_alg (count, max / 2, memset, dynamic_check);
18090       gcc_assert (*dynamic_check == -1);
18091       gcc_assert (alg != libcall);
18092       if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18093         *dynamic_check = max;
18094       return alg;
18095     }
18096   return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18097 #undef ALG_USABLE_P
18098 }
18099
18100 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
18101    (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).

         ALG is the string operation algorithm that will be used and
         EXPECTED_SIZE is the expected block size, or -1 when unknown.

         Return the alignment, in bytes, that the expansion should establish
         before its main loop.  The result is 0 when ALG is libcall; otherwise
         it is never smaller than ALIGN, and it is exactly ALIGN when
         EXPECTED_SIZE is known and smaller than 4.  ALG must not be
         no_stringop.  */
18102 static int
18103 decide_alignment (int align,
18104                   enum stringop_alg alg,
18105                   int expected_size)
18106 {
18107   int desired_align = 0;
        /* Start from the alignment preferred by the chosen algorithm.  */
18108   switch (alg)
18109     {
18110       case no_stringop:
18111         gcc_unreachable ();
18112       case loop:
18113       case unrolled_loop:
18114         desired_align = GET_MODE_SIZE (Pmode);
18115         break;
18116       case rep_prefix_8_byte:
18117         desired_align = 8;
18118         break;
18119       case rep_prefix_4_byte:
18120         /* PentiumPro has special logic triggering for 8 byte aligned blocks,
18121            copying whole cacheline at once.  */
18122         if (TARGET_PENTIUMPRO)
18123           desired_align = 8;
18124         else
18125           desired_align = 4;
18126         break;
18127       case rep_prefix_1_byte:
18128         /* PentiumPro has special logic triggering for 8 byte aligned blocks,
18129            copying whole cacheline at once.  */
18130         if (TARGET_PENTIUMPRO)
18131           desired_align = 8;
18132         else
18133           desired_align = 1;
18134         break;
18135       case loop_1_byte:
18136         desired_align = 1;
18137         break;
18138       case libcall:
18139         return 0;
18140     }
18141
        /* When optimizing for size, do not ask for more than byte alignment.  */
18142   if (optimize_size)
18143     desired_align = 1;
        /* Never ask for less than what is already known.  */
18144   if (desired_align < align)
18145     desired_align = align;
        /* For blocks expected to be shorter than 4 bytes request no alignment
           beyond ALIGN.  */
18146   if (expected_size != -1 && expected_size < 4)
18147     desired_align = align;
18148   return desired_align;
18149 }
18150
18151 /* Return the smallest power of 2 greater than VAL.
         The comparison is strict: when VAL is itself a power of 2 the result
         is 2 * VAL.  The result is 1 when VAL is zero or negative.  The shift
         is not guarded against overflow, so VAL is expected to be small.  */
18152 static int
18153 smallest_pow2_greater_than (int val)
18154 {
18155   int ret = 1;
        /* Double RET until it exceeds VAL.  */
18156   while (ret <= val)
18157     ret <<= 1;
18158   return ret;
18159 }
18160
18161 /* Expand string move (memcpy) operation.  Use i386 string operations when
18162    profitable.  ix86_expand_setmem contains similar code.  The code depends
18163    upon architecture, block size and alignment, but always has the same
18164    overall structure:
18165
18166    1) Prologue guard: Conditional that jumps to the epilogue for small
18167       blocks that can be handled by the epilogue alone.  This is faster but
18168       also needed for correctness, since the prologue assumes the block is
18169       larger than the desired alignment.
18170
18171       Optional dynamic check for size and libcall for large
18172       blocks is emitted here too, with -minline-stringops-dynamically.
18173
18174    2) Prologue: copy first few bytes in order to get destination aligned
18175       to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
18176       DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18177       We emit either a jump tree on power of two sized blocks, or a byte loop.
18178
18179    3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18180       with specified algorithm.
18181
18182    4) Epilogue: code copying tail of the block that is too small to be
18183       handled by main body (or up to size guarded by prologue guard).

         DST and SRC are the destination and source memory operands; their
         addresses are copied into Pmode registers.  COUNT_EXP is the number
         of bytes to copy; when it is a nonzero CONST_INT the count is used
         at expansion time.  ALIGN_EXP, when a CONST_INT, is the known
         alignment; a larger CONST_INT EXPECTED_ALIGN_EXP, or otherwise a
         larger MEM_ALIGN of DST, replaces it.  EXPECTED_SIZE_EXP, when a
         CONST_INT, supplies the expected size if the count is not a nonzero
         constant.

         Return 0 when the operation is not expanded here (constant count
         above 1 << 30, or decide_alg chose a libcall); return 1 after
         emitting the inline expansion.  */
18184
18185 int
18186 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18187                     rtx expected_align_exp, rtx expected_size_exp)
18188 {
18189   rtx destreg;
18190   rtx srcreg;
18191   rtx label = NULL;  /* Target of the guards that skip the main loop.  */
18192   rtx tmp;
18193   rtx jump_around_label = NULL;  /* Placed after the epilogue; used by the dynamic libcall path.  */
18194   HOST_WIDE_INT align = 1;
18195   unsigned HOST_WIDE_INT count = 0;  /* Constant byte count, 0 when not a nonzero constant.  */
18196   HOST_WIDE_INT expected_size = -1;  /* -1 means unknown.  */
18197   int size_needed = 0, epilogue_size_needed;
18198   int desired_align = 0, align_bytes = 0;
18199   enum stringop_alg alg;
18200   int dynamic_check;
18201   bool need_zero_guard = false;
18202
18203   if (CONST_INT_P (align_exp))
18204     align = INTVAL (align_exp);
18205   /* i386 can do misaligned access on reasonably increased cost.  */
18206   if (CONST_INT_P (expected_align_exp)
18207       && INTVAL (expected_align_exp) > align)
18208     align = INTVAL (expected_align_exp);
18209   /* ALIGN is the minimum of destination and source alignment, but we care here
18210      just about destination alignment.  */
18211   else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18212     align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18213
18214   if (CONST_INT_P (count_exp))
18215     count = expected_size = INTVAL (count_exp);
18216   if (CONST_INT_P (expected_size_exp) && count == 0)
18217     expected_size = INTVAL (expected_size_exp);
18218
18219   /* Make sure we don't need to care about overflow later on.  */
18220   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18221     return 0;
18222
18223   /* Step 0: Decide on preferred algorithm, desired alignment and
18224      size of chunks to be copied by main loop.  */
18225
18226   alg = decide_alg (count, expected_size, false, &dynamic_check);
18227   desired_align = decide_alignment (align, alg, expected_size);
18228
        /* When string operations are not to be aligned, treat the operand as
           already having the desired alignment so that no alignment prologue
           is emitted below.  */
18229   if (!TARGET_ALIGN_STRINGOPS)
18230     align = desired_align;
18231
18232   if (alg == libcall)
18233     return 0;
18234   gcc_assert (alg != no_stringop);
18235   if (!count)
18236     count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18237   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18238   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
        /* SIZE_NEEDED is the number of bytes the main loop handles per
           iteration.  NEED_ZERO_GUARD is set for the loop based algorithms;
           for those a check that skips the main loop may be emitted after
           the alignment prologue.  */
18239   switch (alg)
18240     {
18241     case libcall:
18242     case no_stringop:
18243       gcc_unreachable ();
18244     case loop:
18245       need_zero_guard = true;
18246       size_needed = GET_MODE_SIZE (Pmode);
18247       break;
18248     case unrolled_loop:
18249       need_zero_guard = true;
18250       size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18251       break;
18252     case rep_prefix_8_byte:
18253       size_needed = 8;
18254       break;
18255     case rep_prefix_4_byte:
18256       size_needed = 4;
18257       break;
18258     case rep_prefix_1_byte:
18259       size_needed = 1;
18260       break;
18261     case loop_1_byte:
18262       need_zero_guard = true;
18263       size_needed = 1;
18264       break;
18265     }
18266
18267   epilogue_size_needed = size_needed;
18268
18269   /* Step 1: Prologue guard.  */
18270
18271   /* Alignment code needs count to be in register.  When the count is a
           constant large enough and the misalignment of DST can be computed,
           ALIGN_BYTES becomes the number of bytes the prologue has to copy
           to reach DESIRED_ALIGN and the count may stay constant; otherwise
           ALIGN_BYTES stays 0 and the count is forced into a register.  */
18272   if (CONST_INT_P (count_exp) && desired_align > align)
18273     {
18274       if (INTVAL (count_exp) > desired_align
18275           && INTVAL (count_exp) > size_needed)
18276         {
18277           align_bytes
18278             = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18279           if (align_bytes <= 0)
18280             align_bytes = 0;
18281           else
18282             align_bytes = desired_align - align_bytes;
18283         }
18284       if (align_bytes == 0)
18285         count_exp = force_reg (counter_mode (count_exp), count_exp);
18286     }
18287   gcc_assert (desired_align >= 1 && align >= 1);
18288
18289   /* Ensure that alignment prologue won't copy past end of block.  */
18290   if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18291     {
18292       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18293       /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18294          Make sure it is power of 2.  */
18295       epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18296
18297       if (count)
18298         {
18299           if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18300             {
18301               /* If main algorithm works on QImode, no epilogue is needed.
18302                  For small sizes just don't align anything.  Otherwise the
                       whole block fits in the epilogue, so skip the prologue
                       and the main loop.  */
18303               if (size_needed == 1)
18304                 desired_align = align;
18305               else
18306                 goto epilogue;
18307             }
18308         }
18309       else
18310         {
                /* Count not known at expansion time: emit a runtime branch to
                   LABEL for blocks smaller than EPILOGUE_SIZE_NEEDED.  */
18311           label = gen_label_rtx ();
18312           emit_cmp_and_jump_insns (count_exp,
18313                                    GEN_INT (epilogue_size_needed),
18314                                    LTU, 0, counter_mode (count_exp), 1, label);
18315           if (expected_size == -1 || expected_size < epilogue_size_needed)
18316             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18317           else
18318             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18319         }
18320     }
18321
18322   /* Emit code to decide at run time whether library call or inline should be
18323      used.  DYNAMIC_CHECK is set by decide_alg; -1 means no such check.  */
18324   if (dynamic_check != -1)
18325     {
18326       if (CONST_INT_P (count_exp))
18327         {
                /* Constant count at or above the threshold: emit only the
                   libcall.  COUNT_EXP becomes const0_rtx so that no epilogue
                   is emitted either.  */
18328           if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18329             {
18330               emit_block_move_via_libcall (dst, src, count_exp, false);
18331               count_exp = const0_rtx;
18332               goto epilogue;
18333             }
18334         }
18335       else
18336         {
                /* Blocks below DYNAMIC_CHECK bytes branch to HOT_LABEL and use
                   the inline code; larger ones fall through to the libcall
                   and then jump around the inline code.  */
18337           rtx hot_label = gen_label_rtx ();
18338           jump_around_label = gen_label_rtx ();
18339           emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18340                                    LEU, 0, GET_MODE (count_exp), 1, hot_label);
18341           predict_jump (REG_BR_PROB_BASE * 90 / 100);
18342           emit_block_move_via_libcall (dst, src, count_exp, false);
18343           emit_jump (jump_around_label);
18344           emit_label (hot_label);
18345         }
18346     }
18347
18348   /* Step 2: Alignment prologue.  */
18349
18350   if (desired_align > align)
18351     {
18352       if (align_bytes == 0)
18353         {
18354           /* Except for the first move in epilogue, we no longer know
18355              constant offset in aliasing info.  It does not seem worth
18356              the pain to maintain it for the first move, so throw away
18357              the info early.  */
18358           src = change_address (src, BLKmode, srcreg);
18359           dst = change_address (dst, BLKmode, destreg);
18360           expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18361                                   desired_align);
18362         }
18363       else
18364         {
18365           /* If we know how many bytes need to be stored before dst is
18366              sufficiently aligned, maintain aliasing info accurately.  */
18367           dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18368                                                  desired_align, align_bytes);
18369           count_exp = plus_constant (count_exp, -align_bytes);
18370           count -= align_bytes;
18371         }
18372       if (need_zero_guard
18373           && (count < (unsigned HOST_WIDE_INT) size_needed
18374               || (align_bytes == 0
18375                   && count < ((unsigned HOST_WIDE_INT) size_needed
18376                               + desired_align - align))))
18377         {
18378           /* It is possible that we copied enough so the main loop will not
18379              execute.  */
18380           gcc_assert (size_needed > 1);
18381           if (label == NULL_RTX)
18382             label = gen_label_rtx ();
18383           emit_cmp_and_jump_insns (count_exp,
18384                                    GEN_INT (size_needed),
18385                                    LTU, 0, counter_mode (count_exp), 1, label);
18386           if (expected_size == -1
18387               || expected_size < (desired_align - align) / 2 + size_needed)
18388             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18389           else
18390             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18391         }
18392     }
        /* With a byte sized main loop no epilogue is emitted
           (EPILOGUE_SIZE_NEEDED becomes 1), so the guard label is placed
           right here, in front of the main loop.  When no guard label exists
           at all, the epilogue only has to handle what the main loop leaves
           behind.  */
18393   if (label && size_needed == 1)
18394     {
18395       emit_label (label);
18396       LABEL_NUSES (label) = 1;
18397       label = NULL;
18398       epilogue_size_needed = 1;
18399     }
18400   else if (label == NULL_RTX)
18401     epilogue_size_needed = size_needed;
18402
18403   /* Step 3: Main loop.  */
18404
18405   switch (alg)
18406     {
18407     case libcall:
18408     case no_stringop:
18409       gcc_unreachable ();
18410     case loop_1_byte:
18411       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18412                                      count_exp, QImode, 1, expected_size);
18413       break;
18414     case loop:
18415       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18416                                      count_exp, Pmode, 1, expected_size);
18417       break;
18418     case unrolled_loop:
18419       /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18420          registers for 4 temporaries anyway.  */
18421       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18422                                      count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18423                                      expected_size);
18424       break;
18425     case rep_prefix_8_byte:
18426       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18427                                  DImode);
18428       break;
18429     case rep_prefix_4_byte:
18430       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18431                                  SImode);
18432       break;
18433     case rep_prefix_1_byte:
18434       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18435                                  QImode);
18436       break;
18437     }
18438   /* Adjust properly the offset of src and dest memory for aliasing.  With a
           constant count the main loop has advanced both by COUNT rounded down
           to a multiple of SIZE_NEEDED.  */
18439   if (CONST_INT_P (count_exp))
18440     {
18441       src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18442                                           (count / size_needed) * size_needed);
18443       dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18444                                           (count / size_needed) * size_needed);
18445     }
18446   else
18447     {
18448       src = change_address (src, BLKmode, srcreg);
18449       dst = change_address (dst, BLKmode, destreg);
18450     }
18451
18452   /* Step 4: Epilogue to copy the remaining bytes.  */
18453  epilogue:
18454   if (label)
18455     {
18456       /* When the main loop is done, COUNT_EXP might hold original count,
18457          while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18458          Epilogue code will actually copy
18459          COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if needed.  */
18460
18461       if (size_needed < epilogue_size_needed)
18462         {
18463           tmp =
18464             expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18465                                  GEN_INT (size_needed - 1), count_exp, 1,
18466                                  OPTAB_DIRECT);
18467           if (tmp != count_exp)
18468             emit_move_insn (count_exp, tmp);
18469         }
18470       emit_label (label);
18471       LABEL_NUSES (label) = 1;
18472     }
18473
        /* No epilogue when the libcall path above already handled everything
           (COUNT_EXP is const0_rtx) or when nothing can be left over.  */
18474   if (count_exp != const0_rtx && epilogue_size_needed > 1)
18475     expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18476                             epilogue_size_needed);
18477   if (jump_around_label)
18478     emit_label (jump_around_label);
18479   return 1;
18480 }
18481
18482 /* Helper function for memset expansion.  For QImode value 0xXY produce
18483    0xXYXYXYXY of the width specified by MODE.  This is essentially
18484    VAL * 0x01010101, but we can do slightly better than
18485    synth_mult by unwinding the sequence by hand on CPUs with
18486    slow multiply.

         MODE must be SImode or DImode.  A CONST_INT VAL is replicated at
         expansion time and loaded into a fresh register; for any other VAL
         the replication is emitted as instructions, choosing between a
         multiply and a shift/or sequence from the ix86_cost tables.  Return
         the rtx holding the replicated value.  */
18487 static rtx
18488 promote_duplicated_reg (enum machine_mode mode, rtx val)
18489 {
18490   enum machine_mode valmode = GET_MODE (val);
18491   rtx tmp;
        /* Number of shift+or steps charged to the hand-written sequence in
           the cost comparison below.  */
18492   int nops = mode == DImode ? 3 : 2;
18493
18494   gcc_assert (mode == SImode || mode == DImode);
18495   if (val == const0_rtx)
18496     return copy_to_mode_reg (mode, const0_rtx);
18497   if (CONST_INT_P (val))
18498     {
              /* Replicate the low byte across the word at expansion time.  */
18499       HOST_WIDE_INT v = INTVAL (val) & 255;
18500
18501       v |= v << 8;
18502       v |= v << 16;
18503       if (mode == DImode)
18504         v |= (v << 16) << 16;
18505       return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18506     }
18507
18508   if (valmode == VOIDmode)
18509     valmode = QImode;
18510   if (valmode != QImode)
18511     val = gen_lowpart (QImode, val);
        /* NOTE(review): given the assertion on MODE above, this return
           cannot be reached.  */
18512   if (mode == QImode)
18513     return val;
        /* Without partial register stalls the first shift+or step is replaced
           by a single insert instruction (see below), which the comparison
           accounts for as one extra insn.  */
18514   if (!TARGET_PARTIAL_REG_STALL)
18515     nops--;
        /* Use a multiply by the replicated constant 1 (0x01...01) when the
           cost tables say it is no more expensive than the shift/or
           sequence.  */
18516   if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18517       + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18518       <= (ix86_cost->shift_const + ix86_cost->add) * nops
18519           + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18520     {
18521       rtx reg = convert_modes (mode, QImode, val, true);
18522       tmp = promote_duplicated_reg (mode, const1_rtx);
18523       return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18524                                   OPTAB_DIRECT);
18525     }
18526   else
18527     {
18528       rtx reg = convert_modes (mode, QImode, val, true);
18529
              /* Note the nesting: the first `else' below pairs with the inner
                 test of MODE, the second with the test of
                 TARGET_PARTIAL_REG_STALL.  */
18530       if (!TARGET_PARTIAL_REG_STALL)
18531         if (mode == SImode)
18532           emit_insn (gen_movsi_insv_1 (reg, reg));
18533         else
18534           emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18535       else
18536         {
18537           tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18538                                      NULL, 1, OPTAB_DIRECT);
18539           reg =
18540             expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18541         }
              /* REG |= REG << 16, then for DImode also REG |= REG << 32.  */
18542       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18543                                  NULL, 1, OPTAB_DIRECT);
18544       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18545       if (mode == SImode)
18546         return reg;
18547       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18548                                  NULL, 1, OPTAB_DIRECT);
18549       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18550       return reg;
18551     }
18552 }
18553
18554 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18555    be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18556    alignment from ALIGN to DESIRED_ALIGN.

         DImode is used only when TARGET_64BIT and more than 4 bytes are
         needed, SImode when more than 2 bytes are needed.  VAL is returned
         unchanged when neither the main loop nor the alignment prologue
         needs more than a single byte.  */
18557 static rtx
18558 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18559 {
18560   rtx promoted_val;
18561
18562   if (TARGET_64BIT
18563       && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18564     promoted_val = promote_duplicated_reg (DImode, val);
18565   else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18566     promoted_val = promote_duplicated_reg (SImode, val);
        /* NOTE(review): promote_duplicated_reg asserts that its MODE is SImode
           or DImode, so reaching this arm would trip that assertion.
           Presumably callers never get here with SIZE_NEEDED or
           DESIRED_ALIGN equal to 2 -- confirm.  */
18567   else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18568     promoted_val = promote_duplicated_reg (HImode, val);
18569   else
18570     promoted_val = val;
18571
18572   return promoted_val;
18573 }
18574
18575 /* Expand string set operation (memset).  Use i386 string operations when
18576    profitable.  See the ix86_expand_movmem comment for an explanation of the
18577    individual steps performed.  */
18578 int
18579 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18580                     rtx expected_align_exp, rtx expected_size_exp)
18581 {
18582   rtx destreg;
18583   rtx label = NULL;
18584   rtx tmp;
18585   rtx jump_around_label = NULL;
18586   HOST_WIDE_INT align = 1;
18587   unsigned HOST_WIDE_INT count = 0;
18588   HOST_WIDE_INT expected_size = -1;
18589   int size_needed = 0, epilogue_size_needed;
18590   int desired_align = 0, align_bytes = 0;
18591   enum stringop_alg alg;
18592   rtx promoted_val = NULL;
18593   bool force_loopy_epilogue = false;
18594   int dynamic_check;
18595   bool need_zero_guard = false;
18596
18597   if (CONST_INT_P (align_exp))
18598     align = INTVAL (align_exp);
18599   /* i386 can do misaligned access on reasonably increased cost.  */
18600   if (CONST_INT_P (expected_align_exp)
18601       && INTVAL (expected_align_exp) > align)
18602     align = INTVAL (expected_align_exp);
18603   if (CONST_INT_P (count_exp))
18604     count = expected_size = INTVAL (count_exp);
18605   if (CONST_INT_P (expected_size_exp) && count == 0)
18606     expected_size = INTVAL (expected_size_exp);
18607
18608   /* Make sure we don't need to care about overflow later on.  */
18609   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18610     return 0;
18611
18612   /* Step 0: Decide on preferred algorithm, desired alignment and
18613      size of chunks to be copied by main loop.  */
18614
18615   alg = decide_alg (count, expected_size, true, &dynamic_check);
18616   desired_align = decide_alignment (align, alg, expected_size);
18617
18618   if (!TARGET_ALIGN_STRINGOPS)
18619     align = desired_align;
18620
18621   if (alg == libcall)
18622     return 0;
18623   gcc_assert (alg != no_stringop);
18624   if (!count)
18625     count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18626   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18627   switch (alg)
18628     {
18629     case libcall:
18630     case no_stringop:
18631       gcc_unreachable ();
18632     case loop:
18633       need_zero_guard = true;
18634       size_needed = GET_MODE_SIZE (Pmode);
18635       break;
18636     case unrolled_loop:
18637       need_zero_guard = true;
18638       size_needed = GET_MODE_SIZE (Pmode) * 4;
18639       break;
18640     case rep_prefix_8_byte:
18641       size_needed = 8;
18642       break;
18643     case rep_prefix_4_byte:
18644       size_needed = 4;
18645       break;
18646     case rep_prefix_1_byte:
18647       size_needed = 1;
18648       break;
18649     case loop_1_byte:
18650       need_zero_guard = true;
18651       size_needed = 1;
18652       break;
18653     }
18654   epilogue_size_needed = size_needed;
18655
18656   /* Step 1: Prologue guard.  */
18657
18658   /* Alignment code needs count to be in register.  */
18659   if (CONST_INT_P (count_exp) && desired_align > align)
18660     {
18661       if (INTVAL (count_exp) > desired_align
18662           && INTVAL (count_exp) > size_needed)
18663         {
18664           align_bytes
18665             = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18666           if (align_bytes <= 0)
18667             align_bytes = 0;
18668           else
18669             align_bytes = desired_align - align_bytes;
18670         }
18671       if (align_bytes == 0)
18672         {
18673           enum machine_mode mode = SImode;
18674           if (TARGET_64BIT && (count & ~0xffffffff))
18675             mode = DImode;
18676           count_exp = force_reg (mode, count_exp);
18677         }
18678     }
18679   /* Do the cheap promotion to allow better CSE across the
18680      main loop and epilogue (ie one load of the big constant in the
18681      front of all code.  */
18682   if (CONST_INT_P (val_exp))
18683     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18684                                                    desired_align, align);
18685   /* Ensure that alignment prologue won't copy past end of block.  */
18686   if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18687     {
18688       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18689       /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18690          Make sure it is power of 2.  */
18691       epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18692
18693       /* To improve performance of small blocks, we jump around the VAL
18694          promoting mode.  This mean that if the promoted VAL is not constant,
18695          we might not use it in the epilogue and have to use byte
18696          loop variant.  */
18697       if (epilogue_size_needed > 2 && !promoted_val)
18698         force_loopy_epilogue = true;
18699       if (count)
18700         {
18701           if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18702             {
18703               /* If main algorithm works on QImode, no epilogue is needed.
18704                  For small sizes just don't align anything.  */
18705               if (size_needed == 1)
18706                 desired_align = align;
18707               else
18708                 goto epilogue;
18709             }
18710         }
18711       else
18712         {
18713           label = gen_label_rtx ();
18714           emit_cmp_and_jump_insns (count_exp,
18715                                    GEN_INT (epilogue_size_needed),
18716                                    LTU, 0, counter_mode (count_exp), 1, label);
18717           if (expected_size == -1 || expected_size <= epilogue_size_needed)
18718             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18719           else
18720             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18721         }
18722     }
18723   if (dynamic_check != -1)
18724     {
18725       rtx hot_label = gen_label_rtx ();
18726       jump_around_label = gen_label_rtx ();
18727       emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18728                                LEU, 0, counter_mode (count_exp), 1, hot_label);
18729       predict_jump (REG_BR_PROB_BASE * 90 / 100);
18730       set_storage_via_libcall (dst, count_exp, val_exp, false);
18731       emit_jump (jump_around_label);
18732       emit_label (hot_label);
18733     }
18734
18735   /* Step 2: Alignment prologue.  */
18736
18737   /* Do the expensive promotion once we branched off the small blocks.  */
18738   if (!promoted_val)
18739     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18740                                                    desired_align, align);
18741   gcc_assert (desired_align >= 1 && align >= 1);
18742
18743   if (desired_align > align)
18744     {
18745       if (align_bytes == 0)
18746         {
18747           /* Except for the first move in epilogue, we no longer know
18748              constant offset in aliasing info.  It don't seems to worth
18749              the pain to maintain it for the first move, so throw away
18750              the info early.  */
18751           dst = change_address (dst, BLKmode, destreg);
18752           expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18753                                   desired_align);
18754         }
18755       else
18756         {
18757           /* If we know how many bytes need to be stored before dst is
18758              sufficiently aligned, maintain aliasing info accurately.  */
18759           dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18760                                                  desired_align, align_bytes);
18761           count_exp = plus_constant (count_exp, -align_bytes);
18762           count -= align_bytes;
18763         }
18764       if (need_zero_guard
18765           && (count < (unsigned HOST_WIDE_INT) size_needed
18766               || (align_bytes == 0
18767                   && count < ((unsigned HOST_WIDE_INT) size_needed
18768                               + desired_align - align))))
18769         {
18770           /* It is possible that we copied enough so the main loop will not
18771              execute.  */
18772           gcc_assert (size_needed > 1);
18773           if (label == NULL_RTX)
18774             label = gen_label_rtx ();
18775           emit_cmp_and_jump_insns (count_exp,
18776                                    GEN_INT (size_needed),
18777                                    LTU, 0, counter_mode (count_exp), 1, label);
18778           if (expected_size == -1
18779               || expected_size < (desired_align - align) / 2 + size_needed)
18780             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18781           else
18782             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18783         }
18784     }
18785   if (label && size_needed == 1)
18786     {
18787       emit_label (label);
18788       LABEL_NUSES (label) = 1;
18789       label = NULL;
18790       promoted_val = val_exp;
18791       epilogue_size_needed = 1;
18792     }
18793   else if (label == NULL_RTX)
18794     epilogue_size_needed = size_needed;
18795
18796   /* Step 3: Main loop.  */
18797
18798   switch (alg)
18799     {
18800     case libcall:
18801     case no_stringop:
18802       gcc_unreachable ();
18803     case loop_1_byte:
18804       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18805                                      count_exp, QImode, 1, expected_size);
18806       break;
18807     case loop:
18808       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18809                                      count_exp, Pmode, 1, expected_size);
18810       break;
18811     case unrolled_loop:
18812       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18813                                      count_exp, Pmode, 4, expected_size);
18814       break;
18815     case rep_prefix_8_byte:
18816       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18817                                   DImode, val_exp);
18818       break;
18819     case rep_prefix_4_byte:
18820       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18821                                   SImode, val_exp);
18822       break;
18823     case rep_prefix_1_byte:
18824       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18825                                   QImode, val_exp);
18826       break;
18827     }
18828   /* Adjust properly the offset of src and dest memory for aliasing.  */
18829   if (CONST_INT_P (count_exp))
18830     dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18831                                         (count / size_needed) * size_needed);
18832   else
18833     dst = change_address (dst, BLKmode, destreg);
18834
18835   /* Step 4: Epilogue to copy the remaining bytes.  */
18836
18837   if (label)
18838     {
18839       /* When the main loop is done, COUNT_EXP might hold original count,
18840          while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18841          Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18842          bytes. Compensate if needed.  */
18843
18844       if (size_needed < epilogue_size_needed)
18845         {
18846           tmp =
18847             expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18848                                  GEN_INT (size_needed - 1), count_exp, 1,
18849                                  OPTAB_DIRECT);
18850           if (tmp != count_exp)
18851             emit_move_insn (count_exp, tmp);
18852         }
18853       emit_label (label);
18854       LABEL_NUSES (label) = 1;
18855     }
18856  epilogue:
18857   if (count_exp != const0_rtx && epilogue_size_needed > 1)
18858     {
18859       if (force_loopy_epilogue)
18860         expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18861                                          epilogue_size_needed);
18862       else
18863         expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18864                                 epilogue_size_needed);
18865     }
18866   if (jump_around_label)
18867     emit_label (jump_around_label);
18868   return 1;
18869 }
18870
18871 /* Expand the appropriate insns for doing strlen if not just doing
18872    repnz; scasb
18873
18874    OUT = result register; the caller initializes it with the start address
18875    SRC = MEM for the string; used as the template for the loads below
18876    ALIGN_RTX = alignment of the address; treated as 0 (unknown) unless
18877         it is a CONST_INT
18878
18879    This is just the body.  It needs the initialization mentioned above and
18880    some address computing at the end; ix86_expand_strlen does both.  */
18881
18882 static void
18883 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18884 {
18885   int align;
18886   rtx tmp;
18887   rtx align_2_label = NULL_RTX;
18888   rtx align_3_label = NULL_RTX;
18889   rtx align_4_label = gen_label_rtx ();
18890   rtx end_0_label = gen_label_rtx ();
18891   rtx mem;
18892   rtx tmpreg = gen_reg_rtx (SImode);
18893   rtx scratch = gen_reg_rtx (SImode);
18894   rtx cmp;
18895
18896   align = 0;
18897   if (CONST_INT_P (align_rtx))
18898     align = INTVAL (align_rtx);
18899
18900   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
18901
18902   /* Is the alignment unknown (0) or known to be less than 4?  */
18903   if (align < 4)
18904     {
18905       rtx scratch1 = gen_reg_rtx (Pmode);
18906       emit_move_insn (scratch1, out);
18907       /* Is there a known alignment and is it not 2? */
18908       if (align != 2)
18909         {
18910           align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18911           align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18912
18913           /* Leave just the 2 lower bits (address & 3).  */
18914           align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18915                                     NULL_RTX, 0, OPTAB_WIDEN);
18916
18917           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18918                                    Pmode, 1, align_4_label);
18919           emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18920                                    Pmode, 1, align_2_label);
18921           emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18922                                    Pmode, 1, align_3_label);
18923         }
18924       else
18925         {
18926           /* Since the alignment is 2, we have to check 2 or 0 bytes;
18927              check if is aligned to 4 - byte.  */
18928
18929           align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18930                                     NULL_RTX, 0, OPTAB_WIDEN);
18931
18932           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18933                                    Pmode, 1, align_4_label);
18934         }
18935
          /* MEM is addressed through OUT, so the same rtx is reused for
             every byte test after OUT has been incremented.  */
18936       mem = change_address (src, QImode, out);
18937
18938       /* Now compare the bytes.  */
18939
18940       /* Compare the first n unaligned byte on a byte per byte basis.  */
18941       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18942                                QImode, 1, end_0_label);
18943
18944       /* Increment the address.  */
18945       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18946
18947       /* Not needed with an alignment of 2 */
18948       if (align != 2)
18949         {
18950           emit_label (align_2_label);
18951
18952           emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18953                                    end_0_label);
18954
18955           emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18956
18957           emit_label (align_3_label);
18958         }
18959
18960       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18961                                end_0_label);
18962
18963       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18964     }
18965
18966   /* Generate loop to check 4 bytes at a time.  It is not a good idea to
18967      align this loop.  It gives only huge programs, but does not help to
18968      speed up.  */
18969   emit_label (align_4_label);
18970
18971   mem = change_address (src, SImode, out);
18972   emit_move_insn (scratch, mem);
18973   emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18974
18975   /* This formula yields a nonzero result iff one of the bytes is zero.
18976      This saves three branches inside loop and many cycles.
         The value computed is (x - 0x01010101) & ~x & 0x80808080.  */
18977
18978   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18979   emit_insn (gen_one_cmplsi2 (scratch, scratch));
18980   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18981   emit_insn (gen_andsi3 (tmpreg, tmpreg,
18982                          gen_int_mode (0x80808080, SImode)));
18983   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18984                            align_4_label);
18985
18986   if (TARGET_CMOVE)
18987     {
18988        rtx reg = gen_reg_rtx (SImode);
18989        rtx reg2 = gen_reg_rtx (Pmode);
18990        emit_move_insn (reg, tmpreg);
18991        emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18992
18993        /* If zero is not in the first two bytes, move two bytes forward.  */
18994        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18995        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18996        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18997        emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18998                                gen_rtx_IF_THEN_ELSE (SImode, tmp,
18999                                                      reg,
19000                                                      tmpreg)));
19001        /* Emit lea manually to avoid clobbering of flags.  */
           /* The SET itself is built with VOIDmode, like every other SET
              emitted in this function; only its operands carry Pmode.  */
19002        emit_insn (gen_rtx_SET (VOIDmode, reg2,
19003                                gen_rtx_PLUS (Pmode, out, const2_rtx)));
19004
19005        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19006        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19007        emit_insn (gen_rtx_SET (VOIDmode, out,
19008                                gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19009                                                      reg2,
19010                                                      out)));
19011     }
19012   else
19013     {
19014        rtx end_2_label = gen_label_rtx ();
19015        /* Is zero in the first two bytes? */
19016
19017        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19018        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19019        tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19020        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19021                             gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19022                             pc_rtx);
19023        tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19024        JUMP_LABEL (tmp) = end_2_label;
19025
19026        /* Not in the first two.  Move two bytes forward.  */
19027        emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19028        emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19029
19030        emit_label (end_2_label);
19031
19032     }
19033
19034   /* Avoid branch in fixing the byte.  */
19035   tmpreg = gen_lowpart (QImode, tmpreg);
19036   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19037   tmp = gen_rtx_REG (CCmode, FLAGS_REG);
19038   cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
19039   emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), tmp, cmp));
19040
19041   emit_label (end_0_label);
19042 }
19043
19044 /* Expand strlen.  OUT receives the length, SRC is the MEM for the
         string, EOSCHAR is the terminating character and ALIGN the known
         alignment.  Returns 1 when insns were emitted and 0 when the
         expansion was declined.  */
19045
19046 int
19047 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19048 {
19049   rtx addr, scratch1, scratch2, scratch3, scratch4;
19050
19051   /* The generic case of strlen expander is long.  Avoid expanding it
19052      unless TARGET_INLINE_ALL_STRINGOPS.  */
19053
19054   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19055       && !TARGET_INLINE_ALL_STRINGOPS
19056       && !optimize_insn_for_size_p ()
19057       && (!CONST_INT_P (align) || INTVAL (align) < 4))
19058     return 0;
19059
19060   addr = force_reg (Pmode, XEXP (src, 0));
19061   scratch1 = gen_reg_rtx (Pmode);
19062
19063   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19064       && !optimize_insn_for_size_p ())
19065     {
19066       /* Well it seems that some optimizer does not combine a call like
19067          foo(strlen(bar), strlen(bar));
19068          when the move and the subtraction is done here.  It does calculate
19069          the length just once when these instructions are done inside of
19070          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
19071          often used and I use one fewer register for the lifetime of
19072          output_strlen_unroll() this is better.  */
19073
19074       emit_move_insn (out, addr);
19075
19076       ix86_expand_strlensi_unroll_1 (out, src, align);
19077
19078       /* strlensi_unroll_1 returns the address of the zero at the end of
19079          the string, like memchr(), so compute the length by subtracting
19080          the start address.  */
19081       emit_insn ((*ix86_gen_sub3) (out, out, addr));
19082     }
19083   else
19084     {
19085       rtx unspec;
19086
19087       /* Can't use this if the user has appropriated eax, ecx, or edi.  */
19088       if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
          /* Same "declined" value as the early exit above; this function
             returns int, not bool.  */
19089         return 0;
19090
19091       scratch2 = gen_reg_rtx (Pmode);
19092       scratch3 = gen_reg_rtx (Pmode);
19093       scratch4 = force_reg (Pmode, constm1_rtx);
19094
19095       emit_move_insn (scratch3, addr);
19096       eoschar = force_reg (QImode, eoschar);
19097
19098       src = replace_equiv_address_nv (src, scratch3);
19099
19100       /* If .md starts supporting :P, this can be done in .md.  */
19101       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19102                                                  scratch4), UNSPEC_SCAS);
19103       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
          /* OUT = ~SCRATCH1 - 1.  */
19104       emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19105       emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
19106     }
19107   return 1;
19108 }
19109
19110 /* For given SYMBOL (a function) emit code to compute the address of its PLT
19111    entry in large x86-64 PIC model.  SYMBOL must be a SYMBOL_REF and the
         code model must be CM_LARGE_PIC; both are asserted.  Returns a new
         Pmode pseudo that is loaded with the UNSPEC_PLTOFF constant for
         SYMBOL and then has pic_offset_table_rtx added to it.  */
19112 rtx
19113 construct_plt_address (rtx symbol)
19114 {
19115   rtx tmp = gen_reg_rtx (Pmode);
19116   rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19117
19118   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19119   gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19120
19121   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19122   emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
19123   return tmp;
19124 }
19125
      /* Emit the call insn for a call through FNADDR.

         RETVAL, if nonnull, is the destination of the call value; the CALL
         is then wrapped in a SET of RETVAL.
         FNADDR is a MEM; XEXP (FNADDR, 0) is the address being called.
         CALLARG1 becomes the second operand of the CALL rtx.
         CALLARG2 is read with INTVAL: in 64-bit mode a non-negative value
         is moved into AL before the call, and the value -2 suppresses the
         MS-ABI clobber list built below.
         POP, if nonnull and not const0_rtx, is added to the stack pointer
         in a PARALLEL with the call; it must be absent in 64-bit mode.
         SIBCALL selects sibcall_insn_operand instead of call_insn_operand
         when deciding whether the address must be copied to a register.  */
19126 void
19127 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19128                   rtx callarg2,
19129                   rtx pop, int sibcall)
19130 {
19131   rtx use = NULL, call;
19132
19133   if (pop == const0_rtx)
19134     pop = NULL;
19135   gcc_assert (!TARGET_64BIT || !pop);
19136
19137   if (TARGET_MACHO && !TARGET_64BIT)
19138     {
19139 #if TARGET_MACHO
19140       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19141         fnaddr = machopic_indirect_call_target (fnaddr);
19142 #endif
19143     }
19144   else
19145     {
19146       /* Static functions and indirect calls don't need the pic register.  */
19147       if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19148           && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19149           && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19150         use_reg (&use, pic_offset_table_rtx);
19151     }
19152
        /* NOTE(review): CALLARG2 is dereferenced here without the null check
           that the MS_ABI test further down applies; presumably 64-bit
           callers always supply it -- confirm.  */
19153   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19154     {
19155       rtx al = gen_rtx_REG (QImode, AX_REG);
19156       emit_move_insn (al, callarg2);
19157       use_reg (&use, al);
19158     }
19159
        /* In the large PIC model a non-local symbol is called through its
           PLT address; otherwise an address the call predicate rejects is
           first copied into a Pmode register.  */
19160   if (ix86_cmodel == CM_LARGE_PIC
19161       && MEM_P (fnaddr)
19162       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19163       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19164     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19165   else if (sibcall
19166            ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
19167            : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
19168     {
19169       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19170       fnaddr = gen_rtx_MEM (QImode, fnaddr);
19171     }
19172
19173   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19174   if (retval)
19175     call = gen_rtx_SET (VOIDmode, retval, call);
19176   if (pop)
19177     {
19178       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19179       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19180       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19181     }
19182   if (TARGET_64BIT
19183       && ix86_cfun_abi () == MS_ABI
19184       && (!callarg2 || INTVAL (callarg2) != -2))
19185     {
19186       /* We need to represent that SI and DI registers are clobbered
19187          by SYSV calls.  The same is done for XMM6 through XMM15.  */
19188       static int clobbered_registers[] = {
19189         XMM6_REG, XMM7_REG, XMM8_REG,
19190         XMM9_REG, XMM10_REG, XMM11_REG,
19191         XMM12_REG, XMM13_REG, XMM14_REG,
19192         XMM15_REG, SI_REG, DI_REG
19193       };
19194       unsigned int i;
          /* Two extra slots: the call itself and the marker UNSPEC.  */
19195       rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19196       rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19197                                    UNSPEC_MS_TO_SYSV_CALL);
19198
19199       vec[0] = call;
19200       vec[1] = unspec;
          /* SSE registers are clobbered in TImode, the others in DImode.  */
19201       for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19202         vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19203                                       ? TImode : DImode,
19204                                       gen_rtx_REG
19205                                         (SSE_REGNO_P (clobbered_registers[i])
19206                                                       ? TImode : DImode,
19207                                          clobbered_registers[i]));
19208
19209       call = gen_rtx_PARALLEL (VOIDmode,
19210                                gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19211                                + 2, vec));
19212     }
19213
19214   call = emit_call_insn (call);
        /* Record the registers collected in USE (PIC register, AL) on the
           emitted call insn.  */
19215   if (use)
19216     CALL_INSN_FUNCTION_USAGE (call) = use;
19217 }
19218
19219 \f
19220 /* Clear stack slot assignments remembered from previous functions.
19221    This is called from INIT_EXPANDERS once before RTL is emitted for each
19222    function.
         Allocates and returns a new machine_function record with
         use_fast_prologue_epilogue_nregs set to -1 (no value yet),
         tls_descriptor_call_expanded_p cleared and call_abi taken from
         ix86_abi.  GGC_CNEW presumably returns zero-filled storage, which
         would leave every other field zero -- confirm.  */
19223
19224 static struct machine_function *
19225 ix86_init_machine_status (void)
19226 {
19227   struct machine_function *f;
19228
19229   f = GGC_CNEW (struct machine_function);
19230   f->use_fast_prologue_epilogue_nregs = -1;
19231   f->tls_descriptor_call_expanded_p = 0;
19232   f->call_abi = ix86_abi;
19233
19234   return f;
19235 }
19236
19237 /* Return a MEM corresponding to a stack slot with mode MODE.
19238    Allocate a new slot if necessary.
19239
19240    The RTL for a function can have several slots available: N is
19241    which slot to use.
         Slots are remembered on the ix86_stack_locals list, keyed by the
         pair (MODE, N).  A slot that is already on the list is returned
         as a copy_rtx of the stored MEM; a newly allocated one is returned
         directly.  */
19242
19243 rtx
19244 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19245 {
19246   struct stack_local_entry *s;
19247
19248   gcc_assert (n < MAX_386_STACK_LOCALS);
19249
19250   /* Virtual slot is valid only before vregs are instantiated.  */
19251   gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19252
        /* Reuse an existing slot with the same mode and slot number.  */
19253   for (s = ix86_stack_locals; s; s = s->next)
19254     if (s->mode == mode && s->n == n)
19255       return copy_rtx (s->rtl);
19256
        /* None found: allocate an entry and a stack slot sized for MODE.  */
19257   s = (struct stack_local_entry *)
19258     ggc_alloc (sizeof (struct stack_local_entry));
19259   s->n = n;
19260   s->mode = mode;
19261   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19262
        /* Push the new entry on the front of the list.  */
19263   s->next = ix86_stack_locals;
19264   ix86_stack_locals = s;
19265   return s->rtl;
19266 }
19267
19268 /* Construct the SYMBOL_REF for the tls_get_addr function.
         The symbol is created on the first call and cached in
         ix86_tls_symbol; later calls return the cached rtx.  The name is
         "___tls_get_addr" (three underscores) when TARGET_ANY_GNU_TLS is
         set on a non-64-bit target, and "__tls_get_addr" otherwise.  */
19269
19270 static GTY(()) rtx ix86_tls_symbol;
19271 rtx
19272 ix86_tls_get_addr (void)
19273 {
19274
19275   if (!ix86_tls_symbol)
19276     {
19277       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19278                                             (TARGET_ANY_GNU_TLS
19279                                              && !TARGET_64BIT)
19280                                             ? "___tls_get_addr"
19281                                             : "__tls_get_addr");
19282     }
19283
19284   return ix86_tls_symbol;
19285 }
19286
19287 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.
         The symbol is created on the first call, has the global-dynamic
         TLS model recorded in its SYMBOL_REF_FLAGS, and is cached in
         ix86_tls_module_base_symbol for later calls.  */
19288
19289 static GTY(()) rtx ix86_tls_module_base_symbol;
19290 rtx
19291 ix86_tls_module_base (void)
19292 {
19293
19294   if (!ix86_tls_module_base_symbol)
19295     {
19296       ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19297                                                         "_TLS_MODULE_BASE_");
19298       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19299         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19300     }
19301
19302   return ix86_tls_module_base_symbol;
19303 }
19304 \f
19305 /* Calculate the length of the memory address in the instruction
19306    encoding.  Does not include the one-byte modrm, opcode, or prefix.
         ADDR is the address expression (not the MEM).  The result counts
         the displacement bytes, one byte when the two-byte modrm form is
         needed, and one byte when the address uses the FS or GS segment.
         Auto-modify addresses (PRE_DEC, POST_INC, PRE_MODIFY, POST_MODIFY)
         yield 0.  ADDR must be decomposable by ix86_decompose_address.  */
19307
19308 int
19309 memory_address_length (rtx addr)
19310 {
19311   struct ix86_address parts;
19312   rtx base, index, disp;
19313   int len;
19314   int ok;
19315
19316   if (GET_CODE (addr) == PRE_DEC
19317       || GET_CODE (addr) == POST_INC
19318       || GET_CODE (addr) == PRE_MODIFY
19319       || GET_CODE (addr) == POST_MODIFY)
19320     return 0;
19321
19322   ok = ix86_decompose_address (addr, &parts);
19323   gcc_assert (ok);
19324
        /* Look through SUBREGs so the register tests below see the inner
           register.  */
19325   if (parts.base && GET_CODE (parts.base) == SUBREG)
19326     parts.base = SUBREG_REG (parts.base);
19327   if (parts.index && GET_CODE (parts.index) == SUBREG)
19328     parts.index = SUBREG_REG (parts.index);
19329
19330   base = parts.base;
19331   index = parts.index;
19332   disp = parts.disp;
19333   len = 0;
19334
19335   /* Rule of thumb:
19336        - esp as the base always wants an index,
19337        - ebp as the base always wants a displacement,
19338        - r12 as the base always wants an index,
19339        - r13 as the base always wants a displacement.  */
19340
19341   /* Register Indirect.  */
19342   if (base && !index && !disp)
19343     {
19344       /* esp (for its index) and ebp (for its displacement) need
19345          the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
19346          code.  Note that this test inspects ADDR itself, not BASE.  */
19347       if (REG_P (addr)
19348           && (addr == arg_pointer_rtx
19349               || addr == frame_pointer_rtx
19350               || REGNO (addr) == SP_REG
19351               || REGNO (addr) == BP_REG
19352               || REGNO (addr) == R12_REG
19353               || REGNO (addr) == R13_REG))
19354         len = 1;
19355     }
19356
19357   /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
19358      is not disp32, but disp32(%rip), so for disp32
19359      SIB byte is needed, unless print_operand_address
19360      optimizes it into disp32(%rip) or (%rip) is implied
19361      by UNSPEC.  */
19362   else if (disp && !base && !index)
19363     {
19364       len = 4;
19365       if (TARGET_64BIT)
19366         {
19367           rtx symbol = disp;
19368
              /* Strip a CONST wrapper and a constant offset to reach the
                 underlying symbol, label or unspec.  */
19369           if (GET_CODE (disp) == CONST)
19370             symbol = XEXP (disp, 0);
19371           if (GET_CODE (symbol) == PLUS
19372               && CONST_INT_P (XEXP (symbol, 1)))
19373             symbol = XEXP (symbol, 0);
19374
              /* Count the SIB byte unless the displacement is a label, a
                 non-TLS symbol, or a GOTPCREL/GOTNTPOFF unspec.  */
19375           if (GET_CODE (symbol) != LABEL_REF
19376               && (GET_CODE (symbol) != SYMBOL_REF
19377                   || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19378               && (GET_CODE (symbol) != UNSPEC
19379                   || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19380                       && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19381             len += 1;
19382         }
19383     }
19384
19385   else
19386     {
19387       /* Find the length of the displacement constant: one byte when
             there is a base and DISP satisfies constraint K, else four.  */
19388       if (disp)
19389         {
19390           if (base && satisfies_constraint_K (disp))
19391             len = 1;
19392           else
19393             len = 4;
19394         }
19395       /* ebp always wants a displacement.  Similarly r13.  */
19396       else if (base && REG_P (base)
19397                && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19398         len = 1;
19399
19400       /* An index requires the two-byte modrm form....  */
19401       if (index
19402           /* ...like esp (or r12), which always wants an index.  */
19403           || base == arg_pointer_rtx
19404           || base == frame_pointer_rtx
19405           || (base && REG_P (base)
19406               && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
19407         len += 1;
19408     }
19409
        /* An FS or GS segment adds one more byte.  */
19410   switch (parts.seg)
19411     {
19412     case SEG_FS:
19413     case SEG_GS:
19414       len += 1;
19415       break;
19416     default:
19417       break;
19418     }
19419
19420   return len;
19421 }
19422
19423 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
19424    is set, expect that the insn has an 8-bit immediate alternative.
         Returns the immediate length in bytes, or 0 when INSN has no
         CONSTANT_P operand.  At most one constant operand is expected;
         a second one trips the assertion below.  */
19425 int
19426 ix86_attr_length_immediate_default (rtx insn, int shortform)
19427 {
19428   int len = 0;
19429   int i;
19430   extract_insn_cached (insn);
19431   for (i = recog_data.n_operands - 1; i >= 0; --i)
19432     if (CONSTANT_P (recog_data.operand[i]))
19433       {
19434         enum attr_mode mode = get_attr_mode (insn);
19435
19436         gcc_assert (!len);
19437         if (shortform && CONST_INT_P (recog_data.operand[i]))
19438           {
19439             HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
                /* Truncate the value to the insn mode before the range
                   test.  The `continue' statements in this block resume
                   the enclosing operand loop.  */
19440             switch (mode)
19441               {
19442               case MODE_QI:
19443                 len = 1;
19444                 continue;
19445               case MODE_HI:
19446                 ival = trunc_int_for_mode (ival, HImode);
19447                 break;
19448               case MODE_SI:
19449                 ival = trunc_int_for_mode (ival, SImode);
19450                 break;
19451               default:
19452                 break;
19453               }
                /* A value that fits in a signed byte uses the short form.  */
19454             if (IN_RANGE (ival, -128, 127))
19455               {
19456                 len = 1;
19457                 continue;
19458               }
19459           }
            /* Otherwise the immediate is as wide as the insn mode.  */
19460         switch (mode)
19461           {
19462           case MODE_QI:
19463             len = 1;
19464             break;
19465           case MODE_HI:
19466             len = 2;
19467             break;
19468           case MODE_SI:
19469             len = 4;
19470             break;
19471           /* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
19472           case MODE_DI:
19473             len = 4;
19474             break;
19475           default:
19476             fatal_insn ("unknown insn mode", insn);
19477         }
19478       }
19479   return len;
19480 }
19481 /* Compute default value for "length_address" attribute.
         For an lea insn the length is that of the source address of its
         SET.  For any other insn it is the address length of the first MEM
         operand found when scanning from the last operand down, skipping
         operands whose constraint in the matched alternative is 'X'.
         Returns 0 when there is no such operand.  */
19482 int
19483 ix86_attr_length_address_default (rtx insn)
19484 {
19485   int i;
19486
19487   if (get_attr_type (insn) == TYPE_LEA)
19488     {
19489       rtx set = PATTERN (insn), addr;
19490
19491       if (GET_CODE (set) == PARALLEL)
19492         set = XVECEXP (set, 0, 0);
19493
19494       gcc_assert (GET_CODE (set) == SET);
19495
19496       addr = SET_SRC (set);
          /* For an SImode lea in 64-bit mode, look through a ZERO_EXTEND
             and a SUBREG around the address.  */
19497       if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19498         {
19499           if (GET_CODE (addr) == ZERO_EXTEND)
19500             addr = XEXP (addr, 0);
19501           if (GET_CODE (addr) == SUBREG)
19502             addr = SUBREG_REG (addr);
19503         }
19504
19505       return memory_address_length (addr);
19506     }
19507
19508   extract_insn_cached (insn);
19509   for (i = recog_data.n_operands - 1; i >= 0; --i)
19510     if (MEM_P (recog_data.operand[i]))
19511       {
19512         constrain_operands_cached (reload_completed);
19513         if (which_alternative != -1)
19514           {
19515             const char *constraints = recog_data.constraints[i];
19516             int alt = which_alternative;
19517
                /* Skip the leading modifiers, then step over one comma
                   per preceding alternative to reach the constraint of
                   the matched alternative.  */
19518             while (*constraints == '=' || *constraints == '+')
19519               constraints++;
19520             while (alt-- > 0)
19521               while (*constraints++ != ',')
19522                 ;
19523             /* Skip ignored operands.  */
19524             if (*constraints == 'X')
19525               continue;
19526           }
19527         return memory_address_length (XEXP (recog_data.operand[i], 0));
19528       }
19529   return 0;
19530 }
19531
19532 /* Compute default value for "length_vex" attribute. It includes
19533    2 or 3 byte VEX prefix and 1 opcode byte.
         HAS_0F_OPCODE is nonzero when the insn uses a 0f opcode and
         HAS_VEX_W is nonzero when it needs the VEX W bit.  The result is
         therefore either 3 (2-byte prefix) or 4 (3-byte prefix).  */
19534
19535 int
19536 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19537                               int has_vex_w)
19538 {
19539   int i;
19540
19541   /* Only 0f opcode can use 2 byte VEX prefix and  VEX W bit uses 3
19542      byte VEX prefix.  */
19543   if (!has_0f_opcode || has_vex_w)
19544     return 3 + 1;
19545
19546  /* We can always use 2 byte VEX prefix in 32bit.  */
19547   if (!TARGET_64BIT)
19548     return 2 + 1;
19549
19550   extract_insn_cached (insn);
19551
        /* In 64-bit mode any operand that needs a REX bit forces the
           3 byte prefix.  */
19552   for (i = recog_data.n_operands - 1; i >= 0; --i)
19553     if (REG_P (recog_data.operand[i]))
19554       {
19555         /* REX.W bit uses 3 byte VEX prefix.  */
19556         if (GET_MODE (recog_data.operand[i]) == DImode
19557             && GENERAL_REG_P (recog_data.operand[i]))
19558           return 3 + 1;
19559       }
19560     else
19561       {
19562         /* REX.X or REX.B bits use 3 byte VEX prefix.  */
19563         if (MEM_P (recog_data.operand[i])
19564             && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19565           return 3 + 1;
19566       }
19567
19568   return 2 + 1;
19569 }
19570 \f
19571 /* Return the maximum number of instructions a cpu can issue.
         The value is selected by the processor being tuned for
         (ix86_tune); processors not listed below get 1.  */
19572
19573 static int
19574 ix86_issue_rate (void)
19575 {
19576   switch (ix86_tune)
19577     {
19578     case PROCESSOR_PENTIUM:
19579     case PROCESSOR_ATOM:
19580     case PROCESSOR_K6:
19581       return 2;
19582
19583     case PROCESSOR_PENTIUMPRO:
19584     case PROCESSOR_PENTIUM4:
19585     case PROCESSOR_ATHLON:
19586     case PROCESSOR_K8:
19587     case PROCESSOR_AMDFAM10:
19588     case PROCESSOR_NOCONA:
19589     case PROCESSOR_GENERIC32:
19590     case PROCESSOR_GENERIC64:
19591       return 3;
19592
19593     case PROCESSOR_CORE2:
19594       return 4;
19595
19596     default:
19597       return 1;
19598     }
19599 }
19600
19601 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19602    by DEP_INSN and nothing else set by DEP_INSN.
         INSN_TYPE is the "type" attribute of INSN; only setcc, icmov,
         fcmov and ibr insns are considered.  DEP_INSN must be a single
         set, or a PARALLEL of exactly two SETs whose first destination is
         the flags register; anything else yields 0.  */
19603
19604 static int
19605 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19606 {
19607   rtx set, set2;
19608
19609   /* Simplify the test for uninteresting insns.  */
19610   if (insn_type != TYPE_SETCC
19611       && insn_type != TYPE_ICMOV
19612       && insn_type != TYPE_FCMOV
19613       && insn_type != TYPE_IBR)
19614     return 0;
19615
19616   if ((set = single_set (dep_insn)) != 0)
19617     {
19618       set = SET_DEST (set);
19619       set2 = NULL_RTX;
19620     }
19621   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19622            && XVECLEN (PATTERN (dep_insn), 0) == 2
19623            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19624            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19625     {
19626       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
          /* SET2 is the destination of the second SET.  Taking it from
             element 0 would make it identical to SET, and the final test
             below would then reject every two-SET dependency.  */
19627       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19628     }
19629   else
19630     return 0;
19631
19632   if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19633     return 0;
19634
19635   /* This test is true if the dependent insn reads the flags but
19636      not any other potentially set register.  */
19637   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19638     return 0;
19639
19640   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19641     return 0;
19642
19643   return 1;
19644 }
19645
19646 /* Return true iff USE_INSN has a memory address with operands set by
19647    SET_INSN.
         Only one MEM operand is examined: the first one found when
         scanning the operands of USE_INSN from the last down to the
         first.  Returns false when USE_INSN has no MEM operand.  */
19648
19649 bool
19650 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19651 {
19652   int i;
19653   extract_insn_cached (use_insn);
19654   for (i = recog_data.n_operands - 1; i >= 0; --i)
19655     if (MEM_P (recog_data.operand[i]))
19656       {
19657         rtx addr = XEXP (recog_data.operand[i], 0);
19658         return modified_in_p (addr, set_insn) != 0;
19659       }
19660   return false;
19661 }
19662
19663 static int
19664 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19665 {
19666   enum attr_type insn_type, dep_insn_type;
19667   enum attr_memory memory;
19668   rtx set, set2;
19669   int dep_insn_code_number;
19670
19671   /* Anti and output dependencies have zero cost on all CPUs.  */
19672   if (REG_NOTE_KIND (link) != 0)
19673     return 0;
19674
19675   dep_insn_code_number = recog_memoized (dep_insn);
19676
19677   /* If we can't recognize the insns, we can't really do anything.  */
19678   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19679     return cost;
19680
19681   insn_type = get_attr_type (insn);
19682   dep_insn_type = get_attr_type (dep_insn);
19683
19684   switch (ix86_tune)
19685     {
19686     case PROCESSOR_PENTIUM:
19687       /* Address Generation Interlock adds a cycle of latency.  */
19688       if (insn_type == TYPE_LEA)
19689         {
19690           rtx addr = PATTERN (insn);
19691
19692           if (GET_CODE (addr) == PARALLEL)
19693             addr = XVECEXP (addr, 0, 0);
19694
19695           gcc_assert (GET_CODE (addr) == SET);
19696
19697           addr = SET_SRC (addr);
19698           if (modified_in_p (addr, dep_insn))
19699             cost += 1;
19700         }
19701       else if (ix86_agi_dependent (dep_insn, insn))
19702         cost += 1;
19703
19704       /* ??? Compares pair with jump/setcc.  */
19705       if (ix86_flags_dependent (insn, dep_insn, insn_type))
19706         cost = 0;
19707
19708       /* Floating point stores require value to be ready one cycle earlier.  */
19709       if (insn_type == TYPE_FMOV
19710           && get_attr_memory (insn) == MEMORY_STORE
19711           && !ix86_agi_dependent (dep_insn, insn))
19712         cost += 1;
19713       break;
19714
19715     case PROCESSOR_PENTIUMPRO:
19716       memory = get_attr_memory (insn);
19717
19718       /* INT->FP conversion is expensive.  */
19719       if (get_attr_fp_int_src (dep_insn))
19720         cost += 5;
19721
19722       /* There is one cycle extra latency between an FP op and a store.  */
19723       if (insn_type == TYPE_FMOV
19724           && (set = single_set (dep_insn)) != NULL_RTX
19725           && (set2 = single_set (insn)) != NULL_RTX
19726           && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19727           && MEM_P (SET_DEST (set2)))
19728         cost += 1;
19729
19730       /* Show ability of reorder buffer to hide latency of load by executing
19731          in parallel with previous instruction in case
19732          previous instruction is not needed to compute the address.  */
19733       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19734           && !ix86_agi_dependent (dep_insn, insn))
19735         {
19736           /* Claim moves to take one cycle, as core can issue one load
19737              at time and the next load can start cycle later.  */
19738           if (dep_insn_type == TYPE_IMOV
19739               || dep_insn_type == TYPE_FMOV)
19740             cost = 1;
19741           else if (cost > 1)
19742             cost--;
19743         }
19744       break;
19745
19746     case PROCESSOR_K6:
19747       memory = get_attr_memory (insn);
19748
19749       /* The esp dependency is resolved before the instruction is really
19750          finished.  */
19751       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19752           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19753         return 1;
19754
19755       /* INT->FP conversion is expensive.  */
19756       if (get_attr_fp_int_src (dep_insn))
19757         cost += 5;
19758
19759       /* Show ability of reorder buffer to hide latency of load by executing
19760          in parallel with previous instruction in case
19761          previous instruction is not needed to compute the address.  */
19762       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19763           && !ix86_agi_dependent (dep_insn, insn))
19764         {
19765           /* Claim moves to take one cycle, as core can issue one load
19766              at time and the next load can start cycle later.  */
19767           if (dep_insn_type == TYPE_IMOV
19768               || dep_insn_type == TYPE_FMOV)
19769             cost = 1;
19770           else if (cost > 2)
19771             cost -= 2;
19772           else
19773             cost = 1;
19774         }
19775       break;
19776
19777     case PROCESSOR_ATHLON:
19778     case PROCESSOR_K8:
19779     case PROCESSOR_AMDFAM10:
19780     case PROCESSOR_ATOM:
19781     case PROCESSOR_GENERIC32:
19782     case PROCESSOR_GENERIC64:
19783       memory = get_attr_memory (insn);
19784
19785       /* Show ability of reorder buffer to hide latency of load by executing
19786          in parallel with previous instruction in case
19787          previous instruction is not needed to compute the address.  */
19788       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19789           && !ix86_agi_dependent (dep_insn, insn))
19790         {
19791           enum attr_unit unit = get_attr_unit (insn);
19792           int loadcost = 3;
19793
19794           /* Because of the difference between the length of integer and
19795              floating unit pipeline preparation stages, the memory operands
19796              for floating point are cheaper.
19797
19798              ??? For Athlon it the difference is most probably 2.  */
19799           if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19800             loadcost = 3;
19801           else
19802             loadcost = TARGET_ATHLON ? 2 : 0;
19803
19804           if (cost >= loadcost)
19805             cost -= loadcost;
19806           else
19807             cost = 0;
19808         }
19809
19810     default:
19811       break;
19812     }
19813
19814   return cost;
19815 }
19816
19817 /* How many alternative schedules to try.  This should be as wide as the
19818    scheduling freedom in the DFA, but no wider.  Making this value too
19819    large results extra work for the scheduler.  */
19820
19821 static int
19822 ia32_multipass_dfa_lookahead (void)
19823 {
19824   switch (ix86_tune)
19825     {
19826     case PROCESSOR_PENTIUM:
19827       return 2;
19828
19829     case PROCESSOR_PENTIUMPRO:
19830     case PROCESSOR_K6:
19831       return 1;
19832
19833     default:
19834       return 0;
19835     }
19836 }
19837
19838 \f
19839 /* Compute the alignment given to a constant that is being placed in memory.
19840    EXP is the constant and ALIGN is the alignment that the object would
19841    ordinarily have.
19842    The value of this function is used instead of that alignment to align
19843    the object.  */
19844
19845 int
19846 ix86_constant_alignment (tree exp, int align)
19847 {
19848   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19849       || TREE_CODE (exp) == INTEGER_CST)
19850     {
19851       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19852         return 64;
19853       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19854         return 128;
19855     }
19856   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19857            && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19858     return BITS_PER_WORD;
19859
19860   return align;
19861 }
19862
19863 /* Compute the alignment for a static variable.
19864    TYPE is the data type, and ALIGN is the alignment that
19865    the object would ordinarily have.  The value of this function is used
19866    instead of that alignment to align the object.  */
19867
19868 int
19869 ix86_data_alignment (tree type, int align)
19870 {
19871   int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19872
19873   if (AGGREGATE_TYPE_P (type)
19874       && TYPE_SIZE (type)
19875       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19876       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19877           || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19878       && align < max_align)
19879     align = max_align;
19880
19881   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19882      to 16byte boundary.  */
19883   if (TARGET_64BIT)
19884     {
19885       if (AGGREGATE_TYPE_P (type)
19886            && TYPE_SIZE (type)
19887            && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19888            && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19889                || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19890         return 128;
19891     }
19892
19893   if (TREE_CODE (type) == ARRAY_TYPE)
19894     {
19895       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19896         return 64;
19897       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19898         return 128;
19899     }
19900   else if (TREE_CODE (type) == COMPLEX_TYPE)
19901     {
19902
19903       if (TYPE_MODE (type) == DCmode && align < 64)
19904         return 64;
19905       if ((TYPE_MODE (type) == XCmode
19906            || TYPE_MODE (type) == TCmode) && align < 128)
19907         return 128;
19908     }
19909   else if ((TREE_CODE (type) == RECORD_TYPE
19910             || TREE_CODE (type) == UNION_TYPE
19911             || TREE_CODE (type) == QUAL_UNION_TYPE)
19912            && TYPE_FIELDS (type))
19913     {
19914       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19915         return 64;
19916       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19917         return 128;
19918     }
19919   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19920            || TREE_CODE (type) == INTEGER_TYPE)
19921     {
19922       if (TYPE_MODE (type) == DFmode && align < 64)
19923         return 64;
19924       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19925         return 128;
19926     }
19927
19928   return align;
19929 }
19930
19931 /* Compute the alignment for a local variable or a stack slot.  EXP is
19932    the data type or decl itself, MODE is the widest mode available and
19933    ALIGN is the alignment that the object would ordinarily have.  The
19934    value of this macro is used instead of that alignment to align the
19935    object.  */
19936
19937 unsigned int
19938 ix86_local_alignment (tree exp, enum machine_mode mode,
19939                       unsigned int align)
19940 {
19941   tree type, decl;
19942
19943   if (exp && DECL_P (exp))
19944     {
19945       type = TREE_TYPE (exp);
19946       decl = exp;
19947     }
19948   else
19949     {
19950       type = exp;
19951       decl = NULL;
19952     }
19953
19954   /* Don't do dynamic stack realignment for long long objects with
19955      -mpreferred-stack-boundary=2.  */
19956   if (!TARGET_64BIT
19957       && align == 64
19958       && ix86_preferred_stack_boundary < 64
19959       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19960       && (!type || !TYPE_USER_ALIGN (type))
19961       && (!decl || !DECL_USER_ALIGN (decl)))
19962     align = 32;
19963
19964   /* If TYPE is NULL, we are allocating a stack slot for caller-save
19965      register in MODE.  We will return the largest alignment of XF
19966      and DF.  */
19967   if (!type)
19968     {
19969       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19970         align = GET_MODE_ALIGNMENT (DFmode);
19971       return align;
19972     }
19973
19974   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19975      to 16byte boundary.  */
19976   if (TARGET_64BIT)
19977     {
19978       if (AGGREGATE_TYPE_P (type)
19979            && TYPE_SIZE (type)
19980            && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19981            && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19982                || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19983         return 128;
19984     }
19985   if (TREE_CODE (type) == ARRAY_TYPE)
19986     {
19987       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19988         return 64;
19989       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19990         return 128;
19991     }
19992   else if (TREE_CODE (type) == COMPLEX_TYPE)
19993     {
19994       if (TYPE_MODE (type) == DCmode && align < 64)
19995         return 64;
19996       if ((TYPE_MODE (type) == XCmode
19997            || TYPE_MODE (type) == TCmode) && align < 128)
19998         return 128;
19999     }
20000   else if ((TREE_CODE (type) == RECORD_TYPE
20001             || TREE_CODE (type) == UNION_TYPE
20002             || TREE_CODE (type) == QUAL_UNION_TYPE)
20003            && TYPE_FIELDS (type))
20004     {
20005       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20006         return 64;
20007       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20008         return 128;
20009     }
20010   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20011            || TREE_CODE (type) == INTEGER_TYPE)
20012     {
20013
20014       if (TYPE_MODE (type) == DFmode && align < 64)
20015         return 64;
20016       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20017         return 128;
20018     }
20019   return align;
20020 }
20021
20022 /* Compute the minimum required alignment for dynamic stack realignment
20023    purposes for a local variable, parameter or a stack slot.  EXP is
20024    the data type or decl itself, MODE is its mode and ALIGN is the
20025    alignment that the object would ordinarily have.  */
20026
20027 unsigned int
20028 ix86_minimum_alignment (tree exp, enum machine_mode mode,
20029                         unsigned int align)
20030 {
20031   tree type, decl;
20032
20033   if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
20034     return align;
20035
20036   if (exp && DECL_P (exp))
20037     {
20038       type = TREE_TYPE (exp);
20039       decl = exp;
20040     }
20041   else
20042     {
20043       type = exp;
20044       decl = NULL;
20045     }
20046
20047   /* Don't do dynamic stack realignment for long long objects with
20048      -mpreferred-stack-boundary=2.  */
20049   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
20050       && (!type || !TYPE_USER_ALIGN (type))
20051       && (!decl || !DECL_USER_ALIGN (decl)))
20052     return 32;
20053
20054   return align;
20055 }
20056 \f
20057 /* Find a location for the static chain incoming to a nested function.
20058    This is a register, unless all free registers are used by arguments.  */
20059
20060 static rtx
20061 ix86_static_chain (const_tree fndecl, bool incoming_p)
20062 {
20063   unsigned regno;
20064
20065   if (!DECL_STATIC_CHAIN (fndecl))
20066     return NULL;
20067
20068   if (TARGET_64BIT)
20069     {
20070       /* We always use R10 in 64-bit mode.  */
20071       regno = R10_REG;
20072     }
20073   else
20074     {
20075       tree fntype;
20076       /* By default in 32-bit mode we use ECX to pass the static chain.  */
20077       regno = CX_REG;
20078
20079       fntype = TREE_TYPE (fndecl);
20080       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
20081         {
20082           /* Fastcall functions use ecx/edx for arguments, which leaves
20083              us with EAX for the static chain.  */
20084           regno = AX_REG;
20085         }
20086       else if (ix86_function_regparm (fntype, fndecl) == 3)
20087         {
20088           /* For regparm 3, we have no free call-clobbered registers in
20089              which to store the static chain.  In order to implement this,
20090              we have the trampoline push the static chain to the stack.
20091              However, we can't push a value below the return address when
20092              we call the nested function directly, so we have to use an
20093              alternate entry point.  For this we use ESI, and have the
20094              alternate entry point push ESI, so that things appear the
20095              same once we're executing the nested function.  */
20096           if (incoming_p)
20097             {
20098               if (fndecl == current_function_decl)
20099                 ix86_static_chain_on_stack = true;
20100               return gen_frame_mem (SImode,
20101                                     plus_constant (arg_pointer_rtx, -8));
20102             }
20103           regno = SI_REG;
20104         }
20105     }
20106
20107   return gen_rtx_REG (Pmode, regno);
20108 }
20109
20110 /* Emit RTL insns to initialize the variable parts of a trampoline.
20111    FNDECL is the decl of the target address; M_TRAMP is a MEM for
20112    the trampoline, and CHAIN_VALUE is an RTX for the static chain
20113    to be passed to the target function.  */
20114
20115 static void
20116 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
20117 {
20118   rtx mem, fnaddr;
20119
20120   fnaddr = XEXP (DECL_RTL (fndecl), 0);
20121
20122   if (!TARGET_64BIT)
20123     {
20124       rtx disp, chain;
20125       int opcode;
20126
20127       /* Depending on the static chain location, either load a register
20128          with a constant, or push the constant to the stack.  All of the
20129          instructions are the same size.  */
20130       chain = ix86_static_chain (fndecl, true);
20131       if (REG_P (chain))
20132         {
20133           if (REGNO (chain) == CX_REG)
20134             opcode = 0xb9;
20135           else if (REGNO (chain) == AX_REG)
20136             opcode = 0xb8;
20137           else
20138             gcc_unreachable ();
20139         }
20140       else
20141         opcode = 0x68;
20142
20143       mem = adjust_address (m_tramp, QImode, 0);
20144       emit_move_insn (mem, gen_int_mode (opcode, QImode));
20145
20146       mem = adjust_address (m_tramp, SImode, 1);
20147       emit_move_insn (mem, chain_value);
20148
20149       /* Compute offset from the end of the jmp to the target function.
20150          In the case in which the trampoline stores the static chain on
20151          the stack, we need to skip the first insn which pushes the
20152          (call-saved) register static chain; this push is 1 byte.  */
20153       disp = expand_binop (SImode, sub_optab, fnaddr,
20154                            plus_constant (XEXP (m_tramp, 0),
20155                                           MEM_P (chain) ? 9 : 10),
20156                            NULL_RTX, 1, OPTAB_DIRECT);
20157
20158       mem = adjust_address (m_tramp, QImode, 5);
20159       emit_move_insn (mem, gen_int_mode (0xe9, QImode));
20160
20161       mem = adjust_address (m_tramp, SImode, 6);
20162       emit_move_insn (mem, disp);
20163     }
20164   else
20165     {
20166       int offset = 0;
20167
20168       /* Load the function address to r11.  Try to load address using
20169          the shorter movl instead of movabs.  We may want to support
20170          movq for kernel mode, but kernel does not use trampolines at
20171          the moment.  */
20172       if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
20173         {
20174           fnaddr = copy_to_mode_reg (DImode, fnaddr);
20175
20176           mem = adjust_address (m_tramp, HImode, offset);
20177           emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
20178
20179           mem = adjust_address (m_tramp, SImode, offset + 2);
20180           emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
20181           offset += 6;
20182         }
20183       else
20184         {
20185           mem = adjust_address (m_tramp, HImode, offset);
20186           emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
20187
20188           mem = adjust_address (m_tramp, DImode, offset + 2);
20189           emit_move_insn (mem, fnaddr);
20190           offset += 10;
20191         }
20192
20193       /* Load static chain using movabs to r10.  */
20194       mem = adjust_address (m_tramp, HImode, offset);
20195       emit_move_insn (mem, gen_int_mode (0xba49, HImode));
20196
20197       mem = adjust_address (m_tramp, DImode, offset + 2);
20198       emit_move_insn (mem, chain_value);
20199       offset += 10;
20200
20201       /* Jump to r11; the last (unused) byte is a nop, only there to
20202          pad the write out to a single 32-bit store.  */
20203       mem = adjust_address (m_tramp, SImode, offset);
20204       emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
20205       offset += 4;
20206
20207       gcc_assert (offset <= TRAMPOLINE_SIZE);
20208     }
20209
20210 #ifdef ENABLE_EXECUTE_STACK
20211 #ifdef CHECK_EXECUTE_STACK_ENABLED
20212   if (CHECK_EXECUTE_STACK_ENABLED)
20213 #endif
20214   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20215                      LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
20216 #endif
20217 }
20218 \f
20219 /* The following file contains several enumerations and data structures
20220    built from the definitions in i386-builtin-types.def.  */
20221
20222 #include "i386-builtin-types.inc"
20223
20224 /* Table for the ix86 builtin non-function types.  */
20225 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
20226
20227 /* Retrieve an element from the above table, building some of
20228    the types lazily.  */
20229
20230 static tree
20231 ix86_get_builtin_type (enum ix86_builtin_type tcode)
20232 {
20233   unsigned int index;
20234   tree type, itype;
20235
20236   gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
20237
20238   type = ix86_builtin_type_tab[(int) tcode];
20239   if (type != NULL)
20240     return type;
20241
20242   gcc_assert (tcode > IX86_BT_LAST_PRIM);
20243   if (tcode <= IX86_BT_LAST_VECT)
20244     {
20245       enum machine_mode mode;
20246
20247       index = tcode - IX86_BT_LAST_PRIM - 1;
20248       itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
20249       mode = ix86_builtin_type_vect_mode[index];
20250
20251       type = build_vector_type_for_mode (itype, mode);
20252     }
20253   else
20254     {
20255       int quals;
20256
20257       index = tcode - IX86_BT_LAST_VECT - 1;
20258       if (tcode <= IX86_BT_LAST_PTR)
20259         quals = TYPE_UNQUALIFIED;
20260       else
20261         quals = TYPE_QUAL_CONST;
20262
20263       itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
20264       if (quals != TYPE_UNQUALIFIED)
20265         itype = build_qualified_type (itype, quals);
20266
20267       type = build_pointer_type (itype);
20268     }
20269
20270   ix86_builtin_type_tab[(int) tcode] = type;
20271   return type;
20272 }
20273
20274 /* Table for the ix86 builtin function types.  */
20275 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
20276
20277 /* Retrieve an element from the above table, building some of
20278    the types lazily.  */
20279
20280 static tree
20281 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
20282 {
20283   tree type;
20284
20285   gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
20286
20287   type = ix86_builtin_func_type_tab[(int) tcode];
20288   if (type != NULL)
20289     return type;
20290
20291   if (tcode <= IX86_BT_LAST_FUNC)
20292     {
20293       unsigned start = ix86_builtin_func_start[(int) tcode];
20294       unsigned after = ix86_builtin_func_start[(int) tcode + 1];
20295       tree rtype, atype, args = void_list_node;
20296       unsigned i;
20297
20298       rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
20299       for (i = after - 1; i > start; --i)
20300         {
20301           atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
20302           args = tree_cons (NULL, atype, args);
20303         }
20304
20305       type = build_function_type (rtype, args);
20306     }
20307   else
20308     {
20309       unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
20310       enum ix86_builtin_func_type icode;
20311
20312       icode = ix86_builtin_func_alias_base[index];
20313       type = ix86_get_builtin_func_type (icode);
20314     }
20315
20316   ix86_builtin_func_type_tab[(int) tcode] = type;
20317   return type;
20318 }
20319
20320
20321 /* Codes for all the SSE/MMX builtins.  */
20322 enum ix86_builtins
20323 {
20324   IX86_BUILTIN_ADDPS,
20325   IX86_BUILTIN_ADDSS,
20326   IX86_BUILTIN_DIVPS,
20327   IX86_BUILTIN_DIVSS,
20328   IX86_BUILTIN_MULPS,
20329   IX86_BUILTIN_MULSS,
20330   IX86_BUILTIN_SUBPS,
20331   IX86_BUILTIN_SUBSS,
20332
20333   IX86_BUILTIN_CMPEQPS,
20334   IX86_BUILTIN_CMPLTPS,
20335   IX86_BUILTIN_CMPLEPS,
20336   IX86_BUILTIN_CMPGTPS,
20337   IX86_BUILTIN_CMPGEPS,
20338   IX86_BUILTIN_CMPNEQPS,
20339   IX86_BUILTIN_CMPNLTPS,
20340   IX86_BUILTIN_CMPNLEPS,
20341   IX86_BUILTIN_CMPNGTPS,
20342   IX86_BUILTIN_CMPNGEPS,
20343   IX86_BUILTIN_CMPORDPS,
20344   IX86_BUILTIN_CMPUNORDPS,
20345   IX86_BUILTIN_CMPEQSS,
20346   IX86_BUILTIN_CMPLTSS,
20347   IX86_BUILTIN_CMPLESS,
20348   IX86_BUILTIN_CMPNEQSS,
20349   IX86_BUILTIN_CMPNLTSS,
20350   IX86_BUILTIN_CMPNLESS,
20351   IX86_BUILTIN_CMPNGTSS,
20352   IX86_BUILTIN_CMPNGESS,
20353   IX86_BUILTIN_CMPORDSS,
20354   IX86_BUILTIN_CMPUNORDSS,
20355
20356   IX86_BUILTIN_COMIEQSS,
20357   IX86_BUILTIN_COMILTSS,
20358   IX86_BUILTIN_COMILESS,
20359   IX86_BUILTIN_COMIGTSS,
20360   IX86_BUILTIN_COMIGESS,
20361   IX86_BUILTIN_COMINEQSS,
20362   IX86_BUILTIN_UCOMIEQSS,
20363   IX86_BUILTIN_UCOMILTSS,
20364   IX86_BUILTIN_UCOMILESS,
20365   IX86_BUILTIN_UCOMIGTSS,
20366   IX86_BUILTIN_UCOMIGESS,
20367   IX86_BUILTIN_UCOMINEQSS,
20368
20369   IX86_BUILTIN_CVTPI2PS,
20370   IX86_BUILTIN_CVTPS2PI,
20371   IX86_BUILTIN_CVTSI2SS,
20372   IX86_BUILTIN_CVTSI642SS,
20373   IX86_BUILTIN_CVTSS2SI,
20374   IX86_BUILTIN_CVTSS2SI64,
20375   IX86_BUILTIN_CVTTPS2PI,
20376   IX86_BUILTIN_CVTTSS2SI,
20377   IX86_BUILTIN_CVTTSS2SI64,
20378
20379   IX86_BUILTIN_MAXPS,
20380   IX86_BUILTIN_MAXSS,
20381   IX86_BUILTIN_MINPS,
20382   IX86_BUILTIN_MINSS,
20383
20384   IX86_BUILTIN_LOADUPS,
20385   IX86_BUILTIN_STOREUPS,
20386   IX86_BUILTIN_MOVSS,
20387
20388   IX86_BUILTIN_MOVHLPS,
20389   IX86_BUILTIN_MOVLHPS,
20390   IX86_BUILTIN_LOADHPS,
20391   IX86_BUILTIN_LOADLPS,
20392   IX86_BUILTIN_STOREHPS,
20393   IX86_BUILTIN_STORELPS,
20394
20395   IX86_BUILTIN_MASKMOVQ,
20396   IX86_BUILTIN_MOVMSKPS,
20397   IX86_BUILTIN_PMOVMSKB,
20398
20399   IX86_BUILTIN_MOVNTPS,
20400   IX86_BUILTIN_MOVNTQ,
20401
20402   IX86_BUILTIN_LOADDQU,
20403   IX86_BUILTIN_STOREDQU,
20404
20405   IX86_BUILTIN_PACKSSWB,
20406   IX86_BUILTIN_PACKSSDW,
20407   IX86_BUILTIN_PACKUSWB,
20408
20409   IX86_BUILTIN_PADDB,
20410   IX86_BUILTIN_PADDW,
20411   IX86_BUILTIN_PADDD,
20412   IX86_BUILTIN_PADDQ,
20413   IX86_BUILTIN_PADDSB,
20414   IX86_BUILTIN_PADDSW,
20415   IX86_BUILTIN_PADDUSB,
20416   IX86_BUILTIN_PADDUSW,
20417   IX86_BUILTIN_PSUBB,
20418   IX86_BUILTIN_PSUBW,
20419   IX86_BUILTIN_PSUBD,
20420   IX86_BUILTIN_PSUBQ,
20421   IX86_BUILTIN_PSUBSB,
20422   IX86_BUILTIN_PSUBSW,
20423   IX86_BUILTIN_PSUBUSB,
20424   IX86_BUILTIN_PSUBUSW,
20425
20426   IX86_BUILTIN_PAND,
20427   IX86_BUILTIN_PANDN,
20428   IX86_BUILTIN_POR,
20429   IX86_BUILTIN_PXOR,
20430
20431   IX86_BUILTIN_PAVGB,
20432   IX86_BUILTIN_PAVGW,
20433
20434   IX86_BUILTIN_PCMPEQB,
20435   IX86_BUILTIN_PCMPEQW,
20436   IX86_BUILTIN_PCMPEQD,
20437   IX86_BUILTIN_PCMPGTB,
20438   IX86_BUILTIN_PCMPGTW,
20439   IX86_BUILTIN_PCMPGTD,
20440
20441   IX86_BUILTIN_PMADDWD,
20442
20443   IX86_BUILTIN_PMAXSW,
20444   IX86_BUILTIN_PMAXUB,
20445   IX86_BUILTIN_PMINSW,
20446   IX86_BUILTIN_PMINUB,
20447
20448   IX86_BUILTIN_PMULHUW,
20449   IX86_BUILTIN_PMULHW,
20450   IX86_BUILTIN_PMULLW,
20451
20452   IX86_BUILTIN_PSADBW,
20453   IX86_BUILTIN_PSHUFW,
20454
20455   IX86_BUILTIN_PSLLW,
20456   IX86_BUILTIN_PSLLD,
20457   IX86_BUILTIN_PSLLQ,
20458   IX86_BUILTIN_PSRAW,
20459   IX86_BUILTIN_PSRAD,
20460   IX86_BUILTIN_PSRLW,
20461   IX86_BUILTIN_PSRLD,
20462   IX86_BUILTIN_PSRLQ,
20463   IX86_BUILTIN_PSLLWI,
20464   IX86_BUILTIN_PSLLDI,
20465   IX86_BUILTIN_PSLLQI,
20466   IX86_BUILTIN_PSRAWI,
20467   IX86_BUILTIN_PSRADI,
20468   IX86_BUILTIN_PSRLWI,
20469   IX86_BUILTIN_PSRLDI,
20470   IX86_BUILTIN_PSRLQI,
20471
20472   IX86_BUILTIN_PUNPCKHBW,
20473   IX86_BUILTIN_PUNPCKHWD,
20474   IX86_BUILTIN_PUNPCKHDQ,
20475   IX86_BUILTIN_PUNPCKLBW,
20476   IX86_BUILTIN_PUNPCKLWD,
20477   IX86_BUILTIN_PUNPCKLDQ,
20478
20479   IX86_BUILTIN_SHUFPS,
20480
20481   IX86_BUILTIN_RCPPS,
20482   IX86_BUILTIN_RCPSS,
20483   IX86_BUILTIN_RSQRTPS,
20484   IX86_BUILTIN_RSQRTPS_NR,
20485   IX86_BUILTIN_RSQRTSS,
20486   IX86_BUILTIN_RSQRTF,
20487   IX86_BUILTIN_SQRTPS,
20488   IX86_BUILTIN_SQRTPS_NR,
20489   IX86_BUILTIN_SQRTSS,
20490
20491   IX86_BUILTIN_UNPCKHPS,
20492   IX86_BUILTIN_UNPCKLPS,
20493
20494   IX86_BUILTIN_ANDPS,
20495   IX86_BUILTIN_ANDNPS,
20496   IX86_BUILTIN_ORPS,
20497   IX86_BUILTIN_XORPS,
20498
20499   IX86_BUILTIN_EMMS,
20500   IX86_BUILTIN_LDMXCSR,
20501   IX86_BUILTIN_STMXCSR,
20502   IX86_BUILTIN_SFENCE,
20503
20504   /* 3DNow! Original */
20505   IX86_BUILTIN_FEMMS,
20506   IX86_BUILTIN_PAVGUSB,
20507   IX86_BUILTIN_PF2ID,
20508   IX86_BUILTIN_PFACC,
20509   IX86_BUILTIN_PFADD,
20510   IX86_BUILTIN_PFCMPEQ,
20511   IX86_BUILTIN_PFCMPGE,
20512   IX86_BUILTIN_PFCMPGT,
20513   IX86_BUILTIN_PFMAX,
20514   IX86_BUILTIN_PFMIN,
20515   IX86_BUILTIN_PFMUL,
20516   IX86_BUILTIN_PFRCP,
20517   IX86_BUILTIN_PFRCPIT1,
20518   IX86_BUILTIN_PFRCPIT2,
20519   IX86_BUILTIN_PFRSQIT1,
20520   IX86_BUILTIN_PFRSQRT,
20521   IX86_BUILTIN_PFSUB,
20522   IX86_BUILTIN_PFSUBR,
20523   IX86_BUILTIN_PI2FD,
20524   IX86_BUILTIN_PMULHRW,
20525
20526   /* 3DNow! Athlon Extensions */
20527   IX86_BUILTIN_PF2IW,
20528   IX86_BUILTIN_PFNACC,
20529   IX86_BUILTIN_PFPNACC,
20530   IX86_BUILTIN_PI2FW,
20531   IX86_BUILTIN_PSWAPDSI,
20532   IX86_BUILTIN_PSWAPDSF,
20533
20534   /* SSE2 */
20535   IX86_BUILTIN_ADDPD,
20536   IX86_BUILTIN_ADDSD,
20537   IX86_BUILTIN_DIVPD,
20538   IX86_BUILTIN_DIVSD,
20539   IX86_BUILTIN_MULPD,
20540   IX86_BUILTIN_MULSD,
20541   IX86_BUILTIN_SUBPD,
20542   IX86_BUILTIN_SUBSD,
20543
20544   IX86_BUILTIN_CMPEQPD,
20545   IX86_BUILTIN_CMPLTPD,
20546   IX86_BUILTIN_CMPLEPD,
20547   IX86_BUILTIN_CMPGTPD,
20548   IX86_BUILTIN_CMPGEPD,
20549   IX86_BUILTIN_CMPNEQPD,
20550   IX86_BUILTIN_CMPNLTPD,
20551   IX86_BUILTIN_CMPNLEPD,
20552   IX86_BUILTIN_CMPNGTPD,
20553   IX86_BUILTIN_CMPNGEPD,
20554   IX86_BUILTIN_CMPORDPD,
20555   IX86_BUILTIN_CMPUNORDPD,
20556   IX86_BUILTIN_CMPEQSD,
20557   IX86_BUILTIN_CMPLTSD,
20558   IX86_BUILTIN_CMPLESD,
20559   IX86_BUILTIN_CMPNEQSD,
20560   IX86_BUILTIN_CMPNLTSD,
20561   IX86_BUILTIN_CMPNLESD,
20562   IX86_BUILTIN_CMPORDSD,
20563   IX86_BUILTIN_CMPUNORDSD,
20564
20565   IX86_BUILTIN_COMIEQSD,
20566   IX86_BUILTIN_COMILTSD,
20567   IX86_BUILTIN_COMILESD,
20568   IX86_BUILTIN_COMIGTSD,
20569   IX86_BUILTIN_COMIGESD,
20570   IX86_BUILTIN_COMINEQSD,
20571   IX86_BUILTIN_UCOMIEQSD,
20572   IX86_BUILTIN_UCOMILTSD,
20573   IX86_BUILTIN_UCOMILESD,
20574   IX86_BUILTIN_UCOMIGTSD,
20575   IX86_BUILTIN_UCOMIGESD,
20576   IX86_BUILTIN_UCOMINEQSD,
20577
20578   IX86_BUILTIN_MAXPD,
20579   IX86_BUILTIN_MAXSD,
20580   IX86_BUILTIN_MINPD,
20581   IX86_BUILTIN_MINSD,
20582
20583   IX86_BUILTIN_ANDPD,
20584   IX86_BUILTIN_ANDNPD,
20585   IX86_BUILTIN_ORPD,
20586   IX86_BUILTIN_XORPD,
20587
20588   IX86_BUILTIN_SQRTPD,
20589   IX86_BUILTIN_SQRTSD,
20590
20591   IX86_BUILTIN_UNPCKHPD,
20592   IX86_BUILTIN_UNPCKLPD,
20593
20594   IX86_BUILTIN_SHUFPD,
20595
20596   IX86_BUILTIN_LOADUPD,
20597   IX86_BUILTIN_STOREUPD,
20598   IX86_BUILTIN_MOVSD,
20599
20600   IX86_BUILTIN_LOADHPD,
20601   IX86_BUILTIN_LOADLPD,
20602
20603   IX86_BUILTIN_CVTDQ2PD,
20604   IX86_BUILTIN_CVTDQ2PS,
20605
20606   IX86_BUILTIN_CVTPD2DQ,
20607   IX86_BUILTIN_CVTPD2PI,
20608   IX86_BUILTIN_CVTPD2PS,
20609   IX86_BUILTIN_CVTTPD2DQ,
20610   IX86_BUILTIN_CVTTPD2PI,
20611
20612   IX86_BUILTIN_CVTPI2PD,
20613   IX86_BUILTIN_CVTSI2SD,
20614   IX86_BUILTIN_CVTSI642SD,
20615
20616   IX86_BUILTIN_CVTSD2SI,
20617   IX86_BUILTIN_CVTSD2SI64,
20618   IX86_BUILTIN_CVTSD2SS,
20619   IX86_BUILTIN_CVTSS2SD,
20620   IX86_BUILTIN_CVTTSD2SI,
20621   IX86_BUILTIN_CVTTSD2SI64,
20622
20623   IX86_BUILTIN_CVTPS2DQ,
20624   IX86_BUILTIN_CVTPS2PD,
20625   IX86_BUILTIN_CVTTPS2DQ,
20626
20627   IX86_BUILTIN_MOVNTI,
20628   IX86_BUILTIN_MOVNTPD,
20629   IX86_BUILTIN_MOVNTDQ,
20630
20631   IX86_BUILTIN_MOVQ128,
20632
20633   /* SSE2 MMX */
20634   IX86_BUILTIN_MASKMOVDQU,
20635   IX86_BUILTIN_MOVMSKPD,
20636   IX86_BUILTIN_PMOVMSKB128,
20637
20638   IX86_BUILTIN_PACKSSWB128,
20639   IX86_BUILTIN_PACKSSDW128,
20640   IX86_BUILTIN_PACKUSWB128,
20641
20642   IX86_BUILTIN_PADDB128,
20643   IX86_BUILTIN_PADDW128,
20644   IX86_BUILTIN_PADDD128,
20645   IX86_BUILTIN_PADDQ128,
20646   IX86_BUILTIN_PADDSB128,
20647   IX86_BUILTIN_PADDSW128,
20648   IX86_BUILTIN_PADDUSB128,
20649   IX86_BUILTIN_PADDUSW128,
20650   IX86_BUILTIN_PSUBB128,
20651   IX86_BUILTIN_PSUBW128,
20652   IX86_BUILTIN_PSUBD128,
20653   IX86_BUILTIN_PSUBQ128,
20654   IX86_BUILTIN_PSUBSB128,
20655   IX86_BUILTIN_PSUBSW128,
20656   IX86_BUILTIN_PSUBUSB128,
20657   IX86_BUILTIN_PSUBUSW128,
20658
20659   IX86_BUILTIN_PAND128,
20660   IX86_BUILTIN_PANDN128,
20661   IX86_BUILTIN_POR128,
20662   IX86_BUILTIN_PXOR128,
20663
20664   IX86_BUILTIN_PAVGB128,
20665   IX86_BUILTIN_PAVGW128,
20666
20667   IX86_BUILTIN_PCMPEQB128,
20668   IX86_BUILTIN_PCMPEQW128,
20669   IX86_BUILTIN_PCMPEQD128,
20670   IX86_BUILTIN_PCMPGTB128,
20671   IX86_BUILTIN_PCMPGTW128,
20672   IX86_BUILTIN_PCMPGTD128,
20673
20674   IX86_BUILTIN_PMADDWD128,
20675
20676   IX86_BUILTIN_PMAXSW128,
20677   IX86_BUILTIN_PMAXUB128,
20678   IX86_BUILTIN_PMINSW128,
20679   IX86_BUILTIN_PMINUB128,
20680
20681   IX86_BUILTIN_PMULUDQ,
20682   IX86_BUILTIN_PMULUDQ128,
20683   IX86_BUILTIN_PMULHUW128,
20684   IX86_BUILTIN_PMULHW128,
20685   IX86_BUILTIN_PMULLW128,
20686
20687   IX86_BUILTIN_PSADBW128,
20688   IX86_BUILTIN_PSHUFHW,
20689   IX86_BUILTIN_PSHUFLW,
20690   IX86_BUILTIN_PSHUFD,
20691
20692   IX86_BUILTIN_PSLLDQI128,
20693   IX86_BUILTIN_PSLLWI128,
20694   IX86_BUILTIN_PSLLDI128,
20695   IX86_BUILTIN_PSLLQI128,
20696   IX86_BUILTIN_PSRAWI128,
20697   IX86_BUILTIN_PSRADI128,
20698   IX86_BUILTIN_PSRLDQI128,
20699   IX86_BUILTIN_PSRLWI128,
20700   IX86_BUILTIN_PSRLDI128,
20701   IX86_BUILTIN_PSRLQI128,
20702
20703   IX86_BUILTIN_PSLLDQ128,
20704   IX86_BUILTIN_PSLLW128,
20705   IX86_BUILTIN_PSLLD128,
20706   IX86_BUILTIN_PSLLQ128,
20707   IX86_BUILTIN_PSRAW128,
20708   IX86_BUILTIN_PSRAD128,
20709   IX86_BUILTIN_PSRLW128,
20710   IX86_BUILTIN_PSRLD128,
20711   IX86_BUILTIN_PSRLQ128,
20712
20713   IX86_BUILTIN_PUNPCKHBW128,
20714   IX86_BUILTIN_PUNPCKHWD128,
20715   IX86_BUILTIN_PUNPCKHDQ128,
20716   IX86_BUILTIN_PUNPCKHQDQ128,
20717   IX86_BUILTIN_PUNPCKLBW128,
20718   IX86_BUILTIN_PUNPCKLWD128,
20719   IX86_BUILTIN_PUNPCKLDQ128,
20720   IX86_BUILTIN_PUNPCKLQDQ128,
20721
20722   IX86_BUILTIN_CLFLUSH,
20723   IX86_BUILTIN_MFENCE,
20724   IX86_BUILTIN_LFENCE,
20725
20726   IX86_BUILTIN_BSRSI,
20727   IX86_BUILTIN_BSRDI,
20728   IX86_BUILTIN_RDPMC,
20729   IX86_BUILTIN_RDTSC,
20730   IX86_BUILTIN_RDTSCP,
20731   IX86_BUILTIN_ROLQI,
20732   IX86_BUILTIN_ROLHI,
20733   IX86_BUILTIN_RORQI,
20734   IX86_BUILTIN_RORHI,
20735
20736   /* SSE3.  */
20737   IX86_BUILTIN_ADDSUBPS,
20738   IX86_BUILTIN_HADDPS,
20739   IX86_BUILTIN_HSUBPS,
20740   IX86_BUILTIN_MOVSHDUP,
20741   IX86_BUILTIN_MOVSLDUP,
20742   IX86_BUILTIN_ADDSUBPD,
20743   IX86_BUILTIN_HADDPD,
20744   IX86_BUILTIN_HSUBPD,
20745   IX86_BUILTIN_LDDQU,
20746
20747   IX86_BUILTIN_MONITOR,
20748   IX86_BUILTIN_MWAIT,
20749
20750   /* SSSE3.  */
20751   IX86_BUILTIN_PHADDW,
20752   IX86_BUILTIN_PHADDD,
20753   IX86_BUILTIN_PHADDSW,
20754   IX86_BUILTIN_PHSUBW,
20755   IX86_BUILTIN_PHSUBD,
20756   IX86_BUILTIN_PHSUBSW,
20757   IX86_BUILTIN_PMADDUBSW,
20758   IX86_BUILTIN_PMULHRSW,
20759   IX86_BUILTIN_PSHUFB,
20760   IX86_BUILTIN_PSIGNB,
20761   IX86_BUILTIN_PSIGNW,
20762   IX86_BUILTIN_PSIGND,
20763   IX86_BUILTIN_PALIGNR,
20764   IX86_BUILTIN_PABSB,
20765   IX86_BUILTIN_PABSW,
20766   IX86_BUILTIN_PABSD,
20767
20768   IX86_BUILTIN_PHADDW128,
20769   IX86_BUILTIN_PHADDD128,
20770   IX86_BUILTIN_PHADDSW128,
20771   IX86_BUILTIN_PHSUBW128,
20772   IX86_BUILTIN_PHSUBD128,
20773   IX86_BUILTIN_PHSUBSW128,
20774   IX86_BUILTIN_PMADDUBSW128,
20775   IX86_BUILTIN_PMULHRSW128,
20776   IX86_BUILTIN_PSHUFB128,
20777   IX86_BUILTIN_PSIGNB128,
20778   IX86_BUILTIN_PSIGNW128,
20779   IX86_BUILTIN_PSIGND128,
20780   IX86_BUILTIN_PALIGNR128,
20781   IX86_BUILTIN_PABSB128,
20782   IX86_BUILTIN_PABSW128,
20783   IX86_BUILTIN_PABSD128,
20784
20785   /* AMDFAM10 - SSE4A New Instructions.  */
20786   IX86_BUILTIN_MOVNTSD,
20787   IX86_BUILTIN_MOVNTSS,
20788   IX86_BUILTIN_EXTRQI,
20789   IX86_BUILTIN_EXTRQ,
20790   IX86_BUILTIN_INSERTQI,
20791   IX86_BUILTIN_INSERTQ,
20792
20793   /* SSE4.1.  */
20794   IX86_BUILTIN_BLENDPD,
20795   IX86_BUILTIN_BLENDPS,
20796   IX86_BUILTIN_BLENDVPD,
20797   IX86_BUILTIN_BLENDVPS,
20798   IX86_BUILTIN_PBLENDVB128,
20799   IX86_BUILTIN_PBLENDW128,
20800
20801   IX86_BUILTIN_DPPD,
20802   IX86_BUILTIN_DPPS,
20803
20804   IX86_BUILTIN_INSERTPS128,
20805
20806   IX86_BUILTIN_MOVNTDQA,
20807   IX86_BUILTIN_MPSADBW128,
20808   IX86_BUILTIN_PACKUSDW128,
20809   IX86_BUILTIN_PCMPEQQ,
20810   IX86_BUILTIN_PHMINPOSUW128,
20811
20812   IX86_BUILTIN_PMAXSB128,
20813   IX86_BUILTIN_PMAXSD128,
20814   IX86_BUILTIN_PMAXUD128,
20815   IX86_BUILTIN_PMAXUW128,
20816
20817   IX86_BUILTIN_PMINSB128,
20818   IX86_BUILTIN_PMINSD128,
20819   IX86_BUILTIN_PMINUD128,
20820   IX86_BUILTIN_PMINUW128,
20821
20822   IX86_BUILTIN_PMOVSXBW128,
20823   IX86_BUILTIN_PMOVSXBD128,
20824   IX86_BUILTIN_PMOVSXBQ128,
20825   IX86_BUILTIN_PMOVSXWD128,
20826   IX86_BUILTIN_PMOVSXWQ128,
20827   IX86_BUILTIN_PMOVSXDQ128,
20828
20829   IX86_BUILTIN_PMOVZXBW128,
20830   IX86_BUILTIN_PMOVZXBD128,
20831   IX86_BUILTIN_PMOVZXBQ128,
20832   IX86_BUILTIN_PMOVZXWD128,
20833   IX86_BUILTIN_PMOVZXWQ128,
20834   IX86_BUILTIN_PMOVZXDQ128,
20835
20836   IX86_BUILTIN_PMULDQ128,
20837   IX86_BUILTIN_PMULLD128,
20838
20839   IX86_BUILTIN_ROUNDPD,
20840   IX86_BUILTIN_ROUNDPS,
20841   IX86_BUILTIN_ROUNDSD,
20842   IX86_BUILTIN_ROUNDSS,
20843
20844   IX86_BUILTIN_PTESTZ,
20845   IX86_BUILTIN_PTESTC,
20846   IX86_BUILTIN_PTESTNZC,
20847
20848   IX86_BUILTIN_VEC_INIT_V2SI,
20849   IX86_BUILTIN_VEC_INIT_V4HI,
20850   IX86_BUILTIN_VEC_INIT_V8QI,
20851   IX86_BUILTIN_VEC_EXT_V2DF,
20852   IX86_BUILTIN_VEC_EXT_V2DI,
20853   IX86_BUILTIN_VEC_EXT_V4SF,
20854   IX86_BUILTIN_VEC_EXT_V4SI,
20855   IX86_BUILTIN_VEC_EXT_V8HI,
20856   IX86_BUILTIN_VEC_EXT_V2SI,
20857   IX86_BUILTIN_VEC_EXT_V4HI,
20858   IX86_BUILTIN_VEC_EXT_V16QI,
20859   IX86_BUILTIN_VEC_SET_V2DI,
20860   IX86_BUILTIN_VEC_SET_V4SF,
20861   IX86_BUILTIN_VEC_SET_V4SI,
20862   IX86_BUILTIN_VEC_SET_V8HI,
20863   IX86_BUILTIN_VEC_SET_V4HI,
20864   IX86_BUILTIN_VEC_SET_V16QI,
20865
20866   IX86_BUILTIN_VEC_PACK_SFIX,
20867
20868   /* SSE4.2.  */
20869   IX86_BUILTIN_CRC32QI,
20870   IX86_BUILTIN_CRC32HI,
20871   IX86_BUILTIN_CRC32SI,
20872   IX86_BUILTIN_CRC32DI,
20873
20874   IX86_BUILTIN_PCMPESTRI128,
20875   IX86_BUILTIN_PCMPESTRM128,
20876   IX86_BUILTIN_PCMPESTRA128,
20877   IX86_BUILTIN_PCMPESTRC128,
20878   IX86_BUILTIN_PCMPESTRO128,
20879   IX86_BUILTIN_PCMPESTRS128,
20880   IX86_BUILTIN_PCMPESTRZ128,
20881   IX86_BUILTIN_PCMPISTRI128,
20882   IX86_BUILTIN_PCMPISTRM128,
20883   IX86_BUILTIN_PCMPISTRA128,
20884   IX86_BUILTIN_PCMPISTRC128,
20885   IX86_BUILTIN_PCMPISTRO128,
20886   IX86_BUILTIN_PCMPISTRS128,
20887   IX86_BUILTIN_PCMPISTRZ128,
20888
20889   IX86_BUILTIN_PCMPGTQ,
20890
20891   /* AES instructions */
20892   IX86_BUILTIN_AESENC128,
20893   IX86_BUILTIN_AESENCLAST128,
20894   IX86_BUILTIN_AESDEC128,
20895   IX86_BUILTIN_AESDECLAST128,
20896   IX86_BUILTIN_AESIMC128,
20897   IX86_BUILTIN_AESKEYGENASSIST128,
20898
20899   /* PCLMUL instruction */
20900   IX86_BUILTIN_PCLMULQDQ128,
20901
20902   /* AVX */
20903   IX86_BUILTIN_ADDPD256,
20904   IX86_BUILTIN_ADDPS256,
20905   IX86_BUILTIN_ADDSUBPD256,
20906   IX86_BUILTIN_ADDSUBPS256,
20907   IX86_BUILTIN_ANDPD256,
20908   IX86_BUILTIN_ANDPS256,
20909   IX86_BUILTIN_ANDNPD256,
20910   IX86_BUILTIN_ANDNPS256,
20911   IX86_BUILTIN_BLENDPD256,
20912   IX86_BUILTIN_BLENDPS256,
20913   IX86_BUILTIN_BLENDVPD256,
20914   IX86_BUILTIN_BLENDVPS256,
20915   IX86_BUILTIN_DIVPD256,
20916   IX86_BUILTIN_DIVPS256,
20917   IX86_BUILTIN_DPPS256,
20918   IX86_BUILTIN_HADDPD256,
20919   IX86_BUILTIN_HADDPS256,
20920   IX86_BUILTIN_HSUBPD256,
20921   IX86_BUILTIN_HSUBPS256,
20922   IX86_BUILTIN_MAXPD256,
20923   IX86_BUILTIN_MAXPS256,
20924   IX86_BUILTIN_MINPD256,
20925   IX86_BUILTIN_MINPS256,
20926   IX86_BUILTIN_MULPD256,
20927   IX86_BUILTIN_MULPS256,
20928   IX86_BUILTIN_ORPD256,
20929   IX86_BUILTIN_ORPS256,
20930   IX86_BUILTIN_SHUFPD256,
20931   IX86_BUILTIN_SHUFPS256,
20932   IX86_BUILTIN_SUBPD256,
20933   IX86_BUILTIN_SUBPS256,
20934   IX86_BUILTIN_XORPD256,
20935   IX86_BUILTIN_XORPS256,
20936   IX86_BUILTIN_CMPSD,
20937   IX86_BUILTIN_CMPSS,
20938   IX86_BUILTIN_CMPPD,
20939   IX86_BUILTIN_CMPPS,
20940   IX86_BUILTIN_CMPPD256,
20941   IX86_BUILTIN_CMPPS256,
20942   IX86_BUILTIN_CVTDQ2PD256,
20943   IX86_BUILTIN_CVTDQ2PS256,
20944   IX86_BUILTIN_CVTPD2PS256,
20945   IX86_BUILTIN_CVTPS2DQ256,
20946   IX86_BUILTIN_CVTPS2PD256,
20947   IX86_BUILTIN_CVTTPD2DQ256,
20948   IX86_BUILTIN_CVTPD2DQ256,
20949   IX86_BUILTIN_CVTTPS2DQ256,
20950   IX86_BUILTIN_EXTRACTF128PD256,
20951   IX86_BUILTIN_EXTRACTF128PS256,
20952   IX86_BUILTIN_EXTRACTF128SI256,
20953   IX86_BUILTIN_VZEROALL,
20954   IX86_BUILTIN_VZEROUPPER,
20955   IX86_BUILTIN_VPERMILVARPD,
20956   IX86_BUILTIN_VPERMILVARPS,
20957   IX86_BUILTIN_VPERMILVARPD256,
20958   IX86_BUILTIN_VPERMILVARPS256,
20959   IX86_BUILTIN_VPERMILPD,
20960   IX86_BUILTIN_VPERMILPS,
20961   IX86_BUILTIN_VPERMILPD256,
20962   IX86_BUILTIN_VPERMILPS256,
20963   IX86_BUILTIN_VPERM2F128PD256,
20964   IX86_BUILTIN_VPERM2F128PS256,
20965   IX86_BUILTIN_VPERM2F128SI256,
20966   IX86_BUILTIN_VBROADCASTSS,
20967   IX86_BUILTIN_VBROADCASTSD256,
20968   IX86_BUILTIN_VBROADCASTSS256,
20969   IX86_BUILTIN_VBROADCASTPD256,
20970   IX86_BUILTIN_VBROADCASTPS256,
20971   IX86_BUILTIN_VINSERTF128PD256,
20972   IX86_BUILTIN_VINSERTF128PS256,
20973   IX86_BUILTIN_VINSERTF128SI256,
20974   IX86_BUILTIN_LOADUPD256,
20975   IX86_BUILTIN_LOADUPS256,
20976   IX86_BUILTIN_STOREUPD256,
20977   IX86_BUILTIN_STOREUPS256,
20978   IX86_BUILTIN_LDDQU256,
20979   IX86_BUILTIN_MOVNTDQ256,
20980   IX86_BUILTIN_MOVNTPD256,
20981   IX86_BUILTIN_MOVNTPS256,
20982   IX86_BUILTIN_LOADDQU256,
20983   IX86_BUILTIN_STOREDQU256,
20984   IX86_BUILTIN_MASKLOADPD,
20985   IX86_BUILTIN_MASKLOADPS,
20986   IX86_BUILTIN_MASKSTOREPD,
20987   IX86_BUILTIN_MASKSTOREPS,
20988   IX86_BUILTIN_MASKLOADPD256,
20989   IX86_BUILTIN_MASKLOADPS256,
20990   IX86_BUILTIN_MASKSTOREPD256,
20991   IX86_BUILTIN_MASKSTOREPS256,
20992   IX86_BUILTIN_MOVSHDUP256,
20993   IX86_BUILTIN_MOVSLDUP256,
20994   IX86_BUILTIN_MOVDDUP256,
20995
20996   IX86_BUILTIN_SQRTPD256,
20997   IX86_BUILTIN_SQRTPS256,
20998   IX86_BUILTIN_SQRTPS_NR256,
20999   IX86_BUILTIN_RSQRTPS256,
21000   IX86_BUILTIN_RSQRTPS_NR256,
21001
21002   IX86_BUILTIN_RCPPS256,
21003
21004   IX86_BUILTIN_ROUNDPD256,
21005   IX86_BUILTIN_ROUNDPS256,
21006
21007   IX86_BUILTIN_UNPCKHPD256,
21008   IX86_BUILTIN_UNPCKLPD256,
21009   IX86_BUILTIN_UNPCKHPS256,
21010   IX86_BUILTIN_UNPCKLPS256,
21011
21012   IX86_BUILTIN_SI256_SI,
21013   IX86_BUILTIN_PS256_PS,
21014   IX86_BUILTIN_PD256_PD,
21015   IX86_BUILTIN_SI_SI256,
21016   IX86_BUILTIN_PS_PS256,
21017   IX86_BUILTIN_PD_PD256,
21018
21019   IX86_BUILTIN_VTESTZPD,
21020   IX86_BUILTIN_VTESTCPD,
21021   IX86_BUILTIN_VTESTNZCPD,
21022   IX86_BUILTIN_VTESTZPS,
21023   IX86_BUILTIN_VTESTCPS,
21024   IX86_BUILTIN_VTESTNZCPS,
21025   IX86_BUILTIN_VTESTZPD256,
21026   IX86_BUILTIN_VTESTCPD256,
21027   IX86_BUILTIN_VTESTNZCPD256,
21028   IX86_BUILTIN_VTESTZPS256,
21029   IX86_BUILTIN_VTESTCPS256,
21030   IX86_BUILTIN_VTESTNZCPS256,
21031   IX86_BUILTIN_PTESTZ256,
21032   IX86_BUILTIN_PTESTC256,
21033   IX86_BUILTIN_PTESTNZC256,
21034
21035   IX86_BUILTIN_MOVMSKPD256,
21036   IX86_BUILTIN_MOVMSKPS256,
21037
21038   /* TFmode support builtins.  */
21039   IX86_BUILTIN_INFQ,
21040   IX86_BUILTIN_HUGE_VALQ,
21041   IX86_BUILTIN_FABSQ,
21042   IX86_BUILTIN_COPYSIGNQ,
21043
21044   /* Vectorizer support builtins.  */
21045   IX86_BUILTIN_CPYSGNPS,
21046   IX86_BUILTIN_CPYSGNPD,
21047
21048   IX86_BUILTIN_CVTUDQ2PS,
21049
21050   IX86_BUILTIN_VEC_PERM_V2DF,
21051   IX86_BUILTIN_VEC_PERM_V4SF,
21052   IX86_BUILTIN_VEC_PERM_V2DI,
21053   IX86_BUILTIN_VEC_PERM_V4SI,
21054   IX86_BUILTIN_VEC_PERM_V8HI,
21055   IX86_BUILTIN_VEC_PERM_V16QI,
21056   IX86_BUILTIN_VEC_PERM_V2DI_U,
21057   IX86_BUILTIN_VEC_PERM_V4SI_U,
21058   IX86_BUILTIN_VEC_PERM_V8HI_U,
21059   IX86_BUILTIN_VEC_PERM_V16QI_U,
21060   IX86_BUILTIN_VEC_PERM_V4DF,
21061   IX86_BUILTIN_VEC_PERM_V8SF,
21062
21063   /* FMA4 and XOP instructions.  */
21064   IX86_BUILTIN_VFMADDSS,
21065   IX86_BUILTIN_VFMADDSD,
21066   IX86_BUILTIN_VFMADDPS,
21067   IX86_BUILTIN_VFMADDPD,
21068   IX86_BUILTIN_VFMSUBSS,
21069   IX86_BUILTIN_VFMSUBSD,
21070   IX86_BUILTIN_VFMSUBPS,
21071   IX86_BUILTIN_VFMSUBPD,
21072   IX86_BUILTIN_VFMADDSUBPS,
21073   IX86_BUILTIN_VFMADDSUBPD,
21074   IX86_BUILTIN_VFMSUBADDPS,
21075   IX86_BUILTIN_VFMSUBADDPD,
21076   IX86_BUILTIN_VFNMADDSS,
21077   IX86_BUILTIN_VFNMADDSD,
21078   IX86_BUILTIN_VFNMADDPS,
21079   IX86_BUILTIN_VFNMADDPD,
21080   IX86_BUILTIN_VFNMSUBSS,
21081   IX86_BUILTIN_VFNMSUBSD,
21082   IX86_BUILTIN_VFNMSUBPS,
21083   IX86_BUILTIN_VFNMSUBPD,
21084   IX86_BUILTIN_VFMADDPS256,
21085   IX86_BUILTIN_VFMADDPD256,
21086   IX86_BUILTIN_VFMSUBPS256,
21087   IX86_BUILTIN_VFMSUBPD256,
21088   IX86_BUILTIN_VFMADDSUBPS256,
21089   IX86_BUILTIN_VFMADDSUBPD256,
21090   IX86_BUILTIN_VFMSUBADDPS256,
21091   IX86_BUILTIN_VFMSUBADDPD256,
21092   IX86_BUILTIN_VFNMADDPS256,
21093   IX86_BUILTIN_VFNMADDPD256,
21094   IX86_BUILTIN_VFNMSUBPS256,
21095   IX86_BUILTIN_VFNMSUBPD256,
21096
21097   IX86_BUILTIN_VPCMOV,
21098   IX86_BUILTIN_VPCMOV_V2DI,
21099   IX86_BUILTIN_VPCMOV_V4SI,
21100   IX86_BUILTIN_VPCMOV_V8HI,
21101   IX86_BUILTIN_VPCMOV_V16QI,
21102   IX86_BUILTIN_VPCMOV_V4SF,
21103   IX86_BUILTIN_VPCMOV_V2DF,
21104   IX86_BUILTIN_VPCMOV256,
21105   IX86_BUILTIN_VPCMOV_V4DI256,
21106   IX86_BUILTIN_VPCMOV_V8SI256,
21107   IX86_BUILTIN_VPCMOV_V16HI256,
21108   IX86_BUILTIN_VPCMOV_V32QI256,
21109   IX86_BUILTIN_VPCMOV_V8SF256,
21110   IX86_BUILTIN_VPCMOV_V4DF256,
21111
21112   IX86_BUILTIN_VPPERM,
21113
21114   IX86_BUILTIN_VPMACSSWW,
21115   IX86_BUILTIN_VPMACSWW,
21116   IX86_BUILTIN_VPMACSSWD,
21117   IX86_BUILTIN_VPMACSWD,
21118   IX86_BUILTIN_VPMACSSDD,
21119   IX86_BUILTIN_VPMACSDD,
21120   IX86_BUILTIN_VPMACSSDQL,
21121   IX86_BUILTIN_VPMACSSDQH,
21122   IX86_BUILTIN_VPMACSDQL,
21123   IX86_BUILTIN_VPMACSDQH,
21124   IX86_BUILTIN_VPMADCSSWD,
21125   IX86_BUILTIN_VPMADCSWD,
21126
21127   IX86_BUILTIN_VPHADDBW,
21128   IX86_BUILTIN_VPHADDBD,
21129   IX86_BUILTIN_VPHADDBQ,
21130   IX86_BUILTIN_VPHADDWD,
21131   IX86_BUILTIN_VPHADDWQ,
21132   IX86_BUILTIN_VPHADDDQ,
21133   IX86_BUILTIN_VPHADDUBW,
21134   IX86_BUILTIN_VPHADDUBD,
21135   IX86_BUILTIN_VPHADDUBQ,
21136   IX86_BUILTIN_VPHADDUWD,
21137   IX86_BUILTIN_VPHADDUWQ,
21138   IX86_BUILTIN_VPHADDUDQ,
21139   IX86_BUILTIN_VPHSUBBW,
21140   IX86_BUILTIN_VPHSUBWD,
21141   IX86_BUILTIN_VPHSUBDQ,
21142
21143   IX86_BUILTIN_VPROTB,
21144   IX86_BUILTIN_VPROTW,
21145   IX86_BUILTIN_VPROTD,
21146   IX86_BUILTIN_VPROTQ,
21147   IX86_BUILTIN_VPROTB_IMM,
21148   IX86_BUILTIN_VPROTW_IMM,
21149   IX86_BUILTIN_VPROTD_IMM,
21150   IX86_BUILTIN_VPROTQ_IMM,
21151
21152   IX86_BUILTIN_VPSHLB,
21153   IX86_BUILTIN_VPSHLW,
21154   IX86_BUILTIN_VPSHLD,
21155   IX86_BUILTIN_VPSHLQ,
21156   IX86_BUILTIN_VPSHAB,
21157   IX86_BUILTIN_VPSHAW,
21158   IX86_BUILTIN_VPSHAD,
21159   IX86_BUILTIN_VPSHAQ,
21160
21161   IX86_BUILTIN_VFRCZSS,
21162   IX86_BUILTIN_VFRCZSD,
21163   IX86_BUILTIN_VFRCZPS,
21164   IX86_BUILTIN_VFRCZPD,
21165   IX86_BUILTIN_VFRCZPS256,
21166   IX86_BUILTIN_VFRCZPD256,
21167
21168   IX86_BUILTIN_VPCOMEQUB,
21169   IX86_BUILTIN_VPCOMNEUB,
21170   IX86_BUILTIN_VPCOMLTUB,
21171   IX86_BUILTIN_VPCOMLEUB,
21172   IX86_BUILTIN_VPCOMGTUB,
21173   IX86_BUILTIN_VPCOMGEUB,
21174   IX86_BUILTIN_VPCOMFALSEUB,
21175   IX86_BUILTIN_VPCOMTRUEUB,
21176
21177   IX86_BUILTIN_VPCOMEQUW,
21178   IX86_BUILTIN_VPCOMNEUW,
21179   IX86_BUILTIN_VPCOMLTUW,
21180   IX86_BUILTIN_VPCOMLEUW,
21181   IX86_BUILTIN_VPCOMGTUW,
21182   IX86_BUILTIN_VPCOMGEUW,
21183   IX86_BUILTIN_VPCOMFALSEUW,
21184   IX86_BUILTIN_VPCOMTRUEUW,
21185
21186   IX86_BUILTIN_VPCOMEQUD,
21187   IX86_BUILTIN_VPCOMNEUD,
21188   IX86_BUILTIN_VPCOMLTUD,
21189   IX86_BUILTIN_VPCOMLEUD,
21190   IX86_BUILTIN_VPCOMGTUD,
21191   IX86_BUILTIN_VPCOMGEUD,
21192   IX86_BUILTIN_VPCOMFALSEUD,
21193   IX86_BUILTIN_VPCOMTRUEUD,
21194
21195   IX86_BUILTIN_VPCOMEQUQ,
21196   IX86_BUILTIN_VPCOMNEUQ,
21197   IX86_BUILTIN_VPCOMLTUQ,
21198   IX86_BUILTIN_VPCOMLEUQ,
21199   IX86_BUILTIN_VPCOMGTUQ,
21200   IX86_BUILTIN_VPCOMGEUQ,
21201   IX86_BUILTIN_VPCOMFALSEUQ,
21202   IX86_BUILTIN_VPCOMTRUEUQ,
21203
21204   IX86_BUILTIN_VPCOMEQB,
21205   IX86_BUILTIN_VPCOMNEB,
21206   IX86_BUILTIN_VPCOMLTB,
21207   IX86_BUILTIN_VPCOMLEB,
21208   IX86_BUILTIN_VPCOMGTB,
21209   IX86_BUILTIN_VPCOMGEB,
21210   IX86_BUILTIN_VPCOMFALSEB,
21211   IX86_BUILTIN_VPCOMTRUEB,
21212
21213   IX86_BUILTIN_VPCOMEQW,
21214   IX86_BUILTIN_VPCOMNEW,
21215   IX86_BUILTIN_VPCOMLTW,
21216   IX86_BUILTIN_VPCOMLEW,
21217   IX86_BUILTIN_VPCOMGTW,
21218   IX86_BUILTIN_VPCOMGEW,
21219   IX86_BUILTIN_VPCOMFALSEW,
21220   IX86_BUILTIN_VPCOMTRUEW,
21221
21222   IX86_BUILTIN_VPCOMEQD,
21223   IX86_BUILTIN_VPCOMNED,
21224   IX86_BUILTIN_VPCOMLTD,
21225   IX86_BUILTIN_VPCOMLED,
21226   IX86_BUILTIN_VPCOMGTD,
21227   IX86_BUILTIN_VPCOMGED,
21228   IX86_BUILTIN_VPCOMFALSED,
21229   IX86_BUILTIN_VPCOMTRUED,
21230
21231   IX86_BUILTIN_VPCOMEQQ,
21232   IX86_BUILTIN_VPCOMNEQ,
21233   IX86_BUILTIN_VPCOMLTQ,
21234   IX86_BUILTIN_VPCOMLEQ,
21235   IX86_BUILTIN_VPCOMGTQ,
21236   IX86_BUILTIN_VPCOMGEQ,
21237   IX86_BUILTIN_VPCOMFALSEQ,
21238   IX86_BUILTIN_VPCOMTRUEQ,
21239
21240   /* LWP instructions.  */
21241   IX86_BUILTIN_LLWPCB,
21242   IX86_BUILTIN_SLWPCB,
21243   IX86_BUILTIN_LWPVAL32,
21244   IX86_BUILTIN_LWPVAL64,
21245   IX86_BUILTIN_LWPINS32,
21246   IX86_BUILTIN_LWPINS64,
21247
21248   IX86_BUILTIN_CLZS,
21249
21250   IX86_BUILTIN_MAX
21251 };
21252
21253 /* Table of the ix86 builtin function decls, indexed by enum ix86_builtins code.  */
21254 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21255
21256 /* Table of all of the builtin functions that are possible with different ISAs
21257    but are waiting to be built until a function is declared to use that
21258    ISA.  */
21259 struct builtin_isa {
21260   const char *name;             /* function name */
21261   enum ix86_builtin_func_type tcode; /* type to use in the declaration */
21262   int isa;                      /* isa_flags this builtin is defined for */
21263   bool const_p;                 /* true if the declaration is constant */
21264   bool set_and_not_built_p;     /* true if recorded but not yet declared */
21265 };
21266 /* One entry per builtin code; written by def_builtin and def_builtin_const.  */
21267 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21268
21269
21270 /* Register the ix86 target builtin NAME, with builtin code CODE and function
21271    type TCODE.  MASK is saved in ix86_builtins_isa[CODE].isa as the isa_flags
21272    the builtin is defined for.  Returns the function decl, also stored in
21273    ix86_builtins[CODE], or NULL_TREE if none was created.  Nothing at all is
21274    recorded when MASK has OPTION_MASK_ISA_64BIT set on a non-64-bit target.
21275
21276    If the front end has a distinct ext-scope hook for builtin functions,
21277    a builtin outside the current ISA is not declared here: only its name
21278    and type are saved, and the decl is added once the ISA is changed with
21279    function specific options (this saves about 300K for the default
21280    compiler).  Validity is then checked when the builtin is expanded.
21281
21282    Without such a hook, every builtin is declared right away, whatever the
21283    current ISA, so that no builtin has to be added in the middle of a
21284    function scope, which would cause scope errors.  */
21285
21286 static inline tree
21287 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
21288              enum ix86_builtins code)
21289 {
21290   struct builtin_isa *slot = &ix86_builtins_isa[(int) code];
21291   tree fndecl;
21292
21293   /* A 64-bit-only builtin is ignored entirely on a non-64-bit target.  */
21294   if ((mask & OPTION_MASK_ISA_64BIT) && !TARGET_64BIT)
21295     return NULL_TREE;
21296
21297   slot->isa = mask;
21298
21299   if (mask != 0
21300       && (mask & ix86_isa_flags) == 0
21301       && (lang_hooks.builtin_function
21302           != lang_hooks.builtin_function_ext_scope))
21303     {
21304       /* Outside the current ISA: save what is needed to declare it later.  */
21305       ix86_builtins[(int) code] = NULL_TREE;
21306       slot->tcode = tcode;
21307       slot->name = name;
21308       slot->const_p = false;
21309       slot->set_and_not_built_p = true;
21310       return NULL_TREE;
21311     }
21312
21313   /* Declare the builtin right away.  */
21314   fndecl = add_builtin_function (name, ix86_get_builtin_func_type (tcode),
21315                                  code, BUILT_IN_MD, NULL, NULL_TREE);
21316   ix86_builtins[(int) code] = fndecl;
21317   slot->set_and_not_built_p = false;
21318   return fndecl;
21319 }
21320
21321 /* Like def_builtin, but mark the builtin "const": on its decl if one was made, else in its ix86_builtins_isa entry.  */
21322
21323 static inline tree
21324 def_builtin_const (int mask, const char *name,
21325                    enum ix86_builtin_func_type tcode, enum ix86_builtins code)
21326 {
21327   tree fndecl = def_builtin (mask, name, tcode, code);
21328
21329   if (fndecl == NULL_TREE)
21330     ix86_builtins_isa[(int) code].const_p = true;
21331   else
21332     TREE_READONLY (fndecl) = 1;
21333   return fndecl;
21334 }
21335
21336 /* Declare every builtin that def_builtin deferred and whose recorded ISA
21337    mask shares a bit with ISA.  Deferring these declarations saves a bit of
21338    space compared to adding all of them to the tree even if unused.  */
21339
21340 static void
21341 ix86_add_new_builtins (int isa)
21342 {
21343   int code;
21344
21345   for (code = 0; code < (int) IX86_BUILTIN_MAX; code++)
21346     {
21347       struct builtin_isa *slot = &ix86_builtins_isa[code];
21348       tree type, fndecl;
21349
21350       /* Skip entries of other ISAs and entries with nothing pending.  */
21351       if ((slot->isa & isa) == 0 || !slot->set_and_not_built_p)
21352         continue;
21353
21354       /* Clear the flag so that the builtin is not defined again.  */
21355       slot->set_and_not_built_p = false;
21356
21357       type = ix86_get_builtin_func_type (slot->tcode);
21358       fndecl = add_builtin_function_ext_scope (slot->name, type, code,
21359                                                BUILT_IN_MD, NULL, NULL_TREE);
21360       ix86_builtins[code] = fndecl;
21361
21362       if (slot->const_p)
21363         TREE_READONLY (fndecl) = 1;
21364     }
21365 }
21366
21367 /* Bits for builtin_description.flag.  */
21368
21369 /* Set when we don't support the comparison natively, and should
21370    swap_comparison in order to support it.  */
21371 #define BUILTIN_DESC_SWAP_OPERANDS      1
21372 /* One row of the builtin description tables (bdesc_*) that follow.  */
21373 struct builtin_description
21374 {
21375   const unsigned int mask;              /* OPTION_MASK_ISA_* bits of the builtin */
21376   const enum insn_code icode;           /* CODE_FOR_* insn code */
21377   const char *const name;               /* builtin function name; some rows use 0 */
21378   const enum ix86_builtins code;        /* builtin code */
21379   const enum rtx_code comparison;       /* rtx comparison code, or UNKNOWN */
21380   const int flag;                       /* BUILTIN_DESC_* bits; some tables store a (int) mode or type code here */
21381 };
21382 /* Descriptions of the SSE and SSE2 comi/ucomi comparison builtins; the comparison field holds the rtx code of each, flag is 0.  */
21383 static const struct builtin_description bdesc_comi[] =
21384 {
21385   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21386   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21387   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21388   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21389   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21390   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21391   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21392   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21393   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21394   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21395   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21396   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21397   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21398   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21399   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21400   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21401   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21402   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21403   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21404   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21405   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21406   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21407   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21408   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21409 };
21410 /* Descriptions of the pcmpestr builtins.  All rows share one insn code; flag is 0 or a CC*mode value cast to int.  */
21411 static const struct builtin_description bdesc_pcmpestr[] =
21412 {
21413   /* SSE4.2 */
21414   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21415   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21416   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21417   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21418   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21419   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21420   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
21421 };
21422 /* Descriptions of the pcmpistr builtins.  All rows share one insn code; flag is 0 or a CC*mode value cast to int.  */
21423 static const struct builtin_description bdesc_pcmpistr[] =
21424 {
21425   /* SSE4.2 */
21426   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21427   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21428   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21429   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21430   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21431   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21432   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21433 };
21434
21435 /* Special builtins with variable number of arguments.  */
21436 static const struct builtin_description bdesc_special_args[] =
21437 {
21438   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
21439   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
21440
21441   /* MMX */
21442   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21443
21444   /* 3DNow! */
21445   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21446
21447   /* SSE */
21448   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21449   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21450   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21451
21452   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21453   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21454   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21455   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21456
21457   /* SSE or 3DNow!A  */
21458   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21459   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
21460
21461   /* SSE2 */
21462   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21463   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21464   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21465   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21466   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21467   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21468   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21469   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21470   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21471
21472   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21473   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21474
21475   /* SSE3 */
21476   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21477
21478   /* SSE4.1 */
21479   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21480
21481   /* SSE4A */
21482   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21483   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21484
21485   /* AVX */
21486   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21487   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21488
21489   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21490   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21491   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21492   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21493   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21494
21495   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21496   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21497   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21498   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21499   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21500   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21501   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21502
21503   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21504   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21505   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21506
21507   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21508   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21509   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21510   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21511   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21512   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21513   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21514   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21515
21516   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
21517   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
21518   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
21519   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
21520   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
21521   { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
21522
21523 };
21524
21525 /* Builtins with a variable number of arguments.  */
21526 static const struct builtin_description bdesc_args[] =
21527 {
21528   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
21529   { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
21530   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
21531   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21532   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21533   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21534   { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21535
21536   /* MMX */
21537   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21538   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21539   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21540   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21541   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21542   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21543
21544   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21545   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21546   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21547   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21548   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21549   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21550   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21551   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21552
21553   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21554   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21555
21556   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21557   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21558   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21559   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21560
21561   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21562   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21563   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21564   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21565   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21566   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21567
21568   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21569   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21570   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21571   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21572   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21573   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21574
21575   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21576   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21577   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21578
21579   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21580
21581   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21582   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21583   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21584   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21585   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21586   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21587
21588   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21589   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21590   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21591   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21592   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21593   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21594
21595   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21596   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21597   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21598   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21599
21600   /* 3DNow! */
21601   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21602   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21603   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21604   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21605
21606   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21607   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21608   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21609   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21610   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21611   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21612   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21613   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21614   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21615   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21616   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21617   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21618   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21619   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21620   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21621
21622   /* 3DNow!A */
21623   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21624   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21625   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21626   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21627   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21628   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21629
21630   /* SSE */
21631   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21632   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21633   { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21634   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21635   { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21636   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21637   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21638   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21639   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21640   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21641   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21642   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21643
21644   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21645
21646   { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21647   { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21648   { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21649   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21650   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21651   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21652   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21653   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21654
21655   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21656   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21657   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21658   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21659   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21660   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21661   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21662   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21663   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21664   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21665   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21666   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21667   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21668   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21669   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21670   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21671   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21672   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21673   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21674   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21675   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21676   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21677
21678   { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21679   { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21680   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21681   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21682
21683   { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21684   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21685   { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21686   { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21687
21688   { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3,  "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21689
21690   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21691   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21692   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21693   { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21694   { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21695
21696   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21697   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21698   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
21699
21700   { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21701
21702   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21703   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21704   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21705
21706   /* SSE MMX or 3DNow!A */
21707   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21708   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21709   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21710
21711   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21712   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21713   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21714   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21715
21716   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21717   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21718
21719   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21720
21721   /* SSE2 */
21722   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21723
21724   { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
21725   { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
21726   { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
21727   { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
21728   { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
21729   { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21730   { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
21731   { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
21732   { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
21733   { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
21734   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
21735   { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
21736
21737   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF  },
21738   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21739   { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21740   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21741   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21742   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21743
21744   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21745   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21746   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21747   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21748   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21749
21750   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21751
21752   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21753   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21754   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21755   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21756
21757   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21758   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21759   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21760
21761   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21762   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21763   { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21764   { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21765   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21766   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21767   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21768   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21769
21770   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21771   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21772   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21773   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21774   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21775   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21776   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21777   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21778   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21779   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21780   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21781   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21782   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21783   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21784   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21785   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21786   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21787   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21788   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21789   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21790
21791   { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21792   { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21793   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21794   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21795
21796   { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21797   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21798   { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21799   { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21800
21801   { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3,  "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21802
21803   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21804   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21805   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21806
21807   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21808
21809   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21810   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21811   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21812   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21813   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21814   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21815   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21816   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21817
21818   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21819   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21820   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21821   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21822   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21823   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21824   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21825   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21826
21827   { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21828   { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21829
21830   { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21831   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21832   { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21833   { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21834
21835   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21836   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21837
21838   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21839   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21840   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },
21841   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21842   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21843   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },
21844
21845   { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21846   { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21847   { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21848   { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21849
21850   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21851   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI  },
21852   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN,  (int) V4SI_FTYPE_V4SI_V4SI },
21853   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21854   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21855   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21856   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21857   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21858
21859   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21860   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21861   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21862
21863   { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21864   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21865
21866   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21867   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21868
21869   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21870
21871   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21872   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21873   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21874   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21875
21876   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
21877   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21878   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21879   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21880   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21881   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21882   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21883
21884   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
21885   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21886   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21887   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21888   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21889   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21890   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21891
21892   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21893   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21894   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21895   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21896
21897   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21898   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21899   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21900
21901   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21902
21903   { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21904   { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21905
21906   { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21907
21908   /* SSE2 MMX */
21909   { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21910   { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21911
21912   /* SSE3 */
21913   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21914   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21915
21916   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21917   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21918   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21919   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21920   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21921   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21922
21923   /* SSSE3 */
21924   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21925   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21926   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21927   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21928   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21929   { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21930
21931   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21932   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21933   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21934   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21935   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21936   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21937   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21938   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21939   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21940   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21941   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21942   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21943   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21944   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21945   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21946   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21947   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21948   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21949   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21950   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21951   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21952   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21953   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21954   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21955
21956   /* SSSE3 palignr: three-operand entries using the *_INT_CONVERT function types.  */
21957   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
21958   { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
21959
21960   /* SSE4.1 */
21961   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21962   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21963   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21964   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21965   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21966   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21967   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21968   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21969   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21970   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21971
21972   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21973   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21974   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21975   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21976   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21977   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21978   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21979   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21980   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21981   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21982   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21983   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21984   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21985
21986   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21987   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21988   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21989   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21990   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21991   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21992   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21993   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21994   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21995   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21996   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21997   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21998
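21999   /* SSE4.1 round* and ptest* builtins; these are gated on OPTION_MASK_ISA_ROUND rather than OPTION_MASK_ISA_SSE4_1.  */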
22000   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22001   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22002   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22003   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22004
22005   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22006   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22007   { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22008
22009   /* SSE4.2 */
22010   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22011   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
22012   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
22013   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
22014   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
22015
22016   /* SSE4A */
22017   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
22018   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
22019   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
22020   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22021
22022   /* AES */
22023   { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
22024   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22025
22026   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22027   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22028   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22029   { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22030
22031   /* PCLMUL */
22032   { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
22033
22034   /* AVX */
22035   { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22036   { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22037   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22038   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22039   { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22040   { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22041   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22042   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22043   { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22044   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22045   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22046   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22047   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22048   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22049   { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22050   { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22051   { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22052   { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22053   { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22054   { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22055   { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22056   { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22057   { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22058   { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22059   { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22060   { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22061
22062   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
22063   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
22064   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
22065   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
22066
22067   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22068   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22069   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
22070   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
22071   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22072   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22073   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22074   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22075   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22076   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22077   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22078   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22079   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22080   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
22081   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
22082   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
22083   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
22084   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
22085   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
22086   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22087   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
22088   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22089   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22090   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22091   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22092   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22093   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
22094   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22095   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22096   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22097   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22098   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22099   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22100   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
22101
22102   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22103   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22104   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22105
22106   { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22107   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22108   { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22109   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22110   { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22111
22112   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22113
22114   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22115   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22116
22117   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256,  "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22118   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256,  "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22119   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256,  "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22120   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256,  "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22121
22122   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22123   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22124   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22125   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22126   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22127   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
22128
22129   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22130   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22131   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22132   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22133   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22134   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22135   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22136   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22137   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22138   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22139   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22140   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22141   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22142   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22143   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22144
22145   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF  },
22146   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
22147
22148   { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm,   "__builtin_clzs",   IX86_BUILTIN_CLZS,    UNKNOWN,     (int) UINT16_FTYPE_UINT16 },
22149 };
22150
22151 /* FMA4 and XOP.

   Short aliases for the *_FTYPE_* identifiers that the bdesc_multi_arg
   table stores, cast to int, in the last field of each entry.

   Naming, as far as it can be read off the aliases themselves:

     MULTI_ARG_<n>_<elt>[2]    <n> (3, 2 or 1) matches the number of vector
                               types spelled after _FTYPE_ in the alias.
                               <elt> of SF, DF, DI, SI, HI, QI selects
                               V4SF, V2DF, V2DI, V4SI, V8HI, V16QI; a
                               trailing 2 selects V8SF, V4DF, V4DI, V8SI,
                               V16HI, V32QI instead.
     MULTI_ARG_<n>_<e1>_<e2>   The alias mixes two element types, e.g.
                               MULTI_ARG_1_QI_HI is V8HI_FTYPE_V16QI.
     ..._IMM                   The second type in the alias is SI rather
                               than a vector.
     ..._CMP, ..._TF           The two-operand alias with a _CMP or _TF tag
                               appended.

   NOTE(review): presumably the token before _FTYPE_ is the result type and
   the tokens after it are the operand types; the enum that declares these
   identifiers is not visible here, so confirm against it.
   NOTE(review): MULTI_ARG_3_SI_DI and MULTI_ARG_3_HI_SI start with the
   type of their first two operands (V4SI, V8HI), not the type of their
   third operand (V2DI, V4SI); verify that this is the intended signature
   for the builtins that use them.  */
22152 #define MULTI_ARG_3_SF          V4SF_FTYPE_V4SF_V4SF_V4SF
22153 #define MULTI_ARG_3_DF          V2DF_FTYPE_V2DF_V2DF_V2DF
22154 #define MULTI_ARG_3_SF2         V8SF_FTYPE_V8SF_V8SF_V8SF
22155 #define MULTI_ARG_3_DF2         V4DF_FTYPE_V4DF_V4DF_V4DF
22156 #define MULTI_ARG_3_DI          V2DI_FTYPE_V2DI_V2DI_V2DI
22157 #define MULTI_ARG_3_SI          V4SI_FTYPE_V4SI_V4SI_V4SI
22158 #define MULTI_ARG_3_SI_DI       V4SI_FTYPE_V4SI_V4SI_V2DI
22159 #define MULTI_ARG_3_HI          V8HI_FTYPE_V8HI_V8HI_V8HI
22160 #define MULTI_ARG_3_HI_SI       V8HI_FTYPE_V8HI_V8HI_V4SI
22161 #define MULTI_ARG_3_QI          V16QI_FTYPE_V16QI_V16QI_V16QI
22162 #define MULTI_ARG_3_DI2         V4DI_FTYPE_V4DI_V4DI_V4DI
22163 #define MULTI_ARG_3_SI2         V8SI_FTYPE_V8SI_V8SI_V8SI
22164 #define MULTI_ARG_3_HI2         V16HI_FTYPE_V16HI_V16HI_V16HI
22165 #define MULTI_ARG_3_QI2         V32QI_FTYPE_V32QI_V32QI_V32QI
22166 #define MULTI_ARG_2_SF          V4SF_FTYPE_V4SF_V4SF
22167 #define MULTI_ARG_2_DF          V2DF_FTYPE_V2DF_V2DF
22168 #define MULTI_ARG_2_DI          V2DI_FTYPE_V2DI_V2DI
22169 #define MULTI_ARG_2_SI          V4SI_FTYPE_V4SI_V4SI
22170 #define MULTI_ARG_2_HI          V8HI_FTYPE_V8HI_V8HI
22171 #define MULTI_ARG_2_QI          V16QI_FTYPE_V16QI_V16QI
22172 #define MULTI_ARG_2_DI_IMM      V2DI_FTYPE_V2DI_SI
22173 #define MULTI_ARG_2_SI_IMM      V4SI_FTYPE_V4SI_SI
22174 #define MULTI_ARG_2_HI_IMM      V8HI_FTYPE_V8HI_SI
22175 #define MULTI_ARG_2_QI_IMM      V16QI_FTYPE_V16QI_SI
22176 #define MULTI_ARG_2_DI_CMP      V2DI_FTYPE_V2DI_V2DI_CMP
22177 #define MULTI_ARG_2_SI_CMP      V4SI_FTYPE_V4SI_V4SI_CMP
22178 #define MULTI_ARG_2_HI_CMP      V8HI_FTYPE_V8HI_V8HI_CMP
22179 #define MULTI_ARG_2_QI_CMP      V16QI_FTYPE_V16QI_V16QI_CMP
22180 #define MULTI_ARG_2_SF_TF       V4SF_FTYPE_V4SF_V4SF_TF
22181 #define MULTI_ARG_2_DF_TF       V2DF_FTYPE_V2DF_V2DF_TF
22182 #define MULTI_ARG_2_DI_TF       V2DI_FTYPE_V2DI_V2DI_TF
22183 #define MULTI_ARG_2_SI_TF       V4SI_FTYPE_V4SI_V4SI_TF
22184 #define MULTI_ARG_2_HI_TF       V8HI_FTYPE_V8HI_V8HI_TF
22185 #define MULTI_ARG_2_QI_TF       V16QI_FTYPE_V16QI_V16QI_TF
22186 #define MULTI_ARG_1_SF          V4SF_FTYPE_V4SF
22187 #define MULTI_ARG_1_DF          V2DF_FTYPE_V2DF
22188 #define MULTI_ARG_1_SF2         V8SF_FTYPE_V8SF
22189 #define MULTI_ARG_1_DF2         V4DF_FTYPE_V4DF
22190 #define MULTI_ARG_1_DI          V2DI_FTYPE_V2DI
22191 #define MULTI_ARG_1_SI          V4SI_FTYPE_V4SI
22192 #define MULTI_ARG_1_HI          V8HI_FTYPE_V8HI
22193 #define MULTI_ARG_1_QI          V16QI_FTYPE_V16QI
22194 #define MULTI_ARG_1_SI_DI       V2DI_FTYPE_V4SI
22195 #define MULTI_ARG_1_HI_DI       V2DI_FTYPE_V8HI
22196 #define MULTI_ARG_1_HI_SI       V4SI_FTYPE_V8HI
22197 #define MULTI_ARG_1_QI_DI       V2DI_FTYPE_V16QI
22198 #define MULTI_ARG_1_QI_SI       V4SI_FTYPE_V16QI
22199 #define MULTI_ARG_1_QI_HI       V8HI_FTYPE_V16QI
22200
22201 static const struct builtin_description bdesc_multi_arg[] =
22202 {
22203   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4,     "__builtin_ia32_vfmaddss",    IX86_BUILTIN_VFMADDSS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22204   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4,     "__builtin_ia32_vfmaddsd",    IX86_BUILTIN_VFMADDSD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22205   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4,       "__builtin_ia32_vfmaddps",    IX86_BUILTIN_VFMADDPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22206   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4,       "__builtin_ia32_vfmaddpd",    IX86_BUILTIN_VFMADDPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22207   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4,     "__builtin_ia32_vfmsubss",    IX86_BUILTIN_VFMSUBSS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22208   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4,     "__builtin_ia32_vfmsubsd",    IX86_BUILTIN_VFMSUBSD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22209   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4,       "__builtin_ia32_vfmsubps",    IX86_BUILTIN_VFMSUBPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22210   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4,       "__builtin_ia32_vfmsubpd",    IX86_BUILTIN_VFMSUBPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22211
22212   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4,    "__builtin_ia32_vfnmaddss",   IX86_BUILTIN_VFNMADDSS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
22213   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4,    "__builtin_ia32_vfnmaddsd",   IX86_BUILTIN_VFNMADDSD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
22214   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4,      "__builtin_ia32_vfnmaddps",   IX86_BUILTIN_VFNMADDPS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
22215   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4,      "__builtin_ia32_vfnmaddpd",   IX86_BUILTIN_VFNMADDPD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
22216   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4,    "__builtin_ia32_vfnmsubss",   IX86_BUILTIN_VFNMSUBSS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
22217   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4,    "__builtin_ia32_vfnmsubsd",   IX86_BUILTIN_VFNMSUBSD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
22218   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4,      "__builtin_ia32_vfnmsubps",   IX86_BUILTIN_VFNMSUBPS,   UNKNOWN,      (int)MULTI_ARG_3_SF },
22219   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4,      "__builtin_ia32_vfnmsubpd",   IX86_BUILTIN_VFNMSUBPD,   UNKNOWN,      (int)MULTI_ARG_3_DF },
22220
22221   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4,    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22222   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4,    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22223   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4,    "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS,    UNKNOWN,      (int)MULTI_ARG_3_SF },
22224   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4,    "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD,    UNKNOWN,      (int)MULTI_ARG_3_DF },
22225
22226   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256,       "__builtin_ia32_vfmaddps256",    IX86_BUILTIN_VFMADDPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22227   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256,       "__builtin_ia32_vfmaddpd256",    IX86_BUILTIN_VFMADDPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22228   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256,       "__builtin_ia32_vfmsubps256",    IX86_BUILTIN_VFMSUBPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22229   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256,       "__builtin_ia32_vfmsubpd256",    IX86_BUILTIN_VFMSUBPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22230
22231   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256,      "__builtin_ia32_vfnmaddps256",   IX86_BUILTIN_VFNMADDPS256,   UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22232   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256,      "__builtin_ia32_vfnmaddpd256",   IX86_BUILTIN_VFNMADDPD256,   UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22233   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256,      "__builtin_ia32_vfnmsubps256",   IX86_BUILTIN_VFNMSUBPS256,   UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22234   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256,      "__builtin_ia32_vfnmsubpd256",   IX86_BUILTIN_VFNMSUBPD256,   UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22235
22236   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4,    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22237   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4,    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22238   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4,    "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256,    UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22239   { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4,    "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256,    UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22240
22241   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov",      IX86_BUILTIN_VPCMOV,      UNKNOWN,      (int)MULTI_ARG_3_DI },
22242   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN,      (int)MULTI_ARG_3_DI },
22243   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si,        "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN,      (int)MULTI_ARG_3_SI },
22244   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi,        "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN,      (int)MULTI_ARG_3_HI },
22245   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi,       "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN,      (int)MULTI_ARG_3_QI },
22246   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df,        "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN,      (int)MULTI_ARG_3_DF },
22247   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf,        "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN,      (int)MULTI_ARG_3_SF },
22248
22249   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256,        "__builtin_ia32_vpcmov256",       IX86_BUILTIN_VPCMOV256,       UNKNOWN,      (int)MULTI_ARG_3_DI2 },
22250   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256,        "__builtin_ia32_vpcmov_v4di256",  IX86_BUILTIN_VPCMOV_V4DI256,  UNKNOWN,      (int)MULTI_ARG_3_DI2 },
22251   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256,        "__builtin_ia32_vpcmov_v8si256",  IX86_BUILTIN_VPCMOV_V8SI256,  UNKNOWN,      (int)MULTI_ARG_3_SI2 },
22252   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256,       "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN,      (int)MULTI_ARG_3_HI2 },
22253   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256,       "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN,      (int)MULTI_ARG_3_QI2 },
22254   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256,        "__builtin_ia32_vpcmov_v4df256",  IX86_BUILTIN_VPCMOV_V4DF256,  UNKNOWN,      (int)MULTI_ARG_3_DF2 },
22255   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256,        "__builtin_ia32_vpcmov_v8sf256",  IX86_BUILTIN_VPCMOV_V8SF256,  UNKNOWN,      (int)MULTI_ARG_3_SF2 },
22256
22257   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm,             "__builtin_ia32_vpperm",      IX86_BUILTIN_VPPERM,      UNKNOWN,      (int)MULTI_ARG_3_QI },
22258
22259   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww,          "__builtin_ia32_vpmacssww",   IX86_BUILTIN_VPMACSSWW,   UNKNOWN,      (int)MULTI_ARG_3_HI },
22260   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww,           "__builtin_ia32_vpmacsww",    IX86_BUILTIN_VPMACSWW,    UNKNOWN,      (int)MULTI_ARG_3_HI },
22261   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd,          "__builtin_ia32_vpmacsswd",   IX86_BUILTIN_VPMACSSWD,   UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
22262   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd,           "__builtin_ia32_vpmacswd",    IX86_BUILTIN_VPMACSWD,    UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
22263   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd,          "__builtin_ia32_vpmacssdd",   IX86_BUILTIN_VPMACSSDD,   UNKNOWN,      (int)MULTI_ARG_3_SI },
22264   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd,           "__builtin_ia32_vpmacsdd",    IX86_BUILTIN_VPMACSDD,    UNKNOWN,      (int)MULTI_ARG_3_SI },
22265   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql,         "__builtin_ia32_vpmacssdql",  IX86_BUILTIN_VPMACSSDQL,  UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
22266   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh,         "__builtin_ia32_vpmacssdqh",  IX86_BUILTIN_VPMACSSDQH,  UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
22267   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql,          "__builtin_ia32_vpmacsdql",   IX86_BUILTIN_VPMACSDQL,   UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
22268   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh,          "__builtin_ia32_vpmacsdqh",   IX86_BUILTIN_VPMACSDQH,   UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
22269   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd,         "__builtin_ia32_vpmadcsswd",  IX86_BUILTIN_VPMADCSSWD,  UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
22270   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd,          "__builtin_ia32_vpmadcswd",   IX86_BUILTIN_VPMADCSWD,   UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
22271
22272   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3,        "__builtin_ia32_vprotq",      IX86_BUILTIN_VPROTQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
22273   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3,        "__builtin_ia32_vprotd",      IX86_BUILTIN_VPROTD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
22274   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3,        "__builtin_ia32_vprotw",      IX86_BUILTIN_VPROTW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
22275   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3,       "__builtin_ia32_vprotb",      IX86_BUILTIN_VPROTB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
22276   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3,         "__builtin_ia32_vprotqi",     IX86_BUILTIN_VPROTQ_IMM,  UNKNOWN,      (int)MULTI_ARG_2_DI_IMM },
22277   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3,         "__builtin_ia32_vprotdi",     IX86_BUILTIN_VPROTD_IMM,  UNKNOWN,      (int)MULTI_ARG_2_SI_IMM },
22278   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3,         "__builtin_ia32_vprotwi",     IX86_BUILTIN_VPROTW_IMM,  UNKNOWN,      (int)MULTI_ARG_2_HI_IMM },
22279   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3,        "__builtin_ia32_vprotbi",     IX86_BUILTIN_VPROTB_IMM,  UNKNOWN,      (int)MULTI_ARG_2_QI_IMM },
22280   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3,         "__builtin_ia32_vpshaq",      IX86_BUILTIN_VPSHAQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
22281   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3,         "__builtin_ia32_vpshad",      IX86_BUILTIN_VPSHAD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
22282   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3,         "__builtin_ia32_vpshaw",      IX86_BUILTIN_VPSHAW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
22283   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3,        "__builtin_ia32_vpshab",      IX86_BUILTIN_VPSHAB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
22284   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3,         "__builtin_ia32_vpshlq",      IX86_BUILTIN_VPSHLQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
22285   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3,         "__builtin_ia32_vpshld",      IX86_BUILTIN_VPSHLD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
22286   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3,         "__builtin_ia32_vpshlw",      IX86_BUILTIN_VPSHLW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
22287   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3,        "__builtin_ia32_vpshlb",      IX86_BUILTIN_VPSHLB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
22288
22289   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2,       "__builtin_ia32_vfrczss",     IX86_BUILTIN_VFRCZSS,     UNKNOWN,      (int)MULTI_ARG_2_SF },
22290   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2,       "__builtin_ia32_vfrczsd",     IX86_BUILTIN_VFRCZSD,     UNKNOWN,      (int)MULTI_ARG_2_DF },
22291   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2,         "__builtin_ia32_vfrczps",     IX86_BUILTIN_VFRCZPS,     UNKNOWN,      (int)MULTI_ARG_1_SF },
22292   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2,         "__builtin_ia32_vfrczpd",     IX86_BUILTIN_VFRCZPD,     UNKNOWN,      (int)MULTI_ARG_1_DF },
22293   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256,         "__builtin_ia32_vfrczps256",  IX86_BUILTIN_VFRCZPS256,  UNKNOWN,      (int)MULTI_ARG_1_SF2 },
22294   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256,         "__builtin_ia32_vfrczpd256",  IX86_BUILTIN_VFRCZPD256,  UNKNOWN,      (int)MULTI_ARG_1_DF2 },
22295
22296   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw,           "__builtin_ia32_vphaddbw",    IX86_BUILTIN_VPHADDBW,    UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
22297   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd,           "__builtin_ia32_vphaddbd",    IX86_BUILTIN_VPHADDBD,    UNKNOWN,      (int)MULTI_ARG_1_QI_SI },
22298   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq,           "__builtin_ia32_vphaddbq",    IX86_BUILTIN_VPHADDBQ,    UNKNOWN,      (int)MULTI_ARG_1_QI_DI },
22299   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd,           "__builtin_ia32_vphaddwd",    IX86_BUILTIN_VPHADDWD,    UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
22300   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq,           "__builtin_ia32_vphaddwq",    IX86_BUILTIN_VPHADDWQ,    UNKNOWN,      (int)MULTI_ARG_1_HI_DI },
22301   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq,           "__builtin_ia32_vphadddq",    IX86_BUILTIN_VPHADDDQ,    UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
22302   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw,          "__builtin_ia32_vphaddubw",   IX86_BUILTIN_VPHADDUBW,   UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
22303   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd,          "__builtin_ia32_vphaddubd",   IX86_BUILTIN_VPHADDUBD,   UNKNOWN,      (int)MULTI_ARG_1_QI_SI },
22304   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq,          "__builtin_ia32_vphaddubq",   IX86_BUILTIN_VPHADDUBQ,   UNKNOWN,      (int)MULTI_ARG_1_QI_DI },
22305   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd,          "__builtin_ia32_vphadduwd",   IX86_BUILTIN_VPHADDUWD,   UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
22306   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq,          "__builtin_ia32_vphadduwq",   IX86_BUILTIN_VPHADDUWQ,   UNKNOWN,      (int)MULTI_ARG_1_HI_DI },
22307   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq,          "__builtin_ia32_vphaddudq",   IX86_BUILTIN_VPHADDUDQ,   UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
22308   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw,           "__builtin_ia32_vphsubbw",    IX86_BUILTIN_VPHSUBBW,    UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
22309   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd,           "__builtin_ia32_vphsubwd",    IX86_BUILTIN_VPHSUBWD,    UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
22310   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq,           "__builtin_ia32_vphsubdq",    IX86_BUILTIN_VPHSUBDQ,    UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
22311
22312   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomeqb",    IX86_BUILTIN_VPCOMEQB,    EQ,           (int)MULTI_ARG_2_QI_CMP },
22313   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomneb",    IX86_BUILTIN_VPCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
22314   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomneqb",   IX86_BUILTIN_VPCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
22315   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomltb",    IX86_BUILTIN_VPCOMLTB,    LT,           (int)MULTI_ARG_2_QI_CMP },
22316   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomleb",    IX86_BUILTIN_VPCOMLEB,    LE,           (int)MULTI_ARG_2_QI_CMP },
22317   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomgtb",    IX86_BUILTIN_VPCOMGTB,    GT,           (int)MULTI_ARG_2_QI_CMP },
22318   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomgeb",    IX86_BUILTIN_VPCOMGEB,    GE,           (int)MULTI_ARG_2_QI_CMP },
22319
22320   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomeqw",    IX86_BUILTIN_VPCOMEQW,    EQ,           (int)MULTI_ARG_2_HI_CMP },
22321   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomnew",    IX86_BUILTIN_VPCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
22322   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomneqw",   IX86_BUILTIN_VPCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
22323   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomltw",    IX86_BUILTIN_VPCOMLTW,    LT,           (int)MULTI_ARG_2_HI_CMP },
22324   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomlew",    IX86_BUILTIN_VPCOMLEW,    LE,           (int)MULTI_ARG_2_HI_CMP },
22325   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomgtw",    IX86_BUILTIN_VPCOMGTW,    GT,           (int)MULTI_ARG_2_HI_CMP },
22326   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomgew",    IX86_BUILTIN_VPCOMGEW,    GE,           (int)MULTI_ARG_2_HI_CMP },
22327
22328   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomeqd",    IX86_BUILTIN_VPCOMEQD,    EQ,           (int)MULTI_ARG_2_SI_CMP },
22329   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomned",    IX86_BUILTIN_VPCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
22330   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomneqd",   IX86_BUILTIN_VPCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
22331   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomltd",    IX86_BUILTIN_VPCOMLTD,    LT,           (int)MULTI_ARG_2_SI_CMP },
22332   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomled",    IX86_BUILTIN_VPCOMLED,    LE,           (int)MULTI_ARG_2_SI_CMP },
22333   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomgtd",    IX86_BUILTIN_VPCOMGTD,    GT,           (int)MULTI_ARG_2_SI_CMP },
22334   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomged",    IX86_BUILTIN_VPCOMGED,    GE,           (int)MULTI_ARG_2_SI_CMP },
22335
22336   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomeqq",    IX86_BUILTIN_VPCOMEQQ,    EQ,           (int)MULTI_ARG_2_DI_CMP },
22337   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomneq",    IX86_BUILTIN_VPCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
22338   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomneqq",   IX86_BUILTIN_VPCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
22339   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomltq",    IX86_BUILTIN_VPCOMLTQ,    LT,           (int)MULTI_ARG_2_DI_CMP },
22340   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomleq",    IX86_BUILTIN_VPCOMLEQ,    LE,           (int)MULTI_ARG_2_DI_CMP },
22341   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomgtq",    IX86_BUILTIN_VPCOMGTQ,    GT,           (int)MULTI_ARG_2_DI_CMP },
22342   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomgeq",    IX86_BUILTIN_VPCOMGEQ,    GE,           (int)MULTI_ARG_2_DI_CMP },
22343
22344   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb",   IX86_BUILTIN_VPCOMEQUB,   EQ,           (int)MULTI_ARG_2_QI_CMP },
22345   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub",   IX86_BUILTIN_VPCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
22346   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb",  IX86_BUILTIN_VPCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
22347   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub",   IX86_BUILTIN_VPCOMLTUB,   LTU,          (int)MULTI_ARG_2_QI_CMP },
22348   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub",   IX86_BUILTIN_VPCOMLEUB,   LEU,          (int)MULTI_ARG_2_QI_CMP },
22349   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub",   IX86_BUILTIN_VPCOMGTUB,   GTU,          (int)MULTI_ARG_2_QI_CMP },
22350   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub",   IX86_BUILTIN_VPCOMGEUB,   GEU,          (int)MULTI_ARG_2_QI_CMP },
22351
22352   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw",   IX86_BUILTIN_VPCOMEQUW,   EQ,           (int)MULTI_ARG_2_HI_CMP },
22353   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw",   IX86_BUILTIN_VPCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
22354   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw",  IX86_BUILTIN_VPCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
22355   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomltuw",   IX86_BUILTIN_VPCOMLTUW,   LTU,          (int)MULTI_ARG_2_HI_CMP },
22356   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomleuw",   IX86_BUILTIN_VPCOMLEUW,   LEU,          (int)MULTI_ARG_2_HI_CMP },
22357   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomgtuw",   IX86_BUILTIN_VPCOMGTUW,   GTU,          (int)MULTI_ARG_2_HI_CMP },
22358   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomgeuw",   IX86_BUILTIN_VPCOMGEUW,   GEU,          (int)MULTI_ARG_2_HI_CMP },
22359
22360   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd",   IX86_BUILTIN_VPCOMEQUD,   EQ,           (int)MULTI_ARG_2_SI_CMP },
22361   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud",   IX86_BUILTIN_VPCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
22362   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd",  IX86_BUILTIN_VPCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
22363   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomltud",   IX86_BUILTIN_VPCOMLTUD,   LTU,          (int)MULTI_ARG_2_SI_CMP },
22364   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomleud",   IX86_BUILTIN_VPCOMLEUD,   LEU,          (int)MULTI_ARG_2_SI_CMP },
22365   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomgtud",   IX86_BUILTIN_VPCOMGTUD,   GTU,          (int)MULTI_ARG_2_SI_CMP },
22366   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomgeud",   IX86_BUILTIN_VPCOMGEUD,   GEU,          (int)MULTI_ARG_2_SI_CMP },
22367
22368   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq",   IX86_BUILTIN_VPCOMEQUQ,   EQ,           (int)MULTI_ARG_2_DI_CMP },
22369   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq",   IX86_BUILTIN_VPCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
22370   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq",  IX86_BUILTIN_VPCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
22371   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomltuq",   IX86_BUILTIN_VPCOMLTUQ,   LTU,          (int)MULTI_ARG_2_DI_CMP },
22372   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomleuq",   IX86_BUILTIN_VPCOMLEUQ,   LEU,          (int)MULTI_ARG_2_DI_CMP },
22373   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomgtuq",   IX86_BUILTIN_VPCOMGTUQ,   GTU,          (int)MULTI_ARG_2_DI_CMP },
22374   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomgeuq",   IX86_BUILTIN_VPCOMGEUQ,   GEU,          (int)MULTI_ARG_2_DI_CMP },
22375
22376   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
22377   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
22378   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
22379   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
22380   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
22381   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
22382   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
22383   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
22384
22385   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomtrueb",  IX86_BUILTIN_VPCOMTRUEB,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
22386   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomtruew",  IX86_BUILTIN_VPCOMTRUEW,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
22387   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomtrued",  IX86_BUILTIN_VPCOMTRUED,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
22388   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomtrueq",  IX86_BUILTIN_VPCOMTRUEQ,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
22389   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
22390   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
22391   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
22392   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
22393
22394 };
22395
22396 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22397    in the current target ISA to allow the user to compile particular modules
22398    with different target specific options that differ from the command line
22399    options.  */
22400 static void
22401 ix86_init_mmx_sse_builtins (void)
22402 {
22403   const struct builtin_description * d;
22404   enum ix86_builtin_func_type ftype;
22405   size_t i;
22406
22407   /* Add all special builtins with variable number of operands.  */
22408   for (i = 0, d = bdesc_special_args;
22409        i < ARRAY_SIZE (bdesc_special_args);
22410        i++, d++)
22411     {
22412       if (d->name == 0)
22413         continue;
22414
22415       ftype = (enum ix86_builtin_func_type) d->flag;
22416       def_builtin (d->mask, d->name, ftype, d->code);
22417     }
22418
22419   /* Add all builtins with variable number of operands.  */
22420   for (i = 0, d = bdesc_args;
22421        i < ARRAY_SIZE (bdesc_args);
22422        i++, d++)
22423     {
22424       if (d->name == 0)
22425         continue;
22426
22427       ftype = (enum ix86_builtin_func_type) d->flag;
22428       def_builtin_const (d->mask, d->name, ftype, d->code);
22429     }
22430
22431   /* pcmpestr[im] insns.  */
22432   for (i = 0, d = bdesc_pcmpestr;
22433        i < ARRAY_SIZE (bdesc_pcmpestr);
22434        i++, d++)
22435     {
22436       if (d->code == IX86_BUILTIN_PCMPESTRM128)
22437         ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
22438       else
22439         ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
22440       def_builtin_const (d->mask, d->name, ftype, d->code);
22441     }
22442
22443   /* pcmpistr[im] insns.  */
22444   for (i = 0, d = bdesc_pcmpistr;
22445        i < ARRAY_SIZE (bdesc_pcmpistr);
22446        i++, d++)
22447     {
22448       if (d->code == IX86_BUILTIN_PCMPISTRM128)
22449         ftype = V16QI_FTYPE_V16QI_V16QI_INT;
22450       else
22451         ftype = INT_FTYPE_V16QI_V16QI_INT;
22452       def_builtin_const (d->mask, d->name, ftype, d->code);
22453     }
22454
22455   /* comi/ucomi insns.  */
22456   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
22457     {
22458       if (d->mask == OPTION_MASK_ISA_SSE2)
22459         ftype = INT_FTYPE_V2DF_V2DF;
22460       else
22461         ftype = INT_FTYPE_V4SF_V4SF;
22462       def_builtin_const (d->mask, d->name, ftype, d->code);
22463     }
22464
22465   /* SSE */
22466   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
22467                VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
22468   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
22469                UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
22470
22471   /* SSE or 3DNow!A */
22472   def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
22473                "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
22474                IX86_BUILTIN_MASKMOVQ);
22475
22476   /* SSE2 */
22477   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
22478                VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
22479
22480   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
22481                VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
22482   x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
22483                             VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
22484
22485   /* SSE3.  */
22486   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
22487                VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
22488   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
22489                VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
22490
22491   /* AES */
22492   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
22493                      V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
22494   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
22495                      V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
22496   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
22497                      V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
22498   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
22499                      V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
22500   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
22501                      V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
22502   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
22503                      V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
22504
22505   /* PCLMUL */
22506   def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
22507                      V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
22508
22509   /* MMX access to the vec_init patterns.  */
22510   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
22511                      V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
22512
22513   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
22514                      V4HI_FTYPE_HI_HI_HI_HI,
22515                      IX86_BUILTIN_VEC_INIT_V4HI);
22516
22517   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
22518                      V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
22519                      IX86_BUILTIN_VEC_INIT_V8QI);
22520
22521   /* Access to the vec_extract patterns.  */
22522   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
22523                      DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
22524   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
22525                      DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
22526   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
22527                      FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
22528   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
22529                      SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
22530   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
22531                      HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
22532
22533   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
22534                      "__builtin_ia32_vec_ext_v4hi",
22535                      HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
22536
22537   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
22538                      SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
22539
22540   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
22541                      QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
22542
22543   /* Access to the vec_set patterns.  */
22544   def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
22545                      "__builtin_ia32_vec_set_v2di",
22546                      V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
22547
22548   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
22549                      V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
22550
22551   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
22552                      V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
22553
22554   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
22555                      V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
22556
22557   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
22558                      "__builtin_ia32_vec_set_v4hi",
22559                      V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
22560
22561   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
22562                      V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
22563
22564   /* Add FMA4 multi-arg argument instructions */
22565   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
22566     {
22567       if (d->name == 0)
22568         continue;
22569
22570       ftype = (enum ix86_builtin_func_type) d->flag;
22571       def_builtin_const (d->mask, d->name, ftype, d->code);
22572     }
22573 }
22574
22575 /* Internal method for ix86_init_builtins.  */
22576
22577 static void
22578 ix86_init_builtins_va_builtins_abi (void)
22579 {
22580   tree ms_va_ref, sysv_va_ref;
22581   tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
22582   tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
22583   tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
22584   tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
22585
22586   if (!TARGET_64BIT)
22587     return;
22588   fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
22589   fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
22590   ms_va_ref = build_reference_type (ms_va_list_type_node);
22591   sysv_va_ref =
22592     build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
22593
22594   fnvoid_va_end_ms =
22595     build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22596   fnvoid_va_start_ms =
22597     build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
22598   fnvoid_va_end_sysv =
22599     build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
22600   fnvoid_va_start_sysv =
22601     build_varargs_function_type_list (void_type_node, sysv_va_ref,
22602                                        NULL_TREE);
22603   fnvoid_va_copy_ms =
22604     build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
22605                               NULL_TREE);
22606   fnvoid_va_copy_sysv =
22607     build_function_type_list (void_type_node, sysv_va_ref,
22608                               sysv_va_ref, NULL_TREE);
22609
22610   add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
22611                         BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
22612   add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
22613                         BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
22614   add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
22615                         BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
22616   add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
22617                         BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22618   add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
22619                         BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22620   add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
22621                         BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
22622 }
22623
22624 static void
22625 ix86_init_builtin_types (void)
22626 {
22627   tree float128_type_node, float80_type_node;
22628
22629   /* The __float80 type.  */
22630   float80_type_node = long_double_type_node;
22631   if (TYPE_MODE (float80_type_node) != XFmode)
22632     {
22633       /* The __float80 type.  */
22634       float80_type_node = make_node (REAL_TYPE);
22635
22636       TYPE_PRECISION (float80_type_node) = 80;
22637       layout_type (float80_type_node);
22638     }
22639   (*lang_hooks.types.register_builtin_type) (float80_type_node, "__float80");
22640
22641   /* The __float128 type.  */
22642   float128_type_node = make_node (REAL_TYPE);
22643   TYPE_PRECISION (float128_type_node) = 128;
22644   layout_type (float128_type_node);
22645   (*lang_hooks.types.register_builtin_type) (float128_type_node, "__float128");
22646
22647   /* This macro is built by i386-builtin-types.awk.  */
22648   DEFINE_BUILTIN_PRIMITIVE_TYPES;
22649 }
22650
22651 static void
22652 ix86_init_builtins (void)
22653 {
22654   tree t;
22655
22656   ix86_init_builtin_types ();
22657
22658   /* TFmode support builtins.  */
22659   def_builtin_const (0, "__builtin_infq",
22660                      FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
22661   def_builtin_const (0, "__builtin_huge_valq",
22662                      FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
22663
22664   /* We will expand them to normal call if SSE2 isn't available since
22665      they are used by libgcc. */
22666   t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
22667   t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
22668                             BUILT_IN_MD, "__fabstf2", NULL_TREE);
22669   TREE_READONLY (t) = 1;
22670   ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
22671
22672   t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
22673   t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
22674                             BUILT_IN_MD, "__copysigntf3", NULL_TREE);
22675   TREE_READONLY (t) = 1;
22676   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
22677
22678   ix86_init_mmx_sse_builtins ();
22679
22680   if (TARGET_64BIT)
22681     ix86_init_builtins_va_builtins_abi ();
22682 }
22683
22684 /* Return the ix86 builtin for CODE.  */
22685
22686 static tree
22687 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
22688 {
22689   if (code >= IX86_BUILTIN_MAX)
22690     return error_mark_node;
22691
22692   return ix86_builtins[code];
22693 }
22694
22695 /* Errors in the source file can cause expand_expr to return const0_rtx
22696    where we expect a vector.  To avoid crashing, use one of the vector
22697    clear instructions.  */
22698 static rtx
22699 safe_vector_operand (rtx x, enum machine_mode mode)
22700 {
22701   if (x == const0_rtx)
22702     x = CONST0_RTX (mode);
22703   return x;
22704 }
22705
22706 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
22707
22708 static rtx
22709 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
22710 {
22711   rtx pat;
22712   tree arg0 = CALL_EXPR_ARG (exp, 0);
22713   tree arg1 = CALL_EXPR_ARG (exp, 1);
22714   rtx op0 = expand_normal (arg0);
22715   rtx op1 = expand_normal (arg1);
22716   enum machine_mode tmode = insn_data[icode].operand[0].mode;
22717   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22718   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
22719
22720   if (VECTOR_MODE_P (mode0))
22721     op0 = safe_vector_operand (op0, mode0);
22722   if (VECTOR_MODE_P (mode1))
22723     op1 = safe_vector_operand (op1, mode1);
22724
22725   if (optimize || !target
22726       || GET_MODE (target) != tmode
22727       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22728     target = gen_reg_rtx (tmode);
22729
22730   if (GET_MODE (op1) == SImode && mode1 == TImode)
22731     {
22732       rtx x = gen_reg_rtx (V4SImode);
22733       emit_insn (gen_sse2_loadd (x, op1));
22734       op1 = gen_lowpart (TImode, x);
22735     }
22736
22737   if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22738     op0 = copy_to_mode_reg (mode0, op0);
22739   if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22740     op1 = copy_to_mode_reg (mode1, op1);
22741
22742   pat = GEN_FCN (icode) (target, op0, op1);
22743   if (! pat)
22744     return 0;
22745
22746   emit_insn (pat);
22747
22748   return target;
22749 }
22750
22751 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */
22752
22753 static rtx
22754 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
22755                                enum ix86_builtin_func_type m_type,
22756                                enum rtx_code sub_code)
22757 {
22758   rtx pat;
22759   int i;
22760   int nargs;
22761   bool comparison_p = false;
22762   bool tf_p = false;
22763   bool last_arg_constant = false;
22764   int num_memory = 0;
22765   struct {
22766     rtx op;
22767     enum machine_mode mode;
22768   } args[4];
22769
22770   enum machine_mode tmode = insn_data[icode].operand[0].mode;
22771
22772   switch (m_type)
22773     {
22774     case MULTI_ARG_3_SF:
22775     case MULTI_ARG_3_DF:
22776     case MULTI_ARG_3_SF2:
22777     case MULTI_ARG_3_DF2:
22778     case MULTI_ARG_3_DI:
22779     case MULTI_ARG_3_SI:
22780     case MULTI_ARG_3_SI_DI:
22781     case MULTI_ARG_3_HI:
22782     case MULTI_ARG_3_HI_SI:
22783     case MULTI_ARG_3_QI:
22784     case MULTI_ARG_3_DI2:
22785     case MULTI_ARG_3_SI2:
22786     case MULTI_ARG_3_HI2:
22787     case MULTI_ARG_3_QI2:
22788       nargs = 3;
22789       break;
22790
22791     case MULTI_ARG_2_SF:
22792     case MULTI_ARG_2_DF:
22793     case MULTI_ARG_2_DI:
22794     case MULTI_ARG_2_SI:
22795     case MULTI_ARG_2_HI:
22796     case MULTI_ARG_2_QI:
22797       nargs = 2;
22798       break;
22799
22800     case MULTI_ARG_2_DI_IMM:
22801     case MULTI_ARG_2_SI_IMM:
22802     case MULTI_ARG_2_HI_IMM:
22803     case MULTI_ARG_2_QI_IMM:
22804       nargs = 2;
22805       last_arg_constant = true;
22806       break;
22807
22808     case MULTI_ARG_1_SF:
22809     case MULTI_ARG_1_DF:
22810     case MULTI_ARG_1_SF2:
22811     case MULTI_ARG_1_DF2:
22812     case MULTI_ARG_1_DI:
22813     case MULTI_ARG_1_SI:
22814     case MULTI_ARG_1_HI:
22815     case MULTI_ARG_1_QI:
22816     case MULTI_ARG_1_SI_DI:
22817     case MULTI_ARG_1_HI_DI:
22818     case MULTI_ARG_1_HI_SI:
22819     case MULTI_ARG_1_QI_DI:
22820     case MULTI_ARG_1_QI_SI:
22821     case MULTI_ARG_1_QI_HI:
22822       nargs = 1;
22823       break;
22824
22825     case MULTI_ARG_2_DI_CMP:
22826     case MULTI_ARG_2_SI_CMP:
22827     case MULTI_ARG_2_HI_CMP:
22828     case MULTI_ARG_2_QI_CMP:
22829       nargs = 2;
22830       comparison_p = true;
22831       break;
22832
22833     case MULTI_ARG_2_SF_TF:
22834     case MULTI_ARG_2_DF_TF:
22835     case MULTI_ARG_2_DI_TF:
22836     case MULTI_ARG_2_SI_TF:
22837     case MULTI_ARG_2_HI_TF:
22838     case MULTI_ARG_2_QI_TF:
22839       nargs = 2;
22840       tf_p = true;
22841       break;
22842
22843     default:
22844       gcc_unreachable ();
22845     }
22846
22847   if (optimize || !target
22848       || GET_MODE (target) != tmode
22849       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22850     target = gen_reg_rtx (tmode);
22851
22852   gcc_assert (nargs <= 4);
22853
22854   for (i = 0; i < nargs; i++)
22855     {
22856       tree arg = CALL_EXPR_ARG (exp, i);
22857       rtx op = expand_normal (arg);
22858       int adjust = (comparison_p) ? 1 : 0;
22859       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
22860
22861       if (last_arg_constant && i == nargs-1)
22862         {
22863           if (!CONST_INT_P (op))
22864             {
22865               error ("last argument must be an immediate");
22866               return gen_reg_rtx (tmode);
22867             }
22868         }
22869       else
22870         {
22871           if (VECTOR_MODE_P (mode))
22872             op = safe_vector_operand (op, mode);
22873
22874           /* If we aren't optimizing, only allow one memory operand to be
22875              generated.  */
22876           if (memory_operand (op, mode))
22877             num_memory++;
22878
22879           gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
22880
22881           if (optimize
22882               || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
22883               || num_memory > 1)
22884             op = force_reg (mode, op);
22885         }
22886
22887       args[i].op = op;
22888       args[i].mode = mode;
22889     }
22890
22891   switch (nargs)
22892     {
22893     case 1:
22894       pat = GEN_FCN (icode) (target, args[0].op);
22895       break;
22896
22897     case 2:
22898       if (tf_p)
22899         pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
22900                                GEN_INT ((int)sub_code));
22901       else if (! comparison_p)
22902         pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
22903       else
22904         {
22905           rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
22906                                        args[0].op,
22907                                        args[1].op);
22908
22909           pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
22910         }
22911       break;
22912
22913     case 3:
22914       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
22915       break;
22916
22917     default:
22918       gcc_unreachable ();
22919     }
22920
22921   if (! pat)
22922     return 0;
22923
22924   emit_insn (pat);
22925   return target;
22926 }
22927
22928 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
22929    insns with vec_merge.  */
22930
22931 static rtx
22932 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
22933                                     rtx target)
22934 {
22935   rtx pat;
22936   tree arg0 = CALL_EXPR_ARG (exp, 0);
22937   rtx op1, op0 = expand_normal (arg0);
22938   enum machine_mode tmode = insn_data[icode].operand[0].mode;
22939   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22940
22941   if (optimize || !target
22942       || GET_MODE (target) != tmode
22943       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22944     target = gen_reg_rtx (tmode);
22945
22946   if (VECTOR_MODE_P (mode0))
22947     op0 = safe_vector_operand (op0, mode0);
22948
22949   if ((optimize && !register_operand (op0, mode0))
22950       || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22951     op0 = copy_to_mode_reg (mode0, op0);
22952
22953   op1 = op0;
22954   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
22955     op1 = copy_to_mode_reg (mode0, op1);
22956
22957   pat = GEN_FCN (icode) (target, op0, op1);
22958   if (! pat)
22959     return 0;
22960   emit_insn (pat);
22961   return target;
22962 }
22963
22964 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
22965
22966 static rtx
22967 ix86_expand_sse_compare (const struct builtin_description *d,
22968                          tree exp, rtx target, bool swap)
22969 {
22970   rtx pat;
22971   tree arg0 = CALL_EXPR_ARG (exp, 0);
22972   tree arg1 = CALL_EXPR_ARG (exp, 1);
22973   rtx op0 = expand_normal (arg0);
22974   rtx op1 = expand_normal (arg1);
22975   rtx op2;
22976   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
22977   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
22978   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
22979   enum rtx_code comparison = d->comparison;
22980
22981   if (VECTOR_MODE_P (mode0))
22982     op0 = safe_vector_operand (op0, mode0);
22983   if (VECTOR_MODE_P (mode1))
22984     op1 = safe_vector_operand (op1, mode1);
22985
22986   /* Swap operands if we have a comparison that isn't available in
22987      hardware.  */
22988   if (swap)
22989     {
22990       rtx tmp = gen_reg_rtx (mode1);
22991       emit_move_insn (tmp, op1);
22992       op1 = op0;
22993       op0 = tmp;
22994     }
22995
22996   if (optimize || !target
22997       || GET_MODE (target) != tmode
22998       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
22999     target = gen_reg_rtx (tmode);
23000
23001   if ((optimize && !register_operand (op0, mode0))
23002       || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23003     op0 = copy_to_mode_reg (mode0, op0);
23004   if ((optimize && !register_operand (op1, mode1))
23005       || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23006     op1 = copy_to_mode_reg (mode1, op1);
23007
23008   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23009   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23010   if (! pat)
23011     return 0;
23012   emit_insn (pat);
23013   return target;
23014 }
23015
23016 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
23017
23018 static rtx
23019 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23020                       rtx target)
23021 {
23022   rtx pat;
23023   tree arg0 = CALL_EXPR_ARG (exp, 0);
23024   tree arg1 = CALL_EXPR_ARG (exp, 1);
23025   rtx op0 = expand_normal (arg0);
23026   rtx op1 = expand_normal (arg1);
23027   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23028   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23029   enum rtx_code comparison = d->comparison;
23030
23031   if (VECTOR_MODE_P (mode0))
23032     op0 = safe_vector_operand (op0, mode0);
23033   if (VECTOR_MODE_P (mode1))
23034     op1 = safe_vector_operand (op1, mode1);
23035
23036   /* Swap operands if we have a comparison that isn't available in
23037      hardware.  */
23038   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
23039     {
23040       rtx tmp = op1;
23041       op1 = op0;
23042       op0 = tmp;
23043     }
23044
23045   target = gen_reg_rtx (SImode);
23046   emit_move_insn (target, const0_rtx);
23047   target = gen_rtx_SUBREG (QImode, target, 0);
23048
23049   if ((optimize && !register_operand (op0, mode0))
23050       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23051     op0 = copy_to_mode_reg (mode0, op0);
23052   if ((optimize && !register_operand (op1, mode1))
23053       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23054     op1 = copy_to_mode_reg (mode1, op1);
23055
23056   pat = GEN_FCN (d->icode) (op0, op1);
23057   if (! pat)
23058     return 0;
23059   emit_insn (pat);
23060   emit_insn (gen_rtx_SET (VOIDmode,
23061                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23062                           gen_rtx_fmt_ee (comparison, QImode,
23063                                           SET_DEST (pat),
23064                                           const0_rtx)));
23065
23066   return SUBREG_REG (target);
23067 }
23068
23069 /* Subroutine of ix86_expand_builtin to take care of ptest insns.  */
23070
23071 static rtx
23072 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23073                        rtx target)
23074 {
23075   rtx pat;
23076   tree arg0 = CALL_EXPR_ARG (exp, 0);
23077   tree arg1 = CALL_EXPR_ARG (exp, 1);
23078   rtx op0 = expand_normal (arg0);
23079   rtx op1 = expand_normal (arg1);
23080   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23081   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23082   enum rtx_code comparison = d->comparison;
23083
23084   if (VECTOR_MODE_P (mode0))
23085     op0 = safe_vector_operand (op0, mode0);
23086   if (VECTOR_MODE_P (mode1))
23087     op1 = safe_vector_operand (op1, mode1);
23088
23089   target = gen_reg_rtx (SImode);
23090   emit_move_insn (target, const0_rtx);
23091   target = gen_rtx_SUBREG (QImode, target, 0);
23092
23093   if ((optimize && !register_operand (op0, mode0))
23094       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23095     op0 = copy_to_mode_reg (mode0, op0);
23096   if ((optimize && !register_operand (op1, mode1))
23097       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23098     op1 = copy_to_mode_reg (mode1, op1);
23099
23100   pat = GEN_FCN (d->icode) (op0, op1);
23101   if (! pat)
23102     return 0;
23103   emit_insn (pat);
23104   emit_insn (gen_rtx_SET (VOIDmode,
23105                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23106                           gen_rtx_fmt_ee (comparison, QImode,
23107                                           SET_DEST (pat),
23108                                           const0_rtx)));
23109
23110   return SUBREG_REG (target);
23111 }
23112
23113 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
23114
23115 static rtx
23116 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23117                           tree exp, rtx target)
23118 {
23119   rtx pat;
23120   tree arg0 = CALL_EXPR_ARG (exp, 0);
23121   tree arg1 = CALL_EXPR_ARG (exp, 1);
23122   tree arg2 = CALL_EXPR_ARG (exp, 2);
23123   tree arg3 = CALL_EXPR_ARG (exp, 3);
23124   tree arg4 = CALL_EXPR_ARG (exp, 4);
23125   rtx scratch0, scratch1;
23126   rtx op0 = expand_normal (arg0);
23127   rtx op1 = expand_normal (arg1);
23128   rtx op2 = expand_normal (arg2);
23129   rtx op3 = expand_normal (arg3);
23130   rtx op4 = expand_normal (arg4);
23131   enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
23132
23133   tmode0 = insn_data[d->icode].operand[0].mode;
23134   tmode1 = insn_data[d->icode].operand[1].mode;
23135   modev2 = insn_data[d->icode].operand[2].mode;
23136   modei3 = insn_data[d->icode].operand[3].mode;
23137   modev4 = insn_data[d->icode].operand[4].mode;
23138   modei5 = insn_data[d->icode].operand[5].mode;
23139   modeimm = insn_data[d->icode].operand[6].mode;
23140
23141   if (VECTOR_MODE_P (modev2))
23142     op0 = safe_vector_operand (op0, modev2);
23143   if (VECTOR_MODE_P (modev4))
23144     op2 = safe_vector_operand (op2, modev4);
23145
23146   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23147     op0 = copy_to_mode_reg (modev2, op0);
23148   if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23149     op1 = copy_to_mode_reg (modei3, op1);
23150   if ((optimize && !register_operand (op2, modev4))
23151       || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23152     op2 = copy_to_mode_reg (modev4, op2);
23153   if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23154     op3 = copy_to_mode_reg (modei5, op3);
23155
23156   if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
23157     {
23158       error ("the fifth argument must be a 8-bit immediate");
23159       return const0_rtx;
23160     }
23161
23162   if (d->code == IX86_BUILTIN_PCMPESTRI128)
23163     {
23164       if (optimize || !target
23165           || GET_MODE (target) != tmode0
23166           || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23167         target = gen_reg_rtx (tmode0);
23168
23169       scratch1 = gen_reg_rtx (tmode1);
23170
23171       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
23172     }
23173   else if (d->code == IX86_BUILTIN_PCMPESTRM128)
23174     {
23175       if (optimize || !target
23176           || GET_MODE (target) != tmode1
23177           || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23178         target = gen_reg_rtx (tmode1);
23179
23180       scratch0 = gen_reg_rtx (tmode0);
23181
23182       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
23183     }
23184   else
23185     {
23186       gcc_assert (d->flag);
23187
23188       scratch0 = gen_reg_rtx (tmode0);
23189       scratch1 = gen_reg_rtx (tmode1);
23190
23191       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
23192     }
23193
23194   if (! pat)
23195     return 0;
23196
23197   emit_insn (pat);
23198
23199   if (d->flag)
23200     {
23201       target = gen_reg_rtx (SImode);
23202       emit_move_insn (target, const0_rtx);
23203       target = gen_rtx_SUBREG (QImode, target, 0);
23204
23205       emit_insn
23206         (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23207                       gen_rtx_fmt_ee (EQ, QImode,
23208                                       gen_rtx_REG ((enum machine_mode) d->flag,
23209                                                    FLAGS_REG),
23210                                       const0_rtx)));
23211       return SUBREG_REG (target);
23212     }
23213   else
23214     return target;
23215 }
23216
23217
23218 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */
23219
23220 static rtx
23221 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23222                           tree exp, rtx target)
23223 {
23224   rtx pat;
23225   tree arg0 = CALL_EXPR_ARG (exp, 0);
23226   tree arg1 = CALL_EXPR_ARG (exp, 1);
23227   tree arg2 = CALL_EXPR_ARG (exp, 2);
23228   rtx scratch0, scratch1;
23229   rtx op0 = expand_normal (arg0);
23230   rtx op1 = expand_normal (arg1);
23231   rtx op2 = expand_normal (arg2);
23232   enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
23233
23234   tmode0 = insn_data[d->icode].operand[0].mode;
23235   tmode1 = insn_data[d->icode].operand[1].mode;
23236   modev2 = insn_data[d->icode].operand[2].mode;
23237   modev3 = insn_data[d->icode].operand[3].mode;
23238   modeimm = insn_data[d->icode].operand[4].mode;
23239
23240   if (VECTOR_MODE_P (modev2))
23241     op0 = safe_vector_operand (op0, modev2);
23242   if (VECTOR_MODE_P (modev3))
23243     op1 = safe_vector_operand (op1, modev3);
23244
23245   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23246     op0 = copy_to_mode_reg (modev2, op0);
23247   if ((optimize && !register_operand (op1, modev3))
23248       || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23249     op1 = copy_to_mode_reg (modev3, op1);
23250
23251   if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
23252     {
23253       error ("the third argument must be a 8-bit immediate");
23254       return const0_rtx;
23255     }
23256
23257   if (d->code == IX86_BUILTIN_PCMPISTRI128)
23258     {
23259       if (optimize || !target
23260           || GET_MODE (target) != tmode0
23261           || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23262         target = gen_reg_rtx (tmode0);
23263
23264       scratch1 = gen_reg_rtx (tmode1);
23265
23266       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
23267     }
23268   else if (d->code == IX86_BUILTIN_PCMPISTRM128)
23269     {
23270       if (optimize || !target
23271           || GET_MODE (target) != tmode1
23272           || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23273         target = gen_reg_rtx (tmode1);
23274
23275       scratch0 = gen_reg_rtx (tmode0);
23276
23277       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
23278     }
23279   else
23280     {
23281       gcc_assert (d->flag);
23282
23283       scratch0 = gen_reg_rtx (tmode0);
23284       scratch1 = gen_reg_rtx (tmode1);
23285
23286       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
23287     }
23288
23289   if (! pat)
23290     return 0;
23291
23292   emit_insn (pat);
23293
23294   if (d->flag)
23295     {
23296       target = gen_reg_rtx (SImode);
23297       emit_move_insn (target, const0_rtx);
23298       target = gen_rtx_SUBREG (QImode, target, 0);
23299
23300       emit_insn
23301         (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23302                       gen_rtx_fmt_ee (EQ, QImode,
23303                                       gen_rtx_REG ((enum machine_mode) d->flag,
23304                                                    FLAGS_REG),
23305                                       const0_rtx)));
23306       return SUBREG_REG (target);
23307     }
23308   else
23309     return target;
23310 }
23311
23312 /* Subroutine of ix86_expand_builtin to take care of insns with
23313    variable number of operands.  */
23314
23315 static rtx
23316 ix86_expand_args_builtin (const struct builtin_description *d,
23317                           tree exp, rtx target)
23318 {
23319   rtx pat, real_target;
23320   unsigned int i, nargs;
23321   unsigned int nargs_constant = 0;
23322   int num_memory = 0;
23323   struct
23324     {
23325       rtx op;
23326       enum machine_mode mode;
23327     } args[4];
23328   bool last_arg_count = false;
23329   enum insn_code icode = d->icode;
23330   const struct insn_data *insn_p = &insn_data[icode];
23331   enum machine_mode tmode = insn_p->operand[0].mode;
23332   enum machine_mode rmode = VOIDmode;
23333   bool swap = false;
23334   enum rtx_code comparison = d->comparison;
23335
23336   switch ((enum ix86_builtin_func_type) d->flag)
23337     {
23338     case INT_FTYPE_V8SF_V8SF_PTEST:
23339     case INT_FTYPE_V4DI_V4DI_PTEST:
23340     case INT_FTYPE_V4DF_V4DF_PTEST:
23341     case INT_FTYPE_V4SF_V4SF_PTEST:
23342     case INT_FTYPE_V2DI_V2DI_PTEST:
23343     case INT_FTYPE_V2DF_V2DF_PTEST:
23344       return ix86_expand_sse_ptest (d, exp, target);
23345     case FLOAT128_FTYPE_FLOAT128:
23346     case FLOAT_FTYPE_FLOAT:
23347     case INT_FTYPE_INT:
23348     case UINT64_FTYPE_INT:
23349     case UINT16_FTYPE_UINT16:
23350     case INT64_FTYPE_INT64:
23351     case INT64_FTYPE_V4SF:
23352     case INT64_FTYPE_V2DF:
23353     case INT_FTYPE_V16QI:
23354     case INT_FTYPE_V8QI:
23355     case INT_FTYPE_V8SF:
23356     case INT_FTYPE_V4DF:
23357     case INT_FTYPE_V4SF:
23358     case INT_FTYPE_V2DF:
23359     case V16QI_FTYPE_V16QI:
23360     case V8SI_FTYPE_V8SF:
23361     case V8SI_FTYPE_V4SI:
23362     case V8HI_FTYPE_V8HI:
23363     case V8HI_FTYPE_V16QI:
23364     case V8QI_FTYPE_V8QI:
23365     case V8SF_FTYPE_V8SF:
23366     case V8SF_FTYPE_V8SI:
23367     case V8SF_FTYPE_V4SF:
23368     case V4SI_FTYPE_V4SI:
23369     case V4SI_FTYPE_V16QI:
23370     case V4SI_FTYPE_V4SF:
23371     case V4SI_FTYPE_V8SI:
23372     case V4SI_FTYPE_V8HI:
23373     case V4SI_FTYPE_V4DF:
23374     case V4SI_FTYPE_V2DF:
23375     case V4HI_FTYPE_V4HI:
23376     case V4DF_FTYPE_V4DF:
23377     case V4DF_FTYPE_V4SI:
23378     case V4DF_FTYPE_V4SF:
23379     case V4DF_FTYPE_V2DF:
23380     case V4SF_FTYPE_V4SF:
23381     case V4SF_FTYPE_V4SI:
23382     case V4SF_FTYPE_V8SF:
23383     case V4SF_FTYPE_V4DF:
23384     case V4SF_FTYPE_V2DF:
23385     case V2DI_FTYPE_V2DI:
23386     case V2DI_FTYPE_V16QI:
23387     case V2DI_FTYPE_V8HI:
23388     case V2DI_FTYPE_V4SI:
23389     case V2DF_FTYPE_V2DF:
23390     case V2DF_FTYPE_V4SI:
23391     case V2DF_FTYPE_V4DF:
23392     case V2DF_FTYPE_V4SF:
23393     case V2DF_FTYPE_V2SI:
23394     case V2SI_FTYPE_V2SI:
23395     case V2SI_FTYPE_V4SF:
23396     case V2SI_FTYPE_V2SF:
23397     case V2SI_FTYPE_V2DF:
23398     case V2SF_FTYPE_V2SF:
23399     case V2SF_FTYPE_V2SI:
23400       nargs = 1;
23401       break;
23402     case V4SF_FTYPE_V4SF_VEC_MERGE:
23403     case V2DF_FTYPE_V2DF_VEC_MERGE:
23404       return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
23405     case FLOAT128_FTYPE_FLOAT128_FLOAT128:
23406     case V16QI_FTYPE_V16QI_V16QI:
23407     case V16QI_FTYPE_V8HI_V8HI:
23408     case V8QI_FTYPE_V8QI_V8QI:
23409     case V8QI_FTYPE_V4HI_V4HI:
23410     case V8HI_FTYPE_V8HI_V8HI:
23411     case V8HI_FTYPE_V16QI_V16QI:
23412     case V8HI_FTYPE_V4SI_V4SI:
23413     case V8SF_FTYPE_V8SF_V8SF:
23414     case V8SF_FTYPE_V8SF_V8SI:
23415     case V4SI_FTYPE_V4SI_V4SI:
23416     case V4SI_FTYPE_V8HI_V8HI:
23417     case V4SI_FTYPE_V4SF_V4SF:
23418     case V4SI_FTYPE_V2DF_V2DF:
23419     case V4HI_FTYPE_V4HI_V4HI:
23420     case V4HI_FTYPE_V8QI_V8QI:
23421     case V4HI_FTYPE_V2SI_V2SI:
23422     case V4DF_FTYPE_V4DF_V4DF:
23423     case V4DF_FTYPE_V4DF_V4DI:
23424     case V4SF_FTYPE_V4SF_V4SF:
23425     case V4SF_FTYPE_V4SF_V4SI:
23426     case V4SF_FTYPE_V4SF_V2SI:
23427     case V4SF_FTYPE_V4SF_V2DF:
23428     case V4SF_FTYPE_V4SF_DI:
23429     case V4SF_FTYPE_V4SF_SI:
23430     case V2DI_FTYPE_V2DI_V2DI:
23431     case V2DI_FTYPE_V16QI_V16QI:
23432     case V2DI_FTYPE_V4SI_V4SI:
23433     case V2DI_FTYPE_V2DI_V16QI:
23434     case V2DI_FTYPE_V2DF_V2DF:
23435     case V2SI_FTYPE_V2SI_V2SI:
23436     case V2SI_FTYPE_V4HI_V4HI:
23437     case V2SI_FTYPE_V2SF_V2SF:
23438     case V2DF_FTYPE_V2DF_V2DF:
23439     case V2DF_FTYPE_V2DF_V4SF:
23440     case V2DF_FTYPE_V2DF_V2DI:
23441     case V2DF_FTYPE_V2DF_DI:
23442     case V2DF_FTYPE_V2DF_SI:
23443     case V2SF_FTYPE_V2SF_V2SF:
23444     case V1DI_FTYPE_V1DI_V1DI:
23445     case V1DI_FTYPE_V8QI_V8QI:
23446     case V1DI_FTYPE_V2SI_V2SI:
23447       if (comparison == UNKNOWN)
23448         return ix86_expand_binop_builtin (icode, exp, target);
23449       nargs = 2;
23450       break;
23451     case V4SF_FTYPE_V4SF_V4SF_SWAP:
23452     case V2DF_FTYPE_V2DF_V2DF_SWAP:
23453       gcc_assert (comparison != UNKNOWN);
23454       nargs = 2;
23455       swap = true;
23456       break;
23457     case V8HI_FTYPE_V8HI_V8HI_COUNT:
23458     case V8HI_FTYPE_V8HI_SI_COUNT:
23459     case V4SI_FTYPE_V4SI_V4SI_COUNT:
23460     case V4SI_FTYPE_V4SI_SI_COUNT:
23461     case V4HI_FTYPE_V4HI_V4HI_COUNT:
23462     case V4HI_FTYPE_V4HI_SI_COUNT:
23463     case V2DI_FTYPE_V2DI_V2DI_COUNT:
23464     case V2DI_FTYPE_V2DI_SI_COUNT:
23465     case V2SI_FTYPE_V2SI_V2SI_COUNT:
23466     case V2SI_FTYPE_V2SI_SI_COUNT:
23467     case V1DI_FTYPE_V1DI_V1DI_COUNT:
23468     case V1DI_FTYPE_V1DI_SI_COUNT:
23469       nargs = 2;
23470       last_arg_count = true;
23471       break;
23472     case UINT64_FTYPE_UINT64_UINT64:
23473     case UINT_FTYPE_UINT_UINT:
23474     case UINT_FTYPE_UINT_USHORT:
23475     case UINT_FTYPE_UINT_UCHAR:
23476     case UINT16_FTYPE_UINT16_INT:
23477     case UINT8_FTYPE_UINT8_INT:
23478       nargs = 2;
23479       break;
23480     case V2DI_FTYPE_V2DI_INT_CONVERT:
23481       nargs = 2;
23482       rmode = V1TImode;
23483       nargs_constant = 1;
23484       break;
23485     case V8HI_FTYPE_V8HI_INT:
23486     case V8SF_FTYPE_V8SF_INT:
23487     case V4SI_FTYPE_V4SI_INT:
23488     case V4SI_FTYPE_V8SI_INT:
23489     case V4HI_FTYPE_V4HI_INT:
23490     case V4DF_FTYPE_V4DF_INT:
23491     case V4SF_FTYPE_V4SF_INT:
23492     case V4SF_FTYPE_V8SF_INT:
23493     case V2DI_FTYPE_V2DI_INT:
23494     case V2DF_FTYPE_V2DF_INT:
23495     case V2DF_FTYPE_V4DF_INT:
23496       nargs = 2;
23497       nargs_constant = 1;
23498       break;
23499     case V16QI_FTYPE_V16QI_V16QI_V16QI:
23500     case V8SF_FTYPE_V8SF_V8SF_V8SF:
23501     case V4DF_FTYPE_V4DF_V4DF_V4DF:
23502     case V4SF_FTYPE_V4SF_V4SF_V4SF:
23503     case V2DF_FTYPE_V2DF_V2DF_V2DF:
23504       nargs = 3;
23505       break;
23506     case V16QI_FTYPE_V16QI_V16QI_INT:
23507     case V8HI_FTYPE_V8HI_V8HI_INT:
23508     case V8SI_FTYPE_V8SI_V8SI_INT:
23509     case V8SI_FTYPE_V8SI_V4SI_INT:
23510     case V8SF_FTYPE_V8SF_V8SF_INT:
23511     case V8SF_FTYPE_V8SF_V4SF_INT:
23512     case V4SI_FTYPE_V4SI_V4SI_INT:
23513     case V4DF_FTYPE_V4DF_V4DF_INT:
23514     case V4DF_FTYPE_V4DF_V2DF_INT:
23515     case V4SF_FTYPE_V4SF_V4SF_INT:
23516     case V2DI_FTYPE_V2DI_V2DI_INT:
23517     case V2DF_FTYPE_V2DF_V2DF_INT:
23518       nargs = 3;
23519       nargs_constant = 1;
23520       break;
23521     case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
23522       nargs = 3;
23523       rmode = V2DImode;
23524       nargs_constant = 1;
23525       break;
23526     case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
23527       nargs = 3;
23528       rmode = DImode;
23529       nargs_constant = 1;
23530       break;
23531     case V2DI_FTYPE_V2DI_UINT_UINT:
23532       nargs = 3;
23533       nargs_constant = 2;
23534       break;
23535     case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23536       nargs = 4;
23537       nargs_constant = 2;
23538       break;
23539     default:
23540       gcc_unreachable ();
23541     }
23542
23543   gcc_assert (nargs <= ARRAY_SIZE (args));
23544
23545   if (comparison != UNKNOWN)
23546     {
23547       gcc_assert (nargs == 2);
23548       return ix86_expand_sse_compare (d, exp, target, swap);
23549     }
23550
23551   if (rmode == VOIDmode || rmode == tmode)
23552     {
23553       if (optimize
23554           || target == 0
23555           || GET_MODE (target) != tmode
23556           || ! (*insn_p->operand[0].predicate) (target, tmode))
23557         target = gen_reg_rtx (tmode);
23558       real_target = target;
23559     }
23560   else
23561     {
23562       target = gen_reg_rtx (rmode);
23563       real_target = simplify_gen_subreg (tmode, target, rmode, 0);
23564     }
23565
23566   for (i = 0; i < nargs; i++)
23567     {
23568       tree arg = CALL_EXPR_ARG (exp, i);
23569       rtx op = expand_normal (arg);
23570       enum machine_mode mode = insn_p->operand[i + 1].mode;
23571       bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
23572
23573       if (last_arg_count && (i + 1) == nargs)
23574         {
23575           /* SIMD shift insns take either an 8-bit immediate or
23576              register as count.  But builtin functions take int as
23577              count.  If count doesn't match, we put it in register.  */
23578           if (!match)
23579             {
23580               op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
23581               if (!(*insn_p->operand[i + 1].predicate) (op, mode))
23582                 op = copy_to_reg (op);
23583             }
23584         }
23585       else if ((nargs - i) <= nargs_constant)
23586         {
23587           if (!match)
23588             switch (icode)
23589               {
23590               case CODE_FOR_sse4_1_roundpd:
23591               case CODE_FOR_sse4_1_roundps:
23592               case CODE_FOR_sse4_1_roundsd:
23593               case CODE_FOR_sse4_1_roundss:
23594               case CODE_FOR_sse4_1_blendps:
23595               case CODE_FOR_avx_blendpd256:
23596               case CODE_FOR_avx_vpermilv4df:
23597               case CODE_FOR_avx_roundpd256:
23598               case CODE_FOR_avx_roundps256:
23599                 error ("the last argument must be a 4-bit immediate");
23600                 return const0_rtx;
23601
23602               case CODE_FOR_sse4_1_blendpd:
23603               case CODE_FOR_avx_vpermilv2df:
23604                 error ("the last argument must be a 2-bit immediate");
23605                 return const0_rtx;
23606
23607               case CODE_FOR_avx_vextractf128v4df:
23608               case CODE_FOR_avx_vextractf128v8sf:
23609               case CODE_FOR_avx_vextractf128v8si:
23610               case CODE_FOR_avx_vinsertf128v4df:
23611               case CODE_FOR_avx_vinsertf128v8sf:
23612               case CODE_FOR_avx_vinsertf128v8si:
23613                 error ("the last argument must be a 1-bit immediate");
23614                 return const0_rtx;
23615
23616               case CODE_FOR_avx_cmpsdv2df3:
23617               case CODE_FOR_avx_cmpssv4sf3:
23618               case CODE_FOR_avx_cmppdv2df3:
23619               case CODE_FOR_avx_cmppsv4sf3:
23620               case CODE_FOR_avx_cmppdv4df3:
23621               case CODE_FOR_avx_cmppsv8sf3:
23622                 error ("the last argument must be a 5-bit immediate");
23623                 return const0_rtx;
23624
23625              default:
23626                 switch (nargs_constant)
23627                   {
23628                   case 2:
23629                     if ((nargs - i) == nargs_constant)
23630                       {
23631                         error ("the next to last argument must be an 8-bit immediate");
23632                         break;
23633                       }
23634                   case 1:
23635                     error ("the last argument must be an 8-bit immediate");
23636                     break;
23637                   default:
23638                     gcc_unreachable ();
23639                   }
23640                 return const0_rtx;
23641               }
23642         }
23643       else
23644         {
23645           if (VECTOR_MODE_P (mode))
23646             op = safe_vector_operand (op, mode);
23647
23648           /* If we aren't optimizing, only allow one memory operand to
23649              be generated.  */
23650           if (memory_operand (op, mode))
23651             num_memory++;
23652
23653           if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
23654             {
23655               if (optimize || !match || num_memory > 1)
23656                 op = copy_to_mode_reg (mode, op);
23657             }
23658           else
23659             {
23660               op = copy_to_reg (op);
23661               op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
23662             }
23663         }
23664
23665       args[i].op = op;
23666       args[i].mode = mode;
23667     }
23668
23669   switch (nargs)
23670     {
23671     case 1:
23672       pat = GEN_FCN (icode) (real_target, args[0].op);
23673       break;
23674     case 2:
23675       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
23676       break;
23677     case 3:
23678       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23679                              args[2].op);
23680       break;
23681     case 4:
23682       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23683                              args[2].op, args[3].op);
23684       break;
23685     default:
23686       gcc_unreachable ();
23687     }
23688
23689   if (! pat)
23690     return 0;
23691
23692   emit_insn (pat);
23693   return target;
23694 }
23695
23696 /* Subroutine of ix86_expand_builtin to take care of special insns
23697    with variable number of operands.  */
23698
23699 static rtx
23700 ix86_expand_special_args_builtin (const struct builtin_description *d,
23701                                     tree exp, rtx target)
23702 {
23703   tree arg;
23704   rtx pat, op;
23705   unsigned int i, nargs, arg_adjust, memory;
23706   struct
23707     {
23708       rtx op;
23709       enum machine_mode mode;
23710     } args[3];
23711   enum insn_code icode = d->icode;
23712   bool last_arg_constant = false;
23713   const struct insn_data *insn_p = &insn_data[icode];
23714   enum machine_mode tmode = insn_p->operand[0].mode;
23715   enum { load, store } klass;
23716
23717   switch ((enum ix86_builtin_func_type) d->flag)
23718     {
23719     case VOID_FTYPE_VOID:
23720       emit_insn (GEN_FCN (icode) (target));
23721       return 0;
23722     case UINT64_FTYPE_VOID:
23723       nargs = 0;
23724       klass = load;
23725       memory = 0;
23726       break;
23727     case UINT64_FTYPE_PUNSIGNED:
23728     case V2DI_FTYPE_PV2DI:
23729     case V32QI_FTYPE_PCCHAR:
23730     case V16QI_FTYPE_PCCHAR:
23731     case V8SF_FTYPE_PCV4SF:
23732     case V8SF_FTYPE_PCFLOAT:
23733     case V4SF_FTYPE_PCFLOAT:
23734     case V4DF_FTYPE_PCV2DF:
23735     case V4DF_FTYPE_PCDOUBLE:
23736     case V2DF_FTYPE_PCDOUBLE:
23737     case VOID_FTYPE_PVOID:
23738       nargs = 1;
23739       klass = load;
23740       memory = 0;
23741       break;
23742     case VOID_FTYPE_PV2SF_V4SF:
23743     case VOID_FTYPE_PV4DI_V4DI:
23744     case VOID_FTYPE_PV2DI_V2DI:
23745     case VOID_FTYPE_PCHAR_V32QI:
23746     case VOID_FTYPE_PCHAR_V16QI:
23747     case VOID_FTYPE_PFLOAT_V8SF:
23748     case VOID_FTYPE_PFLOAT_V4SF:
23749     case VOID_FTYPE_PDOUBLE_V4DF:
23750     case VOID_FTYPE_PDOUBLE_V2DF:
23751     case VOID_FTYPE_PULONGLONG_ULONGLONG:
23752     case VOID_FTYPE_PINT_INT:
23753       nargs = 1;
23754       klass = store;
23755       /* Reserve memory operand for target.  */
23756       memory = ARRAY_SIZE (args);
23757       break;
23758     case V4SF_FTYPE_V4SF_PCV2SF:
23759     case V2DF_FTYPE_V2DF_PCDOUBLE:
23760       nargs = 2;
23761       klass = load;
23762       memory = 1;
23763       break;
23764     case V8SF_FTYPE_PCV8SF_V8SF:
23765     case V4DF_FTYPE_PCV4DF_V4DF:
23766     case V4SF_FTYPE_PCV4SF_V4SF:
23767     case V2DF_FTYPE_PCV2DF_V2DF:
23768       nargs = 2;
23769       klass = load;
23770       memory = 0;
23771       break;
23772     case VOID_FTYPE_PV8SF_V8SF_V8SF:
23773     case VOID_FTYPE_PV4DF_V4DF_V4DF:
23774     case VOID_FTYPE_PV4SF_V4SF_V4SF:
23775     case VOID_FTYPE_PV2DF_V2DF_V2DF:
23776       nargs = 2;
23777       klass = store;
23778       /* Reserve memory operand for target.  */
23779       memory = ARRAY_SIZE (args);
23780       break;
23781     case VOID_FTYPE_UINT_UINT_UINT:
23782     case VOID_FTYPE_UINT64_UINT_UINT:
23783     case UCHAR_FTYPE_UINT_UINT_UINT:
23784     case UCHAR_FTYPE_UINT64_UINT_UINT:
23785       nargs = 3;
23786       klass = load;
23787       memory = ARRAY_SIZE (args);
23788       last_arg_constant = true;
23789       break;
23790     default:
23791       gcc_unreachable ();
23792     }
23793
23794   gcc_assert (nargs <= ARRAY_SIZE (args));
23795
23796   if (klass == store)
23797     {
23798       arg = CALL_EXPR_ARG (exp, 0);
23799       op = expand_normal (arg);
23800       gcc_assert (target == 0);
23801       target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
23802       arg_adjust = 1;
23803     }
23804   else
23805     {
23806       arg_adjust = 0;
23807       if (optimize
23808           || target == 0
23809           || GET_MODE (target) != tmode
23810           || ! (*insn_p->operand[0].predicate) (target, tmode))
23811         target = gen_reg_rtx (tmode);
23812     }
23813
23814   for (i = 0; i < nargs; i++)
23815     {
23816       enum machine_mode mode = insn_p->operand[i + 1].mode;
23817       bool match;
23818
23819       arg = CALL_EXPR_ARG (exp, i + arg_adjust);
23820       op = expand_normal (arg);
23821       match = (*insn_p->operand[i + 1].predicate) (op, mode);
23822
23823       if (last_arg_constant && (i + 1) == nargs)
23824         {
23825           if (!match)
23826             {
23827               if (icode == CODE_FOR_lwp_lwpvalsi3
23828                   || icode == CODE_FOR_lwp_lwpinssi3
23829                   || icode == CODE_FOR_lwp_lwpvaldi3
23830                   || icode == CODE_FOR_lwp_lwpinsdi3)
23831                 error ("the last argument must be a 32-bit immediate");
23832               else
23833                 error ("the last argument must be an 8-bit immediate");
23834               return const0_rtx;
23835             }
23836         }
23837       else
23838         {
23839           if (i == memory)
23840             {
23841               /* This must be the memory operand.  */
23842               op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
23843               gcc_assert (GET_MODE (op) == mode
23844                           || GET_MODE (op) == VOIDmode);
23845             }
23846           else
23847             {
23848               /* This must be register.  */
23849               if (VECTOR_MODE_P (mode))
23850                 op = safe_vector_operand (op, mode);
23851
23852               gcc_assert (GET_MODE (op) == mode
23853                           || GET_MODE (op) == VOIDmode);
23854               op = copy_to_mode_reg (mode, op);
23855             }
23856         }
23857
23858       args[i].op = op;
23859       args[i].mode = mode;
23860     }
23861
23862   switch (nargs)
23863     {
23864     case 0:
23865       pat = GEN_FCN (icode) (target);
23866       break;
23867     case 1:
23868       pat = GEN_FCN (icode) (target, args[0].op);
23869       break;
23870     case 2:
23871       pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23872       break;
23873     case 3:
23874       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23875       break;
23876     default:
23877       gcc_unreachable ();
23878     }
23879
23880   if (! pat)
23881     return 0;
23882   emit_insn (pat);
23883   return klass == store ? 0 : target;
23884 }
23885
23886 /* Return the integer constant in ARG.  Constrain it to be in the range
23887    of the subparts of VEC_TYPE; issue an error if not.  */
23888
23889 static int
23890 get_element_number (tree vec_type, tree arg)
23891 {
23892   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
23893
23894   if (!host_integerp (arg, 1)
23895       || (elt = tree_low_cst (arg, 1), elt > max))
23896     {
23897       error ("selector must be an integer constant in the range 0..%wi", max);
23898       return 0;
23899     }
23900
23901   return elt;
23902 }
23903
23904 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
23905    ix86_expand_vector_init.  We DO have language-level syntax for this, in
23906    the form of  (type){ init-list }.  Except that since we can't place emms
23907    instructions from inside the compiler, we can't allow the use of MMX
23908    registers unless the user explicitly asks for it.  So we do *not* define
23909    vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
23910    we have builtins invoked by mmintrin.h that gives us license to emit
23911    these sorts of instructions.  */
23912
23913 static rtx
23914 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
23915 {
23916   enum machine_mode tmode = TYPE_MODE (type);
23917   enum machine_mode inner_mode = GET_MODE_INNER (tmode);
23918   int i, n_elt = GET_MODE_NUNITS (tmode);
23919   rtvec v = rtvec_alloc (n_elt);
23920
23921   gcc_assert (VECTOR_MODE_P (tmode));
23922   gcc_assert (call_expr_nargs (exp) == n_elt);
23923
23924   for (i = 0; i < n_elt; ++i)
23925     {
23926       rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
23927       RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
23928     }
23929
23930   if (!target || !register_operand (target, tmode))
23931     target = gen_reg_rtx (tmode);
23932
23933   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
23934   return target;
23935 }
23936
23937 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
23938    ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
23939    had a language-level syntax for referencing vector elements.  */
23940
23941 static rtx
23942 ix86_expand_vec_ext_builtin (tree exp, rtx target)
23943 {
23944   enum machine_mode tmode, mode0;
23945   tree arg0, arg1;
23946   int elt;
23947   rtx op0;
23948
23949   arg0 = CALL_EXPR_ARG (exp, 0);
23950   arg1 = CALL_EXPR_ARG (exp, 1);
23951
23952   op0 = expand_normal (arg0);
23953   elt = get_element_number (TREE_TYPE (arg0), arg1);
23954
23955   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
23956   mode0 = TYPE_MODE (TREE_TYPE (arg0));
23957   gcc_assert (VECTOR_MODE_P (mode0));
23958
23959   op0 = force_reg (mode0, op0);
23960
23961   if (optimize || !target || !register_operand (target, tmode))
23962     target = gen_reg_rtx (tmode);
23963
23964   ix86_expand_vector_extract (true, target, op0, elt);
23965
23966   return target;
23967 }
23968
23969 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
23970    ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
23971    a language-level syntax for referencing vector elements.  */
23972
23973 static rtx
23974 ix86_expand_vec_set_builtin (tree exp)
23975 {
23976   enum machine_mode tmode, mode1;
23977   tree arg0, arg1, arg2;
23978   int elt;
23979   rtx op0, op1, target;
23980
23981   arg0 = CALL_EXPR_ARG (exp, 0);
23982   arg1 = CALL_EXPR_ARG (exp, 1);
23983   arg2 = CALL_EXPR_ARG (exp, 2);
23984
23985   tmode = TYPE_MODE (TREE_TYPE (arg0));
23986   mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
23987   gcc_assert (VECTOR_MODE_P (tmode));
23988
23989   op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
23990   op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
23991   elt = get_element_number (TREE_TYPE (arg0), arg2);
23992
23993   if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
23994     op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
23995
23996   op0 = force_reg (tmode, op0);
23997   op1 = force_reg (mode1, op1);
23998
23999   /* OP0 is the source of these builtin functions and shouldn't be
24000      modified.  Create a copy, use it and return it as target.  */
24001   target = gen_reg_rtx (tmode);
24002   emit_move_insn (target, op0);
24003   ix86_expand_vector_set (true, target, op1, elt);
24004
24005   return target;
24006 }
24007
24008 /* Expand an expression EXP that calls a built-in function,
24009    with result going to TARGET if that's convenient
24010    (and in mode MODE if that's convenient).
24011    SUBTARGET may be used as the target for computing one of EXP's operands.
24012    IGNORE is nonzero if the value is to be ignored.  */
24013
24014 static rtx
24015 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24016                      enum machine_mode mode ATTRIBUTE_UNUSED,
24017                      int ignore ATTRIBUTE_UNUSED)
24018 {
24019   const struct builtin_description *d;
24020   size_t i;
24021   enum insn_code icode;
24022   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24023   tree arg0, arg1, arg2;
24024   rtx op0, op1, op2, pat;
24025   enum machine_mode mode0, mode1, mode2;
24026   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24027
24028   /* Determine whether the builtin function is available under the current ISA.
24029      Originally the builtin was not created if it wasn't applicable to the
24030      current ISA based on the command line switches.  With function specific
24031      options, we need to check in the context of the function making the call
24032      whether it is supported.  */
24033   if (ix86_builtins_isa[fcode].isa
24034       && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24035     {
24036       char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24037                                        NULL, NULL, false);
24038
24039       if (!opts)
24040         error ("%qE needs unknown isa option", fndecl);
24041       else
24042         {
24043           gcc_assert (opts != NULL);
24044           error ("%qE needs isa option %s", fndecl, opts);
24045           free (opts);
24046         }
24047       return const0_rtx;
24048     }
24049
24050   switch (fcode)
24051     {
24052     case IX86_BUILTIN_MASKMOVQ:
24053     case IX86_BUILTIN_MASKMOVDQU:
24054       icode = (fcode == IX86_BUILTIN_MASKMOVQ
24055                ? CODE_FOR_mmx_maskmovq
24056                : CODE_FOR_sse2_maskmovdqu);
24057       /* Note the arg order is different from the operand order.  */
24058       arg1 = CALL_EXPR_ARG (exp, 0);
24059       arg2 = CALL_EXPR_ARG (exp, 1);
24060       arg0 = CALL_EXPR_ARG (exp, 2);
24061       op0 = expand_normal (arg0);
24062       op1 = expand_normal (arg1);
24063       op2 = expand_normal (arg2);
24064       mode0 = insn_data[icode].operand[0].mode;
24065       mode1 = insn_data[icode].operand[1].mode;
24066       mode2 = insn_data[icode].operand[2].mode;
24067
24068       op0 = force_reg (Pmode, op0);
24069       op0 = gen_rtx_MEM (mode1, op0);
24070
24071       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24072         op0 = copy_to_mode_reg (mode0, op0);
24073       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24074         op1 = copy_to_mode_reg (mode1, op1);
24075       if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24076         op2 = copy_to_mode_reg (mode2, op2);
24077       pat = GEN_FCN (icode) (op0, op1, op2);
24078       if (! pat)
24079         return 0;
24080       emit_insn (pat);
24081       return 0;
24082
24083     case IX86_BUILTIN_LDMXCSR:
24084       op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24085       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24086       emit_move_insn (target, op0);
24087       emit_insn (gen_sse_ldmxcsr (target));
24088       return 0;
24089
24090     case IX86_BUILTIN_STMXCSR:
24091       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24092       emit_insn (gen_sse_stmxcsr (target));
24093       return copy_to_mode_reg (SImode, target);
24094
24095     case IX86_BUILTIN_CLFLUSH:
24096         arg0 = CALL_EXPR_ARG (exp, 0);
24097         op0 = expand_normal (arg0);
24098         icode = CODE_FOR_sse2_clflush;
24099         if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24100             op0 = copy_to_mode_reg (Pmode, op0);
24101
24102         emit_insn (gen_sse2_clflush (op0));
24103         return 0;
24104
24105     case IX86_BUILTIN_MONITOR:
24106       arg0 = CALL_EXPR_ARG (exp, 0);
24107       arg1 = CALL_EXPR_ARG (exp, 1);
24108       arg2 = CALL_EXPR_ARG (exp, 2);
24109       op0 = expand_normal (arg0);
24110       op1 = expand_normal (arg1);
24111       op2 = expand_normal (arg2);
24112       if (!REG_P (op0))
24113         op0 = copy_to_mode_reg (Pmode, op0);
24114       if (!REG_P (op1))
24115         op1 = copy_to_mode_reg (SImode, op1);
24116       if (!REG_P (op2))
24117         op2 = copy_to_mode_reg (SImode, op2);
24118       emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24119       return 0;
24120
24121     case IX86_BUILTIN_MWAIT:
24122       arg0 = CALL_EXPR_ARG (exp, 0);
24123       arg1 = CALL_EXPR_ARG (exp, 1);
24124       op0 = expand_normal (arg0);
24125       op1 = expand_normal (arg1);
24126       if (!REG_P (op0))
24127         op0 = copy_to_mode_reg (SImode, op0);
24128       if (!REG_P (op1))
24129         op1 = copy_to_mode_reg (SImode, op1);
24130       emit_insn (gen_sse3_mwait (op0, op1));
24131       return 0;
24132
24133     case IX86_BUILTIN_VEC_INIT_V2SI:
24134     case IX86_BUILTIN_VEC_INIT_V4HI:
24135     case IX86_BUILTIN_VEC_INIT_V8QI:
24136       return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24137
24138     case IX86_BUILTIN_VEC_EXT_V2DF:
24139     case IX86_BUILTIN_VEC_EXT_V2DI:
24140     case IX86_BUILTIN_VEC_EXT_V4SF:
24141     case IX86_BUILTIN_VEC_EXT_V4SI:
24142     case IX86_BUILTIN_VEC_EXT_V8HI:
24143     case IX86_BUILTIN_VEC_EXT_V2SI:
24144     case IX86_BUILTIN_VEC_EXT_V4HI:
24145     case IX86_BUILTIN_VEC_EXT_V16QI:
24146       return ix86_expand_vec_ext_builtin (exp, target);
24147
24148     case IX86_BUILTIN_VEC_SET_V2DI:
24149     case IX86_BUILTIN_VEC_SET_V4SF:
24150     case IX86_BUILTIN_VEC_SET_V4SI:
24151     case IX86_BUILTIN_VEC_SET_V8HI:
24152     case IX86_BUILTIN_VEC_SET_V4HI:
24153     case IX86_BUILTIN_VEC_SET_V16QI:
24154       return ix86_expand_vec_set_builtin (exp);
24155
24156     case IX86_BUILTIN_VEC_PERM_V2DF:
24157     case IX86_BUILTIN_VEC_PERM_V4SF:
24158     case IX86_BUILTIN_VEC_PERM_V2DI:
24159     case IX86_BUILTIN_VEC_PERM_V4SI:
24160     case IX86_BUILTIN_VEC_PERM_V8HI:
24161     case IX86_BUILTIN_VEC_PERM_V16QI:
24162     case IX86_BUILTIN_VEC_PERM_V2DI_U:
24163     case IX86_BUILTIN_VEC_PERM_V4SI_U:
24164     case IX86_BUILTIN_VEC_PERM_V8HI_U:
24165     case IX86_BUILTIN_VEC_PERM_V16QI_U:
24166     case IX86_BUILTIN_VEC_PERM_V4DF:
24167     case IX86_BUILTIN_VEC_PERM_V8SF:
24168       return ix86_expand_vec_perm_builtin (exp);
24169
24170     case IX86_BUILTIN_INFQ:
24171     case IX86_BUILTIN_HUGE_VALQ:
24172       {
24173         REAL_VALUE_TYPE inf;
24174         rtx tmp;
24175
24176         real_inf (&inf);
24177         tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24178
24179         tmp = validize_mem (force_const_mem (mode, tmp));
24180
24181         if (target == 0)
24182           target = gen_reg_rtx (mode);
24183
24184         emit_move_insn (target, tmp);
24185         return target;
24186       }
24187
24188     case IX86_BUILTIN_LLWPCB:
24189       arg0 = CALL_EXPR_ARG (exp, 0);
24190       op0 = expand_normal (arg0);
24191       icode = CODE_FOR_lwp_llwpcb;
24192       if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24193         op0 = copy_to_mode_reg (Pmode, op0);
24194       emit_insn (gen_lwp_llwpcb (op0));
24195       return 0;
24196
24197     case IX86_BUILTIN_SLWPCB:
24198       icode = CODE_FOR_lwp_slwpcb;
24199       if (!target
24200           || ! (*insn_data[icode].operand[0].predicate) (target, Pmode))
24201         target = gen_reg_rtx (Pmode);
24202       emit_insn (gen_lwp_slwpcb (target));
24203       return target;
24204
24205     default:
24206       break;
24207     }
24208
24209   for (i = 0, d = bdesc_special_args;
24210        i < ARRAY_SIZE (bdesc_special_args);
24211        i++, d++)
24212     if (d->code == fcode)
24213       return ix86_expand_special_args_builtin (d, exp, target);
24214
24215   for (i = 0, d = bdesc_args;
24216        i < ARRAY_SIZE (bdesc_args);
24217        i++, d++)
24218     if (d->code == fcode)
24219       switch (fcode)
24220         {
24221         case IX86_BUILTIN_FABSQ:
24222         case IX86_BUILTIN_COPYSIGNQ:
24223           if (!TARGET_SSE2)
24224             /* Emit a normal call if SSE2 isn't available.  */
24225             return expand_call (exp, target, ignore);
24226         default:
24227           return ix86_expand_args_builtin (d, exp, target);
24228         }
24229
24230   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24231     if (d->code == fcode)
24232       return ix86_expand_sse_comi (d, exp, target);
24233
24234   for (i = 0, d = bdesc_pcmpestr;
24235        i < ARRAY_SIZE (bdesc_pcmpestr);
24236        i++, d++)
24237     if (d->code == fcode)
24238       return ix86_expand_sse_pcmpestr (d, exp, target);
24239
24240   for (i = 0, d = bdesc_pcmpistr;
24241        i < ARRAY_SIZE (bdesc_pcmpistr);
24242        i++, d++)
24243     if (d->code == fcode)
24244       return ix86_expand_sse_pcmpistr (d, exp, target);
24245
24246   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24247     if (d->code == fcode)
24248       return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24249                                             (enum ix86_builtin_func_type)
24250                                             d->flag, d->comparison);
24251
24252   gcc_unreachable ();
24253 }
24254
24255 /* Returns a function decl for a vectorized version of the builtin function
24256    with builtin function code FN and the result vector type TYPE, or NULL_TREE
24257    if it is not available.  */
24258
24259 static tree
24260 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
24261                                   tree type_in)
24262 {
24263   enum machine_mode in_mode, out_mode;
24264   int in_n, out_n;
24265
24266   if (TREE_CODE (type_out) != VECTOR_TYPE
24267       || TREE_CODE (type_in) != VECTOR_TYPE)
24268     return NULL_TREE;
24269
24270   out_mode = TYPE_MODE (TREE_TYPE (type_out));
24271   out_n = TYPE_VECTOR_SUBPARTS (type_out);
24272   in_mode = TYPE_MODE (TREE_TYPE (type_in));
24273   in_n = TYPE_VECTOR_SUBPARTS (type_in);
24274
24275   switch (fn)
24276     {
24277     case BUILT_IN_SQRT:
24278       if (out_mode == DFmode && out_n == 2
24279           && in_mode == DFmode && in_n == 2)
24280         return ix86_builtins[IX86_BUILTIN_SQRTPD];
24281       break;
24282
24283     case BUILT_IN_SQRTF:
24284       if (out_mode == SFmode && out_n == 4
24285           && in_mode == SFmode && in_n == 4)
24286         return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
24287       break;
24288
24289     case BUILT_IN_LRINT:
24290       if (out_mode == SImode && out_n == 4
24291           && in_mode == DFmode && in_n == 2)
24292         return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24293       break;
24294
24295     case BUILT_IN_LRINTF:
24296       if (out_mode == SImode && out_n == 4
24297           && in_mode == SFmode && in_n == 4)
24298         return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24299       break;
24300
24301     case BUILT_IN_COPYSIGN:
24302       if (out_mode == DFmode && out_n == 2
24303           && in_mode == DFmode && in_n == 2)
24304         return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
24305       break;
24306
24307     case BUILT_IN_COPYSIGNF:
24308       if (out_mode == SFmode && out_n == 4
24309           && in_mode == SFmode && in_n == 4)
24310         return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
24311       break;
24312
24313     default:
24314       ;
24315     }
24316
24317   /* Dispatch to a handler for a vectorization library.  */
24318   if (ix86_veclib_handler)
24319     return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
24320                                    type_in);
24321
24322   return NULL_TREE;
24323 }
24324
24325 /* Handler for an SVML-style interface to
24326    a library with vectorized intrinsics.  */
24327
24328 static tree
24329 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
24330 {
24331   char name[20];
24332   tree fntype, new_fndecl, args;
24333   unsigned arity;
24334   const char *bname;
24335   enum machine_mode el_mode, in_mode;
24336   int n, in_n;
24337
24338   /* The SVML is suitable for unsafe math only.  */
24339   if (!flag_unsafe_math_optimizations)
24340     return NULL_TREE;
24341
24342   el_mode = TYPE_MODE (TREE_TYPE (type_out));
24343   n = TYPE_VECTOR_SUBPARTS (type_out);
24344   in_mode = TYPE_MODE (TREE_TYPE (type_in));
24345   in_n = TYPE_VECTOR_SUBPARTS (type_in);
24346   if (el_mode != in_mode
24347       || n != in_n)
24348     return NULL_TREE;
24349
24350   switch (fn)
24351     {
24352     case BUILT_IN_EXP:
24353     case BUILT_IN_LOG:
24354     case BUILT_IN_LOG10:
24355     case BUILT_IN_POW:
24356     case BUILT_IN_TANH:
24357     case BUILT_IN_TAN:
24358     case BUILT_IN_ATAN:
24359     case BUILT_IN_ATAN2:
24360     case BUILT_IN_ATANH:
24361     case BUILT_IN_CBRT:
24362     case BUILT_IN_SINH:
24363     case BUILT_IN_SIN:
24364     case BUILT_IN_ASINH:
24365     case BUILT_IN_ASIN:
24366     case BUILT_IN_COSH:
24367     case BUILT_IN_COS:
24368     case BUILT_IN_ACOSH:
24369     case BUILT_IN_ACOS:
24370       if (el_mode != DFmode || n != 2)
24371         return NULL_TREE;
24372       break;
24373
24374     case BUILT_IN_EXPF:
24375     case BUILT_IN_LOGF:
24376     case BUILT_IN_LOG10F:
24377     case BUILT_IN_POWF:
24378     case BUILT_IN_TANHF:
24379     case BUILT_IN_TANF:
24380     case BUILT_IN_ATANF:
24381     case BUILT_IN_ATAN2F:
24382     case BUILT_IN_ATANHF:
24383     case BUILT_IN_CBRTF:
24384     case BUILT_IN_SINHF:
24385     case BUILT_IN_SINF:
24386     case BUILT_IN_ASINHF:
24387     case BUILT_IN_ASINF:
24388     case BUILT_IN_COSHF:
24389     case BUILT_IN_COSF:
24390     case BUILT_IN_ACOSHF:
24391     case BUILT_IN_ACOSF:
24392       if (el_mode != SFmode || n != 4)
24393         return NULL_TREE;
24394       break;
24395
24396     default:
24397       return NULL_TREE;
24398     }
24399
24400   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24401
24402   if (fn == BUILT_IN_LOGF)
24403     strcpy (name, "vmlsLn4");
24404   else if (fn == BUILT_IN_LOG)
24405     strcpy (name, "vmldLn2");
24406   else if (n == 4)
24407     {
24408       sprintf (name, "vmls%s", bname+10);
24409       name[strlen (name)-1] = '4';
24410     }
24411   else
24412     sprintf (name, "vmld%s2", bname+10);
24413
24414   /* Convert to uppercase. */
24415   name[4] &= ~0x20;
24416
24417   arity = 0;
24418   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24419        args = TREE_CHAIN (args))
24420     arity++;
24421
24422   if (arity == 1)
24423     fntype = build_function_type_list (type_out, type_in, NULL);
24424   else
24425     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24426
24427   /* Build a function declaration for the vectorized function.  */
24428   new_fndecl = build_decl (BUILTINS_LOCATION,
24429                            FUNCTION_DECL, get_identifier (name), fntype);
24430   TREE_PUBLIC (new_fndecl) = 1;
24431   DECL_EXTERNAL (new_fndecl) = 1;
24432   DECL_IS_NOVOPS (new_fndecl) = 1;
24433   TREE_READONLY (new_fndecl) = 1;
24434
24435   return new_fndecl;
24436 }
24437
24438 /* Handler for an ACML-style interface to
24439    a library with vectorized intrinsics.  */
24440
24441 static tree
24442 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
24443 {
24444   char name[20] = "__vr.._";
24445   tree fntype, new_fndecl, args;
24446   unsigned arity;
24447   const char *bname;
24448   enum machine_mode el_mode, in_mode;
24449   int n, in_n;
24450
24451   /* The ACML is 64bits only and suitable for unsafe math only as
24452      it does not correctly support parts of IEEE with the required
24453      precision such as denormals.  */
24454   if (!TARGET_64BIT
24455       || !flag_unsafe_math_optimizations)
24456     return NULL_TREE;
24457
24458   el_mode = TYPE_MODE (TREE_TYPE (type_out));
24459   n = TYPE_VECTOR_SUBPARTS (type_out);
24460   in_mode = TYPE_MODE (TREE_TYPE (type_in));
24461   in_n = TYPE_VECTOR_SUBPARTS (type_in);
24462   if (el_mode != in_mode
24463       || n != in_n)
24464     return NULL_TREE;
24465
24466   switch (fn)
24467     {
24468     case BUILT_IN_SIN:
24469     case BUILT_IN_COS:
24470     case BUILT_IN_EXP:
24471     case BUILT_IN_LOG:
24472     case BUILT_IN_LOG2:
24473     case BUILT_IN_LOG10:
24474       name[4] = 'd';
24475       name[5] = '2';
24476       if (el_mode != DFmode
24477           || n != 2)
24478         return NULL_TREE;
24479       break;
24480
24481     case BUILT_IN_SINF:
24482     case BUILT_IN_COSF:
24483     case BUILT_IN_EXPF:
24484     case BUILT_IN_POWF:
24485     case BUILT_IN_LOGF:
24486     case BUILT_IN_LOG2F:
24487     case BUILT_IN_LOG10F:
24488       name[4] = 's';
24489       name[5] = '4';
24490       if (el_mode != SFmode
24491           || n != 4)
24492         return NULL_TREE;
24493       break;
24494
24495     default:
24496       return NULL_TREE;
24497     }
24498
24499   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24500   sprintf (name + 7, "%s", bname+10);
24501
24502   arity = 0;
24503   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24504        args = TREE_CHAIN (args))
24505     arity++;
24506
24507   if (arity == 1)
24508     fntype = build_function_type_list (type_out, type_in, NULL);
24509   else
24510     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24511
24512   /* Build a function declaration for the vectorized function.  */
24513   new_fndecl = build_decl (BUILTINS_LOCATION,
24514                            FUNCTION_DECL, get_identifier (name), fntype);
24515   TREE_PUBLIC (new_fndecl) = 1;
24516   DECL_EXTERNAL (new_fndecl) = 1;
24517   DECL_IS_NOVOPS (new_fndecl) = 1;
24518   TREE_READONLY (new_fndecl) = 1;
24519
24520   return new_fndecl;
24521 }
24522
24523
24524 /* Returns a decl of a function that implements conversion of an integer vector
24525    into a floating-point vector, or vice-versa. TYPE is the type of the integer
24526    side of the conversion.
24527    Return NULL_TREE if it is not available.  */
24528
24529 static tree
24530 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
24531 {
24532   if (! (TARGET_SSE2 && TREE_CODE (type) == VECTOR_TYPE))
24533     return NULL_TREE;
24534
24535   switch (code)
24536     {
24537     case FLOAT_EXPR:
24538       switch (TYPE_MODE (type))
24539         {
24540         case V4SImode:
24541           return TYPE_UNSIGNED (type)
24542             ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
24543             : ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
24544         default:
24545           return NULL_TREE;
24546         }
24547
24548     case FIX_TRUNC_EXPR:
24549       switch (TYPE_MODE (type))
24550         {
24551         case V4SImode:
24552           return TYPE_UNSIGNED (type)
24553             ? NULL_TREE
24554             : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
24555         default:
24556           return NULL_TREE;
24557         }
24558     default:
24559       return NULL_TREE;
24560
24561     }
24562 }
24563
24564 /* Returns a code for a target-specific builtin that implements
24565    reciprocal of the function, or NULL_TREE if not available.  */
24566
24567 static tree
24568 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
24569                          bool sqrt ATTRIBUTE_UNUSED)
24570 {
24571   if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
24572          && flag_finite_math_only && !flag_trapping_math
24573          && flag_unsafe_math_optimizations))
24574     return NULL_TREE;
24575
24576   if (md_fn)
24577     /* Machine dependent builtins.  */
24578     switch (fn)
24579       {
24580         /* Vectorized version of sqrt to rsqrt conversion.  */
24581       case IX86_BUILTIN_SQRTPS_NR:
24582         return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
24583
24584       default:
24585         return NULL_TREE;
24586       }
24587   else
24588     /* Normal builtins.  */
24589     switch (fn)
24590       {
24591         /* Sqrt to rsqrt conversion.  */
24592       case BUILT_IN_SQRTF:
24593         return ix86_builtins[IX86_BUILTIN_RSQRTF];
24594
24595       default:
24596         return NULL_TREE;
24597       }
24598 }
24599 \f
24600 /* Helper for avx_vpermilps256_operand et al.  This is also used by
24601    the expansion functions to turn the parallel back into a mask.
24602    The return value is 0 for no match and the imm8+1 for a match.  */
24603
24604 int
24605 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
24606 {
24607   unsigned i, nelt = GET_MODE_NUNITS (mode);
24608   unsigned mask = 0;
24609   unsigned char ipar[8];
24610
24611   if (XVECLEN (par, 0) != (int) nelt)
24612     return 0;
24613
24614   /* Validate that all of the elements are constants, and not totally
24615      out of range.  Copy the data into an integral array to make the
24616      subsequent checks easier.  */
24617   for (i = 0; i < nelt; ++i)
24618     {
24619       rtx er = XVECEXP (par, 0, i);
24620       unsigned HOST_WIDE_INT ei;
24621
24622       if (!CONST_INT_P (er))
24623         return 0;
24624       ei = INTVAL (er);
24625       if (ei >= 2 * nelt)
24626         return 0;
24627       ipar[i] = ei;
24628     }
24629
24630   switch (mode)
24631     {
24632     case V4DFmode:
24633       /* In the 256-bit DFmode case, we can only move elements within
24634          a 128-bit lane.  */
24635       for (i = 0; i < 2; ++i)
24636         {
24637           if (ipar[i] >= 2)
24638             return 0;
24639           mask |= ipar[i] << i;
24640         }
24641       for (i = 2; i < 4; ++i)
24642         {
24643           if (ipar[i] < 2)
24644             return 0;
24645           mask |= (ipar[i] - 2) << i;
24646         }
24647       break;
24648
24649     case V8SFmode:
24650       /* In the 256-bit SFmode case, we have full freedom of movement
24651          within the low 128-bit lane, but the high 128-bit lane must
24652          mirror the exact same pattern.  */
24653       for (i = 0; i < 4; ++i)
24654         if (ipar[i] + 4 != ipar[i + 4])
24655           return 0;
24656       nelt = 4;
24657       /* FALLTHRU */
24658
24659     case V2DFmode:
24660     case V4SFmode:
24661       /* In the 128-bit case, we've full freedom in the placement of
24662          the elements from the source operand.  */
24663       for (i = 0; i < nelt; ++i)
24664         mask |= ipar[i] << (i * (nelt / 2));
24665       break;
24666
24667     default:
24668       gcc_unreachable ();
24669     }
24670
24671   /* Make sure success has a non-zero value by adding one.  */
24672   return mask + 1;
24673 }
24674
24675 /* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
24676    the expansion functions to turn the parallel back into a mask.
24677    The return value is 0 for no match and the imm8+1 for a match.  */
24678
24679 int
24680 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
24681 {
24682   unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
24683   unsigned mask = 0;
24684   unsigned char ipar[8];
24685
24686   if (XVECLEN (par, 0) != (int) nelt)
24687     return 0;
24688
24689   /* Validate that all of the elements are constants, and not totally
24690      out of range.  Copy the data into an integral array to make the
24691      subsequent checks easier.  */
24692   for (i = 0; i < nelt; ++i)
24693     {
24694       rtx er = XVECEXP (par, 0, i);
24695       unsigned HOST_WIDE_INT ei;
24696
24697       if (!CONST_INT_P (er))
24698         return 0;
24699       ei = INTVAL (er);
24700       if (ei >= 2 * nelt)
24701         return 0;
24702       ipar[i] = ei;
24703     }
24704
24705   /* Validate that the halves of the permute are halves.  */
24706   for (i = 0; i < nelt2 - 1; ++i)
24707     if (ipar[i] + 1 != ipar[i + 1])
24708       return 0;
24709   for (i = nelt2; i < nelt - 1; ++i)
24710     if (ipar[i] + 1 != ipar[i + 1])
24711       return 0;
24712
24713   /* Reconstruct the mask.  */
24714   for (i = 0; i < 2; ++i)
24715     {
24716       unsigned e = ipar[i * nelt2];
24717       if (e % nelt2)
24718         return 0;
24719       e /= nelt2;
24720       mask |= e << (i * 4);
24721     }
24722
24723   /* Make sure success has a non-zero value by adding one.  */
24724   return mask + 1;
24725 }
24726 \f
24727
24728 /* Store OPERAND to the memory after reload is completed.  This means
24729    that we can't easily use assign_stack_local.  */
24730 rtx
24731 ix86_force_to_memory (enum machine_mode mode, rtx operand)
24732 {
24733   rtx result;
24734
24735   gcc_assert (reload_completed);
24736   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
24737     {
24738       result = gen_rtx_MEM (mode,
24739                             gen_rtx_PLUS (Pmode,
24740                                           stack_pointer_rtx,
24741                                           GEN_INT (-RED_ZONE_SIZE)));
24742       emit_move_insn (result, operand);
24743     }
24744   else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
24745     {
24746       switch (mode)
24747         {
24748         case HImode:
24749         case SImode:
24750           operand = gen_lowpart (DImode, operand);
24751           /* FALLTHRU */
24752         case DImode:
24753           emit_insn (
24754                       gen_rtx_SET (VOIDmode,
24755                                    gen_rtx_MEM (DImode,
24756                                                 gen_rtx_PRE_DEC (DImode,
24757                                                         stack_pointer_rtx)),
24758                                    operand));
24759           break;
24760         default:
24761           gcc_unreachable ();
24762         }
24763       result = gen_rtx_MEM (mode, stack_pointer_rtx);
24764     }
24765   else
24766     {
24767       switch (mode)
24768         {
24769         case DImode:
24770           {
24771             rtx operands[2];
24772             split_di (&operand, 1, operands, operands + 1);
24773             emit_insn (
24774                         gen_rtx_SET (VOIDmode,
24775                                      gen_rtx_MEM (SImode,
24776                                                   gen_rtx_PRE_DEC (Pmode,
24777                                                         stack_pointer_rtx)),
24778                                      operands[1]));
24779             emit_insn (
24780                         gen_rtx_SET (VOIDmode,
24781                                      gen_rtx_MEM (SImode,
24782                                                   gen_rtx_PRE_DEC (Pmode,
24783                                                         stack_pointer_rtx)),
24784                                      operands[0]));
24785           }
24786           break;
24787         case HImode:
24788           /* Store HImodes as SImodes.  */
24789           operand = gen_lowpart (SImode, operand);
24790           /* FALLTHRU */
24791         case SImode:
24792           emit_insn (
24793                       gen_rtx_SET (VOIDmode,
24794                                    gen_rtx_MEM (GET_MODE (operand),
24795                                                 gen_rtx_PRE_DEC (SImode,
24796                                                         stack_pointer_rtx)),
24797                                    operand));
24798           break;
24799         default:
24800           gcc_unreachable ();
24801         }
24802       result = gen_rtx_MEM (mode, stack_pointer_rtx);
24803     }
24804   return result;
24805 }
24806
24807 /* Free operand from the memory.  */
24808 void
24809 ix86_free_from_memory (enum machine_mode mode)
24810 {
24811   if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
24812     {
24813       int size;
24814
24815       if (mode == DImode || TARGET_64BIT)
24816         size = 8;
24817       else
24818         size = 4;
24819       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
24820          to pop or add instruction if registers are available.  */
24821       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24822                               gen_rtx_PLUS (Pmode, stack_pointer_rtx,
24823                                             GEN_INT (size))));
24824     }
24825 }
24826
24827 /* Implement TARGET_IRA_COVER_CLASSES.  If -mfpmath=sse, we prefer
24828    SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
24829    same.  */
24830 static const enum reg_class *
24831 i386_ira_cover_classes (void)
24832 {
24833   static const enum reg_class sse_fpmath_classes[] = {
24834     GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
24835   };
24836   static const enum reg_class no_sse_fpmath_classes[] = {
24837     GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
24838   };
24839
24840  return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
24841 }
24842
24843 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
24844    QImode must go into class Q_REGS.
24845    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
24846    movdf to do mem-to-mem moves through integer regs.  */
24847 enum reg_class
24848 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
24849 {
24850   enum machine_mode mode = GET_MODE (x);
24851
24852   /* We're only allowed to return a subclass of CLASS.  Many of the
24853      following checks fail for NO_REGS, so eliminate that early.  */
24854   if (regclass == NO_REGS)
24855     return NO_REGS;
24856
24857   /* All classes can load zeros.  */
24858   if (x == CONST0_RTX (mode))
24859     return regclass;
24860
24861   /* Force constants into memory if we are loading a (nonzero) constant into
24862      an MMX or SSE register.  This is because there are no MMX/SSE instructions
24863      to load from a constant.  */
24864   if (CONSTANT_P (x)
24865       && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
24866     return NO_REGS;
24867
24868   /* Prefer SSE regs only, if we can use them for math.  */
24869   if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
24870     return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
24871
24872   /* Floating-point constants need more complex checks.  */
24873   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
24874     {
24875       /* General regs can load everything.  */
24876       if (reg_class_subset_p (regclass, GENERAL_REGS))
24877         return regclass;
24878
24879       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
24880          zero above.  We only want to wind up preferring 80387 registers if
24881          we plan on doing computation with them.  */
24882       if (TARGET_80387
24883           && standard_80387_constant_p (x))
24884         {
24885           /* Limit class to non-sse.  */
24886           if (regclass == FLOAT_SSE_REGS)
24887             return FLOAT_REGS;
24888           if (regclass == FP_TOP_SSE_REGS)
24889             return FP_TOP_REG;
24890           if (regclass == FP_SECOND_SSE_REGS)
24891             return FP_SECOND_REG;
24892           if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
24893             return regclass;
24894         }
24895
24896       return NO_REGS;
24897     }
24898
24899   /* Generally when we see PLUS here, it's the function invariant
24900      (plus soft-fp const_int).  Which can only be computed into general
24901      regs.  */
24902   if (GET_CODE (x) == PLUS)
24903     return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
24904
24905   /* QImode constants are easy to load, but non-constant QImode data
24906      must go into Q_REGS.  */
24907   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
24908     {
24909       if (reg_class_subset_p (regclass, Q_REGS))
24910         return regclass;
24911       if (reg_class_subset_p (Q_REGS, regclass))
24912         return Q_REGS;
24913       return NO_REGS;
24914     }
24915
24916   return regclass;
24917 }
24918
24919 /* Discourage putting floating-point values in SSE registers unless
24920    SSE math is being used, and likewise for the 387 registers.  */
24921 enum reg_class
24922 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
24923 {
24924   enum machine_mode mode = GET_MODE (x);
24925
24926   /* Restrict the output reload class to the register bank that we are doing
24927      math on.  If we would like not to return a subset of CLASS, reject this
24928      alternative: if reload cannot do this, it will still use its choice.  */
24929   mode = GET_MODE (x);
24930   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
24931     return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
24932
24933   if (X87_FLOAT_MODE_P (mode))
24934     {
24935       if (regclass == FP_TOP_SSE_REGS)
24936         return FP_TOP_REG;
24937       else if (regclass == FP_SECOND_SSE_REGS)
24938         return FP_SECOND_REG;
24939       else
24940         return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
24941     }
24942
24943   return regclass;
24944 }
24945
24946 static enum reg_class
24947 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
24948                        enum machine_mode mode,
24949                        secondary_reload_info *sri ATTRIBUTE_UNUSED)
24950 {
24951   /* QImode spills from non-QI registers require
24952      intermediate register on 32bit targets.  */
24953   if (!in_p && mode == QImode && !TARGET_64BIT
24954       && (rclass == GENERAL_REGS
24955           || rclass == LEGACY_REGS
24956           || rclass == INDEX_REGS))
24957     {
24958       int regno;
24959
24960       if (REG_P (x))
24961         regno = REGNO (x);
24962       else
24963         regno = -1;
24964
24965       if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
24966         regno = true_regnum (x);
24967
24968       /* Return Q_REGS if the operand is in memory.  */
24969       if (regno == -1)
24970         return Q_REGS;
24971     }
24972
24973   return NO_REGS;
24974 }
24975
24976 /* If we are copying between general and FP registers, we need a memory
24977    location. The same is true for SSE and MMX registers.
24978
24979    To optimize register_move_cost performance, allow inline variant.
24980
24981    The macro can't work reliably when one of the CLASSES is class containing
24982    registers from multiple units (SSE, MMX, integer).  We avoid this by never
24983    combining those units in single alternative in the machine description.
24984    Ensure that this constraint holds to avoid unexpected surprises.
24985
24986    When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
24987    enforce these sanity checks.  */
24988
24989 static inline int
24990 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
24991                               enum machine_mode mode, int strict)
24992 {
24993   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
24994       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
24995       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
24996       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
24997       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
24998       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
24999     {
25000       gcc_assert (!strict);
25001       return true;
25002     }
25003
25004   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25005     return true;
25006
25007   /* ??? This is a lie.  We do have moves between mmx/general, and for
25008      mmx/sse2.  But by saying we need secondary memory we discourage the
25009      register allocator from using the mmx registers unless needed.  */
25010   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25011     return true;
25012
25013   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25014     {
25015       /* SSE1 doesn't have any direct moves from other classes.  */
25016       if (!TARGET_SSE2)
25017         return true;
25018
25019       /* If the target says that inter-unit moves are more expensive
25020          than moving through memory, then don't generate them.  */
25021       if (!TARGET_INTER_UNIT_MOVES)
25022         return true;
25023
25024       /* Between SSE and general, we have moves no larger than word size.  */
25025       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
25026         return true;
25027     }
25028
25029   return false;
25030 }
25031
25032 int
25033 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25034                               enum machine_mode mode, int strict)
25035 {
25036   return inline_secondary_memory_needed (class1, class2, mode, strict);
25037 }
25038
25039 /* Return true if the registers in CLASS cannot represent the change from
25040    modes FROM to TO.  */
25041
25042 bool
25043 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25044                                enum reg_class regclass)
25045 {
25046   if (from == to)
25047     return false;
25048
25049   /* x87 registers can't do subreg at all, as all values are reformatted
25050      to extended precision.  */
25051   if (MAYBE_FLOAT_CLASS_P (regclass))
25052     return true;
25053
25054   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25055     {
25056       /* Vector registers do not support QI or HImode loads.  If we don't
25057          disallow a change to these modes, reload will assume it's ok to
25058          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
25059          the vec_dupv4hi pattern.  */
25060       if (GET_MODE_SIZE (from) < 4)
25061         return true;
25062
25063       /* Vector registers do not support subreg with nonzero offsets, which
25064          are otherwise valid for integer registers.  Since we can't see
25065          whether we have a nonzero offset from here, prohibit all
25066          nonparadoxical subregs changing size.  */
25067       if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25068         return true;
25069     }
25070
25071   return false;
25072 }
25073
25074 /* Return the cost of moving data of mode M between a
25075    register and memory.  A value of 2 is the default; this cost is
25076    relative to those in `REGISTER_MOVE_COST'.
25077
25078    This function is used extensively by register_move_cost that is used to
25079    build tables at startup.  Make it inline in this case.
25080    When IN is 2, return maximum of in and out move cost.
25081
25082    If moving between registers and memory is more expensive than
25083    between two registers, you should define this macro to express the
25084    relative cost.
25085
25086    Model also increased moving costs of QImode registers in non
25087    Q_REGS classes.
25088  */
25089 static inline int
25090 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
25091                          int in)
25092 {
25093   int cost;
25094   if (FLOAT_CLASS_P (regclass))
25095     {
25096       int index;
25097       switch (mode)
25098         {
25099           case SFmode:
25100             index = 0;
25101             break;
25102           case DFmode:
25103             index = 1;
25104             break;
25105           case XFmode:
25106             index = 2;
25107             break;
25108           default:
25109             return 100;
25110         }
25111       if (in == 2)
25112         return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25113       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
25114     }
25115   if (SSE_CLASS_P (regclass))
25116     {
25117       int index;
25118       switch (GET_MODE_SIZE (mode))
25119         {
25120           case 4:
25121             index = 0;
25122             break;
25123           case 8:
25124             index = 1;
25125             break;
25126           case 16:
25127             index = 2;
25128             break;
25129           default:
25130             return 100;
25131         }
25132       if (in == 2)
25133         return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25134       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
25135     }
25136   if (MMX_CLASS_P (regclass))
25137     {
25138       int index;
25139       switch (GET_MODE_SIZE (mode))
25140         {
25141           case 4:
25142             index = 0;
25143             break;
25144           case 8:
25145             index = 1;
25146             break;
25147           default:
25148             return 100;
25149         }
25150       if (in)
25151         return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25152       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
25153     }
25154   switch (GET_MODE_SIZE (mode))
25155     {
25156       case 1:
25157         if (Q_CLASS_P (regclass) || TARGET_64BIT)
25158           {
25159             if (!in)
25160               return ix86_cost->int_store[0];
25161             if (TARGET_PARTIAL_REG_DEPENDENCY
25162                 && optimize_function_for_speed_p (cfun))
25163               cost = ix86_cost->movzbl_load;
25164             else
25165               cost = ix86_cost->int_load[0];
25166             if (in == 2)
25167               return MAX (cost, ix86_cost->int_store[0]);
25168             return cost;
25169           }
25170         else
25171           {
25172            if (in == 2)
25173              return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25174            if (in)
25175              return ix86_cost->movzbl_load;
25176            else
25177              return ix86_cost->int_store[0] + 4;
25178           }
25179         break;
25180       case 2:
25181         if (in == 2)
25182           return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25183         return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25184       default:
25185         /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
25186         if (mode == TFmode)
25187           mode = XFmode;
25188         if (in == 2)
25189           cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25190         else if (in)
25191           cost = ix86_cost->int_load[2];
25192         else
25193           cost = ix86_cost->int_store[2];
25194         return (cost * (((int) GET_MODE_SIZE (mode)
25195                         + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
25196     }
25197 }
25198
25199 int
25200 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25201 {
25202   return inline_memory_move_cost (mode, regclass, in);
25203 }
25204
25205
25206 /* Return the cost of moving data from a register in class CLASS1 to
25207    one in class CLASS2.
25208
25209    It is not required that the cost always equal 2 when FROM is the same as TO;
25210    on some machines it is expensive to move between registers if they are not
25211    general registers.  */
25212
25213 int
25214 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25215                          enum reg_class class2)
25216 {
25217   /* In case we require secondary memory, compute cost of the store followed
25218      by load.  In order to avoid bad register allocation choices, we need
25219      for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
25220
25221   if (inline_secondary_memory_needed (class1, class2, mode, 0))
25222     {
25223       int cost = 1;
25224
25225       cost += inline_memory_move_cost (mode, class1, 2);
25226       cost += inline_memory_move_cost (mode, class2, 2);
25227
25228       /* In case of copying from general_purpose_register we may emit multiple
25229          stores followed by single load causing memory size mismatch stall.
25230          Count this as arbitrarily high cost of 20.  */
25231       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25232         cost += 20;
25233
25234       /* In the case of FP/MMX moves, the registers actually overlap, and we
25235          have to switch modes in order to treat them differently.  */
25236       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25237           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25238         cost += 20;
25239
25240       return cost;
25241     }
25242
25243   /* Moves between SSE/MMX and integer unit are expensive.  */
25244   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25245       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25246
25247     /* ??? By keeping returned value relatively high, we limit the number
25248        of moves between integer and MMX/SSE registers for all targets.
25249        Additionally, high value prevents problem with x86_modes_tieable_p(),
25250        where integer modes in MMX/SSE registers are not tieable
25251        because of missing QImode and HImode moves to, from or between
25252        MMX/SSE registers.  */
25253     return MAX (8, ix86_cost->mmxsse_to_integer);
25254
25255   if (MAYBE_FLOAT_CLASS_P (class1))
25256     return ix86_cost->fp_move;
25257   if (MAYBE_SSE_CLASS_P (class1))
25258     return ix86_cost->sse_move;
25259   if (MAYBE_MMX_CLASS_P (class1))
25260     return ix86_cost->mmx_move;
25261   return 2;
25262 }
25263
25264 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
25265
25266 bool
25267 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25268 {
25269   /* Flags and only flags can only hold CCmode values.  */
25270   if (CC_REGNO_P (regno))
25271     return GET_MODE_CLASS (mode) == MODE_CC;
25272   if (GET_MODE_CLASS (mode) == MODE_CC
25273       || GET_MODE_CLASS (mode) == MODE_RANDOM
25274       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25275     return 0;
25276   if (FP_REGNO_P (regno))
25277     return VALID_FP_MODE_P (mode);
25278   if (SSE_REGNO_P (regno))
25279     {
25280       /* We implement the move patterns for all vector modes into and
25281          out of SSE registers, even when no operation instructions
25282          are available.  OImode move is available only when AVX is
25283          enabled.  */
25284       return ((TARGET_AVX && mode == OImode)
25285               || VALID_AVX256_REG_MODE (mode)
25286               || VALID_SSE_REG_MODE (mode)
25287               || VALID_SSE2_REG_MODE (mode)
25288               || VALID_MMX_REG_MODE (mode)
25289               || VALID_MMX_REG_MODE_3DNOW (mode));
25290     }
25291   if (MMX_REGNO_P (regno))
25292     {
25293       /* We implement the move patterns for 3DNOW modes even in MMX mode,
25294          so if the register is available at all, then we can move data of
25295          the given mode into or out of it.  */
25296       return (VALID_MMX_REG_MODE (mode)
25297               || VALID_MMX_REG_MODE_3DNOW (mode));
25298     }
25299
25300   if (mode == QImode)
25301     {
25302       /* Take care for QImode values - they can be in non-QI regs,
25303          but then they do cause partial register stalls.  */
25304       if (regno <= BX_REG || TARGET_64BIT)
25305         return 1;
25306       if (!TARGET_PARTIAL_REG_STALL)
25307         return 1;
25308       return reload_in_progress || reload_completed;
25309     }
25310   /* We handle both integer and floats in the general purpose registers.  */
25311   else if (VALID_INT_MODE_P (mode))
25312     return 1;
25313   else if (VALID_FP_MODE_P (mode))
25314     return 1;
25315   else if (VALID_DFP_MODE_P (mode))
25316     return 1;
25317   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
25318      on to use that value in smaller contexts, this can easily force a
25319      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
25320      supporting DImode, allow it.  */
25321   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25322     return 1;
25323
25324   return 0;
25325 }
25326
25327 /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
25328    tieable integer mode.  */
25329
25330 static bool
25331 ix86_tieable_integer_mode_p (enum machine_mode mode)
25332 {
25333   switch (mode)
25334     {
25335     case HImode:
25336     case SImode:
25337       return true;
25338
25339     case QImode:
25340       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
25341
25342     case DImode:
25343       return TARGET_64BIT;
25344
25345     default:
25346       return false;
25347     }
25348 }
25349
25350 /* Return true if MODE1 is accessible in a register that can hold MODE2
25351    without copying.  That is, all register classes that can hold MODE2
25352    can also hold MODE1.  */
25353
25354 bool
25355 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
25356 {
25357   if (mode1 == mode2)
25358     return true;
25359
25360   if (ix86_tieable_integer_mode_p (mode1)
25361       && ix86_tieable_integer_mode_p (mode2))
25362     return true;
25363
25364   /* MODE2 being XFmode implies fp stack or general regs, which means we
25365      can tie any smaller floating point modes to it.  Note that we do not
25366      tie this with TFmode.  */
25367   if (mode2 == XFmode)
25368     return mode1 == SFmode || mode1 == DFmode;
25369
25370   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25371      that we can tie it with SFmode.  */
25372   if (mode2 == DFmode)
25373     return mode1 == SFmode;
25374
25375   /* If MODE2 is only appropriate for an SSE register, then tie with
25376      any other mode acceptable to SSE registers.  */
25377   if (GET_MODE_SIZE (mode2) == 16
25378       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25379     return (GET_MODE_SIZE (mode1) == 16
25380             && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25381
25382   /* If MODE2 is appropriate for an MMX register, then tie
25383      with any other mode acceptable to MMX registers.  */
25384   if (GET_MODE_SIZE (mode2) == 8
25385       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25386     return (GET_MODE_SIZE (mode1) == 8
25387             && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
25388
25389   return false;
25390 }
25391
25392 /* Compute a (partial) cost for rtx X.  Return true if the complete
25393    cost has been computed, and false if subexpressions should be
25394    scanned.  In either case, *TOTAL contains the cost result.  */
25395
25396 static bool
25397 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
25398 {
25399   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25400   enum machine_mode mode = GET_MODE (x);
25401   const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
25402
25403   switch (code)
25404     {
25405     case CONST_INT:
25406     case CONST:
25407     case LABEL_REF:
25408     case SYMBOL_REF:
25409       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25410         *total = 3;
25411       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25412         *total = 2;
25413       else if (flag_pic && SYMBOLIC_CONST (x)
25414                && (!TARGET_64BIT
25415                    || (!GET_CODE (x) != LABEL_REF
25416                        && (GET_CODE (x) != SYMBOL_REF
25417                            || !SYMBOL_REF_LOCAL_P (x)))))
25418         *total = 1;
25419       else
25420         *total = 0;
25421       return true;
25422
25423     case CONST_DOUBLE:
25424       if (mode == VOIDmode)
25425         *total = 0;
25426       else
25427         switch (standard_80387_constant_p (x))
25428           {
25429           case 1: /* 0.0 */
25430             *total = 1;
25431             break;
25432           default: /* Other constants */
25433             *total = 2;
25434             break;
25435           case 0:
25436           case -1:
25437             /* Start with (MEM (SYMBOL_REF)), since that's where
25438                it'll probably end up.  Add a penalty for size.  */
25439             *total = (COSTS_N_INSNS (1)
25440                       + (flag_pic != 0 && !TARGET_64BIT)
25441                       + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
25442             break;
25443           }
25444       return true;
25445
25446     case ZERO_EXTEND:
25447       /* The zero extensions is often completely free on x86_64, so make
25448          it as cheap as possible.  */
25449       if (TARGET_64BIT && mode == DImode
25450           && GET_MODE (XEXP (x, 0)) == SImode)
25451         *total = 1;
25452       else if (TARGET_ZERO_EXTEND_WITH_AND)
25453         *total = cost->add;
25454       else
25455         *total = cost->movzx;
25456       return false;
25457
25458     case SIGN_EXTEND:
25459       *total = cost->movsx;
25460       return false;
25461
25462     case ASHIFT:
25463       if (CONST_INT_P (XEXP (x, 1))
25464           && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25465         {
25466           HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25467           if (value == 1)
25468             {
25469               *total = cost->add;
25470               return false;
25471             }
25472           if ((value == 2 || value == 3)
25473               && cost->lea <= cost->shift_const)
25474             {
25475               *total = cost->lea;
25476               return false;
25477             }
25478         }
25479       /* FALLTHRU */
25480
25481     case ROTATE:
25482     case ASHIFTRT:
25483     case LSHIFTRT:
25484     case ROTATERT:
25485       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
25486         {
25487           if (CONST_INT_P (XEXP (x, 1)))
25488             {
25489               if (INTVAL (XEXP (x, 1)) > 32)
25490                 *total = cost->shift_const + COSTS_N_INSNS (2);
25491               else
25492                 *total = cost->shift_const * 2;
25493             }
25494           else
25495             {
25496               if (GET_CODE (XEXP (x, 1)) == AND)
25497                 *total = cost->shift_var * 2;
25498               else
25499                 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
25500             }
25501         }
25502       else
25503         {
25504           if (CONST_INT_P (XEXP (x, 1)))
25505             *total = cost->shift_const;
25506           else
25507             *total = cost->shift_var;
25508         }
25509       return false;
25510
25511     case MULT:
25512       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25513         {
25514           /* ??? SSE scalar cost should be used here.  */
25515           *total = cost->fmul;
25516           return false;
25517         }
25518       else if (X87_FLOAT_MODE_P (mode))
25519         {
25520           *total = cost->fmul;
25521           return false;
25522         }
25523       else if (FLOAT_MODE_P (mode))
25524         {
25525           /* ??? SSE vector cost should be used here.  */
25526           *total = cost->fmul;
25527           return false;
25528         }
25529       else
25530         {
25531           rtx op0 = XEXP (x, 0);
25532           rtx op1 = XEXP (x, 1);
25533           int nbits;
25534           if (CONST_INT_P (XEXP (x, 1)))
25535             {
25536               unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25537               for (nbits = 0; value != 0; value &= value - 1)
25538                 nbits++;
25539             }
25540           else
25541             /* This is arbitrary.  */
25542             nbits = 7;
25543
25544           /* Compute costs correctly for widening multiplication.  */
25545           if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
25546               && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
25547                  == GET_MODE_SIZE (mode))
25548             {
25549               int is_mulwiden = 0;
25550               enum machine_mode inner_mode = GET_MODE (op0);
25551
25552               if (GET_CODE (op0) == GET_CODE (op1))
25553                 is_mulwiden = 1, op1 = XEXP (op1, 0);
25554               else if (CONST_INT_P (op1))
25555                 {
25556                   if (GET_CODE (op0) == SIGN_EXTEND)
25557                     is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
25558                                   == INTVAL (op1);
25559                   else
25560                     is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
25561                 }
25562
25563               if (is_mulwiden)
25564                 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
25565             }
25566
25567           *total = (cost->mult_init[MODE_INDEX (mode)]
25568                     + nbits * cost->mult_bit
25569                     + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
25570
25571           return true;
25572         }
25573
25574     case DIV:
25575     case UDIV:
25576     case MOD:
25577     case UMOD:
25578       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25579         /* ??? SSE cost should be used here.  */
25580         *total = cost->fdiv;
25581       else if (X87_FLOAT_MODE_P (mode))
25582         *total = cost->fdiv;
25583       else if (FLOAT_MODE_P (mode))
25584         /* ??? SSE vector cost should be used here.  */
25585         *total = cost->fdiv;
25586       else
25587         *total = cost->divide[MODE_INDEX (mode)];
25588       return false;
25589
25590     case PLUS:
25591       if (GET_MODE_CLASS (mode) == MODE_INT
25592                && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
25593         {
25594           if (GET_CODE (XEXP (x, 0)) == PLUS
25595               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
25596               && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
25597               && CONSTANT_P (XEXP (x, 1)))
25598             {
25599               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
25600               if (val == 2 || val == 4 || val == 8)
25601                 {
25602                   *total = cost->lea;
25603                   *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25604                   *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
25605                                       outer_code, speed);
25606                   *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25607                   return true;
25608                 }
25609             }
25610           else if (GET_CODE (XEXP (x, 0)) == MULT
25611                    && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
25612             {
25613               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
25614               if (val == 2 || val == 4 || val == 8)
25615                 {
25616                   *total = cost->lea;
25617                   *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25618                   *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25619                   return true;
25620                 }
25621             }
25622           else if (GET_CODE (XEXP (x, 0)) == PLUS)
25623             {
25624               *total = cost->lea;
25625               *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25626               *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25627               *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25628               return true;
25629             }
25630         }
25631       /* FALLTHRU */
25632
25633     case MINUS:
25634       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25635         {
25636           /* ??? SSE cost should be used here.  */
25637           *total = cost->fadd;
25638           return false;
25639         }
25640       else if (X87_FLOAT_MODE_P (mode))
25641         {
25642           *total = cost->fadd;
25643           return false;
25644         }
25645       else if (FLOAT_MODE_P (mode))
25646         {
25647           /* ??? SSE vector cost should be used here.  */
25648           *total = cost->fadd;
25649           return false;
25650         }
25651       /* FALLTHRU */
25652
25653     case AND:
25654     case IOR:
25655     case XOR:
25656       if (!TARGET_64BIT && mode == DImode)
25657         {
25658           *total = (cost->add * 2
25659                     + (rtx_cost (XEXP (x, 0), outer_code, speed)
25660                        << (GET_MODE (XEXP (x, 0)) != DImode))
25661                     + (rtx_cost (XEXP (x, 1), outer_code, speed)
25662                        << (GET_MODE (XEXP (x, 1)) != DImode)));
25663           return true;
25664         }
25665       /* FALLTHRU */
25666
25667     case NEG:
25668       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25669         {
25670           /* ??? SSE cost should be used here.  */
25671           *total = cost->fchs;
25672           return false;
25673         }
25674       else if (X87_FLOAT_MODE_P (mode))
25675         {
25676           *total = cost->fchs;
25677           return false;
25678         }
25679       else if (FLOAT_MODE_P (mode))
25680         {
25681           /* ??? SSE vector cost should be used here.  */
25682           *total = cost->fchs;
25683           return false;
25684         }
25685       /* FALLTHRU */
25686
25687     case NOT:
25688       if (!TARGET_64BIT && mode == DImode)
25689         *total = cost->add * 2;
25690       else
25691         *total = cost->add;
25692       return false;
25693
25694     case COMPARE:
25695       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
25696           && XEXP (XEXP (x, 0), 1) == const1_rtx
25697           && CONST_INT_P (XEXP (XEXP (x, 0), 2))
25698           && XEXP (x, 1) == const0_rtx)
25699         {
25700           /* This kind of construct is implemented using test[bwl].
25701              Treat it as if we had an AND.  */
25702           *total = (cost->add
25703                     + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
25704                     + rtx_cost (const1_rtx, outer_code, speed));
25705           return true;
25706         }
25707       return false;
25708
25709     case FLOAT_EXTEND:
25710       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
25711         *total = 0;
25712       return false;
25713
25714     case ABS:
25715       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25716         /* ??? SSE cost should be used here.  */
25717         *total = cost->fabs;
25718       else if (X87_FLOAT_MODE_P (mode))
25719         *total = cost->fabs;
25720       else if (FLOAT_MODE_P (mode))
25721         /* ??? SSE vector cost should be used here.  */
25722         *total = cost->fabs;
25723       return false;
25724
25725     case SQRT:
25726       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25727         /* ??? SSE cost should be used here.  */
25728         *total = cost->fsqrt;
25729       else if (X87_FLOAT_MODE_P (mode))
25730         *total = cost->fsqrt;
25731       else if (FLOAT_MODE_P (mode))
25732         /* ??? SSE vector cost should be used here.  */
25733         *total = cost->fsqrt;
25734       return false;
25735
25736     case UNSPEC:
25737       if (XINT (x, 1) == UNSPEC_TP)
25738         *total = 0;
25739       return false;
25740
25741     case VEC_SELECT:
25742     case VEC_CONCAT:
25743     case VEC_MERGE:
25744     case VEC_DUPLICATE:
25745       /* ??? Assume all of these vector manipulation patterns are
25746          recognizable.  In which case they all pretty much have the
25747          same cost.  */
25748      *total = COSTS_N_INSNS (1);
25749      return true;
25750
25751     default:
25752       return false;
25753     }
25754 }
25755
25756 #if TARGET_MACHO
25757
25758 static int current_machopic_label_num;
25759
25760 /* Given a symbol name and its associated stub, write out the
25761    definition of the stub.  */
25762
25763 void
25764 machopic_output_stub (FILE *file, const char *symb, const char *stub)
25765 {
25766   unsigned int length;
25767   char *binder_name, *symbol_name, lazy_ptr_name[32];
25768   int label = ++current_machopic_label_num;
25769
25770   /* For 64-bit we shouldn't get here.  */
25771   gcc_assert (!TARGET_64BIT);
25772
25773   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
25774   symb = (*targetm.strip_name_encoding) (symb);
25775
25776   length = strlen (stub);
25777   binder_name = XALLOCAVEC (char, length + 32);
25778   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
25779
25780   length = strlen (symb);
25781   symbol_name = XALLOCAVEC (char, length + 32);
25782   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
25783
25784   sprintf (lazy_ptr_name, "L%d$lz", label);
25785
25786   if (MACHOPIC_PURE)
25787     switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
25788   else
25789     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
25790
25791   fprintf (file, "%s:\n", stub);
25792   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
25793
25794   if (MACHOPIC_PURE)
25795     {
25796       fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
25797       fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
25798       fprintf (file, "\tjmp\t*%%edx\n");
25799     }
25800   else
25801     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
25802
25803   fprintf (file, "%s:\n", binder_name);
25804
25805   if (MACHOPIC_PURE)
25806     {
25807       fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
25808       fputs ("\tpushl\t%eax\n", file);
25809     }
25810   else
25811     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
25812
25813   fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
25814
25815   switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
25816   fprintf (file, "%s:\n", lazy_ptr_name);
25817   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
25818   fprintf (file, ASM_LONG "%s\n", binder_name);
25819 }
25820
/* End-of-file hook for Darwin on x86: run the generic Darwin
   end-of-file processing first, then the common i386 one.  */

void
darwin_x86_file_end (void)
{
  darwin_file_end ();
  ix86_file_end ();
}
25827 #endif /* TARGET_MACHO */
25828
25829 /* Order the registers for register allocator.  */
25830
25831 void
25832 x86_order_regs_for_local_alloc (void)
25833 {
25834    int pos = 0;
25835    int i;
25836
25837    /* First allocate the local general purpose registers.  */
25838    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25839      if (GENERAL_REGNO_P (i) && call_used_regs[i])
25840         reg_alloc_order [pos++] = i;
25841
25842    /* Global general purpose registers.  */
25843    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25844      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
25845         reg_alloc_order [pos++] = i;
25846
25847    /* x87 registers come first in case we are doing FP math
25848       using them.  */
25849    if (!TARGET_SSE_MATH)
25850      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25851        reg_alloc_order [pos++] = i;
25852
25853    /* SSE registers.  */
25854    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
25855      reg_alloc_order [pos++] = i;
25856    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
25857      reg_alloc_order [pos++] = i;
25858
25859    /* x87 registers.  */
25860    if (TARGET_SSE_MATH)
25861      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25862        reg_alloc_order [pos++] = i;
25863
25864    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
25865      reg_alloc_order [pos++] = i;
25866
25867    /* Initialize the rest of array as we do not allocate some registers
25868       at all.  */
25869    while (pos < FIRST_PSEUDO_REGISTER)
25870      reg_alloc_order [pos++] = 0;
25871 }
25872
25873 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
25874    struct attribute_spec.handler.  */
25875 static tree
25876 ix86_handle_abi_attribute (tree *node, tree name,
25877                               tree args ATTRIBUTE_UNUSED,
25878                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
25879 {
25880   if (TREE_CODE (*node) != FUNCTION_TYPE
25881       && TREE_CODE (*node) != METHOD_TYPE
25882       && TREE_CODE (*node) != FIELD_DECL
25883       && TREE_CODE (*node) != TYPE_DECL)
25884     {
25885       warning (OPT_Wattributes, "%qE attribute only applies to functions",
25886                name);
25887       *no_add_attrs = true;
25888       return NULL_TREE;
25889     }
25890   if (!TARGET_64BIT)
25891     {
25892       warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
25893                name);
25894       *no_add_attrs = true;
25895       return NULL_TREE;
25896     }
25897
25898   /* Can combine regparm with all attributes but fastcall.  */
25899   if (is_attribute_p ("ms_abi", name))
25900     {
25901       if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
25902         {
25903           error ("ms_abi and sysv_abi attributes are not compatible");
25904         }
25905
25906       return NULL_TREE;
25907     }
25908   else if (is_attribute_p ("sysv_abi", name))
25909     {
25910       if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
25911         {
25912           error ("ms_abi and sysv_abi attributes are not compatible");
25913         }
25914
25915       return NULL_TREE;
25916     }
25917
25918   return NULL_TREE;
25919 }
25920
25921 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
25922    struct attribute_spec.handler.  */
25923 static tree
25924 ix86_handle_struct_attribute (tree *node, tree name,
25925                               tree args ATTRIBUTE_UNUSED,
25926                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
25927 {
25928   tree *type = NULL;
25929   if (DECL_P (*node))
25930     {
25931       if (TREE_CODE (*node) == TYPE_DECL)
25932         type = &TREE_TYPE (*node);
25933     }
25934   else
25935     type = node;
25936
25937   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
25938                  || TREE_CODE (*type) == UNION_TYPE)))
25939     {
25940       warning (OPT_Wattributes, "%qE attribute ignored",
25941                name);
25942       *no_add_attrs = true;
25943     }
25944
25945   else if ((is_attribute_p ("ms_struct", name)
25946             && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
25947            || ((is_attribute_p ("gcc_struct", name)
25948                 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
25949     {
25950       warning (OPT_Wattributes, "%qE incompatible attribute ignored",
25951                name);
25952       *no_add_attrs = true;
25953     }
25954
25955   return NULL_TREE;
25956 }
25957
25958 static tree
25959 ix86_handle_fndecl_attribute (tree *node, tree name,
25960                               tree args ATTRIBUTE_UNUSED,
25961                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
25962 {
25963   if (TREE_CODE (*node) != FUNCTION_DECL)
25964     {
25965       warning (OPT_Wattributes, "%qE attribute only applies to functions",
25966                name);
25967       *no_add_attrs = true;
25968       return NULL_TREE;
25969     }
25970
25971   if (TARGET_64BIT)
25972     {
25973       warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
25974                name);
25975       return NULL_TREE;
25976     }
25977
25978 #ifndef HAVE_AS_IX86_SWAP
25979   sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
25980 #endif
25981
25982     return NULL_TREE;
25983 }
25984
25985 static bool
25986 ix86_ms_bitfield_layout_p (const_tree record_type)
25987 {
25988   return (TARGET_MS_BITFIELD_LAYOUT &&
25989           !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
25990     || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
25991 }
25992
25993 /* Returns an expression indicating where the this parameter is
25994    located on entry to the FUNCTION.  */
25995
25996 static rtx
25997 x86_this_parameter (tree function)
25998 {
25999   tree type = TREE_TYPE (function);
26000   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26001   int nregs;
26002
26003   if (TARGET_64BIT)
26004     {
26005       const int *parm_regs;
26006
26007       if (ix86_function_type_abi (type) == MS_ABI)
26008         parm_regs = x86_64_ms_abi_int_parameter_registers;
26009       else
26010         parm_regs = x86_64_int_parameter_registers;
26011       return gen_rtx_REG (DImode, parm_regs[aggr]);
26012     }
26013
26014   nregs = ix86_function_regparm (type, function);
26015
26016   if (nregs > 0 && !stdarg_p (type))
26017     {
26018       int regno;
26019
26020       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26021         regno = aggr ? DX_REG : CX_REG;
26022       else
26023         {
26024           regno = AX_REG;
26025           if (aggr)
26026             {
26027               regno = DX_REG;
26028               if (nregs == 1)
26029                 return gen_rtx_MEM (SImode,
26030                                     plus_constant (stack_pointer_rtx, 4));
26031             }
26032         }
26033       return gen_rtx_REG (SImode, regno);
26034     }
26035
26036   return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26037 }
26038
26039 /* Determine whether x86_output_mi_thunk can succeed.  */
26040
26041 static bool
26042 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26043                          HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26044                          HOST_WIDE_INT vcall_offset, const_tree function)
26045 {
26046   /* 64-bit can handle anything.  */
26047   if (TARGET_64BIT)
26048     return true;
26049
26050   /* For 32-bit, everything's fine if we have one free register.  */
26051   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26052     return true;
26053
26054   /* Need a free register for vcall_offset.  */
26055   if (vcall_offset)
26056     return false;
26057
26058   /* Need a free register for GOT references.  */
26059   if (flag_pic && !(*targetm.binds_local_p) (function))
26060     return false;
26061
26062   /* Otherwise ok.  */
26063   return true;
26064 }
26065
26066 /* Output the assembler code for a thunk function.  THUNK_DECL is the
26067    declaration for the thunk function itself, FUNCTION is the decl for
26068    the target function.  DELTA is an immediate constant offset to be
26069    added to THIS.  If VCALL_OFFSET is nonzero, the word at
26070    *(*this + vcall_offset) should be added to THIS.  */
26071
26072 static void
26073 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26074                      tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26075                      HOST_WIDE_INT vcall_offset, tree function)
26076 {
26077   rtx xops[3];
26078   rtx this_param = x86_this_parameter (function);
26079   rtx this_reg, tmp;
26080
26081   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
26082      pull it in now and let DELTA benefit.  */
26083   if (REG_P (this_param))
26084     this_reg = this_param;
26085   else if (vcall_offset)
26086     {
26087       /* Put the this parameter into %eax.  */
26088       xops[0] = this_param;
26089       xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26090       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26091     }
26092   else
26093     this_reg = NULL_RTX;
26094
26095   /* Adjust the this parameter by a fixed constant.  */
26096   if (delta)
26097     {
26098       /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
26099          Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
26100       bool sub = delta < 0 || delta == 128;
26101       xops[0] = GEN_INT (sub ? -delta : delta);
26102       xops[1] = this_reg ? this_reg : this_param;
26103       if (TARGET_64BIT)
26104         {
26105           if (!x86_64_general_operand (xops[0], DImode))
26106             {
26107               tmp = gen_rtx_REG (DImode, R10_REG);
26108               xops[1] = tmp;
26109               output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26110               xops[0] = tmp;
26111               xops[1] = this_param;
26112             }
26113           if (sub)
26114             output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
26115           else
26116             output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26117         }
26118       else if (sub)
26119         output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
26120       else
26121         output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26122     }
26123
26124   /* Adjust the this parameter by a value stored in the vtable.  */
26125   if (vcall_offset)
26126     {
26127       if (TARGET_64BIT)
26128         tmp = gen_rtx_REG (DImode, R10_REG);
26129       else
26130         {
26131           int tmp_regno = CX_REG;
26132           if (lookup_attribute ("fastcall",
26133                                 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26134             tmp_regno = AX_REG;
26135           tmp = gen_rtx_REG (SImode, tmp_regno);
26136         }
26137
26138       xops[0] = gen_rtx_MEM (Pmode, this_reg);
26139       xops[1] = tmp;
26140       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26141
26142       /* Adjust the this parameter.  */
26143       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
26144       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26145         {
26146           rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26147           xops[0] = GEN_INT (vcall_offset);
26148           xops[1] = tmp2;
26149           output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26150           xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26151         }
26152       xops[1] = this_reg;
26153       output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26154     }
26155
26156   /* If necessary, drop THIS back to its stack slot.  */
26157   if (this_reg && this_reg != this_param)
26158     {
26159       xops[0] = this_reg;
26160       xops[1] = this_param;
26161       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26162     }
26163
26164   xops[0] = XEXP (DECL_RTL (function), 0);
26165   if (TARGET_64BIT)
26166     {
26167       if (!flag_pic || (*targetm.binds_local_p) (function))
26168         output_asm_insn ("jmp\t%P0", xops);
26169       /* All thunks should be in the same object as their target,
26170          and thus binds_local_p should be true.  */
26171       else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26172         gcc_unreachable ();
26173       else
26174         {
26175           tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26176           tmp = gen_rtx_CONST (Pmode, tmp);
26177           tmp = gen_rtx_MEM (QImode, tmp);
26178           xops[0] = tmp;
26179           output_asm_insn ("jmp\t%A0", xops);
26180         }
26181     }
26182   else
26183     {
26184       if (!flag_pic || (*targetm.binds_local_p) (function))
26185         output_asm_insn ("jmp\t%P0", xops);
26186       else
26187 #if TARGET_MACHO
26188         if (TARGET_MACHO)
26189           {
26190             rtx sym_ref = XEXP (DECL_RTL (function), 0);
26191             tmp = (gen_rtx_SYMBOL_REF
26192                    (Pmode,
26193                     machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26194             tmp = gen_rtx_MEM (QImode, tmp);
26195             xops[0] = tmp;
26196             output_asm_insn ("jmp\t%0", xops);
26197           }
26198         else
26199 #endif /* TARGET_MACHO */
26200         {
26201           tmp = gen_rtx_REG (SImode, CX_REG);
26202           output_set_got (tmp, NULL_RTX);
26203
26204           xops[1] = tmp;
26205           output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26206           output_asm_insn ("jmp\t{*}%1", xops);
26207         }
26208     }
26209 }
26210
26211 static void
26212 x86_file_start (void)
26213 {
26214   default_file_start ();
26215 #if TARGET_MACHO
26216   darwin_file_start ();
26217 #endif
26218   if (X86_FILE_START_VERSION_DIRECTIVE)
26219     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26220   if (X86_FILE_START_FLTUSED)
26221     fputs ("\t.global\t__fltused\n", asm_out_file);
26222   if (ix86_asm_dialect == ASM_INTEL)
26223     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
26224 }
26225
26226 int
26227 x86_field_alignment (tree field, int computed)
26228 {
26229   enum machine_mode mode;
26230   tree type = TREE_TYPE (field);
26231
26232   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26233     return computed;
26234   mode = TYPE_MODE (strip_array_types (type));
26235   if (mode == DFmode || mode == DCmode
26236       || GET_MODE_CLASS (mode) == MODE_INT
26237       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26238     return MIN (32, computed);
26239   return computed;
26240 }
26241
26242 /* Output assembler code to FILE to increment profiler label # LABELNO
26243    for profiling a function entry.  */
26244 void
26245 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
26246 {
26247   if (TARGET_64BIT)
26248     {
26249 #ifndef NO_PROFILE_COUNTERS
26250       fprintf (file, "\tleaq\t" LPREFIX "P%d@(%%rip),%%r11\n", labelno);
26251 #endif
26252
26253       if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26254         fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file);
26255       else
26256         fputs ("\tcall\t" MCOUNT_NAME "\n", file);
26257     }
26258   else if (flag_pic)
26259     {
26260 #ifndef NO_PROFILE_COUNTERS
26261       fprintf (file, "\tleal\t" LPREFIX "P%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
26262                labelno);
26263 #endif
26264       fputs ("\tcall\t*" MCOUNT_NAME "@GOT(%ebx)\n", file);
26265     }
26266   else
26267     {
26268 #ifndef NO_PROFILE_COUNTERS
26269       fprintf (file, "\tmovl\t$" LPREFIX "P%d,%%" PROFILE_COUNT_REGISTER "\n",
26270                labelno);
26271 #endif
26272       fputs ("\tcall\t" MCOUNT_NAME "\n", file);
26273     }
26274 }
26275
26276 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
26277 /* We don't have exact information about the insn sizes, but we may assume
26278    quite safely that we are informed about all 1 byte insns and memory
26279    address sizes.  This is enough to eliminate unnecessary padding in
26280    99% of cases.  */
26281
26282 static int
26283 min_insn_size (rtx insn)
26284 {
26285   int l = 0, len;
26286
26287   if (!INSN_P (insn) || !active_insn_p (insn))
26288     return 0;
26289
26290   /* Discard alignments we've emit and jump instructions.  */
26291   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26292       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26293     return 0;
26294   if (JUMP_TABLE_DATA_P (insn))
26295     return 0;
26296
26297   /* Important case - calls are always 5 bytes.
26298      It is common to have many calls in the row.  */
26299   if (CALL_P (insn)
26300       && symbolic_reference_mentioned_p (PATTERN (insn))
26301       && !SIBLING_CALL_P (insn))
26302     return 5;
26303   len = get_attr_length (insn);
26304   if (len <= 1)
26305     return 1;
26306
26307   /* For normal instructions we rely on get_attr_length being exact,
26308      with a few exceptions.  */
26309   if (!JUMP_P (insn))
26310     {
26311       enum attr_type type = get_attr_type (insn);
26312
26313       switch (type)
26314         {
26315         case TYPE_MULTI:
26316           if (GET_CODE (PATTERN (insn)) == ASM_INPUT
26317               || asm_noperands (PATTERN (insn)) >= 0)
26318             return 0;
26319           break;
26320         case TYPE_OTHER:
26321         case TYPE_FCMP:
26322           break;
26323         default:
26324           /* Otherwise trust get_attr_length.  */
26325           return len;
26326         }
26327
26328       l = get_attr_length_address (insn);
26329       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
26330         l = 4;
26331     }
26332   if (l)
26333     return 1+l;
26334   else
26335     return 2;
26336 }
26337
26338 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
26339    window.

         This pass walks the whole insn chain while maintaining an interval
         that ends at INSN and begins just after START, together with the
         estimated byte size (NBYTES) and the number of jumps and calls
         (NJUMPS) of that interval.  When START is itself a jump or call,
         three more follow it, and the interval is shorter than 16 bytes, a
         pad insn is emitted in front of INSN.  Insn sizes are the estimates
         returned by min_insn_size.  */
26340
26341 static void
26342 ix86_avoid_jump_mispredicts (void)
26343 {
26344   rtx insn, start = get_insns ();
26345   int nbytes = 0, njumps = 0;
        /* ISJUMP records whether the insn START was most recently advanced
           onto is a jump or call.  */
26346   int isjump = 0;
26347
26348   /* Look for all minimal intervals of instructions containing 4 jumps.
26349      The intervals are bounded by START and INSN.  NBYTES is the total
26350      size of instructions in the interval including INSN and not including
26351      START.  When the NBYTES is smaller than 16 bytes, it is possible
26352      that the end of START and INSN ends up in the same 16byte page.
26353
26354      The smallest offset in the page INSN can start is the case where START
26355      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
26356      We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
26357      */
26358   for (insn = start; insn; insn = NEXT_INSN (insn))
26359     {
26360       int min_size;
26361
26362       if (LABEL_P (insn))
26363         {
26364           int align = label_to_alignment (insn);
26365           int max_skip = label_to_max_skip (insn);
26366
                /* Only skips within one 16 byte window are of interest.  */
26367           if (max_skip > 15)
26368             max_skip = 15;
26369           /* If align > 3, only up to 16 - max_skip - 1 bytes can be
26370              already in the current 16 byte page, because otherwise
26371              ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
26372              bytes to reach 16 byte boundary.  */
26373           if (align <= 0
26374               || (align <= 3 && max_skip != (1 << align) - 1))
26375             max_skip = 0;
26376           if (dump_file)
26377             fprintf (dump_file, "Label %i with max_skip %i\n",
26378                      INSN_UID (insn), max_skip);
26379           if (max_skip)
26380             {
                    /* Drop insns from the front of the interval until
                       NBYTES + MAX_SKIP is below 16, keeping NJUMPS and
                       ISJUMP in step.  */
26381               while (nbytes + max_skip >= 16)
26382                 {
26383                   start = NEXT_INSN (start);
26384                   if ((JUMP_P (start)
26385                        && GET_CODE (PATTERN (start)) != ADDR_VEC
26386                        && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26387                       || CALL_P (start))
26388                     njumps--, isjump = 1;
26389                   else
26390                     isjump = 0;
26391                   nbytes -= min_insn_size (start);
26392                 }
26393             }
                /* A label itself adds nothing to NBYTES or NJUMPS.  */
26394           continue;
26395         }
26396
26397       min_size = min_insn_size (insn);
26398       nbytes += min_size;
26399       if (dump_file)
26400         fprintf (dump_file, "Insn %i estimated to %i bytes\n",
26401                  INSN_UID (insn), min_size);
            /* Jumps (other than ADDR_VEC and ADDR_DIFF_VEC bodies) and calls
               are counted; for any other insn only NBYTES changes.  */
26402       if ((JUMP_P (insn)
26403            && GET_CODE (PATTERN (insn)) != ADDR_VEC
26404            && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
26405           || CALL_P (insn))
26406         njumps++;
26407       else
26408         continue;
26409
            /* Advance START until no more than 3 counted jumps remain
               after it.  */
26410       while (njumps > 3)
26411         {
26412           start = NEXT_INSN (start);
26413           if ((JUMP_P (start)
26414                && GET_CODE (PATTERN (start)) != ADDR_VEC
26415                && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26416               || CALL_P (start))
26417             njumps--, isjump = 1;
26418           else
26419             isjump = 0;
26420           nbytes -= min_insn_size (start);
26421         }
26422       gcc_assert (njumps >= 0);
26423       if (dump_file)
26424         fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26425                  INSN_UID (start), INSN_UID (insn), nbytes);
26426
            /* START is a jump or call, three more follow it and the bytes
               after START total fewer than 16: emit a pad in front of INSN,
               sized by the maxskip formula given at the top of the loop.  */
26427       if (njumps == 3 && isjump && nbytes < 16)
26428         {
26429           int padsize = 15 - nbytes + min_insn_size (insn);
26430
26431           if (dump_file)
26432             fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26433                      INSN_UID (insn), padsize);
26434           emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
26435         }
26436     }
26437 }
26438 #endif
26439
26440 /* AMD Athlon works faster
26441    when RET is not destination of conditional jump or directly preceded
26442    by other jump instruction.  We avoid the penalty by inserting NOP just
26443    before the RET instructions in such cases.

         Every predecessor block of the exit block that ends in a RETURN jump
         and is not optimized for size is examined.  When padding is wanted,
         the insn from gen_return_internal_long is emitted in front of the
         return and the original return insn is deleted.
         NOTE(review): the text above speaks of inserting a NOP; confirm what
         return_internal_long expands to.  */
26444 static void
26445 ix86_pad_returns (void)
26446 {
26447   edge e;
26448   edge_iterator ei;
26449
26450   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26451     {
26452       basic_block bb = e->src;
26453       rtx ret = BB_END (bb);
26454       rtx prev;
26455       bool replace = false;
26456
26457       if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26458           || optimize_bb_for_size_p (bb))
26459         continue;
            /* Find the nearest preceding insn that is active or a label.  */
26460       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26461         if (active_insn_p (prev) || LABEL_P (prev))
26462           break;
            /* The return directly follows a label: pad if some incoming edge
               with nonzero frequency, whose source block has a non-negative
               index, is not a fallthru edge.  */
26463       if (prev && LABEL_P (prev))
26464         {
                /* These shadow the outer E and EI, so the outer iteration
                   state is left untouched.  */
26465           edge e;
26466           edge_iterator ei;
26467
26468           FOR_EACH_EDGE (e, ei, bb->preds)
26469             if (EDGE_FREQUENCY (e) && e->src->index >= 0
26470                 && !(e->flags & EDGE_FALLTHRU))
26471               replace = true;
26472         }
26473       if (!replace)
26474         {
                /* Otherwise pad when the previous active insn is a
                   conditional jump or a call.  */
26475           prev = prev_active_insn (ret);
26476           if (prev
26477               && ((JUMP_P (prev) && any_condjump_p (prev))
26478                   || CALL_P (prev)))
26479             replace = true;
26480           /* Empty functions get branch mispredict even when the jump destination
26481              is not visible to us.  */
26482           if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
26483             replace = true;
26484         }
26485       if (replace)
26486         {
26487           emit_jump_insn_before (gen_return_internal_long (), ret);
26488           delete_insn (ret);
26489         }
26490     }
26491 }
26492
26493 /* Implement machine specific optimizations.  We implement padding of returns
26494    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.

         Both passes run only when optimizing and when the current function
         is optimized for speed.  Return padding additionally requires
         TARGET_PAD_RETURNS; the jump-window pass requires
         TARGET_FOUR_JUMP_LIMIT and is compiled in only when
         ASM_OUTPUT_MAX_SKIP_PAD is defined.  */
26495 static void
26496 ix86_reorg (void)
26497 {
26498   if (optimize && optimize_function_for_speed_p (cfun))
26499     {
26500       if (TARGET_PAD_RETURNS)
26501         ix86_pad_returns ();
26502 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
26503       if (TARGET_FOUR_JUMP_LIMIT)
26504         ix86_avoid_jump_mispredicts ();
26505 #endif
26506     }
26507 }
26508
26509 /* Return nonzero when QImode register that must be represented via REX prefix
26510    is used.

         The operands of INSN are obtained with extract_insn_cached and the
         result is true as soon as one of them is a REG whose number is
         greater than BX_REG.
         NOTE(review): the test looks only at the register number, so it
         presumably also accepts registers outside the general set; confirm
         that this over-approximation is intended.  */
26511 bool
26512 x86_extended_QIreg_mentioned_p (rtx insn)
26513 {
26514   int i;
26515   extract_insn_cached (insn);
26516   for (i = 0; i < recog_data.n_operands; i++)
26517     if (REG_P (recog_data.operand[i])
26518         && REGNO (recog_data.operand[i]) > BX_REG)
26519        return true;
26520   return false;
26521 }
26522
26523 /* Return nonzero when P points to register encoded via REX prefix.
26524    Called via for_each_rtx.  Anything that is not a REG yields 0; for a
         REG the answer is whether its number satisfies REX_INT_REGNO_P or
         REX_SSE_REGNO_P.  DATA is unused.  */
26525 static int
26526 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26527 {
26528    unsigned int regno;
26529    if (!REG_P (*p))
26530      return 0;
26531    regno = REGNO (*p);
26532    return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26533 }
26534
26535 /* Return true when INSN mentions register that must be encoded using REX
26536    prefix.  When INSN satisfies INSN_P its PATTERN is scanned; otherwise
         INSN itself is scanned as an rtx.  The scan is done by for_each_rtx
         with extended_reg_mentioned_1 as the callback.  */
26537 bool
26538 x86_extended_reg_mentioned_p (rtx insn)
26539 {
26540   return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26541                        extended_reg_mentioned_1, NULL);
26542 }
26543
26544 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
26545    optabs would emit if we didn't have TFmode patterns.

         OPERANDS[0] is the floating point destination and OPERANDS[1] the
         integer source, which must be in SImode or DImode (asserted).  Inputs
         that are not below zero in the LT comparison are converted directly.
         Otherwise the input is shifted right by one with its lowest bit ORed
         back in, that value is converted, and the result is added to
         itself.  */
26546
26547 void
26548 x86_emit_floatuns (rtx operands[2])
26549 {
26550   rtx neglab, donelab, i0, i1, f0, in, out;
26551   enum machine_mode mode, inmode;
26552
26553   inmode = GET_MODE (operands[1]);
26554   gcc_assert (inmode == SImode || inmode == DImode);
26555
26556   out = operands[0];
26557   in = force_reg (inmode, operands[1]);
26558   mode = GET_MODE (out);
26559   neglab = gen_label_rtx ();
26560   donelab = gen_label_rtx ();
26561   f0 = gen_reg_rtx (mode);
26562
        /* Branch to NEGLAB when IN compares less than zero.  */
26563   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
26564
        /* Direct path: convert IN as it is and skip to DONELAB.  */
26565   expand_float (out, in, 0);
26566
26567   emit_jump_insn (gen_jump (donelab));
26568   emit_barrier ();
26569
26570   emit_label (neglab);
26571
        /* I0 = (IN >> 1) | (IN & 1), using a logical shift; keeping the low
           bit presumably preserves correct rounding.  */
26572   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26573                             1, OPTAB_DIRECT);
26574   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26575                             1, OPTAB_DIRECT);
26576   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26577
26578   expand_float (f0, i0, 0);
26579
        /* OUT = F0 + F0 undoes the halving.  */
26580   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
26581
26582   emit_label (donelab);
26583 }
26584 \f
26585 /* AVX does not support 32-byte integer vector operations,
26586    thus the longest vector we are faced with is V16QImode.  */
26587 #define MAX_VECT_LEN    16
26588
      /* Description of one vector permutation request, passed to the two
         functions declared below.  The only user visible in this part of the
         file zero-fills the structure and then sets TARGET, VMODE, NELT
         (from GET_MODE_NUNITS of the mode) and OP0 == OP1.
         NOTE(review): PERM presumably holds one source element index per
         result element, and TESTING_P presumably asks for a check without
         emitting code; neither is shown here -- confirm at the definitions
         of the expand_vec_perm_* functions.  */
26589 struct expand_vec_perm_d
26590 {
26591   rtx target, op0, op1;
26592   unsigned char perm[MAX_VECT_LEN];
26593   enum machine_mode vmode;
26594   unsigned char nelt;
26595   bool testing_p;
26596 };
26597
26598 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
26599 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
26600
26601 /* Get a vector mode of the same size as the original but with elements
26602    twice as wide.  This is only guaranteed to apply to integral vectors.
         The result is taken from GET_MODE_WIDER_MODE and then checked: it
         must have half as many units as O and the same total size, otherwise
         the assertions fail.  */
26603
26604 static inline enum machine_mode
26605 get_mode_wider_vector (enum machine_mode o)
26606 {
26607   /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
26608   enum machine_mode n = GET_MODE_WIDER_MODE (o);
26609   gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
26610   gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
26611   return n;
26612 }
26613
26614 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
26615    with all elements equal to VAL.  Return true if successful.

         MODE is the vector mode of TARGET.  The modes V2SI, V2SF, V4HI and
         V8QI are handled only when MMX_OK is true; otherwise false is
         returned for them.  Modes not listed in the switch also return
         false.  */
26616
26617 static bool
26618 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
26619                                    rtx target, rtx val)
26620 {
26621   bool ok;
26622
26623   switch (mode)
26624     {
26625     case V2SImode:
26626     case V2SFmode:
26627       if (!mmx_ok)
26628         return false;
26629       /* FALLTHRU */
26630
26631     case V4DFmode:
26632     case V4DImode:
26633     case V8SFmode:
26634     case V8SImode:
26635     case V2DFmode:
26636     case V2DImode:
26637     case V4SFmode:
26638     case V4SImode:
26639       {
26640         rtx insn, dup;
26641
26642         /* First attempt to recognize VAL as-is.  */
26643         dup = gen_rtx_VEC_DUPLICATE (mode, val);
26644         insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
26645         if (recog_memoized (insn) < 0)
26646           {
26647             rtx seq;
26648             /* If that fails, force VAL into a register.  */
26649
                  /* The insns needed to load VAL are collected in a separate
                     sequence and placed before the already emitted INSN,
                     whose VEC_DUPLICATE operand is rewritten in place.  */
26650             start_sequence ();
26651             XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
26652             seq = get_insns ();
26653             end_sequence ();
26654             if (seq)
26655               emit_insn_before (seq, insn);
26656
26657             ok = recog_memoized (insn) >= 0;
26658             gcc_assert (ok);
26659           }
26660       }
26661       return true;
26662
26663     case V4HImode:
26664       if (!mmx_ok)
26665         return false;
26666       if (TARGET_SSE || TARGET_3DNOW_A)
26667         {
26668           rtx x;
26669
                /* Duplicate the HImode truncation of the SImode lowpart of
                   VAL.  */
26670           val = gen_lowpart (SImode, val);
26671           x = gen_rtx_TRUNCATE (HImode, val);
26672           x = gen_rtx_VEC_DUPLICATE (mode, x);
26673           emit_insn (gen_rtx_SET (VOIDmode, target, x));
26674           return true;
26675         }
26676       goto widen;
26677
26678     case V8QImode:
26679       if (!mmx_ok)
26680         return false;
26681       goto widen;
26682
26683     case V8HImode:
26684       if (TARGET_SSE2)
26685         {
26686           struct expand_vec_perm_d dperm;
26687           rtx tmp1, tmp2;
26688
                /* The label below is also the entry point for V16QImode.
                   The memset leaves DPERM.perm all zero and TESTING_P
                   false.  */
26689         permute:
26690           memset (&dperm, 0, sizeof (dperm));
26691           dperm.target = target;
26692           dperm.vmode = mode;
26693           dperm.nelt = GET_MODE_NUNITS (mode);
26694           dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
26695
26696           /* Extend to SImode using a paradoxical SUBREG.  */
26697           tmp1 = gen_reg_rtx (SImode);
26698           emit_move_insn (tmp1, gen_lowpart (SImode, val));
26699
26700           /* Insert the SImode value as low element of a V4SImode vector. */
26701           tmp2 = gen_lowpart (V4SImode, dperm.op0);
26702           emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
26703
                /* Try the single-insn permutation first, then the broadcast
                   expander; one of them must succeed.  */
26704           ok = (expand_vec_perm_1 (&dperm)
26705                 || expand_vec_perm_broadcast_1 (&dperm));
26706           gcc_assert (ok);
26707           return ok;
26708         }
26709       goto widen;
26710
26711     case V16QImode:
26712       if (TARGET_SSE2)
26713         goto permute;
26714       goto widen;
26715
26716     widen:
26717       /* Replicate the value once into the next wider mode and recurse.  */
26718       {
26719         enum machine_mode smode, wsmode, wvmode;
26720         rtx x;
26721
26722         smode = GET_MODE_INNER (mode);
26723         wvmode = get_mode_wider_vector (mode);
26724         wsmode = GET_MODE_INNER (wvmode);
26725
              /* VAL = VAL | (VAL << bitsize of SMODE), computed in WSMODE
                 after an unsigned conversion.  */
26726         val = convert_modes (wsmode, smode, val, true);
26727         x = expand_simple_binop (wsmode, ASHIFT, val,
26728                                  GEN_INT (GET_MODE_BITSIZE (smode)),
26729                                  NULL_RTX, 1, OPTAB_LIB_WIDEN);
26730         val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
26731
              /* Duplicate the doubled value into TARGET viewed in the wider
                 vector mode.  */
26732         x = gen_lowpart (wvmode, target);
26733         ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
26734         gcc_assert (ok);
26735         return ok;
26736       }
26737
26738     case V16HImode:
26739     case V32QImode:
26740       {
              /* Build the duplicate in the half-size mode, then concatenate
                 that vector with itself.  */
26741         enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
26742         rtx x = gen_reg_rtx (hvmode);
26743
26744         ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
26745         gcc_assert (ok);
26746
26747         x = gen_rtx_VEC_CONCAT (mode, x, x);
26748         emit_insn (gen_rtx_SET (VOIDmode, target, x));
26749       }
26750       return true;
26751
26752     default:
26753       return false;
26754     }
26755 }
26756
26757 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
26758    whose ONE_VAR element is VAR, and other elements are zero.  Return true
26759    if successful.

         The first switch decides, per MODE and target flags, whether the
         vector can simply be zeroed and then updated with
         ix86_expand_vector_set.  If not, the second switch emits a
         mode-specific sequence.  In that second switch the modes V2SF, V2SI,
         V4HI and V8QI are rejected when MMX_OK is false, and several modes
         are handled only when ONE_VAR is 0.  */
26760
26761 static bool
26762 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
26763                                      rtx target, rtx var, int one_var)
26764 {
26765   enum machine_mode vsimode;
26766   rtx new_target;
26767   rtx x, tmp;
26768   bool use_vector_set = false;
26769
26770   switch (mode)
26771     {
26772     case V2DImode:
26773       /* For SSE4.1, we normally use vector set.  But if the second
26774          element is zero and inter-unit moves are OK, we use movq
26775          instead.  */
26776       use_vector_set = (TARGET_64BIT
26777                         && TARGET_SSE4_1
26778                         && !(TARGET_INTER_UNIT_MOVES
26779                              && one_var == 0));
26780       break;
26781     case V16QImode:
26782     case V4SImode:
26783     case V4SFmode:
26784       use_vector_set = TARGET_SSE4_1;
26785       break;
26786     case V8HImode:
26787       use_vector_set = TARGET_SSE2;
26788       break;
26789     case V4HImode:
26790       use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
26791       break;
26792     case V32QImode:
26793     case V16HImode:
26794     case V8SImode:
26795     case V8SFmode:
26796     case V4DFmode:
26797       use_vector_set = TARGET_AVX;
26798       break;
26799     case V4DImode:
26800       /* Use ix86_expand_vector_set in 64bit mode only.  */
26801       use_vector_set = TARGET_AVX && TARGET_64BIT;
26802       break;
26803     default:
26804       break;
26805     }
26806
26807   if (use_vector_set)
26808     {
            /* Zero the whole vector, then store VAR into element ONE_VAR.  */
26809       emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
26810       var = force_reg (GET_MODE_INNER (mode), var);
26811       ix86_expand_vector_set (mmx_ok, target, var, one_var);
26812       return true;
26813     }
26814
26815   switch (mode)
26816     {
26817     case V2SFmode:
26818     case V2SImode:
26819       if (!mmx_ok)
26820         return false;
26821       /* FALLTHRU */
26822
26823     case V2DFmode:
26824     case V2DImode:
            /* Only element 0 is handled: concatenate VAR with a zero.  */
26825       if (one_var != 0)
26826         return false;
26827       var = force_reg (GET_MODE_INNER (mode), var);
26828       x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
26829       emit_insn (gen_rtx_SET (VOIDmode, target, x));
26830       return true;
26831
26832     case V4SFmode:
26833     case V4SImode:
            /* Work in a fresh pseudo unless TARGET is already a pseudo
               register.  */
26834       if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
26835         new_target = gen_reg_rtx (mode);
26836       else
26837         new_target = target;
            /* Merge a duplicate of VAR into a zero vector with mask 1.  */
26838       var = force_reg (GET_MODE_INNER (mode), var);
26839       x = gen_rtx_VEC_DUPLICATE (mode, var);
26840       x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
26841       emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
26842       if (one_var != 0)
26843         {
26844           /* We need to shuffle the value to the correct position, so
26845              create a new pseudo to store the intermediate result.  */
26846
26847           /* With SSE2, we can use the integer shuffle insns.  */
26848           if (mode != V4SFmode && TARGET_SSE2)
26849             {
                    /* Selector 0 is used only for position ONE_VAR; every
                       other position uses selector 1.  */
26850               emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
26851                                             const1_rtx,
26852                                             GEN_INT (one_var == 1 ? 0 : 1),
26853                                             GEN_INT (one_var == 2 ? 0 : 1),
26854                                             GEN_INT (one_var == 3 ? 0 : 1)));
26855               if (target != new_target)
26856                 emit_move_insn (target, new_target);
26857               return true;
26858             }
26859
26860           /* Otherwise convert the intermediate result to V4SFmode and
26861              use the SSE1 shuffle instructions.  */
26862           if (mode != V4SFmode)
26863             {
26864               tmp = gen_reg_rtx (V4SFmode);
26865               emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
26866             }
26867           else
26868             tmp = new_target;
26869
                /* Same selection as above; the last two selectors are biased
                   by 4, which presumably addresses the second source operand
                   (also TMP here).  */
26870           emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
26871                                        const1_rtx,
26872                                        GEN_INT (one_var == 1 ? 0 : 1),
26873                                        GEN_INT (one_var == 2 ? 0+4 : 1+4),
26874                                        GEN_INT (one_var == 3 ? 0+4 : 1+4)));
26875
26876           if (mode != V4SFmode)
26877             emit_move_insn (target, gen_lowpart (V4SImode, tmp));
26878           else if (tmp != target)
26879             emit_move_insn (target, tmp);
26880         }
26881       else if (target != new_target)
26882         emit_move_insn (target, new_target);
26883       return true;
26884
26885     case V8HImode:
26886     case V16QImode:
26887       vsimode = V4SImode;
26888       goto widen;
26889     case V4HImode:
26890     case V8QImode:
26891       if (!mmx_ok)
26892         return false;
26893       vsimode = V2SImode;
26894       goto widen;
26895     widen:
            /* Only element 0 can be handled by widening.  */
26896       if (one_var != 0)
26897         return false;
26898
26899       /* Zero extend the variable element to SImode and recurse.  */
26900       var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
26901
26902       x = gen_reg_rtx (vsimode);
26903       if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
26904                                                 var, one_var))
26905         gcc_unreachable ();
26906
            /* Copy the SImode-element vector back, viewed in MODE.  */
26907       emit_move_insn (target, gen_lowpart (mode, x));
26908       return true;
26909
26910     default:
26911       return false;
26912     }
26913 }
26914
26915 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
26916    consisting of the values in VALS.  It is known that all elements
26917    except ONE_VAR are constants.  Return true if successful.

         A constant vector is built from a copy of VALS in which element
         ONE_VAR is replaced by zero.  For most modes that constant is loaded
         into TARGET and the variable element is then stored with
         ix86_expand_vector_set.  QImode vectors without a direct element set
         go through the HImode path at the "widen" label.  */
26918
26919 static bool
26920 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
26921                                  rtx target, rtx vals, int one_var)
26922 {
26923   rtx var = XVECEXP (vals, 0, one_var);
26924   enum machine_mode wmode;
26925   rtx const_vec, x;
26926
26927   const_vec = copy_rtx (vals);
26928   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
26929   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
26930
26931   switch (mode)
26932     {
26933     case V2DFmode:
26934     case V2DImode:
26935     case V2SFmode:
26936     case V2SImode:
26937       /* For the two element vectors, it's just as easy to use
26938          the general case.  */
26939       return false;
26940
26941     case V4DImode:
26942       /* Use ix86_expand_vector_set in 64bit mode only.  */
26943       if (!TARGET_64BIT)
26944         return false;
            /* FALLTHRU */
26945     case V4DFmode:
26946     case V8SFmode:
26947     case V8SImode:
26948     case V16HImode:
26949     case V32QImode:
26950     case V4SFmode:
26951     case V4SImode:
26952     case V8HImode:
26953     case V4HImode:
26954       break;
26955
26956     case V16QImode:
26957       if (TARGET_SSE4_1)
26958         break;
26959       wmode = V8HImode;
26960       goto widen;
26961     case V8QImode:
26962       wmode = V4HImode;
26963       goto widen;
26964     widen:
26965       /* There's no way to set one QImode entry easily.  Combine
26966          the variable value with its adjacent constant value, and
26967          promote to an HImode set.  */
            /* X is the constant neighbour sharing the HImode element.  */
26968       x = XVECEXP (vals, 0, one_var ^ 1);
26969       if (one_var & 1)
26970         {
                /* Odd index: VAR is shifted into bits 8..15 and the
                   neighbour is masked to its low 8 bits.  */
26971           var = convert_modes (HImode, QImode, var, true);
26972           var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
26973                                      NULL_RTX, 1, OPTAB_LIB_WIDEN);
26974           x = GEN_INT (INTVAL (x) & 0xff);
26975         }
26976       else
26977         {
                /* Even index: VAR stays in the low bits and the neighbour is
                   shifted left by 8.  */
26978           var = convert_modes (HImode, QImode, var, true);
26979           x = gen_int_mode (INTVAL (x) << 8, HImode);
26980         }
            /* The IOR is skipped when the neighbour contributes nothing.  */
26981       if (x != const0_rtx)
26982         var = expand_simple_binop (HImode, IOR, var, x, var,
26983                                    1, OPTAB_LIB_WIDEN);
26984
            /* Load the constant vector viewed in WMODE and set HImode
               element ONE_VAR / 2.  */
26985       x = gen_reg_rtx (wmode);
26986       emit_move_insn (x, gen_lowpart (wmode, const_vec));
26987       ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
26988
26989       emit_move_insn (target, gen_lowpart (mode, x));
26990       return true;
26991
26992     default:
26993       return false;
26994     }
26995
        /* Common path for the modes that left the switch with "break".  */
26996   emit_move_insn (target, const_vec);
26997   ix86_expand_vector_set (mmx_ok, target, var, one_var);
26998   return true;
26999 }
27000
27001 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27002    concatenate to handle the most general case: all values variable,
27003    and none identical.

         OPS holds N operands that together make up a vector of mode MODE;
         N must be 2, 4 or 8 and MODE one of the modes listed for that N,
         otherwise gcc_unreachable is hit.  For N == 2 the two operands are
         joined with a single VEC_CONCAT.  For larger N adjacent operands are
         first paired into smaller vectors and the function recurses.  */
27004
27005 static void
27006 ix86_expand_vector_init_concat (enum machine_mode mode,
27007                                 rtx target, rtx *ops, int n)
27008 {
        /* CMODE is the mode of each operand (N == 2) or of each pair built
           below; HMODE, used only for N == 8, is the mode of each half.  */
27009   enum machine_mode cmode, hmode = VOIDmode;
27010   rtx first[8], second[4];
27011   rtvec v;
27012   int i, j;
27013
27014   switch (n)
27015     {
27016     case 2:
27017       switch (mode)
27018         {
27019         case V8SImode:
27020           cmode = V4SImode;
27021           break;
27022         case V8SFmode:
27023           cmode = V4SFmode;
27024           break;
27025         case V4DImode:
27026           cmode = V2DImode;
27027           break;
27028         case V4DFmode:
27029           cmode = V2DFmode;
27030           break;
27031         case V4SImode:
27032           cmode = V2SImode;
27033           break;
27034         case V4SFmode:
27035           cmode = V2SFmode;
27036           break;
27037         case V2DImode:
27038           cmode = DImode;
27039           break;
27040         case V2SImode:
27041           cmode = SImode;
27042           break;
27043         case V2DFmode:
27044           cmode = DFmode;
27045           break;
27046         case V2SFmode:
27047           cmode = SFmode;
27048           break;
27049         default:
27050           gcc_unreachable ();
27051         }
27052
            /* Force both halves into registers, the second one first, and
               concatenate them.  OPS is updated in place.  */
27053       if (!register_operand (ops[1], cmode))
27054         ops[1] = force_reg (cmode, ops[1]);
27055       if (!register_operand (ops[0], cmode))
27056         ops[0] = force_reg (cmode, ops[0]);
27057       emit_insn (gen_rtx_SET (VOIDmode, target,
27058                               gen_rtx_VEC_CONCAT (mode, ops[0],
27059                                                   ops[1])));
27060       break;
27061
27062     case 4:
27063       switch (mode)
27064         {
27065         case V4DImode:
27066           cmode = V2DImode;
27067           break;
27068         case V4DFmode:
27069           cmode = V2DFmode;
27070           break;
27071         case V4SImode:
27072           cmode = V2SImode;
27073           break;
27074         case V4SFmode:
27075           cmode = V2SFmode;
27076           break;
27077         default:
27078           gcc_unreachable ();
27079         }
27080       goto half;
27081
27082     case 8:
27083       switch (mode)
27084         {
27085         case V8SImode:
27086           cmode = V2SImode;
27087           hmode = V4SImode;
27088           break;
27089         case V8SFmode:
27090           cmode = V2SFmode;
27091           hmode = V4SFmode;
27092           break;
27093         default:
27094           gcc_unreachable ();
27095         }
27096       goto half;
27097
27098 half:
27099       /* FIXME: We process inputs backward to help RA.  PR 36222.  */
            /* Build FIRST[J] from the operand pair (OPS[2*J], OPS[2*J+1]),
               starting with the last pair.  */
27100       i = n - 1;
27101       j = (n >> 1) - 1;
27102       for (; i > 0; i -= 2, j--)
27103         {
27104           first[j] = gen_reg_rtx (cmode);
27105           v = gen_rtvec (2, ops[i - 1], ops[i]);
27106           ix86_expand_vector_init (false, first[j],
27107                                    gen_rtx_PARALLEL (cmode, v));
27108         }
27109
            /* N now counts the CMODE vectors in FIRST.  */
27110       n >>= 1;
27111       if (n > 2)
27112         {
                /* Four pairs: join them two by two into HMODE vectors, then
                   concatenate those.  */
27113           gcc_assert (hmode != VOIDmode);
27114           for (i = j = 0; i < n; i += 2, j++)
27115             {
27116               second[j] = gen_reg_rtx (hmode);
27117               ix86_expand_vector_init_concat (hmode, second [j],
27118                                               &first [i], 2);
27119             }
27120           n >>= 1;
27121           ix86_expand_vector_init_concat (mode, target, second, n);
27122         }
27123       else
27124         ix86_expand_vector_init_concat (mode, target, first, n);
27125       break;
27126
27127     default:
27128       gcc_unreachable ();
27129     }
27130 }
27131
27132 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27133    interleave to handle the most general case: all values variable,
27134    and none identical.

         MODE must be V8HImode or V16QImode.  N is the number of element
         pairs: OPS[0] .. OPS[2*N-1] are read as scalar elements.  OPS is
         overwritten, because its leading entries are reused for the
         intermediate vectors of each interleave stage.  The final vector is
         stored into TARGET.  */
27135
27136 static void
27137 ix86_expand_vector_init_interleave (enum machine_mode mode,
27138                                     rtx target, rtx *ops, int n)
27139 {
27140   enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27141   int i, j;
27142   rtx op0, op1;
27143   rtx (*gen_load_even) (rtx, rtx, rtx);
27144   rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27145   rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27146
        /* Pick the element-set and interleave generators, and the vector
           mode used after each interleave stage.  */
27147   switch (mode)
27148     {
27149     case V8HImode:
27150       gen_load_even = gen_vec_setv8hi;
27151       gen_interleave_first_low = gen_vec_interleave_lowv4si;
27152       gen_interleave_second_low = gen_vec_interleave_lowv2di;
27153       inner_mode = HImode;
27154       first_imode = V4SImode;
27155       second_imode = V2DImode;
27156       third_imode = VOIDmode;
27157       break;
27158     case V16QImode:
27159       gen_load_even = gen_vec_setv16qi;
27160       gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27161       gen_interleave_second_low = gen_vec_interleave_lowv4si;
27162       inner_mode = QImode;
27163       first_imode = V8HImode;
27164       second_imode = V4SImode;
27165       third_imode = V2DImode;
27166       break;
27167     default:
27168       gcc_unreachable ();
27169     }
27170
        /* Stage 0: turn each pair (OPS[2*I], OPS[2*I+1]) into one vector
           stored in OPS[I].  */
27171   for (i = 0; i < n; i++)
27172     {
27173       /* Extend OPS[2*I] to SImode using a paradoxical SUBREG.  */
27174       op0 = gen_reg_rtx (SImode);
27175       emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27176
27177       /* Insert the SImode value as low element of V4SImode vector. */
27178       op1 = gen_reg_rtx (V4SImode);
27179       op0 = gen_rtx_VEC_MERGE (V4SImode,
27180                                gen_rtx_VEC_DUPLICATE (V4SImode,
27181                                                       op0),
27182                                CONST0_RTX (V4SImode),
27183                                const1_rtx);
27184       emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27185
27186       /* Cast the V4SImode vector back to a vector in original mode.  */
27187       op0 = gen_reg_rtx (mode);
27188       emit_move_insn (op0, gen_lowpart (mode, op1));
27189
27190       /* Load OPS[2*I+1] into the second position.  */
27191       emit_insn ((*gen_load_even) (op0,
27192                                    force_reg (inner_mode,
27193                                               ops [i + i + 1]),
27194                                    const1_rtx));
27195
27196       /* Cast vector to FIRST_IMODE vector.  */
27197       ops[i] = gen_reg_rtx (first_imode);
27198       emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27199     }
27200
27201   /* Interleave low FIRST_IMODE vectors.  */
27202   for (i = j = 0; i < n; i += 2, j++)
27203     {
27204       op0 = gen_reg_rtx (first_imode);
27205       emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27206
27207       /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
27208       ops[j] = gen_reg_rtx (second_imode);
27209       emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27210     }
27211
27212   /* Interleave low SECOND_IMODE vectors.  */
27213   switch (second_imode)
27214     {
27215     case V4SImode:
            /* Reached only for V16QImode: one extra interleave stage, after
               which the V2DImode stage below finishes the job.  */
27216       for (i = j = 0; i < n / 2; i += 2, j++)
27217         {
27218           op0 = gen_reg_rtx (second_imode);
27219           emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27220                                                    ops[i + 1]));
27221
27222           /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27223              vector.  */
27224           ops[j] = gen_reg_rtx (third_imode);
27225           emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
27226         }
27227       second_imode = V2DImode;
27228       gen_interleave_second_low = gen_vec_interleave_lowv2di;
27229       /* FALLTHRU */
27230
27231     case V2DImode:
            /* Final stage: interleave the two remaining vectors.  */
27232       op0 = gen_reg_rtx (second_imode);
27233       emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27234                                                ops[1]));
27235
27236       /* Cast the SECOND_IMODE vector back to a vector on original
27237          mode.  */
27238       emit_insn (gen_rtx_SET (VOIDmode, target,
27239                               gen_lowpart (mode, op0)));
27240       break;
27241
27242     default:
27243       gcc_unreachable ();
27244     }
27245 }
27246
27247 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
27248    all values variable, and none identical.  */
27249
27250 static void
27251 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27252                                  rtx target, rtx vals)
27253 {
27254   rtx ops[32], op0, op1;
27255   enum machine_mode half_mode = VOIDmode;
27256   int n, i;
27257
27258   switch (mode)
27259     {
27260     case V2SFmode:
27261     case V2SImode:
27262       if (!mmx_ok && !TARGET_SSE)
27263         break;
27264       /* FALLTHRU */
27265
27266     case V8SFmode:
27267     case V8SImode:
27268     case V4DFmode:
27269     case V4DImode:
27270     case V4SFmode:
27271     case V4SImode:
27272     case V2DFmode:
27273     case V2DImode:
27274       n = GET_MODE_NUNITS (mode);
27275       for (i = 0; i < n; i++)
27276         ops[i] = XVECEXP (vals, 0, i);
27277       ix86_expand_vector_init_concat (mode, target, ops, n);
27278       return;
27279
27280     case V32QImode:
27281       half_mode = V16QImode;
27282       goto half;
27283
27284     case V16HImode:
27285       half_mode = V8HImode;
27286       goto half;
27287
27288 half:
27289       n = GET_MODE_NUNITS (mode);
27290       for (i = 0; i < n; i++)
27291         ops[i] = XVECEXP (vals, 0, i);
27292       op0 = gen_reg_rtx (half_mode);
27293       op1 = gen_reg_rtx (half_mode);
27294       ix86_expand_vector_init_interleave (half_mode, op0, ops,
27295                                           n >> 2);
27296       ix86_expand_vector_init_interleave (half_mode, op1,
27297                                           &ops [n >> 1], n >> 2);
27298       emit_insn (gen_rtx_SET (VOIDmode, target,
27299                               gen_rtx_VEC_CONCAT (mode, op0, op1)));
27300       return;
27301
27302     case V16QImode:
27303       if (!TARGET_SSE4_1)
27304         break;
27305       /* FALLTHRU */
27306
27307     case V8HImode:
27308       if (!TARGET_SSE2)
27309         break;
27310
27311       /* Don't use ix86_expand_vector_init_interleave if we can't
27312          move from GPR to SSE register directly.  */
27313       if (!TARGET_INTER_UNIT_MOVES)
27314         break;
27315
27316       n = GET_MODE_NUNITS (mode);
27317       for (i = 0; i < n; i++)
27318         ops[i] = XVECEXP (vals, 0, i);
27319       ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27320       return;
27321
27322     case V4HImode:
27323     case V8QImode:
27324       break;
27325
27326     default:
27327       gcc_unreachable ();
27328     }
27329
27330     {
27331       int i, j, n_elts, n_words, n_elt_per_word;
27332       enum machine_mode inner_mode;
27333       rtx words[4], shift;
27334
27335       inner_mode = GET_MODE_INNER (mode);
27336       n_elts = GET_MODE_NUNITS (mode);
27337       n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27338       n_elt_per_word = n_elts / n_words;
27339       shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27340
27341       for (i = 0; i < n_words; ++i)
27342         {
27343           rtx word = NULL_RTX;
27344
27345           for (j = 0; j < n_elt_per_word; ++j)
27346             {
27347               rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27348               elt = convert_modes (word_mode, inner_mode, elt, true);
27349
27350               if (j == 0)
27351                 word = elt;
27352               else
27353                 {
27354                   word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27355                                               word, 1, OPTAB_LIB_WIDEN);
27356                   word = expand_simple_binop (word_mode, IOR, word, elt,
27357                                               word, 1, OPTAB_LIB_WIDEN);
27358                 }
27359             }
27360
27361           words[i] = word;
27362         }
27363
27364       if (n_words == 1)
27365         emit_move_insn (target, gen_lowpart (mode, words[0]));
27366       else if (n_words == 2)
27367         {
27368           rtx tmp = gen_reg_rtx (mode);
27369           emit_clobber (tmp);
27370           emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27371           emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27372           emit_move_insn (target, tmp);
27373         }
27374       else if (n_words == 4)
27375         {
27376           rtx tmp = gen_reg_rtx (V4SImode);
27377           gcc_assert (word_mode == SImode);
27378           vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27379           ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27380           emit_move_insn (target, gen_lowpart (mode, tmp));
27381         }
27382       else
27383         gcc_unreachable ();
27384     }
27385 }
27386
27387 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
27388    instructions unless MMX_OK is true.  */
27389
27390 void
27391 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27392 {
27393   enum machine_mode mode = GET_MODE (target);
27394   enum machine_mode inner_mode = GET_MODE_INNER (mode);
27395   int n_elts = GET_MODE_NUNITS (mode);
27396   int n_var = 0, one_var = -1;
27397   bool all_same = true, all_const_zero = true;
27398   int i;
27399   rtx x;
27400
27401   for (i = 0; i < n_elts; ++i)
27402     {
27403       x = XVECEXP (vals, 0, i);
27404       if (!(CONST_INT_P (x)
27405             || GET_CODE (x) == CONST_DOUBLE
27406             || GET_CODE (x) == CONST_FIXED))
27407         n_var++, one_var = i;
27408       else if (x != CONST0_RTX (inner_mode))
27409         all_const_zero = false;
27410       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27411         all_same = false;
27412     }
27413
27414   /* Constants are best loaded from the constant pool.  */
27415   if (n_var == 0)
27416     {
27417       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27418       return;
27419     }
27420
27421   /* If all values are identical, broadcast the value.  */
27422   if (all_same
27423       && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27424                                             XVECEXP (vals, 0, 0)))
27425     return;
27426
27427   /* Values where only one field is non-constant are best loaded from
27428      the pool and overwritten via move later.  */
27429   if (n_var == 1)
27430     {
27431       if (all_const_zero
27432           && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27433                                                   XVECEXP (vals, 0, one_var),
27434                                                   one_var))
27435         return;
27436
27437       if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
27438         return;
27439     }
27440
27441   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
27442 }
27443
27444 void
27445 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27446 {
27447   enum machine_mode mode = GET_MODE (target);
27448   enum machine_mode inner_mode = GET_MODE_INNER (mode);
27449   enum machine_mode half_mode;
27450   bool use_vec_merge = false;
27451   rtx tmp;
27452   static rtx (*gen_extract[6][2]) (rtx, rtx)
27453     = {
27454         { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27455         { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27456         { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27457         { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27458         { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27459         { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27460       };
27461   static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27462     = {
27463         { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27464         { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27465         { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27466         { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27467         { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27468         { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
27469       };
27470   int i, j, n;
27471
27472   switch (mode)
27473     {
27474     case V2SFmode:
27475     case V2SImode:
27476       if (mmx_ok)
27477         {
27478           tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27479           ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27480           if (elt == 0)
27481             tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27482           else
27483             tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27484           emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27485           return;
27486         }
27487       break;
27488
27489     case V2DImode:
27490       use_vec_merge = TARGET_SSE4_1;
27491       if (use_vec_merge)
27492         break;
27493
27494     case V2DFmode:
27495       {
27496         rtx op0, op1;
27497
27498         /* For the two element vectors, we implement a VEC_CONCAT with
27499            the extraction of the other element.  */
27500
27501         tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27502         tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27503
27504         if (elt == 0)
27505           op0 = val, op1 = tmp;
27506         else
27507           op0 = tmp, op1 = val;
27508
27509         tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27510         emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27511       }
27512       return;
27513
27514     case V4SFmode:
27515       use_vec_merge = TARGET_SSE4_1;
27516       if (use_vec_merge)
27517         break;
27518
27519       switch (elt)
27520         {
27521         case 0:
27522           use_vec_merge = true;
27523           break;
27524
27525         case 1:
27526           /* tmp = target = A B C D */
27527           tmp = copy_to_reg (target);
27528           /* target = A A B B */
27529           emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
27530           /* target = X A B B */
27531           ix86_expand_vector_set (false, target, val, 0);
27532           /* target = A X C D  */
27533           emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27534                                           const1_rtx, const0_rtx,
27535                                           GEN_INT (2+4), GEN_INT (3+4)));
27536           return;
27537
27538         case 2:
27539           /* tmp = target = A B C D */
27540           tmp = copy_to_reg (target);
27541           /* tmp = X B C D */
27542           ix86_expand_vector_set (false, tmp, val, 0);
27543           /* target = A B X D */
27544           emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27545                                           const0_rtx, const1_rtx,
27546                                           GEN_INT (0+4), GEN_INT (3+4)));
27547           return;
27548
27549         case 3:
27550           /* tmp = target = A B C D */
27551           tmp = copy_to_reg (target);
27552           /* tmp = X B C D */
27553           ix86_expand_vector_set (false, tmp, val, 0);
27554           /* target = A B X D */
27555           emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27556                                           const0_rtx, const1_rtx,
27557                                           GEN_INT (2+4), GEN_INT (0+4)));
27558           return;
27559
27560         default:
27561           gcc_unreachable ();
27562         }
27563       break;
27564
27565     case V4SImode:
27566       use_vec_merge = TARGET_SSE4_1;
27567       if (use_vec_merge)
27568         break;
27569
27570       /* Element 0 handled by vec_merge below.  */
27571       if (elt == 0)
27572         {
27573           use_vec_merge = true;
27574           break;
27575         }
27576
27577       if (TARGET_SSE2)
27578         {
27579           /* With SSE2, use integer shuffles to swap element 0 and ELT,
27580              store into element 0, then shuffle them back.  */
27581
27582           rtx order[4];
27583
27584           order[0] = GEN_INT (elt);
27585           order[1] = const1_rtx;
27586           order[2] = const2_rtx;
27587           order[3] = GEN_INT (3);
27588           order[elt] = const0_rtx;
27589
27590           emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27591                                         order[1], order[2], order[3]));
27592
27593           ix86_expand_vector_set (false, target, val, 0);
27594
27595           emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27596                                         order[1], order[2], order[3]));
27597         }
27598       else
27599         {
27600           /* For SSE1, we have to reuse the V4SF code.  */
27601           ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
27602                                   gen_lowpart (SFmode, val), elt);
27603         }
27604       return;
27605
27606     case V8HImode:
27607       use_vec_merge = TARGET_SSE2;
27608       break;
27609     case V4HImode:
27610       use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27611       break;
27612
27613     case V16QImode:
27614       use_vec_merge = TARGET_SSE4_1;
27615       break;
27616
27617     case V8QImode:
27618       break;
27619
27620     case V32QImode:
27621       half_mode = V16QImode;
27622       j = 0;
27623       n = 16;
27624       goto half;
27625
27626     case V16HImode:
27627       half_mode = V8HImode;
27628       j = 1;
27629       n = 8;
27630       goto half;
27631
27632     case V8SImode:
27633       half_mode = V4SImode;
27634       j = 2;
27635       n = 4;
27636       goto half;
27637
27638     case V4DImode:
27639       half_mode = V2DImode;
27640       j = 3;
27641       n = 2;
27642       goto half;
27643
27644     case V8SFmode:
27645       half_mode = V4SFmode;
27646       j = 4;
27647       n = 4;
27648       goto half;
27649
27650     case V4DFmode:
27651       half_mode = V2DFmode;
27652       j = 5;
27653       n = 2;
27654       goto half;
27655
27656 half:
27657       /* Compute offset.  */
27658       i = elt / n;
27659       elt %= n;
27660
27661       gcc_assert (i <= 1);
27662
27663       /* Extract the half.  */
27664       tmp = gen_reg_rtx (half_mode);
27665       emit_insn ((*gen_extract[j][i]) (tmp, target));
27666
27667       /* Put val in tmp at elt.  */
27668       ix86_expand_vector_set (false, tmp, val, elt);
27669
27670       /* Put it back.  */
27671       emit_insn ((*gen_insert[j][i]) (target, target, tmp));
27672       return;
27673
27674     default:
27675       break;
27676     }
27677
27678   if (use_vec_merge)
27679     {
27680       tmp = gen_rtx_VEC_DUPLICATE (mode, val);
27681       tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
27682       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27683     }
27684   else
27685     {
27686       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27687
27688       emit_move_insn (mem, target);
27689
27690       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27691       emit_move_insn (tmp, val);
27692
27693       emit_move_insn (target, mem);
27694     }
27695 }
27696
27697 void
27698 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
27699 {
27700   enum machine_mode mode = GET_MODE (vec);
27701   enum machine_mode inner_mode = GET_MODE_INNER (mode);
27702   bool use_vec_extr = false;
27703   rtx tmp;
27704
27705   switch (mode)
27706     {
27707     case V2SImode:
27708     case V2SFmode:
27709       if (!mmx_ok)
27710         break;
27711       /* FALLTHRU */
27712
27713     case V2DFmode:
27714     case V2DImode:
27715       use_vec_extr = true;
27716       break;
27717
27718     case V4SFmode:
27719       use_vec_extr = TARGET_SSE4_1;
27720       if (use_vec_extr)
27721         break;
27722
27723       switch (elt)
27724         {
27725         case 0:
27726           tmp = vec;
27727           break;
27728
27729         case 1:
27730         case 3:
27731           tmp = gen_reg_rtx (mode);
27732           emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
27733                                        GEN_INT (elt), GEN_INT (elt),
27734                                        GEN_INT (elt+4), GEN_INT (elt+4)));
27735           break;
27736
27737         case 2:
27738           tmp = gen_reg_rtx (mode);
27739           emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
27740           break;
27741
27742         default:
27743           gcc_unreachable ();
27744         }
27745       vec = tmp;
27746       use_vec_extr = true;
27747       elt = 0;
27748       break;
27749
27750     case V4SImode:
27751       use_vec_extr = TARGET_SSE4_1;
27752       if (use_vec_extr)
27753         break;
27754
27755       if (TARGET_SSE2)
27756         {
27757           switch (elt)
27758             {
27759             case 0:
27760               tmp = vec;
27761               break;
27762
27763             case 1:
27764             case 3:
27765               tmp = gen_reg_rtx (mode);
27766               emit_insn (gen_sse2_pshufd_1 (tmp, vec,
27767                                             GEN_INT (elt), GEN_INT (elt),
27768                                             GEN_INT (elt), GEN_INT (elt)));
27769               break;
27770
27771             case 2:
27772               tmp = gen_reg_rtx (mode);
27773               emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
27774               break;
27775
27776             default:
27777               gcc_unreachable ();
27778             }
27779           vec = tmp;
27780           use_vec_extr = true;
27781           elt = 0;
27782         }
27783       else
27784         {
27785           /* For SSE1, we have to reuse the V4SF code.  */
27786           ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
27787                                       gen_lowpart (V4SFmode, vec), elt);
27788           return;
27789         }
27790       break;
27791
27792     case V8HImode:
27793       use_vec_extr = TARGET_SSE2;
27794       break;
27795     case V4HImode:
27796       use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27797       break;
27798
27799     case V16QImode:
27800       use_vec_extr = TARGET_SSE4_1;
27801       break;
27802
27803     case V8QImode:
27804       /* ??? Could extract the appropriate HImode element and shift.  */
27805     default:
27806       break;
27807     }
27808
27809   if (use_vec_extr)
27810     {
27811       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
27812       tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
27813
27814       /* Let the rtl optimizers know about the zero extension performed.  */
27815       if (inner_mode == QImode || inner_mode == HImode)
27816         {
27817           tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
27818           target = gen_lowpart (SImode, target);
27819         }
27820
27821       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27822     }
27823   else
27824     {
27825       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27826
27827       emit_move_insn (mem, vec);
27828
27829       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27830       emit_move_insn (target, tmp);
27831     }
27832 }
27833
27834 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
27835    pattern to reduce; DEST is the destination; IN is the input vector.  */
27836
27837 void
27838 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
27839 {
27840   rtx tmp1, tmp2, tmp3;
27841
27842   tmp1 = gen_reg_rtx (V4SFmode);
27843   tmp2 = gen_reg_rtx (V4SFmode);
27844   tmp3 = gen_reg_rtx (V4SFmode);
27845
27846   emit_insn (gen_sse_movhlps (tmp1, in, in));
27847   emit_insn (fn (tmp2, tmp1, in));
27848
27849   emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
27850                                   const1_rtx, const1_rtx,
27851                                   GEN_INT (1+4), GEN_INT (1+4)));
27852   emit_insn (fn (dest, tmp2, tmp3));
27853 }
27854 \f
27855 /* Target hook for scalar_mode_supported_p.  */
27856 static bool
27857 ix86_scalar_mode_supported_p (enum machine_mode mode)
27858 {
27859   if (DECIMAL_FLOAT_MODE_P (mode))
27860     return default_decimal_float_supported_p ();
27861   else if (mode == TFmode)
27862     return true;
27863   else
27864     return default_scalar_mode_supported_p (mode);
27865 }
27866
27867 /* Implements target hook vector_mode_supported_p.  */
27868 static bool
27869 ix86_vector_mode_supported_p (enum machine_mode mode)
27870 {
27871   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
27872     return true;
27873   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
27874     return true;
27875   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
27876     return true;
27877   if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
27878     return true;
27879   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
27880     return true;
27881   return false;
27882 }
27883
27884 /* Target hook for c_mode_for_suffix.  */
27885 static enum machine_mode
27886 ix86_c_mode_for_suffix (char suffix)
27887 {
27888   if (suffix == 'q')
27889     return TFmode;
27890   if (suffix == 'w')
27891     return XFmode;
27892
27893   return VOIDmode;
27894 }
27895
27896 /* Worker function for TARGET_MD_ASM_CLOBBERS.
27897
27898    We do this in the new i386 backend to maintain source compatibility
27899    with the old cc0-based compiler.  */
27900
27901 static tree
27902 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
27903                       tree inputs ATTRIBUTE_UNUSED,
27904                       tree clobbers)
27905 {
27906   clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
27907                         clobbers);
27908   clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
27909                         clobbers);
27910   return clobbers;
27911 }
27912
27913 /* Implements target vector targetm.asm.encode_section_info.  This
27914    is not used by netware.  */
27915
27916 static void ATTRIBUTE_UNUSED
27917 ix86_encode_section_info (tree decl, rtx rtl, int first)
27918 {
27919   default_encode_section_info (decl, rtl, first);
27920
27921   if (TREE_CODE (decl) == VAR_DECL
27922       && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
27923       && ix86_in_large_data_p (decl))
27924     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
27925 }
27926
27927 /* Worker function for REVERSE_CONDITION.  */
27928
27929 enum rtx_code
27930 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
27931 {
27932   return (mode != CCFPmode && mode != CCFPUmode
27933           ? reverse_condition (code)
27934           : reverse_condition_maybe_unordered (code));
27935 }
27936
27937 /* Output code to perform an x87 FP register move, from OPERANDS[1]
27938    to OPERANDS[0].  */
27939
27940 const char *
27941 output_387_reg_move (rtx insn, rtx *operands)
27942 {
27943   if (REG_P (operands[0]))
27944     {
27945       if (REG_P (operands[1])
27946           && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27947         {
27948           if (REGNO (operands[0]) == FIRST_STACK_REG)
27949             return output_387_ffreep (operands, 0);
27950           return "fstp\t%y0";
27951         }
27952       if (STACK_TOP_P (operands[0]))
27953         return "fld%Z1\t%y1";
27954       return "fst\t%y0";
27955     }
27956   else if (MEM_P (operands[0]))
27957     {
27958       gcc_assert (REG_P (operands[1]));
27959       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
27960         return "fstp%Z0\t%y0";
27961       else
27962         {
27963           /* There is no non-popping store to memory for XFmode.
27964              So if we need one, follow the store with a load.  */
27965           if (GET_MODE (operands[0]) == XFmode)
27966             return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
27967           else
27968             return "fst%Z0\t%y0";
27969         }
27970     }
27971   else
27972     gcc_unreachable();
27973 }
27974
27975 /* Output code to perform a conditional jump to LABEL, if C2 flag in
27976    FP status register is set.  */
27977
27978 void
27979 ix86_emit_fp_unordered_jump (rtx label)
27980 {
27981   rtx reg = gen_reg_rtx (HImode);
27982   rtx temp;
27983
27984   emit_insn (gen_x86_fnstsw_1 (reg));
27985
27986   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
27987     {
27988       emit_insn (gen_x86_sahf_1 (reg));
27989
27990       temp = gen_rtx_REG (CCmode, FLAGS_REG);
27991       temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
27992     }
27993   else
27994     {
27995       emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
27996
27997       temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
27998       temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
27999     }
28000
28001   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28002                               gen_rtx_LABEL_REF (VOIDmode, label),
28003                               pc_rtx);
28004   temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28005
28006   emit_jump_insn (temp);
28007   predict_jump (REG_BR_PROB_BASE * 10 / 100);
28008 }
28009
28010 /* Output code to perform a log1p XFmode calculation.  */
28011
28012 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28013 {
28014   rtx label1 = gen_label_rtx ();
28015   rtx label2 = gen_label_rtx ();
28016
28017   rtx tmp = gen_reg_rtx (XFmode);
28018   rtx tmp2 = gen_reg_rtx (XFmode);
28019   rtx test;
28020
28021   emit_insn (gen_absxf2 (tmp, op1));
28022   test = gen_rtx_GE (VOIDmode, tmp,
28023     CONST_DOUBLE_FROM_REAL_VALUE (
28024        REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28025        XFmode));
28026   emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
28027
28028   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28029   emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28030   emit_jump (label2);
28031
28032   emit_label (label1);
28033   emit_move_insn (tmp, CONST1_RTX (XFmode));
28034   emit_insn (gen_addxf3 (tmp, op1, tmp));
28035   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28036   emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28037
28038   emit_label (label2);
28039 }
28040
28041 /* Output code to perform a Newton-Rhapson approximation of a single precision
28042    floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
28043
28044 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28045 {
28046   rtx x0, x1, e0, e1, two;
28047
28048   x0 = gen_reg_rtx (mode);
28049   e0 = gen_reg_rtx (mode);
28050   e1 = gen_reg_rtx (mode);
28051   x1 = gen_reg_rtx (mode);
28052
28053   two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28054
28055   if (VECTOR_MODE_P (mode))
28056     two = ix86_build_const_vector (SFmode, true, two);
28057
28058   two = force_reg (mode, two);
28059
28060   /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28061
28062   /* x0 = rcp(b) estimate */
28063   emit_insn (gen_rtx_SET (VOIDmode, x0,
28064                           gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
28065                                           UNSPEC_RCP)));
28066   /* e0 = x0 * a */
28067   emit_insn (gen_rtx_SET (VOIDmode, e0,
28068                           gen_rtx_MULT (mode, x0, a)));
28069   /* e1 = x0 * b */
28070   emit_insn (gen_rtx_SET (VOIDmode, e1,
28071                           gen_rtx_MULT (mode, x0, b)));
28072   /* x1 = 2. - e1 */
28073   emit_insn (gen_rtx_SET (VOIDmode, x1,
28074                           gen_rtx_MINUS (mode, two, e1)));
28075   /* res = e0 * x1 */
28076   emit_insn (gen_rtx_SET (VOIDmode, res,
28077                           gen_rtx_MULT (mode, e0, x1)));
28078 }
28079
28080 /* Output code to perform a Newton-Rhapson approximation of a
28081    single precision floating point [reciprocal] square root.  */
28082
28083 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28084                          bool recip)
28085 {
28086   rtx x0, e0, e1, e2, e3, mthree, mhalf;
28087   REAL_VALUE_TYPE r;
28088
28089   x0 = gen_reg_rtx (mode);
28090   e0 = gen_reg_rtx (mode);
28091   e1 = gen_reg_rtx (mode);
28092   e2 = gen_reg_rtx (mode);
28093   e3 = gen_reg_rtx (mode);
28094
28095   real_from_integer (&r, VOIDmode, -3, -1, 0);
28096   mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28097
28098   real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28099   mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28100
28101   if (VECTOR_MODE_P (mode))
28102     {
28103       mthree = ix86_build_const_vector (SFmode, true, mthree);
28104       mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28105     }
28106
28107   /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28108      rsqrt(a) = -0.5     * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28109
28110   /* x0 = rsqrt(a) estimate */
28111   emit_insn (gen_rtx_SET (VOIDmode, x0,
28112                           gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28113                                           UNSPEC_RSQRT)));
28114
28115   /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
28116   if (!recip)
28117     {
28118       rtx zero, mask;
28119
28120       zero = gen_reg_rtx (mode);
28121       mask = gen_reg_rtx (mode);
28122
28123       zero = force_reg (mode, CONST0_RTX(mode));
28124       emit_insn (gen_rtx_SET (VOIDmode, mask,
28125                               gen_rtx_NE (mode, zero, a)));
28126
28127       emit_insn (gen_rtx_SET (VOIDmode, x0,
28128                               gen_rtx_AND (mode, x0, mask)));
28129     }
28130
28131   /* e0 = x0 * a */
28132   emit_insn (gen_rtx_SET (VOIDmode, e0,
28133                           gen_rtx_MULT (mode, x0, a)));
28134   /* e1 = e0 * x0 */
28135   emit_insn (gen_rtx_SET (VOIDmode, e1,
28136                           gen_rtx_MULT (mode, e0, x0)));
28137
28138   /* e2 = e1 - 3. */
28139   mthree = force_reg (mode, mthree);
28140   emit_insn (gen_rtx_SET (VOIDmode, e2,
28141                           gen_rtx_PLUS (mode, e1, mthree)));
28142
28143   mhalf = force_reg (mode, mhalf);
28144   if (recip)
28145     /* e3 = -.5 * x0 */
28146     emit_insn (gen_rtx_SET (VOIDmode, e3,
28147                             gen_rtx_MULT (mode, x0, mhalf)));
28148   else
28149     /* e3 = -.5 * e0 */
28150     emit_insn (gen_rtx_SET (VOIDmode, e3,
28151                             gen_rtx_MULT (mode, e0, mhalf)));
28152   /* ret = e2 * e3 */
28153   emit_insn (gen_rtx_SET (VOIDmode, res,
28154                           gen_rtx_MULT (mode, e2, e3)));
28155 }
28156
28157 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
28158
28159 static void ATTRIBUTE_UNUSED
28160 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28161                                 tree decl)
28162 {
28163   /* With Binutils 2.15, the "@unwind" marker must be specified on
28164      every occurrence of the ".eh_frame" section, not just the first
28165      one.  */
28166   if (TARGET_64BIT
28167       && strcmp (name, ".eh_frame") == 0)
28168     {
28169       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28170                flags & SECTION_WRITE ? "aw" : "a");
28171       return;
28172     }
28173   default_elf_asm_named_section (name, flags, decl);
28174 }
28175
28176 /* Return the mangling of TYPE if it is an extended fundamental type.  */
28177
28178 static const char *
28179 ix86_mangle_type (const_tree type)
28180 {
28181   type = TYPE_MAIN_VARIANT (type);
28182
28183   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28184       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28185     return NULL;
28186
28187   switch (TYPE_MODE (type))
28188     {
28189     case TFmode:
28190       /* __float128 is "g".  */
28191       return "g";
28192     case XFmode:
28193       /* "long double" or __float80 is "e".  */
28194       return "e";
28195     default:
28196       return NULL;
28197     }
28198 }
28199
28200 /* For 32-bit code we can save PIC register setup by using
28201    __stack_chk_fail_local hidden function instead of calling
28202    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
28203    register, so it is better to call __stack_chk_fail directly.  */
28204
28205 static tree
28206 ix86_stack_protect_fail (void)
28207 {
28208   return TARGET_64BIT
28209          ? default_external_stack_protect_fail ()
28210          : default_hidden_stack_protect_fail ();
28211 }
28212
28213 /* Select a format to encode pointers in exception handling data.  CODE
28214    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
28215    true if the symbol may be affected by dynamic relocations.
28216
28217    ??? All x86 object file formats are capable of representing this.
28218    After all, the relocation needed is the same as for the call insn.
28219    Whether or not a particular assembler allows us to enter such, I
28220    guess we'll have to see.  */
28221 int
28222 asm_preferred_eh_data_format (int code, int global)
28223 {
28224   if (flag_pic)
28225     {
28226       int type = DW_EH_PE_sdata8;
28227       if (!TARGET_64BIT
28228           || ix86_cmodel == CM_SMALL_PIC
28229           || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28230         type = DW_EH_PE_sdata4;
28231       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
28232     }
28233   if (ix86_cmodel == CM_SMALL
28234       || (ix86_cmodel == CM_MEDIUM && code))
28235     return DW_EH_PE_udata4;
28236   return DW_EH_PE_absptr;
28237 }
28238 \f
28239 /* Expand copysign from SIGN to the positive value ABS_VALUE
28240    storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
28241    the sign-bit.  */
28242 static void
28243 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28244 {
28245   enum machine_mode mode = GET_MODE (sign);
28246   rtx sgn = gen_reg_rtx (mode);
28247   if (mask == NULL_RTX)
28248     {
28249       mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28250       if (!VECTOR_MODE_P (mode))
28251         {
28252           /* We need to generate a scalar mode mask in this case.  */
28253           rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28254           tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28255           mask = gen_reg_rtx (mode);
28256           emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28257         }
28258     }
28259   else
28260     mask = gen_rtx_NOT (mode, mask);
28261   emit_insn (gen_rtx_SET (VOIDmode, sgn,
28262                           gen_rtx_AND (mode, mask, sign)));
28263   emit_insn (gen_rtx_SET (VOIDmode, result,
28264                           gen_rtx_IOR (mode, abs_value, sgn)));
28265 }
28266
28267 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
28268    mask for masking out the sign-bit is stored in *SMASK, if that is
28269    non-null.  */
28270 static rtx
28271 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28272 {
28273   enum machine_mode mode = GET_MODE (op0);
28274   rtx xa, mask;
28275
28276   xa = gen_reg_rtx (mode);
28277   mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28278   if (!VECTOR_MODE_P (mode))
28279     {
28280       /* We need to generate a scalar mode mask in this case.  */
28281       rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28282       tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28283       mask = gen_reg_rtx (mode);
28284       emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28285     }
28286   emit_insn (gen_rtx_SET (VOIDmode, xa,
28287                           gen_rtx_AND (mode, op0, mask)));
28288
28289   if (smask)
28290     *smask = mask;
28291
28292   return xa;
28293 }
28294
28295 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28296    swapping the operands if SWAP_OPERANDS is true.  The expanded
28297    code is a forward jump to a newly created label in case the
28298    comparison is true.  The generated label rtx is returned.  */
28299 static rtx
28300 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28301                                   bool swap_operands)
28302 {
28303   rtx label, tmp;
28304
28305   if (swap_operands)
28306     {
28307       tmp = op0;
28308       op0 = op1;
28309       op1 = tmp;
28310     }
28311
28312   label = gen_label_rtx ();
28313   tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28314   emit_insn (gen_rtx_SET (VOIDmode, tmp,
28315                           gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28316   tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28317   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28318                               gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28319   tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28320   JUMP_LABEL (tmp) = label;
28321
28322   return label;
28323 }
28324
28325 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28326    using comparison code CODE.  Operands are swapped for the comparison if
28327    SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
28328 static rtx
28329 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28330                               bool swap_operands)
28331 {
28332   enum machine_mode mode = GET_MODE (op0);
28333   rtx mask = gen_reg_rtx (mode);
28334
28335   if (swap_operands)
28336     {
28337       rtx tmp = op0;
28338       op0 = op1;
28339       op1 = tmp;
28340     }
28341
28342   if (mode == DFmode)
28343     emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28344                                     gen_rtx_fmt_ee (code, mode, op0, op1)));
28345   else
28346     emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28347                                    gen_rtx_fmt_ee (code, mode, op0, op1)));
28348
28349   return mask;
28350 }
28351
28352 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28353    of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
28354 static rtx
28355 ix86_gen_TWO52 (enum machine_mode mode)
28356 {
28357   REAL_VALUE_TYPE TWO52r;
28358   rtx TWO52;
28359
28360   real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28361   TWO52 = const_double_from_real_value (TWO52r, mode);
28362   TWO52 = force_reg (mode, TWO52);
28363
28364   return TWO52;
28365 }
28366
28367 /* Expand SSE sequence for computing lround from OP1 storing
28368    into OP0.  */
28369 void
28370 ix86_expand_lround (rtx op0, rtx op1)
28371 {
28372   /* C code for the stuff we're doing below:
28373        tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28374        return (long)tmp;
28375    */
28376   enum machine_mode mode = GET_MODE (op1);
28377   const struct real_format *fmt;
28378   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28379   rtx adj;
28380
28381   /* load nextafter (0.5, 0.0) */
28382   fmt = REAL_MODE_FORMAT (mode);
28383   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28384   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28385
28386   /* adj = copysign (0.5, op1) */
28387   adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
28388   ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
28389
28390   /* adj = op1 + adj */
28391   adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
28392
28393   /* op0 = (imode)adj */
28394   expand_fix (op0, adj, 0);
28395 }
28396
28397 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
28398    into OPERAND0.  */
28399 void
28400 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
28401 {
28402   /* C code for the stuff we're doing below (for do_floor):
28403         xi = (long)op1;
28404         xi -= (double)xi > op1 ? 1 : 0;
28405         return xi;
28406    */
28407   enum machine_mode fmode = GET_MODE (op1);
28408   enum machine_mode imode = GET_MODE (op0);
28409   rtx ireg, freg, label, tmp;
28410
28411   /* reg = (long)op1 */
28412   ireg = gen_reg_rtx (imode);
28413   expand_fix (ireg, op1, 0);
28414
28415   /* freg = (double)reg */
28416   freg = gen_reg_rtx (fmode);
28417   expand_float (freg, ireg, 0);
28418
28419   /* ireg = (freg > op1) ? ireg - 1 : ireg */
28420   label = ix86_expand_sse_compare_and_jump (UNLE,
28421                                             freg, op1, !do_floor);
28422   tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
28423                              ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
28424   emit_move_insn (ireg, tmp);
28425
28426   emit_label (label);
28427   LABEL_NUSES (label) = 1;
28428
28429   emit_move_insn (op0, ireg);
28430 }
28431
28432 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
28433    result in OPERAND0.  */
28434 void
28435 ix86_expand_rint (rtx operand0, rtx operand1)
28436 {
28437   /* C code for the stuff we're doing below:
28438         xa = fabs (operand1);
28439         if (!isless (xa, 2**52))
28440           return operand1;
28441         xa = xa + 2**52 - 2**52;
28442         return copysign (xa, operand1);
28443    */
28444   enum machine_mode mode = GET_MODE (operand0);
28445   rtx res, xa, label, TWO52, mask;
28446
28447   res = gen_reg_rtx (mode);
28448   emit_move_insn (res, operand1);
28449
28450   /* xa = abs (operand1) */
28451   xa = ix86_expand_sse_fabs (res, &mask);
28452
28453   /* if (!isless (xa, TWO52)) goto label; */
28454   TWO52 = ix86_gen_TWO52 (mode);
28455   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28456
28457   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28458   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28459
28460   ix86_sse_copysign_to_positive (res, xa, res, mask);
28461
28462   emit_label (label);
28463   LABEL_NUSES (label) = 1;
28464
28465   emit_move_insn (operand0, res);
28466 }
28467
28468 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
28469    into OPERAND0.  */
28470 void
28471 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
28472 {
28473   /* C code for the stuff we expand below.
28474         double xa = fabs (x), x2;
28475         if (!isless (xa, TWO52))
28476           return x;
28477         xa = xa + TWO52 - TWO52;
28478         x2 = copysign (xa, x);
28479      Compensate.  Floor:
28480         if (x2 > x)
28481           x2 -= 1;
28482      Compensate.  Ceil:
28483         if (x2 < x)
28484           x2 -= -1;
28485         return x2;
28486    */
28487   enum machine_mode mode = GET_MODE (operand0);
28488   rtx xa, TWO52, tmp, label, one, res, mask;
28489
28490   TWO52 = ix86_gen_TWO52 (mode);
28491
28492   /* Temporary for holding the result, initialized to the input
28493      operand to ease control flow.  */
28494   res = gen_reg_rtx (mode);
28495   emit_move_insn (res, operand1);
28496
28497   /* xa = abs (operand1) */
28498   xa = ix86_expand_sse_fabs (res, &mask);
28499
28500   /* if (!isless (xa, TWO52)) goto label; */
28501   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28502
28503   /* xa = xa + TWO52 - TWO52; */
28504   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28505   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
28506
28507   /* xa = copysign (xa, operand1) */
28508   ix86_sse_copysign_to_positive (xa, xa, res, mask);
28509
28510   /* generate 1.0 or -1.0 */
28511   one = force_reg (mode,
28512                    const_double_from_real_value (do_floor
28513                                                  ? dconst1 : dconstm1, mode));
28514
28515   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28516   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28517   emit_insn (gen_rtx_SET (VOIDmode, tmp,
28518                           gen_rtx_AND (mode, one, tmp)));
28519   /* We always need to subtract here to preserve signed zero.  */
28520   tmp = expand_simple_binop (mode, MINUS,
28521                              xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28522   emit_move_insn (res, tmp);
28523
28524   emit_label (label);
28525   LABEL_NUSES (label) = 1;
28526
28527   emit_move_insn (operand0, res);
28528 }
28529
28530 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
28531    into OPERAND0.  */
28532 void
28533 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
28534 {
28535   /* C code for the stuff we expand below.
28536         double xa = fabs (x), x2;
28537         if (!isless (xa, TWO52))
28538           return x;
28539         x2 = (double)(long)x;
28540      Compensate.  Floor:
28541         if (x2 > x)
28542           x2 -= 1;
28543      Compensate.  Ceil:
28544         if (x2 < x)
28545           x2 += 1;
28546         if (HONOR_SIGNED_ZEROS (mode))
28547           return copysign (x2, x);
28548         return x2;
28549    */
28550   enum machine_mode mode = GET_MODE (operand0);
28551   rtx xa, xi, TWO52, tmp, label, one, res, mask;
28552
28553   TWO52 = ix86_gen_TWO52 (mode);
28554
28555   /* Temporary for holding the result, initialized to the input
28556      operand to ease control flow.  */
28557   res = gen_reg_rtx (mode);
28558   emit_move_insn (res, operand1);
28559
28560   /* xa = abs (operand1) */
28561   xa = ix86_expand_sse_fabs (res, &mask);
28562
28563   /* if (!isless (xa, TWO52)) goto label; */
28564   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28565
28566   /* xa = (double)(long)x */
28567   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28568   expand_fix (xi, res, 0);
28569   expand_float (xa, xi, 0);
28570
28571   /* generate 1.0 */
28572   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28573
28574   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
28575   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
28576   emit_insn (gen_rtx_SET (VOIDmode, tmp,
28577                           gen_rtx_AND (mode, one, tmp)));
28578   tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
28579                              xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28580   emit_move_insn (res, tmp);
28581
28582   if (HONOR_SIGNED_ZEROS (mode))
28583     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28584
28585   emit_label (label);
28586   LABEL_NUSES (label) = 1;
28587
28588   emit_move_insn (operand0, res);
28589 }
28590
28591 /* Expand SSE sequence for computing round from OPERAND1 storing
28592    into OPERAND0.  Sequence that works without relying on DImode truncation
28593    via cvttsd2siq that is only available on 64bit targets.  */
28594 void
28595 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
28596 {
28597   /* C code for the stuff we expand below.
28598         double xa = fabs (x), xa2, x2;
28599         if (!isless (xa, TWO52))
28600           return x;
28601      Using the absolute value and copying back sign makes
28602      -0.0 -> -0.0 correct.
28603         xa2 = xa + TWO52 - TWO52;
28604      Compensate.
28605         dxa = xa2 - xa;
28606         if (dxa <= -0.5)
28607           xa2 += 1;
28608         else if (dxa > 0.5)
28609           xa2 -= 1;
28610         x2 = copysign (xa2, x);
28611         return x2;
28612    */
28613   enum machine_mode mode = GET_MODE (operand0);
28614   rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
28615
28616   TWO52 = ix86_gen_TWO52 (mode);
28617
28618   /* Temporary for holding the result, initialized to the input
28619      operand to ease control flow.  */
28620   res = gen_reg_rtx (mode);
28621   emit_move_insn (res, operand1);
28622
28623   /* xa = abs (operand1) */
28624   xa = ix86_expand_sse_fabs (res, &mask);
28625
28626   /* if (!isless (xa, TWO52)) goto label; */
28627   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28628
28629   /* xa2 = xa + TWO52 - TWO52; */
28630   xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28631   xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
28632
28633   /* dxa = xa2 - xa; */
28634   dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
28635
28636   /* generate 0.5, 1.0 and -0.5 */
28637   half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
28638   one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
28639   mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
28640                                0, OPTAB_DIRECT);
28641
28642   /* Compensate.  */
28643   tmp = gen_reg_rtx (mode);
28644   /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
28645   tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
28646   emit_insn (gen_rtx_SET (VOIDmode, tmp,
28647                           gen_rtx_AND (mode, one, tmp)));
28648   xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28649   /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
28650   tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
28651   emit_insn (gen_rtx_SET (VOIDmode, tmp,
28652                           gen_rtx_AND (mode, one, tmp)));
28653   xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
28654
28655   /* res = copysign (xa2, operand1) */
28656   ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
28657
28658   emit_label (label);
28659   LABEL_NUSES (label) = 1;
28660
28661   emit_move_insn (operand0, res);
28662 }
28663
28664 /* Expand SSE sequence for computing trunc from OPERAND1 storing
28665    into OPERAND0.  */
28666 void
28667 ix86_expand_trunc (rtx operand0, rtx operand1)
28668 {
28669   /* C code for SSE variant we expand below.
28670         double xa = fabs (x), x2;
28671         if (!isless (xa, TWO52))
28672           return x;
28673         x2 = (double)(long)x;
28674         if (HONOR_SIGNED_ZEROS (mode))
28675           return copysign (x2, x);
28676         return x2;
28677    */
28678   enum machine_mode mode = GET_MODE (operand0);
28679   rtx xa, xi, TWO52, label, res, mask;
28680
28681   TWO52 = ix86_gen_TWO52 (mode);
28682
28683   /* Temporary for holding the result, initialized to the input
28684      operand to ease control flow.  */
28685   res = gen_reg_rtx (mode);
28686   emit_move_insn (res, operand1);
28687
28688   /* xa = abs (operand1) */
28689   xa = ix86_expand_sse_fabs (res, &mask);
28690
28691   /* if (!isless (xa, TWO52)) goto label; */
28692   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28693
28694   /* x = (double)(long)x */
28695   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28696   expand_fix (xi, res, 0);
28697   expand_float (res, xi, 0);
28698
28699   if (HONOR_SIGNED_ZEROS (mode))
28700     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
28701
28702   emit_label (label);
28703   LABEL_NUSES (label) = 1;
28704
28705   emit_move_insn (operand0, res);
28706 }
28707
28708 /* Expand SSE sequence for computing trunc from OPERAND1 storing
28709    into OPERAND0.  */
28710 void
28711 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
28712 {
28713   enum machine_mode mode = GET_MODE (operand0);
28714   rtx xa, mask, TWO52, label, one, res, smask, tmp;
28715
28716   /* C code for SSE variant we expand below.
28717         double xa = fabs (x), x2;
28718         if (!isless (xa, TWO52))
28719           return x;
28720         xa2 = xa + TWO52 - TWO52;
28721      Compensate:
28722         if (xa2 > xa)
28723           xa2 -= 1.0;
28724         x2 = copysign (xa2, x);
28725         return x2;
28726    */
28727
28728   TWO52 = ix86_gen_TWO52 (mode);
28729
28730   /* Temporary for holding the result, initialized to the input
28731      operand to ease control flow.  */
28732   res = gen_reg_rtx (mode);
28733   emit_move_insn (res, operand1);
28734
28735   /* xa = abs (operand1) */
28736   xa = ix86_expand_sse_fabs (res, &smask);
28737
28738   /* if (!isless (xa, TWO52)) goto label; */
28739   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28740
28741   /* res = xa + TWO52 - TWO52; */
28742   tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
28743   tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
28744   emit_move_insn (res, tmp);
28745
28746   /* generate 1.0 */
28747   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
28748
28749   /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
28750   mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
28751   emit_insn (gen_rtx_SET (VOIDmode, mask,
28752                           gen_rtx_AND (mode, mask, one)));
28753   tmp = expand_simple_binop (mode, MINUS,
28754                              res, mask, NULL_RTX, 0, OPTAB_DIRECT);
28755   emit_move_insn (res, tmp);
28756
28757   /* res = copysign (res, operand1) */
28758   ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
28759
28760   emit_label (label);
28761   LABEL_NUSES (label) = 1;
28762
28763   emit_move_insn (operand0, res);
28764 }
28765
28766 /* Expand SSE sequence for computing round from OPERAND1 storing
28767    into OPERAND0.  */
28768 void
28769 ix86_expand_round (rtx operand0, rtx operand1)
28770 {
28771   /* C code for the stuff we're doing below:
28772         double xa = fabs (x);
28773         if (!isless (xa, TWO52))
28774           return x;
28775         xa = (double)(long)(xa + nextafter (0.5, 0.0));
28776         return copysign (xa, x);
28777    */
28778   enum machine_mode mode = GET_MODE (operand0);
28779   rtx res, TWO52, xa, label, xi, half, mask;
28780   const struct real_format *fmt;
28781   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
28782
28783   /* Temporary for holding the result, initialized to the input
28784      operand to ease control flow.  */
28785   res = gen_reg_rtx (mode);
28786   emit_move_insn (res, operand1);
28787
28788   TWO52 = ix86_gen_TWO52 (mode);
28789   xa = ix86_expand_sse_fabs (res, &mask);
28790   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
28791
28792   /* load nextafter (0.5, 0.0) */
28793   fmt = REAL_MODE_FORMAT (mode);
28794   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
28795   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
28796
28797   /* xa = xa + 0.5 */
28798   half = force_reg (mode, const_double_from_real_value (pred_half, mode));
28799   xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
28800
28801   /* xa = (double)(int64_t)xa */
28802   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
28803   expand_fix (xi, xa, 0);
28804   expand_float (xa, xi, 0);
28805
28806   /* res = copysign (xa, operand1) */
28807   ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
28808
28809   emit_label (label);
28810   LABEL_NUSES (label) = 1;
28811
28812   emit_move_insn (operand0, res);
28813 }
28814 \f
28815
28816 /* Table of valid machine attributes.  */
28817 static const struct attribute_spec ix86_attribute_table[] =
28818 {
28819   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
28820   /* Stdcall attribute says callee is responsible for popping arguments
28821      if they are not variable.  */
28822   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
28823   /* Fastcall attribute says callee is responsible for popping arguments
28824      if they are not variable.  */
28825   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
28826   /* Cdecl attribute says the callee is a normal C declaration */
28827   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
28828   /* Regparm attribute specifies how many integer arguments are to be
28829      passed in registers.  */
28830   { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
28831   /* Sseregparm attribute says we are using x86_64 calling conventions
28832      for FP arguments.  */
28833   { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
28834   /* force_align_arg_pointer says this function realigns the stack at entry.  */
28835   { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
28836     false, true,  true, ix86_handle_cconv_attribute },
28837 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
28838   { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
28839   { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
28840   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
28841 #endif
28842   { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
28843   { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
28844 #ifdef SUBTARGET_ATTRIBUTE_TABLE
28845   SUBTARGET_ATTRIBUTE_TABLE,
28846 #endif
28847   /* ms_abi and sysv_abi calling convention function attributes.  */
28848   { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
28849   { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
28850   { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
28851   /* End element.  */
28852   { NULL,        0, 0, false, false, false, NULL }
28853 };
28854
28855 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
28856 static int
28857 ix86_builtin_vectorization_cost (bool runtime_test)
28858 {
28859   /* If the branch of the runtime test is taken - i.e. - the vectorized
28860      version is skipped - this incurs a misprediction cost (because the
28861      vectorized version is expected to be the fall-through).  So we subtract
28862      the latency of a mispredicted branch from the costs that are incured
28863      when the vectorized version is executed.
28864
28865      TODO: The values in individual target tables have to be tuned or new
28866      fields may be needed. For eg. on K8, the default branch path is the
28867      not-taken path. If the taken path is predicted correctly, the minimum
28868      penalty of going down the taken-path is 1 cycle. If the taken-path is
28869      not predicted correctly, then the minimum penalty is 10 cycles.  */
28870
28871   if (runtime_test)
28872     {
28873       return (-(ix86_cost->cond_taken_branch_cost));
28874     }
28875   else
28876     return 0;
28877 }
28878
28879 /* Implement targetm.vectorize.builtin_vec_perm.  */
28880
28881 static tree
28882 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
28883 {
28884   tree itype = TREE_TYPE (vec_type);
28885   bool u = TYPE_UNSIGNED (itype);
28886   enum machine_mode vmode = TYPE_MODE (vec_type);
28887   enum ix86_builtins fcode = fcode; /* Silence bogus warning.  */
28888   bool ok = TARGET_SSE2;
28889
28890   switch (vmode)
28891     {
28892     case V4DFmode:
28893       ok = TARGET_AVX;
28894       fcode = IX86_BUILTIN_VEC_PERM_V4DF;
28895       goto get_di;
28896     case V2DFmode:
28897       fcode = IX86_BUILTIN_VEC_PERM_V2DF;
28898     get_di:
28899       itype = ix86_get_builtin_type (IX86_BT_DI);
28900       break;
28901
28902     case V8SFmode:
28903       ok = TARGET_AVX;
28904       fcode = IX86_BUILTIN_VEC_PERM_V8SF;
28905       goto get_si;
28906     case V4SFmode:
28907       ok = TARGET_SSE;
28908       fcode = IX86_BUILTIN_VEC_PERM_V4SF;
28909     get_si:
28910       itype = ix86_get_builtin_type (IX86_BT_SI);
28911       break;
28912
28913     case V2DImode:
28914       fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
28915       break;
28916     case V4SImode:
28917       fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
28918       break;
28919     case V8HImode:
28920       fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
28921       break;
28922     case V16QImode:
28923       fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
28924       break;
28925     default:
28926       ok = false;
28927       break;
28928     }
28929
28930   if (!ok)
28931     return NULL_TREE;
28932
28933   *mask_type = itype;
28934   return ix86_builtins[(int) fcode];
28935 }
28936
28937 /* Return a vector mode with twice as many elements as VMODE.  */
28938 /* ??? Consider moving this to a table generated by genmodes.c.  */
28939
28940 static enum machine_mode
28941 doublesize_vector_mode (enum machine_mode vmode)
28942 {
28943   switch (vmode)
28944     {
28945     case V2SFmode:      return V4SFmode;
28946     case V1DImode:      return V2DImode;
28947     case V2SImode:      return V4SImode;
28948     case V4HImode:      return V8HImode;
28949     case V8QImode:      return V16QImode;
28950
28951     case V2DFmode:      return V4DFmode;
28952     case V4SFmode:      return V8SFmode;
28953     case V2DImode:      return V4DImode;
28954     case V4SImode:      return V8SImode;
28955     case V8HImode:      return V16HImode;
28956     case V16QImode:     return V32QImode;
28957
28958     case V4DFmode:      return V8DFmode;
28959     case V8SFmode:      return V16SFmode;
28960     case V4DImode:      return V8DImode;
28961     case V8SImode:      return V16SImode;
28962     case V16HImode:     return V32HImode;
28963     case V32QImode:     return V64QImode;
28964
28965     default:
28966       gcc_unreachable ();
28967     }
28968 }
28969
28970 /* Construct (set target (vec_select op0 (parallel perm))) and
28971    return true if that's a valid instruction in the active ISA.  */
28972
28973 static bool
28974 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
28975 {
28976   rtx rperm[MAX_VECT_LEN], x;
28977   unsigned i;
28978
28979   for (i = 0; i < nelt; ++i)
28980     rperm[i] = GEN_INT (perm[i]);
28981
28982   x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
28983   x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
28984   x = gen_rtx_SET (VOIDmode, target, x);
28985
28986   x = emit_insn (x);
28987   if (recog_memoized (x) < 0)
28988     {
28989       remove_insn (x);
28990       return false;
28991     }
28992   return true;
28993 }
28994
28995 /* Similar, but generate a vec_concat from op0 and op1 as well.  */
28996
28997 static bool
28998 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
28999                         const unsigned char *perm, unsigned nelt)
29000 {
29001   enum machine_mode v2mode;
29002   rtx x;
29003
29004   v2mode = doublesize_vector_mode (GET_MODE (op0));
29005   x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
29006   return expand_vselect (target, x, perm, nelt);
29007 }
29008
29009 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
29010    in terms of blendp[sd] / pblendw / pblendvb.  */
29011
29012 static bool
29013 expand_vec_perm_blend (struct expand_vec_perm_d *d)
29014 {
29015   enum machine_mode vmode = d->vmode;
29016   unsigned i, mask, nelt = d->nelt;
29017   rtx target, op0, op1, x;
29018
29019   if (!TARGET_SSE4_1 || d->op0 == d->op1)
29020     return false;
29021   if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
29022     return false;
29023
29024   /* This is a blend, not a permute.  Elements must stay in their
29025      respective lanes.  */
29026   for (i = 0; i < nelt; ++i)
29027     {
29028       unsigned e = d->perm[i];
29029       if (!(e == i || e == i + nelt))
29030         return false;
29031     }
29032
29033   if (d->testing_p)
29034     return true;
29035
29036   /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
29037      decision should be extracted elsewhere, so that we only try that
29038      sequence once all budget==3 options have been tried.  */
29039
29040   /* For bytes, see if bytes move in pairs so we can use pblendw with
29041      an immediate argument, rather than pblendvb with a vector argument.  */
29042   if (vmode == V16QImode)
29043     {
29044       bool pblendw_ok = true;
29045       for (i = 0; i < 16 && pblendw_ok; i += 2)
29046         pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
29047
29048       if (!pblendw_ok)
29049         {
29050           rtx rperm[16], vperm;
29051
29052           for (i = 0; i < nelt; ++i)
29053             rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
29054
29055           vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
29056           vperm = force_reg (V16QImode, vperm);
29057
29058           emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
29059           return true;
29060         }
29061     }
29062
29063   target = d->target;
29064   op0 = d->op0;
29065   op1 = d->op1;
29066   mask = 0;
29067
29068   switch (vmode)
29069     {
29070     case V4DFmode:
29071     case V8SFmode:
29072     case V2DFmode:
29073     case V4SFmode:
29074     case V8HImode:
29075       for (i = 0; i < nelt; ++i)
29076         mask |= (d->perm[i] >= nelt) << i;
29077       break;
29078
29079     case V2DImode:
29080       for (i = 0; i < 2; ++i)
29081         mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
29082       goto do_subreg;
29083
29084     case V4SImode:
29085       for (i = 0; i < 4; ++i)
29086         mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
29087       goto do_subreg;
29088
29089     case V16QImode:
29090       for (i = 0; i < 8; ++i)
29091         mask |= (d->perm[i * 2] >= 16) << i;
29092
29093     do_subreg:
29094       vmode = V8HImode;
29095       target = gen_lowpart (vmode, target);
29096       op0 = gen_lowpart (vmode, target);
29097       op1 = gen_lowpart (vmode, target);
29098       break;
29099
29100     default:
29101       gcc_unreachable ();
29102     }
29103
29104   /* This matches five different patterns with the different modes.  */
29105   x = gen_rtx_VEC_MERGE (vmode, op0, op1, GEN_INT (mask));
29106   x = gen_rtx_SET (VOIDmode, target, x);
29107   emit_insn (x);
29108
29109   return true;
29110 }
29111
29112 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
29113    in terms of the variable form of vpermilps.
29114
29115    Note that we will have already failed the immediate input vpermilps,
29116    which requires that the high and low part shuffle be identical; the
29117    variable form doesn't require that.  */
29118
29119 static bool
29120 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
29121 {
29122   rtx rperm[8], vperm;
29123   unsigned i;
29124
29125   if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
29126     return false;
29127
29128   /* We can only permute within the 128-bit lane.  */
29129   for (i = 0; i < 8; ++i)
29130     {
29131       unsigned e = d->perm[i];
29132       if (i < 4 ? e >= 4 : e < 4)
29133         return false;
29134     }
29135
29136   if (d->testing_p)
29137     return true;
29138
29139   for (i = 0; i < 8; ++i)
29140     {
29141       unsigned e = d->perm[i];
29142
29143       /* Within each 128-bit lane, the elements of op0 are numbered
29144          from 0 and the elements of op1 are numbered from 4.  */
29145       if (e >= 8 + 4)
29146         e -= 8;
29147       else if (e >= 4)
29148         e -= 4;
29149
29150       rperm[i] = GEN_INT (e);
29151     }
29152
29153   vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
29154   vperm = force_reg (V8SImode, vperm);
29155   emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
29156
29157   return true;
29158 }
29159
29160 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
29161    in terms of pshufb or vpperm.  */
29162
29163 static bool
29164 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
29165 {
29166   unsigned i, nelt, eltsz;
29167   rtx rperm[16], vperm, target, op0, op1;
29168
29169   if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
29170     return false;
29171   if (GET_MODE_SIZE (d->vmode) != 16)
29172     return false;
29173
29174   if (d->testing_p)
29175     return true;
29176
29177   nelt = d->nelt;
29178   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
29179
29180   for (i = 0; i < nelt; ++i)
29181     {
29182       unsigned j, e = d->perm[i];
29183       for (j = 0; j < eltsz; ++j)
29184         rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
29185     }
29186
29187   vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
29188   vperm = force_reg (V16QImode, vperm);
29189
29190   target = gen_lowpart (V16QImode, d->target);
29191   op0 = gen_lowpart (V16QImode, d->op0);
29192   if (d->op0 == d->op1)
29193     emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
29194   else
29195     {
29196       op1 = gen_lowpart (V16QImode, d->op1);
29197       emit_insn (gen_xop_pperm (target, op0, op1, vperm));
29198     }
29199
29200   return true;
29201 }
29202
29203 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
29204    in a single instruction.  */
29205
29206 static bool
29207 expand_vec_perm_1 (struct expand_vec_perm_d *d)
29208 {
29209   unsigned i, nelt = d->nelt;
29210   unsigned char perm2[MAX_VECT_LEN];
29211
29212   /* Check plain VEC_SELECT first, because AVX has instructions that could
29213      match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
29214      input where SEL+CONCAT may not.  */
29215   if (d->op0 == d->op1)
29216     {
29217       if (expand_vselect (d->target, d->op0, d->perm, nelt))
29218         return true;
29219
29220       /* There are plenty of patterns in sse.md that are written for
29221          SEL+CONCAT and are not replicated for a single op.  Perhaps
29222          that should be changed, to avoid the nastiness here.  */
29223
29224       /* Recognize interleave style patterns, which means incrementing
29225          every other permutation operand.  */
29226       for (i = 0; i < nelt; i += 2)
29227         {
29228           perm2[i] = d->perm[i];
29229           perm2[i+1] = d->perm[i+1] + nelt;
29230         }
29231       if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
29232         return true;
29233
29234       /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
29235       if (nelt >= 4)
29236         {
29237           memcpy (perm2, d->perm, nelt);
29238           for (i = 2; i < nelt; i += 4)
29239             {
29240               perm2[i+0] += nelt;
29241               perm2[i+1] += nelt;
29242             }
29243
29244           if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
29245             return true;
29246         }
29247     }
29248
29249   /* Finally, try the fully general two operand permute.  */
29250   if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
29251     return true;
29252
29253   /* Recognize interleave style patterns with reversed operands.  */
29254   if (d->op0 != d->op1)
29255     {
29256       for (i = 0; i < nelt; ++i)
29257         {
29258           unsigned e = d->perm[i];
29259           if (e >= nelt)
29260             e -= nelt;
29261           else
29262             e += nelt;
29263           perm2[i] = e;
29264         }
29265
29266       if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
29267         return true;
29268     }
29269
29270   /* Try the SSE4.1 blend variable merge instructions.  */
29271   if (expand_vec_perm_blend (d))
29272     return true;
29273
29274   /* Try one of the AVX vpermil variable permutations.  */
29275   if (expand_vec_perm_vpermil (d))
29276     return true;
29277
29278   /* Try the SSSE3 pshufb or XOP vpperm variable permutation.  */
29279   if (expand_vec_perm_pshufb (d))
29280     return true;
29281
29282   return false;
29283 }
29284
29285 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
29286    in terms of a pair of pshuflw + pshufhw instructions.  */
29287
29288 static bool
29289 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
29290 {
29291   unsigned char perm2[MAX_VECT_LEN];
29292   unsigned i;
29293   bool ok;
29294
29295   if (d->vmode != V8HImode || d->op0 != d->op1)
29296     return false;
29297
29298   /* The two permutations only operate in 64-bit lanes.  */
29299   for (i = 0; i < 4; ++i)
29300     if (d->perm[i] >= 4)
29301       return false;
29302   for (i = 4; i < 8; ++i)
29303     if (d->perm[i] < 4)
29304       return false;
29305
29306   if (d->testing_p)
29307     return true;
29308
29309   /* Emit the pshuflw.  */
29310   memcpy (perm2, d->perm, 4);
29311   for (i = 4; i < 8; ++i)
29312     perm2[i] = i;
29313   ok = expand_vselect (d->target, d->op0, perm2, 8);
29314   gcc_assert (ok);
29315
29316   /* Emit the pshufhw.  */
29317   memcpy (perm2 + 4, d->perm + 4, 4);
29318   for (i = 0; i < 4; ++i)
29319     perm2[i] = i;
29320   ok = expand_vselect (d->target, d->target, perm2, 8);
29321   gcc_assert (ok);
29322
29323   return true;
29324 }
29325
29326 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
29327    the permutation using the SSSE3 palignr instruction.  This succeeds
29328    when all of the elements in PERM fit within one vector and we merely
29329    need to shift them down so that a single vector permutation has a
29330    chance to succeed.  */
29331
29332 static bool
29333 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
29334 {
29335   unsigned i, nelt = d->nelt;
29336   unsigned min, max;
29337   bool in_order, ok;
29338   rtx shift;
29339
29340   /* Even with AVX, palignr only operates on 128-bit vectors.  */
29341   if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
29342     return false;
29343
29344   min = nelt, max = 0;
29345   for (i = 0; i < nelt; ++i)
29346     {
29347       unsigned e = d->perm[i];
29348       if (e < min)
29349         min = e;
29350       if (e > max)
29351         max = e;
29352     }
29353   if (min == 0 || max - min >= nelt)
29354     return false;
29355
29356   /* Given that we have SSSE3, we know we'll be able to implement the
29357      single operand permutation after the palignr with pshufb.  */
29358   if (d->testing_p)
29359     return true;
29360
29361   shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
29362   emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
29363                                   gen_lowpart (TImode, d->op1),
29364                                   gen_lowpart (TImode, d->op0), shift));
29365
29366   d->op0 = d->op1 = d->target;
29367
29368   in_order = true;
29369   for (i = 0; i < nelt; ++i)
29370     {
29371       unsigned e = d->perm[i] - min;
29372       if (e != i)
29373         in_order = false;
29374       d->perm[i] = e;
29375     }
29376
29377   /* Test for the degenerate case where the alignment by itself
29378      produces the desired permutation.  */
29379   if (in_order)
29380     return true;
29381
29382   ok = expand_vec_perm_1 (d);
29383   gcc_assert (ok);
29384
29385   return ok;
29386 }
29387
29388 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
29389    a two vector permutation into a single vector permutation by using
29390    an interleave operation to merge the vectors.  */
29391
29392 static bool
29393 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
29394 {
29395   struct expand_vec_perm_d dremap, dfinal;
29396   unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
29397   unsigned contents, h1, h2, h3, h4;
29398   unsigned char remap[2 * MAX_VECT_LEN];
29399   rtx seq;
29400   bool ok;
29401
29402   if (d->op0 == d->op1)
29403     return false;
29404
29405   /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
29406      lanes.  We can use similar techniques with the vperm2f128 instruction,
29407      but it requires slightly different logic.  */
29408   if (GET_MODE_SIZE (d->vmode) != 16)
29409     return false;
29410
29411   /* Examine from whence the elements come.  */
29412   contents = 0;
29413   for (i = 0; i < nelt; ++i)
29414     contents |= 1u << d->perm[i];
29415
29416   /* Split the two input vectors into 4 halves.  */
29417   h1 = (1u << nelt2) - 1;
29418   h2 = h1 << nelt2;
29419   h3 = h2 << nelt2;
29420   h4 = h3 << nelt2;
29421
29422   memset (remap, 0xff, sizeof (remap));
29423   dremap = *d;
29424
29425   /* If the elements from the low halves use interleave low, and similarly
29426      for interleave high.  If the elements are from mis-matched halves, we
29427      can use shufps for V4SF/V4SI or do a DImode shuffle.  */
29428   if ((contents & (h1 | h3)) == contents)
29429     {
29430       for (i = 0; i < nelt2; ++i)
29431         {
29432           remap[i] = i * 2;
29433           remap[i + nelt] = i * 2 + 1;
29434           dremap.perm[i * 2] = i;
29435           dremap.perm[i * 2 + 1] = i + nelt;
29436         }
29437     }
29438   else if ((contents & (h2 | h4)) == contents)
29439     {
29440       for (i = 0; i < nelt2; ++i)
29441         {
29442           remap[i + nelt2] = i * 2;
29443           remap[i + nelt + nelt2] = i * 2 + 1;
29444           dremap.perm[i * 2] = i + nelt2;
29445           dremap.perm[i * 2 + 1] = i + nelt + nelt2;
29446         }
29447     }
29448   else if ((contents & (h1 | h4)) == contents)
29449     {
29450       for (i = 0; i < nelt2; ++i)
29451         {
29452           remap[i] = i;
29453           remap[i + nelt + nelt2] = i + nelt2;
29454           dremap.perm[i] = i;
29455           dremap.perm[i + nelt2] = i + nelt + nelt2;
29456         }
29457       if (nelt != 4)
29458         {
29459           dremap.vmode = V2DImode;
29460           dremap.nelt = 2;
29461           dremap.perm[0] = 0;
29462           dremap.perm[1] = 3;
29463         }
29464     }
29465   else if ((contents & (h2 | h3)) == contents)
29466     {
29467       for (i = 0; i < nelt2; ++i)
29468         {
29469           remap[i + nelt2] = i;
29470           remap[i + nelt] = i + nelt2;
29471           dremap.perm[i] = i + nelt2;
29472           dremap.perm[i + nelt2] = i + nelt;
29473         }
29474       if (nelt != 4)
29475         {
29476           dremap.vmode = V2DImode;
29477           dremap.nelt = 2;
29478           dremap.perm[0] = 1;
29479           dremap.perm[1] = 2;
29480         }
29481     }
29482   else
29483     return false;
29484
29485   /* Use the remapping array set up above to move the elements from their
29486      swizzled locations into their final destinations.  */
29487   dfinal = *d;
29488   for (i = 0; i < nelt; ++i)
29489     {
29490       unsigned e = remap[d->perm[i]];
29491       gcc_assert (e < nelt);
29492       dfinal.perm[i] = e;
29493     }
29494   dfinal.op0 = gen_reg_rtx (dfinal.vmode);
29495   dfinal.op1 = dfinal.op0;
29496   dremap.target = dfinal.op0;
29497
29498   /* Test if the final remap can be done with a single insn.  For V4SFmode or
29499      V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
29500   start_sequence ();
29501   ok = expand_vec_perm_1 (&dfinal);
29502   seq = get_insns ();
29503   end_sequence ();
29504
29505   if (!ok)
29506     return false;
29507
29508   if (dremap.vmode != dfinal.vmode)
29509     {
29510       dremap.target = gen_lowpart (dremap.vmode, dremap.target);
29511       dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
29512       dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
29513     }
29514
29515   ok = expand_vec_perm_1 (&dremap);
29516   gcc_assert (ok);
29517
29518   emit_insn (seq);
29519   return true;
29520 }
29521
29522 /* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
29523    permutation with two pshufb insns and an ior.  We should have already
29524    failed all two instruction sequences.  */
29525
29526 static bool
29527 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
29528 {
29529   rtx rperm[2][16], vperm, l, h, op, m128;
29530   unsigned int i, nelt, eltsz;
29531
29532   if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
29533     return false;
29534   gcc_assert (d->op0 != d->op1);
29535
29536   nelt = d->nelt;
29537   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
29538
29539   /* Generate two permutation masks.  If the required element is within
29540      the given vector it is shuffled into the proper lane.  If the required
29541      element is in the other vector, force a zero into the lane by setting
29542      bit 7 in the permutation mask.  */
29543   m128 = GEN_INT (-128);
29544   for (i = 0; i < nelt; ++i)
29545     {
29546       unsigned j, e = d->perm[i];
29547       unsigned which = (e >= nelt);
29548       if (e >= nelt)
29549         e -= nelt;
29550
29551       for (j = 0; j < eltsz; ++j)
29552         {
29553           rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
29554           rperm[1-which][i*eltsz + j] = m128;
29555         }
29556     }
29557
29558   vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
29559   vperm = force_reg (V16QImode, vperm);
29560
29561   l = gen_reg_rtx (V16QImode);
29562   op = gen_lowpart (V16QImode, d->op0);
29563   emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
29564
29565   vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
29566   vperm = force_reg (V16QImode, vperm);
29567
29568   h = gen_reg_rtx (V16QImode);
29569   op = gen_lowpart (V16QImode, d->op1);
29570   emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
29571
29572   op = gen_lowpart (V16QImode, d->target);
29573   emit_insn (gen_iorv16qi3 (op, l, h));
29574
29575   return true;
29576 }
29577
29578 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
29579    and extract-odd permutations.  */
29580
29581 static bool
29582 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
29583 {
29584   rtx t1, t2, t3, t4;
29585
29586   switch (d->vmode)
29587     {
29588     case V4DFmode:
29589       t1 = gen_reg_rtx (V4DFmode);
29590       t2 = gen_reg_rtx (V4DFmode);
29591
29592       /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
29593       emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
29594       emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
29595
29596       /* Now an unpck[lh]pd will produce the result required.  */
29597       if (odd)
29598         t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
29599       else
29600         t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
29601       emit_insn (t3);
29602       break;
29603
29604     case V8SFmode:
29605       {
29606         static const unsigned char perm1[8] = { 0, 2, 1, 3, 5, 6, 5, 7 };
29607         static const unsigned char perme[8] = { 0, 1,  8,  9, 4, 5, 12, 13 };
29608         static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
29609
29610         t1 = gen_reg_rtx (V8SFmode);
29611         t2 = gen_reg_rtx (V8SFmode);
29612         t3 = gen_reg_rtx (V8SFmode);
29613         t4 = gen_reg_rtx (V8SFmode);
29614
29615         /* Shuffle within the 128-bit lanes to produce:
29616            { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }.  */
29617         expand_vselect (t1, d->op0, perm1, 8);
29618         expand_vselect (t2, d->op1, perm1, 8);
29619
29620         /* Shuffle the lanes around to produce:
29621            { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
29622         emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
29623         emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
29624
29625         /* Now a vpermil2p will produce the result required.  */
29626         /* ??? The vpermil2p requires a vector constant.  Another option
29627            is a unpck[lh]ps to merge the two vectors to produce
29628            { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }.  Then use another
29629            vpermilps to get the elements into the final order.  */
29630         d->op0 = t3;
29631         d->op1 = t4;
29632         memcpy (d->perm, odd ? permo: perme, 8);
29633         expand_vec_perm_vpermil (d);
29634       }
29635       break;
29636
29637     case V2DFmode:
29638     case V4SFmode:
29639     case V2DImode:
29640     case V4SImode:
29641       /* These are always directly implementable by expand_vec_perm_1.  */
29642       gcc_unreachable ();
29643
29644     case V8HImode:
29645       if (TARGET_SSSE3)
29646         return expand_vec_perm_pshufb2 (d);
29647       else
29648         {
29649           /* We need 2*log2(N)-1 operations to achieve odd/even
29650              with interleave. */
29651           t1 = gen_reg_rtx (V8HImode);
29652           t2 = gen_reg_rtx (V8HImode);
29653           emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
29654           emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
29655           emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
29656           emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
29657           if (odd)
29658             t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
29659           else
29660             t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
29661           emit_insn (t3);
29662         }
29663       break;
29664
29665     case V16QImode:
29666       if (TARGET_SSSE3)
29667         return expand_vec_perm_pshufb2 (d);
29668       else
29669         {
29670           t1 = gen_reg_rtx (V16QImode);
29671           t2 = gen_reg_rtx (V16QImode);
29672           t3 = gen_reg_rtx (V16QImode);
29673           emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
29674           emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
29675           emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
29676           emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
29677           emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
29678           emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
29679           if (odd)
29680             t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
29681           else
29682             t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
29683           emit_insn (t3);
29684         }
29685       break;
29686
29687     default:
29688       gcc_unreachable ();
29689     }
29690
29691   return true;
29692 }
29693
29694 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
29695    extract-even and extract-odd permutations.  */
29696
29697 static bool
29698 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
29699 {
29700   unsigned i, odd, nelt = d->nelt;
29701
29702   odd = d->perm[0];
29703   if (odd != 0 && odd != 1)
29704     return false;
29705
29706   for (i = 1; i < nelt; ++i)
29707     if (d->perm[i] != 2 * i + odd)
29708       return false;
29709
29710   return expand_vec_perm_even_odd_1 (d, odd);
29711 }
29712
29713 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
29714    permutations.  We assume that expand_vec_perm_1 has already failed.  */
29715
29716 static bool
29717 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
29718 {
29719   unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
29720   enum machine_mode vmode = d->vmode;
29721   unsigned char perm2[4];
29722   rtx op0 = d->op0;
29723   bool ok;
29724
29725   switch (vmode)
29726     {
29727     case V4DFmode:
29728     case V8SFmode:
29729       /* These are special-cased in sse.md so that we can optionally
29730          use the vbroadcast instruction.  They expand to two insns
29731          if the input happens to be in a register.  */
29732       gcc_unreachable ();
29733
29734     case V2DFmode:
29735     case V2DImode:
29736     case V4SFmode:
29737     case V4SImode:
29738       /* These are always implementable using standard shuffle patterns.  */
29739       gcc_unreachable ();
29740
29741     case V8HImode:
29742     case V16QImode:
29743       /* These can be implemented via interleave.  We save one insn by
29744          stopping once we have promoted to V4SImode and then use pshufd.  */
29745       do
29746         {
29747           optab otab = vec_interleave_low_optab;
29748
29749           if (elt >= nelt2)
29750             {
29751               otab = vec_interleave_high_optab;
29752               elt -= nelt2;
29753             }
29754           nelt2 /= 2;
29755
29756           op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
29757           vmode = get_mode_wider_vector (vmode);
29758           op0 = gen_lowpart (vmode, op0);
29759         }
29760       while (vmode != V4SImode);
29761
29762       memset (perm2, elt, 4);
29763       ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
29764       gcc_assert (ok);
29765       return true;
29766
29767     default:
29768       gcc_unreachable ();
29769     }
29770 }
29771
29772 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
29773    broadcast permutations.  */
29774
29775 static bool
29776 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
29777 {
29778   unsigned i, elt, nelt = d->nelt;
29779
29780   if (d->op0 != d->op1)
29781     return false;
29782
29783   elt = d->perm[0];
29784   for (i = 1; i < nelt; ++i)
29785     if (d->perm[i] != elt)
29786       return false;
29787
29788   return expand_vec_perm_broadcast_1 (d);
29789 }
29790
29791 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
29792    With all of the interface bits taken care of, perform the expansion
29793    in D and return true on success.  */
29794
29795 static bool
29796 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
29797 {
29798   /* Try a single instruction expansion.  */
29799   if (expand_vec_perm_1 (d))
29800     return true;
29801
29802   /* Try sequences of two instructions.  */
29803
29804   if (expand_vec_perm_pshuflw_pshufhw (d))
29805     return true;
29806
29807   if (expand_vec_perm_palignr (d))
29808     return true;
29809
29810   if (expand_vec_perm_interleave2 (d))
29811     return true;
29812
29813   if (expand_vec_perm_broadcast (d))
29814     return true;
29815
29816   /* Try sequences of three instructions.  */
29817
29818   if (expand_vec_perm_pshufb2 (d))
29819     return true;
29820
29821   /* ??? Look for narrow permutations whose element orderings would
29822      allow the promotion to a wider mode.  */
29823
29824   /* ??? Look for sequences of interleave or a wider permute that place
29825      the data into the correct lanes for a half-vector shuffle like
29826      pshuf[lh]w or vpermilps.  */
29827
29828   /* ??? Look for sequences of interleave that produce the desired results.
29829      The combinatorics of punpck[lh] get pretty ugly... */
29830
29831   if (expand_vec_perm_even_odd (d))
29832     return true;
29833
29834   return false;
29835 }
29836
29837 /* Extract the values from the vector CST into the permutation array in D.
29838    Return 0 on error, 1 if all values from the permutation come from the
29839    first vector, 2 if all values from the second vector, and 3 otherwise.  */
29840
29841 static int
29842 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
29843 {
29844   tree list = TREE_VECTOR_CST_ELTS (cst);
29845   unsigned i, nelt = d->nelt;
29846   int ret = 0;
29847
29848   for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
29849     {
29850       unsigned HOST_WIDE_INT e;
29851
29852       if (!host_integerp (TREE_VALUE (list), 1))
29853         return 0;
29854       e = tree_low_cst (TREE_VALUE (list), 1);
29855       if (e >= 2 * nelt)
29856         return 0;
29857
29858       ret |= (e < nelt ? 1 : 2);
29859       d->perm[i] = e;
29860     }
29861   gcc_assert (list == NULL);
29862
29863   /* For all elements from second vector, fold the elements to first.  */
29864   if (ret == 2)
29865     for (i = 0; i < nelt; ++i)
29866       d->perm[i] -= nelt;
29867
29868   return ret;
29869 }
29870
29871 static rtx
29872 ix86_expand_vec_perm_builtin (tree exp)
29873 {
29874   struct expand_vec_perm_d d;
29875   tree arg0, arg1, arg2;
29876
29877   arg0 = CALL_EXPR_ARG (exp, 0);
29878   arg1 = CALL_EXPR_ARG (exp, 1);
29879   arg2 = CALL_EXPR_ARG (exp, 2);
29880
29881   d.vmode = TYPE_MODE (TREE_TYPE (arg0));
29882   d.nelt = GET_MODE_NUNITS (d.vmode);
29883   d.testing_p = false;
29884   gcc_assert (VECTOR_MODE_P (d.vmode));
29885
29886   if (TREE_CODE (arg2) != VECTOR_CST)
29887     {
29888       error_at (EXPR_LOCATION (exp),
29889                 "vector permutation requires vector constant");
29890       goto exit_error;
29891     }
29892
29893   switch (extract_vec_perm_cst (&d, arg2))
29894     {
29895     default:
29896       gcc_unreachable();
29897
29898     case 0:
29899       error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
29900       goto exit_error;
29901
29902     case 3:
29903       if (!operand_equal_p (arg0, arg1, 0))
29904         {
29905           d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
29906           d.op0 = force_reg (d.vmode, d.op0);
29907           d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
29908           d.op1 = force_reg (d.vmode, d.op1);
29909           break;
29910         }
29911
29912       /* The elements of PERM do not suggest that only the first operand
29913          is used, but both operands are identical.  Allow easier matching
29914          of the permutation by folding the permutation into the single
29915          input vector.  */
29916       {
29917         unsigned i, nelt = d.nelt;
29918         for (i = 0; i < nelt; ++i)
29919           if (d.perm[i] >= nelt)
29920             d.perm[i] -= nelt;
29921       }
29922       /* FALLTHRU */
29923
29924     case 1:
29925       d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
29926       d.op0 = force_reg (d.vmode, d.op0);
29927       d.op1 = d.op0;
29928       break;
29929
29930     case 2:
29931       d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
29932       d.op0 = force_reg (d.vmode, d.op0);
29933       d.op1 = d.op0;
29934       break;
29935     }
29936
29937   d.target = gen_reg_rtx (d.vmode);
29938   if (ix86_expand_vec_perm_builtin_1 (&d))
29939     return d.target;
29940
29941   /* For compiler generated permutations, we should never got here, because
29942      the compiler should also be checking the ok hook.  But since this is a
29943      builtin the user has access too, so don't abort.  */
29944   switch (d.nelt)
29945     {
29946     case 2:
29947       sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
29948       break;
29949     case 4:
29950       sorry ("vector permutation (%d %d %d %d)",
29951              d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
29952       break;
29953     case 8:
29954       sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
29955              d.perm[0], d.perm[1], d.perm[2], d.perm[3],
29956              d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
29957       break;
29958     case 16:
29959       sorry ("vector permutation "
29960              "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
29961              d.perm[0], d.perm[1], d.perm[2], d.perm[3],
29962              d.perm[4], d.perm[5], d.perm[6], d.perm[7],
29963              d.perm[8], d.perm[9], d.perm[10], d.perm[11],
29964              d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
29965       break;
29966     default:
29967       gcc_unreachable ();
29968     }
29969  exit_error:
29970   return CONST0_RTX (d.vmode);
29971 }
29972
29973 /* Implement targetm.vectorize.builtin_vec_perm_ok.  */
29974
29975 static bool
29976 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
29977 {
29978   struct expand_vec_perm_d d;
29979   int vec_mask;
29980   bool ret, one_vec;
29981
29982   d.vmode = TYPE_MODE (vec_type);
29983   d.nelt = GET_MODE_NUNITS (d.vmode);
29984   d.testing_p = true;
29985
29986   /* Given sufficient ISA support we can just return true here
29987      for selected vector modes.  */
29988   if (GET_MODE_SIZE (d.vmode) == 16)
29989     {
29990       /* All implementable with a single vpperm insn.  */
29991       if (TARGET_XOP)
29992         return true;
29993       /* All implementable with 2 pshufb + 1 ior.  */
29994       if (TARGET_SSSE3)
29995         return true;
29996       /* All implementable with shufpd or unpck[lh]pd.  */
29997       if (d.nelt == 2)
29998         return true;
29999     }
30000
30001   vec_mask = extract_vec_perm_cst (&d, mask);
30002
30003   /* This hook is cannot be called in response to something that the
30004      user does (unlike the builtin expander) so we shouldn't ever see
30005      an error generated from the extract.  */
30006   gcc_assert (vec_mask > 0 && vec_mask <= 3);
30007   one_vec = (vec_mask != 3);
30008
30009   /* Implementable with shufps or pshufd.  */
30010   if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
30011     return true;
30012
30013   /* Otherwise we have to go through the motions and see if we can
30014      figure out how to generate the requested permutation.  */
30015   d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30016   d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30017   if (!one_vec)
30018     d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30019
30020   start_sequence ();
30021   ret = ix86_expand_vec_perm_builtin_1 (&d);
30022   end_sequence ();
30023
30024   return ret;
30025 }
30026
30027 void
30028 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
30029 {
30030   struct expand_vec_perm_d d;
30031   unsigned i, nelt;
30032
30033   d.target = targ;
30034   d.op0 = op0;
30035   d.op1 = op1;
30036   d.vmode = GET_MODE (targ);
30037   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30038   d.testing_p = false;
30039
30040   for (i = 0; i < nelt; ++i)
30041     d.perm[i] = i * 2 + odd;
30042
30043   /* We'll either be able to implement the permutation directly...  */
30044   if (expand_vec_perm_1 (&d))
30045     return;
30046
30047   /* ... or we use the special-case patterns.  */
30048   expand_vec_perm_even_odd_1 (&d, odd);
30049 }
30050 \f
30051 /* This function returns the calling abi specific va_list type node.
30052    It returns  the FNDECL specific va_list type.  */
30053
30054 tree
30055 ix86_fn_abi_va_list (tree fndecl)
30056 {
30057   if (!TARGET_64BIT)
30058     return va_list_type_node;
30059   gcc_assert (fndecl != NULL_TREE);
30060
30061   if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30062     return ms_va_list_type_node;
30063   else
30064     return sysv_va_list_type_node;
30065 }
30066
30067 /* Returns the canonical va_list type specified by TYPE. If there
30068    is no valid TYPE provided, it return NULL_TREE.  */
30069
30070 tree
30071 ix86_canonical_va_list_type (tree type)
30072 {
30073   tree wtype, htype;
30074
30075   /* Resolve references and pointers to va_list type.  */
30076   if (INDIRECT_REF_P (type))
30077     type = TREE_TYPE (type);
30078   else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
30079     type = TREE_TYPE (type);
30080
30081   if (TARGET_64BIT)
30082     {
30083       wtype = va_list_type_node;
30084           gcc_assert (wtype != NULL_TREE);
30085       htype = type;
30086       if (TREE_CODE (wtype) == ARRAY_TYPE)
30087         {
30088           /* If va_list is an array type, the argument may have decayed
30089              to a pointer type, e.g. by being passed to another function.
30090              In that case, unwrap both types so that we can compare the
30091              underlying records.  */
30092           if (TREE_CODE (htype) == ARRAY_TYPE
30093               || POINTER_TYPE_P (htype))
30094             {
30095               wtype = TREE_TYPE (wtype);
30096               htype = TREE_TYPE (htype);
30097             }
30098         }
30099       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30100         return va_list_type_node;
30101       wtype = sysv_va_list_type_node;
30102           gcc_assert (wtype != NULL_TREE);
30103       htype = type;
30104       if (TREE_CODE (wtype) == ARRAY_TYPE)
30105         {
30106           /* If va_list is an array type, the argument may have decayed
30107              to a pointer type, e.g. by being passed to another function.
30108              In that case, unwrap both types so that we can compare the
30109              underlying records.  */
30110           if (TREE_CODE (htype) == ARRAY_TYPE
30111               || POINTER_TYPE_P (htype))
30112             {
30113               wtype = TREE_TYPE (wtype);
30114               htype = TREE_TYPE (htype);
30115             }
30116         }
30117       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30118         return sysv_va_list_type_node;
30119       wtype = ms_va_list_type_node;
30120           gcc_assert (wtype != NULL_TREE);
30121       htype = type;
30122       if (TREE_CODE (wtype) == ARRAY_TYPE)
30123         {
30124           /* If va_list is an array type, the argument may have decayed
30125              to a pointer type, e.g. by being passed to another function.
30126              In that case, unwrap both types so that we can compare the
30127              underlying records.  */
30128           if (TREE_CODE (htype) == ARRAY_TYPE
30129               || POINTER_TYPE_P (htype))
30130             {
30131               wtype = TREE_TYPE (wtype);
30132               htype = TREE_TYPE (htype);
30133             }
30134         }
30135       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30136         return ms_va_list_type_node;
30137       return NULL_TREE;
30138     }
30139   return std_canonical_va_list_type (type);
30140 }
30141
30142 /* Iterate through the target-specific builtin types for va_list.
30143     IDX denotes the iterator, *PTREE is set to the result type of
30144     the va_list builtin, and *PNAME to its internal type.
30145     Returns zero if there is no element for this index, otherwise
30146     IDX should be increased upon the next call.
30147     Note, do not iterate a base builtin's name like __builtin_va_list.
30148     Used from c_common_nodes_and_builtins.  */
30149
30150 int
30151 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
30152 {
30153   if (!TARGET_64BIT)
30154     return 0;
30155   switch (idx) {
30156   case 0:
30157     *ptree = ms_va_list_type_node;
30158     *pname = "__builtin_ms_va_list";
30159     break;
30160   case 1:
30161     *ptree = sysv_va_list_type_node;
30162     *pname = "__builtin_sysv_va_list";
30163     break;
30164   default:
30165     return 0;
30166   }
30167   return 1;
30168 }
30169
30170 /* Initialize the GCC target structure.  Each TARGET_* hook macro in
      the list below is first undefined and then redefined with the
      i386-specific value; the list ends with the definition of targetm,
      which is initialized from TARGET_INITIALIZER.
      NOTE(review): the defaults being replaced and TARGET_INITIALIZER
      itself presumably come from target-def.h, which is included at the
      top of this file -- confirm there.  */
30171 #undef TARGET_RETURN_IN_MEMORY
30172 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30173
30174 #undef TARGET_LEGITIMIZE_ADDRESS
30175 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30176
30177 #undef TARGET_ATTRIBUTE_TABLE
30178 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30179 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30180 #  undef TARGET_MERGE_DECL_ATTRIBUTES
30181 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30182 #endif
30183
30184 #undef TARGET_COMP_TYPE_ATTRIBUTES
30185 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30186
30187 #undef TARGET_INIT_BUILTINS
30188 #define TARGET_INIT_BUILTINS ix86_init_builtins
30189 #undef TARGET_BUILTIN_DECL
30190 #define TARGET_BUILTIN_DECL ix86_builtin_decl
30191 #undef TARGET_EXPAND_BUILTIN
30192 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30193
30194 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30195 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30196   ix86_builtin_vectorized_function
30197
30198 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30199 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30200
30201 #undef TARGET_BUILTIN_RECIPROCAL
30202 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30203
30204 #undef TARGET_ASM_FUNCTION_EPILOGUE
30205 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30206
      /* A configuration that defines SUBTARGET_ENCODE_SECTION_INFO
         supplies the hook itself; otherwise ix86_encode_section_info is
         used.  */
30207 #undef TARGET_ENCODE_SECTION_INFO
30208 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30209 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30210 #else
30211 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30212 #endif
30213
30214 #undef TARGET_ASM_OPEN_PAREN
30215 #define TARGET_ASM_OPEN_PAREN ""
30216 #undef TARGET_ASM_CLOSE_PAREN
30217 #define TARGET_ASM_CLOSE_PAREN ""
30218
30219 #undef TARGET_ASM_BYTE_OP
30220 #define TARGET_ASM_BYTE_OP ASM_BYTE
30221
      /* The aligned DI directive is only overridden when ASM_QUAD is
         defined.  */
30222 #undef TARGET_ASM_ALIGNED_HI_OP
30223 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30224 #undef TARGET_ASM_ALIGNED_SI_OP
30225 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30226 #ifdef ASM_QUAD
30227 #undef TARGET_ASM_ALIGNED_DI_OP
30228 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30229 #endif
30230
      /* The unaligned data directives simply reuse the aligned ones.  */
30231 #undef TARGET_ASM_UNALIGNED_HI_OP
30232 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30233 #undef TARGET_ASM_UNALIGNED_SI_OP
30234 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30235 #undef TARGET_ASM_UNALIGNED_DI_OP
30236 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30237
30238 #undef TARGET_SCHED_ADJUST_COST
30239 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30240 #undef TARGET_SCHED_ISSUE_RATE
30241 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30242 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30243 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30244   ia32_multipass_dfa_lookahead
30245
30246 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30247 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30248
      /* TARGET_HAVE_TLS is only overridden when HAVE_AS_TLS is defined.  */
30249 #ifdef HAVE_AS_TLS
30250 #undef TARGET_HAVE_TLS
30251 #define TARGET_HAVE_TLS true
30252 #endif
30253 #undef TARGET_CANNOT_FORCE_CONST_MEM
30254 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30255 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30256 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30257
30258 #undef TARGET_DELEGITIMIZE_ADDRESS
30259 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30260
30261 #undef TARGET_MS_BITFIELD_LAYOUT_P
30262 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30263
      /* TARGET_MACHO configurations use darwin_binds_local_p and
         TARGET_DLLIMPORT_DECL_ATTRIBUTES configurations use
         i386_pe_binds_local_p.  The two conditionals are independent, so
         if both are nonzero the second definition replaces the first.  */
30264 #if TARGET_MACHO
30265 #undef TARGET_BINDS_LOCAL_P
30266 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30267 #endif
30268 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30269 #undef TARGET_BINDS_LOCAL_P
30270 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30271 #endif
30272
30273 #undef TARGET_ASM_OUTPUT_MI_THUNK
30274 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30275 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30276 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30277
30278 #undef TARGET_ASM_FILE_START
30279 #define TARGET_ASM_FILE_START x86_file_start
30280
30281 #undef TARGET_DEFAULT_TARGET_FLAGS
30282 #define TARGET_DEFAULT_TARGET_FLAGS     \
30283   (TARGET_DEFAULT                       \
30284    | TARGET_SUBTARGET_DEFAULT           \
30285    | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
30286    | MASK_FUSED_MADD)
30287
30288 #undef TARGET_HANDLE_OPTION
30289 #define TARGET_HANDLE_OPTION ix86_handle_option
30290
30291 #undef TARGET_RTX_COSTS
30292 #define TARGET_RTX_COSTS ix86_rtx_costs
30293 #undef TARGET_ADDRESS_COST
30294 #define TARGET_ADDRESS_COST ix86_address_cost
30295
30296 #undef TARGET_FIXED_CONDITION_CODE_REGS
30297 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30298 #undef TARGET_CC_MODES_COMPATIBLE
30299 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30300
30301 #undef TARGET_MACHINE_DEPENDENT_REORG
30302 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30303
30304 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30305 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
30306
30307 #undef TARGET_BUILD_BUILTIN_VA_LIST
30308 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30309
30310 #undef TARGET_FN_ABI_VA_LIST
30311 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30312
30313 #undef TARGET_CANONICAL_VA_LIST_TYPE
30314 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30315
30316 #undef TARGET_EXPAND_BUILTIN_VA_START
30317 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30318
30319 #undef TARGET_MD_ASM_CLOBBERS
30320 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
30321
30322 #undef TARGET_PROMOTE_PROTOTYPES
30323 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30324 #undef TARGET_STRUCT_VALUE_RTX
30325 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30326 #undef TARGET_SETUP_INCOMING_VARARGS
30327 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30328 #undef TARGET_MUST_PASS_IN_STACK
30329 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30330 #undef TARGET_PASS_BY_REFERENCE
30331 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30332 #undef TARGET_INTERNAL_ARG_POINTER
30333 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30334 #undef TARGET_UPDATE_STACK_BOUNDARY
30335 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30336 #undef TARGET_GET_DRAP_RTX
30337 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30338 #undef TARGET_STRICT_ARGUMENT_NAMING
30339 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30340 #undef TARGET_STATIC_CHAIN
30341 #define TARGET_STATIC_CHAIN ix86_static_chain
30342 #undef TARGET_TRAMPOLINE_INIT
30343 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
30344
30345 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30346 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30347
30348 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30349 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30350
30351 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30352 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30353
30354 #undef TARGET_C_MODE_FOR_SUFFIX
30355 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30356
30357 #ifdef HAVE_AS_TLS
30358 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30359 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30360 #endif
30361
30362 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30363 #undef TARGET_INSERT_ATTRIBUTES
30364 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30365 #endif
30366
30367 #undef TARGET_MANGLE_TYPE
30368 #define TARGET_MANGLE_TYPE ix86_mangle_type
30369
30370 #undef TARGET_STACK_PROTECT_FAIL
30371 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30372
30373 #undef TARGET_FUNCTION_VALUE
30374 #define TARGET_FUNCTION_VALUE ix86_function_value
30375
30376 #undef TARGET_SECONDARY_RELOAD
30377 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30378
30379 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30380 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
30381   ix86_builtin_vectorization_cost
30382 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
30383 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
30384   ix86_vectorize_builtin_vec_perm
30385 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
30386 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
30387   ix86_vectorize_builtin_vec_perm_ok
30388
30389 #undef TARGET_SET_CURRENT_FUNCTION
30390 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30391
30392 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30393 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30394
30395 #undef TARGET_OPTION_SAVE
30396 #define TARGET_OPTION_SAVE ix86_function_specific_save
30397
30398 #undef TARGET_OPTION_RESTORE
30399 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30400
30401 #undef TARGET_OPTION_PRINT
30402 #define TARGET_OPTION_PRINT ix86_function_specific_print
30403
30404 #undef TARGET_CAN_INLINE_P
30405 #define TARGET_CAN_INLINE_P ix86_can_inline_p
30406
30407 #undef TARGET_EXPAND_TO_RTL_HOOK
30408 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30409
30410 #undef TARGET_LEGITIMATE_ADDRESS_P
30411 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
30412
30413 #undef TARGET_IRA_COVER_CLASSES
30414 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
30415
30416 #undef TARGET_FRAME_POINTER_REQUIRED
30417 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
30418
30419 #undef TARGET_CAN_ELIMINATE
30420 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
30421
      /* The target hook vector for this back end.  It must follow all of
         the TARGET_* redefinitions above, since it is initialized from
         TARGET_INITIALIZER.  */
30422 struct gcc_target targetm = TARGET_INITIALIZER;
30423 \f
30424 #include "gt-i386.h"